feat(backend): replace Selenium with Playwright

BREAKING CHANGE: backend now requires Python 3.12 or newer.
This commit is contained in:
2026-05-04 21:20:30 +08:00
parent fa07b340e7
commit d811c20932
15 changed files with 451 additions and 1570 deletions
+5 -12
View File
@@ -38,18 +38,11 @@ SMTP_SENDER_PASSWORD=your-auth-code-here
# 是否使用 SSL/TLS(True/False,默认 True) # 是否使用 SSL/TLS(True/False,默认 True)
SMTP_USE_SSL=True SMTP_USE_SSL=True
# ==================== Selenium / Chrome 配置 ==================== # ==================== Playwright 浏览器配置 ====================
# Chrome 浏览器可执行文件路径(可选,留空则自动检测系统 Chrome) # 浏览器可执行文件路径(可选,留空则使用 Playwright 管理的 Chromium)
# Windows 示例:CHROME_BINARY_PATH=C:\Program Files\Google\Chrome\Application\chrome.exe # Windows 示例:BROWSER_EXECUTABLE_PATH=C:\Program Files\Google\Chrome\Application\chrome.exe
# Linux 示例:CHROME_BINARY_PATH=/usr/bin/google-chrome # Linux 示例:BROWSER_EXECUTABLE_PATH=/usr/bin/google-chrome
# 如果留空,Selenium 会使用系统默认 Chrome BROWSER_EXECUTABLE_PATH=
CHROME_BINARY_PATH=
# ChromeDriver 可执行文件路径(可选,留空则使用 Selenium Manager 自动下载)
# Windows 示例:CHROMEDRIVER_PATH=D:\chromedriver\chromedriver.exe
# Linux 示例:CHROMEDRIVER_PATH=/usr/local/bin/chromedriver
# 推荐留空,让 Selenium Manager 自动管理 ChromeDriver 版本
CHROMEDRIVER_PATH=
# ==================== 定时任务配置 ==================== # ==================== 定时任务配置 ====================
# 注意:每个任务的打卡时间由任务自身的 cron_expression 字段控制 # 注意:每个任务的打卡时间由任务自身的 cron_expression 字段控制
+4 -3
View File
@@ -2,7 +2,7 @@
[![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/) [![FastAPI](https://img.shields.io/badge/FastAPI-0.115+-green.svg)](https://fastapi.tiangolo.com/)
[![Vue 3](https://img.shields.io/badge/Vue-3.5+-brightgreen.svg)](https://vuejs.org/) [![Vue 3](https://img.shields.io/badge/Vue-3.5+-brightgreen.svg)](https://vuejs.org/)
[![Python](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://www.python.org/) [![Python](https://img.shields.io/badge/Python-3.12+-blue.svg)](https://www.python.org/)
接龙自动打卡系统,通过 QQ 登录实现每日自动考勤提交。 接龙自动打卡系统,通过 QQ 登录实现每日自动考勤提交。
@@ -18,7 +18,7 @@
## 技术栈 ## 技术栈
**后端**: FastAPI + SQLAlchemy + APScheduler + Selenium **后端**: FastAPI + SQLAlchemy + APScheduler + Playwright
**前端**: Vue 3 + TypeScript + shadcn-vue + Tailwind **前端**: Vue 3 + TypeScript + shadcn-vue + Tailwind
**数据库**: SQLite **数据库**: SQLite
@@ -26,7 +26,7 @@
### 环境要求 ### 环境要求
- Python 3.9+ - Python 3.12+
- uv - uv
- Node.js 20+ - Node.js 20+
- pnpm - pnpm
@@ -37,6 +37,7 @@
```bash ```bash
# 后端 # 后端
uv sync uv sync
uv run playwright install chromium
uv run python main.py backend uv run python main.py backend
# 前端 # 前端
+1 -1
View File
@@ -108,7 +108,7 @@ async def cancel_qrcode_session(session_id: str):
- **session_id**: 会话 ID - **session_id**: 会话 ID
用于用户关闭二维码对话框时,终止后台的 Selenium 进程 用于用户关闭二维码对话框时,终止后台的 Playwright 进程
""" """
try: try:
result = AuthService.cancel_qrcode_session(session_id) result = AuthService.cancel_qrcode_session(session_id)
+2 -3
View File
@@ -60,9 +60,8 @@ class Settings(BaseSettings):
TOKEN_CHECK_INTERVAL_MINUTES: int = 30 # Token 检查间隔(分钟) TOKEN_CHECK_INTERVAL_MINUTES: int = 30 # Token 检查间隔(分钟)
SESSION_CLEANUP_INTERVAL_HOURS: int = 24 # 会话清理间隔(小时) SESSION_CLEANUP_INTERVAL_HOURS: int = 24 # 会话清理间隔(小时)
# Selenium / Chrome 配置(从 .env 读取) # Playwright / browser 配置(从 .env 读取)
CHROME_BINARY_PATH: str = "" BROWSER_EXECUTABLE_PATH: str = ""
CHROMEDRIVER_PATH: str = ""
settings = Settings() settings = Settings()
+2 -2
View File
@@ -50,7 +50,7 @@ class AuthService:
# 老用户:刷新 Token # 老用户:刷新 Token
logger.info(f"老用户 {alias} 请求刷新 Token,会话: {session_id}") logger.info(f"老用户 {alias} 请求刷新 Token,会话: {session_id}")
# 在后台线程启动 Selenium,传入 jwt_sub # 在后台线程启动 Playwright,传入 jwt_sub
thread = threading.Thread( thread = threading.Thread(
target=get_token_headless, target=get_token_headless,
args=(session_id, existing_user.jwt_sub, alias, client_ip), args=(session_id, existing_user.jwt_sub, alias, client_ip),
@@ -69,7 +69,7 @@ class AuthService:
logger.info(f"新用户 {alias} 请求注册,会话: {session_id},已预占用户名") logger.info(f"新用户 {alias} 请求注册,会话: {session_id},已预占用户名")
# 在后台线程启动 Selenium,不传入 jwt_sub(新用户) # 在后台线程启动 Playwright,不传入 jwt_sub(新用户)
thread = threading.Thread( thread = threading.Thread(
target=get_token_headless, args=(session_id, None, alias, client_ip), daemon=True target=get_token_headless, args=(session_id, None, alias, client_ip), daemon=True
) )
+1 -1
View File
@@ -287,7 +287,7 @@ class CheckInService:
} }
# 执行打卡(传递 task 对象和用户 token) # 执行打卡(传递 task 对象和用户 token)
logger.info(f"🤖 调用 Selenium Worker 执行打卡...") logger.info(f"🤖 调用 Playwright Worker 执行打卡...")
result = perform_check_in(task, user.authorization) result = perform_check_in(task, user.authorization)
# 如果是 Token 过期导致的失败,处理 Token 过期情况 # 如果是 Token 过期导致的失败,处理 Token 过期情况
@@ -0,0 +1,70 @@
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Iterable, Mapping
# User-agent string used as the default for PlaywrightLaunchConfig.user_agent.
DEFAULT_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/140.0.0.0 Safari/537.36"
)
def _mapping_get(item: Any, key: str, default: Any = None) -> Any:
    """Read ``key`` from either a mapping or a plain object.

    Args:
        item: A ``Mapping`` (queried with ``.get``) or any other object
            (queried with ``getattr``).
        key: Mapping key or attribute name to look up.
        default: Value returned when the key/attribute is absent.

    Returns:
        The stored value, or ``default`` when it is missing.
    """
    # Non-mapping objects expose their fields as attributes.
    if not isinstance(item, Mapping):
        return getattr(item, key, default)
    return item.get(key, default)
@dataclass(frozen=True)
class PlaywrightLaunchConfig:
    """Immutable set of browser launch and browser-context options.

    The two ``to_*_kwargs`` methods turn the stored values into plain
    dictionaries of keyword arguments: one for launching the browser and
    one for creating a browser context.
    """

    # Browser binary to launch; an empty string means no explicit path is passed.
    executable_path: str = ""
    # Whether the browser is started without a visible window.
    headless: bool = True
    # Extra command-line switches handed to the browser process.
    args: tuple[str, ...] = field(
        default_factory=lambda: ("--no-sandbox", "--disable-dev-shm-usage")
    )
    # User-agent string set on the browser context.
    user_agent: str = DEFAULT_USER_AGENT
    # Viewport size (in pixels) set on the browser context.
    viewport_width: int = 1920
    viewport_height: int = 1080
    # Whether the browser context ignores HTTPS certificate errors.
    ignore_https_errors: bool = True

    def to_launch_kwargs(self) -> dict[str, Any]:
        """Build the keyword arguments for launching the browser.

        Returns:
            A dict with ``headless`` and ``args`` (as a fresh list), plus
            ``executable_path`` only when one is configured.
        """
        launch_options: dict[str, Any] = {
            "headless": self.headless,
            "args": [*self.args],
        }
        if not self.executable_path:
            return launch_options
        launch_options["executable_path"] = self.executable_path
        return launch_options

    def to_context_kwargs(self) -> dict[str, Any]:
        """Build the keyword arguments for creating a browser context.

        Returns:
            A dict with ``user_agent``, ``viewport`` (width/height mapping)
            and ``ignore_https_errors``.
        """
        viewport = {"width": self.viewport_width, "height": self.viewport_height}
        return {
            "user_agent": self.user_agent,
            "viewport": viewport,
            "ignore_https_errors": self.ignore_https_errors,
        }
def extract_cookie_value(cookies: Iterable[Any], cookie_name: str) -> str | None:
    """Find the value of a named cookie.

    Args:
        cookies: Cookie records, each either a mapping or an object exposing
            ``name`` and ``value``.
        cookie_name: Exact cookie name to match.

    Returns:
        The value (returned as stored, not stripped) of the first matching
        cookie whose value is a non-blank string, or ``None`` if there is none.
    """
    for entry in cookies:
        if _mapping_get(entry, "name") != cookie_name:
            continue
        candidate = _mapping_get(entry, "value")
        # Blank or non-string values are skipped so a later match can still win.
        if isinstance(candidate, str) and candidate.strip():
            return candidate
    return None
def extract_payload_header(headers: Mapping[str, Any]) -> str | None:
    """Pull the ``x-api-request-payload`` header out of a header mapping.

    The header name is matched case-insensitively.

    Args:
        headers: Header names mapped to their values.

    Returns:
        The whitespace-stripped header value, or ``None`` when the header is
        absent, not a string, or blank.
    """
    for name, raw in headers.items():
        if name.lower() != "x-api-request-payload":
            continue
        if not isinstance(raw, str):
            continue
        cleaned = raw.strip()
        if cleaned:
            return cleaned
    return None
def save_page_debug_artifacts(page: Any, screenshot_path: Path, html_path: Path) -> None:
    """Write a screenshot and the HTML source of ``page`` to disk for debugging.

    Args:
        page: Object exposing ``screenshot(path=...)`` and ``content()``.
        screenshot_path: Destination file for the screenshot.
        html_path: Destination file for the page HTML (written as UTF-8).

    Raises:
        Whatever ``page.screenshot``, ``page.content`` or the filesystem calls
        raise; callers are expected to treat this as best-effort.
    """
    # Accept plain strings as well as Path objects.
    screenshot_path = Path(screenshot_path)
    html_path = Path(html_path)

    # The two artifacts may live in different directories, so make sure both
    # parents exist; otherwise writing the HTML file fails when only the
    # screenshot directory has been created.
    for target in (screenshot_path, html_path):
        target.parent.mkdir(parents=True, exist_ok=True)

    page.screenshot(path=str(screenshot_path))
    html_path.write_text(page.content(), encoding="utf-8")
+82 -90
View File
@@ -1,23 +1,33 @@
import requests from __future__ import annotations
import json import json
import time
import os
import logging import logging
from selenium import webdriver import requests
from selenium.webdriver.chrome.service import Service import time
from selenium.webdriver.chrome.options import Options
from typing import Dict, Any from typing import Dict, Any
from playwright.sync_api import sync_playwright
from backend.config import settings from backend.config import settings
from backend.workers.browser_automation import (
PlaywrightLaunchConfig,
extract_payload_header,
save_page_debug_artifacts,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Chrome 配置路径 - 从设置中读取 BASE_DIR = settings.BASE_DIR
CHROME_BINARY_PATH = settings.CHROME_BINARY_PATH DEBUG_SCREENSHOT_PATH = BASE_DIR / "payload_debug.png"
CHROMEDRIVER_PATH = settings.CHROMEDRIVER_PATH DEBUG_PAGE_SOURCE_PATH = BASE_DIR / "payload_debug_page_source.html"
def get_live_x_api_payload(auth_token: str) -> str: def get_browser_config() -> PlaywrightLaunchConfig:
"""获取 Playwright 浏览器配置(从 settings 读取)"""
return PlaywrightLaunchConfig(executable_path=settings.BROWSER_EXECUTABLE_PATH)
def get_live_x_api_payload(auth_token: str) -> str | None:
""" """
启动一个临时的无头浏览器会话,获取新鲜的 x-api-request-payload 启动一个临时的无头浏览器会话,获取新鲜的 x-api-request-payload
@@ -27,89 +37,89 @@ def get_live_x_api_payload(auth_token: str) -> str:
Returns: Returns:
x-api-request-payload 值,失败返回 None x-api-request-payload 值,失败返回 None
""" """
logger.info("正在启动临时浏览器会话以监听网络日志...") logger.info("正在启动临时 Playwright 会话以监听网络请求...")
# 根据配置创建 Service
if CHROMEDRIVER_PATH:
service = Service(executable_path=CHROMEDRIVER_PATH)
else:
service = Service() # 使用 Selenium Manager 自动管理
chrome_options = Options()
# 如果配置了 Chrome 路径,则使用配置的路径
if CHROME_BINARY_PATH:
chrome_options.binary_location = CHROME_BINARY_PATH
# 开启性能日志记录功能
logging_prefs = {"performance": "ALL"}
chrome_options.set_capability("goog:loggingPrefs", logging_prefs)
# Headless 模式配置
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
chrome_options.add_argument(f"user-agent={user_agent}")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
driver = webdriver.Chrome(service=service, options=chrome_options)
browser = None
context = None
page = None
payload_signature = None payload_signature = None
playwright = None
try: try:
# 导航到同源空白页,用于设置 Cookie browser_config = get_browser_config()
driver.get("https://i.jielong.com/my-class")
# 注入长期 Token playwright = sync_playwright().start()
driver.add_cookie({"name": "token", "value": auth_token, "domain": ".jielong.com"}) browser = playwright.chromium.launch(**browser_config.to_launch_kwargs())
context = browser.new_context(**browser_config.to_context_kwargs())
page = context.new_page()
# 导航到触发 API 的页面 def on_request(request) -> None:
driver.get("https://i.jielong.com/my-form") nonlocal payload_signature
if payload_signature:
return
# 等待并捕获 x-api-request-payload payload = extract_payload_header(request.headers)
max_wait_time = 20 # 最多等待20秒 if payload:
payload_signature = payload
logger.info("成功通过 Playwright 捕获到现场的 x-api-request-payload")
page.on("request", on_request)
page.goto("https://i.jielong.com/my-class", wait_until="domcontentloaded", timeout=60000)
context.add_cookies(
[
{
"name": "token",
"value": auth_token,
"domain": ".jielong.com",
"path": "/",
}
]
)
page.goto("https://i.jielong.com/my-form", wait_until="domcontentloaded", timeout=60000)
max_wait_time = 20
start_time = time.time() start_time = time.time()
found = False
while time.time() - start_time < max_wait_time: while time.time() - start_time < max_wait_time:
logs = driver.get_log("performance") if payload_signature:
for entry in logs:
log = json.loads(entry["message"])["message"]
if log["method"] == "Network.requestWillBeSent":
headers = log.get("params", {}).get("request", {}).get("headers", {})
headers_lower = {k.lower(): v for k, v in headers.items()}
if "x-api-request-payload" in headers_lower:
payload_signature = headers_lower["x-api-request-payload"]
logger.info("成功通过网络日志捕获到现场的 x-api-request-payload")
found = True
break break
if found: page.wait_for_timeout(500)
break
time.sleep(1)
if not payload_signature: if not payload_signature:
raise Exception( raise Exception(
f"{max_wait_time} 秒内未能通过网络日志捕获到 x-api-request-payload。" f"{max_wait_time} 秒内未能通过网络请求捕获到 x-api-request-payload。"
) )
except Exception as e: except Exception as e:
logger.error(f"获取现场 x-api-request-payload 时失败: {e}") logger.error(f"获取现场 x-api-request-payload 时失败: {e}")
try: try:
debug_screenshot = os.path.join(settings.BASE_DIR, "payload_debug.png") if page:
driver.save_screenshot(debug_screenshot) save_page_debug_artifacts(page, DEBUG_SCREENSHOT_PATH, DEBUG_PAGE_SOURCE_PATH)
except Exception as screenshot_error: except Exception as screenshot_error:
logger.warning(f"保存调试截图失败: {screenshot_error}") logger.warning(f"保存调试截图失败: {screenshot_error}")
finally: finally:
# 优雅关闭 WebDriver,避免 Windows asyncio ConnectionResetError if page:
try: try:
driver.quit() page.close()
except Exception:
pass
if context:
try:
context.close()
except Exception:
pass
if browser:
try:
browser.close()
except Exception as e: except Exception as e:
# 忽略 WebDriver 关闭时的连接错误(Windows 平台常见问题) logger.warning(f"关闭 Playwright 浏览器时出现警告: {e}")
if "WinError 10054" not in str(e) and "ConnectionResetError" not in str(e): if playwright:
logger.warning(f"关闭 WebDriver 时出现警告: {e}") try:
playwright.stop()
except Exception as e:
logger.warning(f"关闭 Playwright runtime 时出现警告: {e}")
return payload_signature return payload_signature
@@ -129,13 +139,12 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
- response_text: 响应文本 - response_text: 响应文本
- error_message: 错误信息 - error_message: 错误信息
""" """
# 从 payload_config 中提取 Signature 用于日志
from backend.utils.json_helpers import safe_parse_payload, extract_signature from backend.utils.json_helpers import safe_parse_payload, extract_signature
payload_dict = safe_parse_payload(task.payload_config) payload_dict = safe_parse_payload(task.payload_config)
signature = extract_signature(task.payload_config) or "Unknown" signature = extract_signature(task.payload_config) or "Unknown"
logger.info(f"Selenium打卡: 正在为任务 ID: {task.id} (Signature: {signature}) 执行打卡...") logger.info(f"Playwright打卡: 正在为任务 ID: {task.id} (Signature: {signature}) 执行打卡...")
if not user_token: if not user_token:
error_msg = f"任务 ID: {task.id} (Signature: {signature}) 的 Token 为空,跳过。" error_msg = f"任务 ID: {task.id} (Signature: {signature}) 的 Token 为空,跳过。"
@@ -147,7 +156,6 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
"error_message": error_msg, "error_message": error_msg,
} }
# 获取 x-api-request-payload
payload_signature = get_live_x_api_payload(user_token) payload_signature = get_live_x_api_payload(user_token)
if not payload_signature: if not payload_signature:
error_msg = f"任务 ID: {task.id} (Signature: {signature}) 未能获取到现场签名,打卡中止。" error_msg = f"任务 ID: {task.id} (Signature: {signature}) 未能获取到现场签名,打卡中止。"
@@ -160,7 +168,6 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
} }
try: try:
# 使用任务的 payload_config(从模板生成的完整配置,包含 ThreadId)
from backend.utils.json_helpers import safe_parse_payload, extract_thread_id from backend.utils.json_helpers import safe_parse_payload, extract_thread_id
payload = safe_parse_payload(task.payload_config) payload = safe_parse_payload(task.payload_config)
@@ -190,7 +197,6 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
url = "https://api.jielong.com/api/CheckIn/EditRecord" url = "https://api.jielong.com/api/CheckIn/EditRecord"
# 打印请求详情用于调试
payload_json = json.dumps(payload, ensure_ascii=False) payload_json = json.dumps(payload, ensure_ascii=False)
logger.info(f"📤 打卡请求详情 - 任务 ID: {task.id} (Signature: {signature})") logger.info(f"📤 打卡请求详情 - 任务 ID: {task.id} (Signature: {signature})")
logger.info(f"📍 URL: {url}") logger.info(f"📍 URL: {url}")
@@ -205,11 +211,8 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
f"✉️ 任务 ID: {task.id} (Signature: {signature}) 打卡请求完成!响应: {response_text}" f"✉️ 任务 ID: {task.id} (Signature: {signature}) 打卡请求完成!响应: {response_text}"
) )
# 判断响应内容(参考 V1 实现逻辑)
# 情况1: 明确包含"打卡成功" → 成功
if "打卡成功" in response_text: if "打卡成功" in response_text:
logger.info(f"✅ 检测到成功关键字 '打卡成功',打卡成功") logger.info(f"✅ 检测到成功关键字 '打卡成功',打卡成功")
# 发送成功邮件通知
if task.user and task.user.email: if task.user and task.user.email:
try: try:
from backend.services.email_service import EmailService from backend.services.email_service import EmailService
@@ -229,8 +232,6 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
"error_message": "", "error_message": "",
} }
# 情况2: 已经提交过了(重复提交)→ 视为成功,但不发送邮件
# 匹配 "已被提交" 或 "已经打卡"
elif ( elif (
"已被提交" in response_text "已被提交" in response_text
or "已经打卡" in response_text or "已经打卡" in response_text
@@ -244,8 +245,6 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
"error_message": "", "error_message": "",
} }
# 情况3: 不在打卡时间范围 → 标记为时间范围外
# 匹配 Data 或 Description 中的内容
elif "不在打卡时间范围" in response_text or "不在打卡时间" in response_text: elif "不在打卡时间范围" in response_text or "不在打卡时间" in response_text:
logger.warning(f"⏰ 检测到'不在打卡时间范围',打卡时间不符") logger.warning(f"⏰ 检测到'不在打卡时间范围',打卡时间不符")
return { return {
@@ -255,8 +254,6 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
"error_message": "不在打卡时间范围内", "error_message": "不在打卡时间范围内",
} }
# 情况4: Token 失效的特征标识 → 失败
# 扩展检测条件:检测多种 Token 失效的响应特征
elif ( elif (
"登录" in response_text "登录" in response_text
or "授权" in response_text or "授权" in response_text
@@ -266,16 +263,13 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
or response.status_code == 401 or response.status_code == 401
): ):
logger.warning(f"⚠️ 检测到Token失效特征,Token 可能已失效") logger.warning(f"⚠️ 检测到Token失效特征,Token 可能已失效")
# 发送打卡失败邮件通知(邮件内容已包含Token失效提醒和刷新指引)
if task.user and task.user.email: if task.user and task.user.email:
try: try:
from backend.services.email_service import EmailService from backend.services.email_service import EmailService
from backend.utils.json_helpers import build_task_info from backend.utils.json_helpers import build_task_info
# 使用辅助函数构建 task_info(从 task 对象提取信息)
task_info = build_task_info(task) task_info = build_task_info(task)
# 只发送打卡失败通知(内容已说明Token失效)
EmailService.notify_check_in_result( EmailService.notify_check_in_result(
task.user, task_info, False, "Token 已失效,需要重新授权" task.user, task_info, False, "Token 已失效,需要重新授权"
) )
@@ -284,15 +278,13 @@ def perform_check_in(task, user_token: str) -> Dict[str, Any]:
return { return {
"success": False, "success": False,
"status": "token_expired", # 特殊状态,用于标识 Token 过期 "status": "token_expired",
"response_text": response_text, "response_text": response_text,
"error_message": "Token 已失效,需要重新授权", "error_message": "Token 已失效,需要重新授权",
} }
# 情况5: 其他响应 → 需要人工确认(标记为异常)
else: else:
logger.warning(f"⚠️ 未识别的响应内容,请检查: {response_text[:200]}...") logger.warning(f"⚠️ 未识别的响应内容,请检查: {response_text[:200]}...")
# 标记为未知状态,记录完整响应供后续分析
return { return {
"success": False, "success": False,
"status": "unknown", "status": "unknown",
+121 -163
View File
@@ -1,34 +1,32 @@
import os from __future__ import annotations
import logging
import base64
import json import json
from pathlib import Path import logging
from selenium import webdriver import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from filelock import FileLock from filelock import FileLock
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import sync_playwright
from backend.config import settings from backend.config import settings
from backend.services.registration_manager import registration_manager
from backend.workers.browser_automation import (
PlaywrightLaunchConfig,
extract_cookie_value,
save_page_debug_artifacts,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Chrome 配置路径
BASE_DIR = settings.BASE_DIR BASE_DIR = settings.BASE_DIR
DEBUG_SCREENSHOT_PATH = BASE_DIR / "debug_screenshot.png"
# 调试文件路径 DEBUG_PAGE_SOURCE_PATH = BASE_DIR / "debug_page_source.html"
DEBUG_SCREENSHOT_PATH = os.path.join(BASE_DIR, "debug_screenshot.png")
DEBUG_PAGE_SOURCE_PATH = os.path.join(BASE_DIR, "debug_page_source.html")
def get_chrome_config(): def get_browser_config() -> PlaywrightLaunchConfig:
"""获取 Chrome 配置(从 settings 读取)""" """获取 Playwright 浏览器配置(从 settings 读取)"""
return { return PlaywrightLaunchConfig(executable_path=settings.BROWSER_EXECUTABLE_PATH)
"chrome_binary": settings.CHROME_BINARY_PATH,
"chromedriver": settings.CHROMEDRIVER_PATH,
}
def update_session_file(session_id: str, data: dict) -> None: def update_session_file(session_id: str, data: dict) -> None:
@@ -44,7 +42,7 @@ def update_session_file(session_id: str, data: dict) -> None:
logger.error(f"写入会话文件 {filepath} 失败: {e}") logger.error(f"写入会话文件 {filepath} 失败: {e}")
def get_session_status(session_id: str) -> str: def get_session_status(session_id: str) -> str | None:
"""安全地读取会话文件的状态""" """安全地读取会话文件的状态"""
filepath = settings.SESSION_DIR / f"{session_id}.json" filepath = settings.SESSION_DIR / f"{session_id}.json"
lock_path = settings.SESSION_DIR / f"{session_id}.json.lock" lock_path = settings.SESSION_DIR / f"{session_id}.json.lock"
@@ -67,7 +65,7 @@ def get_session_status(session_id: str) -> str:
return None return None
def get_session_data(session_id: str) -> dict: def get_session_data(session_id: str) -> dict | None:
"""读取完整的会话数据""" """读取完整的会话数据"""
filepath = settings.SESSION_DIR / f"{session_id}.json" filepath = settings.SESSION_DIR / f"{session_id}.json"
lock_path = settings.SESSION_DIR / f"{session_id}.json.lock" lock_path = settings.SESSION_DIR / f"{session_id}.json.lock"
@@ -108,7 +106,6 @@ def cancel_session(session_id: str) -> bool:
try: try:
with FileLock(lock_path, timeout=5): with FileLock(lock_path, timeout=5):
# 读取当前会话数据
from backend.utils.json_helpers import safe_parse_json from backend.utils.json_helpers import safe_parse_json
with open(filepath, "r", encoding="utf-8") as f: with open(filepath, "r", encoding="utf-8") as f:
@@ -117,16 +114,13 @@ def cancel_session(session_id: str) -> bool:
return False return False
data = safe_parse_json(content, {}) data = safe_parse_json(content, {})
# 如果已经成功,不允许取消
if data.get("status") == "success": if data.get("status") == "success":
logger.info(f"会话 {session_id} 已成功,无法取消") logger.info(f"会话 {session_id} 已成功,无法取消")
return False return False
# 标记为已取消
data["status"] = "cancelled" data["status"] = "cancelled"
data["message"] = "用户取消登录" data["message"] = "用户取消登录"
# 写回文件
with open(filepath, "w", encoding="utf-8") as f: with open(filepath, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2) json.dump(data, f, ensure_ascii=False, indent=2)
@@ -138,11 +132,29 @@ def cancel_session(session_id: str) -> bool:
return False return False
def _close_quietly(resource, label: str) -> None:
    """Close ``resource`` without letting a failure propagate.

    Args:
        resource: Object with a ``close()`` method; falsy values are ignored.
        label: Human-readable name used in the warning logged on failure.
    """
    if resource:
        try:
            resource.close()
        except Exception as close_error:
            # Cleanup is best-effort: report the problem and carry on.
            logger.warning(f"关闭 {label} 时出现警告: {close_error}")
def _release_alias_if_needed(alias: str | None, session_id: str) -> None:
    """Release the alias reservation held by a session, if there is one.

    Args:
        alias: Reserved alias; nothing happens when it is ``None`` or empty.
        session_id: Session passed to ``registration_manager.release_alias``
            together with the alias.
    """
    if alias:
        registration_manager.release_alias(alias, session_id)
        logger.info(f"释放用户名预占: {alias}")
def get_token_headless( def get_token_headless(
session_id: str, jwt_sub: str = None, alias: str = None, client_ip: str = "" session_id: str, jwt_sub: str = None, alias: str = None, client_ip: str = ""
) -> None: ) -> None:
""" """
使用 Selenium 获取 QQ 扫码登录的 Token 使用 Playwright 获取 QQ 扫码登录的 Token
Args: Args:
session_id: 会话 ID session_id: 会话 ID
@@ -150,177 +162,119 @@ def get_token_headless(
alias: 用户别名(用于新用户注册) alias: 用户别名(用于新用户注册)
client_ip: 客户端 IP 地址 client_ip: 客户端 IP 地址
""" """
driver = None
current_step = "初始化" current_step = "初始化"
browser = None
context = None
page = None
playwright = None
try: try:
# 获取 Chrome 配置 browser_config = get_browser_config()
chrome_config = get_chrome_config() logger.info(f"Playwright ({session_id}): {current_step}...")
chrome_binary_path = chrome_config["chrome_binary"]
chromedriver_path = chrome_config["chromedriver"]
# 配置 Chrome 选项 playwright = sync_playwright().start()
current_step = "配置 ChromeDriver" current_step = "启动浏览器"
logger.info(f"Selenium ({session_id}): {current_step}...") logger.info(f"Playwright ({session_id}): {current_step}...")
browser = playwright.chromium.launch(**browser_config.to_launch_kwargs())
logger.info(f"Playwright ({session_id}): 浏览器启动成功")
chrome_options = Options() current_step = "创建上下文"
context = browser.new_context(**browser_config.to_context_kwargs())
page = context.new_page()
# 如果指定了自定义 Chrome 路径,则使用
if chrome_binary_path:
chrome_options.binary_location = chrome_binary_path
logger.info(f"Selenium ({session_id}): 使用自定义 Chrome 路径: {chrome_binary_path}")
else:
logger.info(f"Selenium ({session_id}): 使用系统默认 Chrome")
# Headless 模式配置
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
chrome_options.add_argument(f"user-agent={user_agent}")
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
# 启动浏览器
current_step = "启动 Chrome 浏览器"
logger.info(f"Selenium ({session_id}): {current_step}...")
# 如果指定了 ChromeDriver 路径,则使用 Service;否则让 Selenium 自动管理
if chromedriver_path:
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)
logger.info(f"Selenium ({session_id}): 使用自定义 ChromeDriver: {chromedriver_path}")
else:
driver = webdriver.Chrome(options=chrome_options)
logger.info(f"Selenium ({session_id}): 使用 Selenium Manager 自动管理 ChromeDriver")
logger.info(f"Selenium ({session_id}): Chrome 浏览器启动成功")
current_step = "导航到登录页面" current_step = "导航到登录页面"
logger.info(f"Selenium ({session_id}): {current_step}...") logger.info(f"Playwright ({session_id}): {current_step}...")
driver.get("https://i.jielong.com/login?redirectTo=https%3A%2F%2Fi.jielong.com%2F") page.goto(
"https://i.jielong.com/login?redirectTo=https%3A%2F%2Fi.jielong.com%2F",
wait_until="domcontentloaded",
timeout=60000,
)
wait = WebDriverWait(driver, 60)
# --- 步骤 1: 点击切换到 QQ 登录 ---
current_step = "查找并点击切换按钮" current_step = "查找并点击切换按钮"
toggle_button_selector = "div.login-wrap .toggle" toggle_button = page.locator("div.login-wrap .toggle")
logger.info(f"Selenium ({session_id}): {current_step} ({toggle_button_selector})...") logger.info(f"Playwright ({session_id}): {current_step}...")
toggle_button = wait.until( toggle_button.click(timeout=60000)
EC.element_to_be_clickable((By.CSS_SELECTOR, toggle_button_selector))
)
toggle_button.click()
# --- 步骤 2: 勾选同意服务协议 ---
current_step = "勾选同意服务协议" current_step = "勾选同意服务协议"
checkbox_selector = "input.ant-checkbox-input[type='checkbox']" checkbox = page.locator("input.ant-checkbox-input[type='checkbox']")
logger.info(f"Selenium ({session_id}): {current_step} ({checkbox_selector})...") logger.info(f"Playwright ({session_id}): {current_step}...")
checkbox = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, checkbox_selector))) if not checkbox.is_checked():
if not checkbox.is_selected(): checkbox.click(timeout=60000)
checkbox.click() logger.info(f"Playwright ({session_id}): 已勾选服务协议")
logger.info(f"Selenium ({session_id}): 已勾选服务协议")
# --- 步骤 3: 点击"立即登录"按钮 ---
current_step = "点击立即登录按钮" current_step = "点击立即登录按钮"
login_button_selector = "button.css-1wli0ry.ant-btn.ant-btn-default.login-btn" login_button = page.locator("button.css-1wli0ry.ant-btn.ant-btn-default.login-btn")
logger.info(f"Selenium ({session_id}): {current_step} ({login_button_selector})...") logger.info(f"Playwright ({session_id}): {current_step}...")
login_button = wait.until( login_button.click(timeout=60000)
EC.element_to_be_clickable((By.CSS_SELECTOR, login_button_selector))
)
login_button.click()
# --- 步骤 4: 等待二维码加载 --- current_step = "等待二维码刷新"
import time page.wait_for_timeout(3000)
time.sleep(3) # 等待几秒让二维码刷新出来
current_step = "等待QQ二维码图片加载" current_step = "等待QQ二维码图片加载"
qq_qr_image_selector = "#login_container img" qr_locator = page.locator("#login_container img").first
logger.info(f"Selenium ({session_id}): {current_step} ({qq_qr_image_selector})...") logger.info(f"Playwright ({session_id}): {current_step}...")
qr_element = wait.until( qr_locator.wait_for(state="visible", timeout=60000)
EC.visibility_of_element_located((By.CSS_SELECTOR, qq_qr_image_selector))
)
logger.info(f"Selenium ({session_id}): 成功找到QQ二维码元素,正在截图...") logger.info(f"Playwright ({session_id}): 成功找到QQ二维码元素,正在截图...")
qr_base64 = qr_element.screenshot_as_base64 qr_base64 = base64.b64encode(qr_locator.screenshot(timeout=60000)).decode("ascii")
update_session_file( update_session_file(
session_id, session_id,
{ {
"status": "waiting_scan", "status": "waiting_scan",
"qr_image_data": qr_base64, "qr_image_data": qr_base64,
"jwt_sub": jwt_sub, "jwt_sub": jwt_sub,
"alias": alias, # 新增:保存 alias "alias": alias,
"client_ip": client_ip, # 新增:保存 IP "client_ip": client_ip,
}, },
) )
current_step = "等待用户扫描登录 (Cookie 'token' 出现)" current_step = "等待用户扫描登录 (Cookie 'token' 出现)"
cookie_name_to_find = "token" logger.info(f"Playwright ({session_id}): {current_step}...")
logger.info(f"Selenium ({session_id}): {current_step}...")
# 自定义等待逻辑:每秒检查cookie和session状态
max_wait_seconds = 120 max_wait_seconds = 120
import time for _ in range(max_wait_seconds):
for i in range(max_wait_seconds):
# 检查session是否被取消
status = get_session_status(session_id) status = get_session_status(session_id)
if status == "cancelled": if status == "cancelled":
logger.info(f"Selenium ({session_id}): 用户取消了登录,终止会话") logger.info(f"Playwright ({session_id}): 用户取消了登录,终止会话")
raise Exception("用户取消登录") _release_alias_if_needed(alias, session_id)
return
# 检查cookie是否出现 token = extract_cookie_value(context.cookies(), "token")
cookie = driver.get_cookie(cookie_name_to_find) if token:
if cookie: logger.info(f"Playwright ({session_id}): 成功在Cookie中捕获到Token")
break
time.sleep(1)
else:
# 超时未获取到cookie
raise TimeoutException("等待扫码超时")
cookie = driver.get_cookie(cookie_name_to_find)
if cookie:
logger.info(f"Selenium ({session_id}): 成功在Cookie中捕获到Token")
update_session_file( update_session_file(
session_id, session_id,
{ {
"status": "success", "status": "success",
"token": cookie["value"], "token": token,
"alias": alias, # 保存 alias "alias": alias,
"client_ip": client_ip, # 保存 IP "client_ip": client_ip,
}, },
) )
else: return
raise Exception("等待Cookie成功但获取失败")
except TimeoutException: time.sleep(1)
raise PlaywrightTimeoutError("等待扫码超时")
except PlaywrightTimeoutError:
if get_session_status(session_id) == "success": if get_session_status(session_id) == "success":
logger.warning( logger.warning(
f"Selenium ({session_id}): 一个并发线程超时,但会话已成功,将忽略此超时。" f"Playwright ({session_id}): 一个并发线程超时,但会话已成功,将忽略此超时。"
) )
else: else:
# 释放预占的用户名 _release_alias_if_needed(alias, session_id)
if alias: error_message = f"操作超时!卡在了步骤: '{current_step}'。请检查页面选择器或网络。"
from backend.services.registration_manager import registration_manager logger.error(f"Playwright ({session_id}): {error_message}")
registration_manager.release_alias(alias, session_id) if page:
logger.info(f"超时释放用户名预占: {alias}")
error_message = f"操作超时!卡在了步骤: '{current_step}'。请检查CSS选择器或网络。"
logger.error(f"Selenium ({session_id}): {error_message}")
# 保存调试信息(仅当 driver 已创建时)
if driver:
try: try:
driver.save_screenshot(DEBUG_SCREENSHOT_PATH) save_page_debug_artifacts(page, DEBUG_SCREENSHOT_PATH, DEBUG_PAGE_SOURCE_PATH)
with open(DEBUG_PAGE_SOURCE_PATH, "w", encoding="utf-8") as f:
f.write(driver.page_source)
logger.error( logger.error(
f"Selenium ({session_id}): 调试截图和源码已保存。当前URL: {driver.current_url}" f"Playwright ({session_id}): 调试截图和源码已保存。当前URL: {page.url}"
) )
except Exception as debug_error: except Exception as debug_error:
logger.error(f"Selenium ({session_id}): 保存调试信息失败: {debug_error}") logger.error(f"Playwright ({session_id}): 保存调试信息失败: {debug_error}")
update_session_file( update_session_file(
session_id, {"status": "error", "message": error_message, "jwt_sub": jwt_sub} session_id, {"status": "error", "message": error_message, "jwt_sub": jwt_sub}
@@ -329,25 +283,29 @@ def get_token_headless(
except Exception as e: except Exception as e:
if get_session_status(session_id) == "success": if get_session_status(session_id) == "success":
logger.warning( logger.warning(
f"Selenium ({session_id}): 一个并发线程出错 ({e}),但会话已成功,将忽略此错误。" f"Playwright ({session_id}): 一个并发线程出错 ({e}),但会话已成功,将忽略此错误。"
) )
else: else:
# 释放预占的用户名 _release_alias_if_needed(alias, session_id)
if alias: logger.error(f"Playwright ({session_id}): 发生未知错误: {e}", exc_info=True)
from backend.services.registration_manager import registration_manager
registration_manager.release_alias(alias, session_id) if page:
logger.info(f"异常释放用户名预占: {alias}") try:
save_page_debug_artifacts(page, DEBUG_SCREENSHOT_PATH, DEBUG_PAGE_SOURCE_PATH)
except Exception as debug_error:
logger.error(f"Playwright ({session_id}): 保存调试信息失败: {debug_error}")
logger.error(f"Selenium ({session_id}): 发生未知错误: {e}", exc_info=True)
update_session_file( update_session_file(
session_id, {"status": "error", "message": str(e), "jwt_sub": jwt_sub} session_id, {"status": "error", "message": str(e), "jwt_sub": jwt_sub}
) )
finally: finally:
if driver: _close_quietly(page, "页面")
_close_quietly(context, "浏览器上下文")
_close_quietly(browser, "浏览器")
if playwright:
try: try:
driver.quit() playwright.stop()
logger.info(f"Selenium ({session_id}): 浏览器已关闭") except Exception as e:
except Exception as quit_error: logger.warning(f"关闭 Playwright runtime 时出现警告: {e}")
logger.error(f"Selenium ({session_id}): 关闭浏览器失败: {quit_error}") logger.info(f"Playwright ({session_id}): 浏览器已关闭")
+6 -6
View File
@@ -68,7 +68,7 @@ CheckIn App V2 采用用户-任务分离架构,一个用户可管理多个打
- **FastAPI**: Web 框架,自动生成 API 文档 - **FastAPI**: Web 框架,自动生成 API 文档
- **SQLAlchemy**: ORM,支持多数据库 - **SQLAlchemy**: ORM,支持多数据库
- **APScheduler**: 任务调度,动态加载 cron 任务 - **APScheduler**: 任务调度,动态加载 cron 任务
- **Selenium**: 浏览器自动化,获取 QQ Token 和打卡 payload - **Playwright**: 浏览器自动化,获取 QQ Token 和打卡 payload
- **JWT**: 身份认证 - **JWT**: 身份认证
- **SMTP**: 邮件通知 - **SMTP**: 邮件通知
@@ -86,10 +86,10 @@ CheckIn App V2 采用用户-任务分离架构,一个用户可管理多个打
1. 用户输入 alias 1. 用户输入 alias
2. 后端检查 alias 可用性和频率限制 2. 后端检查 alias 可用性和频率限制
3. Selenium 启动 headless Chrome,打开接龙登录页 3. Playwright 启动 headless Chromium,打开接龙登录页
4. 生成 QR code,返回给前端 4. 生成 QR code,返回给前端
5. 用户手机 QQ 扫码 5. 用户手机 QQ 扫码
6. Selenium 检测登录成功,提取 authorization token 和 jwt 6. Playwright 检测登录成功,提取 authorization token 和 jwt
7. 存储用户信息(待审批状态) 7. 存储用户信息(待审批状态)
8. 管理员审批后用户可登录 8. 管理员审批后用户可登录
@@ -104,7 +104,7 @@ CheckIn App V2 采用用户-任务分离架构,一个用户可管理多个打
1. 用户点击任务的"立即打卡"按钮 1. 用户点击任务的"立即打卡"按钮
2. 后端异步执行打卡任务 2. 后端异步执行打卡任务
3. Selenium 获取最新 x-api-request-payload 3. Playwright 获取最新 x-api-request-payload
4. 使用用户的 authorization token 调用接龙 API 4. 使用用户的 authorization token 调用接龙 API
5. 解析响应,存储记录 5. 解析响应,存储记录
6. 返回结果 6. 返回结果
@@ -128,7 +128,7 @@ CheckIn App V2 采用用户-任务分离架构,一个用户可管理多个打
### 会话清理 ### 会话清理
- 间隔: 24 小时 - 间隔: 24 小时
- 功能: 删除旧的 Selenium 会话文件 - 功能: 删除旧的 Playwright 会话文件
### 用户清理 ### 用户清理
@@ -158,7 +158,7 @@ apps/backend/
├── services/ # 业务逻辑层 ├── services/ # 业务逻辑层
├── models/ # 数据模型层 ├── models/ # 数据模型层
├── schemas/ # 请求响应模型 ├── schemas/ # 请求响应模型
├── workers/ # Selenium 工作模块 ├── workers/ # Playwright 工作模块
└── scripts/ # 工具脚本 └── scripts/ # 工具脚本
``` ```
+7 -6
View File
@@ -5,7 +5,7 @@
### 系统要求 ### 系统要求
- Ubuntu 20.04+ / CentOS 7+ / Windows Server - Ubuntu 20.04+ / CentOS 7+ / Windows Server
- Python 3.9+ - Python 3.12+
- uv - uv
- Node.js 20+ - Node.js 20+
- pnpm - pnpm
@@ -191,14 +191,15 @@ sudo lsof -i :8000
sudo kill -9 <PID> sudo kill -9 <PID>
``` ```
### Selenium 问题 ### Playwright 问题
```bash ```bash
# 检查 Chrome 版本 # 安装浏览器
chromium --version uv run playwright install chromium
chromedriver --version
# 确保版本匹配 # 检查系统浏览器(可选)
chromium --version
google-chrome --version
``` ```
### 权限问题 ### 权限问题
+2 -2
View File
@@ -72,7 +72,7 @@ apps/backend/
│ ├── template_service.py │ ├── template_service.py
│ └── registration_manager.py │ └── registration_manager.py
└── workers/ # Selenium 自动化 └── workers/ # Playwright 自动化
├── token_refresher.py # QQ 登录 ├── token_refresher.py # QQ 登录
├── check_in_worker.py # 打卡执行 ├── check_in_worker.py # 打卡执行
└── email_notifier.py # 邮件发送 └── email_notifier.py # 邮件发送
@@ -337,7 +337,7 @@ engine = create_engine(
) )
``` ```
### Selenium 超时 ### Playwright 超时
增加等待时间: 增加等待时间:
+3 -3
View File
@@ -3,7 +3,7 @@ name = "checkin-app-backend"
version = "0.1.0" version = "0.1.0"
description = "FastAPI backend for CheckIn App" description = "FastAPI backend for CheckIn App"
readme = "README.md" readme = "README.md"
requires-python = ">=3.9" requires-python = ">=3.12"
dependencies = [ dependencies = [
"apscheduler>=3.10.4", "apscheduler>=3.10.4",
"bcrypt>=4.2.2", "bcrypt>=4.2.2",
@@ -15,7 +15,7 @@ dependencies = [
"pyjwt>=2.10.1", "pyjwt>=2.10.1",
"python-dotenv>=1.0.1", "python-dotenv>=1.0.1",
"requests>=2.32.3", "requests>=2.32.3",
"selenium>=4.28.1", "playwright>=1.50.0",
"slowapi>=0.1.9", "slowapi>=0.1.9",
"sqlalchemy>=2.0.36", "sqlalchemy>=2.0.36",
"uvicorn[standard]>=0.34.0", "uvicorn[standard]>=0.34.0",
@@ -48,7 +48,7 @@ namespaces = true
[tool.ruff] [tool.ruff]
line-length = 100 line-length = 100
target-version = "py39" target-version = "py312"
src = ["apps", "tests"] src = ["apps", "tests"]
[tool.ruff.lint] [tool.ruff.lint]
+37
View File
@@ -0,0 +1,37 @@
from __future__ import annotations
from backend.workers.browser_automation import (
PlaywrightLaunchConfig,
extract_cookie_value,
extract_payload_header,
)
def test_payload_header_extraction_is_case_insensitive() -> None:
    """Check that a mixed-case ``X-API-Request-Payload`` key is still matched."""
    expected_signature = "live-signature"
    extracted = extract_payload_header(
        {
            "Accept": "application/json",
            "X-API-Request-Payload": expected_signature,
        }
    )
    assert extracted == expected_signature
def test_payload_header_extraction_ignores_blank_values() -> None:
    """Check that a whitespace-only payload header is reported as ``None``."""
    whitespace_only_headers = {"x-api-request-payload": " "}
    extracted = extract_payload_header(whitespace_only_headers)
    assert extracted is None
def test_cookie_value_extraction_finds_named_cookie() -> None:
    """Check that the value of the cookie named ``token`` is returned."""
    # The wanted cookie is deliberately placed after an unrelated one.
    name_value_pairs = (("other", "unused"), ("token", "qq-token"))
    cookie_jar = [{"name": name, "value": value} for name, value in name_value_pairs]
    found = extract_cookie_value(cookie_jar, "token")
    assert found == "qq-token"
def test_launch_config_uses_optional_executable_path() -> None:
    """Check that a configured executable path appears in the launch kwargs."""
    chromium_path = "/usr/bin/chromium"
    launch_kwargs = PlaywrightLaunchConfig(executable_path=chromium_path).to_launch_kwargs()
    assert launch_kwargs["executable_path"] == chromium_path
Generated
+103 -1273
View File
File diff suppressed because it is too large Load Diff