Quellcode durchsuchen

feat: 适配全项目 PaddleOCR 命令行以支持 VL 识别后端配置

- 更新 pdf_converter_v2/utils/paddleocr_fallback.py 中的 call_paddleocr_ocr，使其支持 --vl_rec_backend 和 --vl_rec_server_url 参数
- 统一重构 pdf_converter_v2/processor/converter.py 和 pdf_converter_v2/converter.py，确保 doc_parser 模式也支持 VL 配置
- 优化导入逻辑，确保底层命令配置优先从统一的 config.py 读取

Co-authored-by: Cursor <cursoragent@cursor.com>
何文松 vor 2 Wochen
Ursprung
Commit
160834c486

+ 8 - 1
pdf_converter_v2/converter.py

@@ -19,9 +19,10 @@ from PIL import Image
 from ..utils.logging_config import get_logger
 from ..utils.file_utils import safe_stem
 from ..utils.paddleocr_fallback import _get_paddleocr_subprocess_env
+from .config import PADDLE_DOC_PARSER_CMD, VL_REC_BACKEND, VL_REC_SERVER_URL
 
 logger = get_logger("pdf_converter_v2.processor")
-PADDLE_CMD = os.getenv("PADDLE_DOC_PARSER_CMD", "paddleocr")
+PADDLE_CMD = PADDLE_DOC_PARSER_CMD
 
 
 async def _run_paddle_doc_parser(cmd: Sequence[str]) -> tuple[int, str, str]:
@@ -80,6 +81,12 @@ async def _convert_with_paddle(
         save_path_base,
     ]
     
+    # 添加 VL 识别后端配置(如果已配置)
+    if VL_REC_BACKEND:
+        cmd.extend(["--vl_rec_backend", VL_REC_BACKEND])
+    if VL_REC_SERVER_URL:
+        cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
+    
     try:
         return_code, _, stderr = await _run_paddle_doc_parser(cmd)
         if return_code != 0:

+ 10 - 2
pdf_converter_v2/processor/converter.py

@@ -26,9 +26,9 @@ from ..utils.paddleocr_fallback import (
     _paddle_ocr_device_args,
     _get_paddleocr_subprocess_env,
 )
+from ..config import PADDLE_DOC_PARSER_CMD, VL_REC_BACKEND, VL_REC_SERVER_URL
 
 logger = get_logger("pdf_converter_v2.processor")
-PADDLE_CMD = os.getenv("PADDLE_DOC_PARSER_CMD", "paddleocr")
 
 
 async def _run_paddle_doc_parser(cmd: Sequence[str]) -> tuple[int, str, str]:
@@ -52,7 +52,7 @@ async def _run_paddle_doc_parser(cmd: Sequence[str]) -> tuple[int, str, str]:
 
 def _paddle_base_cmd(input_path: str, save_path_base: str, device_args: list) -> list:
     """构建 PaddleOCR doc_parser 命令(含设备参数)。"""
-    return [
+    cmd = [
         PADDLE_CMD,
         "doc_parser",
         "-i",
@@ -68,6 +68,14 @@ def _paddle_base_cmd(input_path: str, save_path_base: str, device_args: list) ->
         "--save_path",
         save_path_base,
     ] + device_args
+    
+    # 添加 VL 识别后端配置(如果已配置)
+    if VL_REC_BACKEND:
+        cmd.extend(["--vl_rec_backend", VL_REC_BACKEND])
+    if VL_REC_SERVER_URL:
+        cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
+        
+    return cmd
 
 
 async def _convert_with_paddle(

+ 6 - 0
pdf_converter_v2/utils/paddleocr_fallback.py

@@ -767,6 +767,12 @@ def call_paddleocr_ocr(image_path: str, save_path: str) -> tuple[Optional[List[s
         # 构建paddleocr ocr命令(NPU 下需加 --device npu:0,否则走 CPU 易段错误)
         cmd = [_get_paddleocr_executable(), "ocr", "-i", image_path, "--save_path", save_path] + _paddle_ocr_device_args()
 
+        # 添加 VL 识别后端配置(如果已配置)
+        if VL_REC_BACKEND:
+            cmd.extend(["--vl_rec_backend", VL_REC_BACKEND])
+        if VL_REC_SERVER_URL:
+            cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
+
         logger.info(f"[PaddleOCR OCR] 执行命令: {' '.join(cmd)}")
 
         # 执行命令(env 含 LD_PRELOAD 与 PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK)