ソースを参照

fix: 未配置 VL 后端时使用传统 ocr 命令,避免加载 VL 模型导致 OOM

何文松 1 日前
コミット
415a260763
1 ファイル変更、36 行追加、18 行削除
  1. 36 18
      pdf_converter_v2/utils/paddleocr_fallback.py

+ 36 - 18
pdf_converter_v2/utils/paddleocr_fallback.py

@@ -132,6 +132,11 @@ def _get_paddleocr_subprocess_env() -> Dict[str, str]:
     # doc_parser 加载 PaddleOCR-VL 时 safetensors 会触发 view_dtype(CPU, Undefined(AnyLayout), uint8),
     # 该 kernel 未注册;强制使用 STRIDED 布局可避免:RuntimeError: kernel view_dtype (CPU, Undefined(AnyLayout), uint8) is not registered
     env.setdefault("FLAGS_use_stride_kernel", "1")
+    
+    # 当未配置 VL 后端时,禁用 VL 模型加载(避免显存不足)
+    if not VL_REC_BACKEND:
+        env["PADDLEX_VLM_DISABLE"] = "1"
+    
     _PADDLEOCR_ENV = env
     return env
 
@@ -406,13 +411,16 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
         image_basename = os.path.splitext(os.path.basename(image_path))[0]
         save_path_base = os.path.join(image_dir, image_basename)
         
-        # 构建paddleocr命令(图表识别:开启 use_chart_recognition / use_layout_detection)
+        # 只有配置了 VL 后端时才启用图表识别(避免加载 VL 模型导致 OOM)
+        use_chart_recognition = "True" if VL_REC_BACKEND else "False"
+        
+        # 构建paddleocr命令(图表识别:根据 VL 后端配置决定)
         cmd = [
             _get_paddleocr_executable(), "doc_parser", "-i", image_path,
             "--precision", "fp32",
             "--use_doc_unwarping", "False",
             "--use_doc_orientation_classify", "True",
-            "--use_chart_recognition", "True",
+            "--use_chart_recognition", use_chart_recognition,
             "--save_path", save_path_base
         ] + _paddle_ocr_device_args()
         
@@ -817,22 +825,29 @@ def call_paddleocr_ocr(image_path: str, save_path: str) -> tuple[Optional[List[s
         save_path_base = os.path.join(save_path, image_basename)
         os.makedirs(save_path_base, exist_ok=True)
 
-        # 使用不识别图表的 doc_parser 参数(文本识别,无 --use_table_recognition)
-        cmd = [
-            _get_paddleocr_executable(), "doc_parser", "-i", image_path,
-            "--precision", "fp32",
-            "--use_doc_unwarping", "False",
-            "--use_doc_orientation_classify", "True",
-            "--use_chart_recognition", "False",
-            "--use_layout_detection", "False",
-            "--save_path", save_path_base
-        ] + _paddle_ocr_device_args()
-        if VL_REC_BACKEND:
+        # 当未配置 VL 后端时,使用传统 ocr 命令(避免 doc_parser 加载 VL 模型)
+        if not VL_REC_BACKEND:
+            cmd = [
+                _get_paddleocr_executable(), "ocr", "-i", image_path,
+                "--lang", "ch",
+                "--save_path", save_path_base
+            ] + _paddle_ocr_device_args()
+            logger.info(f"[PaddleOCR 文本识别] 执行命令(ocr): {' '.join(cmd)}")
+        else:
+            # 使用 doc_parser(支持 VL 后端)
+            cmd = [
+                _get_paddleocr_executable(), "doc_parser", "-i", image_path,
+                "--precision", "fp32",
+                "--use_doc_unwarping", "False",
+                "--use_doc_orientation_classify", "True",
+                "--use_chart_recognition", "False",
+                "--use_layout_detection", "False",
+                "--save_path", save_path_base
+            ] + _paddle_ocr_device_args()
             cmd.extend(["--vl_rec_backend", VL_REC_BACKEND])
-        if VL_REC_SERVER_URL:
-            cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
-
-        logger.info(f"[PaddleOCR 文本识别] 执行命令(doc_parser): {' '.join(cmd)}")
+            if VL_REC_SERVER_URL:
+                cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
+            logger.info(f"[PaddleOCR 文本识别] 执行命令(doc_parser): {' '.join(cmd)}")
 
         result = subprocess.run(
             cmd,
@@ -914,12 +929,15 @@ def call_paddleocr_doc_parser_for_text(image_path: str, save_path: str) -> tuple
         save_path_base = os.path.join(save_path, image_basename)
         os.makedirs(save_path_base, exist_ok=True)
         
+        # 只有配置了 VL 后端时才启用图表识别(避免加载 VL 模型导致 OOM)
+        use_chart_recognition = "True" if VL_REC_BACKEND else "False"
+        
         cmd = [
             _get_paddleocr_executable(), "doc_parser", "-i", image_path,
             "--precision", "fp32",
             "--use_doc_unwarping", "False",
             "--use_doc_orientation_classify", "True",
-            "--use_chart_recognition", "True",
+            "--use_chart_recognition", use_chart_recognition,
             "--save_path", save_path_base
         ] + _paddle_ocr_device_args()
         if VL_REC_BACKEND: