Sfoglia il codice sorgente

Revert "fix: 未配置 VL 后端时使用传统 ocr 命令,避免加载 VL 模型导致 OOM"

This reverts commit 415a260763b3a390bf543db117d183ed09bdd421.
何文松 1 giorno fa
parent
commit
0282fe550c
1 file modificato con 18 aggiunte e 36 eliminazioni
  1. 18 36
      pdf_converter_v2/utils/paddleocr_fallback.py

+ 18 - 36
pdf_converter_v2/utils/paddleocr_fallback.py

@@ -132,11 +132,6 @@ def _get_paddleocr_subprocess_env() -> Dict[str, str]:
     # doc_parser 加载 PaddleOCR-VL 时 safetensors 会触发 view_dtype(CPU, Undefined(AnyLayout), uint8),
     # 该 kernel 未注册;强制使用 STRIDED 布局可避免:RuntimeError: kernel view_dtype (CPU, Undefined(AnyLayout), uint8) is not registered
     env.setdefault("FLAGS_use_stride_kernel", "1")
-    
-    # 当未配置 VL 后端时,禁用 VL 模型加载(避免显存不足)
-    if not VL_REC_BACKEND:
-        env["PADDLEX_VLM_DISABLE"] = "1"
-    
     _PADDLEOCR_ENV = env
     return env
 
@@ -411,16 +406,13 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
         image_basename = os.path.splitext(os.path.basename(image_path))[0]
         save_path_base = os.path.join(image_dir, image_basename)
         
-        # 只有配置了 VL 后端时才启用图表识别(避免加载 VL 模型导致 OOM)
-        use_chart_recognition = "True" if VL_REC_BACKEND else "False"
-        
-        # 构建paddleocr命令(图表识别:根据 VL 后端配置决定)
+        # 构建paddleocr命令(图表识别:开启 use_chart_recognition / use_layout_detection)
         cmd = [
             _get_paddleocr_executable(), "doc_parser", "-i", image_path,
             "--precision", "fp32",
             "--use_doc_unwarping", "False",
             "--use_doc_orientation_classify", "True",
-            "--use_chart_recognition", use_chart_recognition,
+            "--use_chart_recognition", "True",
             "--save_path", save_path_base
         ] + _paddle_ocr_device_args()
         
@@ -825,29 +817,22 @@ def call_paddleocr_ocr(image_path: str, save_path: str) -> tuple[Optional[List[s
         save_path_base = os.path.join(save_path, image_basename)
         os.makedirs(save_path_base, exist_ok=True)
 
-        # 当未配置 VL 后端时,使用传统 ocr 命令(避免 doc_parser 加载 VL 模型)
-        if not VL_REC_BACKEND:
-            cmd = [
-                _get_paddleocr_executable(), "ocr", "-i", image_path,
-                "--lang", "ch",
-                "--save_path", save_path_base
-            ] + _paddle_ocr_device_args()
-            logger.info(f"[PaddleOCR 文本识别] 执行命令(ocr): {' '.join(cmd)}")
-        else:
-            # 使用 doc_parser(支持 VL 后端)
-            cmd = [
-                _get_paddleocr_executable(), "doc_parser", "-i", image_path,
-                "--precision", "fp32",
-                "--use_doc_unwarping", "False",
-                "--use_doc_orientation_classify", "True",
-                "--use_chart_recognition", "False",
-                "--use_layout_detection", "False",
-                "--save_path", save_path_base
-            ] + _paddle_ocr_device_args()
+        # 使用不识别图表的 doc_parser 参数(文本识别,无 --use_table_recognition)
+        cmd = [
+            _get_paddleocr_executable(), "doc_parser", "-i", image_path,
+            "--precision", "fp32",
+            "--use_doc_unwarping", "False",
+            "--use_doc_orientation_classify", "True",
+            "--use_chart_recognition", "False",
+            "--use_layout_detection", "False",
+            "--save_path", save_path_base
+        ] + _paddle_ocr_device_args()
+        if VL_REC_BACKEND:
             cmd.extend(["--vl_rec_backend", VL_REC_BACKEND])
-            if VL_REC_SERVER_URL:
-                cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
-            logger.info(f"[PaddleOCR 文本识别] 执行命令(doc_parser): {' '.join(cmd)}")
+        if VL_REC_SERVER_URL:
+            cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
+
+        logger.info(f"[PaddleOCR 文本识别] 执行命令(doc_parser): {' '.join(cmd)}")
 
         result = subprocess.run(
             cmd,
@@ -929,15 +914,12 @@ def call_paddleocr_doc_parser_for_text(image_path: str, save_path: str) -> tuple
         save_path_base = os.path.join(save_path, image_basename)
         os.makedirs(save_path_base, exist_ok=True)
         
-        # 只有配置了 VL 后端时才启用图表识别(避免加载 VL 模型导致 OOM)
-        use_chart_recognition = "True" if VL_REC_BACKEND else "False"
-        
         cmd = [
             _get_paddleocr_executable(), "doc_parser", "-i", image_path,
             "--precision", "fp32",
             "--use_doc_unwarping", "False",
             "--use_doc_orientation_classify", "True",
-            "--use_chart_recognition", use_chart_recognition,
+            "--use_chart_recognition", "True",
             "--save_path", save_path_base
         ] + _paddle_ocr_device_args()
         if VL_REC_BACKEND: