пре 2 дана · 5c5a032fbd
--- a/pdf_converter_v2/utils/paddleocr_fallback.py
+++ b/pdf_converter_v2/utils/paddleocr_fallback.py
@@ -524,7 +524,7 @@ def paddleocr_to_markdown(paddleocr_result: Dict[str, Any]) -> str:
 
															 def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
														
 
															-    """调用paddleocr命令解析图片
														
 
															+    """调用 PaddleOCR Python API 解析图片（图表识别模式）
														
 
															     Args:
														
 
															         image_path: 图片路径
														
@@ -533,68 +533,26 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
 
															         paddleocr解析结果，如果失败返回None
														
 
															     """
														
 
															     try:
														
 
															-        # 检查图片文件是否存在
														
 
															         if not os.path.exists(image_path):
														
 
															             logger.error(f"[PaddleOCR 图表识别] 图片文件不存在: {image_path}")
														
 
															             return None
														
 
															-        # 生成输出目录和基础文件名
														
 
															+        if not PADDLEOCR_API_AVAILABLE:
														
 
															+            logger.error(f"[PaddleOCR 图表识别] Python API 不可用")
														
 
															+            return None
														
 
															+        
														
 
															+        # 生成输出目录
														
 
															         image_dir = os.path.dirname(image_path)
														
 
															-        image_basename = os.path.splitext(os.path.basename(image_path))[0]
														
 
															-        save_path_base = os.path.join(image_dir, image_basename)
														
 
															-        
														
 
															-        # 构建paddleocr命令（图表识别：开启 use_chart_recognition / use_layout_detection）
														
 
															-        cmd = [
														
 
															-            _get_paddleocr_executable(), "doc_parser", "-i", image_path,
														
 
															-            "--precision", "fp32",
														
 
															-            "--use_doc_unwarping", "False",
														
 
															-            "--use_doc_orientation_classify", "True",
														
 
															-            "--use_chart_recognition", "True",
														
 
															-            "--save_path", save_path_base
														
 
															-        ] + _paddle_ocr_device_args()
														
 
															-        
														
 
															-        # 添加 VL 识别后端配置（如果已配置）
														
 
															-        if VL_REC_BACKEND:
														
 
															-            cmd.extend(["--vl_rec_backend", VL_REC_BACKEND])
														
 
															-        if VL_REC_SERVER_URL:
														
 
															-            cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
														
 
															-        
														
 
															-        # 设置环境变量，限制GPU内存使用
														
 
															-        # env = os.environ.copy()
														
 
															-        # 设置PaddlePaddle的GPU内存分配策略，使用更保守的内存分配
														
 
															-        # env["FLAGS_fraction_of_gpu_memory_to_use"] = "0.3"  # 只使用30%的GPU内存
														
 
															-        # env["FLAGS_allocator_strategy"] = "auto_growth"  # 使用自动增长策略，避免一次性分配过多内存
														
 
															-        
														
 
															-        logger.info(f"[PaddleOCR 图表识别] 执行命令: {' '.join(cmd)}")
														
 
															-        
														
 
															-        # 执行命令（env 含 PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK、FLAGS_use_stride_kernel）
														
 
															-        result = subprocess.run(
														
 
															-            cmd,
														
 
															-            capture_output=True,
														
 
															-            text=True,
														
 
															-            timeout=300,  # 5分钟超时
														
 
															-            check=False,
														
 
															-            env=_get_paddleocr_subprocess_env(),
														
 
															-        )
														
 
															-        if result.returncode != 0:
														
 
															-            logger.error(f"[PaddleOCR 图表识别] 命令执行失败，返回码: {result.returncode}")
														
 
															-            # doc_parser 已知问题：PP-DocLayoutV3 返回 3 值而管道按 2 值解包，报 "too many values to unpack (expected 2)"
														
 
															-            if result.stderr and ("too many values to unpack" in result.stderr or "Exception from the 'cv' worker" in result.stderr):
														
 
															-                logger.warning(
														
 
															-                    "[PaddleOCR 图表识别] doc_parser 报 cv worker 解包错误，多为 PaddleX 与 PP-DocLayoutV3 不兼容。"
														
 
															-                    " 可尝试: pip install -U paddlex；或仅需文字时改用 文本识别。详见 README_STARTUP.md。"
														
 
															-                )
														
 
															-            # 完整 stderr 便于排查（NPU 初始化日志较长，真正错误常在末尾）
														
 
															-            if result.stderr:
														
 
															-                logger.error(f"[PaddleOCR 图表识别] stderr: {result.stderr}")
														
 
															-            if result.stdout:
														
 
															-                logger.error(f"[PaddleOCR 图表识别] stdout(末 2000 字符): {result.stdout[-2000:] if len(result.stdout) > 2000 else result.stdout}")
														
 
															-            return None
														
 
															+        logger.info(f"[PaddleOCR 图表识别] 使用 Python API 方式")
														
 
															+        success, md_file = _call_paddleocr_api(
														
 
															+            image_path, 
														
 
															+            image_dir, 
														
 
															+            use_chart_recognition=True, 
														
 
															+            use_layout_detection=True
														
 
															+        )
														
 
															-        # 从保存的Markdown文件中读取结果
														
 
															-        md_file = os.path.join(save_path_base, f"{image_basename}.md")
														
 
															-        if os.path.exists(md_file):
														
 
															+        if success and md_file and os.path.exists(md_file):
														
 
															             logger.info(f"[PaddleOCR 图表识别] 从Markdown文件读取结果: {md_file}")
														
 
															             try:
														
 
															                 with open(md_file, 'r', encoding='utf-8') as f:
														
@@ -606,22 +564,10 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
 
															                         logger.warning("[PaddleOCR 图表识别] Markdown文件内容为空")
														
 
															             except Exception as e:
														
 
															                 logger.exception(f"[PaddleOCR 图表识别] 读取Markdown文件失败: {e}")
														
 
															-        else:
														
 
															-            logger.warning(f"[PaddleOCR 图表识别] Markdown文件不存在: {md_file}")
														
 
															-        
														
 
															-        output_text = result.stdout.strip()
														
 
															-        if output_text:
														
 
															-            logger.info("[PaddleOCR 图表识别] 从stdout解析输出")
														
 
															-            parsed_result = parse_paddleocr_output(output_text)
														
 
															-            logger.info(f"[PaddleOCR 图表识别] 解析成功，获得 {len(parsed_result.get('parsing_res_list', []))} 个区块")
														
 
															-            return parsed_result
														
 
															-        else:
														
 
															-            logger.warning("[PaddleOCR 图表识别] stdout输出为空，且未找到Markdown文件")
														
 
															-            return None
														
 
															-    except subprocess.TimeoutExpired:
														
 
															-        logger.error("[PaddleOCR 图表识别] 命令执行超时")
														
 
															+        logger.error("[PaddleOCR 图表识别] Python API 失败")
														
 
															         return None
														
 
															+        
														
 
															     except Exception as e:
														
 
															         logger.exception(f"[PaddleOCR 图表识别] 调用失败: {e}")
														
 
															         return None