2 tuần trước cách đây · 3e478f6b42
--- a/cut.py
+++ b/cut.py
@@ -1,128 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-PNG图片剪切脚本
			
 
				-支持通过命令行参数指定上下左右四个方向的剪切像素
			
 
				-"""
			
 
				-
			
 
				-import argparse
			
 
				-import sys
			
 
				-from PIL import Image
			
 
				-import os
			
 
				-
			
 
				-def crop_image(input_path, output_path, top=0, bottom=0, left=0, right=0):
			
 
				-    """
			
 
				-    剪切PNG图片
			
 
				-    
			
 
				-    Args:
			
 
				-        input_path: 输入图片路径
			
 
				-        output_path: 输出图片路径
			
 
				-        top: 上方剪切像素数
			
 
				-        bottom: 下方剪切像素数
			
 
				-        left: 左侧剪切像素数
			
 
				-        right: 右侧剪切像素数
			
 
				-    """
			
 
				-    try:
			
 
				-        # 打开图片
			
 
				-        with Image.open(input_path) as img:
			
 
				-            # 转换为RGB模式（确保兼容性）
			
 
				-            if img.mode != 'RGB':
			
 
				-                img = img.convert('RGB')
			
 
				-            
			
 
				-            width, height = img.size
			
 
				-            print(f"原图尺寸: {width} x {height}")
			
 
				-            
			
 
				-            # 计算剪切后的尺寸
			
 
				-            new_width = width - left - right
			
 
				-            new_height = height - top - bottom
			
 
				-            
			
 
				-            # 验证剪切参数是否有效
			
 
				-            if new_width <= 0 or new_height <= 0:
			
 
				-                raise ValueError("剪切后的图片尺寸无效，请检查剪切参数")
			
 
				-            
			
 
				-            if left + right >= width or top + bottom >= height:
			
 
				-                raise ValueError("剪切区域超出图片范围")
			
 
				-            
			
 
				-            # 计算剪切区域 (left, top, right, bottom)
			
 
				-            crop_box = (left, top, width - right, height - bottom)
			
 
				-            
			
 
				-            # 执行剪切
			
 
				-            cropped_img = img.crop(crop_box)
			
 
				-            
			
 
				-            # 保存图片
			
 
				-            cropped_img.save(output_path, 'PNG')
			
 
				-            
			
 
				-            print(f"剪切后尺寸: {new_width} x {new_height}")
			
 
				-            print(f"图片已保存至: {output_path}")
			
 
				-            
			
 
				-    except FileNotFoundError:
			
 
				-        print(f"错误: 找不到输入文件 '{input_path}'")
			
 
				-        sys.exit(1)
			
 
				-    except Exception as e:
			
 
				-        print(f"错误: {e}")
			
 
				-        sys.exit(1)
			
 
				-
			
 
				-def main():
			
 
				-    parser = argparse.ArgumentParser(
			
 
				-        description="PNG图片剪切工具",
			
 
				-        formatter_class=argparse.RawDescriptionHelpFormatter,
			
 
				-        epilog="""
			
 
				-使用示例:
			
 
				-  # 从上方剪切10像素，下方20像素，左侧5像素，右侧15像素
			
 
				-  python png_crop.py input.png output.png -t 10 -b 20 -l 5 -r 15
			
 
				-  
			
 
				-  # 只剪切左右各50像素
			
 
				-  python png_crop.py input.png output.png -l 50 -r 50
			
 
				-  
			
 
				-  # 只剪切上方100像素
			
 
				-  python png_crop.py input.png output.png -t 100
			
 
				-        """
			
 
				-    )
			
 
				-    
			
 
				-    # 必需参数
			
 
				-    parser.add_argument('input', help='输入PNG文件路径')
			
 
				-    parser.add_argument('output', help='输出PNG文件路径')
			
 
				-    
			
 
				-    # 剪切参数
			
 
				-    parser.add_argument('-t', '--top', type=int, default=0, 
			
 
				-                       help='从上方剪切的像素数 (默认: 0)')
			
 
				-    parser.add_argument('-b', '--bottom', type=int, default=0, 
			
 
				-                       help='从下方剪切的像素数 (默认: 0)')
			
 
				-    parser.add_argument('-l', '--left', type=int, default=0, 
			
 
				-                       help='从左侧剪切的像素数 (默认: 0)')
			
 
				-    parser.add_argument('-r', '--right', type=int, default=0, 
			
 
				-                       help='从右侧剪切的像素数 (默认: 0)')
			
 
				-    
			
 
				-    # 可选参数
			
 
				-    parser.add_argument('--overwrite', action='store_true',
			
 
				-                       help='覆盖已存在的输出文件')
			
 
				-    
			
 
				-    args = parser.parse_args()
			
 
				-    
			
 
				-    # 检查输入文件是否存在
			
 
				-    if not os.path.exists(args.input):
			
 
				-        print(f"错误: 输入文件 '{args.input}' 不存在")
			
 
				-        sys.exit(1)
			
 
				-    
			
 
				-    # 检查输入文件是否为PNG
			
 
				-    if not args.input.lower().endswith('.png'):
			
 
				-        print("警告: 输入文件可能不是PNG格式，但脚本会尝试处理")
			
 
				-    
			
 
				-    # 检查输出文件是否已存在
			
 
				-    if os.path.exists(args.output) and not args.overwrite:
			
 
				-        response = input(f"输出文件 '{args.output}' 已存在，是否覆盖? (y/N): ")
			
 
				-        if response.lower() not in ['y', 'yes']:
			
 
				-            print("操作已取消")
			
 
				-            sys.exit(0)
			
 
				-    
			
 
				-    # 检查剪切参数是否为非负数
			
 
				-    for param_name, param_value in [('上边', args.top), ('下边', args.bottom), 
			
 
				-                                   ('左边', args.left), ('右边', args.right)]:
			
 
				-        if param_value < 0:
			
 
				-            print(f"错误: {param_name}剪切像素数不能为负数")
			
 
				-            sys.exit(1)
			
 
				-    
			
 
				-    # 执行剪切操作
			
 
				-    crop_image(args.input, args.output, args.top, args.bottom, args.left, args.right)
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/pdf_convert.py
+++ b/pdf_convert.py
@@ -1,13 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# Copyright (c) Opendatalab. All rights reserved.
			
 
				-
			
 
				-"""
			
 
				-PDF转换工具命令行入口
			
 
				-可以通过 python pdf_convert.py 来运行
			
 
				-"""
			
 
				-
			
 
				-from pdf_converter.main import main
			
 
				-
			
 
				-if __name__ == '__main__':
			
 
				-    main()
			
 
				-
			
--- a/pdf_converter_v2/README_STARTUP.md
+++ b/pdf_converter_v2/README_STARTUP.md
@@ -245,6 +245,52 @@ python3 -m uvicorn mineru.cli.fast_api:app --host 0.0.0.0 --port 5282
 
				 
			
 
				 systemd 服务可在 `Environment=` 里加上上述 `LD_PRELOAD`。
			
 
				 
			
 
				+**手动运行 `paddleocr` 命令时**（排查或单测），也必须先设置 `LD_PRELOAD`，否则会报同样的 static TLS 错。**若使用 `doc_parser`**，还需设置 `FLAGS_use_stride_kernel=1`，否则加载 PaddleOCR-VL 时会报：`RuntimeError: kernel view_dtype (CPU, Undefined(AnyLayout), uint8) is not registered`（`ocr` 模式无此问题）。
			
 
				+
			
 
				+```bash
			
 
				+export PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True
			
 
				+export LD_PRELOAD=/usr/local/lib/python3.10/dist-packages/scikit_learn.libs/libgomp-d22c30c5.so.1.0.0
			
 
				+# 若上述路径不存在，可改用：export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libgomp.so.1
			
 
				+# doc_parser 必加，否则 VL 模型加载报 view_dtype kernel 未注册
			
 
				+export FLAGS_use_stride_kernel=1
			
 
				+paddleocr doc_parser -i /path/to/image.png --save_path /tmp/out --device npu:0 ...
			
 
				+```
			
 
				+
			
 
				+通过 pdf_converter_v2 API 调用时，子进程会自动带上上述环境变量，无需手动设置。
			
 
				+
			
 
				+### PaddleOCR doc_parser：`Exception from the 'cv' worker: too many values to unpack (expected 2)`
			
 
				+
			
 
				+VL 模型加载成功后，在 `predict` 阶段可能报：
			
 
				+
			
 
				+```text
			
 
				+RuntimeError: Exception from the 'cv' worker: too many values to unpack (expected 2)
			
 
				+```
			
 
				+
			
 
				+**原因简述：** 当前 PaddleX 的 paddleocr_vl 管道中，对布局模型（如 PP-DocLayoutV3）的返回值按 2 个解包（如 boxes, labels），而 PP-DocLayoutV3 实际返回 3 个（boxes, labels, scores），导致版本不兼容。此为 PaddleOCR/PaddleX 已知问题，需等上游修复或版本对齐。
			
 
				+
			
 
				+**建议：**
			
 
				+
			
 
				+1. **仅需文字识别时**：改用 `ocr` 模式，例如 `paddleocr ocr -i 图片.jpg`，或通过 API 使用 `backend=mineru` 等不依赖 doc_parser 的路径。
			
 
				+2. **必须用 doc_parser 时**：先尝试 `pip install -U paddlex`；**即使已升级到最新版（如 3.4.1）该错误仍可能出现**，属上游 PaddleX 与 PP-DocLayoutV3 返回值格式未对齐。可关注 [PaddleOCR Issues](https://github.com/PaddlePaddle/PaddleOCR/issues) 与 [PaddleX Issues](https://github.com/PaddlePaddle/PaddleX/issues) 中 doc_parser / cv worker / PP-DocLayoutV3 相关讨论，待官方修复后再用 doc_parser。
			
 
				+
			
 
				+### MinerU（NPU）：`TBE Subprocess[task_distribute] raise error[], main process disappeared!` 与 semaphore 泄漏
			
 
				+
			
 
				+MinerU 日志中可能出现：
			
 
				+
			
 
				+```text
			
 
				+[ERROR] TBE Subprocess[task_distribute] raise error[], main process disappeared!
			
 
				+resource_tracker: There appear to be 30 leaked semaphore objects to clean up at shutdown
			
 
				+```
			
 
				+
			
 
				+**原因简述：** 在 Ascend NPU 上，CANN/TBE 会起子进程做推理；主进程退出（被 kill、OOM 或正常退出）时，若子进程尚未结束，会报 “main process disappeared”。同时 Python 的 multiprocessing 未正确回收子进程，就会报 “leaked semaphore objects”。
			
 
				+
			
 
				+**建议：**
			
 
				+
			
 
				+1. **并发与负载**：单实例 MinerU 尽量控制并发请求（如前端/网关限流），避免同一进程内同时跑太多 NPU 任务导致异常退出。
			
 
				+2. **优雅退出**：用 `kill -TERM <pid>` 结束 MinerU 进程，给 uvicorn 几秒收尾，再视情况 `kill -9`，可减少 TBE 子进程“主进程已消失”的报错。
			
 
				+3. **定位是否 NPU 相关**：若频繁出现，可临时设 `MINERU_DEVICE_MODE=cpu` 启动 MinerU，若错误消失则多半与 NPU/CANN 子进程生命周期有关；再检查 CANN 版本、驱动与内存是否充足。
			
 
				+4. **semaphore 泄漏**：多为进程未正常 join/清理导致，一般出现在进程退出时，若不伴随业务失败可先观察；长期可关注 MinerU/Paddle/CANN 更新与 NPU 相关 issue。
			
 
				+
			
 
				 ---
			
 
				 
			
 
				 ## 五、环境变量速查
			
--- a/pdf_converter_v2/parser/table_parser.py
+++ b/pdf_converter_v2/parser/table_parser.py
@@ -180,25 +180,30 @@ def parse_operational_conditions(markdown_content: str, require_title: bool = Tr
 
				         logger.warning("[工况信息] 未能提取出任何表格内容")
			
 
				         return conditions
			
 
				     
			
 
				-    # 查找工况信息表格（通常包含"检测时间"、"电压"、"电流"等关键词）
			
 
				+    # 查找工况信息表格（通常包含"检测时间"/"时间"、"电压"/"U"、"电流"/"I"等关键词；支持两行表头）
			
 
				     for table in tables:
			
 
				         if not table or len(table) < 2:
			
 
				             continue
			
 
				         
			
 
				-        # 检查表头是否包含工况信息的关键词
			
 
				         header_row = table[0]
			
 
				-        has_operational_keywords = any(
			
 
				-            keyword in " ".join(header_row)
			
 
				-            for keyword in ["检测时间", "电压", "电流", "有功功率", "无功功率", "项目"]
			
 
				+        header_text = " ".join(header_row)
			
 
				+        # 第一行表头：检测时间/电压/电流/项目 或 名称/时间/运行工况
			
 
				+        has_row0 = any(
			
 
				+            k in header_text for k in ["检测时间", "电压", "电流", "有功功率", "无功功率", "项目", "时间", "名称", "运行工况"]
			
 
				         )
			
 
				-        
			
 
				+        # 两行表头时第二行常有 U(kV)、I(A)、P(MW)、Q(Mvar)
			
 
				+        header_row2 = table[1] if len(table) > 1 else []
			
 
				+        header2_text = " ".join(header_row2).lower()
			
 
				+        has_row1 = any(
			
 
				+            k in header2_text for k in ["u(", "i(", "p(", "q(", "电压", "电流", "有功", "无功", "kv", "mw", "mvar"]
			
 
				+        )
			
 
				+        has_operational_keywords = has_row0 or (len(header_row2) >= 4 and has_row1)
			
 
				         if not has_operational_keywords:
			
 
				             continue
			
 
				         
			
 
				         logger.info(f"[工况信息] 找到工况信息表格，行数: {len(table)}")
			
 
				         
			
 
				-        # 找到表头行的列索引
			
 
				-        header_row = table[0]
			
 
				+        # 列索引：优先用第二行表头（U/I/P/Q），否则用第一行
			
 
				         monitor_at_idx = -1
			
 
				         project_idx = -1
			
 
				         name_idx = -1
			
@@ -207,38 +212,62 @@ def parse_operational_conditions(markdown_content: str, require_title: bool = Tr
 
				         active_power_idx = -1
			
 
				         reactive_power_idx = -1
			
 
				         
			
 
				+        # 若第二行表头存在且含 U/I/P/Q，用其确定电压/电流/功率列
			
 
				+        if len(table) > 1 and has_row1:
			
 
				+            row2 = table[1]
			
 
				+            for idx, cell in enumerate(row2):
			
 
				+                cell_n = normalize_text(cell)
			
 
				+                if "u" in cell_n and ("kv" in cell_n or "k v" in cell_n or "电压" in cell):
			
 
				+                    voltage_idx = idx
			
 
				+                elif "i" in cell_n and ("a)" in cell_n or "a )" in cell_n or "电流" in cell):
			
 
				+                    current_idx = idx
			
 
				+                elif "p" in cell_n and ("mw" in cell_n or "m w" in cell_n or "有功" in cell):
			
 
				+                    active_power_idx = idx
			
 
				+                elif "q" in cell_n and ("mvar" in cell_n or "无功" in cell):
			
 
				+                    reactive_power_idx = idx
			
 
				+                elif ("时间" in cell or "检测时间" in cell or "监测时间" in cell) and monitor_at_idx == -1:
			
 
				+                    monitor_at_idx = idx
			
 
				+                elif ("名称" in cell or "主变" in cell) and name_idx == -1:
			
 
				+                    name_idx = idx
			
 
				+        
			
 
				+        # 用第一行表头补全未识别的列（名称、时间、项目等）
			
 
				         for idx, cell in enumerate(header_row):
			
 
				             cell_lower = cell.lower()
			
 
				-            if "检测时间" in cell or "监测时间" in cell:
			
 
				+            if ("检测时间" in cell or "监测时间" in cell or "时间" in cell) and monitor_at_idx == -1:
			
 
				                 monitor_at_idx = idx
			
 
				             elif "项目" in cell:
			
 
				-                # 项目列可能有colspan，需要找到实际的列
			
 
				                 if project_idx == -1:
			
 
				                     project_idx = idx
			
 
				-                # 检查下一列是否是名称列（如果项目列colspan=2，下一列可能是名称）
			
 
				                 if idx + 1 < len(header_row) and name_idx == -1:
			
 
				                     next_cell = header_row[idx + 1]
			
 
				                     if not any(k in next_cell.lower() for k in ["电压", "电流", "有功", "无功", "检测"]):
			
 
				                         name_idx = idx + 1
			
 
				-            elif "电压" in cell or "电压(kv)" in cell_lower:
			
 
				+            elif ("电压" in cell or "电压(kv)" in cell_lower) and voltage_idx == -1:
			
 
				                 voltage_idx = idx
			
 
				-            elif "电流" in cell or "电流(a)" in cell_lower:
			
 
				+            elif ("电流" in cell or "电流(a)" in cell_lower) and current_idx == -1:
			
 
				                 current_idx = idx
			
 
				-            elif "有功功率" in cell or ("有功" in cell and "功率" in cell):
			
 
				+            elif ("有功功率" in cell or ("有功" in cell and "功率" in cell)) and active_power_idx == -1:
			
 
				                 active_power_idx = idx
			
 
				-            elif "无功功率" in cell or ("无功" in cell and "功率" in cell):
			
 
				+            elif ("无功功率" in cell or ("无功" in cell and "功率" in cell)) and reactive_power_idx == -1:
			
 
				                 reactive_power_idx = idx
			
 
				             elif ("名称" in cell or "主变" in cell) and name_idx == -1:
			
 
				                 name_idx = idx
			
 
				         
			
 
				+        # 默认名称列0、时间列1（常见两行表头：名称、时间、运行工况 | 名称、时间、U、I、P、Q）
			
 
				+        if name_idx == -1 and len(header_row) > 0 and ("名称" in header_row[0] or not any("名称" in c for c in header_row2)):
			
 
				+            name_idx = 0
			
 
				+        if monitor_at_idx == -1 and len(header_row) > 1 and ("时间" in header_row[1] or (len(header_row2) > 1 and "时间" in header_row2[1])):
			
 
				+            monitor_at_idx = 1
			
 
				+        
			
 
				         logger.debug(f"[工况信息] 列索引: 检测时间={monitor_at_idx}, 项目={project_idx}, 名称={name_idx}, "
			
 
				                     f"电压={voltage_idx}, 电流={current_idx}, 有功功率={active_power_idx}, 无功功率={reactive_power_idx}")
			
 
				         
			
 
				-        # 处理数据行（从第二行开始，第一行是表头）
			
 
				+        # 数据行：两行表头时从第3行（索引2）开始，否则从第2行（索引1）开始
			
 
				+        data_start = 2 if (len(table) > 1 and has_row1) else 1
			
 
				         current_monitor_at = ""
			
 
				         current_project = ""
			
 
				         
			
 
				-        for row_idx in range(1, len(table)):
			
 
				+        for row_idx in range(data_start, len(table)):
			
 
				             row = table[row_idx]
			
 
				             if len(row) < 4:  # 至少需要检测时间、项目、名称等基本字段
			
 
				                 continue
			
--- a/pdf_converter_v2/requirements.txt
+++ b/pdf_converter_v2/requirements.txt
@@ -1,10 +1,12 @@
 
				-# PDF Converter v2 - 依赖（按代码实际使用整理）
			
 
				+# PDF Converter v2 - 核心依赖
			
 
				+# 基础功能所需的依赖，不包含可选的 OCR
			
 
				 
			
 
				 # ========== Web API（运行 API 服务必装） ==========
			
 
				 fastapi>=0.100.0
			
 
				 uvicorn[standard]>=0.23.0
			
 
				 pydantic>=2.0.0
			
 
				 typing-extensions>=4.0.0
			
 
				+python-multipart>=0.0.6
			
 
				 
			
 
				 # ========== 异步与 HTTP ==========
			
 
				 aiohttp>=3.8.0          # 调用 MinerU file_parse、重试上传
			
@@ -12,7 +14,7 @@ aiofiles>=23.0.0        # 异步读写文件（converter 解压/写 md）
 
				 
			
 
				 # ========== 图片处理 ==========
			
 
				 Pillow>=9.0.0           # 图片处理（converter、parser、test_no、pdf_watermark_remover）
			
 
				-numpy>=1.20.0           # image_preprocessor 去水印/裁剪页眉页脚
			
 
				+numpy>=1.20.0,<2.0.0    # image_preprocessor 去水印/裁剪页眉页脚（限制 <2.0 避免兼容性问题）
			
 
				 opencv-python>=4.5.0    # image_preprocessor（去水印、裁剪）、pdf_watermark_remover 依赖
			
 
				 
			
 
				 # ========== PDF 处理 ==========
			
@@ -23,9 +25,10 @@ pdfplumber>=0.11.0      # table_extractor 表格提取、file_utils 检测 PDF
 
				 
			
 
				 # ========== 表格提取（/convert 结算报告/设计评审等类型） ==========
			
 
				 pandas>=1.3.0           # table_extractor 表格数据处理
			
 
				+openpyxl>=3.0.0         # Excel 文件读写
			
 
				 
			
 
				 # ========== 日志 ==========
			
 
				 loguru>=0.7.0
			
 
				 
			
 
				-# ========== 可选 / 测试 ==========
			
 
				-# requests>=2.28.0      # 仅 test_api.py 调用接口时需要，按需安装
			
 
				+# ========== HTTP 客户端（测试用） ==========
			
 
				+requests>=2.28.0        # test_api.py 调用接口时需要
			
--- a/pdf_converter_v2/utils/paddleocr_fallback.py
+++ b/pdf_converter_v2/utils/paddleocr_fallback.py
@@ -110,6 +110,9 @@ def _get_paddleocr_subprocess_env() -> Dict[str, str]:
 
				     env = dict(os.environ)
			
 
				     # 跳过「Checking connectivity to the model hosters」
			
 
				     env.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")
			
 
				+    # doc_parser 加载 PaddleOCR-VL 时 safetensors 会触发 view_dtype(CPU, Undefined(AnyLayout), uint8)，
			
 
				+    # 该 kernel 未注册；强制使用 STRIDED 布局可避免：RuntimeError: kernel view_dtype (CPU, Undefined(AnyLayout), uint8) is not registered
			
 
				+    env.setdefault("FLAGS_use_stride_kernel", "1")
			
 
				     # 子进程若无 LD_PRELOAD，会触发 sklearn/paddlex 的「cannot allocate memory in static TLS block」
			
 
				     if not env.get("LD_PRELOAD"):
			
 
				         preload_paths: List[str] = []
			
@@ -432,7 +435,17 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
 
				         
			
 
				         if result.returncode != 0:
			
 
				             logger.error(f"[PaddleOCR] 命令执行失败，返回码: {result.returncode}")
			
 
				-            logger.error(f"[PaddleOCR] 错误输出: {result.stderr}")
			
 
				+            # doc_parser 已知问题：PP-DocLayoutV3 返回 3 值而管道按 2 值解包，报 "too many values to unpack (expected 2)"
			
 
				+            if result.stderr and ("too many values to unpack" in result.stderr or "Exception from the 'cv' worker" in result.stderr):
			
 
				+                logger.warning(
			
 
				+                    "[PaddleOCR] doc_parser 报 cv worker 解包错误，多为 PaddleX 与 PP-DocLayoutV3 不兼容。"
			
 
				+                    " 可尝试: pip install -U paddlex；或仅需文字时改用 ocr 模式。详见 README_STARTUP.md。"
			
 
				+                )
			
 
				+            # 完整 stderr 便于排查（NPU 初始化日志较长，真正错误常在末尾）
			
 
				+            if result.stderr:
			
 
				+                logger.error(f"[PaddleOCR] stderr: {result.stderr}")
			
 
				+            if result.stdout:
			
 
				+                logger.error(f"[PaddleOCR] stdout(末 2000 字符): {result.stdout[-2000:] if len(result.stdout) > 2000 else result.stdout}")
			
 
				             return None
			
 
				         
			
 
				         # 从保存的Markdown文件中读取结果
			
@@ -854,9 +867,14 @@ def call_paddleocr_doc_parser_for_text(image_path: str, save_path: str) -> tuple
 
				         
			
 
				         if result.returncode != 0:
			
 
				             logger.error(f"[PaddleOCR DocParser] 命令执行失败，返回码: {result.returncode}")
			
 
				+            if result.stderr and ("too many values to unpack" in result.stderr or "Exception from the 'cv' worker" in result.stderr):
			
 
				+                logger.warning(
			
 
				+                    "[PaddleOCR DocParser] 报 cv worker 解包错误，多为 PaddleX 与 PP-DocLayoutV3 不兼容。"
			
 
				+                    " 可尝试: pip install -U paddlex；或改用 ocr 模式提取文字。详见 README_STARTUP.md。"
			
 
				+                )
			
 
				             logger.error(f"[PaddleOCR DocParser] 错误输出: {result.stderr}")
			
 
				             return None, None
			
 
				-        
			
 
				+
			
 
				         # 查找保存的Markdown文件
			
 
				         # PaddleOCR会在save_path下创建目录，文件路径为: {save_path}/{basename}.md
			
 
				         md_file = os.path.join(save_path_base, f"{image_basename}.md")
			
--- a/start_api.py
+++ b/start_api.py
@@ -1,35 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# Copyright (c) Opendatalab. All rights reserved.
			
 
				-
			
 
				-"""
			
 
				-FastAPI服务器启动脚本（项目根目录版本）
			
 
				-可以通过 python start_api.py 来启动API服务
			
 
				-"""
			
 
				-
			
 
				-import os
			
 
				-import sys
			
 
				-from pathlib import Path
			
 
				-
			
 
				-# 确保当前目录在Python路径中
			
 
				-current_dir = Path(__file__).parent.absolute()
			
 
				-sys.path.insert(0, str(current_dir))
			
 
				-
			
 
				-import uvicorn
			
 
				-from pdf_converter.api.main import app
			
 
				-
			
 
				-if __name__ == '__main__':
			
 
				-    # 可以通过环境变量配置端口和主机
			
 
				-    host = os.getenv("API_HOST", "0.0.0.0")
			
 
				-    port = int(os.getenv("API_PORT", "4213"))
			
 
				-    
			
 
				-    print(f"启动PDF转换工具API服务...")
			
 
				-    print(f"访问地址: http://{host}:{port}")
			
 
				-    print(f"API文档: http://{host}:{port}/docs")
			
 
				-    
			
 
				-    uvicorn.run(
			
 
				-        app,
			
 
				-        host=host,
			
 
				-        port=port,
			
 
				-        log_level="info"
			
 
				-    )
			
 
				-