|
|
@@ -26,9 +26,9 @@ from ..utils.paddleocr_fallback import (
|
|
|
_paddle_ocr_device_args,
|
|
|
_get_paddleocr_subprocess_env,
|
|
|
)
|
|
|
+from ..config import PADDLE_DOC_PARSER_CMD, VL_REC_BACKEND, VL_REC_SERVER_URL
|
|
|
|
|
|
logger = get_logger("pdf_converter_v2.processor")
|
|
|
-PADDLE_CMD = os.getenv("PADDLE_DOC_PARSER_CMD", "paddleocr")
|
|
|
|
|
|
|
|
|
async def _run_paddle_doc_parser(cmd: Sequence[str]) -> tuple[int, str, str]:
|
|
|
@@ -52,7 +52,7 @@ async def _run_paddle_doc_parser(cmd: Sequence[str]) -> tuple[int, str, str]:
|
|
|
|
|
|
def _paddle_base_cmd(input_path: str, save_path_base: str, device_args: list) -> list:
|
|
|
"""构建 PaddleOCR doc_parser 命令(含设备参数)。"""
|
|
|
- return [
|
|
|
+ cmd = [
|
|
|
PADDLE_CMD,
|
|
|
"doc_parser",
|
|
|
"-i",
|
|
|
@@ -68,6 +68,14 @@ def _paddle_base_cmd(input_path: str, save_path_base: str, device_args: list) ->
|
|
|
"--save_path",
|
|
|
save_path_base,
|
|
|
] + device_args
|
|
|
+
|
|
|
+ # 添加 VL 识别后端配置(如果已配置)
|
|
|
+ if VL_REC_BACKEND:
|
|
|
+ cmd.extend(["--vl_rec_backend", VL_REC_BACKEND])
|
|
|
+ if VL_REC_SERVER_URL:
|
|
|
+ cmd.extend(["--vl_rec_server_url", VL_REC_SERVER_URL])
|
|
|
+
|
|
|
+ return cmd
|
|
|
|
|
|
|
|
|
async def _convert_with_paddle(
|