Explorar o código

pdf_converter_v2: sync api/main.py, test_api.py from Clerk2.5

何文松 hai 3 semanas
pai
achega
c218d627a8
Modificáronse 2 ficheiros con 8 adicións e 6 borrados
  1. +7 -5
      pdf_converter_v2/api/main.py
  2. +1 -1
      pdf_converter_v2/test_api.py

+ 7 - 5
pdf_converter_v2/api/main.py

@@ -43,7 +43,7 @@ except ImportError:
     DEFAULT_DPI = 200
     DEFAULT_MAX_PAGES = 10
     DEFAULT_API_URL = os.getenv("API_URL", "http://127.0.0.1:5282")
-    DEFAULT_BACKEND = os.getenv("BACKEND", "vlm-vllm-async-engine")
+    DEFAULT_BACKEND = os.getenv("BACKEND", "pipeline")
     DEFAULT_PARSE_METHOD = os.getenv("PARSE_METHOD", "auto")
     DEFAULT_START_PAGE_ID = int(os.getenv("START_PAGE_ID", "0"))
     DEFAULT_END_PAGE_ID = int(os.getenv("END_PAGE_ID", "99999"))
@@ -330,7 +330,7 @@ async def process_conversion_task(
         tables_info = None
         
         # 针对投资估算类型,需要先切割附件页
-        if request.doc_type in ("fsApproval", "fsReview", "pdApproval"):
+        if request.doc_type in ("fsApproval", "fsReview", "pdApproval", "safetyFsApproval"):
             logger.info(f"[任务 {task_id}] 文档类型 {request.doc_type},需要先切割附件页")
             
             # 导入附件页切割函数
@@ -654,10 +654,10 @@ async def process_pdf_to_markdown_task(
 @app.post("/convert", response_model=ConversionResponse)
 async def convert_file(
     file: Annotated[UploadFile, File(description="上传的PDF或图片文件")],
-    # 新增:类型参数(英文传参) noiseRec | emRec | opStatus | settlementReport | designReview | fsApproval | fsReview | pdApproval | finalAccount
+    # 新增:类型参数(英文传参)含 safetyFsApproval 安评可研批复
     type: Annotated[
-        Optional[Literal["noiseRec", "emRec", "opStatus", "settlementReport", "designReview", "fsApproval", "fsReview", "pdApproval", "finalAccount"]],
-        Form(description="文档类型:noiseRec | emRec | opStatus | settlementReport | designReview | fsApproval | fsReview | pdApproval | finalAccount")
+        Optional[Literal["noiseRec", "emRec", "opStatus", "settlementReport", "designReview", "fsApproval", "fsReview", "pdApproval", "safetyFsApproval", "finalAccount"]],
+        Form(description="文档类型:noiseRec | emRec | opStatus | settlementReport | designReview | fsApproval | fsReview | pdApproval | safetyFsApproval | finalAccount")
     ] = None,
 ):
     """
@@ -679,6 +679,7 @@ async def convert_file(
       * fsApproval - 可研批复投资估算
       * fsReview - 可研评审投资估算
       * pdApproval - 初设批复概算投资
+      * safetyFsApproval - 安评可研批复
     
     注意:v2 版本内部使用外部API进行转换,v2特有的配置参数(如API URL、backend等)
     通过环境变量或配置文件设置,不通过API参数传入。
@@ -797,6 +798,7 @@ async def convert_file(
         "fsApproval": "fsApproval",
         "fsReview": "fsReview",
         "pdApproval": "pdApproval",
+        "safetyFsApproval": "safetyFsApproval",
         # 决算报告
         "finalAccount": "finalAccount",
     }

+ 1 - 1
pdf_converter_v2/test_api.py

@@ -24,7 +24,7 @@ from pathlib import Path
 from typing import Optional, Dict, Any, List
 
 # API 配置(默认本机 4214 端口;可通过环境变量 PDF_CONVERTER_API_URL 覆盖)
-API_BASE_URL = os.getenv("PDF_CONVERTER_API_URL", "http://127.0.0.1:4214")
+API_BASE_URL = os.getenv("PDF_CONVERTER_API_URL", "http://47.108.80.98:4214")
 
 # 测试文件配置
 TEST_DIR = Path(__file__).parent / "test"