|
@@ -1,34 +1,61 @@
|
|
|
# Copyright (c) Opendatalab. All rights reserved.
|
|
# Copyright (c) Opendatalab. All rights reserved.
|
|
|
|
|
|
|
|
"""
|
|
"""
|
|
|
-配置文件 v2
|
|
|
|
|
|
|
+配置文件 v2 - 从配置文件读取配置(不使用环境变量)
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
-import os
|
|
|
|
|
-
|
|
|
|
|
-# 设备环境:nvi(NVIDIA GPU)/ npu(华为昇腾 NPU)/ cpu,用于按环境选择 VLLM_PLUGINS、PADDLE_OCR_DEVICE 等
|
|
|
|
|
|
|
+from .config_loader import get_config_loader
|
|
|
from .utils.device_env import detect_device_kind
|
|
from .utils.device_env import detect_device_kind
|
|
|
|
|
|
|
|
-DEVICE_KIND = os.getenv("PDF_CONVERTER_DEVICE_KIND") or detect_device_kind()
|
|
|
|
|
|
|
+# 加载配置文件
|
|
|
|
|
+_config = get_config_loader()
|
|
|
|
|
+
|
|
|
|
|
+# 设备环境:nvi(NVIDIA GPU)/ npu(华为昇腾 NPU)/ cpu,用于按环境选择 VLLM_PLUGINS、PADDLE_OCR_DEVICE 等
|
|
|
|
|
+# 优先从配置文件读取,如果未配置则自动检测
|
|
|
|
|
+_device_kind_from_config = _config.get_str("device_kind", "")
|
|
|
|
|
+DEVICE_KIND = _device_kind_from_config if _device_kind_from_config else detect_device_kind()
|
|
|
|
|
|
|
|
-# 默认模型配置(与 v1 保持一致)
|
|
|
|
|
-DEFAULT_MODEL_NAME = "OpenDataLab/MinerU2.5-2509-1.2B"
|
|
|
|
|
-DEFAULT_GPU_MEMORY_UTILIZATION = 0.9
|
|
|
|
|
-DEFAULT_DPI = 200
|
|
|
|
|
-DEFAULT_MAX_PAGES = 10
|
|
|
|
|
|
|
+# 默认模型配置
|
|
|
|
|
+DEFAULT_MODEL_NAME = _config.get_str("default_model_name", "OpenDataLab/MinerU2.5-2509-1.2B")
|
|
|
|
|
+DEFAULT_GPU_MEMORY_UTILIZATION = _config.get_float("default_gpu_memory_utilization", 0.9)
|
|
|
|
|
+DEFAULT_DPI = _config.get_int("default_dpi", 200)
|
|
|
|
|
+DEFAULT_MAX_PAGES = _config.get_int("default_max_pages", 10)
|
|
|
|
|
|
|
|
# v2 特有配置(外部API相关)
|
|
# v2 特有配置(外部API相关)
|
|
|
-DEFAULT_API_URL = os.getenv("API_URL", "http://127.0.0.1:5282")
|
|
|
|
|
-DEFAULT_BACKEND = os.getenv("BACKEND", "vlm-vllm-async-engine")
|
|
|
|
|
-DEFAULT_PARSE_METHOD = os.getenv("PARSE_METHOD", "auto")
|
|
|
|
|
-DEFAULT_START_PAGE_ID = int(os.getenv("START_PAGE_ID", "0"))
|
|
|
|
|
-DEFAULT_END_PAGE_ID = int(os.getenv("END_PAGE_ID", "99999"))
|
|
|
|
|
-DEFAULT_LANGUAGE = os.getenv("LANGUAGE", "ch")
|
|
|
|
|
-DEFAULT_RESPONSE_FORMAT_ZIP = os.getenv("RESPONSE_FORMAT_ZIP", "true").lower() == "true"
|
|
|
|
|
-DEFAULT_RETURN_MIDDLE_JSON = os.getenv("RETURN_MIDDLE_JSON", "false").lower() == "true"
|
|
|
|
|
-DEFAULT_RETURN_MODEL_OUTPUT = os.getenv("RETURN_MODEL_OUTPUT", "true").lower() == "true"
|
|
|
|
|
-DEFAULT_RETURN_MD = os.getenv("RETURN_MD", "true").lower() == "true"
|
|
|
|
|
-DEFAULT_RETURN_IMAGES = os.getenv("RETURN_IMAGES", "false").lower() == "true"
|
|
|
|
|
-DEFAULT_RETURN_CONTENT_LIST = os.getenv("RETURN_CONTENT_LIST", "false").lower() == "true"
|
|
|
|
|
-DEFAULT_SERVER_URL = os.getenv("SERVER_URL", "string")
|
|
|
|
|
|
|
+DEFAULT_API_URL = _config.get_str("api_url", "http://127.0.0.1:5282")
|
|
|
|
|
+DEFAULT_BACKEND = _config.get_str("backend", "vlm-vllm-async-engine")
|
|
|
|
|
+DEFAULT_PARSE_METHOD = _config.get_str("parse_method", "auto")
|
|
|
|
|
+DEFAULT_START_PAGE_ID = _config.get_int("start_page_id", 0)
|
|
|
|
|
+DEFAULT_END_PAGE_ID = _config.get_int("end_page_id", 99999)
|
|
|
|
|
+DEFAULT_LANGUAGE = _config.get_str("language", "ch")
|
|
|
|
|
+DEFAULT_RESPONSE_FORMAT_ZIP = _config.get_bool("response_format_zip", True)
|
|
|
|
|
+DEFAULT_RETURN_MIDDLE_JSON = _config.get_bool("return_middle_json", False)
|
|
|
|
|
+DEFAULT_RETURN_MODEL_OUTPUT = _config.get_bool("return_model_output", True)
|
|
|
|
|
+DEFAULT_RETURN_MD = _config.get_bool("return_md", True)
|
|
|
|
|
+DEFAULT_RETURN_IMAGES = _config.get_bool("return_images", False)
|
|
|
|
|
+DEFAULT_RETURN_CONTENT_LIST = _config.get_bool("return_content_list", False)
|
|
|
|
|
+DEFAULT_SERVER_URL = _config.get_str("server_url", "string")
|
|
|
|
|
+
|
|
|
|
|
+# API 服务启动配置
|
|
|
|
|
+API_HOST = _config.get_str("api_host", "0.0.0.0")
|
|
|
|
|
+API_PORT = _config.get_int("api_port", 4214)
|
|
|
|
|
+
|
|
|
|
|
+# MinerU 服务管理配置
|
|
|
|
|
+MINERU_API_HOST = _config.get_str("mineru_api_host", "127.0.0.1")
|
|
|
|
|
+MINERU_API_PORT = _config.get_int("mineru_api_port", 5282)
|
|
|
|
|
+MINERU_IDLE_TIMEOUT = _config.get_int("mineru_idle_timeout", 60)
|
|
|
|
|
+MINERU_CHECK_INTERVAL = _config.get_int("mineru_check_interval", 60)
|
|
|
|
|
+MINERU_START_TIMEOUT = _config.get_int("mineru_start_timeout", 120)
|
|
|
|
|
+
|
|
|
|
|
+# PaddleOCR 配置
|
|
|
|
|
+PADDLEOCR_CMD = _config.get_str("paddleocr_cmd", "paddleocr")
|
|
|
|
|
+PADDLE_OCR_DEVICE = _config.get_str("paddle_ocr_device", "")
|
|
|
|
|
+PADDLE_OCR_DEVICES = _config.get_str("paddle_ocr_devices", "")
|
|
|
|
|
+PADDLE_DOC_PARSER_CMD = _config.get_str("paddle_doc_parser_cmd", "paddleocr")
|
|
|
|
|
+
|
|
|
|
|
+# 日志配置(可选)
|
|
|
|
|
+LOG_DIR = _config.get_str("log_dir", "./logs")
|
|
|
|
|
+LOG_LEVEL = _config.get_str("log_level", "INFO")
|
|
|
|
|
+LOG_TO_FILE = _config.get_bool("log_to_file", True)
|
|
|
|
|
+LOG_TO_CONSOLE = _config.get_bool("log_to_console", True)
|
|
|
|
|
|