# Copyright (c) Opendatalab. All rights reserved.
"""Configuration module v2: settings are read from a config file (environment variables are not used)."""
from .config_loader import get_config_loader
from .utils.device_env import detect_device_kind

# Load the config file
_config = get_config_loader()

# Device environment: nvi (NVIDIA GPU) / npu (Huawei Ascend NPU) / cpu.
# Used to select VLLM_PLUGINS, PADDLE_OCR_DEVICE, etc. per environment.
# The config file takes precedence; fall back to auto-detection if unset.
_device_kind_from_config = _config.get_str("device_kind", "")
DEVICE_KIND = _device_kind_from_config if _device_kind_from_config else detect_device_kind()

# Default model configuration
DEFAULT_MODEL_NAME = "OpenDataLab/MinerU2.5-2509-1.2B"
DEFAULT_GPU_MEMORY_UTILIZATION = _config.get_float("default_gpu_memory_utilization", 0.9)
DEFAULT_DPI = _config.get_int("default_dpi", 200)
DEFAULT_MAX_PAGES = _config.get_int("default_max_pages", 10)

# v2-specific configuration (external API)
DEFAULT_API_URL = _config.get_str("api_url", "http://127.0.0.1:5282")

# Auto-select the MinerU backend from the device kind (an explicit value in
# the config file takes precedence):
#   npu -> vlm-http-client (local vllm is not supported on NPU; an external
#          inference service is called over HTTP)
#   nvi -> vlm-auto-engine (local inference on an NVIDIA GPU)
#   cpu -> pipeline (generic CPU-only pipeline)
def _auto_backend() -> str:
    if DEVICE_KIND == "npu":
        return "vlm-http-client"
    elif DEVICE_KIND == "nvi":
        return "vlm-auto-engine"
    return "pipeline"


def _auto_server_url() -> str:
    """server_url is only meaningful for http-client style backends."""
    if DEVICE_KIND == "npu":
        return "http://172.17.0.1:30000"
    return ""


_backend_from_config = _config.get_str("backend", "")
DEFAULT_BACKEND = _backend_from_config if _backend_from_config else _auto_backend()
_server_url_from_config = _config.get_str("server_url", "")
DEFAULT_SERVER_URL = _server_url_from_config if _server_url_from_config else _auto_server_url()

DEFAULT_PARSE_METHOD = _config.get_str("parse_method", "auto")
DEFAULT_START_PAGE_ID = _config.get_int("start_page_id", 0)
DEFAULT_END_PAGE_ID = _config.get_int("end_page_id", -1)
DEFAULT_LANGUAGE = _config.get_str("language", "ch")
DEFAULT_RESPONSE_FORMAT_ZIP = _config.get_bool("response_format_zip", True)
DEFAULT_RETURN_MIDDLE_JSON = _config.get_bool("return_middle_json", True)
DEFAULT_RETURN_MODEL_OUTPUT = _config.get_bool("return_model_output", False)
DEFAULT_RETURN_MD = _config.get_bool("return_md", True)
DEFAULT_RETURN_IMAGES = _config.get_bool("return_images", True)
DEFAULT_RETURN_CONTENT_LIST = _config.get_bool("return_content_list", True)
DEFAULT_TABLE_ENABLE = _config.get_bool("table_enable", True)
DEFAULT_FORMULA_ENABLE = _config.get_bool("formula_enable", True)

# PaddleOCR configuration
PADDLEOCR_CMD = _config.get_str("paddleocr_cmd", "paddleocr")
PADDLE_DOC_PARSER_CMD = _config.get_str("paddle_doc_parser_cmd", "paddleocr")

# PaddleOCR device argument (leave empty to auto-select from DEVICE_KIND):
#   npu -> "npu:0" | nvi -> "" (PaddlePaddle uses CUDA automatically) | cpu -> "cpu"
def _auto_paddle_device() -> str:
    if DEVICE_KIND == "npu":
        return "npu:0"
    elif DEVICE_KIND == "cpu":
        return "cpu"
    return ""  # nvi: PaddlePaddle auto-detects the GPU


_paddle_device_from_config = _config.get_str("paddle_ocr_device", "")
PADDLE_OCR_DEVICE = _paddle_device_from_config if _paddle_device_from_config else _auto_paddle_device()
PADDLE_OCR_DEVICES = _config.get_str("paddle_ocr_devices", "")
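
# Illustrative config-file overrides (a sketch only: the concrete on-disk
# format is defined by config_loader, which this module does not pin down).
# The keys and values mirror the lookups above, e.g. for an NPU host:
#
#   device_kind = "npu"
#   backend = "vlm-http-client"
#   server_url = "http://172.17.0.1:30000"
#   paddle_ocr_device = "npu:0"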

# VL recognition backend (only the NPU environment needs to call external
# inference via vllm-server; local NVIDIA inference does not)
def _auto_vl_rec_backend() -> str:
    if DEVICE_KIND == "npu":
        return "vllm-server"
    return ""  # nvi/cpu: no VL recognition backend


def _auto_vl_rec_server_url() -> str:
    if DEVICE_KIND == "npu":
        return "http://172.17.0.1:8118/v1"
    return ""


_vl_backend_from_config = _config.get_str("vl_rec_backend", "")
VL_REC_BACKEND = _vl_backend_from_config if _vl_backend_from_config else _auto_vl_rec_backend()
_vl_url_from_config = _config.get_str("vl_rec_server_url", "")
VL_REC_SERVER_URL = _vl_url_from_config if _vl_url_from_config else _auto_vl_rec_server_url()

# Logging configuration (optional)
LOG_DIR = _config.get_str("log_dir", "./logs")
LOG_LEVEL = _config.get_str("log_level", "INFO")
LOG_TO_FILE = _config.get_bool("log_to_file", True)
LOG_TO_CONSOLE = _config.get_bool("log_to_console", True)
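
# Minimal smoke test - a sketch, not part of the module's public surface.
# Because of the relative imports above, run it as a module, e.g.
# `python -m <package>.config` (the package path depends on where this file
# lives). It prints the resolved values so the device detection and backend
# fallbacks can be checked against the config file.
if __name__ == "__main__":
    for _name in (
        "DEVICE_KIND",
        "DEFAULT_BACKEND",
        "DEFAULT_SERVER_URL",
        "PADDLE_OCR_DEVICE",
        "VL_REC_BACKEND",
        "VL_REC_SERVER_URL",
        "LOG_DIR",
        "LOG_LEVEL",
    ):
        print(f"{_name} = {globals()[_name]!r}")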