# Copyright (c) Opendatalab. All rights reserved.
"""
Configuration module v2 - settings are read from a config file (no environment variables).
"""
import os

from .config_loader import get_config_loader
from .utils.device_env import detect_device_kind

# Load the configuration file once at import time.
_config = get_config_loader()

# Device environment: "nvi" (NVIDIA GPU) / "npu" (Huawei Ascend NPU) / "cpu".
# Used to select VLLM_PLUGINS, PADDLE_OCR_DEVICE, etc. per environment.
# The config file value takes precedence; auto-detect when it is unset.
_device_kind_from_config = _config.get_str("device_kind", "")
DEVICE_KIND = _device_kind_from_config or detect_device_kind()
# Default model configuration.
# NOTE: the model name is now read from the config file ("model_name" key)
# for consistency with every other default in this module; the fallback is
# the previously hard-coded MinerU checkpoint, so behavior is unchanged
# when the key is absent.
DEFAULT_MODEL_NAME = _config.get_str("model_name", "OpenDataLab/MinerU2.5-2509-1.2B")
DEFAULT_GPU_MEMORY_UTILIZATION = _config.get_float("default_gpu_memory_utilization", 0.9)
DEFAULT_DPI = _config.get_int("default_dpi", 200)
DEFAULT_MAX_PAGES = _config.get_int("default_max_pages", 10)

# v2-specific configuration (external API).
DEFAULT_API_URL = _config.get_str("api_url", "http://127.0.0.1:5282")
# Automatically select the MinerU backend from the device kind
# (an explicit "backend" entry in the config file takes precedence):
#   npu -> vlm-http-client  (NPU has no local vllm; call an external inference service over HTTP)
#   nvi -> vlm-auto-engine  (local inference on NVIDIA GPU)
#   cpu -> pipeline         (generic CPU-only pipeline)
_BACKEND_BY_DEVICE = {
    "npu": "vlm-http-client",
    "nvi": "vlm-auto-engine",
}


def _auto_backend() -> str:
    """Return the default MinerU backend for the detected DEVICE_KIND."""
    return _BACKEND_BY_DEVICE.get(DEVICE_KIND, "pipeline")


def _auto_server_url() -> str:
    """server_url is only meaningful for http-client style backends."""
    return "http://172.17.0.1:30000" if DEVICE_KIND == "npu" else ""


_backend_from_config = _config.get_str("backend", "")
DEFAULT_BACKEND = _backend_from_config or _auto_backend()
_server_url_from_config = _config.get_str("server_url", "")
DEFAULT_SERVER_URL = _server_url_from_config or _auto_server_url()
# Parsing defaults (all overridable through the config file).
DEFAULT_PARSE_METHOD = _config.get_str("parse_method", "auto")
DEFAULT_START_PAGE_ID = _config.get_int("start_page_id", 0)
# NOTE(review): -1 presumably means "through the last page" — confirm against
# the consumer of end_page_id.
DEFAULT_END_PAGE_ID = _config.get_int("end_page_id", -1)
DEFAULT_LANGUAGE = _config.get_str("language", "ch")

# Response-content switches: which artifacts are included in a response.
DEFAULT_RESPONSE_FORMAT_ZIP = _config.get_bool("response_format_zip", True)
DEFAULT_RETURN_MIDDLE_JSON = _config.get_bool("return_middle_json", True)
DEFAULT_RETURN_MODEL_OUTPUT = _config.get_bool("return_model_output", False)
DEFAULT_RETURN_MD = _config.get_bool("return_md", True)
DEFAULT_RETURN_IMAGES = _config.get_bool("return_images", True)
DEFAULT_RETURN_CONTENT_LIST = _config.get_bool("return_content_list", True)

# Feature toggles for table and formula recognition.
DEFAULT_TABLE_ENABLE = _config.get_bool("table_enable", True)
DEFAULT_FORMULA_ENABLE = _config.get_bool("formula_enable", True)
# PaddleOCR configuration.
PADDLEOCR_CMD = _config.get_str("paddleocr_cmd", "paddleocr")
PADDLE_DOC_PARSER_CMD = _config.get_str("paddle_doc_parser_cmd", "paddleocr")

# PaddleOCR device argument (empty -> derived from DEVICE_KIND):
#   npu -> "npu:0" | nvi -> "" (PaddlePaddle auto-selects CUDA) | cpu -> "cpu"
def _auto_paddle_device() -> str:
    """Map DEVICE_KIND to the device string PaddleOCR expects."""
    device_by_kind = {"npu": "npu:0", "cpu": "cpu"}
    # nvi falls through to "" so PaddlePaddle auto-detects the GPU.
    return device_by_kind.get(DEVICE_KIND, "")


_paddle_device_from_config = _config.get_str("paddle_ocr_device", "")
PADDLE_OCR_DEVICE = _paddle_device_from_config or _auto_paddle_device()
PADDLE_OCR_DEVICES = _config.get_str("paddle_ocr_devices", "")
# VL recognition backend (only the NPU environment routes inference through an
# external vllm-server; NVIDIA runs locally and needs no VL backend).
def _auto_vl_rec_backend() -> str:
    """Return "vllm-server" on NPU; empty string on nvi/cpu (no VL backend)."""
    return "vllm-server" if DEVICE_KIND == "npu" else ""


def _auto_vl_rec_server_url() -> str:
    """Default VL inference endpoint; only meaningful on NPU."""
    return "http://172.17.0.1:8118/v1" if DEVICE_KIND == "npu" else ""


_vl_backend_from_config = _config.get_str("vl_rec_backend", "")
VL_REC_BACKEND = _vl_backend_from_config or _auto_vl_rec_backend()
_vl_url_from_config = _config.get_str("vl_rec_server_url", "")
VL_REC_SERVER_URL = _vl_url_from_config or _auto_vl_rec_server_url()
# Logging configuration (optional).
LOG_DIR = _config.get_str("log_dir", "./logs")
LOG_LEVEL = _config.get_str("log_level", "INFO")
LOG_TO_FILE = _config.get_bool("log_to_file", True)
LOG_TO_CONSOLE = _config.get_bool("log_to_console", True)