config.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. # Copyright (c) Opendatalab. All rights reserved.
  2. """
  3. 配置文件 v2 - 从配置文件读取配置(不使用环境变量)
  4. """
  5. import os
  6. from .config_loader import get_config_loader
  7. from .utils.device_env import detect_device_kind
  8. # 加载配置文件
  9. _config = get_config_loader()
  10. # 设备环境:nvi(NVIDIA GPU)/ npu(华为昇腾 NPU)/ cpu,用于按环境选择 VLLM_PLUGINS、PADDLE_OCR_DEVICE 等
  11. # 优先从配置文件读取,如果未配置则自动检测
  12. _device_kind_from_config = _config.get_str("device_kind", "")
  13. DEVICE_KIND = _device_kind_from_config if _device_kind_from_config else detect_device_kind()
  14. # 默认模型配置
  15. DEFAULT_MODEL_NAME = "OpenDataLab/MinerU2.5-2509-1.2B"
  16. DEFAULT_GPU_MEMORY_UTILIZATION = _config.get_float("default_gpu_memory_utilization", 0.9)
  17. DEFAULT_DPI = _config.get_int("default_dpi", 200)
  18. DEFAULT_MAX_PAGES = _config.get_int("default_max_pages", 10)
  19. # v2 特有配置(外部API相关)
  20. DEFAULT_API_URL = _config.get_str("api_url", "http://127.0.0.1:5282")
  21. # 根据设备类型自动选择 MinerU backend(配置文件显式指定时优先)
  22. # npu -> vlm-http-client(NPU 不支持本地 vllm,需通过 http 调用外部推理服务)
  23. # nvi -> vlm-auto-engine(NVIDIA GPU 本地推理)
  24. # cpu -> pipeline(纯 CPU 通用管线)
  25. def _auto_backend() -> str:
  26. if DEVICE_KIND == "npu":
  27. return "vlm-http-client"
  28. elif DEVICE_KIND == "nvi":
  29. return "vlm-auto-engine"
  30. return "pipeline"
  31. def _auto_server_url() -> str:
  32. """server_url 仅在 http-client 类 backend 时有意义"""
  33. if DEVICE_KIND == "npu":
  34. return "http://172.17.0.1:30000"
  35. return ""
  36. _backend_from_config = _config.get_str("backend", "")
  37. DEFAULT_BACKEND = _backend_from_config if _backend_from_config else _auto_backend()
  38. _server_url_from_config = _config.get_str("server_url", "")
  39. DEFAULT_SERVER_URL = _server_url_from_config if _server_url_from_config else _auto_server_url()
  40. DEFAULT_PARSE_METHOD = _config.get_str("parse_method", "auto")
  41. DEFAULT_START_PAGE_ID = _config.get_int("start_page_id", 0)
  42. DEFAULT_END_PAGE_ID = _config.get_int("end_page_id", -1)
  43. DEFAULT_LANGUAGE = _config.get_str("language", "ch")
  44. DEFAULT_RESPONSE_FORMAT_ZIP = _config.get_bool("response_format_zip", True)
  45. DEFAULT_RETURN_MIDDLE_JSON = _config.get_bool("return_middle_json", True)
  46. DEFAULT_RETURN_MODEL_OUTPUT = _config.get_bool("return_model_output", False)
  47. DEFAULT_RETURN_MD = _config.get_bool("return_md", True)
  48. DEFAULT_RETURN_IMAGES = _config.get_bool("return_images", True)
  49. DEFAULT_RETURN_CONTENT_LIST = _config.get_bool("return_content_list", True)
  50. DEFAULT_TABLE_ENABLE = _config.get_bool("table_enable", True)
  51. DEFAULT_FORMULA_ENABLE = _config.get_bool("formula_enable", True)
  52. # PaddleOCR 配置
  53. PADDLEOCR_CMD = _config.get_str("paddleocr_cmd", "paddleocr")
  54. PADDLE_DOC_PARSER_CMD = _config.get_str("paddle_doc_parser_cmd", "paddleocr")
  55. # PaddleOCR 设备参数(留空则根据 DEVICE_KIND 自动选择)
  56. # npu -> "npu:0" | nvi -> ""(PaddlePaddle 自动使用 CUDA) | cpu -> "cpu"
  57. def _auto_paddle_device() -> str:
  58. if DEVICE_KIND == "npu":
  59. return "npu:0"
  60. elif DEVICE_KIND == "cpu":
  61. return "cpu"
  62. return "" # nvi: PaddlePaddle 自动检测 GPU
  63. _paddle_device_from_config = _config.get_str("paddle_ocr_device", "")
  64. PADDLE_OCR_DEVICE = _paddle_device_from_config if _paddle_device_from_config else _auto_paddle_device()
  65. PADDLE_OCR_DEVICES = _config.get_str("paddle_ocr_devices", "")
  66. # VL 识别后端(仅 NPU 环境需要通过 vllm-server 调用外部推理,NVIDIA 本地推理不需要)
  67. def _auto_vl_rec_backend() -> str:
  68. if DEVICE_KIND == "npu":
  69. return "vllm-server"
  70. return "" # nvi/cpu: 不使用 VL 识别后端
  71. def _auto_vl_rec_server_url() -> str:
  72. if DEVICE_KIND == "npu":
  73. return "http://172.17.0.1:8118/v1"
  74. return ""
  75. _vl_backend_from_config = _config.get_str("vl_rec_backend", "")
  76. VL_REC_BACKEND = _vl_backend_from_config if _vl_backend_from_config else _auto_vl_rec_backend()
  77. _vl_url_from_config = _config.get_str("vl_rec_server_url", "")
  78. VL_REC_SERVER_URL = _vl_url_from_config if _vl_url_from_config else _auto_vl_rec_server_url()
  79. # 日志配置(可选)
  80. LOG_DIR = _config.get_str("log_dir", "./logs")
  81. LOG_LEVEL = _config.get_str("log_level", "INFO")
  82. LOG_TO_FILE = _config.get_bool("log_to_file", True)
  83. LOG_TO_CONSOLE = _config.get_bool("log_to_console", True)