# device_env.py
# Copyright (c) Opendatalab. All rights reserved.
"""
Device environment detection: tells whether the local machine has an NVIDIA GPU (nvi) or a Huawei Ascend NPU (npu).
Used to set VLLM_PLUGINS, LD_PRELOAD, PADDLE_OCR_DEVICE, etc. in code according to the environment.
"""
  6. import os
  7. import subprocess
  8. from typing import Literal
  9. DeviceKind = Literal["nvi", "npu", "cpu"]
  10. # 环境变量显式指定时优先使用(nvi / npu / cpu)
  11. ENV_DEVICE_KIND = "PDF_CONVERTER_DEVICE_KIND"
  12. def _nvidia_available() -> bool:
  13. """检测是否有可用 NVIDIA 环境(CUDA / nvidia-smi)。"""
  14. if os.getenv("CUDA_VISIBLE_DEVICES") is not None:
  15. # 若显式设为空字符串表示隐藏 GPU,不视为 nvi
  16. if os.getenv("CUDA_VISIBLE_DEVICES", "").strip() == "":
  17. return False
  18. return True
  19. try:
  20. r = subprocess.run(
  21. ["nvidia-smi"],
  22. capture_output=True,
  23. timeout=5,
  24. check=False,
  25. )
  26. return r.returncode == 0
  27. except (FileNotFoundError, subprocess.TimeoutExpired):
  28. return False
  29. def _npu_available() -> bool:
  30. """检测是否有华为昇腾 NPU 环境。"""
  31. if os.getenv("ASCEND_HOME"):
  32. return True
  33. if os.getenv("ASCEND_RT_VISIBLE_DEVICES") is not None:
  34. return True
  35. if os.getenv("MINERU_DEVICE_MODE", "").lower().startswith("npu"):
  36. return True
  37. try:
  38. r = subprocess.run(
  39. ["npu-smi", "info"],
  40. capture_output=True,
  41. timeout=5,
  42. check=False,
  43. )
  44. return r.returncode == 0
  45. except (FileNotFoundError, subprocess.TimeoutExpired):
  46. pass
  47. return False
  48. def detect_device_kind() -> DeviceKind:
  49. """
  50. 识别当前运行环境为 nvi(NVIDIA GPU)、npu(华为昇腾 NPU)或 cpu。
  51. 优先级:
  52. 1. 环境变量 PDF_CONVERTER_DEVICE_KIND(nvi / npu / cpu)
  53. 2. NPU 相关环境或 npu-smi 可用 -> npu
  54. 3. NVIDIA 相关环境或 nvidia-smi 可用 -> nvi
  55. 4. 否则 -> cpu
  56. Returns:
  57. "nvi" | "npu" | "cpu"
  58. """
  59. raw = os.getenv(ENV_DEVICE_KIND, "").strip().lower()
  60. if raw in ("nvi", "npu", "cpu"):
  61. return raw # type: ignore[return-value]
  62. if _npu_available():
  63. return "npu"
  64. if _nvidia_available():
  65. return "nvi"
  66. return "cpu"
  67. def is_nvidia() -> bool:
  68. """当前是否为 NVIDIA GPU 环境。"""
  69. return detect_device_kind() == "nvi"
  70. def is_npu() -> bool:
  71. """当前是否为华为昇腾 NPU 环境。"""
  72. return detect_device_kind() == "npu"
  73. def is_cpu_only() -> bool:
  74. """当前是否仅为 CPU 环境(无 nvi/npu)。"""
  75. return detect_device_kind() == "cpu"