|
|
@@ -44,6 +44,15 @@ except ImportError:
|
|
|
MINERU_LOCK_FILE = "/tmp/mineru_service_lock"
|
|
|
MINERU_COUNT_FILE = "/tmp/mineru_service_count"
|
|
|
|
|
|
# PaddleOCR inference device: in an NPU environment this must be "npu" or "npu:0",
# otherwise inference runs on the CPU and may segfault.
# Selected through the PADDLE_OCR_DEVICE environment variable; when it is unset the
# default is "npu:0", so an API launched directly inside an NPU container uses the NPU.
def _paddle_ocr_device_args() -> list:
    """Build the ``--device`` arguments appended to a PaddleOCR command line.

    Returns:
        ``["--device", <value>]`` where ``<value>`` is the whitespace-stripped
        content of ``PADDLE_OCR_DEVICE`` (``"npu:0"`` when the variable is not
        set), or an empty list when the variable is set to an empty or
        whitespace-only string.
    """
    configured = os.environ.get("PADDLE_OCR_DEVICE", "npu:0")
    target = configured.strip()
    # An explicitly empty value is the opt-out: no --device flag is added.
    return ["--device", target] if target else []
|
|
|
+
|
|
|
|
|
|
def _acquire_service_lock() -> Optional[object]:
|
|
|
"""获取服务操作锁(文件锁)
|
|
|
@@ -120,12 +129,37 @@ def _decrement_service_count(lock_file: object) -> int:
|
|
|
return 0
|
|
|
|
|
|
|
|
|
def _systemd_available() -> bool:
    """Report whether a usable systemd manager exists in the current environment.

    Containers usually have no systemd, in which case mineru-api.service cannot
    be operated and callers should skip their systemctl calls.

    The check is best-effort and never raises: any failure while probing is
    treated as "systemd not available".

    Returns:
        True when ``/run/systemd/system`` exists and ``systemctl
        is-system-running`` neither reports the ``offline`` state nor prints a
        known "no systemd / no bus" message; False otherwise.
    """
    # Containers usually lack /run/systemd/system, so do this cheap check first
    # and avoid spawning a process at all.
    if not os.path.exists("/run/systemd/system"):
        return False

    try:
        probe = subprocess.run(
            ["systemctl", "is-system-running"],
            capture_output=True,
            text=True,
            timeout=3,
            check=False,  # non-zero exit is normal for e.g. "degraded"
        )
    except Exception:
        # Deliberate catch-all: a missing systemctl binary, a timeout or any
        # other probing failure all mean we cannot drive systemd from here.
        return False

    # "offline" means the systemd manager is not running (e.g. another program
    # is PID 1), so systemctl start/stop cannot work.
    if (probe.stdout or "").strip() == "offline":
        return False

    combined = (probe.stderr or "") + (probe.stdout or "")
    no_systemd_markers = (
        "Failed to connect to bus",
        "not been booted with systemd",
        "Connection refused",
    )
    return not any(marker in combined for marker in no_systemd_markers)
|
|
|
+
|
|
|
+
|
|
|
def stop_mineru_service() -> bool:
|
|
|
"""停止mineru-api.service以释放GPU内存(线程安全)
|
|
|
+ 容器内无 systemd 时直接返回 True,不报错。
|
|
|
|
|
|
Returns:
|
|
|
- True表示成功停止或已停止,False表示失败
|
|
|
+ True表示成功停止或已停止/无需操作,False表示失败
|
|
|
"""
|
|
|
+ if not _systemd_available():
|
|
|
+ logger.debug("[PaddleOCR] 无 systemd,跳过停止 mineru-api.service")
|
|
|
+ return True
|
|
|
lock_file = _acquire_service_lock()
|
|
|
if not lock_file:
|
|
|
# 如果无法获取锁,等待一小段时间后检查服务状态
|
|
|
@@ -191,10 +225,14 @@ def stop_mineru_service() -> bool:
|
|
|
|
|
|
def start_mineru_service() -> bool:
|
|
|
"""启动mineru-api.service(线程安全)
|
|
|
+ 容器内无 systemd 时直接返回 True,不报错。
|
|
|
|
|
|
Returns:
|
|
|
- True表示成功启动或已启动,False表示失败
|
|
|
+ True表示成功启动或已启动/无需操作,False表示失败
|
|
|
"""
|
|
|
+ if not _systemd_available():
|
|
|
+ logger.debug("[PaddleOCR] 无 systemd,跳过启动 mineru-api.service")
|
|
|
+ return True
|
|
|
lock_file = _acquire_service_lock()
|
|
|
if not lock_file:
|
|
|
# 如果无法获取锁,等待一小段时间后检查服务状态
|
|
|
@@ -250,10 +288,17 @@ def start_mineru_service() -> bool:
|
|
|
if result.returncode == 0:
|
|
|
logger.info("[PaddleOCR] 成功启动mineru-api.service")
|
|
|
return True
|
|
|
- else:
|
|
|
- logger.warning(f"[PaddleOCR] 启动mineru-api.service失败: {result.stderr}")
|
|
|
- return False
|
|
|
+ err = (result.stderr or "") + (result.stdout or "")
|
|
|
+ if "Failed to connect to bus" in err or "not been booted with systemd" in err:
|
|
|
+ logger.debug("[PaddleOCR] 无 systemd(容器环境),跳过启动 mineru-api.service")
|
|
|
+ return True
|
|
|
+ logger.warning(f"[PaddleOCR] 启动mineru-api.service失败: {result.stderr}")
|
|
|
+ return False
|
|
|
except Exception as e:
|
|
|
+ err_str = str(e)
|
|
|
+ if "Failed to connect to bus" in err_str or "not been booted" in err_str:
|
|
|
+ logger.debug("[PaddleOCR] 无 systemd(容器环境),跳过启动 mineru-api.service")
|
|
|
+ return True
|
|
|
logger.warning(f"[PaddleOCR] 启动mineru-api.service时出错: {e}")
|
|
|
return False
|
|
|
finally:
|
|
|
@@ -504,7 +549,7 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
|
|
|
image_basename = os.path.splitext(os.path.basename(image_path))[0]
|
|
|
save_path_base = os.path.join(image_dir, image_basename)
|
|
|
|
|
|
- # 构建paddleocr命令,添加所有参数
|
|
|
+ # 构建paddleocr命令,添加所有参数(NPU 下需加 --device npu:0,否则走 CPU 易段错误)
|
|
|
# PaddleOCR会在save_path下创建目录,文件保存在该目录内
|
|
|
cmd = [
|
|
|
"paddleocr", "doc_parser", "-i", image_path,
|
|
|
@@ -513,7 +558,7 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
|
|
|
"--use_doc_orientation_classify", "True",
|
|
|
"--use_chart_recognition", "True",
|
|
|
"--save_path", save_path_base
|
|
|
- ]
|
|
|
+ ] + _paddle_ocr_device_args()
|
|
|
|
|
|
# 设置环境变量,限制GPU内存使用
|
|
|
# env = os.environ.copy()
|
|
|
@@ -847,8 +892,8 @@ def call_paddleocr_ocr(image_path: str, save_path: str) -> tuple[Optional[List[s
|
|
|
logger.error(f"[PaddleOCR OCR] 图片文件不存在: {image_path}")
|
|
|
return None, None
|
|
|
|
|
|
- # 构建paddleocr ocr命令
|
|
|
- cmd = ["paddleocr", "ocr", "-i", image_path, "--save_path", save_path]
|
|
|
+ # 构建paddleocr ocr命令(NPU 下需加 --device npu:0,否则走 CPU 易段错误)
|
|
|
+ cmd = ["paddleocr", "ocr", "-i", image_path, "--save_path", save_path] + _paddle_ocr_device_args()
|
|
|
|
|
|
logger.info(f"[PaddleOCR OCR] 执行命令: {' '.join(cmd)}")
|
|
|
|
|
|
@@ -942,7 +987,7 @@ def call_paddleocr_doc_parser_for_text(image_path: str, save_path: str) -> tuple
|
|
|
save_path_base = os.path.join(save_path, image_basename)
|
|
|
os.makedirs(save_path_base, exist_ok=True)
|
|
|
|
|
|
- # 构建paddleocr doc_parser命令
|
|
|
+ # 构建paddleocr doc_parser命令(NPU 下需加 --device npu:0,否则走 CPU 易段错误)
|
|
|
cmd = [
|
|
|
"paddleocr", "doc_parser", "-i", image_path,
|
|
|
"--precision", "fp32",
|
|
|
@@ -950,7 +995,7 @@ def call_paddleocr_doc_parser_for_text(image_path: str, save_path: str) -> tuple
|
|
|
"--use_doc_orientation_classify", "True",
|
|
|
"--use_chart_recognition", "True",
|
|
|
"--save_path", save_path_base
|
|
|
- ]
|
|
|
+ ] + _paddle_ocr_device_args()
|
|
|
|
|
|
logger.info(f"[PaddleOCR DocParser] 执行命令: {' '.join(cmd)}")
|
|
|
|