Browse Source

pdf_converter_v2: 移除停止 mineru-api.service 的逻辑及开关

何文松 3 weeks ago
parent
commit
14d0f42f6d
2 changed files with 0 additions and 243 deletions
  1. pdf_converter_v2/README.md (+0 -2)
  2. pdf_converter_v2/utils/paddleocr_fallback.py (+0 -241)

+ 0 - 2
pdf_converter_v2/README.md

@@ -300,7 +300,6 @@ sudo journalctl -u pdf-converter-v2 -f
 - `API_PORT`: 服务监听端口(默认: 4214)
 - `LOG_LEVEL`: 日志级别(默认: info)
 - `PDF_CONVERTER_LOG_DIR`: 日志目录(默认: ./logs)
-- `MINERU_RELEASE_BEFORE_PADDLE_OCR`: 调用 PaddleOCR 前是否释放 MinerU 服务(停止 mineru-api.service 以腾出显存)。设为 `false`/`0`/`no` 时不释放,MinerU 保持运行(默认: true)
 
 ## 注意事项
 
@@ -446,7 +445,6 @@ pdf_converter_v2 API 默认只连一个 MinerU 地址(如 `API_URL=http://127.
 | 组件 | 环境变量 | 示例 |
 |------|----------|------|
 | MinerU | `MINERU_DEVICE_MODE` | `npu`、`npu:0`、`npu:1` |
-| MinerU 释放开关 | `MINERU_RELEASE_BEFORE_PADDLE_OCR` | `true`(默认,PaddleOCR 前释放 MinerU)/ `false`(不释放) |
 | PaddleOCR | `PADDLE_OCR_DEVICE` | `npu:0`、`npu:1` |
 | 昇腾可见卡 | `ASCEND_RT_VISIBLE_DEVICES` | `0`、`1,2`(物理卡号) |
 

+ 0 - 241
pdf_converter_v2/utils/paddleocr_fallback.py

@@ -9,7 +9,6 @@ import sys
 import tempfile
 import time
 import random
-import fcntl
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple
 import ast
@@ -41,17 +40,6 @@ except ImportError:
     logger.warning("[PaddleOCR备用] PIL未安装,无法处理图片")
 
 
-# 用于管理mineru服务状态的锁文件路径
-MINERU_LOCK_FILE = "/tmp/mineru_service_lock"
-MINERU_COUNT_FILE = "/tmp/mineru_service_count"
-
-
-def _should_release_mineru_for_paddle() -> bool:
-    """是否在调用 PaddleOCR 前释放 MinerU 服务(停止 mineru-api.service 以腾出显存)。
-    可通过环境变量 MINERU_RELEASE_BEFORE_PADDLE_OCR 控制:设为 false/0/no 时不释放,MinerU 保持运行。"""
-    v = os.getenv("MINERU_RELEASE_BEFORE_PADDLE_OCR", "true").strip().lower()
-    return v not in ("false", "0", "no")
-
 def _get_paddleocr_executable() -> str:
     """返回 paddleocr 可执行文件路径或命令名,供 subprocess 使用。
     当以 systemd 等方式运行时 PATH 可能不包含 venv/bin,故优先使用当前 Python 同目录下的 paddleocr。
@@ -81,221 +69,6 @@ def _paddle_ocr_device_args() -> list:
     return []
 
 
-def _acquire_service_lock() -> Optional[object]:
-    """获取服务操作锁(文件锁)
-    
-    Returns:
-        文件对象(用于释放锁),如果失败返回None
-    """
-    try:
-        lock_file = open(MINERU_LOCK_FILE, 'w')
-        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-        return lock_file
-    except (IOError, OSError) as e:
-        logger.debug(f"[PaddleOCR] 获取服务锁失败(可能其他进程正在操作): {e}")
-        return None
-
-
-def _release_service_lock(lock_file: object) -> None:
-    """释放服务操作锁
-    
-    Args:
-        lock_file: 锁文件对象
-    """
-    try:
-        if lock_file:
-            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
-            lock_file.close()
-    except Exception as e:
-        logger.warning(f"[PaddleOCR] 释放服务锁失败: {e}")
-
-
-def _increment_service_count(lock_file: object) -> int:
-    """增加服务使用计数(需要在锁保护下调用)
-    
-    Args:
-        lock_file: 已获取的锁文件对象
-    
-    Returns:
-        当前计数
-    """
-    try:
-        count = 0
-        if os.path.exists(MINERU_COUNT_FILE):
-            with open(MINERU_COUNT_FILE, 'r') as f:
-                count = int(f.read().strip() or '0')
-        count += 1
-        with open(MINERU_COUNT_FILE, 'w') as f:
-            f.write(str(count))
-        return count
-    except Exception as e:
-        logger.warning(f"[PaddleOCR] 增加服务计数失败: {e}")
-        return 1
-
-
-def _decrement_service_count(lock_file: object) -> int:
-    """减少服务使用计数(需要在锁保护下调用)
-    
-    Args:
-        lock_file: 已获取的锁文件对象
-    
-    Returns:
-        当前计数
-    """
-    try:
-        count = 0
-        if os.path.exists(MINERU_COUNT_FILE):
-            with open(MINERU_COUNT_FILE, 'r') as f:
-                count = int(f.read().strip() or '0')
-        count = max(0, count - 1)
-        with open(MINERU_COUNT_FILE, 'w') as f:
-            f.write(str(count))
-        return count
-    except Exception as e:
-        logger.warning(f"[PaddleOCR] 减少服务计数失败: {e}")
-        return 0
-
-
-def stop_mineru_service() -> bool:
-    """停止mineru-api.service以释放GPU内存(线程安全)
-    
-    Returns:
-        True表示成功停止或已停止,False表示失败
-    """
-    lock_file = _acquire_service_lock()
-    if not lock_file:
-        # 如果无法获取锁,等待一小段时间后检查服务状态
-        time.sleep(0.5)
-        try:
-            result = subprocess.run(
-                ["systemctl", "is-active", "mineru-api.service"],
-                capture_output=True,
-                text=True,
-                timeout=5,
-                check=False
-            )
-            if result.returncode != 0 or result.stdout.strip() != "active":
-                # 服务已经停止
-                logger.debug("[PaddleOCR] 服务已停止(其他进程已处理)")
-                return True
-        except Exception:
-            pass
-        return False
-    
-    try:
-        # 检查服务当前状态
-        result = subprocess.run(
-            ["systemctl", "is-active", "mineru-api.service"],
-            capture_output=True,
-            text=True,
-            timeout=5,
-            check=False
-        )
-        is_active = result.returncode == 0 and result.stdout.strip() == "active"
-        
-        if not is_active:
-            logger.debug("[PaddleOCR] mineru-api.service已经停止")
-            return True
-        
-        # 增加使用计数(在锁保护下)
-        count = _increment_service_count(lock_file)
-        logger.debug(f"[PaddleOCR] 服务使用计数: {count}")
-        
-        # 停止服务
-        result = subprocess.run(
-            ["systemctl", "stop", "mineru-api.service"],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            check=False
-        )
-        if result.returncode == 0:
-            logger.info("[PaddleOCR] 成功停止mineru-api.service以释放GPU内存")
-            return True
-        else:
-            logger.warning(f"[PaddleOCR] 停止mineru-api.service失败: {result.stderr}")
-            _decrement_service_count(lock_file)  # 回滚计数
-            return False
-    except Exception as e:
-        logger.warning(f"[PaddleOCR] 停止mineru-api.service时出错: {e}")
-        if lock_file:
-            _decrement_service_count(lock_file)  # 回滚计数
-        return False
-    finally:
-        _release_service_lock(lock_file)
-
-
-def start_mineru_service() -> bool:
-    """启动mineru-api.service(线程安全)
-    
-    Returns:
-        True表示成功启动或已启动,False表示失败
-    """
-    lock_file = _acquire_service_lock()
-    if not lock_file:
-        # 如果无法获取锁,等待一小段时间后检查服务状态
-        time.sleep(0.5)
-        try:
-            result = subprocess.run(
-                ["systemctl", "is-active", "mineru-api.service"],
-                capture_output=True,
-                text=True,
-                timeout=5,
-                check=False
-            )
-            if result.returncode == 0 and result.stdout.strip() == "active":
-                # 服务已经启动
-                logger.debug("[PaddleOCR] 服务已启动(其他进程已处理)")
-                return True
-        except Exception:
-            pass
-        return False
-    
-    try:
-        # 减少使用计数(在锁保护下)
-        count = _decrement_service_count(lock_file)
-        logger.debug(f"[PaddleOCR] 服务使用计数: {count}")
-        
-        # 如果还有其他进程在使用,不启动服务
-        if count > 0:
-            logger.info(f"[PaddleOCR] 还有其他进程在使用GPU(计数={count}),暂不启动mineru-api.service")
-            return True
-        
-        # 检查服务当前状态
-        result = subprocess.run(
-            ["systemctl", "is-active", "mineru-api.service"],
-            capture_output=True,
-            text=True,
-            timeout=5,
-            check=False
-        )
-        is_active = result.returncode == 0 and result.stdout.strip() == "active"
-        
-        if is_active:
-            logger.debug("[PaddleOCR] mineru-api.service已经启动")
-            return True
-        
-        # 启动服务
-        result = subprocess.run(
-            ["systemctl", "start", "mineru-api.service"],
-            capture_output=True,
-            text=True,
-            timeout=30,
-            check=False
-        )
-        if result.returncode == 0:
-            logger.info("[PaddleOCR] 成功启动mineru-api.service")
-            return True
-        else:
-            logger.warning(f"[PaddleOCR] 启动mineru-api.service失败: {result.stderr}")
-            return False
-    except Exception as e:
-        logger.warning(f"[PaddleOCR] 启动mineru-api.service时出错: {e}")
-        return False
-    finally:
-        _release_service_lock(lock_file)
-
-
 def detect_file_type(file_path: str) -> Optional[str]:
     """通过文件内容(魔数)检测文件类型,不依赖扩展名
     
@@ -526,9 +299,6 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
     Returns:
         paddleocr解析结果,如果失败返回None
     """
-    # 在调用PaddleOCR前按配置决定是否停止mineru服务以释放GPU内存
-    mineru_stopped = stop_mineru_service() if _should_release_mineru_for_paddle() else False
-
     try:
         # 检查图片文件是否存在
         if not os.path.exists(image_path):
@@ -613,10 +383,6 @@ def call_paddleocr(image_path: str) -> Optional[Dict[str, Any]]:
     except Exception as e:
         logger.exception(f"[PaddleOCR] 调用失败: {e}")
         return None
-    finally:
-        # 无论成功或失败,都尝试重启mineru服务
-        if mineru_stopped:
-            start_mineru_service()
 
 
 def extract_first_page_from_pdf(pdf_path: str, output_dir: str) -> Optional[str]:
@@ -875,9 +641,6 @@ def call_paddleocr_ocr(image_path: str, save_path: str) -> tuple[Optional[List[s
     Returns:
         (OCR识别的文本列表, JSON文件路径),如果失败返回(None, None)
     """
-    # 在调用PaddleOCR前按配置决定是否停止mineru服务以释放GPU内存
-    mineru_stopped = stop_mineru_service() if _should_release_mineru_for_paddle() else False
-
     try:
         if not os.path.exists(image_path):
             logger.error(f"[PaddleOCR OCR] 图片文件不存在: {image_path}")
@@ -951,10 +714,6 @@ def call_paddleocr_ocr(image_path: str, save_path: str) -> tuple[Optional[List[s
     except Exception as e:
         logger.exception(f"[PaddleOCR OCR] 调用失败: {e}")
         return None, None
-    finally:
-        # 无论成功或失败,都尝试重启mineru服务
-        if mineru_stopped:
-            start_mineru_service()
 
 
 def call_paddleocr_doc_parser_for_text(image_path: str, save_path: str) -> tuple[Optional[List[str]], Optional[str]]: