Prechádzať zdrojové kódy

feat: 改用异步任务+轮询模式替代 SSE 流式

Python NER 服务:
- 新增 POST /ner/extract/async 提交异步任务
- 新增 GET /ner/task/{task_id} 查询任务状态和进度
- 新增 DELETE /ner/task/{task_id} 删除任务并释放内存(不校验任务状态,进行中的任务也可删除)
- 任务状态:pending/processing/completed/failed

Java 后端:
- 使用轮询模式替代 SSE 流式
- 每 3 秒轮询一次任务状态
- 进度变化时打印日志
- 任务完成后自动删除释放内存(失败或超时的任务不会被删除)
- 提交异步任务时抛出异常会自动回退到同步 API;任务失败、超时或提交返回非 2xx 时返回 null,不会回退
何文松 1 mesiac pred
rodič
commit
79f48028a2

+ 100 - 131
backend/graph-service/src/main/java/com/lingyue/graph/listener/DocumentParsedEventListener.java

@@ -1,6 +1,5 @@
 package com.lingyue.graph.listener;
 
-import com.fasterxml.jackson.databind.ObjectMapper;
 import com.lingyue.common.event.DocumentParsedEvent;
 import com.lingyue.graph.service.GraphNerService;
 import lombok.RequiredArgsConstructor;
@@ -11,11 +10,8 @@ import org.springframework.http.*;
 import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Component;
 import org.springframework.web.client.RestTemplate;
-import org.springframework.web.reactive.function.client.WebClient;
 
-import java.time.Duration;
 import java.util.*;
-import java.util.concurrent.atomic.AtomicReference;
 
 /**
  * 文档解析完成事件监听器
@@ -31,7 +27,6 @@ public class DocumentParsedEventListener {
 
     private final GraphNerService graphNerService;
     private final RestTemplate restTemplate;
-    private final ObjectMapper objectMapper;
 
     @Value("${ner.auto-extract.enabled:true}")
     private boolean nerAutoExtractEnabled;
@@ -39,8 +34,14 @@ public class DocumentParsedEventListener {
     @Value("${ner.python-service.url:http://localhost:8001}")
     private String nerServiceUrl;
     
-    @Value("${ner.python-service.use-stream:true}")
-    private boolean useStreamApi;
+    @Value("${ner.python-service.use-async:true}")
+    private boolean useAsyncApi;
+    
+    @Value("${ner.python-service.poll-interval:3000}")
+    private long pollInterval;  // 轮询间隔(毫秒)
+    
+    @Value("${ner.python-service.max-wait-time:600000}")
+    private long maxWaitTime;  // 最大等待时间(毫秒)
 
     /**
      * 处理文档解析完成事件
@@ -74,10 +75,10 @@ public class DocumentParsedEventListener {
                 return;
             }
 
-            // 2. 调用 Python NER 服务(根据配置选择流式或普通 API)
+            // 2. 调用 Python NER 服务(根据配置选择异步轮询或同步 API)
             Map<String, Object> nerResponse;
-            if (useStreamApi) {
-                nerResponse = callPythonNerServiceWithStream(documentId, text, userId);
+            if (useAsyncApi) {
+                nerResponse = callPythonNerServiceAsync(documentId, text, userId);
             } else {
                 nerResponse = callPythonNerService(documentId, text, userId);
             }
@@ -113,7 +114,7 @@ public class DocumentParsedEventListener {
     }
 
     /**
-     * 调用 Python NER 服务(普通 REST API)
+     * 调用 Python NER 服务(同步 REST API)
      */
     private Map<String, Object> callPythonNerService(String documentId, String text, String userId) {
         try {
@@ -147,146 +148,114 @@ public class DocumentParsedEventListener {
     }
     
     /**
-     * 调用 Python NER 服务(SSE 流式 API,带进度反馈)
+     * 调用 Python NER 服务(异步 + 轮询模式)
+     * 
+     * 流程:
+     * 1. 提交异步任务,立即获得 task_id
+     * 2. 定期轮询任务状态
+     * 3. 任务完成后获取结果
      */
-    private Map<String, Object> callPythonNerServiceWithStream(String documentId, String text, String userId) {
+    private Map<String, Object> callPythonNerServiceAsync(String documentId, String text, String userId) {
         try {
+            // 1. 提交异步任务
+            String submitUrl = nerServiceUrl + "/ner/extract/async";
+            
             Map<String, Object> request = new HashMap<>();
             request.put("documentId", documentId);
             request.put("text", text);
             request.put("userId", userId);
             request.put("extractRelations", true);
             
-            // 使用 WebClient 处理 SSE
-            WebClient webClient = WebClient.builder()
-                    .baseUrl(nerServiceUrl)
-                    .codecs(configurer -> configurer.defaultCodecs().maxInMemorySize(10 * 1024 * 1024))  // 10MB
-                    .build();
-            
-            AtomicReference<Map<String, Object>> resultRef = new AtomicReference<>();
-            StringBuilder buffer = new StringBuilder();
-            
-            log.info("开始 SSE 流式 NER 请求: documentId={}", documentId);
+            HttpHeaders headers = new HttpHeaders();
+            headers.setContentType(MediaType.APPLICATION_JSON);
             
-            webClient.post()
-                    .uri("/ner/extract/stream")
-                    .contentType(MediaType.APPLICATION_JSON)
-                    .accept(MediaType.TEXT_EVENT_STREAM)
-                    .bodyValue(request)
-                    .retrieve()
-                    .bodyToFlux(org.springframework.core.io.buffer.DataBuffer.class)
-                    .timeout(Duration.ofMinutes(10))  // 10 分钟总超时
-                    .doOnNext(dataBuffer -> {
-                        // 从 DataBuffer 读取字符串
-                        byte[] bytes = new byte[dataBuffer.readableByteCount()];
-                        dataBuffer.read(bytes);
-                        org.springframework.core.io.buffer.DataBufferUtils.release(dataBuffer);
-                        String chunk = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
-                        
-                        log.debug("收到 SSE 数据块: length={}", chunk.length());
-                        
-                        // 累积数据到缓冲区
-                        buffer.append(chunk);
-                        
-                        // 处理完整的 SSE 事件(以双换行符分隔)
-                        String bufferStr = buffer.toString();
-                        int eventEnd;
-                        while ((eventEnd = bufferStr.indexOf("\n\n")) != -1) {
-                            String eventBlock = bufferStr.substring(0, eventEnd);
-                            bufferStr = bufferStr.substring(eventEnd + 2);
-                            buffer.setLength(0);
-                            buffer.append(bufferStr);
-                            
-                            // 解析单个 SSE 事件
-                            parseSseEvent(eventBlock, documentId, resultRef);
-                        }
-                    })
-                    .doOnComplete(() -> log.info("SSE 流完成: documentId={}", documentId))
-                    .doOnError(e -> log.error("SSE 流处理错误: documentId={}, error={}", documentId, e.getMessage(), e))
-                    .blockLast();  // 阻塞等待完成
+            HttpEntity<Map<String, Object>> entity = new HttpEntity<>(request, headers);
             
-            // 处理缓冲区中剩余的数据
-            if (buffer.length() > 0) {
-                log.debug("处理剩余缓冲区数据: length={}", buffer.length());
-                parseSseEvent(buffer.toString(), documentId, resultRef);
-            }
+            @SuppressWarnings("unchecked")
+            ResponseEntity<Map<String, Object>> submitResponse = restTemplate.exchange(
+                    submitUrl, HttpMethod.POST, entity, 
+                    (Class<Map<String, Object>>) (Class<?>) Map.class);
             
-            Map<String, Object> result = resultRef.get();
-            if (result == null) {
-                log.warn("SSE 处理完成但未获取到结果,回退到普通 API: documentId={}", documentId);
-                return callPythonNerService(documentId, text, userId);
+            if (!submitResponse.getStatusCode().is2xxSuccessful() || submitResponse.getBody() == null) {
+                log.error("提交异步 NER 任务失败: documentId={}", documentId);
+                return null;
             }
             
-            return result;
-            
-        } catch (Exception e) {
-            log.error("调用 Python NER SSE 服务失败: documentId={}, error={}", documentId, e.getMessage(), e);
-            // 回退到普通 API
-            log.info("回退到普通 NER API: documentId={}", documentId);
-            return callPythonNerService(documentId, text, userId);
-        }
-    }
-    
-    /**
-     * 解析单个 SSE 事件
-     */
-    private void parseSseEvent(String eventBlock, String documentId, AtomicReference<Map<String, Object>> resultRef) {
-        try {
-            if (eventBlock == null || eventBlock.trim().isEmpty()) {
-                return;
-            }
+            String taskId = (String) submitResponse.getBody().get("task_id");
+            log.info("异步 NER 任务已提交: documentId={}, taskId={}", documentId, taskId);
             
-            String eventType = null;
-            String eventData = null;
+            // 2. 轮询任务状态
+            String statusUrl = nerServiceUrl + "/ner/task/" + taskId;
+            long startTime = System.currentTimeMillis();
+            int lastProgress = -1;
             
-            // 解析事件块
-            for (String line : eventBlock.split("\n")) {
-                line = line.trim();
-                if (line.startsWith("event:")) {
-                    eventType = line.substring(6).trim();
-                } else if (line.startsWith("data:")) {
-                    eventData = line.substring(5).trim();
+            while (System.currentTimeMillis() - startTime < maxWaitTime) {
+                try {
+                    Thread.sleep(pollInterval);
+                } catch (InterruptedException e) {
+                    Thread.currentThread().interrupt();
+                    log.warn("轮询被中断: taskId={}", taskId);
+                    break;
                 }
-            }
-            
-            log.debug("解析 SSE 事件: type={}, dataLength={}", eventType, eventData != null ? eventData.length() : 0);
-            
-            if (eventData == null || eventData.isEmpty()) {
-                log.debug("SSE 事件数据为空,跳过: type={}", eventType);
-                return;
-            }
-            
-            @SuppressWarnings("unchecked")
-            Map<String, Object> data = objectMapper.readValue(eventData, Map.class);
-            
-            // 根据事件类型处理
-            if ("start".equals(eventType)) {
-                log.info("NER 开始处理: documentId={}", documentId);
-            } else if ("progress".equals(eventType) || "chunk_complete".equals(eventType)) {
-                Object progressObj = data.get("progress_percent");
-                if (progressObj != null) {
-                    int progress = progressObj instanceof Integer ? (Integer) progressObj : ((Number) progressObj).intValue();
-                    String message = (String) data.getOrDefault("message", "处理中...");
-                    log.info("NER 进度: documentId={}, progress={}%, message={}", documentId, progress, message);
+                
+                try {
+                    @SuppressWarnings("unchecked")
+                    ResponseEntity<Map<String, Object>> statusResponse = restTemplate.exchange(
+                            statusUrl, HttpMethod.GET, null,
+                            (Class<Map<String, Object>>) (Class<?>) Map.class);
+                    
+                    if (!statusResponse.getStatusCode().is2xxSuccessful() || statusResponse.getBody() == null) {
+                        log.warn("查询任务状态失败: taskId={}", taskId);
+                        continue;
+                    }
+                    
+                    Map<String, Object> taskStatus = statusResponse.getBody();
+                    String status = (String) taskStatus.get("status");
+                    int progress = taskStatus.get("progress") != null ? 
+                            ((Number) taskStatus.get("progress")).intValue() : 0;
+                    String message = (String) taskStatus.get("message");
+                    
+                    // 只在进度变化时打印日志
+                    if (progress != lastProgress) {
+                        log.info("NER 进度: documentId={}, taskId={}, status={}, progress={}%, message={}", 
+                                documentId, taskId, status, progress, message);
+                        lastProgress = progress;
+                    }
+                    
+                    // 检查任务是否完成
+                    if ("completed".equals(status)) {
+                        @SuppressWarnings("unchecked")
+                        Map<String, Object> result = (Map<String, Object>) taskStatus.get("result");
+                        log.info("异步 NER 任务完成: documentId={}, taskId={}", documentId, taskId);
+                        
+                        // 删除任务(释放服务端内存)
+                        try {
+                            restTemplate.delete(statusUrl);
+                        } catch (Exception e) {
+                            log.debug("删除任务失败(可忽略): taskId={}", taskId);
+                        }
+                        
+                        return result;
+                    } else if ("failed".equals(status)) {
+                        String error = (String) taskStatus.get("error");
+                        log.error("异步 NER 任务失败: documentId={}, taskId={}, error={}", documentId, taskId, error);
+                        return null;
+                    }
+                    
+                } catch (Exception e) {
+                    log.warn("轮询任务状态异常: taskId={}, error={}", taskId, e.getMessage());
                 }
-            } else if ("entities_data".equals(eventType)) {
-                log.debug("收到 entities_data 事件,实体数: {}", data.get("total_entities"));
-            } else if ("complete".equals(eventType)) {
-                // 完成事件,包含最终结果
-                resultRef.set(data);
-                int entityCount = data.get("entities") != null ? ((List<?>) data.get("entities")).size() : 0;
-                log.info("NER 流式处理完成: documentId={}, entities={}, success={}", 
-                        documentId, entityCount, data.get("success"));
-            } else if ("error".equals(eventType)) {
-                log.error("NER 服务返回错误: documentId={}, error={}", documentId, data.get("error"));
-            } else {
-                log.debug("未知 SSE 事件类型: {}", eventType);
             }
             
+            log.error("异步 NER 任务超时: documentId={}, taskId={}, maxWaitTime={}ms", 
+                    documentId, taskId, maxWaitTime);
+            return null;
+            
         } catch (Exception e) {
-            log.warn("解析 SSE 事件时出错: eventBlock={}, error={}", 
-                    eventBlock.length() > 200 ? eventBlock.substring(0, 200) + "..." : eventBlock, 
-                    e.getMessage());
+            log.error("调用异步 NER 服务失败: documentId={}, error={}", documentId, e.getMessage(), e);
+            // 回退到同步 API
+            log.info("回退到同步 NER API: documentId={}", documentId);
+            return callPythonNerService(documentId, text, userId);
         }
     }
 }

+ 6 - 2
backend/lingyue-starter/src/main/resources/application.properties

@@ -129,8 +129,12 @@ ner.python-service.timeout=300000
 ner.python-service.connect-timeout=5000
 ner.python-service.max-retries=3
 ner.python-service.retry-interval=1000
-# 是否使用 SSE 流式 API(实时进度反馈)
-ner.python-service.use-stream=true
+# 是否使用异步轮询 API(推荐开启,可实时查看进度)
+ner.python-service.use-async=true
+# 轮询间隔(毫秒)
+ner.python-service.poll-interval=3000
+# 最大等待时间(毫秒),10 分钟
+ner.python-service.max-wait-time=600000
 
 # NER 实体类型配置
 ner.entity-types=PERSON,ORG,LOC,DATE,NUMBER,DEVICE,PROJECT,TERM

+ 197 - 117
python-services/ner-service/app/routers/ner.py

@@ -3,16 +3,36 @@ NER 路由
 """
 import json
 import asyncio
-from fastapi import APIRouter, HTTPException
+import uuid
+from typing import Dict, Any, Optional
+from fastapi import APIRouter, HTTPException, BackgroundTasks
 from fastapi.responses import StreamingResponse
 from loguru import logger
 import time
+from pydantic import BaseModel
 
 from ..models import NerRequest, NerResponse, EntityInfo
 from ..services.ner_service import ner_service
 
 router = APIRouter()
 
+# ============== Task store ==============
+# In-memory registry of async tasks, keyed by task_id; each value is the
+# status/result dict that the endpoints and the background worker read and write.
+_tasks: Dict[str, Dict[str, Any]] = {}
+
+
+class TaskStatus(BaseModel):
+    """Response model describing the state of one async NER task."""
+    # Identifier generated when the task is submitted.
+    task_id: str
+    # Identifier of the document the task belongs to.
+    document_id: str
+    status: str  # pending, processing, completed, failed
+    progress: int = 0  # 0-100
+    # Human-readable description of the current stage.
+    message: str = ""
+    # Extraction result; filled in once the task has completed.
+    result: Optional[Dict] = None
+    # Error text; filled in when the task has failed.
+    error: Optional[str] = None
+    # time.time() timestamps of task creation and of the last status change.
+    created_at: float = 0
+    updated_at: float = 0
+
 
 async def sse_event(event: str, data: dict):
     """生成 SSE 事件格式"""
@@ -82,124 +102,184 @@ async def extract_entities(request: NerRequest):
         )
 
 
-@router.post("/extract/stream")
-async def extract_entities_stream(request: NerRequest):
+# ============== 异步任务接口(轮询模式) ==============
+
+@router.post("/extract/async")
+async def extract_entities_async(request: NerRequest, background_tasks: BackgroundTasks):
     """
-    从文本中提取命名实体(SSE 流式响应)
+    异步提取命名实体,立即返回任务 ID
     
-    实时推送进度事件:
-    - progress: 处理进度(分块处理时)
-    - entity: 发现新实体
-    - complete: 处理完成
-    - error: 处理出错
+    使用方式:
+    1. 调用此接口,获取 task_id
+    2. 轮询 /ner/task/{task_id} 查询进度和结果
     """
-    async def generate():
-        start_time = time.time()
-        
-        try:
-            # 发送开始事件
-            yield await sse_event("start", {
-                "document_id": request.document_id,
-                "text_length": len(request.text),
-                "message": "开始 NER 提取"
-            })
-            
-            # 验证文本长度
-            if len(request.text) > 50000:
-                yield await sse_event("error", {
-                    "document_id": request.document_id,
-                    "error": "文本长度超过限制(最大50000字符)"
-                })
-                return
-            
-            # 使用带进度回调的提取方法
-            all_entities = []
-            all_relations = []
-            
-            # 调用带进度的 NER 服务
-            from ..services.ner_service import ner_service
-            
-            # 检查是否支持流式提取
-            if hasattr(ner_service, 'extract_entities_with_progress'):
-                async for event_str in ner_service.extract_entities_with_progress(
-                    text=request.text,
-                    entity_types=request.entity_types
-                ):
-                    # 转发进度事件
-                    yield event_str
-                    
-                    # 解析事件获取实体数据
-                    if "entities_data" in event_str:
-                        try:
-                            # 从 SSE 格式中提取 JSON 数据
-                            lines = event_str.strip().split('\n')
-                            for line in lines:
-                                if line.startswith('data:'):
-                                    data = json.loads(line[5:].strip())
-                                    if 'entities' in data:
-                                        # 将字典转换回 EntityInfo 对象
-                                        all_entities = [
-                                            EntityInfo(**e) for e in data['entities']
-                                        ]
-                        except Exception as parse_err:
-                            logger.warning(f"解析实体数据事件失败: {parse_err}")
-            else:
-                # 回退到普通提取
-                all_entities = await ner_service.extract_entities(
-                    text=request.text,
-                    entity_types=request.entity_types
-                )
-                yield await sse_event("progress", {
-                    "document_id": request.document_id,
-                    "chunk_index": 1,
-                    "total_chunks": 1,
-                    "total_entities": len(all_entities),
-                    "progress_percent": 100
-                })
-            
-            # 提取关系
-            if request.extract_relations and len(all_entities) > 1:
-                yield await sse_event("progress", {
-                    "document_id": request.document_id,
-                    "message": "正在提取实体关系...",
-                    "stage": "relations"
-                })
-                
-                from ..services.relation_service import relation_service
-                all_relations = await relation_service.extract_relations(
-                    text=request.text,
-                    entities=all_entities
-                )
-            
-            processing_time = int((time.time() - start_time) * 1000)
-            
-            # 发送完成事件(包含完整结果)
-            response = NerResponse.success_response(
-                document_id=request.document_id,
-                entities=all_entities,
-                relations=all_relations,
-                processing_time=processing_time
-            )
-            
-            yield await sse_event("complete", response.dict())
-            
-            logger.info(f"SSE 提取完成: document_id={request.document_id}, "
-                       f"entity_count={len(all_entities)}, relation_count={len(all_relations)}, "
-                       f"processing_time={processing_time}ms")
-            
-        except Exception as e:
-            logger.error(f"SSE 提取失败: document_id={request.document_id}, error={str(e)}")
-            yield await sse_event("error", {
-                "document_id": request.document_id,
-                "error": str(e)
-            })
+    task_id = str(uuid.uuid4())
+    now = time.time()
     
-    return StreamingResponse(
-        generate(),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "X-Accel-Buffering": "no"
-        }
+    # 初始化任务状态
+    _tasks[task_id] = {
+        "task_id": task_id,
+        "document_id": request.document_id,
+        "status": "pending",
+        "progress": 0,
+        "message": "任务已创建,等待处理",
+        "result": None,
+        "error": None,
+        "created_at": now,
+        "updated_at": now
+    }
+    
+    # 启动后台任务
+    background_tasks.add_task(
+        _process_ner_task,
+        task_id,
+        request.document_id,
+        request.text,
+        request.entity_types,
+        request.extract_relations
     )
+    
+    logger.info(f"创建异步 NER 任务: task_id={task_id}, document_id={request.document_id}")
+    
+    return {
+        "task_id": task_id,
+        "document_id": request.document_id,
+        "status": "pending",
+        "message": "任务已创建,请轮询 /ner/task/{task_id} 获取进度"
+    }
+
+
+@router.get("/task/{task_id}", response_model=TaskStatus)
+async def get_task_status(task_id: str):
+    """
+    Return the current status, progress and (when available) result of an async task.
+
+    Status values:
+    - pending: waiting to be processed
+    - processing: in progress (``progress`` holds 0-100)
+    - completed: finished (``result`` holds the extraction result)
+    - failed: aborted (``error`` holds the error message)
+
+    Raises:
+        HTTPException: 404 when no task with this id exists.
+    """
+    record = _tasks.get(task_id)
+    if record is None:
+        raise HTTPException(status_code=404, detail=f"任务不存在: {task_id}")
+
+    return TaskStatus(**record)
+
+
+@router.delete("/task/{task_id}")
+async def delete_task(task_id: str):
+    """
+    Remove a task entry from the in-memory store to free its memory.
+
+    The task's status is not checked before removal.
+
+    Raises:
+        HTTPException: 404 when no task with this id exists.
+    """
+    removed = _tasks.pop(task_id, None)
+    if removed is None:
+        raise HTTPException(status_code=404, detail=f"任务不存在: {task_id}")
+
+    return {"message": f"任务已删除: {task_id}"}
+
+
+async def _process_ner_task(
+    task_id: str,
+    document_id: str,
+    text: str,
+    entity_types: list,
+    extract_relations: bool
+):
+    """
+    Run one NER extraction job in the background and record its outcome in ``_tasks``.
+
+    Progress, the final result or the error message are written to
+    ``_tasks[task_id]``; each write is skipped when that entry no longer exists.
+    Exceptions are logged and stored as a ``failed`` status instead of being re-raised.
+
+    Args:
+        task_id: Key of the task entry in ``_tasks``.
+        document_id: Document identifier, used in logs and in the result.
+        text: Text to analyse; more than 50000 characters fails the task.
+        entity_types: Entity types forwarded to ``ner_service``.
+        extract_relations: When true and more than one entity was found,
+            relations between the entities are extracted as well.
+    """
+    start_time = time.time()
+
+    def update_progress(progress: int, message: str):
+        """Mark the task as processing and store the latest progress and message."""
+        if task_id in _tasks:
+            _tasks[task_id]["status"] = "processing"
+            _tasks[task_id]["progress"] = progress
+            _tasks[task_id]["message"] = message
+            _tasks[task_id]["updated_at"] = time.time()
+
+    try:
+        update_progress(0, "开始处理")
+
+        # Reject over-long input before doing any work.
+        if len(text) > 50000:
+            raise ValueError("文本长度超过限制(最大50000字符)")
+
+        all_entities = []
+
+        # Prefer the progress-reporting extractor when the service provides one.
+        if hasattr(ner_service, 'extract_entities_with_progress'):
+            async for event_str in ner_service.extract_entities_with_progress(
+                text=text,
+                entity_types=entity_types
+            ):
+                # Only the "data:" lines of an event are parsed; each carries a JSON payload.
+                try:
+                    for line in event_str.strip().split('\n'):
+                        if not line.startswith('data:'):
+                            continue
+                        data = json.loads(line[5:].strip())
+
+                        if 'progress_percent' in data:
+                            # Cap at 90: the last 10% is reserved for relation extraction.
+                            progress = min(data['progress_percent'], 90)
+                            message = data.get('message', f"处理中 {progress}%")
+                            update_progress(progress, message)
+
+                        # A payload that carries entities replaces the collected list.
+                        if 'entities' in data:
+                            all_entities = [EntityInfo(**e) for e in data['entities']]
+                except Exception as parse_err:
+                    # Parsing stays best-effort (a bad event must not fail the task),
+                    # but the error is logged instead of silently dropped, and
+                    # BaseException (KeyboardInterrupt, SystemExit) is no longer trapped.
+                    logger.warning(f"解析 NER 进度事件失败: task_id={task_id}, error={parse_err}")
+        else:
+            update_progress(10, "正在提取实体...")
+            all_entities = await ner_service.extract_entities(
+                text=text,
+                entity_types=entity_types
+            )
+
+        update_progress(90, f"实体提取完成,共 {len(all_entities)} 个")
+
+        # Relation extraction needs at least two entities.
+        all_relations = []
+        if extract_relations and len(all_entities) > 1:
+            update_progress(92, "正在提取实体关系...")
+            from ..services.relation_service import relation_service
+            all_relations = await relation_service.extract_relations(
+                text=text,
+                entities=all_entities
+            )
+
+        processing_time = int((time.time() - start_time) * 1000)
+
+        # Build the result payload.
+        response = NerResponse.success_response(
+            document_id=document_id,
+            entities=all_entities,
+            relations=all_relations,
+            processing_time=processing_time
+        )
+
+        # Mark the task as completed and attach the result.
+        if task_id in _tasks:
+            _tasks[task_id]["status"] = "completed"
+            _tasks[task_id]["progress"] = 100
+            _tasks[task_id]["message"] = f"处理完成: {len(all_entities)} 个实体, {len(all_relations)} 个关系"
+            _tasks[task_id]["result"] = response.dict()
+            _tasks[task_id]["updated_at"] = time.time()
+
+        logger.info(f"异步 NER 任务完成: task_id={task_id}, document_id={document_id}, "
+                   f"entities={len(all_entities)}, relations={len(all_relations)}, time={processing_time}ms")
+
+    except Exception as e:
+        # Record the failure on the task entry so pollers can see it.
+        logger.error(f"异步 NER 任务失败: task_id={task_id}, document_id={document_id}, error={str(e)}")
+        if task_id in _tasks:
+            _tasks[task_id]["status"] = "failed"
+            _tasks[task_id]["error"] = str(e)
+            _tasks[task_id]["message"] = f"处理失败: {str(e)}"
+            _tasks[task_id]["updated_at"] = time.time()