|
|
@@ -1,5 +1,6 @@
|
|
|
package com.lingyue.graph.listener;
|
|
|
|
|
|
+import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
import com.lingyue.common.event.DocumentParsedEvent;
|
|
|
import com.lingyue.graph.service.GraphNerService;
|
|
|
import lombok.RequiredArgsConstructor;
|
|
|
@@ -10,8 +11,11 @@ import org.springframework.http.*;
|
|
|
import org.springframework.scheduling.annotation.Async;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
import org.springframework.web.client.RestTemplate;
|
|
|
+import org.springframework.web.reactive.function.client.WebClient;
|
|
|
|
|
|
+import java.time.Duration;
|
|
|
import java.util.*;
|
|
|
+import java.util.concurrent.atomic.AtomicReference;
|
|
|
|
|
|
/**
|
|
|
* 文档解析完成事件监听器
|
|
|
@@ -27,12 +31,16 @@ public class DocumentParsedEventListener {
|
|
|
|
|
|
private final GraphNerService graphNerService;
|
|
|
private final RestTemplate restTemplate;
|
|
|
+ private final ObjectMapper objectMapper;
|
|
|
|
|
|
@Value("${ner.auto-extract.enabled:true}")
|
|
|
private boolean nerAutoExtractEnabled;
|
|
|
|
|
|
@Value("${ner.python-service.url:http://localhost:8001}")
|
|
|
private String nerServiceUrl;
|
|
|
+
|
|
|
+ @Value("${ner.python-service.use-stream:true}")
|
|
|
+ private boolean useStreamApi;
|
|
|
|
|
|
/**
|
|
|
* 处理文档解析完成事件
|
|
|
@@ -66,8 +74,13 @@ public class DocumentParsedEventListener {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- // 2. 调用 Python NER 服务
|
|
|
- Map<String, Object> nerResponse = callPythonNerService(documentId, text, userId);
|
|
|
+ // 2. 调用 Python NER 服务(根据配置选择流式或普通 API)
|
|
|
+ Map<String, Object> nerResponse;
|
|
|
+ if (useStreamApi) {
|
|
|
+ nerResponse = callPythonNerServiceWithStream(documentId, text, userId);
|
|
|
+ } else {
|
|
|
+ nerResponse = callPythonNerService(documentId, text, userId);
|
|
|
+ }
|
|
|
|
|
|
if (nerResponse == null || !Boolean.TRUE.equals(nerResponse.get("success"))) {
|
|
|
log.warn("NER 服务调用失败: documentId={}, error={}",
|
|
|
@@ -100,7 +113,7 @@ public class DocumentParsedEventListener {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 调用 Python NER 服务
|
|
|
+ * 调用 Python NER 服务(普通 REST API)
|
|
|
*/
|
|
|
private Map<String, Object> callPythonNerService(String documentId, String text, String userId) {
|
|
|
try {
|
|
|
@@ -117,7 +130,6 @@ public class DocumentParsedEventListener {
|
|
|
|
|
|
HttpEntity<Map<String, Object>> entity = new HttpEntity<>(request, headers);
|
|
|
|
|
|
- @SuppressWarnings("unchecked")
|
|
|
ResponseEntity<Map> response = restTemplate.exchange(url, HttpMethod.POST, entity, Map.class);
|
|
|
|
|
|
if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) {
|
|
|
@@ -133,4 +145,73 @@ public class DocumentParsedEventListener {
|
|
|
return null;
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+    /**
+     * Calls the Python NER service through its streaming endpoint
+     * ({@code POST /ner/extract/stream}) and logs progress events as they arrive.
+     *
+     * <p>The call blocks the current thread until the stream terminates. The last event whose
+     * JSON payload contains an {@code entities} key is kept as the result. If any exception
+     * escapes the stream (connection failure, idle timeout, error status, ...), the method falls
+     * back to {@link #callPythonNerService(String, String, String)}.
+     *
+     * @param documentId id of the document being processed; sent as {@code documentId}
+     * @param text       document text to run extraction on; sent as {@code text}
+     * @param userId     id of the owning user; sent as {@code userId}
+     * @return the payload of the completion event, or {@code null} when the stream ended
+     *         normally without one; when the stream failed, whatever the non-streaming
+     *         fallback returns
+     */
+    private Map<String, Object> callPythonNerServiceWithStream(String documentId, String text, String userId) {
+        try {
+            Map<String, Object> request = new HashMap<>();
+            request.put("documentId", documentId);
+            request.put("text", text);
+            request.put("userId", userId);
+            request.put("extractRelations", true);
+
+            WebClient webClient = WebClient.builder()
+                    .baseUrl(nerServiceUrl)
+                    .build();
+
+            // Written from the Reactor callback, read after blockLast() returns.
+            AtomicReference<Map<String, Object>> resultRef = new AtomicReference<>();
+
+            webClient.post()
+                    .uri("/ner/extract/stream")
+                    .contentType(MediaType.APPLICATION_JSON)
+                    .bodyValue(request)
+                    .retrieve()
+                    .bodyToFlux(String.class)
+                    // Idle timeout, not a total one: the stream fails only when no chunk
+                    // arrives for 10 minutes after subscription or after the previous chunk.
+                    .timeout(Duration.ofMinutes(10))
+                    .doOnNext(chunk -> handleNerStreamChunk(documentId, chunk, resultRef))
+                    .doOnError(e -> log.error("SSE 流处理错误: documentId={}, error={}", documentId, e.getMessage()))
+                    .blockLast(); // wait for the stream to complete; rethrows stream errors
+
+            return resultRef.get();
+
+        } catch (Exception e) {
+            // Pass the exception itself so the stack trace is not lost before falling back.
+            log.error("调用 Python NER SSE 服务失败: {}", e.getMessage(), e);
+            log.info("回退到普通 NER API: documentId={}", documentId);
+            return callPythonNerService(documentId, text, userId);
+        }
+    }
+
+    /**
+     * Interprets one chunk of the NER stream: logs progress and records the completion event.
+     *
+     * <p>Both raw SSE lines ({@code data: {...}}) and bare JSON payloads are accepted.
+     * NOTE(review): for a {@code text/event-stream} response Spring's SSE reader is expected to
+     * hand over only the data payload, without the {@code data:} prefix — confirm against the
+     * Content-Type the Python service actually returns.
+     *
+     * @param documentId id used in log messages
+     * @param chunk      one element emitted by the response body flux
+     * @param resultRef  receives the parsed event when it contains an {@code entities} key
+     */
+    private void handleNerStreamChunk(String documentId, String chunk,
+                                      AtomicReference<Map<String, Object>> resultRef) {
+        if (chunk == null) {
+            return;
+        }
+        String payload = chunk.trim();
+        if (payload.startsWith("event:")) {
+            // Event-type line: carries no JSON payload.
+            return;
+        }
+        if (payload.startsWith("data:")) {
+            payload = payload.substring(5).trim();
+        }
+        if (!payload.startsWith("{")) {
+            // Blank keep-alive lines, SSE comments/ids, or anything that is not a JSON object.
+            return;
+        }
+
+        Map<String, Object> eventData;
+        try {
+            @SuppressWarnings("unchecked") // payload starts with '{', so Jackson yields a string-keyed map
+            Map<String, Object> parsed = objectMapper.readValue(payload, Map.class);
+            eventData = parsed;
+        } catch (Exception e) {
+            // A malformed chunk must not abort the stream; warn so a lost final event is visible.
+            log.warn("解析 SSE 数据时出错: {}", e.getMessage());
+            return;
+        }
+
+        // Progress may arrive as Integer, Long or Double depending on the JSON literal;
+        // a blind (Integer) cast would throw and skip the completion check below.
+        Object progress = eventData.get("progress_percent");
+        if (progress instanceof Number) {
+            Object message = eventData.get("message");
+            log.info("NER 进度: documentId={}, progress={}%, message={}",
+                    documentId, ((Number) progress).intValue(), message != null ? message : "处理中...");
+        }
+
+        // The completion event is recognised by the presence of the entities key.
+        if (eventData.containsKey("entities")) {
+            resultRef.set(eventData);
+            log.info("NER 流式处理完成: documentId={}", documentId);
+        }
+    }
|
|
|
}
|