|
@@ -108,12 +108,17 @@ public class DocumentParsedEventListener {
|
|
|
String docType = document.getType();
|
|
String docType = document.getType();
|
|
|
if (!"word".equalsIgnoreCase(docType)) {
|
|
if (!"word".equalsIgnoreCase(docType)) {
|
|
|
log.debug("非 Word 文档,跳过结构化解析: documentId={}, type={}", documentId, docType);
|
|
log.debug("非 Word 文档,跳过结构化解析: documentId={}, type={}", documentId, docType);
|
|
|
|
|
+ // 标记为完成(非Word文档无需结构化解析)
|
|
|
|
|
+ updateTaskProgress(documentId, "structured", "completed", 100, null);
|
|
|
return;
|
|
return;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
log.info("开始自动结构化解析: documentId={}", documentId);
|
|
log.info("开始自动结构化解析: documentId={}", documentId);
|
|
|
long startTime = System.currentTimeMillis();
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
+ // 更新进度:开始
|
|
|
|
|
+ updateTaskProgress(documentId, "structured", "processing", 10, null);
|
|
|
|
|
+
|
|
|
// 调用本地 API 触发结构化解析
|
|
// 调用本地 API 触发结构化解析
|
|
|
String url = "http://localhost:" + serverPort + "/parse/structured/" + documentId;
|
|
String url = "http://localhost:" + serverPort + "/parse/structured/" + documentId;
|
|
|
|
|
|
|
@@ -122,12 +127,26 @@ public class DocumentParsedEventListener {
|
|
|
if (response.getStatusCode().is2xxSuccessful()) {
|
|
if (response.getStatusCode().is2xxSuccessful()) {
|
|
|
long time = System.currentTimeMillis() - startTime;
|
|
long time = System.currentTimeMillis() - startTime;
|
|
|
log.info("结构化解析完成: documentId={}, time={}ms", documentId, time);
|
|
log.info("结构化解析完成: documentId={}, time={}ms", documentId, time);
|
|
|
|
|
+
|
|
|
|
|
+ // 提取结果信息并更新进度
|
|
|
|
|
+ Map<String, Object> data = (Map<String, Object>) response.getBody().get("data");
|
|
|
|
|
+ if (data != null) {
|
|
|
|
|
+ Map<String, Object> progressData = new HashMap<>();
|
|
|
|
|
+ progressData.put("status", "completed");
|
|
|
|
|
+ progressData.put("progress", 100);
|
|
|
|
|
+ progressData.put("elementCount", data.get("totalElements"));
|
|
|
|
|
+ progressData.put("imageCount", data.get("imageCount"));
|
|
|
|
|
+ progressData.put("tableCount", data.get("tableCount"));
|
|
|
|
|
+ updateTaskProgress(documentId, "structured", progressData);
|
|
|
|
|
+ }
|
|
|
} else {
|
|
} else {
|
|
|
log.warn("结构化解析失败: documentId={}, status={}", documentId, response.getStatusCode());
|
|
log.warn("结构化解析失败: documentId={}, status={}", documentId, response.getStatusCode());
|
|
|
|
|
+ updateTaskProgress(documentId, "structured", "failed", 0, null);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
|
log.error("自动结构化解析异常: documentId={}, error={}", documentId, e.getMessage());
|
|
log.error("自动结构化解析异常: documentId={}, error={}", documentId, e.getMessage());
|
|
|
|
|
+ updateTaskProgress(documentId, "structured", "failed", 0, null);
|
|
|
// 异常不向上抛出,不影响后续处理
|
|
// 异常不向上抛出,不影响后续处理
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -152,6 +171,9 @@ public class DocumentParsedEventListener {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
log.info("开始自动 NER 提取: documentId={}", documentId);
|
|
log.info("开始自动 NER 提取: documentId={}", documentId);
|
|
|
|
|
+
|
|
|
|
|
+ // 更新进度:开始
|
|
|
|
|
+ updateTaskProgress(documentId, "ner", "processing", 5, null);
|
|
|
|
|
|
|
|
// 2. 调用 Python NER 服务(根据配置选择异步轮询或同步 API)
|
|
// 2. 调用 Python NER 服务(根据配置选择异步轮询或同步 API)
|
|
|
Map<String, Object> nerResponse;
|
|
Map<String, Object> nerResponse;
|
|
@@ -164,13 +186,23 @@ public class DocumentParsedEventListener {
|
|
|
if (nerResponse == null || !Boolean.TRUE.equals(nerResponse.get("success"))) {
|
|
if (nerResponse == null || !Boolean.TRUE.equals(nerResponse.get("success"))) {
|
|
|
log.warn("NER 服务调用失败: documentId={}, error={}",
|
|
log.warn("NER 服务调用失败: documentId={}, error={}",
|
|
|
documentId, nerResponse != null ? nerResponse.get("errorMessage") : "null response");
|
|
documentId, nerResponse != null ? nerResponse.get("errorMessage") : "null response");
|
|
|
|
|
+ updateTaskProgress(documentId, "ner", "failed", 0, null);
|
|
|
return;
|
|
return;
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ // 更新进度:NER 完成,开始保存
|
|
|
|
|
+ updateTaskProgress(documentId, "ner", "processing", 80, null);
|
|
|
|
|
+
|
|
|
|
|
+ // 更新图构建进度:开始
|
|
|
|
|
+ updateTaskProgress(documentId, "graph", "processing", 10, null);
|
|
|
|
|
|
|
|
// 3. 保存实体到图数据库
|
|
// 3. 保存实体到图数据库
|
|
|
@SuppressWarnings("unchecked")
|
|
@SuppressWarnings("unchecked")
|
|
|
List<Map<String, Object>> entities = (List<Map<String, Object>>) nerResponse.get("entities");
|
|
List<Map<String, Object>> entities = (List<Map<String, Object>>) nerResponse.get("entities");
|
|
|
Map<String, String> tempIdToNodeId = graphNerService.saveEntitiesToGraph(documentId, userId, entities);
|
|
Map<String, String> tempIdToNodeId = graphNerService.saveEntitiesToGraph(documentId, userId, entities);
|
|
|
|
|
+
|
|
|
|
|
+ // 更新图构建进度
|
|
|
|
|
+ updateTaskProgress(documentId, "graph", "processing", 50, null);
|
|
|
|
|
|
|
|
// 4. 保存关系到图数据库
|
|
// 4. 保存关系到图数据库
|
|
|
@SuppressWarnings("unchecked")
|
|
@SuppressWarnings("unchecked")
|
|
@@ -184,14 +216,24 @@ public class DocumentParsedEventListener {
|
|
|
|
|
|
|
|
long processingTime = System.currentTimeMillis() - startTime;
|
|
long processingTime = System.currentTimeMillis() - startTime;
|
|
|
|
|
|
|
|
|
|
+ // 更新 NER 完成进度
|
|
|
|
|
+ int entityCount = entities != null ? entities.size() : 0;
|
|
|
|
|
+ Map<String, Object> nerProgressData = new HashMap<>();
|
|
|
|
|
+ nerProgressData.put("status", "completed");
|
|
|
|
|
+ nerProgressData.put("progress", 100);
|
|
|
|
|
+ nerProgressData.put("entityCount", entityCount);
|
|
|
|
|
+ nerProgressData.put("relationCount", relationCount);
|
|
|
|
|
+ updateTaskProgress(documentId, "ner", nerProgressData);
|
|
|
|
|
+
|
|
|
|
|
+ // 更新图构建完成进度
|
|
|
|
|
+ updateTaskProgress(documentId, "graph", "completed", 100, null);
|
|
|
|
|
+
|
|
|
log.info("NER 自动提取完成: documentId={}, entityCount={}, relationCount={}, time={}ms",
|
|
log.info("NER 自动提取完成: documentId={}, entityCount={}, relationCount={}, time={}ms",
|
|
|
- documentId,
|
|
|
|
|
- entities != null ? entities.size() : 0,
|
|
|
|
|
- relationCount,
|
|
|
|
|
- processingTime);
|
|
|
|
|
|
|
+ documentId, entityCount, relationCount, processingTime);
|
|
|
|
|
|
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
|
log.error("NER 自动提取异常: documentId={}", documentId, e);
|
|
log.error("NER 自动提取异常: documentId={}", documentId, e);
|
|
|
|
|
+ updateTaskProgress(documentId, "ner", "failed", 0, null);
|
|
|
// 异常不向上抛出,不影响其他处理
|
|
// 异常不向上抛出,不影响其他处理
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -341,4 +383,40 @@ public class DocumentParsedEventListener {
|
|
|
return callPythonNerService(documentId, text, userId);
|
|
return callPythonNerService(documentId, text, userId);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ // ==================== 任务进度更新 ====================
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * Updates task progress (simple version).
+ *
+ * Builds a payload map holding "status" and "progress", adds "errorMessage"
+ * only when one is supplied, and delegates to the map-based overload
+ * updateTaskProgress(documentId, stage, data).
+ *
+ * @param documentId   document identifier, passed through to the overload unchanged
+ * @param stage        stage name, passed through to the overload unchanged
+ * @param status       value stored under the "status" key
+ * @param progress     value stored under the "progress" key (stored as given, no range check here)
+ * @param errorMessage optional message; stored under "errorMessage" only when non-null
|
|
|
|
|
+ */
|
|
|
|
|
+ private void updateTaskProgress(String documentId, String stage, String status, Integer progress, String errorMessage) {
|
|
|
|
|
+ Map<String, Object> data = new HashMap<>();
|
|
|
|
|
+ data.put("status", status);
|
|
|
|
|
+ data.put("progress", progress);
|
|
|
|
|
+ if (errorMessage != null) { // omit the key entirely when there is no error message
|
|
|
|
|
+ data.put("errorMessage", errorMessage);
|
|
|
|
|
+ }
|
|
|
|
|
+ updateTaskProgress(documentId, stage, data);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * Updates task progress (full version).
+ *
+ * Wraps the given data map in an HttpEntity with a JSON content type and
+ * POSTs it through restTemplate to
+ * http://localhost:{serverPort}/api/internal/task-progress/{stage}/{documentId}.
+ * The response returned by postForEntity is discarded.
+ *
+ * This is best-effort: any Exception raised while building or sending the
+ * request is caught, logged at debug level with its message only, and not
+ * rethrown.
+ *
+ * NOTE(review): stage and documentId are concatenated into the URL string
+ * as-is; confirm callers only pass path-safe values.
+ *
+ * @param documentId document identifier, used as the last path segment of the URL
+ * @param stage      stage name, used as the path segment before the document id
+ * @param data       payload sent as the request body
|
|
|
|
|
+ */
|
|
|
|
|
+ private void updateTaskProgress(String documentId, String stage, Map<String, Object> data) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ String url = "http://localhost:" + serverPort + "/api/internal/task-progress/" + stage + "/" + documentId;
|
|
|
|
|
+
|
|
|
|
|
+ HttpHeaders headers = new HttpHeaders();
|
|
|
|
|
+ headers.setContentType(MediaType.APPLICATION_JSON);
|
|
|
|
|
+
|
|
|
|
|
+ HttpEntity<Map<String, Object>> entity = new HttpEntity<>(data, headers);
|
|
|
|
|
+
|
|
|
|
|
+ restTemplate.postForEntity(url, entity, Map.class);
|
|
|
|
|
+
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ // A failed progress update must not affect the main flow
|
|
|
|
|
+ log.debug("更新任务进度失败(可忽略): documentId={}, stage={}, error={}",
|
|
|
|
|
+ documentId, stage, e.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|