|
|
@@ -0,0 +1,342 @@
|
|
|
+package com.lingyue.ai.service.impl;
|
|
|
+
|
|
|
import com.lingyue.ai.client.PythonNerClient;
import com.lingyue.ai.dto.ner.*;
import com.lingyue.ai.service.NerService;
import com.lingyue.common.exception.ServiceException;
import com.lingyue.graph.entity.GraphNode;
import com.lingyue.graph.entity.GraphRelation;
import com.lingyue.graph.entity.TextStorage;
import com.lingyue.graph.repository.GraphNodeRepository;
import com.lingyue.graph.repository.GraphRelationRepository;
import com.lingyue.graph.repository.TextStorageRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.NoTransactionException;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.transaction.interceptor.TransactionAspectSupport;

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
|
|
|
+
|
|
|
+/**
|
|
|
+ * NER 服务实现
|
|
|
+ *
|
|
|
+ * @author lingyue
|
|
|
+ * @since 2026-01-19
|
|
|
+ */
|
|
|
+@Slf4j
|
|
|
+@Service
|
|
|
+@RequiredArgsConstructor
|
|
|
+public class NerServiceImpl implements NerService {
|
|
|
+
|
|
|
+ private final PythonNerClient pythonNerClient;
|
|
|
+ private final TextStorageRepository textStorageRepository;
|
|
|
+ private final GraphNodeRepository graphNodeRepository;
|
|
|
+ private final GraphRelationRepository graphRelationRepository;
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public NerResponse extractEntities(NerRequest request) {
|
|
|
+ log.info("开始提取实体: documentId={}, textLength={}",
|
|
|
+ request.getDocumentId(),
|
|
|
+ request.getText() != null ? request.getText().length() : 0);
|
|
|
+
|
|
|
+ long startTime = System.currentTimeMillis();
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 验证请求
|
|
|
+ if (request.getText() == null || request.getText().isEmpty()) {
|
|
|
+ return NerResponse.error(request.getDocumentId(), "文本内容不能为空");
|
|
|
+ }
|
|
|
+
|
|
|
+ // 调用 Python NER 服务
|
|
|
+ NerResponse response = pythonNerClient.extractEntities(request);
|
|
|
+
|
|
|
+ long processingTime = System.currentTimeMillis() - startTime;
|
|
|
+ log.info("实体提取完成: documentId={}, entityCount={}, relationCount={}, time={}ms",
|
|
|
+ request.getDocumentId(),
|
|
|
+ response.getEntityCount(),
|
|
|
+ response.getRelationCount(),
|
|
|
+ processingTime);
|
|
|
+
|
|
|
+ return response;
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("实体提取失败: documentId={}", request.getDocumentId(), e);
|
|
|
+ return NerResponse.error(request.getDocumentId(), e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ @Async
|
|
|
+ public CompletableFuture<NerResponse> extractEntitiesAsync(NerRequest request) {
|
|
|
+ return CompletableFuture.supplyAsync(() -> extractEntities(request));
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public RelationResponse extractRelations(RelationRequest request) {
|
|
|
+ log.info("开始提取关系: documentId={}, entityCount={}",
|
|
|
+ request.getDocumentId(),
|
|
|
+ request.getEntities() != null ? request.getEntities().size() : 0);
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 验证请求
|
|
|
+ if (request.getEntities() == null || request.getEntities().size() < 2) {
|
|
|
+ return RelationResponse.success(request.getDocumentId(), Collections.emptyList(), 0);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 调用 Python NER 服务
|
|
|
+ RelationResponse response = pythonNerClient.extractRelations(request);
|
|
|
+
|
|
|
+ log.info("关系提取完成: documentId={}, relationCount={}",
|
|
|
+ request.getDocumentId(), response.getRelationCount());
|
|
|
+
|
|
|
+ return response;
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("关系提取失败: documentId={}", request.getDocumentId(), e);
|
|
|
+ return RelationResponse.error(request.getDocumentId(), e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ @Transactional
|
|
|
+ public NerResponse extractAndSaveForDocument(String documentId, String userId) {
|
|
|
+ log.info("开始对文档执行 NER 并保存: documentId={}, userId={}", documentId, userId);
|
|
|
+
|
|
|
+ long startTime = System.currentTimeMillis();
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 1. 获取文档的文本内容
|
|
|
+ String text = getDocumentText(documentId);
|
|
|
+ if (text == null || text.isEmpty()) {
|
|
|
+ return NerResponse.error(documentId, "文档文本内容为空");
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 调用 NER 提取
|
|
|
+ NerRequest request = NerRequest.builder()
|
|
|
+ .documentId(documentId)
|
|
|
+ .text(text)
|
|
|
+ .userId(userId)
|
|
|
+ .extractRelations(true)
|
|
|
+ .build();
|
|
|
+
|
|
|
+ NerResponse nerResponse = pythonNerClient.extractEntities(request);
|
|
|
+
|
|
|
+ if (!nerResponse.getSuccess()) {
|
|
|
+ return nerResponse;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3. 保存实体到图数据库
|
|
|
+ Map<String, String> tempIdToNodeId = saveEntitiesToGraph(
|
|
|
+ documentId, userId, nerResponse.getEntities());
|
|
|
+
|
|
|
+ // 4. 保存关系到图数据库
|
|
|
+ if (nerResponse.getRelations() != null && !nerResponse.getRelations().isEmpty()) {
|
|
|
+ saveRelationsToGraph(nerResponse.getRelations(), tempIdToNodeId);
|
|
|
+ }
|
|
|
+
|
|
|
+ long processingTime = System.currentTimeMillis() - startTime;
|
|
|
+
|
|
|
+ log.info("文档 NER 完成并保存: documentId={}, entityCount={}, relationCount={}, time={}ms",
|
|
|
+ documentId, nerResponse.getEntityCount(), nerResponse.getRelationCount(), processingTime);
|
|
|
+
|
|
|
+ // 更新响应的处理时间
|
|
|
+ nerResponse.setProcessingTime(processingTime);
|
|
|
+ return nerResponse;
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("文档 NER 失败: documentId={}", documentId, e);
|
|
|
+ return NerResponse.error(documentId, e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ @Async
|
|
|
+ public CompletableFuture<NerResponse> extractAndSaveForDocumentAsync(String documentId, String userId) {
|
|
|
+ return CompletableFuture.supplyAsync(() -> extractAndSaveForDocument(documentId, userId));
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public List<List<EntityInfo>> batchExtractEntities(List<String> texts) {
|
|
|
+ log.info("批量提取实体: count={}", texts.size());
|
|
|
+
|
|
|
+ return texts.stream()
|
|
|
+ .map(text -> {
|
|
|
+ try {
|
|
|
+ NerRequest request = NerRequest.builder()
|
|
|
+ .text(text)
|
|
|
+ .extractRelations(false)
|
|
|
+ .build();
|
|
|
+ NerResponse response = extractEntities(request);
|
|
|
+ return response.getEntities() != null ? response.getEntities() : Collections.<EntityInfo>emptyList();
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("批量提取实体失败: {}", e.getMessage());
|
|
|
+ return Collections.<EntityInfo>emptyList();
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取文档的文本内容
|
|
|
+ */
|
|
|
+ private String getDocumentText(String documentId) {
|
|
|
+ TextStorage textStorage = textStorageRepository.findByDocumentId(documentId);
|
|
|
+ if (textStorage == null) {
|
|
|
+ throw new ServiceException("文档文本存储记录不存在: documentId=" + documentId);
|
|
|
+ }
|
|
|
+
|
|
|
+ String filePath = textStorage.getFilePath();
|
|
|
+ try {
|
|
|
+ return Files.readString(Path.of(filePath), StandardCharsets.UTF_8);
|
|
|
+ } catch (Exception e) {
|
|
|
+ throw new ServiceException("读取文档文本失败: " + e.getMessage(), e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 保存实体到图数据库
|
|
|
+ *
|
|
|
+ * @return tempId 到实际 nodeId 的映射
|
|
|
+ */
|
|
|
+ private Map<String, String> saveEntitiesToGraph(String documentId, String userId,
|
|
|
+ List<EntityInfo> entities) {
|
|
|
+ Map<String, String> tempIdToNodeId = new HashMap<>();
|
|
|
+
|
|
|
+ if (entities == null || entities.isEmpty()) {
|
|
|
+ return tempIdToNodeId;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (EntityInfo entity : entities) {
|
|
|
+ GraphNode node = new GraphNode();
|
|
|
+ node.setId(UUID.randomUUID().toString().replace("-", ""));
|
|
|
+ node.setDocumentId(documentId);
|
|
|
+ node.setUserId(userId != null ? userId : "system");
|
|
|
+ node.setName(entity.getName());
|
|
|
+ node.setType(entity.getType() != null ? entity.getType().toLowerCase() : "other");
|
|
|
+ node.setValue(entity.getValue());
|
|
|
+ node.setLevel(0);
|
|
|
+ node.setCreateTime(new Date());
|
|
|
+ node.setUpdateTime(new Date());
|
|
|
+
|
|
|
+ // 转换位置信息
|
|
|
+ if (entity.getPosition() != null) {
|
|
|
+ Map<String, Object> position = new HashMap<>();
|
|
|
+ position.put("charStart", entity.getPosition().getCharStart());
|
|
|
+ position.put("charEnd", entity.getPosition().getCharEnd());
|
|
|
+ if (entity.getPosition().getLine() != null) {
|
|
|
+ position.put("line", entity.getPosition().getLine());
|
|
|
+ }
|
|
|
+ if (entity.getPosition().getPage() != null) {
|
|
|
+ position.put("page", entity.getPosition().getPage());
|
|
|
+ }
|
|
|
+ node.setPosition(position);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 保存元数据
|
|
|
+ Map<String, Object> metadata = new HashMap<>();
|
|
|
+ if (entity.getContext() != null) {
|
|
|
+ metadata.put("context", entity.getContext());
|
|
|
+ }
|
|
|
+ if (entity.getConfidence() != null) {
|
|
|
+ metadata.put("confidence", entity.getConfidence());
|
|
|
+ }
|
|
|
+ metadata.put("source", "ner");
|
|
|
+ node.setMetadata(metadata);
|
|
|
+
|
|
|
+ graphNodeRepository.insert(node);
|
|
|
+
|
|
|
+ // 记录 tempId 到 nodeId 的映射
|
|
|
+ if (entity.getTempId() != null) {
|
|
|
+ tempIdToNodeId.put(entity.getTempId(), node.getId());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ log.debug("保存实体到图数据库完成: count={}", entities.size());
|
|
|
+ return tempIdToNodeId;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 保存关系到图数据库
|
|
|
+ */
|
|
|
+ private void saveRelationsToGraph(List<RelationInfo> relations, Map<String, String> tempIdToNodeId) {
|
|
|
+ if (relations == null || relations.isEmpty()) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ int savedCount = 0;
|
|
|
+ for (RelationInfo relation : relations) {
|
|
|
+ // 通过 tempId 获取实际的 nodeId
|
|
|
+ String fromNodeId = relation.getFromEntityId() != null ?
|
|
|
+ tempIdToNodeId.get(relation.getFromEntityId()) : null;
|
|
|
+ String toNodeId = relation.getToEntityId() != null ?
|
|
|
+ tempIdToNodeId.get(relation.getToEntityId()) : null;
|
|
|
+
|
|
|
+ // 如果无法找到对应的节点,跳过
|
|
|
+ if (fromNodeId == null || toNodeId == null) {
|
|
|
+ log.debug("跳过关系保存(节点不存在): from={}, to={}",
|
|
|
+ relation.getFromEntity(), relation.getToEntity());
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ GraphRelation graphRelation = new GraphRelation();
|
|
|
+ graphRelation.setId(UUID.randomUUID().toString().replace("-", ""));
|
|
|
+ graphRelation.setFromNodeId(fromNodeId);
|
|
|
+ graphRelation.setToNodeId(toNodeId);
|
|
|
+ graphRelation.setRelationType(mapRelationType(relation.getRelationType()));
|
|
|
+ graphRelation.setOrderIndex(0);
|
|
|
+ graphRelation.setCreateTime(new Date());
|
|
|
+ graphRelation.setUpdateTime(new Date());
|
|
|
+
|
|
|
+ // 保存元数据
|
|
|
+ Map<String, Object> metadata = new HashMap<>();
|
|
|
+ if (relation.getConfidence() != null) {
|
|
|
+ metadata.put("confidence", relation.getConfidence());
|
|
|
+ }
|
|
|
+ metadata.put("originalType", relation.getRelationType());
|
|
|
+ metadata.put("source", "ner");
|
|
|
+ graphRelation.setMetadata(metadata);
|
|
|
+
|
|
|
+ graphRelationRepository.insert(graphRelation);
|
|
|
+ savedCount++;
|
|
|
+ }
|
|
|
+
|
|
|
+ log.debug("保存关系到图数据库完成: count={}", savedCount);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 映射关系类型到系统定义的类型
|
|
|
+ */
|
|
|
+ private String mapRelationType(String originalType) {
|
|
|
+ if (originalType == null) {
|
|
|
+ return "DEP";
|
|
|
+ }
|
|
|
+
|
|
|
+ // 映射常见关系类型
|
|
|
+ switch (originalType) {
|
|
|
+ case "负责":
|
|
|
+ case "管理":
|
|
|
+ case "承担":
|
|
|
+ return "DEP"; // 依赖关系
|
|
|
+ case "属于":
|
|
|
+ case "隶属":
|
|
|
+ return "DEP";
|
|
|
+ case "包含":
|
|
|
+ case "包括":
|
|
|
+ return "DEP";
|
|
|
+ case "位于":
|
|
|
+ case "在":
|
|
|
+ return "DEP";
|
|
|
+ case "使用":
|
|
|
+ case "采用":
|
|
|
+ return "DEP";
|
|
|
+ default:
|
|
|
+ return "DEP"; // 默认为依赖关系
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|