package com.lingyue.graph.service;

import com.lingyue.common.exception.ServiceException;
import com.lingyue.graph.entity.GraphNode;
import com.lingyue.graph.entity.GraphRelation;
import com.lingyue.graph.entity.TextStorage;
import com.lingyue.graph.repository.GraphNodeRepository;
import com.lingyue.graph.repository.GraphRelationRepository;
import com.lingyue.graph.repository.TextStorageRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

/**
 * Graph database NER service.
 *
 * <p>Persists NER (named-entity recognition) extraction results as graph nodes and
 * relations, and offers read/delete helpers for those results. Records written by
 * this service are tagged with {@code metadata.source = "ner"}; the delete and
 * statistics methods select records by that tag.
 *
 * @author lingyue
 * @since 2026-01-19
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class GraphNerService {

    /** Metadata key holding the origin of a node/relation. */
    private static final String METADATA_SOURCE_KEY = "source";

    /** Metadata value marking a record as produced by NER extraction. */
    private static final String SOURCE_NER = "ner";

    /** System relation type used for every NER relation (dependency). */
    private static final String DEFAULT_RELATION_TYPE = "DEP";

    private final TextStorageRepository textStorageRepository;
    private final GraphNodeRepository graphNodeRepository;
    private final GraphRelationRepository graphRelationRepository;

    /**
     * Reads the stored text content of a document.
     *
     * @param documentId document ID
     * @return the file content decoded as UTF-8
     * @throws ServiceException if no text-storage record exists for the document, the
     *                          record has no file path, or the file cannot be read
     */
    public String getDocumentText(String documentId) {
        TextStorage textStorage = textStorageRepository.findByDocumentId(documentId);
        if (textStorage == null) {
            throw new ServiceException("文档文本存储记录不存在: documentId=" + documentId);
        }

        String filePath = textStorage.getFilePath();
        // Fail with an explicit message instead of an opaque "...: null" caused by
        // Path.of(null) throwing a NullPointerException.
        if (filePath == null || filePath.isBlank()) {
            throw new ServiceException("文档文本存储路径为空: documentId=" + documentId);
        }

        try {
            return Files.readString(Path.of(filePath), StandardCharsets.UTF_8);
        } catch (Exception e) {
            // Service boundary: every read failure (I/O error, invalid path, ...) is
            // reported to callers as a ServiceException with the cause preserved.
            throw new ServiceException("读取文档文本失败: " + e.getMessage(), e);
        }
    }

    /**
     * Checks whether a text-storage record exists for the document.
     *
     * @param documentId document ID
     * @return {@code true} if a text-storage record was found
     */
    public boolean hasDocumentText(String documentId) {
        return textStorageRepository.findByDocumentId(documentId) != null;
    }

    /**
     * Saves a batch of NER entities as graph nodes.
     *
     * @param documentId document ID
     * @param userId     user ID; may be {@code null} (automatic NER extraction has no user context)
     * @param entities   entity maps; the keys read are {@code name}, {@code type}, {@code value},
     *                   {@code position}, {@code context}, {@code confidence} and {@code tempId}
     * @return mapping from each entity's {@code tempId} to the generated node ID; empty when
     *         there is nothing to save
     */
    @Transactional
    public Map<String, String> saveEntitiesToGraph(String documentId, String userId,
                                                   List<Map<String, Object>> entities) {
        Map<String, String> tempIdToNodeId = new HashMap<>();

        if (entities == null || entities.isEmpty()) {
            log.debug("无实体需要保存");
            return tempIdToNodeId;
        }

        log.info("开始保存实体到图数据库: documentId={}, count={}", documentId, entities.size());

        int savedCount = 0;
        for (Map<String, Object> entity : entities) {
            if (entity == null) {
                // A null element carries no data; skip it rather than abort the whole batch.
                log.warn("跳过空实体: documentId={}", documentId);
                continue;
            }

            GraphNode node = buildEntityNode(documentId, userId, entity);
            graphNodeRepository.insert(node);
            savedCount++;

            // Remember tempId -> nodeId so relations can be resolved later.
            String tempId = getStringValue(entity, "tempId");
            if (tempId != null) {
                tempIdToNodeId.put(tempId, node.getId());
            }
        }

        log.info("实体保存完成: documentId={}, savedCount={}", documentId, savedCount);
        return tempIdToNodeId;
    }

    /**
     * Saves a batch of NER relations as graph relations.
     *
     * <p>A relation is skipped when either of its endpoints ({@code fromEntityId} /
     * {@code toEntityId}) cannot be resolved through {@code tempIdToNodeId}.
     *
     * @param relations      relation maps; the keys read are {@code fromEntityId}, {@code toEntityId},
     *                       {@code relationType}, {@code confidence}, and (for logging only)
     *                       {@code fromEntity} / {@code toEntity}
     * @param tempIdToNodeId mapping from entity tempId to node ID; {@code null} is treated as empty
     * @return number of relations inserted
     */
    @Transactional
    public int saveRelationsToGraph(List<Map<String, Object>> relations,
                                    Map<String, String> tempIdToNodeId) {
        if (relations == null || relations.isEmpty()) {
            log.debug("无关系需要保存");
            return 0;
        }

        log.info("开始保存关系到图数据库: count={}", relations.size());

        Map<String, String> idMap =
                tempIdToNodeId != null ? tempIdToNodeId : Collections.<String, String>emptyMap();

        int savedCount = 0;
        for (Map<String, Object> relation : relations) {
            if (relation == null) {
                log.warn("跳过空关系");
                continue;
            }

            // Resolve the temporary entity IDs to the persisted node IDs.
            String fromEntityId = getStringValue(relation, "fromEntityId");
            String toEntityId = getStringValue(relation, "toEntityId");
            String fromNodeId = fromEntityId != null ? idMap.get(fromEntityId) : null;
            String toNodeId = toEntityId != null ? idMap.get(toEntityId) : null;

            // Skip relations whose endpoints were not saved.
            if (fromNodeId == null || toNodeId == null) {
                log.debug("跳过关系保存(节点不存在): from={}, to={}",
                        getStringValue(relation, "fromEntity"),
                        getStringValue(relation, "toEntity"));
                continue;
            }

            graphRelationRepository.insert(buildRelation(relation, fromNodeId, toNodeId));
            savedCount++;
        }

        log.info("关系保存完成: savedCount={}", savedCount);
        return savedCount;
    }

    /**
     * Deletes all NER-generated nodes of a document together with the relations
     * attached to those nodes.
     *
     * @param documentId document ID
     * @return number of nodes deleted
     */
    @Transactional
    public int deleteNerResultsByDocumentId(String documentId) {
        log.info("删除文档 NER 结果: documentId={}", documentId);

        List<GraphNode> nodes = graphNodeRepository.findByDocumentId(documentId);
        if (nodes == null) {
            nodes = Collections.emptyList();
        }

        int deletedCount = 0;
        for (GraphNode node : nodes) {
            if (!isNerNode(node)) {
                continue;
            }

            // Delete the attached relations first, then the node itself.
            List<GraphRelation> relations = graphRelationRepository.findByNodeId(node.getId());
            if (relations != null) {
                for (GraphRelation relation : relations) {
                    graphRelationRepository.deleteById(relation.getId());
                }
            }

            graphNodeRepository.deleteById(node.getId());
            deletedCount++;
        }

        log.info("NER 结果删除完成: documentId={}, deletedCount={}", documentId, deletedCount);
        return deletedCount;
    }

    /**
     * Collects statistics about the NER-generated nodes of a document.
     *
     * @param documentId document ID
     * @return map with {@code documentId}, {@code totalNerNodes} (count of NER nodes) and
     *         {@code typeStats} (node type to count)
     */
    public Map<String, Object> getNerStatsByDocumentId(String documentId) {
        List<GraphNode> nodes = graphNodeRepository.findByDocumentId(documentId);
        if (nodes == null) {
            nodes = Collections.emptyList();
        }

        int nerNodeCount = 0;
        Map<String, Integer> typeStats = new HashMap<>();
        for (GraphNode node : nodes) {
            if (isNerNode(node)) {
                nerNodeCount++;
                typeStats.merge(node.getType(), 1, Integer::sum);
            }
        }

        Map<String, Object> stats = new HashMap<>();
        stats.put("documentId", documentId);
        stats.put("totalNerNodes", nerNodeCount);
        stats.put("typeStats", typeStats);
        return stats;
    }

    /** Builds (without persisting) the graph node for one NER entity map. */
    private GraphNode buildEntityNode(String documentId, String userId, Map<String, Object> entity) {
        // One timestamp so createTime and updateTime are identical on insert.
        Date now = new Date();

        GraphNode node = new GraphNode();
        node.setId(newId());
        node.setDocumentId(documentId);
        node.setUserId(userId); // may be null: automatic NER extraction has no user context
        node.setName(getStringValue(entity, "name"));
        // Locale.ROOT keeps the normalized type stable regardless of the JVM default locale.
        node.setType(getStringValue(entity, "type", "other").toLowerCase(Locale.ROOT));
        node.setValue(getStringValue(entity, "value"));
        node.setLevel(0);
        node.setCreateTime(now);
        node.setUpdateTime(now);

        // Position info is stored as received (character offsets).
        Object positionObj = entity.get("position");
        if (positionObj instanceof Map) {
            // NOTE(review): assumes GraphNode.position is Map<String, Object> — confirm against the entity.
            @SuppressWarnings("unchecked")
            Map<String, Object> posMap = (Map<String, Object>) positionObj;
            log.debug("实体位置信息: name={}, position={}", node.getName(), posMap);
            node.setPosition(posMap);
        } else {
            log.debug("实体无位置信息: name={}, positionObj={}", node.getName(), positionObj);
        }

        Map<String, Object> metadata = new HashMap<>();
        String context = getStringValue(entity, "context");
        if (context != null) {
            metadata.put("context", context);
        }
        Object confidence = entity.get("confidence");
        if (confidence != null) {
            metadata.put("confidence", confidence);
        }
        metadata.put(METADATA_SOURCE_KEY, SOURCE_NER);
        node.setMetadata(metadata);

        return node;
    }

    /** Builds (without persisting) the graph relation between two already-resolved node IDs. */
    private GraphRelation buildRelation(Map<String, Object> relation, String fromNodeId, String toNodeId) {
        Date now = new Date();
        String originalType = getStringValue(relation, "relationType");

        GraphRelation graphRelation = new GraphRelation();
        graphRelation.setId(newId());
        graphRelation.setFromNodeId(fromNodeId);
        graphRelation.setToNodeId(toNodeId);
        graphRelation.setRelationType(mapRelationType(originalType));
        graphRelation.setOrderIndex(0);
        graphRelation.setCreateTime(now);
        graphRelation.setUpdateTime(now);

        Map<String, Object> metadata = new HashMap<>();
        Object confidence = relation.get("confidence");
        if (confidence != null) {
            metadata.put("confidence", confidence);
        }
        // The extracted label is kept (possibly null) because relationType is normalized.
        metadata.put("originalType", originalType);
        metadata.put(METADATA_SOURCE_KEY, SOURCE_NER);
        graphRelation.setMetadata(metadata);

        return graphRelation;
    }

    /** Returns {@code true} when the node's metadata is a map tagged with source "ner". */
    private boolean isNerNode(GraphNode node) {
        if (node == null) {
            return false;
        }
        Object metadata = node.getMetadata();
        return metadata instanceof Map
                && SOURCE_NER.equals(((Map<?, ?>) metadata).get(METADATA_SOURCE_KEY));
    }

    /** Generates a 32-character ID: a random UUID with the dashes removed. */
    private String newId() {
        return UUID.randomUUID().toString().replace("-", "");
    }

    /**
     * Maps an extracted relation label to a system-defined relation type.
     *
     * <p>Every label, known or not, currently resolves to the dependency type; the
     * explicit cases are the labels recognized so far.
     */
    private String mapRelationType(String originalType) {
        if (originalType == null) {
            return DEFAULT_RELATION_TYPE;
        }

        switch (originalType) {
            case "负责":
            case "管理":
            case "承担":
            case "属于":
            case "隶属":
            case "包含":
            case "包括":
            case "位于":
            case "在":
            case "使用":
            case "采用":
                return DEFAULT_RELATION_TYPE; // dependency relation
            default:
                return DEFAULT_RELATION_TYPE; // anything else also falls back to dependency
        }
    }

    /** Returns {@code map.get(key)} as a string, or {@code null} when the value is absent. */
    private String getStringValue(Map<String, Object> map, String key) {
        return getStringValue(map, key, null);
    }

    /** Returns {@code map.get(key)} as a string, or {@code defaultValue} when the value is absent. */
    private String getStringValue(Map<String, Object> map, String key, String defaultValue) {
        Object value = map.get(key);
        return value == null ? defaultValue : value.toString();
    }
}