| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309 |
- package com.lingyue.graph.service;
- import com.lingyue.common.exception.ServiceException;
- import com.lingyue.graph.entity.GraphNode;
- import com.lingyue.graph.entity.GraphRelation;
- import com.lingyue.graph.entity.TextStorage;
- import com.lingyue.graph.repository.GraphNodeRepository;
- import com.lingyue.graph.repository.GraphRelationRepository;
- import com.lingyue.graph.repository.TextStorageRepository;
- import lombok.RequiredArgsConstructor;
- import lombok.extern.slf4j.Slf4j;
- import org.springframework.stereotype.Service;
- import org.springframework.transaction.annotation.Transactional;
- import java.nio.charset.StandardCharsets;
- import java.nio.file.Files;
- import java.nio.file.Path;
- import java.util.*;
- /**
- * 图数据库 NER 服务
- *
- * 负责将 NER 提取结果保存到图数据库。
- * 此服务位于 graph-service 模块,可以直接访问图数据库相关的实体和仓库。
- *
- * @author lingyue
- * @since 2026-01-19
- */
- @Slf4j
- @Service
- @RequiredArgsConstructor
- public class GraphNerService {
- private final TextStorageRepository textStorageRepository;
- private final GraphNodeRepository graphNodeRepository;
- private final GraphRelationRepository graphRelationRepository;
- /**
- * 获取文档的文本内容
- *
- * @param documentId 文档ID
- * @return 文本内容
- */
- public String getDocumentText(String documentId) {
- TextStorage textStorage = textStorageRepository.findByDocumentId(documentId);
- if (textStorage == null) {
- throw new ServiceException("文档文本存储记录不存在: documentId=" + documentId);
- }
-
- String filePath = textStorage.getFilePath();
- try {
- return Files.readString(Path.of(filePath), StandardCharsets.UTF_8);
- } catch (Exception e) {
- throw new ServiceException("读取文档文本失败: " + e.getMessage(), e);
- }
- }
- /**
- * 检查文档是否存在文本存储
- *
- * @param documentId 文档ID
- * @return 是否存在
- */
- public boolean hasDocumentText(String documentId) {
- TextStorage textStorage = textStorageRepository.findByDocumentId(documentId);
- return textStorage != null;
- }
- /**
- * 批量保存 NER 实体到图数据库
- *
- * @param documentId 文档ID
- * @param userId 用户ID
- * @param entities 实体列表(包含 name, type, value, position, context, confidence, tempId)
- * @return tempId 到实际 nodeId 的映射
- */
- @Transactional
- public Map<String, String> saveEntitiesToGraph(String documentId, String userId,
- List<Map<String, Object>> entities) {
- Map<String, String> tempIdToNodeId = new HashMap<>();
-
- if (entities == null || entities.isEmpty()) {
- log.debug("无实体需要保存");
- return tempIdToNodeId;
- }
-
- log.info("开始保存实体到图数据库: documentId={}, count={}", documentId, entities.size());
-
- for (Map<String, Object> entity : entities) {
- GraphNode node = new GraphNode();
- node.setId(UUID.randomUUID().toString().replace("-", ""));
- node.setDocumentId(documentId);
- node.setUserId(userId); // 可为 null,自动 NER 提取时没有用户上下文
- node.setName(getStringValue(entity, "name"));
- node.setType(getStringValue(entity, "type", "other").toLowerCase());
- node.setValue(getStringValue(entity, "value"));
- node.setLevel(0);
- node.setCreateTime(new Date());
- node.setUpdateTime(new Date());
-
- // 转换位置信息(直接使用字符偏移)
- Object positionObj = entity.get("position");
- if (positionObj instanceof Map) {
- @SuppressWarnings("unchecked")
- Map<String, Object> posMap = (Map<String, Object>) positionObj;
- log.debug("实体位置信息: name={}, position={}", node.getName(), posMap);
- node.setPosition(posMap);
- } else {
- log.debug("实体无位置信息: name={}, positionObj={}", node.getName(), positionObj);
- }
-
- // 保存元数据
- Map<String, Object> metadata = new HashMap<>();
- String context = getStringValue(entity, "context");
- if (context != null) {
- metadata.put("context", context);
- }
- Object confidence = entity.get("confidence");
- if (confidence != null) {
- metadata.put("confidence", confidence);
- }
- metadata.put("source", "ner");
- node.setMetadata(metadata);
-
- graphNodeRepository.insert(node);
-
- // 记录 tempId 到 nodeId 的映射
- String tempId = getStringValue(entity, "tempId");
- if (tempId != null) {
- tempIdToNodeId.put(tempId, node.getId());
- }
- }
-
- log.info("实体保存完成: documentId={}, savedCount={}", documentId, entities.size());
- return tempIdToNodeId;
- }
- /**
- * 批量保存 NER 关系到图数据库
- *
- * @param relations 关系列表
- * @param tempIdToNodeId tempId 到 nodeId 的映射
- * @return 保存成功的数量
- */
- @Transactional
- public int saveRelationsToGraph(List<Map<String, Object>> relations,
- Map<String, String> tempIdToNodeId) {
- if (relations == null || relations.isEmpty()) {
- log.debug("无关系需要保存");
- return 0;
- }
-
- log.info("开始保存关系到图数据库: count={}", relations.size());
-
- int savedCount = 0;
- for (Map<String, Object> relation : relations) {
- // 通过 tempId 获取实际的 nodeId
- String fromEntityId = getStringValue(relation, "fromEntityId");
- String toEntityId = getStringValue(relation, "toEntityId");
-
- String fromNodeId = fromEntityId != null ? tempIdToNodeId.get(fromEntityId) : null;
- String toNodeId = toEntityId != null ? tempIdToNodeId.get(toEntityId) : null;
-
- // 如果无法找到对应的节点,跳过
- if (fromNodeId == null || toNodeId == null) {
- log.debug("跳过关系保存(节点不存在): from={}, to={}",
- getStringValue(relation, "fromEntity"),
- getStringValue(relation, "toEntity"));
- continue;
- }
-
- GraphRelation graphRelation = new GraphRelation();
- graphRelation.setId(UUID.randomUUID().toString().replace("-", ""));
- graphRelation.setFromNodeId(fromNodeId);
- graphRelation.setToNodeId(toNodeId);
- graphRelation.setRelationType(mapRelationType(getStringValue(relation, "relationType")));
- graphRelation.setOrderIndex(0);
- graphRelation.setCreateTime(new Date());
- graphRelation.setUpdateTime(new Date());
-
- // 保存元数据
- Map<String, Object> metadata = new HashMap<>();
- Object confidence = relation.get("confidence");
- if (confidence != null) {
- metadata.put("confidence", confidence);
- }
- metadata.put("originalType", getStringValue(relation, "relationType"));
- metadata.put("source", "ner");
- graphRelation.setMetadata(metadata);
-
- graphRelationRepository.insert(graphRelation);
- savedCount++;
- }
-
- log.info("关系保存完成: savedCount={}", savedCount);
- return savedCount;
- }
- /**
- * 删除文档的所有 NER 生成的节点和关系
- *
- * @param documentId 文档ID
- * @return 删除的节点数量
- */
- @Transactional
- public int deleteNerResultsByDocumentId(String documentId) {
- log.info("删除文档 NER 结果: documentId={}", documentId);
-
- // 获取文档的所有节点
- List<GraphNode> nodes = graphNodeRepository.findByDocumentId(documentId);
-
- int deletedCount = 0;
- for (GraphNode node : nodes) {
- // 检查是否是 NER 生成的节点
- if (node.getMetadata() instanceof Map) {
- Map<?, ?> metadata = (Map<?, ?>) node.getMetadata();
- if ("ner".equals(metadata.get("source"))) {
- // 先删除相关的关系
- List<GraphRelation> relations = graphRelationRepository.findByNodeId(node.getId());
- for (GraphRelation relation : relations) {
- graphRelationRepository.deleteById(relation.getId());
- }
- // 再删除节点
- graphNodeRepository.deleteById(node.getId());
- deletedCount++;
- }
- }
- }
-
- log.info("NER 结果删除完成: documentId={}, deletedCount={}", documentId, deletedCount);
- return deletedCount;
- }
- /**
- * 获取文档的 NER 统计信息
- *
- * @param documentId 文档ID
- * @return 统计信息
- */
- public Map<String, Object> getNerStatsByDocumentId(String documentId) {
- List<GraphNode> nodes = graphNodeRepository.findByDocumentId(documentId);
-
- int nerNodeCount = 0;
- Map<String, Integer> typeStats = new HashMap<>();
-
- for (GraphNode node : nodes) {
- if (node.getMetadata() instanceof Map) {
- Map<?, ?> metadata = (Map<?, ?>) node.getMetadata();
- if ("ner".equals(metadata.get("source"))) {
- nerNodeCount++;
- String type = node.getType();
- typeStats.put(type, typeStats.getOrDefault(type, 0) + 1);
- }
- }
- }
-
- Map<String, Object> stats = new HashMap<>();
- stats.put("documentId", documentId);
- stats.put("totalNerNodes", nerNodeCount);
- stats.put("typeStats", typeStats);
-
- return stats;
- }
- /**
- * 映射关系类型到系统定义的类型
- */
- private String mapRelationType(String originalType) {
- if (originalType == null) {
- return "DEP";
- }
-
- // 映射常见关系类型
- switch (originalType) {
- case "负责":
- case "管理":
- case "承担":
- case "属于":
- case "隶属":
- case "包含":
- case "包括":
- case "位于":
- case "在":
- case "使用":
- case "采用":
- return "DEP"; // 依赖关系
- default:
- return "DEP"; // 默认为依赖关系
- }
- }
- /**
- * 从 Map 中获取字符串值
- */
- private String getStringValue(Map<String, Object> map, String key) {
- return getStringValue(map, key, null);
- }
- /**
- * 从 Map 中获取字符串值,带默认值
- */
- private String getStringValue(Map<String, Object> map, String key, String defaultValue) {
- Object value = map.get(key);
- if (value == null) {
- return defaultValue;
- }
- return value.toString();
- }
- }
|