GraphNerService.java 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. package com.lingyue.graph.service;
  2. import com.lingyue.common.exception.ServiceException;
  3. import com.lingyue.graph.entity.GraphNode;
  4. import com.lingyue.graph.entity.GraphRelation;
  5. import com.lingyue.graph.entity.TextStorage;
  6. import com.lingyue.graph.repository.GraphNodeRepository;
  7. import com.lingyue.graph.repository.GraphRelationRepository;
  8. import com.lingyue.graph.repository.TextStorageRepository;
  9. import lombok.RequiredArgsConstructor;
  10. import lombok.extern.slf4j.Slf4j;
  11. import org.springframework.stereotype.Service;
  12. import org.springframework.transaction.annotation.Transactional;
  13. import java.nio.charset.StandardCharsets;
  14. import java.nio.file.Files;
  15. import java.nio.file.Path;
  16. import java.util.*;
  17. /**
  18. * 图数据库 NER 服务
  19. *
  20. * 负责将 NER 提取结果保存到图数据库。
  21. * 此服务位于 graph-service 模块,可以直接访问图数据库相关的实体和仓库。
  22. *
  23. * @author lingyue
  24. * @since 2026-01-19
  25. */
  26. @Slf4j
  27. @Service
  28. @RequiredArgsConstructor
  29. public class GraphNerService {
  30. private final TextStorageRepository textStorageRepository;
  31. private final GraphNodeRepository graphNodeRepository;
  32. private final GraphRelationRepository graphRelationRepository;
  33. /**
  34. * 获取文档的文本内容
  35. *
  36. * @param documentId 文档ID
  37. * @return 文本内容
  38. */
  39. public String getDocumentText(String documentId) {
  40. TextStorage textStorage = textStorageRepository.findByDocumentId(documentId);
  41. if (textStorage == null) {
  42. throw new ServiceException("文档文本存储记录不存在: documentId=" + documentId);
  43. }
  44. String filePath = textStorage.getFilePath();
  45. try {
  46. return Files.readString(Path.of(filePath), StandardCharsets.UTF_8);
  47. } catch (Exception e) {
  48. throw new ServiceException("读取文档文本失败: " + e.getMessage(), e);
  49. }
  50. }
  51. /**
  52. * 检查文档是否存在文本存储
  53. *
  54. * @param documentId 文档ID
  55. * @return 是否存在
  56. */
  57. public boolean hasDocumentText(String documentId) {
  58. TextStorage textStorage = textStorageRepository.findByDocumentId(documentId);
  59. return textStorage != null;
  60. }
  61. /**
  62. * 批量保存 NER 实体到图数据库
  63. *
  64. * @param documentId 文档ID
  65. * @param userId 用户ID
  66. * @param entities 实体列表(包含 name, type, value, position, context, confidence, tempId)
  67. * @return tempId 到实际 nodeId 的映射
  68. */
  69. @Transactional
  70. public Map<String, String> saveEntitiesToGraph(String documentId, String userId,
  71. List<Map<String, Object>> entities) {
  72. Map<String, String> tempIdToNodeId = new HashMap<>();
  73. if (entities == null || entities.isEmpty()) {
  74. log.debug("无实体需要保存");
  75. return tempIdToNodeId;
  76. }
  77. log.info("开始保存实体到图数据库: documentId={}, count={}", documentId, entities.size());
  78. for (Map<String, Object> entity : entities) {
  79. GraphNode node = new GraphNode();
  80. node.setId(UUID.randomUUID().toString().replace("-", ""));
  81. node.setDocumentId(documentId);
  82. node.setUserId(userId); // 可为 null,自动 NER 提取时没有用户上下文
  83. node.setName(getStringValue(entity, "name"));
  84. node.setType(getStringValue(entity, "type", "other").toLowerCase());
  85. node.setValue(getStringValue(entity, "value"));
  86. node.setLevel(0);
  87. node.setCreateTime(new Date());
  88. node.setUpdateTime(new Date());
  89. // 转换位置信息(直接使用字符偏移)
  90. Object positionObj = entity.get("position");
  91. if (positionObj instanceof Map) {
  92. @SuppressWarnings("unchecked")
  93. Map<String, Object> posMap = (Map<String, Object>) positionObj;
  94. log.debug("实体位置信息: name={}, position={}", node.getName(), posMap);
  95. node.setPosition(posMap);
  96. } else {
  97. log.debug("实体无位置信息: name={}, positionObj={}", node.getName(), positionObj);
  98. }
  99. // 保存元数据
  100. Map<String, Object> metadata = new HashMap<>();
  101. String context = getStringValue(entity, "context");
  102. if (context != null) {
  103. metadata.put("context", context);
  104. }
  105. Object confidence = entity.get("confidence");
  106. if (confidence != null) {
  107. metadata.put("confidence", confidence);
  108. }
  109. metadata.put("source", "ner");
  110. node.setMetadata(metadata);
  111. graphNodeRepository.insert(node);
  112. // 记录 tempId 到 nodeId 的映射
  113. String tempId = getStringValue(entity, "tempId");
  114. if (tempId != null) {
  115. tempIdToNodeId.put(tempId, node.getId());
  116. }
  117. }
  118. log.info("实体保存完成: documentId={}, savedCount={}", documentId, entities.size());
  119. return tempIdToNodeId;
  120. }
  121. /**
  122. * 批量保存 NER 关系到图数据库
  123. *
  124. * @param relations 关系列表
  125. * @param tempIdToNodeId tempId 到 nodeId 的映射
  126. * @return 保存成功的数量
  127. */
  128. @Transactional
  129. public int saveRelationsToGraph(List<Map<String, Object>> relations,
  130. Map<String, String> tempIdToNodeId) {
  131. if (relations == null || relations.isEmpty()) {
  132. log.debug("无关系需要保存");
  133. return 0;
  134. }
  135. log.info("开始保存关系到图数据库: count={}", relations.size());
  136. int savedCount = 0;
  137. for (Map<String, Object> relation : relations) {
  138. // 通过 tempId 获取实际的 nodeId
  139. String fromEntityId = getStringValue(relation, "fromEntityId");
  140. String toEntityId = getStringValue(relation, "toEntityId");
  141. String fromNodeId = fromEntityId != null ? tempIdToNodeId.get(fromEntityId) : null;
  142. String toNodeId = toEntityId != null ? tempIdToNodeId.get(toEntityId) : null;
  143. // 如果无法找到对应的节点,跳过
  144. if (fromNodeId == null || toNodeId == null) {
  145. log.debug("跳过关系保存(节点不存在): from={}, to={}",
  146. getStringValue(relation, "fromEntity"),
  147. getStringValue(relation, "toEntity"));
  148. continue;
  149. }
  150. GraphRelation graphRelation = new GraphRelation();
  151. graphRelation.setId(UUID.randomUUID().toString().replace("-", ""));
  152. graphRelation.setFromNodeId(fromNodeId);
  153. graphRelation.setToNodeId(toNodeId);
  154. graphRelation.setRelationType(mapRelationType(getStringValue(relation, "relationType")));
  155. graphRelation.setOrderIndex(0);
  156. graphRelation.setCreateTime(new Date());
  157. graphRelation.setUpdateTime(new Date());
  158. // 保存元数据
  159. Map<String, Object> metadata = new HashMap<>();
  160. Object confidence = relation.get("confidence");
  161. if (confidence != null) {
  162. metadata.put("confidence", confidence);
  163. }
  164. metadata.put("originalType", getStringValue(relation, "relationType"));
  165. metadata.put("source", "ner");
  166. graphRelation.setMetadata(metadata);
  167. graphRelationRepository.insert(graphRelation);
  168. savedCount++;
  169. }
  170. log.info("关系保存完成: savedCount={}", savedCount);
  171. return savedCount;
  172. }
  173. /**
  174. * 删除文档的所有 NER 生成的节点和关系
  175. *
  176. * @param documentId 文档ID
  177. * @return 删除的节点数量
  178. */
  179. @Transactional
  180. public int deleteNerResultsByDocumentId(String documentId) {
  181. log.info("删除文档 NER 结果: documentId={}", documentId);
  182. // 获取文档的所有节点
  183. List<GraphNode> nodes = graphNodeRepository.findByDocumentId(documentId);
  184. int deletedCount = 0;
  185. for (GraphNode node : nodes) {
  186. // 检查是否是 NER 生成的节点
  187. if (node.getMetadata() instanceof Map) {
  188. Map<?, ?> metadata = (Map<?, ?>) node.getMetadata();
  189. if ("ner".equals(metadata.get("source"))) {
  190. // 先删除相关的关系
  191. List<GraphRelation> relations = graphRelationRepository.findByNodeId(node.getId());
  192. for (GraphRelation relation : relations) {
  193. graphRelationRepository.deleteById(relation.getId());
  194. }
  195. // 再删除节点
  196. graphNodeRepository.deleteById(node.getId());
  197. deletedCount++;
  198. }
  199. }
  200. }
  201. log.info("NER 结果删除完成: documentId={}, deletedCount={}", documentId, deletedCount);
  202. return deletedCount;
  203. }
  204. /**
  205. * 获取文档的 NER 统计信息
  206. *
  207. * @param documentId 文档ID
  208. * @return 统计信息
  209. */
  210. public Map<String, Object> getNerStatsByDocumentId(String documentId) {
  211. List<GraphNode> nodes = graphNodeRepository.findByDocumentId(documentId);
  212. int nerNodeCount = 0;
  213. Map<String, Integer> typeStats = new HashMap<>();
  214. for (GraphNode node : nodes) {
  215. if (node.getMetadata() instanceof Map) {
  216. Map<?, ?> metadata = (Map<?, ?>) node.getMetadata();
  217. if ("ner".equals(metadata.get("source"))) {
  218. nerNodeCount++;
  219. String type = node.getType();
  220. typeStats.put(type, typeStats.getOrDefault(type, 0) + 1);
  221. }
  222. }
  223. }
  224. Map<String, Object> stats = new HashMap<>();
  225. stats.put("documentId", documentId);
  226. stats.put("totalNerNodes", nerNodeCount);
  227. stats.put("typeStats", typeStats);
  228. return stats;
  229. }
  230. /**
  231. * 映射关系类型到系统定义的类型
  232. */
  233. private String mapRelationType(String originalType) {
  234. if (originalType == null) {
  235. return "DEP";
  236. }
  237. // 映射常见关系类型
  238. switch (originalType) {
  239. case "负责":
  240. case "管理":
  241. case "承担":
  242. case "属于":
  243. case "隶属":
  244. case "包含":
  245. case "包括":
  246. case "位于":
  247. case "在":
  248. case "使用":
  249. case "采用":
  250. return "DEP"; // 依赖关系
  251. default:
  252. return "DEP"; // 默认为依赖关系
  253. }
  254. }
  255. /**
  256. * 从 Map 中获取字符串值
  257. */
  258. private String getStringValue(Map<String, Object> map, String key) {
  259. return getStringValue(map, key, null);
  260. }
  261. /**
  262. * 从 Map 中获取字符串值,带默认值
  263. */
  264. private String getStringValue(Map<String, Object> map, String key, String defaultValue) {
  265. Object value = map.get(key);
  266. if (value == null) {
  267. return defaultValue;
  268. }
  269. return value.toString();
  270. }
  271. }