1 månad sedan · 9ac992d618
--- a/backend/graph-service/src/main/java/com/lingyue/graph/service/GraphNerService.java
+++ b/backend/graph-service/src/main/java/com/lingyue/graph/service/GraphNerService.java
@@ -1,7 +1,6 @@
 
				 package com.lingyue.graph.service;
			
 
				 
			
 
				 import com.lingyue.common.exception.ServiceException;
			
 
				-import com.lingyue.graph.dto.*;
			
 
				 import com.lingyue.graph.entity.GraphNode;
			
 
				 import com.lingyue.graph.entity.GraphRelation;
			
 
				 import com.lingyue.graph.entity.TextStorage;
			
@@ -35,6 +34,7 @@ public class GraphNerService {
 
				     private final TextStorageRepository textStorageRepository;
			
 
				     private final GraphNodeRepository graphNodeRepository;
			
 
				     private final GraphRelationRepository graphRelationRepository;
			
 
				+    private final PositionMappingService positionMappingService;
			
 
				 
			
 
				     /**
			
 
				      * 获取文档的文本内容
			
@@ -99,13 +99,17 @@ public class GraphNerService {
 
				             node.setCreateTime(new Date());
			
 
				             node.setUpdateTime(new Date());
			
 
				             
			
 
				-            // 转换位置信息
			
 
				+            // 转换位置信息并补充页码/行号
			
 
				             Object positionObj = entity.get("position");
			
 
				             if (positionObj instanceof Map) {
			
 
				                 @SuppressWarnings("unchecked")
			
 
				                 Map<String, Object> posMap = (Map<String, Object>) positionObj;
			
 
				-                log.debug("实体位置信息: name={}, position={}", node.getName(), posMap);
			
 
				-                node.setPosition(posMap);
			
 
				+                
			
 
				+                // 使用 PositionMappingService 补充页码和行号
			
 
				+                Map<String, Object> enrichedPosition = enrichPosition(documentId, posMap);
			
 
				+                
			
 
				+                log.debug("实体位置信息: name={}, position={}", node.getName(), enrichedPosition);
			
 
				+                node.setPosition(enrichedPosition);
			
 
				             } else {
			
 
				                 log.debug("实体无位置信息: name={}, positionObj={}", node.getName(), positionObj);
			
 
				             }
			
@@ -290,6 +294,63 @@ public class GraphNerService {
 
				         }
			
 
				     }
			
 
				 
			
 
				+    /**
				+     * Adds page and line information to an entity position.
				+     *
				+     * <p>The input map is returned as-is when it already holds non-null
				+     * {@code page} and {@code line} entries, when {@code charStart} or
				+     * {@code charEnd} cannot be read as integers, or when the mapping call
				+     * throws (a warning is logged in that case). Otherwise a new map is
				+     * returned: a copy of {@code posMap} overlaid with the entries produced by
				+     * {@code positionMappingService.mapCharToPosition}.
				+     *
				+     * @param documentId document ID
				+     * @param posMap original position information (expected to contain charStart, charEnd)
				+     * @return the enriched position, or {@code posMap} itself when nothing could be added
				+     */
				+    private Map<String, Object> enrichPosition(String documentId, Map<String, Object> posMap) {
				+        // Page and line are both known already: nothing to look up.
				+        if (posMap.get("page") != null && posMap.get("line") != null) {
				+            return posMap;
				+        }
				+
				+        Integer charStart = getIntValue(posMap, "charStart");
				+        Integer charEnd = getIntValue(posMap, "charEnd");
				+        boolean offsetsKnown = charStart != null && charEnd != null;
				+        if (!offsetsKnown) {
				+            return posMap;
				+        }
				+
				+        // Falls back to the untouched input unless the mapping succeeds.
				+        Map<String, Object> result = posMap;
				+        try {
				+            Map<String, Object> merged = new HashMap<>(posMap);
				+            // Mapped entries win over the original ones on key collisions.
				+            merged.putAll(positionMappingService.mapCharToPosition(documentId, charStart, charEnd));
				+            result = merged;
				+        } catch (Exception e) {
				+            log.warn("位置映射失败: documentId={}, charStart={}, charEnd={}, error={}", 
				+                    documentId, charStart, charEnd, e.getMessage());
				+        }
				+        return result;
				+    }
			
 
				+    
			
 
				+    /**
				+     * Reads a map entry as an {@code Integer}.
				+     *
				+     * <p>{@link Number} values are converted with {@code intValue()}; any other
				+     * non-null value is parsed from its {@code toString()} form.
				+     *
				+     * @param map map to read from
				+     * @param key key to look up
				+     * @return the integer value, or {@code null} when the entry is absent,
				+     *         {@code null}, or not parseable as an integer
				+     */
				+    private Integer getIntValue(Map<String, Object> map, String key) {
				+        Object raw = map.get(key);
				+        if (raw instanceof Number) {
				+            return ((Number) raw).intValue();
				+        }
				+        if (raw == null) {
				+            return null;
				+        }
				+        try {
				+            return Integer.valueOf(raw.toString());
				+        } catch (NumberFormatException e) {
				+            // Not a decimal integer: treated the same as a missing value.
				+            return null;
				+        }
				+    }
			
 
				+    
			
 
				     /**
			
 
				      * 从 Map 中获取字符串值
			
 
				      */
			
--- a/backend/graph-service/src/main/java/com/lingyue/graph/service/PositionMappingService.java
+++ b/backend/graph-service/src/main/java/com/lingyue/graph/service/PositionMappingService.java
@@ -0,0 +1,334 @@
 
				+package com.lingyue.graph.service;
			
 
				+
			
 
				+import com.fasterxml.jackson.databind.JsonNode;
			
 
				+import com.fasterxml.jackson.databind.ObjectMapper;
			
 
				+import lombok.Data;
			
 
				+import lombok.extern.slf4j.Slf4j;
			
 
				+import org.springframework.beans.factory.annotation.Value;
			
 
				+import org.springframework.stereotype.Service;
			
 
				+
			
 
				+import java.nio.file.Files;
			
 
				+import java.nio.file.Path;
			
 
				+import java.util.HashMap;
			
 
				+import java.util.Map;
			
 
				+import java.util.concurrent.ConcurrentHashMap;
			
 
				+
			
 
				+/**
				+ * Position mapping service.
				+ *
				+ * <p>Maps character offsets of a document to page and line numbers (and back)
				+ * using a per-document JSON index file located under the configured text
				+ * storage directory. Parsed indexes are kept in an in-memory cache until
				+ * {@link #clearCache(String)} or {@link #clearAllCache()} is called.
				+ *
				+ * @author lingyue
				+ * @since 2026-01-20
				+ */
				+@Slf4j
				+@Service
				+public class PositionMappingService {
				+
				+    private final ObjectMapper objectMapper;
				+
				+    @Value("${file.storage.text-path:/data/lingyue/texts}")
				+    private String textStoragePath;
				+
				+    // Index cache, avoids re-reading and re-parsing the index file.
				+    // NOTE(review): nothing evicts entries automatically; confirm that callers
				+    // invoke clearCache/clearAllCache when a document is re-indexed or removed.
				+    private final Map<String, DocumentIndex> indexCache = new ConcurrentHashMap<>();
				+
				+    public PositionMappingService(ObjectMapper objectMapper) {
				+        this.objectMapper = objectMapper;
				+    }
				+
				+    /**
				+     * Builds the full position information for a character range.
				+     *
				+     * <p>The result always contains {@code charStart} and {@code charEnd}. When
				+     * the document index can be loaded it also contains {@code page} and
				+     * {@code line} (both looked up from {@code charStart}), plus
				+     * {@code globalLine} when the index has a non-empty line table.
				+     *
				+     * @param documentId document ID
				+     * @param charStart start character offset
				+     * @param charEnd end character offset
				+     * @return a new mutable map with the position entries described above
				+     */
				+    public Map<String, Object> mapCharToPosition(String documentId, int charStart, int charEnd) {
				+        DocumentIndex index = loadDocumentIndex(documentId);
				+        if (index == null) {
				+            log.debug("未找到文档索引，返回仅包含字符位置的信息: documentId={}", documentId);
				+        }
				+        return buildPosition(index, charStart, charEnd);
				+    }
				+
				+    /**
				+     * Looks up the character range for a page and a line within that page.
				+     *
				+     * <p>When the index has a line table and it contains the requested line,
				+     * that line's range is returned. Otherwise (no line table, or the line is
				+     * not listed) the range of the whole page is returned.
				+     *
				+     * @param documentId document ID
				+     * @param page page number
				+     * @param line line number within the page (1-based offset from the page's first line)
				+     * @return {@code [charStart, charEnd]}, or {@code null} when the index or
				+     *         the page cannot be found
				+     */
				+    public int[] mapPageLineToChar(String documentId, int page, int line) {
				+        DocumentIndex index = loadDocumentIndex(documentId);
				+        if (index == null || index.getPages() == null) {
				+            return null;
				+        }
				+
				+        PageIndex pageIndex = null;
				+        for (PageIndex candidate : index.getPages()) {
				+            if (candidate.getPage() == page) {
				+                pageIndex = candidate;
				+                break;
				+            }
				+        }
				+        if (pageIndex == null) {
				+            return null;
				+        }
				+
				+        if (index.getLines() != null) {
				+            // Line entries are numbered document-wide, so shift the in-page
				+            // line number by the page's first line.
				+            int targetGlobalLine = pageIndex.getLineStart() + line - 1;
				+            for (LineIndex lineIndex : index.getLines()) {
				+                if (lineIndex.getLine() == targetGlobalLine) {
				+                    return new int[]{lineIndex.getCharStart(), lineIndex.getCharEnd()};
				+                }
				+            }
				+        }
				+
				+        // No matching line entry: fall back to the range of the whole page.
				+        return new int[]{pageIndex.getCharStart(), pageIndex.getCharEnd()};
				+    }
				+
				+    /**
				+     * Maps several character ranges of one document in a single call.
				+     * The document index is loaded once and reused for every range; each
				+     * result has the same entries as {@link #mapCharToPosition}.
				+     *
				+     * @param documentId document ID
				+     * @param charPositions character ranges {@code [[charStart1, charEnd1], [charStart2, charEnd2], ...]}
				+     * @return one position map per input range, in input order
				+     * @throws IllegalArgumentException if {@code charPositions} is {@code null}
				+     *         or one of its rows is {@code null} or has fewer than two elements
				+     */
				+    public Map<String, Object>[] mapCharToPositionBatch(String documentId, int[][] charPositions) {
				+        if (charPositions == null) {
				+            throw new IllegalArgumentException("charPositions must not be null");
				+        }
				+
				+        DocumentIndex index = loadDocumentIndex(documentId);
				+
				+        @SuppressWarnings("unchecked")
				+        Map<String, Object>[] results = new Map[charPositions.length];
				+
				+        for (int i = 0; i < charPositions.length; i++) {
				+            int[] range = charPositions[i];
				+            if (range == null || range.length < 2) {
				+                throw new IllegalArgumentException(
				+                        "charPositions[" + i + "] must contain [charStart, charEnd]");
				+            }
				+            results[i] = buildPosition(index, range[0], range[1]);
				+        }
				+
				+        return results;
				+    }
				+
				+    /**
				+     * Removes the cached index of one document.
				+     *
				+     * @param documentId document ID; {@code null} is ignored
				+     */
				+    public void clearCache(String documentId) {
				+        // ConcurrentHashMap rejects null keys, so skip them explicitly.
				+        if (documentId != null) {
				+            indexCache.remove(documentId);
				+        }
				+    }
				+
				+    /**
				+     * Removes all cached indexes.
				+     */
				+    public void clearAllCache() {
				+        indexCache.clear();
				+    }
				+
				+    /**
				+     * Assembles a position map for one character range.
				+     *
				+     * @param index document index, or {@code null} when none is available
				+     * @param charStart start character offset
				+     * @param charEnd end character offset
				+     * @return a new map with charStart/charEnd and, when {@code index} is
				+     *         non-null, page/line (and globalLine if a line table exists)
				+     */
				+    private Map<String, Object> buildPosition(DocumentIndex index, int charStart, int charEnd) {
				+        Map<String, Object> position = new HashMap<>();
				+        position.put("charStart", charStart);
				+        position.put("charEnd", charEnd);
				+        if (index == null) {
				+            return position;
				+        }
				+
				+        position.put("page", findPage(index, charStart));
				+
				+        // NOTE(review): "line" is the document-wide line number taken from the
				+        // line table (identical to "globalLine"), whereas mapPageLineToChar
				+        // interprets its line argument as page-relative. Confirm which meaning
				+        // consumers of "line" expect.
				+        int line = findLine(index, charStart);
				+        position.put("line", line);
				+        if (index.getLines() != null && index.getLines().length > 0) {
				+            position.put("globalLine", line);
				+        }
				+
				+        return position;
				+    }
				+
				+    /**
				+     * Returns the index of a document, reading and caching it on first use.
				+     *
				+     * @param documentId document ID
				+     * @return the index, or {@code null} when the ID is {@code null} or not
				+     *         usable as a file name, the index file does not exist, or it
				+     *         cannot be read or parsed
				+     */
				+    private DocumentIndex loadDocumentIndex(String documentId) {
				+        if (documentId == null) {
				+            log.warn("文档ID为空，无法加载文档索引");
				+            return null;
				+        }
				+
				+        // Single lookup: a containsKey/get pair is not atomic against clearCache.
				+        DocumentIndex cached = indexCache.get(documentId);
				+        if (cached != null) {
				+            return cached;
				+        }
				+
				+        try {
				+            Path path = resolveIndexFile(documentId);
				+            if (path == null) {
				+                log.warn("文档ID不合法，无法定位索引文件: documentId={}", documentId);
				+                return null;
				+            }
				+            if (!Files.exists(path)) {
				+                log.debug("索引文件不存在: {}", path);
				+                return null;
				+            }
				+
				+            JsonNode root = objectMapper.readTree(Files.readString(path));
				+            DocumentIndex index = parseIndex(root, documentId);
				+
				+            indexCache.put(documentId, index);
				+            log.debug("已加载并缓存文档索引: documentId={}, pages={}, lines={}", 
				+                    documentId, 
				+                    index.getPages() != null ? index.getPages().length : 0,
				+                    index.getLines() != null ? index.getLines().length : 0);
				+
				+            return index;
				+        } catch (Exception e) {
				+            // Best effort: callers treat a missing index as "no page/line info".
				+            log.error("加载文档索引失败: documentId={}", documentId, e);
				+            return null;
				+        }
				+    }
				+
				+    /**
				+     * Resolves the index file of a document:
				+     * {@code <textStoragePath>/<first two characters of the ID>/<ID>_index.json}.
				+     *
				+     * @param documentId non-null document ID
				+     * @return the normalized absolute path, or {@code null} when the ID is
				+     *         shorter than two characters, contains a path separator, or
				+     *         would resolve outside the storage directory
				+     */
				+    private Path resolveIndexFile(String documentId) {
				+        boolean hasSeparator = documentId.indexOf('/') >= 0 || documentId.indexOf('\\') >= 0;
				+        if (documentId.length() < 2 || hasSeparator) {
				+            return null;
				+        }
				+
				+        Path base = Path.of(textStoragePath).toAbsolutePath().normalize();
				+        Path file = base.resolve(documentId.substring(0, 2))
				+                .resolve(documentId + "_index.json")
				+                .normalize();
				+
				+        // Rejects IDs such as ".." that would climb out of the storage directory.
				+        return file.startsWith(base) ? file : null;
				+    }
				+
				+    /**
				+     * Converts the parsed JSON document into a {@link DocumentIndex}.
				+     * Missing scalar fields fall back to the given ID or to 0; the page and
				+     * line tables stay {@code null} when the JSON has no such array.
				+     */
				+    private DocumentIndex parseIndex(JsonNode root, String documentId) {
				+        DocumentIndex index = new DocumentIndex();
				+        index.setDocumentId(root.path("documentId").asText(documentId));
				+        index.setTotalChars(root.path("totalChars").asInt(0));
				+        index.setTotalLines(root.path("totalLines").asInt(0));
				+        index.setTotalPages(root.path("totalPages").asInt(0));
				+
				+        JsonNode pagesNode = root.path("pages");
				+        if (pagesNode.isArray()) {
				+            index.setPages(parsePages(pagesNode));
				+        }
				+
				+        JsonNode linesNode = root.path("lines");
				+        if (linesNode.isArray()) {
				+            index.setLines(parseLines(linesNode));
				+        }
				+
				+        return index;
				+    }
				+
				+    /**
				+     * Parses the "pages" JSON array; a missing page number defaults to the
				+     * 1-based array position.
				+     */
				+    private PageIndex[] parsePages(JsonNode pagesNode) {
				+        PageIndex[] pages = new PageIndex[pagesNode.size()];
				+        for (int i = 0; i < pages.length; i++) {
				+            JsonNode pageNode = pagesNode.get(i);
				+            PageIndex pageIndex = new PageIndex();
				+            pageIndex.setPage(pageNode.path("page").asInt(i + 1));
				+            pageIndex.setCharStart(pageNode.path("charStart").asInt(0));
				+            pageIndex.setCharEnd(pageNode.path("charEnd").asInt(0));
				+            pageIndex.setLineStart(pageNode.path("lineStart").asInt(1));
				+            pageIndex.setLineEnd(pageNode.path("lineEnd").asInt(1));
				+            pages[i] = pageIndex;
				+        }
				+        return pages;
				+    }
				+
				+    /**
				+     * Parses the "lines" JSON array; a missing line number defaults to the
				+     * 1-based array position.
				+     */
				+    private LineIndex[] parseLines(JsonNode linesNode) {
				+        LineIndex[] lines = new LineIndex[linesNode.size()];
				+        for (int i = 0; i < lines.length; i++) {
				+            JsonNode lineNode = linesNode.get(i);
				+            LineIndex lineIndex = new LineIndex();
				+            lineIndex.setLine(lineNode.path("line").asInt(i + 1));
				+            lineIndex.setCharStart(lineNode.path("charStart").asInt(0));
				+            lineIndex.setCharEnd(lineNode.path("charEnd").asInt(0));
				+            lines[i] = lineIndex;
				+        }
				+        return lines;
				+    }
				+
				+    /**
				+     * Finds the page containing a character offset with a linear scan over
				+     * the page table (bounds are inclusive).
				+     *
				+     * @return the matching page number; 1 when there is no page table; the
				+     *         last page's number when no page contains the offset
				+     */
				+    private int findPage(DocumentIndex index, int charPosition) {
				+        PageIndex[] pages = index.getPages();
				+        if (pages == null || pages.length == 0) {
				+            return 1;
				+        }
				+
				+        for (PageIndex page : pages) {
				+            if (charPosition >= page.getCharStart() && charPosition <= page.getCharEnd()) {
				+                return page.getPage();
				+            }
				+        }
				+
				+        // Not found: report the last page.
				+        return pages[pages.length - 1].getPage();
				+    }
				+
				+    /**
				+     * Finds the line containing a character offset with a binary search over
				+     * the line table (bounds are inclusive).
				+     *
				+     * <p>NOTE(review): the search is only meaningful if the line table is
				+     * sorted by ascending character offset; nothing here enforces that.
				+     *
				+     * @return the matching line number; 1 when there is no line table; when
				+     *         no line contains the offset, the line at the position where
				+     *         the search stopped, or the last line if that is past the end
				+     */
				+    private int findLine(DocumentIndex index, int charPosition) {
				+        LineIndex[] lines = index.getLines();
				+        if (lines == null || lines.length == 0) {
				+            return 1;
				+        }
				+
				+        int left = 0;
				+        int right = lines.length - 1;
				+
				+        while (left <= right) {
				+            // Unsigned shift keeps the midpoint correct even if left + right overflows.
				+            int mid = (left + right) >>> 1;
				+            LineIndex line = lines[mid];
				+
				+            if (charPosition < line.getCharStart()) {
				+                right = mid - 1;
				+            } else if (charPosition > line.getCharEnd()) {
				+                left = mid + 1;
				+            } else {
				+                return line.getLine();
				+            }
				+        }
				+
				+        // Not found: use the line where the search stopped, clamped to the last line.
				+        if (left >= lines.length) {
				+            return lines[lines.length - 1].getLine();
				+        }
				+        return lines[left].getLine();
				+    }
				+
				+    /**
				+     * Document index: page and line tables plus document totals.
				+     */
				+    @Data
				+    public static class DocumentIndex {
				+        private String documentId;
				+        private PageIndex[] pages;
				+        private LineIndex[] lines;
				+        private int totalChars;
				+        private int totalLines;
				+        private int totalPages;
				+    }
				+
				+    /**
				+     * Page index entry: character range and line range of one page.
				+     */
				+    @Data
				+    public static class PageIndex {
				+        private int page;
				+        private int charStart;
				+        private int charEnd;
				+        private int lineStart;
				+        private int lineEnd;
				+    }
				+
				+    /**
				+     * Line index entry: character range of one line.
				+     */
				+    @Data
				+    public static class LineIndex {
				+        private int line;
				+        private int charStart;
				+        private int charEnd;
				+    }
				+}