Просмотр исходного кода

fix: prevent chunking infinite loops at text end

- Clamp overlap to a safe range
- Stop chunking once end reaches text length
- Add safety max-chunk limit calculation
何文松 1 месяц назад
Родитель
Коммит
80c7be74c5

+ 30 - 1
backend/graph-service/src/main/java/com/lingyue/graph/service/TextChunkService.java

@@ -46,6 +46,16 @@ public class TextChunkService {
         }
         }
         log.info("开始文本分块: documentId={}, textLength={}", documentId, text.length());
         log.info("开始文本分块: documentId={}, textLength={}", documentId, text.length());
 
 
+        int safeChunkOverlap = chunkOverlap;
+        if (safeChunkOverlap < 0) {
+            log.warn("分块重叠小于 0,已修正为 0: overlap={}", chunkOverlap);
+            safeChunkOverlap = 0;
+        }
+        if (safeChunkOverlap >= chunkSize) {
+            log.warn("分块重叠过大,已修正为 size-1: overlap={}, size={}", chunkOverlap, chunkSize);
+            safeChunkOverlap = Math.max(0, chunkSize - 1);
+        }
+
         // 先删除已有的分块
         // 先删除已有的分块
         textChunkRepository.deleteByDocumentId(documentId);
         textChunkRepository.deleteByDocumentId(documentId);
 
 
@@ -53,8 +63,16 @@ public class TextChunkService {
         int start = 0;
         int start = 0;
         int chunkIndex = 0;
         int chunkIndex = 0;
         int lastReportedChunk = 0;
         int lastReportedChunk = 0;
+        int prevStart = -1;
+        int maxChunks = Math.max(1, (text.length() / Math.max(1, (chunkSize - safeChunkOverlap))) + 2);
 
 
         while (start < text.length()) {
         while (start < text.length()) {
+            if (chunks.size() > maxChunks) {
+                log.warn("分块数量异常,提前终止: documentId={}, chunks={}, textLength={}, chunkSize={}, overlap={}",
+                        documentId, chunks.size(), text.length(), chunkSize, chunkOverlap);
+                break;
+            }
+
             int end = Math.min(start + chunkSize, text.length());
             int end = Math.min(start + chunkSize, text.length());
 
 
             // 尝试在句子边界分割(避免截断句子)
             // 尝试在句子边界分割(避免截断句子)
@@ -95,8 +113,19 @@ public class TextChunkService {
             textChunkRepository.insert(chunk);
             textChunkRepository.insert(chunk);
             chunks.add(chunk);
             chunks.add(chunk);
 
 
+            if (end >= text.length()) {
+                break;
+            }
+
             // 移动起始位置(考虑重叠)
             // 移动起始位置(考虑重叠)
-            start = end - chunkOverlap;
+            start = end - safeChunkOverlap;
+            if (start <= prevStart) {
+                log.warn("分块起点未推进,修正并终止: documentId={}, prevStart={}, start={}, end={}, overlap={}",
+                        documentId, prevStart, start, end, chunkOverlap);
+                start = end;
+                break;
+            }
+            prevStart = start;
             if (start >= text.length() || start <= 0) {
             if (start >= text.length() || start <= 0) {
                 break;
                 break;
             }
             }