Kaynağa Gözat

feat: add progress logs for chunking and embedding

Emit periodic progress logs during text chunking and
Ollama embedding to make RAG indexing progress visible.
何文松 1 ay önce
ebeveyn
işleme
1dad89ed93

+ 9 - 0
backend/graph-service/src/main/java/com/lingyue/graph/service/OllamaEmbeddingService.java

@@ -135,14 +135,23 @@ public class OllamaEmbeddingService {
     @Transactional
     public List<VectorEmbedding> embedBatch(List<TextChunk> chunks) {
         List<VectorEmbedding> embeddings = new ArrayList<>();
+        int processed = 0;
+        int failed = 0;
 
         for (TextChunk chunk : chunks) {
             try {
                 VectorEmbedding embedding = embedAndSave(chunk);
                 embeddings.add(embedding);
+                processed++;
             } catch (Exception e) {
                 log.error("分块 {} 向量化失败: {}", chunk.getId(), e.getMessage());
                 // 继续处理其他分块
+                failed++;
+            }
+
+            if (processed % 10 == 0 || processed + failed == chunks.size()) {
+                log.info("向量化进度: {}/{} (成功: {}, 失败: {})",
+                        processed + failed, chunks.size(), processed, failed);
             }
         }
 

+ 9 - 0
backend/graph-service/src/main/java/com/lingyue/graph/service/TextChunkService.java

@@ -44,6 +44,7 @@ public class TextChunkService {
             log.warn("文本为空,跳过分块: documentId={}", documentId);
             return Collections.emptyList();
         }
+        log.info("开始文本分块: documentId={}, textLength={}", documentId, text.length());
 
         // 先删除已有的分块
         textChunkRepository.deleteByDocumentId(documentId);
@@ -51,6 +52,7 @@ public class TextChunkService {
         List<TextChunk> chunks = new ArrayList<>();
         int start = 0;
         int chunkIndex = 0;
+        int lastReportedChunk = 0;
 
         while (start < text.length()) {
             int end = Math.min(start + chunkSize, text.length());
@@ -98,6 +100,13 @@ public class TextChunkService {
             if (start >= text.length() || start <= 0) {
                 break;
             }
+
+            // 进度日志:每 20 个分块输出一次
+            if (chunks.size() - lastReportedChunk >= 20) {
+                log.info("文本分块进度: documentId={}, chunks={}, pos={}/{}",
+                        documentId, chunks.size(), end, text.length());
+                lastReportedChunk = chunks.size();
+            }
         }
 
         log.info("文档 {} 分块完成,共 {} 块", documentId, chunks.size());