Преглед на файлове

feat: add batch embedding config for RAG

Process embeddings in configurable batches to reduce
memory pressure during RAG indexing. Adds the
rag.embedding.batch-size config property (default 10).
Also raises rag.chunk.size from 500 to 1200.
何文松 преди 1 месец
родител
ревизия
a128eda8ef

+ 21 - 13
backend/graph-service/src/main/java/com/lingyue/graph/service/OllamaEmbeddingService.java

@@ -31,6 +31,9 @@ public class OllamaEmbeddingService {
     @Value("${ollama.embedding.model:nomic-embed-text}")
     private String embeddingModel;
 
+    @Value("${rag.embedding.batch-size:10}")
+    private int embeddingBatchSize;
+
     /**
      * 对单个文本进行向量化
      *
@@ -138,21 +141,26 @@ public class OllamaEmbeddingService {
         int processed = 0;
         int failed = 0;
 
-        for (TextChunk chunk : chunks) {
-            try {
-                VectorEmbedding embedding = embedAndSave(chunk);
-                embeddings.add(embedding);
-                processed++;
-            } catch (Exception e) {
-                log.error("分块 {} 向量化失败: {}", chunk.getId(), e.getMessage());
-                // 继续处理其他分块
-                failed++;
+        int batchSize = Math.max(1, embeddingBatchSize);
+        for (int batchStart = 0; batchStart < chunks.size(); batchStart += batchSize) {
+            int batchEnd = Math.min(batchStart + batchSize, chunks.size());
+            List<TextChunk> batch = chunks.subList(batchStart, batchEnd);
+            log.info("向量化批次开始: batch={}-{}, total={}", batchStart + 1, batchEnd, chunks.size());
+
+            for (TextChunk chunk : batch) {
+                try {
+                    VectorEmbedding embedding = embedAndSave(chunk);
+                    embeddings.add(embedding);
+                    processed++;
+                } catch (Exception e) {
+                    log.error("分块 {} 向量化失败: {}", chunk.getId(), e.getMessage());
+                    // 继续处理其他分块
+                    failed++;
+                }
             }
 
-            if (processed % 10 == 0 || processed + failed == chunks.size()) {
-                log.info("向量化进度: {}/{} (成功: {}, 失败: {})",
-                        processed + failed, chunks.size(), processed, failed);
-            }
+            log.info("向量化批次完成: processed={}/{} (成功: {}, 失败: {})",
+                    processed + failed, chunks.size(), processed, failed);
         }
 
         log.info("批量向量化完成,成功 {}/{} 块", embeddings.size(), chunks.size());

+ 4 - 1
backend/lingyue-starter/src/main/resources/application.properties

@@ -88,9 +88,12 @@ ollama.embedding.model=${OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
 ollama.timeout=60000
 
 # RAG 分块配置
-rag.chunk.size=500
+rag.chunk.size=1200
 rag.chunk.overlap=50
 
+# 向量化批处理配置
+rag.embedding.batch-size=10
+
 # RAG 检索配置
 rag.search.top-k=3