|
|
@@ -0,0 +1,229 @@
|
|
|
+package com.lingyue.graph.service;
|
|
|
+
|
|
|
+import com.lingyue.ai.client.DeepSeekClient;
|
|
|
+import com.lingyue.common.exception.ServiceException;
|
|
|
+import com.lingyue.graph.entity.TextChunk;
|
|
|
+import lombok.RequiredArgsConstructor;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.transaction.annotation.Transactional;
|
|
|
+
|
|
|
+import java.io.IOException;
|
|
|
+import java.nio.charset.StandardCharsets;
|
|
|
+import java.nio.file.Files;
|
|
|
+import java.nio.file.Path;
|
|
|
+import java.util.List;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+/**
|
|
|
+ * RAG(检索增强生成)服务
|
|
|
+ * 核心业务逻辑:索引文档、向量检索、问答生成
|
|
|
+ *
|
|
|
+ * @author lingyue
|
|
|
+ * @since 2026-01-15
|
|
|
+ */
|
|
|
+@Slf4j
|
|
|
+@Service
|
|
|
+@RequiredArgsConstructor
|
|
|
+public class RAGService {
|
|
|
+
|
|
|
+ private final TextChunkService textChunkService;
|
|
|
+ private final OllamaEmbeddingService ollamaEmbeddingService;
|
|
|
+ private final VectorSearchService vectorSearchService;
|
|
|
+ private final DeepSeekClient deepSeekClient;
|
|
|
+
|
|
|
+ @Value("${rag.search.top-k:3}")
|
|
|
+ private int defaultTopK;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 索引文档(分块 + 向量化)
|
|
|
+ *
|
|
|
+ * @param documentId 文档ID
|
|
|
+ * @param textStorageId 文本存储ID
|
|
|
+ * @param text 文档文本内容
|
|
|
+ * @return 索引的分块数量
|
|
|
+ */
|
|
|
+ @Transactional
|
|
|
+ public int indexDocument(String documentId, String textStorageId, String text) {
|
|
|
+ if (text == null || text.trim().isEmpty()) {
|
|
|
+ log.warn("文本为空,跳过索引: documentId={}", documentId);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ log.info("开始索引文档: documentId={}, textLength={}", documentId, text.length());
|
|
|
+
|
|
|
+ // 1. 文本分块
|
|
|
+ List<TextChunk> chunks = textChunkService.chunkText(documentId, textStorageId, text);
|
|
|
+
|
|
|
+ if (chunks.isEmpty()) {
|
|
|
+ log.warn("文档分块为空: documentId={}", documentId);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 向量化并保存
|
|
|
+ ollamaEmbeddingService.embedBatch(chunks);
|
|
|
+
|
|
|
+ log.info("文档索引完成: documentId={}, chunks={}", documentId, chunks.size());
|
|
|
+ return chunks.size();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 从文件路径索引文档
|
|
|
+ *
|
|
|
+ * @param documentId 文档ID
|
|
|
+ * @param textStorageId 文本存储ID
|
|
|
+ * @param filePath 文本文件路径
|
|
|
+ * @return 索引的分块数量
|
|
|
+ */
|
|
|
+ @Transactional
|
|
|
+ public int indexDocumentFromFile(String documentId, String textStorageId, String filePath) {
|
|
|
+ try {
|
|
|
+ String text = Files.readString(Path.of(filePath), StandardCharsets.UTF_8);
|
|
|
+ return indexDocument(documentId, textStorageId, text);
|
|
|
+ } catch (IOException e) {
|
|
|
+ log.error("读取文件失败: {}", filePath, e);
|
|
|
+ throw new ServiceException("读取文件失败: " + e.getMessage(), e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除文档索引
|
|
|
+ *
|
|
|
+ * @param documentId 文档ID
|
|
|
+ */
|
|
|
+ @Transactional
|
|
|
+ public void deleteIndex(String documentId) {
|
|
|
+ // 删除分块会级联删除向量(外键约束)
|
|
|
+ int count = textChunkService.deleteByDocumentId(documentId);
|
|
|
+ log.info("删除文档索引: documentId={}, chunks={}", documentId, count);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * RAG 问答
|
|
|
+ *
|
|
|
+ * @param question 用户问题
|
|
|
+ * @param documentId 文档ID(可选,为null时全局检索)
|
|
|
+ * @param topK 检索数量
|
|
|
+ * @return RAG 回答结果
|
|
|
+ */
|
|
|
+ public RAGResult query(String question, String documentId, Integer topK) {
|
|
|
+ if (question == null || question.trim().isEmpty()) {
|
|
|
+ throw new ServiceException("问题不能为空");
|
|
|
+ }
|
|
|
+
|
|
|
+ int k = topK != null ? topK : defaultTopK;
|
|
|
+
|
|
|
+ log.info("RAG 查询: question='{}', documentId={}, topK={}",
|
|
|
+ question.substring(0, Math.min(50, question.length())), documentId, k);
|
|
|
+
|
|
|
+ // 1. 向量检索
|
|
|
+ List<VectorSearchService.SearchResult> searchResults =
|
|
|
+ vectorSearchService.search(question, documentId, k);
|
|
|
+
|
|
|
+ if (searchResults.isEmpty()) {
|
|
|
+ return RAGResult.builder()
|
|
|
+ .question(question)
|
|
|
+ .answer("未找到相关信息,请尝试其他问题。")
|
|
|
+ .chunks(List.of())
|
|
|
+ .build();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 构建上下文
|
|
|
+ String context = buildContext(searchResults);
|
|
|
+
|
|
|
+ // 3. 构建 Prompt 并调用 LLM
|
|
|
+ String prompt = buildPrompt(context, question);
|
|
|
+ String answer = deepSeekClient.complete(prompt);
|
|
|
+
|
|
|
+ // 4. 构建结果
|
|
|
+ List<RAGResult.ChunkInfo> chunkInfos = searchResults.stream()
|
|
|
+ .map(r -> RAGResult.ChunkInfo.builder()
|
|
|
+ .chunkId(r.getChunkId())
|
|
|
+ .documentId(r.getDocumentId())
|
|
|
+ .content(r.getContent())
|
|
|
+ .similarity(r.getSimilarity())
|
|
|
+ .build())
|
|
|
+ .collect(Collectors.toList());
|
|
|
+
|
|
|
+ RAGResult result = RAGResult.builder()
|
|
|
+ .question(question)
|
|
|
+ .answer(answer)
|
|
|
+ .chunks(chunkInfos)
|
|
|
+ .build();
|
|
|
+
|
|
|
+ log.info("RAG 查询完成: chunks={}, answerLength={}",
|
|
|
+ searchResults.size(), answer != null ? answer.length() : 0);
|
|
|
+
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 构建上下文(拼接检索到的文本块)
|
|
|
+ */
|
|
|
+ private String buildContext(List<VectorSearchService.SearchResult> results) {
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
+
|
|
|
+ for (int i = 0; i < results.size(); i++) {
|
|
|
+ VectorSearchService.SearchResult result = results.get(i);
|
|
|
+ sb.append(String.format("【片段%d】(相似度: %.2f)\n%s\n\n",
|
|
|
+ i + 1, result.getSimilarity(), result.getContent()));
|
|
|
+ }
|
|
|
+
|
|
|
+ return sb.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 构建 Prompt
|
|
|
+ */
|
|
|
+ private String buildPrompt(String context, String question) {
|
|
|
+ return String.format("""
|
|
|
+ 你是一个专业的文档分析助手。请基于以下文档内容回答用户问题。
|
|
|
+
|
|
|
+ ## 文档内容
|
|
|
+ %s
|
|
|
+
|
|
|
+ ## 用户问题
|
|
|
+ %s
|
|
|
+
|
|
|
+ ## 回答要求
|
|
|
+ 1. 仅基于文档内容回答,不要编造信息
|
|
|
+ 2. 如果文档中没有相关信息,请明确说明"文档中未找到相关信息"
|
|
|
+ 3. 回答要准确、简洁、专业
|
|
|
+ 4. 如果需要引用文档内容,请标注来源片段
|
|
|
+
|
|
|
+ 请回答:
|
|
|
+ """, context, question);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * RAG 查询结果
|
|
|
+ */
|
|
|
+ @lombok.Data
|
|
|
+ @lombok.Builder
|
|
|
+ public static class RAGResult {
|
|
|
+ /**
|
|
|
+ * 用户问题
|
|
|
+ */
|
|
|
+ private String question;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * AI 回答
|
|
|
+ */
|
|
|
+ private String answer;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 检索到的文本块
|
|
|
+ */
|
|
|
+ private List<ChunkInfo> chunks;
|
|
|
+
|
|
|
+ @lombok.Data
|
|
|
+ @lombok.Builder
|
|
|
+ public static class ChunkInfo {
|
|
|
+ private String chunkId;
|
|
|
+ private String documentId;
|
|
|
+ private String content;
|
|
|
+ private Double similarity;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|