|
|
@@ -0,0 +1,428 @@
|
|
|
+package com.lingyue.extract.service;
|
|
|
+
|
|
|
+import com.lingyue.extract.dto.response.VariablePreviewResponse;
|
|
|
+import com.lingyue.extract.entity.Generation;
|
|
|
+import com.lingyue.extract.entity.Variable;
|
|
|
+import com.lingyue.extract.repository.GenerationRepository;
|
|
|
+import com.lingyue.extract.repository.VariableRepository;
|
|
|
+import lombok.RequiredArgsConstructor;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
+import org.springframework.http.HttpHeaders;
|
|
|
+import org.springframework.http.MediaType;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.transaction.annotation.Transactional;
|
|
|
+import org.springframework.web.reactive.function.client.WebClient;
|
|
|
+
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+
|
|
|
+/**
|
|
|
+ * 数据提取服务
|
|
|
+ *
|
|
|
+ * 负责从来源文档中提取变量值
|
|
|
+ *
|
|
|
+ * @author lingyue
|
|
|
+ * @since 2026-01-24
|
|
|
+ */
|
|
|
+@Slf4j
|
|
|
+@Service
|
|
|
+@RequiredArgsConstructor
|
|
|
+public class ExtractionService {
|
|
|
+
|
|
|
+ private final VariableRepository variableRepository;
|
|
|
+ private final GenerationRepository generationRepository;
|
|
|
+
|
|
|
+ @Value("${ai.service.base-url:http://localhost:8080}")
|
|
|
+ private String aiServiceBaseUrl;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 预览提取结果(单个变量)
|
|
|
+ */
|
|
|
+ public VariablePreviewResponse previewExtract(String variableId, String documentId) {
|
|
|
+ Variable variable = variableRepository.selectById(variableId);
|
|
|
+ if (variable == null) {
|
|
|
+ return VariablePreviewResponse.error(variableId, null, "变量不存在");
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 根据来源类型进行提取
|
|
|
+ String extractedValue = extractValue(variable, documentId);
|
|
|
+
|
|
|
+ return VariablePreviewResponse.success(
|
|
|
+ variableId,
|
|
|
+ variable.getName(),
|
|
|
+ extractedValue,
|
|
|
+ 0.90, // 模拟置信度
|
|
|
+ "从文档中提取"
|
|
|
+ );
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("预览提取失败: variableId={}, error={}", variableId, e.getMessage());
|
|
|
+ return VariablePreviewResponse.error(variableId, variable.getName(), e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 执行生成任务的变量提取
|
|
|
+ */
|
|
|
+ @Transactional
|
|
|
+ public void executeExtraction(String generationId) {
|
|
|
+ Generation generation = generationRepository.selectById(generationId);
|
|
|
+ if (generation == null) {
|
|
|
+ throw new RuntimeException("生成任务不存在");
|
|
|
+ }
|
|
|
+
|
|
|
+ // 获取模板的变量列表
|
|
|
+ List<Variable> variables = variableRepository.findByTemplateId(generation.getTemplateId());
|
|
|
+
|
|
|
+ int total = variables.size();
|
|
|
+ int completed = 0;
|
|
|
+
|
|
|
+ for (Variable variable : variables) {
|
|
|
+ try {
|
|
|
+ // 获取来源文档ID
|
|
|
+ String documentId = null;
|
|
|
+ if (variable.getSourceFileAlias() != null && generation.getSourceFileMap() != null) {
|
|
|
+ documentId = generation.getSourceFileMap().get(variable.getSourceFileAlias());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 提取值
|
|
|
+ String value = extractValue(variable, documentId);
|
|
|
+
|
|
|
+ // 更新变量值
|
|
|
+ Generation.VariableValue vv = generation.getVariableValues().get(variable.getName());
|
|
|
+ if (vv != null) {
|
|
|
+ vv.setValue(value);
|
|
|
+ vv.setConfidence(0.90);
|
|
|
+ vv.setStatus(Generation.VariableValue.STATUS_EXTRACTED);
|
|
|
+ }
|
|
|
+
|
|
|
+ completed++;
|
|
|
+ generation.setProgress((int) ((completed * 100.0) / total));
|
|
|
+ generationRepository.updateById(generation);
|
|
|
+
|
|
|
+ log.debug("提取变量成功: generationId={}, variable={}, value={}",
|
|
|
+ generationId, variable.getName(), value);
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("提取变量失败: generationId={}, variable={}, error={}",
|
|
|
+ generationId, variable.getName(), e.getMessage());
|
|
|
+
|
|
|
+ Generation.VariableValue vv = generation.getVariableValues().get(variable.getName());
|
|
|
+ if (vv != null) {
|
|
|
+ vv.setStatus(Generation.VariableValue.STATUS_ERROR);
|
|
|
+ vv.setErrorMessage(e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 更新状态为待确认
|
|
|
+ generation.setStatus(Generation.STATUS_REVIEW);
|
|
|
+ generation.setProgress(100);
|
|
|
+ generationRepository.updateById(generation);
|
|
|
+
|
|
|
+ log.info("提取完成: generationId={}, total={}, completed={}", generationId, total, completed);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 根据变量配置提取值
|
|
|
+ */
|
|
|
+ private String extractValue(Variable variable, String documentId) {
|
|
|
+ String sourceType = variable.getSourceType();
|
|
|
+
|
|
|
+ switch (sourceType) {
|
|
|
+ case Variable.SOURCE_TYPE_FIXED:
|
|
|
+ return extractFixed(variable);
|
|
|
+
|
|
|
+ case Variable.SOURCE_TYPE_MANUAL:
|
|
|
+ return null; // 手动输入,不自动提取
|
|
|
+
|
|
|
+ case Variable.SOURCE_TYPE_REFERENCE:
|
|
|
+ return null; // 引用类型,需要在所有变量提取完成后处理
|
|
|
+
|
|
|
+ case Variable.SOURCE_TYPE_DOCUMENT:
|
|
|
+ default:
|
|
|
+ return extractFromDocument(variable, documentId);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 提取固定值
|
|
|
+ */
|
|
|
+ private String extractFixed(Variable variable) {
|
|
|
+ Map<String, Object> config = variable.getSourceConfig();
|
|
|
+ if (config != null && config.containsKey("fixedValue")) {
|
|
|
+ return (String) config.get("fixedValue");
|
|
|
+ }
|
|
|
+ return variable.getExampleValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 从文档提取
|
|
|
+ */
|
|
|
+ private String extractFromDocument(Variable variable, String documentId) {
|
|
|
+ if (documentId == null) {
|
|
|
+ log.warn("文档ID为空,使用示例值: variable={}", variable.getName());
|
|
|
+ return variable.getExampleValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ String extractType = variable.getExtractType();
|
|
|
+
|
|
|
+ if (Variable.EXTRACT_TYPE_DIRECT.equals(extractType)) {
|
|
|
+ return extractDirect(variable, documentId);
|
|
|
+ } else if (Variable.EXTRACT_TYPE_AI_EXTRACT.equals(extractType)) {
|
|
|
+ return extractByAI(variable, documentId);
|
|
|
+ } else if (Variable.EXTRACT_TYPE_AI_SUMMARIZE.equals(extractType)) {
|
|
|
+ return summarizeByAI(variable, documentId);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 默认返回示例值
|
|
|
+ return variable.getExampleValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 直接提取(按位置)
|
|
|
+ */
|
|
|
+ private String extractDirect(Variable variable, String documentId) {
|
|
|
+ // TODO: 调用 document-service 获取文档内容
|
|
|
+ // TODO: 根据 variable.getLocation() 定位并提取文本
|
|
|
+ log.info("直接提取: variable={}, documentId={}", variable.getName(), documentId);
|
|
|
+ return variable.getExampleValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * AI提取
|
|
|
+ */
|
|
|
+ private String extractByAI(Variable variable, String documentId) {
|
|
|
+ Map<String, Object> extractConfig = variable.getExtractConfig();
|
|
|
+ if (extractConfig == null) {
|
|
|
+ return variable.getExampleValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ String targetDescription = (String) extractConfig.get("targetDescription");
|
|
|
+ if (targetDescription == null) {
|
|
|
+ targetDescription = "提取 " + variable.getDisplayName();
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 构建提示词
|
|
|
+ String prompt = buildExtractionPrompt(variable, targetDescription, documentId);
|
|
|
+
|
|
|
+ // 调用AI服务
|
|
|
+ String result = callAIService(prompt);
|
|
|
+
|
|
|
+ return result != null ? result.trim() : variable.getExampleValue();
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("AI提取失败: variable={}, error={}", variable.getName(), e.getMessage());
|
|
|
+ return variable.getExampleValue();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * AI总结
|
|
|
+ */
|
|
|
+ private String summarizeByAI(Variable variable, String documentId) {
|
|
|
+ Map<String, Object> extractConfig = variable.getExtractConfig();
|
|
|
+ if (extractConfig == null) {
|
|
|
+ return variable.getExampleValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ String summarizePrompt = (String) extractConfig.get("summarizePrompt");
|
|
|
+ if (summarizePrompt == null) {
|
|
|
+ summarizePrompt = "请对以下内容进行总结";
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 构建总结提示词
|
|
|
+ String prompt = buildSummarizePrompt(variable, summarizePrompt, documentId);
|
|
|
+
|
|
|
+ // 调用AI服务
|
|
|
+ String result = callAIService(prompt);
|
|
|
+
|
|
|
+ return result != null ? result.trim() : variable.getExampleValue();
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("AI总结失败: variable={}, error={}", variable.getName(), e.getMessage());
|
|
|
+ return variable.getExampleValue();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 构建提取提示词
|
|
|
+ */
|
|
|
+ private String buildExtractionPrompt(Variable variable, String targetDescription, String documentId) {
|
|
|
+ StringBuilder prompt = new StringBuilder();
|
|
|
+ prompt.append("请从以下文档内容中提取信息。\n\n");
|
|
|
+ prompt.append("提取目标:").append(targetDescription).append("\n");
|
|
|
+ prompt.append("字段类型:").append(variable.getValueType()).append("\n");
|
|
|
+
|
|
|
+ Map<String, Object> extractConfig = variable.getExtractConfig();
|
|
|
+ if (extractConfig != null) {
|
|
|
+ Object expectedFormat = extractConfig.get("expectedFormat");
|
|
|
+ if (expectedFormat != null) {
|
|
|
+ prompt.append("期望格式:").append(expectedFormat).append("\n");
|
|
|
+ }
|
|
|
+ Object examples = extractConfig.get("examples");
|
|
|
+ if (examples != null) {
|
|
|
+ prompt.append("示例:").append(examples).append("\n");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ prompt.append("\n请只输出提取的值,不要包含其他解释。\n");
|
|
|
+ prompt.append("\n文档内容:\n");
|
|
|
+ prompt.append("[文档ID: ").append(documentId).append("]\n");
|
|
|
+ // TODO: 这里需要获取实际的文档内容
|
|
|
+ prompt.append("(文档内容待获取)");
|
|
|
+
|
|
|
+ return prompt.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 构建总结提示词
|
|
|
+ */
|
|
|
+ private String buildSummarizePrompt(Variable variable, String summarizePrompt, String documentId) {
|
|
|
+ StringBuilder prompt = new StringBuilder();
|
|
|
+ prompt.append(summarizePrompt).append("\n\n");
|
|
|
+
|
|
|
+ Map<String, Object> extractConfig = variable.getExtractConfig();
|
|
|
+ if (extractConfig != null) {
|
|
|
+ Object focusPoints = extractConfig.get("focusPoints");
|
|
|
+ if (focusPoints != null) {
|
|
|
+ prompt.append("重点关注:").append(focusPoints).append("\n");
|
|
|
+ }
|
|
|
+ Object rules = extractConfig.get("rules");
|
|
|
+ if (rules != null) {
|
|
|
+ prompt.append("总结要求:").append(rules).append("\n");
|
|
|
+ }
|
|
|
+ Object maxLength = extractConfig.get("maxLength");
|
|
|
+ if (maxLength != null) {
|
|
|
+ prompt.append("字数限制:").append(maxLength).append("字以内\n");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ prompt.append("\n文档内容:\n");
|
|
|
+ prompt.append("[文档ID: ").append(documentId).append("]\n");
|
|
|
+ // TODO: 这里需要获取实际的文档内容
|
|
|
+ prompt.append("(文档内容待获取)");
|
|
|
+
|
|
|
+ return prompt.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 调用AI服务
|
|
|
+ */
|
|
|
+ private String callAIService(String prompt) {
|
|
|
+ try {
|
|
|
+ WebClient webClient = WebClient.builder()
|
|
|
+ .baseUrl(aiServiceBaseUrl)
|
|
|
+ .defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
|
|
|
+ .build();
|
|
|
+
|
|
|
+ // 构建请求
|
|
|
+ Map<String, Object> request = Map.of(
|
|
|
+ "prompt", prompt,
|
|
|
+ "maxTokens", 500
|
|
|
+ );
|
|
|
+
|
|
|
+ // 调用AI接口
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ Map<String, Object> response = webClient
|
|
|
+ .post()
|
|
|
+ .uri("/api/v1/ai/complete")
|
|
|
+ .bodyValue(request)
|
|
|
+ .retrieve()
|
|
|
+ .bodyToMono(Map.class)
|
|
|
+ .block();
|
|
|
+
|
|
|
+ if (response != null && response.containsKey("data")) {
|
|
|
+ Object data = response.get("data");
|
|
|
+ if (data instanceof String) {
|
|
|
+ return (String) data;
|
|
|
+ } else if (data instanceof Map) {
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ Map<String, Object> dataMap = (Map<String, Object>) data;
|
|
|
+ return (String) dataMap.get("content");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return null;
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("调用AI服务失败: {}", e.getMessage());
|
|
|
+ throw new RuntimeException("AI服务调用失败: " + e.getMessage(), e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 处理引用类型变量
|
|
|
+ * 在所有基础变量提取完成后调用
|
|
|
+ */
|
|
|
+ @Transactional
|
|
|
+ public void processReferenceVariables(String generationId) {
|
|
|
+ Generation generation = generationRepository.selectById(generationId);
|
|
|
+ if (generation == null) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ List<Variable> refVariables = variableRepository.findByTemplateIdAndSourceType(
|
|
|
+ generation.getTemplateId(), Variable.SOURCE_TYPE_REFERENCE);
|
|
|
+
|
|
|
+ for (Variable variable : refVariables) {
|
|
|
+ try {
|
|
|
+ String value = processReference(variable, generation.getVariableValues());
|
|
|
+
|
|
|
+ Generation.VariableValue vv = generation.getVariableValues().get(variable.getName());
|
|
|
+ if (vv != null) {
|
|
|
+ vv.setValue(value);
|
|
|
+ vv.setStatus(Generation.VariableValue.STATUS_EXTRACTED);
|
|
|
+ }
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("处理引用变量失败: variable={}, error={}", variable.getName(), e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ generationRepository.updateById(generation);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 处理引用
|
|
|
+ */
|
|
|
+ private String processReference(Variable variable, Map<String, Generation.VariableValue> variableValues) {
|
|
|
+ Map<String, Object> config = variable.getSourceConfig();
|
|
|
+ if (config == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ List<String> referenceVariables = (List<String>) config.get("referenceVariables");
|
|
|
+ String combineTemplate = (String) config.get("combineTemplate");
|
|
|
+
|
|
|
+ if (referenceVariables == null || referenceVariables.isEmpty()) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (combineTemplate == null) {
|
|
|
+ // 默认直接拼接
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
+ for (String refName : referenceVariables) {
|
|
|
+ Generation.VariableValue vv = variableValues.get(refName);
|
|
|
+ if (vv != null && vv.getValue() != null) {
|
|
|
+ sb.append(vv.getValue());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return sb.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 使用模板替换
|
|
|
+ String result = combineTemplate;
|
|
|
+ for (String refName : referenceVariables) {
|
|
|
+ Generation.VariableValue vv = variableValues.get(refName);
|
|
|
+ String value = vv != null && vv.getValue() != null ? vv.getValue() : "";
|
|
|
+ result = result.replace("{" + refName + "}", value);
|
|
|
+ }
|
|
|
+
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+}
|