1 mese fa · cccd5afa42
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/AIExtractConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/AIExtractConfig.java
@@ -0,0 +1,49 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
			
 
				+ * AI字段提取配置
			
 
				+ * 
			
 
				+ * 用于 extractType = "ai_extract" 的规则
			
 
				+ * 使用AI从内容中提取特定字段
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "AI字段提取配置")
			
 
				+public class AIExtractConfig {
			
 
				+    
			
 
				+    @Schema(description = "提取目标描述，如'工程名称'、'建设日期'")
			
 
				+    private String targetDescription;
			
 
				+    
			
 
				+    @Schema(description = "字段类型: text-文本, date-日期, number-数字, amount-金额, list-列表")
			
 
				+    private String fieldType;
			
 
				+    
			
 
				+    @Schema(description = "预期格式，如'YYYY-MM-DD'、'XX万元'")
			
 
				+    private String expectedFormat;
			
 
				+    
			
 
				+    @Schema(description = "示例值列表，帮助AI理解")
			
 
				+    private List<String> examples;
			
 
				+    
			
 
				+    @Schema(description = "额外的提取说明")
			
 
				+    private String additionalInstructions;
			
 
				+    
			
 
				+    @Schema(description = "使用的AI模型（可选，默认使用项目配置）")
			
 
				+    private String modelName;
			
 
				+    
			
 
				+    @Schema(description = "温度参数（控制随机性，0-1）")
			
 
				+    private Double temperature;
			
 
				+    
			
 
				+    // ==================== 字段类型常量 ====================
			
 
				+    
			
 
				+    public static final String FIELD_TYPE_TEXT = "text";
			
 
				+    public static final String FIELD_TYPE_DATE = "date";
			
 
				+    public static final String FIELD_TYPE_NUMBER = "number";
			
 
				+    public static final String FIELD_TYPE_AMOUNT = "amount";
			
 
				+    public static final String FIELD_TYPE_LIST = "list";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/AISummarizeConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/AISummarizeConfig.java
@@ -0,0 +1,50 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
			
 
				+ * AI总结配置
			
 
				+ * 
			
 
				+ * 用于 extractType = "ai_summarize" 的规则
			
 
				+ * 使用AI对内容进行总结、提炼
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "AI总结配置")
			
 
				+public class AISummarizeConfig {
			
 
				+    
			
 
				+    @Schema(description = "总结要求/提示词")
			
 
				+    private String summarizePrompt;
			
 
				+    
			
 
				+    @Schema(description = "关注维度列表")
			
 
				+    private List<String> focusPoints;
			
 
				+    
			
 
				+    @Schema(description = "总结规则（如：必须包含XXX，不要提及XXX）")
			
 
				+    private List<String> rules;
			
 
				+    
			
 
				+    @Schema(description = "输出风格: formal-正式, concise-简洁, detailed-详细")
			
 
				+    private String style;
			
 
				+    
			
 
				+    @Schema(description = "最大字数限制")
			
 
				+    private Integer maxLength;
			
 
				+    
			
 
				+    @Schema(description = "上下文字段Key列表（引用已提取的字段作为上下文）")
			
 
				+    private List<String> contextFieldKeys;
			
 
				+    
			
 
				+    @Schema(description = "使用的AI模型（可选）")
			
 
				+    private String modelName;
			
 
				+    
			
 
				+    @Schema(description = "温度参数")
			
 
				+    private Double temperature;
			
 
				+    
			
 
				+    // ==================== 风格常量 ====================
			
 
				+    
			
 
				+    public static final String STYLE_FORMAL = "formal";
			
 
				+    public static final String STYLE_CONCISE = "concise";
			
 
				+    public static final String STYLE_DETAILED = "detailed";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/DirectExtractConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/DirectExtractConfig.java
@@ -0,0 +1,36 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+/**
			
 
				+ * 直接提取配置
			
 
				+ * 
			
 
				+ * 用于 extractType = "direct" 的规则
			
 
				+ * 直接使用定位到的内容，不进行AI处理
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "直接提取配置")
			
 
				+public class DirectExtractConfig {
			
 
				+    
			
 
				+    @Schema(description = "去除首尾空白")
			
 
				+    private Boolean trimWhitespace;
			
 
				+    
			
 
				+    @Schema(description = "移除换行符")
			
 
				+    private Boolean removeLineBreaks;
			
 
				+    
			
 
				+    @Schema(description = "合并连续空格")
			
 
				+    private Boolean mergeSpaces;
			
 
				+    
			
 
				+    @Schema(description = "正则提取模式（可选，用于从内容中提取特定部分）")
			
 
				+    private String regexPattern;
			
 
				+    
			
 
				+    @Schema(description = "正则提取分组索引（默认0表示整个匹配）")
			
 
				+    private Integer regexGroupIndex;
			
 
				+    
			
 
				+    @Schema(description = "值转换配置")
			
 
				+    private TransformConfig transform;
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/DocumentSourceConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/DocumentSourceConfig.java
@@ -0,0 +1,49 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+/**
			
 
				+ * 文档来源配置
			
 
				+ * 
			
 
				+ * 用于 sourceType = "document" 的规则
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "文档来源配置")
			
 
				+public class DocumentSourceConfig {
			
 
				+    
			
 
				+    @Schema(description = "来源文档ID（SourceDocument.id）")
			
 
				+    private String sourceDocId;
			
 
				+    
			
 
				+    @Schema(description = "定位配置")
			
 
				+    private LocationConfig location;
			
 
				+    
			
 
				+    @Schema(description = "是否包含整个定位范围的内容")
			
 
				+    private Boolean includeFullRange;
			
 
				+    
			
 
				+    @Schema(description = "内容预处理选项")
			
 
				+    private PreprocessConfig preprocess;
			
 
				+    
			
 
				+    /**
			
 
				+     * 内容预处理配置
			
 
				+     */
			
 
				+    @Data
			
 
				+    @Schema(description = "内容预处理配置")
			
 
				+    public static class PreprocessConfig {
			
 
				+        
			
 
				+        @Schema(description = "去除首尾空白")
			
 
				+        private Boolean trimWhitespace;
			
 
				+        
			
 
				+        @Schema(description = "移除换行符")
			
 
				+        private Boolean removeLineBreaks;
			
 
				+        
			
 
				+        @Schema(description = "合并连续空格")
			
 
				+        private Boolean mergeSpaces;
			
 
				+        
			
 
				+        @Schema(description = "最大内容长度（用于AI调用时截断）")
			
 
				+        private Integer maxLength;
			
 
				+    }
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/FixedSourceConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/FixedSourceConfig.java
@@ -0,0 +1,30 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+/**
			
 
				+ * 固定内容来源配置
			
 
				+ * 
			
 
				+ * 用于 sourceType = "fixed" 的规则
			
 
				+ * 直接使用固定的内容值
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "固定内容来源配置")
			
 
				+public class FixedSourceConfig {
			
 
				+    
			
 
				+    @Schema(description = "固定内容")
			
 
				+    private String fixedContent;
			
 
				+    
			
 
				+    @Schema(description = "内容类型: text-文本, html-HTML, markdown-Markdown")
			
 
				+    private String contentType;
			
 
				+    
			
 
				+    // ==================== 内容类型常量 ====================
			
 
				+    
			
 
				+    public static final String CONTENT_TYPE_TEXT = "text";
			
 
				+    public static final String CONTENT_TYPE_HTML = "html";
			
 
				+    public static final String CONTENT_TYPE_MARKDOWN = "markdown";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/LocationConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/LocationConfig.java
@@ -0,0 +1,61 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
			
 
				+ * 定位配置
			
 
				+ * 
			
 
				+ * 描述如何在文档中定位内容
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "定位配置")
			
 
				+public class LocationConfig {
			
 
				+    
			
 
				+    @Schema(description = "定位类型: page-按页码, chapter-按章节, element-按元素ID, cell-按单元格")
			
 
				+    private String type;
			
 
				+    
			
 
				+    // ==================== 按页码定位 ====================
			
 
				+    
			
 
				+    @Schema(description = "起始页码")
			
 
				+    private Integer pageStart;
			
 
				+    
			
 
				+    @Schema(description = "结束页码")
			
 
				+    private Integer pageEnd;
			
 
				+    
			
 
				+    @Schema(description = "段落关键词过滤")
			
 
				+    private String paragraphKeyword;
			
 
				+    
			
 
				+    // ==================== 按章节定位 ====================
			
 
				+    
			
 
				+    @Schema(description = "章节路径，如 ['3', '5', '3', '3'] 表示 3.5.3.3")
			
 
				+    private List<String> chapterPath;
			
 
				+    
			
 
				+    @Schema(description = "章节标题关键词")
			
 
				+    private String chapterTitle;
			
 
				+    
			
 
				+    // ==================== 按元素ID定位 ====================
			
 
				+    
			
 
				+    @Schema(description = "元素ID列表")
			
 
				+    private List<String> elementIds;
			
 
				+    
			
 
				+    // ==================== 按单元格定位（Excel） ====================
			
 
				+    
			
 
				+    @Schema(description = "Sheet名称")
			
 
				+    private String sheetName;
			
 
				+    
			
 
				+    @Schema(description = "单元格引用，如 'A1:C10' 或 '1.5.1'")
			
 
				+    private String cellRef;
			
 
				+    
			
 
				+    // ==================== 定位类型常量 ====================
			
 
				+    
			
 
				+    public static final String TYPE_PAGE = "page";
			
 
				+    public static final String TYPE_CHAPTER = "chapter";
			
 
				+    public static final String TYPE_ELEMENT = "element";
			
 
				+    public static final String TYPE_CELL = "cell";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/ManualSourceConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/ManualSourceConfig.java
@@ -0,0 +1,43 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+/**
			
 
				+ * 手动输入来源配置
			
 
				+ * 
			
 
				+ * 用于 sourceType = "manual" 的规则
			
 
				+ * 需要用户手动填写内容
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "手动输入来源配置")
			
 
				+public class ManualSourceConfig {
			
 
				+    
			
 
				+    @Schema(description = "输入提示")
			
 
				+    private String placeholder;
			
 
				+    
			
 
				+    @Schema(description = "默认值")
			
 
				+    private String defaultValue;
			
 
				+    
			
 
				+    @Schema(description = "是否必填")
			
 
				+    private Boolean required;
			
 
				+    
			
 
				+    @Schema(description = "验证规则（正则表达式）")
			
 
				+    private String validationPattern;
			
 
				+    
			
 
				+    @Schema(description = "验证错误提示")
			
 
				+    private String validationMessage;
			
 
				+    
			
 
				+    @Schema(description = "输入类型: text-单行文本, textarea-多行文本, date-日期, number-数字")
			
 
				+    private String inputType;
			
 
				+    
			
 
				+    // ==================== 输入类型常量 ====================
			
 
				+    
			
 
				+    public static final String INPUT_TYPE_TEXT = "text";
			
 
				+    public static final String INPUT_TYPE_TEXTAREA = "textarea";
			
 
				+    public static final String INPUT_TYPE_DATE = "date";
			
 
				+    public static final String INPUT_TYPE_NUMBER = "number";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/OcrExtractConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/OcrExtractConfig.java
@@ -0,0 +1,58 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+/**
			
 
				+ * OCR提取配置
			
 
				+ * 
			
 
				+ * 用于 extractType = "ocr" 的规则
			
 
				+ * 使用OCR识别图片中的文字
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "OCR提取配置")
			
 
				+public class OcrExtractConfig {
			
 
				+    
			
 
				+    @Schema(description = "OCR引擎: paddleocr-飞桨OCR, tesseract-Tesseract")
			
 
				+    private String ocrEngine;
			
 
				+    
			
 
				+    @Schema(description = "识别语言: chi_sim-简体中文, eng-英文")
			
 
				+    private String language;
			
 
				+    
			
 
				+    @Schema(description = "是否进行表格识别")
			
 
				+    private Boolean detectTable;
			
 
				+    
			
 
				+    @Schema(description = "是否进行版面分析")
			
 
				+    private Boolean layoutAnalysis;
			
 
				+    
			
 
				+    @Schema(description = "图片预处理选项")
			
 
				+    private PreprocessOptions preprocessOptions;
			
 
				+    
			
 
				+    /**
			
 
				+     * 图片预处理选项
			
 
				+     */
			
 
				+    @Data
			
 
				+    @Schema(description = "图片预处理选项")
			
 
				+    public static class PreprocessOptions {
			
 
				+        
			
 
				+        @Schema(description = "是否转为灰度图")
			
 
				+        private Boolean grayscale;
			
 
				+        
			
 
				+        @Schema(description = "是否进行二值化")
			
 
				+        private Boolean binarize;
			
 
				+        
			
 
				+        @Schema(description = "是否去噪")
			
 
				+        private Boolean denoise;
			
 
				+        
			
 
				+        @Schema(description = "是否校正倾斜")
			
 
				+        private Boolean deskew;
			
 
				+    }
			
 
				+    
			
 
				+    // ==================== OCR引擎常量 ====================
			
 
				+    
			
 
				+    public static final String ENGINE_PADDLEOCR = "paddleocr";
			
 
				+    public static final String ENGINE_TESSERACT = "tesseract";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/SelfReferenceSourceConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/SelfReferenceSourceConfig.java
@@ -0,0 +1,35 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
			
 
				+ * 自引用来源配置
			
 
				+ * 
			
 
				+ * 用于 sourceType = "self_reference" 的规则
			
 
				+ * 引用已提取的其他字段值
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "自引用来源配置")
			
 
				+public class SelfReferenceSourceConfig {
			
 
				+    
			
 
				+    @Schema(description = "引用的字段Key列表")
			
 
				+    private List<String> referenceFieldKeys;
			
 
				+    
			
 
				+    @Schema(description = "组合模板，如 '{project_name} - {project_code}'")
			
 
				+    private String combineTemplate;
			
 
				+    
			
 
				+    @Schema(description = "值转换配置")
			
 
				+    private TransformConfig transform;
			
 
				+    
			
 
				+    @Schema(description = "当引用字段为空时的默认值")
			
 
				+    private String defaultValue;
			
 
				+    
			
 
				+    @Schema(description = "是否必须所有引用字段都有值")
			
 
				+    private Boolean requireAll;
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/TransformConfig.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/dto/config/TransformConfig.java
@@ -0,0 +1,44 @@
 
				+package com.lingyue.extract.dto.config;
			
 
				+
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+import java.util.Map;
			
 
				+
			
 
				+/**
			
 
				+ * 转换配置
			
 
				+ * 
			
 
				+ * 描述如何对值进行转换
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@Schema(description = "转换配置")
			
 
				+public class TransformConfig {
			
 
				+    
			
 
				+    @Schema(description = "转换类型: mapping-映射, format-格式化, concat-拼接, split-分割")
			
 
				+    private String type;
			
 
				+    
			
 
				+    @Schema(description = "映射表，如 {'是': 'true', '否': 'false'}")
			
 
				+    private Map<String, String> mapping;
			
 
				+    
			
 
				+    @Schema(description = "格式化模板，如 '{value}万元'")
			
 
				+    private String formatTemplate;
			
 
				+    
			
 
				+    @Schema(description = "拼接分隔符")
			
 
				+    private String concatSeparator;
			
 
				+    
			
 
				+    @Schema(description = "分割分隔符")
			
 
				+    private String splitSeparator;
			
 
				+    
			
 
				+    @Schema(description = "取分割后的第几个（0-based）")
			
 
				+    private Integer splitIndex;
			
 
				+    
			
 
				+    // ==================== 转换类型常量 ====================
			
 
				+    
			
 
				+    public static final String TYPE_MAPPING = "mapping";
			
 
				+    public static final String TYPE_FORMAT = "format";
			
 
				+    public static final String TYPE_CONCAT = "concat";
			
 
				+    public static final String TYPE_SPLIT = "split";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/entity/ExtractResult.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/entity/ExtractResult.java
@@ -0,0 +1,118 @@
 
				+package com.lingyue.extract.entity;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.annotation.TableField;
			
 
				+import com.baomidou.mybatisplus.annotation.TableId;
			
 
				+import com.baomidou.mybatisplus.annotation.TableName;
			
 
				+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
			
 
				+import com.fasterxml.jackson.annotation.JsonFormat;
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+import java.math.BigDecimal;
			
 
				+import java.util.Date;
			
 
				+import java.util.Map;
			
 
				+
			
 
				+/**
			
 
				+ * 提取结果实体
			
 
				+ * 
			
 
				+ * 规则执行后的提取值，保存来源追溯信息
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Data
			
 
				+@TableName(value = "extract_results", autoResultMap = true)
			
 
				+@Schema(description = "提取结果")
			
 
				+public class ExtractResult {
			
 
				+    
			
 
				+    @Schema(description = "ID")
			
 
				+    @TableId
			
 
				+    private String id;
			
 
				+    
			
 
				+    @Schema(description = "规则ID")
			
 
				+    @TableField("rule_id")
			
 
				+    private String ruleId;
			
 
				+    
			
 
				+    @Schema(description = "项目ID")
			
 
				+    @TableField("project_id")
			
 
				+    private String projectId;
			
 
				+    
			
 
				+    // ==================== 提取结果 ====================
			
 
				+    
			
 
				+    @Schema(description = "提取出的值")
			
 
				+    @TableField("extracted_value")
			
 
				+    private String extractedValue;
			
 
				+    
			
 
				+    @Schema(description = "值类型: text-文本, table-表格, image-图片, list-列表")
			
 
				+    @TableField("value_type")
			
 
				+    private String valueType;
			
 
				+    
			
 
				+    // ==================== 来源追溯 ====================
			
 
				+    
			
 
				+    @Schema(description = "来源原文内容")
			
 
				+    @TableField("source_content")
			
 
				+    private String sourceContent;
			
 
				+    
			
 
				+    @Schema(description = "来源位置信息")
			
 
				+    @TableField(value = "source_location", typeHandler = JacksonTypeHandler.class)
			
 
				+    private Map<String, Object> sourceLocation;
			
 
				+    
			
 
				+    // ==================== 质量评估 ====================
			
 
				+    
			
 
				+    @Schema(description = "AI提取置信度 0-1")
			
 
				+    @TableField("confidence")
			
 
				+    private BigDecimal confidence;
			
 
				+    
			
 
				+    // ==================== 状态 ====================
			
 
				+    
			
 
				+    @Schema(description = "状态: extracted-已提取, confirmed-已确认, rejected-已拒绝, modified-已修正")
			
 
				+    @TableField("status")
			
 
				+    private String status;
			
 
				+    
			
 
				+    // ==================== 人工处理 ====================
			
 
				+    
			
 
				+    @Schema(description = "人工修正后的值")
			
 
				+    @TableField("modified_value")
			
 
				+    private String modifiedValue;
			
 
				+    
			
 
				+    @Schema(description = "确认时间")
			
 
				+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
			
 
				+    @TableField("confirmed_at")
			
 
				+    private Date confirmedAt;
			
 
				+    
			
 
				+    @Schema(description = "确认人")
			
 
				+    @TableField("confirmed_by")
			
 
				+    private String confirmedBy;
			
 
				+    
			
 
				+    @Schema(description = "拒绝原因")
			
 
				+    @TableField("reject_reason")
			
 
				+    private String rejectReason;
			
 
				+    
			
 
				+    // ==================== 元数据 ====================
			
 
				+    
			
 
				+    @Schema(description = "元数据（AI输出、处理日志等）")
			
 
				+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
			
 
				+    private Map<String, Object> metadata;
			
 
				+    
			
 
				+    @Schema(description = "创建时间")
			
 
				+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
			
 
				+    @TableField("create_time")
			
 
				+    private Date createTime;
			
 
				+    
			
 
				+    // ==================== 状态常量 ====================
			
 
				+    
			
 
				+    public static final String STATUS_EXTRACTED = "extracted";
			
 
				+    public static final String STATUS_CONFIRMED = "confirmed";
			
 
				+    public static final String STATUS_REJECTED = "rejected";
			
 
				+    public static final String STATUS_MODIFIED = "modified";
			
 
				+    
			
 
				+    /**
			
 
				+     * 获取最终值（优先使用修正后的值）
			
 
				+     */
			
 
				+    public String getFinalValue() {
			
 
				+        if (modifiedValue != null && !modifiedValue.isEmpty()) {
			
 
				+            return modifiedValue;
			
 
				+        }
			
 
				+        return extractedValue;
			
 
				+    }
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/entity/ExtractRule.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/entity/ExtractRule.java
@@ -0,0 +1,125 @@
 
				+package com.lingyue.extract.entity;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.annotation.TableField;
			
 
				+import com.baomidou.mybatisplus.annotation.TableName;
			
 
				+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
			
 
				+import com.lingyue.common.domain.entity.SimpleModel;
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+import lombok.EqualsAndHashCode;
			
 
				+
			
 
				+import java.util.Map;
			
 
				+
			
 
				+/**
			
 
				+ * 提取规则实体
			
 
				+ * 
			
 
				+ * 描述如何从来源文档中提取数据的配置
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@EqualsAndHashCode(callSuper = true)
			
 
				+@Data
			
 
				+@TableName(value = "extract_rules", autoResultMap = true)
			
 
				+@Schema(description = "数据提取规则")
			
 
				+public class ExtractRule extends SimpleModel {
			
 
				+    
			
 
				+    @Schema(description = "项目ID")
			
 
				+    @TableField("project_id")
			
 
				+    private String projectId;
			
 
				+    
			
 
				+    @Schema(description = "来源文档ID（可为空，表示引用/固定/手动类型）")
			
 
				+    @TableField("source_doc_id")
			
 
				+    private String sourceDocId;
			
 
				+    
			
 
				+    // ==================== 目标字段 ====================
			
 
				+    
			
 
				+    @Schema(description = "目标字段Key（程序用）")
			
 
				+    @TableField("target_field_key")
			
 
				+    private String targetFieldKey;
			
 
				+    
			
 
				+    @Schema(description = "目标字段名称（显示用）")
			
 
				+    @TableField("target_field_name")
			
 
				+    private String targetFieldName;
			
 
				+    
			
 
				+    @Schema(description = "字段分组")
			
 
				+    @TableField("target_field_group")
			
 
				+    private String targetFieldGroup;
			
 
				+    
			
 
				+    @Schema(description = "规则顺序")
			
 
				+    @TableField("rule_index")
			
 
				+    private Integer ruleIndex;
			
 
				+    
			
 
				+    // ==================== 来源配置 ====================
			
 
				+    
			
 
				+    @Schema(description = "来源类型: document-文档, self_reference-引用已提取字段, fixed-固定值, manual-手动输入")
			
 
				+    @TableField("source_type")
			
 
				+    private String sourceType;
			
 
				+    
			
 
				+    @Schema(description = "来源配置")
			
 
				+    @TableField(value = "source_config", typeHandler = JacksonTypeHandler.class)
			
 
				+    private Map<String, Object> sourceConfig;
			
 
				+    
			
 
				+    // ==================== 提取配置 ====================
			
 
				+    
			
 
				+    @Schema(description = "提取类型: direct-直接提取, ai_extract-AI字段提取, ai_summarize-AI总结, ocr-OCR识别")
			
 
				+    @TableField("extract_type")
			
 
				+    private String extractType;
			
 
				+    
			
 
				+    @Schema(description = "提取配置")
			
 
				+    @TableField(value = "extract_config", typeHandler = JacksonTypeHandler.class)
			
 
				+    private Map<String, Object> extractConfig;
			
 
				+    
			
 
				+    // ==================== 结果 ====================
			
 
				+    
			
 
				+    @Schema(description = "状态: pending-待提取, extracting-提取中, extracted-已提取, confirmed-已确认, error-错误")
			
 
				+    @TableField("status")
			
 
				+    private String status;
			
 
				+    
			
 
				+    @Schema(description = "提取出的值")
			
 
				+    @TableField("extracted_value")
			
 
				+    private String extractedValue;
			
 
				+    
			
 
				+    @Schema(description = "值类型: text-文本, table-表格, image-图片, list-列表")
			
 
				+    @TableField("value_type")
			
 
				+    private String valueType;
			
 
				+    
			
 
				+    @Schema(description = "错误信息")
			
 
				+    @TableField("error_message")
			
 
				+    private String errorMessage;
			
 
				+    
			
 
				+    // ==================== 元数据 ====================
			
 
				+    
			
 
				+    @Schema(description = "元数据")
			
 
				+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
			
 
				+    private Map<String, Object> metadata;
			
 
				+    
			
 
				+    // ==================== 来源类型常量 ====================
			
 
				+    
			
 
				+    public static final String SOURCE_TYPE_DOCUMENT = "document";
			
 
				+    public static final String SOURCE_TYPE_SELF_REFERENCE = "self_reference";
			
 
				+    public static final String SOURCE_TYPE_FIXED = "fixed";
			
 
				+    public static final String SOURCE_TYPE_MANUAL = "manual";
			
 
				+    
			
 
				+    // ==================== 提取类型常量 ====================
			
 
				+    
			
 
				+    public static final String EXTRACT_TYPE_DIRECT = "direct";
			
 
				+    public static final String EXTRACT_TYPE_AI_EXTRACT = "ai_extract";
			
 
				+    public static final String EXTRACT_TYPE_AI_SUMMARIZE = "ai_summarize";
			
 
				+    public static final String EXTRACT_TYPE_OCR = "ocr";
			
 
				+    
			
 
				+    // ==================== 状态常量 ====================
			
 
				+    
			
 
				+    public static final String STATUS_PENDING = "pending";
			
 
				+    public static final String STATUS_EXTRACTING = "extracting";
			
 
				+    public static final String STATUS_EXTRACTED = "extracted";
			
 
				+    public static final String STATUS_CONFIRMED = "confirmed";
			
 
				+    public static final String STATUS_ERROR = "error";
			
 
				+    
			
 
				+    // ==================== 值类型常量 ====================
			
 
				+    
			
 
				+    public static final String VALUE_TYPE_TEXT = "text";
			
 
				+    public static final String VALUE_TYPE_TABLE = "table";
			
 
				+    public static final String VALUE_TYPE_IMAGE = "image";
			
 
				+    public static final String VALUE_TYPE_LIST = "list";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/entity/Project.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/entity/Project.java
@@ -0,0 +1,53 @@
 
				+package com.lingyue.extract.entity;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.annotation.TableField;
			
 
				+import com.baomidou.mybatisplus.annotation.TableName;
			
 
				+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
			
 
				+import com.lingyue.common.domain.entity.SimpleModel;
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+import lombok.EqualsAndHashCode;
			
 
				+
			
 
				+import java.util.Map;
			
 
				+
			
 
				+/**
			
 
				+ * 数据提取项目实体
			
 
				+ * 
			
 
				+ * 一个项目代表一个报告生成任务，包含多个来源文档和提取规则
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@EqualsAndHashCode(callSuper = true)
			
 
				+@Data
			
 
				+@TableName(value = "extract_projects", autoResultMap = true)
			
 
				+@Schema(description = "数据提取项目")
			
 
				+public class Project extends SimpleModel {
			
 
				+    
			
 
				+    @Schema(description = "用户ID")
			
 
				+    @TableField("user_id")
			
 
				+    private String userId;
			
 
				+    
			
 
				+    @Schema(description = "项目名称")
			
 
				+    @TableField("name")
			
 
				+    private String name;
			
 
				+    
			
 
				+    @Schema(description = "项目描述")
			
 
				+    @TableField("description")
			
 
				+    private String description;
			
 
				+    
			
 
				+    @Schema(description = "状态: draft-草稿, extracting-提取中, completed-已完成, archived-已归档")
			
 
				+    @TableField("status")
			
 
				+    private String status;
			
 
				+    
			
 
				+    @Schema(description = "项目配置")
			
 
				+    @TableField(value = "config", typeHandler = JacksonTypeHandler.class)
			
 
				+    private Map<String, Object> config;
			
 
				+    
			
 
				+    // ==================== 状态常量 ====================
			
 
				+    
			
 
				+    public static final String STATUS_DRAFT = "draft";
			
 
				+    public static final String STATUS_EXTRACTING = "extracting";
			
 
				+    public static final String STATUS_COMPLETED = "completed";
			
 
				+    public static final String STATUS_ARCHIVED = "archived";
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/entity/RuleTemplate.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/entity/RuleTemplate.java
@@ -0,0 +1,67 @@
 
				+package com.lingyue.extract.entity;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.annotation.TableField;
			
 
				+import com.baomidou.mybatisplus.annotation.TableName;
			
 
				+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
			
 
				+import com.lingyue.common.domain.entity.SimpleModel;
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+import lombok.EqualsAndHashCode;
			
 
				+
			
 
				+import java.util.List;
			
 
				+import java.util.Map;
			
 
				+
			
 
				+/**
				+ * Rule template entity, stored in the {@code extract_rule_templates} table.
				+ *
				+ * <p>Used to save rule configurations so that they can be reused.
				+ *
				+ * @author lingyue
				+ * @since 2026-01-22
				+ */
				+@Data
				+@EqualsAndHashCode(callSuper = true)
				+@Schema(description = "规则模板")
				+@TableName(value = "extract_rule_templates", autoResultMap = true)
				+public class RuleTemplate extends SimpleModel {
				+
				+    // ---------- Identification ----------
				+
				+    /** User ID (column {@code user_id}). */
				+    @TableField("user_id")
				+    @Schema(description = "用户ID")
				+    private String userId;
				+
				+    /** Template name (column {@code name}). */
				+    @TableField("name")
				+    @Schema(description = "模板名称")
				+    private String name;
				+
				+    /** Template description (column {@code description}). */
				+    @TableField("description")
				+    @Schema(description = "模板描述")
				+    private String description;
				+
				+    // ---------- Template content ----------
				+
				+    /** Snapshot of the rule configurations (column {@code rules_snapshot}), mapped through {@link JacksonTypeHandler}. */
				+    @TableField(value = "rules_snapshot", typeHandler = JacksonTypeHandler.class)
				+    @Schema(description = "规则配置快照")
				+    private List<Map<String, Object>> rulesSnapshot;
				+
				+    /** Pattern of the document types the template applies to (column {@code doc_type_pattern}), mapped through {@link JacksonTypeHandler}. */
				+    @TableField(value = "doc_type_pattern", typeHandler = JacksonTypeHandler.class)
				+    @Schema(description = "适用的文档类型模式")
				+    private Map<String, Object> docTypePattern;
				+
				+    // ---------- Visibility ----------
				+
				+    /** Whether the template is public (column {@code is_public}). */
				+    @TableField("is_public")
				+    @Schema(description = "是否公开")
				+    private Boolean isPublic;
				+
				+    // ---------- Statistics ----------
				+
				+    /** Number of times the template has been used (column {@code use_count}). */
				+    @TableField("use_count")
				+    @Schema(description = "使用次数")
				+    private Integer useCount;
				+
				+    // ---------- Metadata ----------
				+
				+    /** Metadata (column {@code metadata}), mapped through {@link JacksonTypeHandler}. */
				+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
				+    @Schema(description = "元数据")
				+    private Map<String, Object> metadata;
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/entity/SourceDocument.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/entity/SourceDocument.java
@@ -0,0 +1,65 @@
 
				+package com.lingyue.extract.entity;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.annotation.TableField;
			
 
				+import com.baomidou.mybatisplus.annotation.TableId;
			
 
				+import com.baomidou.mybatisplus.annotation.TableName;
			
 
				+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
			
 
				+import com.fasterxml.jackson.annotation.JsonFormat;
			
 
				+import io.swagger.v3.oas.annotations.media.Schema;
			
 
				+import lombok.Data;
			
 
				+
			
 
				+import java.util.Date;
			
 
				+import java.util.Map;
			
 
				+
			
 
				+/**
				+ * Source document entity, stored in the {@code extract_source_documents} table.
				+ *
				+ * <p>A document used by a project; it links to an already parsed Document
				+ * through {@link #documentId}.
				+ *
				+ * @author lingyue
				+ * @since 2026-01-22
				+ */
				+@Data
				+@Schema(description = "项目来源文档")
				+@TableName(value = "extract_source_documents", autoResultMap = true)
				+public class SourceDocument {
				+
				+    // ==================== Document type constants ====================
				+
				+    /** Document type value {@code "pdf"}. */
				+    public static final String DOC_TYPE_PDF = "pdf";
				+
				+    /** Document type value {@code "docx"}. */
				+    public static final String DOC_TYPE_DOCX = "docx";
				+
				+    /** Document type value {@code "xlsx"}. */
				+    public static final String DOC_TYPE_XLSX = "xlsx";
				+
				+    // ==================== Persistent fields ====================
				+
				+    /** Primary key. */
				+    @TableId
				+    @Schema(description = "ID")
				+    private String id;
				+
				+    /** Project ID (column {@code project_id}). */
				+    @TableField("project_id")
				+    @Schema(description = "项目ID")
				+    private String projectId;
				+
				+    /** ID of the linked Document (column {@code document_id}). */
				+    @TableField("document_id")
				+    @Schema(description = "关联的 Document ID")
				+    private String documentId;
				+
				+    /** Document alias (column {@code alias}). */
				+    @TableField("alias")
				+    @Schema(description = "文档别名，如'可研批复'")
				+    private String alias;
				+
				+    /** Document type (column {@code doc_type}); see the {@code DOC_TYPE_*} constants. */
				+    @TableField("doc_type")
				+    @Schema(description = "文档类型: pdf/docx/xlsx")
				+    private String docType;
				+
				+    /** Display order (column {@code display_order}). */
				+    @TableField("display_order")
				+    @Schema(description = "显示顺序")
				+    private Integer displayOrder;
				+
				+    /** Metadata (column {@code metadata}), mapped through {@link JacksonTypeHandler}. */
				+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
				+    @Schema(description = "元数据")
				+    private Map<String, Object> metadata;
				+
				+    /**
				+     * Creation time (column {@code create_time}), serialized as {@code yyyy-MM-dd HH:mm:ss}.
				+     *
				+     * <p>NOTE(review): the format declares no time zone, so the rendered value
				+     * presumably follows the ObjectMapper's configured zone — confirm.
				+     */
				+    @TableField("create_time")
				+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
				+    @Schema(description = "创建时间")
				+    private Date createTime;
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/repository/ExtractResultRepository.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/repository/ExtractResultRepository.java
@@ -0,0 +1,74 @@
 
				+package com.lingyue.extract.repository;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
			
 
				+import com.lingyue.extract.entity.ExtractResult;
			
 
				+import org.apache.ibatis.annotations.Delete;
			
 
				+import org.apache.ibatis.annotations.Mapper;
			
 
				+import org.apache.ibatis.annotations.Param;
			
 
				+import org.apache.ibatis.annotations.Select;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
			
 
				+ * 提取结果 Repository
			
 
				+ * 
			
 
				+ * @author lingyue
			
 
				+ * @since 2026-01-22
			
 
				+ */
			
 
				+@Mapper
			
 
				+public interface ExtractResultRepository extends BaseMapper<ExtractResult> {
			
 
				+    
			
 
				+    /**
			
 
				+     * 根据规则ID查询结果列表
			
 
				+     */
			
 
				+    @Select("SELECT * FROM extract_results WHERE rule_id = #{ruleId} ORDER BY create_time DESC")
			
 
				+    List<ExtractResult> findByRuleId(@Param("ruleId") String ruleId);
			
 
				+    
			
 
				+    /**
			
 
				+     * 根据规则ID查询最新结果
			
 
				+     */
			
 
				+    @Select("SELECT * FROM extract_results WHERE rule_id = #{ruleId} ORDER BY create_time DESC LIMIT 1")
			
 
				+    ExtractResult findLatestByRuleId(@Param("ruleId") String ruleId);
			
 
				+    
			
 
				+    /**
			
 
				+     * 根据项目ID查询所有结果
			
 
				+     */
			
 
				+    @Select("SELECT * FROM extract_results WHERE project_id = #{projectId} ORDER BY create_time DESC")
			
 
				+    List<ExtractResult> findByProjectId(@Param("projectId") String projectId);
			
 
				+    
			
 
				+    /**
			
 
				+     * 根据状态查询结果
			
 
				+     */
			
 
				+    @Select("SELECT * FROM extract_results WHERE project_id = #{projectId} AND status = #{status} ORDER BY create_time DESC")
			
 
				+    List<ExtractResult> findByProjectIdAndStatus(@Param("projectId") String projectId, @Param("status") String status);
			
 
				+    
			
 
				+    /**
			
 
				+     * 删除规则的所有结果
			
 
				+     */
			
 
				+    @Delete("DELETE FROM extract_results WHERE rule_id = #{ruleId}")
			
 
				+    int deleteByRuleId(@Param("ruleId") String ruleId);
			
 
				+    
			
 
				+    /**
			
 
				+     * 删除项目的所有结果
			
 
				+     */
			
 
				+    @Delete("DELETE FROM extract_results WHERE project_id = #{projectId}")
			
 
				+    int deleteByProjectId(@Param("projectId") String projectId);
			
 
				+    
			
 
				+    /**
			
 
				+     * 统计项目结果数量
			
 
				+     */
			
 
				+    @Select("SELECT COUNT(*) FROM extract_results WHERE project_id = #{projectId}")
			
 
				+    int countByProjectId(@Param("projectId") String projectId);
			
 
				+    
			
 
				+    /**
			
 
				+     * 按状态统计项目结果数量
			
 
				+     */
			
 
				+    @Select("SELECT status, COUNT(*) as count FROM extract_results WHERE project_id = #{projectId} GROUP BY status")
			
 
				+    List<java.util.Map<String, Object>> countByProjectIdGroupByStatus(@Param("projectId") String projectId);
			
 
				+    
			
 
				+    /**
			
 
				+     * 查询待确认的结果
			
 
				+     */
			
 
				+    @Select("SELECT * FROM extract_results WHERE project_id = #{projectId} AND status = 'extracted' ORDER BY create_time")
			
 
				+    List<ExtractResult> findPendingConfirmByProjectId(@Param("projectId") String projectId);
			
 
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/repository/ExtractRuleRepository.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/repository/ExtractRuleRepository.java
@@ -0,0 +1,88 @@
 
				+package com.lingyue.extract.repository;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
			
 
				+import com.lingyue.extract.entity.ExtractRule;
			
 
				+import org.apache.ibatis.annotations.Delete;
			
 
				+import org.apache.ibatis.annotations.Mapper;
			
 
				+import org.apache.ibatis.annotations.Param;
			
 
				+import org.apache.ibatis.annotations.Select;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
				+ * MyBatis mapper for {@link ExtractRule} rows in the {@code extract_rules} table.
				+ *
				+ * <p>NOTE(review): the entity class is not visible from here. If it declares
				+ * typeHandler-backed columns with {@code autoResultMap = true}, the
				+ * hand-written {@code SELECT *} statements below presumably do not use that
				+ * result map — confirm.
				+ *
				+ * @author lingyue
				+ * @since 2026-01-22
				+ */
				+@Mapper
				+public interface ExtractRuleRepository extends BaseMapper<ExtractRule> {
				+
				+    /**
				+     * Lists the rules of a project in rule order.
				+     *
				+     * @param projectId project ID to filter on
				+     * @return matching rules ordered by {@code rule_index}
				+     */
				+    @Select("SELECT * FROM extract_rules WHERE project_id = #{projectId} ORDER BY rule_index")
				+    List<ExtractRule> findByProjectId(@Param("projectId") String projectId);
				+
				+    /**
				+     * Lists the rules of a project that are in the given status, in rule order.
				+     *
				+     * @param projectId project ID to filter on
				+     * @param status    status value to filter on
				+     * @return matching rules ordered by {@code rule_index}
				+     */
				+    @Select("SELECT * FROM extract_rules WHERE project_id = #{projectId} AND status = #{status} ORDER BY rule_index")
				+    List<ExtractRule> findByProjectIdAndStatus(@Param("projectId") String projectId, @Param("status") String status);
				+
				+    /**
				+     * Finds the rule of a project that targets the given field key.
				+     *
				+     * <p>NOTE(review): a single-row return assumes {@code (project_id, target_field_key)}
				+     * is unique; MyBatis raises an error when several rows match — confirm the constraint.
				+     *
				+     * @param projectId      project ID to filter on
				+     * @param targetFieldKey target field key to filter on
				+     * @return the matching rule, or {@code null} when none exists
				+     */
				+    @Select("SELECT * FROM extract_rules WHERE project_id = #{projectId} AND target_field_key = #{targetFieldKey}")
				+    ExtractRule findByProjectIdAndTargetFieldKey(@Param("projectId") String projectId,
				+                                                  @Param("targetFieldKey") String targetFieldKey);
				+
				+    /**
				+     * Lists the rules that read from the given source document, in rule order.
				+     *
				+     * @param sourceDocId source document ID to filter on
				+     * @return matching rules ordered by {@code rule_index}
				+     */
				+    @Select("SELECT * FROM extract_rules WHERE source_doc_id = #{sourceDocId} ORDER BY rule_index")
				+    List<ExtractRule> findBySourceDocId(@Param("sourceDocId") String sourceDocId);
				+
				+    /**
				+     * Lists the rules of a project that have the given source type, in rule order.
				+     *
				+     * @param projectId  project ID to filter on
				+     * @param sourceType source type value to filter on
				+     * @return matching rules ordered by {@code rule_index}
				+     */
				+    @Select("SELECT * FROM extract_rules WHERE project_id = #{projectId} AND source_type = #{sourceType} ORDER BY rule_index")
				+    List<ExtractRule> findByProjectIdAndSourceType(@Param("projectId") String projectId,
				+                                                    @Param("sourceType") String sourceType);
				+
				+    /**
				+     * Deletes every rule of a project.
				+     *
				+     * @param projectId project ID whose rules are removed
				+     * @return number of deleted rows
				+     */
				+    @Delete("DELETE FROM extract_rules WHERE project_id = #{projectId}")
				+    int deleteByProjectId(@Param("projectId") String projectId);
				+
				+    /**
				+     * Counts the rules of a project.
				+     *
				+     * @param projectId project ID to filter on
				+     * @return number of rule rows of the project
				+     */
				+    @Select("SELECT COUNT(*) FROM extract_rules WHERE project_id = #{projectId}")
				+    int countByProjectId(@Param("projectId") String projectId);
				+
				+    /**
				+     * Counts the rules of a project per status.
				+     *
				+     * @param projectId project ID to filter on
				+     * @return one map per status, holding the selected {@code status} and {@code count} columns
				+     */
				+    @Select("SELECT status, COUNT(*) as count FROM extract_rules WHERE project_id = #{projectId} GROUP BY status")
				+    List<java.util.Map<String, Object>> countByProjectIdGroupByStatus(@Param("projectId") String projectId);
				+
				+    /**
				+     * Returns the highest {@code rule_index} used in a project.
				+     *
				+     * @param projectId project ID to filter on
				+     * @return the maximum rule index, or {@code 0} when the project has no rules
				+     */
				+    @Select("SELECT COALESCE(MAX(rule_index), 0) FROM extract_rules WHERE project_id = #{projectId}")
				+    int getMaxRuleIndex(@Param("projectId") String projectId);
				+
				+    /**
				+     * Lists the {@code self_reference} rules of a project whose source
				+     * configuration mentions the given field key (used for dependency analysis).
				+     *
				+     * <p>The key is matched literally with {@code strpos}. A {@code LIKE} pattern
				+     * built from the key would treat {@code _} and {@code %} inside the key as
				+     * wildcards and report rules that do not reference the field.
				+     *
				+     * <p>NOTE(review): this is still a substring test on the text form of
				+     * {@code source_config}, so a key contained in a longer key also matches —
				+     * tighten once the {@code source_config} schema is confirmed.
				+     *
				+     * @param projectId      project ID to filter on
				+     * @param targetFieldKey field key to look for inside {@code source_config}
				+     * @return rules whose source configuration contains the key
				+     */
				+    @Select("""
				+            SELECT * FROM extract_rules
				+            WHERE project_id = #{projectId}
				+            AND source_type = 'self_reference'
				+            AND strpos(source_config::text, #{targetFieldKey}) > 0
				+            """)
				+    List<ExtractRule> findRulesReferencingField(@Param("projectId") String projectId,
				+                                                 @Param("targetFieldKey") String targetFieldKey);
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/repository/ProjectRepository.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/repository/ProjectRepository.java
@@ -0,0 +1,49 @@
 
				+package com.lingyue.extract.repository;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
			
 
				+import com.lingyue.extract.entity.Project;
			
 
				+import org.apache.ibatis.annotations.Mapper;
			
 
				+import org.apache.ibatis.annotations.Param;
			
 
				+import org.apache.ibatis.annotations.Select;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
				+ * MyBatis mapper for {@link Project} rows in the {@code extract_projects} table.
				+ *
				+ * <p>{@link Project} is declared with {@code autoResultMap = true} and maps its
				+ * {@code config} column through a type handler. Hand-written {@code @Select}
				+ * statements do not pick up that generated result map on their own, so every
				+ * entity-returning query here names it explicitly; otherwise {@code config}
				+ * would not be deserialized. The annotation is written fully qualified to
				+ * avoid touching the import list.
				+ *
				+ * @author lingyue
				+ * @since 2026-01-22
				+ */
				+@Mapper
				+public interface ProjectRepository extends BaseMapper<Project> {
				+
				+    /**
				+     * Lists the projects of a user, newest first.
				+     *
				+     * @param userId user ID to filter on
				+     * @return matching projects ordered by {@code create_time} descending
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Project")
				+    @Select("SELECT * FROM extract_projects WHERE user_id = #{userId} ORDER BY create_time DESC")
				+    List<Project> findByUserId(@Param("userId") String userId);
				+
				+    /**
				+     * Lists the projects that are in the given status, newest first.
				+     *
				+     * @param status status value to filter on
				+     * @return matching projects ordered by {@code create_time} descending
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Project")
				+    @Select("SELECT * FROM extract_projects WHERE status = #{status} ORDER BY create_time DESC")
				+    List<Project> findByStatus(@Param("status") String status);
				+
				+    /**
				+     * Lists the projects of a user that are in the given status, newest first.
				+     *
				+     * @param userId user ID to filter on
				+     * @param status status value to filter on
				+     * @return matching projects ordered by {@code create_time} descending
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Project")
				+    @Select("SELECT * FROM extract_projects WHERE user_id = #{userId} AND status = #{status} ORDER BY create_time DESC")
				+    List<Project> findByUserIdAndStatus(@Param("userId") String userId, @Param("status") String status);
				+
				+    /**
				+     * Counts the projects of a user.
				+     *
				+     * @param userId user ID to filter on
				+     * @return number of project rows of the user
				+     */
				+    @Select("SELECT COUNT(*) FROM extract_projects WHERE user_id = #{userId}")
				+    int countByUserId(@Param("userId") String userId);
				+
				+    /**
				+     * Counts the projects of a user per status.
				+     *
				+     * @param userId user ID to filter on
				+     * @return one map per status, holding the selected {@code status} and {@code count} columns
				+     */
				+    @Select("SELECT status, COUNT(*) as count FROM extract_projects WHERE user_id = #{userId} GROUP BY status")
				+    List<java.util.Map<String, Object>> countByUserIdGroupByStatus(@Param("userId") String userId);
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/repository/RuleTemplateRepository.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/repository/RuleTemplateRepository.java
@@ -0,0 +1,65 @@
 
				+package com.lingyue.extract.repository;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
			
 
				+import com.lingyue.extract.entity.RuleTemplate;
			
 
				+import org.apache.ibatis.annotations.Mapper;
			
 
				+import org.apache.ibatis.annotations.Param;
			
 
				+import org.apache.ibatis.annotations.Select;
			
 
				+import org.apache.ibatis.annotations.Update;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
				+ * MyBatis mapper for {@link RuleTemplate} rows in the {@code extract_rule_templates} table.
				+ *
				+ * <p>{@link RuleTemplate} is declared with {@code autoResultMap = true} and maps
				+ * several columns through a type handler. Hand-written {@code @Select}
				+ * statements do not pick up that generated result map on their own, so every
				+ * entity-returning query here names it explicitly; otherwise those columns
				+ * would not be deserialized. The annotation is written fully qualified to
				+ * avoid touching the import list.
				+ *
				+ * @author lingyue
				+ * @since 2026-01-22
				+ */
				+@Mapper
				+public interface RuleTemplateRepository extends BaseMapper<RuleTemplate> {
				+
				+    /**
				+     * Lists the templates owned by a user, newest first.
				+     *
				+     * @param userId user ID to filter on
				+     * @return matching templates ordered by {@code create_time} descending
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_RuleTemplate")
				+    @Select("SELECT * FROM extract_rule_templates WHERE user_id = #{userId} ORDER BY create_time DESC")
				+    List<RuleTemplate> findByUserId(@Param("userId") String userId);
				+
				+    /**
				+     * Lists the public templates, most used first.
				+     *
				+     * @return public templates ordered by {@code use_count} then {@code create_time}, both descending
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_RuleTemplate")
				+    @Select("SELECT * FROM extract_rule_templates WHERE is_public = true ORDER BY use_count DESC, create_time DESC")
				+    List<RuleTemplate> findPublicTemplates();
				+
				+    /**
				+     * Lists the templates a user can see: the user's own plus the public ones.
				+     *
				+     * @param userId user ID whose own templates are included
				+     * @return visible templates ordered by {@code use_count} then {@code create_time}, both descending
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_RuleTemplate")
				+    @Select("""
				+            SELECT * FROM extract_rule_templates
				+            WHERE user_id = #{userId} OR is_public = true
				+            ORDER BY use_count DESC, create_time DESC
				+            """)
				+    List<RuleTemplate> findAccessibleByUserId(@Param("userId") String userId);
				+
				+    /**
				+     * Searches the templates visible to a user by a name fragment.
				+     *
				+     * <p>The keyword is matched literally with {@code strpos}. Concatenating it
				+     * into a {@code LIKE} pattern would let {@code %} and {@code _} typed by the
				+     * user act as wildcards.
				+     *
				+     * @param userId  user ID whose own templates are included alongside public ones
				+     * @param keyword text that must occur in the template name
				+     * @return matching templates ordered by {@code use_count} then {@code create_time}, both descending
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_RuleTemplate")
				+    @Select("""
				+            SELECT * FROM extract_rule_templates
				+            WHERE (user_id = #{userId} OR is_public = true)
				+            AND strpos(name, #{keyword}) > 0
				+            ORDER BY use_count DESC, create_time DESC
				+            """)
				+    List<RuleTemplate> searchByName(@Param("userId") String userId, @Param("keyword") String keyword);
				+
				+    /**
				+     * Increments the usage counter of a template by one.
				+     *
				+     * <p>A {@code NULL} counter is treated as zero; {@code NULL + 1} would
				+     * otherwise stay {@code NULL} and the increment would be lost.
				+     *
				+     * @param id template ID
				+     * @return number of updated rows
				+     */
				+    @Update("UPDATE extract_rule_templates SET use_count = COALESCE(use_count, 0) + 1 WHERE id = #{id}")
				+    int incrementUseCount(@Param("id") String id);
				+
				+    /**
				+     * Counts the templates owned by a user.
				+     *
				+     * @param userId user ID to filter on
				+     * @return number of template rows of the user
				+     */
				+    @Select("SELECT COUNT(*) FROM extract_rule_templates WHERE user_id = #{userId}")
				+    int countByUserId(@Param("userId") String userId);
				+}
			
--- a/backend/extract-service/src/main/java/com/lingyue/extract/repository/SourceDocumentRepository.java
+++ b/backend/extract-service/src/main/java/com/lingyue/extract/repository/SourceDocumentRepository.java
@@ -0,0 +1,56 @@
 
				+package com.lingyue.extract.repository;
			
 
				+
			
 
				+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
			
 
				+import com.lingyue.extract.entity.SourceDocument;
			
 
				+import org.apache.ibatis.annotations.Delete;
			
 
				+import org.apache.ibatis.annotations.Mapper;
			
 
				+import org.apache.ibatis.annotations.Param;
			
 
				+import org.apache.ibatis.annotations.Select;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
				+ * MyBatis mapper for {@link SourceDocument} rows in the {@code extract_source_documents} table.
				+ *
				+ * <p>{@link SourceDocument} is declared with {@code autoResultMap = true} and
				+ * maps its {@code metadata} column through a type handler. Hand-written
				+ * {@code @Select} statements do not pick up that generated result map on their
				+ * own, so every entity-returning query here names it explicitly; otherwise
				+ * {@code metadata} would not be deserialized. The annotation is written fully
				+ * qualified to avoid touching the import list.
				+ *
				+ * @author lingyue
				+ * @since 2026-01-22
				+ */
				+@Mapper
				+public interface SourceDocumentRepository extends BaseMapper<SourceDocument> {
				+
				+    /**
				+     * Lists the source documents of a project in display order.
				+     *
				+     * @param projectId project ID to filter on
				+     * @return matching source documents ordered by {@code display_order}
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_SourceDocument")
				+    @Select("SELECT * FROM extract_source_documents WHERE project_id = #{projectId} ORDER BY display_order")
				+    List<SourceDocument> findByProjectId(@Param("projectId") String projectId);
				+
				+    /**
				+     * Lists the source documents that link to the given Document.
				+     *
				+     * @param documentId linked Document ID to filter on
				+     * @return matching source documents
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_SourceDocument")
				+    @Select("SELECT * FROM extract_source_documents WHERE document_id = #{documentId}")
				+    List<SourceDocument> findByDocumentId(@Param("documentId") String documentId);
				+
				+    /**
				+     * Finds the source document of a project that carries the given alias.
				+     *
				+     * <p>NOTE(review): a single-row return assumes {@code (project_id, alias)} is
				+     * unique; MyBatis raises an error when several rows match — confirm the constraint.
				+     *
				+     * @param projectId project ID to filter on
				+     * @param alias     document alias to filter on
				+     * @return the matching source document, or {@code null} when none exists
				+     */
				+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_SourceDocument")
				+    @Select("SELECT * FROM extract_source_documents WHERE project_id = #{projectId} AND alias = #{alias}")
				+    SourceDocument findByProjectIdAndAlias(@Param("projectId") String projectId, @Param("alias") String alias);
				+
				+    /**
				+     * Deletes every source document of a project.
				+     *
				+     * @param projectId project ID whose source documents are removed
				+     * @return number of deleted rows
				+     */
				+    @Delete("DELETE FROM extract_source_documents WHERE project_id = #{projectId}")
				+    int deleteByProjectId(@Param("projectId") String projectId);
				+
				+    /**
				+     * Counts the source documents of a project.
				+     *
				+     * @param projectId project ID to filter on
				+     * @return number of source document rows of the project
				+     */
				+    @Select("SELECT COUNT(*) FROM extract_source_documents WHERE project_id = #{projectId}")
				+    int countByProjectId(@Param("projectId") String projectId);
				+
				+    /**
				+     * Returns the highest {@code display_order} used in a project.
				+     *
				+     * @param projectId project ID to filter on
				+     * @return the maximum display order, or {@code 0} when the project has no source documents
				+     */
				+    @Select("SELECT COALESCE(MAX(display_order), 0) FROM extract_source_documents WHERE project_id = #{projectId}")
				+    int getMaxDisplayOrder(@Param("projectId") String projectId);
				+}