Przeglądaj źródła

feat(extract-service): 数据模型重构 v2.0

根据新设计理念重构数据模型:
- 从「规则配置驱动」改为「示例文档驱动」

数据库迁移:
- V2026_01_23_01__refactor_extract_to_template.sql
- 创建 templates 表(替代 extract_projects)
- 创建 source_files 表(来源文件定义)
- 创建 variables 表(替代 extract_rules)
- 创建 generations 表(生成任务)

新实体类(4个):
- Template: 报告模板,包含示例报告和配置
- SourceFile: 来源文件定义,用别名标识
- Variable: 模板变量,绑定到文档位置
- Generation: 生成任务,记录一次报告生成

新 DTO 类(2个):
- VariableLocation: 变量在文档中的位置
- ReferenceSourceConfig: 引用来源配置

新 Repository(4个):
- TemplateRepository
- SourceFileRepository
- VariableRepository
- GenerationRepository

注:旧实体类(Project/SourceDocument/ExtractRule等)暂保留
何文松 1 miesiąc temu
rodzic
commit
62221a1694

+ 35 - 0
backend/extract-service/src/main/java/com/lingyue/extract/dto/config/ReferenceSourceConfig.java

@@ -0,0 +1,35 @@
+package com.lingyue.extract.dto.config;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Source configuration for variables whose {@code sourceType} is "reference".
+ *
+ * <p>A reference variable is composed from the values of other, already
+ * extracted variables instead of being extracted on its own.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Data
+@Schema(description = "引用来源配置")
+public class ReferenceSourceConfig {
+    
+    /** Names of the variables this variable refers to. */
+    @Schema(description = "引用的变量名列表")
+    private List<String> referenceVariables;
+    
+    /**
+     * Template used to combine the referenced values. In the schema example the
+     * referenced variables appear as {@code {variable_name}} placeholders.
+     */
+    @Schema(description = "组合模板,如 '《{project_name}可行性研究报告》由{design_unit}编制'")
+    private String combineTemplate;
+    
+    /**
+     * Value transformation settings.
+     * NOTE(review): TransformConfig is not imported, so it has to resolve from this
+     * package; it is not part of the visible change - confirm it exists.
+     */
+    @Schema(description = "值转换配置")
+    private TransformConfig transform;
+    
+    /** Default value used when the referenced variables are empty. */
+    @Schema(description = "当引用变量为空时的默认值")
+    private String defaultValue;
+    
+    /** Whether every referenced variable is required to have a value. */
+    @Schema(description = "是否要求所有引用变量都有值")
+    private Boolean requireAll;
+}

+ 45 - 0
backend/extract-service/src/main/java/com/lingyue/extract/dto/config/VariableLocation.java

@@ -0,0 +1,45 @@
+package com.lingyue.extract.dto.config;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+/**
+ * Position of a variable inside a document.
+ *
+ * <p>Describes the exact place in the example report that a variable is bound to.
+ * Which of the optional fields are meaningful depends on {@link #type}: the offset
+ * fields belong to text positioning and the row/column fields to table positioning.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Data
+@Schema(description = "变量位置配置")
+public class VariableLocation {
+    
+    /** ID of the document element that contains the variable. */
+    @Schema(description = "DocumentElement ID")
+    private String elementId;
+    
+    /** Location type; one of {@link #TYPE_TEXT}, {@link #TYPE_TABLE_CELL}, {@link #TYPE_PARAGRAPH}. */
+    @Schema(description = "位置类型: text-文本, table_cell-表格单元格, paragraph-段落")
+    private String type;
+    
+    // ==================== Text positioning ====================
+    
+    /** Start offset of the text span. */
+    @Schema(description = "文本起始偏移量")
+    private Integer startOffset;
+    
+    /** End offset of the text span. NOTE(review): inclusive vs. exclusive is not specified here - confirm. */
+    @Schema(description = "文本结束偏移量")
+    private Integer endOffset;
+    
+    // ==================== Table positioning ====================
+    
+    /** Zero-based row index of the table cell. */
+    @Schema(description = "表格行索引(0-based)")
+    private Integer rowIndex;
+    
+    /** Zero-based column index of the table cell. */
+    @Schema(description = "表格列索引(0-based)")
+    private Integer colIndex;
+    
+    // ==================== Location type constants ====================
+    
+    public static final String TYPE_TEXT = "text";
+    public static final String TYPE_TABLE_CELL = "table_cell";
+    public static final String TYPE_PARAGRAPH = "paragraph";
+}

+ 127 - 0
backend/extract-service/src/main/java/com/lingyue/extract/entity/Generation.java

@@ -0,0 +1,127 @@
+package com.lingyue.extract.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.fasterxml.jackson.annotation.JsonFormat;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+import java.util.Map;
+
+/**
+ * Report generation task entity (table {@code generations}).
+ *
+ * <p>Represents one run of generating a new report from a template. It records the
+ * mapping of source files, the variable extraction results and the generated document.
+ *
+ * <p>The JSON columns are mapped through {@link JacksonTypeHandler}; the class is declared
+ * with {@code autoResultMap = true} so that MyBatis-Plus builds a result map that applies
+ * those handlers.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Data
+@TableName(value = "generations", autoResultMap = true)
+@Schema(description = "报告生成任务")
+public class Generation {
+    
+    /** Primary key. */
+    @Schema(description = "ID")
+    @TableId
+    private String id;
+    
+    /** ID of the template this task was generated from. */
+    @Schema(description = "模板ID")
+    @TableField("template_id")
+    private String templateId;
+    
+    /** ID of the user the task belongs to. */
+    @Schema(description = "用户ID")
+    @TableField("user_id")
+    private String userId;
+    
+    /** Task name. */
+    @Schema(description = "任务名称")
+    @TableField("name")
+    private String name;
+    
+    // ==================== Source file mapping ====================
+    
+    /** Source file mapping: source file alias -> document ID, stored as JSON. */
+    @Schema(description = "来源文件映射:别名 → 文档ID,如 {\"可研批复\": \"doc_123\"}")
+    @TableField(value = "source_file_map", typeHandler = JacksonTypeHandler.class)
+    private Map<String, String> sourceFileMap;
+    
+    // ==================== Variable extraction results ====================
+    
+    /**
+     * Extracted variable values (value, confidence, status, ...), stored as JSON.
+     * NOTE(review): presumably keyed by variable name - confirm against the writer.
+     * NOTE(review): depending on the MyBatis-Plus version, JacksonTypeHandler may
+     * deserialize by raw field type only, in which case the map values come back as
+     * generic maps rather than VariableValue - confirm with the version in use.
+     */
+    @Schema(description = "变量值,包含值、置信度、状态等")
+    @TableField(value = "variable_values", typeHandler = JacksonTypeHandler.class)
+    private Map<String, VariableValue> variableValues;
+    
+    // ==================== Generated document ====================
+    
+    /** ID of the output document. */
+    @Schema(description = "输出文档ID")
+    @TableField("output_document_id")
+    private String outputDocumentId;
+    
+    /** Path of the output file. */
+    @Schema(description = "输出文件路径")
+    @TableField("output_file_path")
+    private String outputFilePath;
+    
+    // ==================== Status ====================
+    
+    /** Task status; one of the {@code STATUS_*} constants of this class. */
+    @Schema(description = "状态: pending-待执行, extracting-提取中, review-待确认, completed-已完成, error-错误")
+    @TableField("status")
+    private String status;
+    
+    /** Error message. */
+    @Schema(description = "错误信息")
+    @TableField("error_message")
+    private String errorMessage;
+    
+    /** Progress percentage, 0-100. */
+    @Schema(description = "进度百分比 0-100")
+    @TableField("progress")
+    private Integer progress;
+    
+    // ==================== Timestamps ====================
+    
+    /**
+     * Creation time.
+     * NOTE(review): the JSON format declares no time zone - confirm the serialized
+     * value uses the intended zone.
+     */
+    @Schema(description = "创建时间")
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    @TableField("create_time")
+    private Date createTime;
+    
+    /** Completion time. */
+    @Schema(description = "完成时间")
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    @TableField("completed_at")
+    private Date completedAt;
+    
+    // ==================== Task status constants ====================
+    
+    public static final String STATUS_PENDING = "pending";
+    public static final String STATUS_EXTRACTING = "extracting";
+    public static final String STATUS_REVIEW = "review";
+    public static final String STATUS_COMPLETED = "completed";
+    public static final String STATUS_ERROR = "error";
+    
+    /**
+     * Value of a single variable within a generation task.
+     */
+    @Data
+    public static class VariableValue {
+        
+        /** Extracted value. */
+        @Schema(description = "提取的值")
+        private String value;
+        
+        /** AI confidence, 0-1. */
+        @Schema(description = "AI置信度 0-1")
+        private Double confidence;
+        
+        /** Preview of the source content. */
+        @Schema(description = "来源内容预览")
+        private String sourcePreview;
+        
+        /** Per-variable status; one of the {@code STATUS_*} constants of this nested class. */
+        @Schema(description = "状态: pending-待提取, extracted-已提取, modified-已修改, error-错误")
+        private String status;
+        
+        /** Error message. */
+        @Schema(description = "错误信息")
+        private String errorMessage;
+        
+        // Per-variable status constants (distinct from the task-level constants above)
+        public static final String STATUS_PENDING = "pending";
+        public static final String STATUS_EXTRACTED = "extracted";
+        public static final String STATUS_MODIFIED = "modified";
+        public static final String STATUS_ERROR = "error";
+    }
+}

+ 64 - 0
backend/extract-service/src/main/java/com/lingyue/extract/entity/SourceFile.java

@@ -0,0 +1,64 @@
+package com.lingyue.extract.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.fasterxml.jackson.annotation.JsonFormat;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+import java.util.List;
+
+/**
+ * Source file definition entity (table {@code source_files}).
+ *
+ * <p>Describes one kind of source file a template needs, identified by an alias.
+ * Users define which source files are required when they create a template.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Data
+@TableName(value = "source_files", autoResultMap = true)
+@Schema(description = "来源文件定义")
+public class SourceFile {
+    
+    /** Primary key. */
+    @Schema(description = "ID")
+    @TableId
+    private String id;
+    
+    /** ID of the owning template. */
+    @Schema(description = "模板ID")
+    @TableField("template_id")
+    private String templateId;
+    
+    /** File alias chosen by the user. */
+    @Schema(description = "文件别名,如'可研批复'")
+    @TableField("alias")
+    private String alias;
+    
+    /** Description of the file. */
+    @Schema(description = "文件说明")
+    @TableField("description")
+    private String description;
+    
+    /** Allowed file types, stored as a JSON array and mapped through JacksonTypeHandler. */
+    @Schema(description = "允许的文件类型")
+    @TableField(value = "file_types", typeHandler = JacksonTypeHandler.class)
+    private List<String> fileTypes;
+    
+    /** Whether this source file is mandatory. */
+    @Schema(description = "是否必须")
+    @TableField("required")
+    private Boolean required;
+    
+    /** ID of the example document used when the template was created. */
+    @Schema(description = "创建模板时使用的示例文件")
+    @TableField("example_document_id")
+    private String exampleDocumentId;
+    
+    /** Display order. */
+    @Schema(description = "显示顺序")
+    @TableField("display_order")
+    private Integer displayOrder;
+    
+    /** Creation time. */
+    @Schema(description = "创建时间")
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    @TableField("create_time")
+    private Date createTime;
+}

+ 65 - 0
backend/extract-service/src/main/java/com/lingyue/extract/entity/Template.java

@@ -0,0 +1,65 @@
+package com.lingyue.extract.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.lingyue.common.domain.entity.SimpleModel;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+import java.util.Map;
+
+/**
+ * Report template entity (table {@code templates}).
+ *
+ * <p>A template is created from a real report and carries the variable definitions and
+ * source file configuration. The user uploads an example report, marks variables in it and
+ * saves it as a template, which can then be used to generate new reports.
+ *
+ * <p>NOTE(review): the id and audit fields are presumably inherited from SimpleModel, which is
+ * not visible in this change - confirm they match the columns of the templates table.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@EqualsAndHashCode(callSuper = true)
+@Data
+@TableName(value = "templates", autoResultMap = true)
+@Schema(description = "报告模板")
+public class Template extends SimpleModel {
+    
+    /** ID of the owning user. */
+    @Schema(description = "用户ID")
+    @TableField("user_id")
+    private String userId;
+    
+    /** Template name. */
+    @Schema(description = "模板名称")
+    @TableField("name")
+    private String name;
+    
+    /** Template description. */
+    @Schema(description = "模板描述")
+    @TableField("description")
+    private String description;
+    
+    /** ID of the example report document (references the documents table). */
+    @Schema(description = "示例报告文档ID,关联 documents 表")
+    @TableField("base_document_id")
+    private String baseDocumentId;
+    
+    /** Template status; one of {@link #STATUS_DRAFT}, {@link #STATUS_PUBLISHED}, {@link #STATUS_ARCHIVED}. */
+    @Schema(description = "状态: draft-草稿, published-已发布, archived-已归档")
+    @TableField("status")
+    private String status;
+    
+    /** Free-form template configuration, stored as JSON and mapped through JacksonTypeHandler. */
+    @Schema(description = "模板配置")
+    @TableField(value = "config", typeHandler = JacksonTypeHandler.class)
+    private Map<String, Object> config;
+    
+    /** Whether the template is public. */
+    @Schema(description = "是否公开")
+    @TableField("is_public")
+    private Boolean isPublic;
+    
+    /** Number of times the template has been used. */
+    @Schema(description = "使用次数")
+    @TableField("use_count")
+    private Integer useCount;
+    
+    // ==================== Status constants ====================
+    
+    public static final String STATUS_DRAFT = "draft";
+    public static final String STATUS_PUBLISHED = "published";
+    public static final String STATUS_ARCHIVED = "archived";
+}

+ 126 - 0
backend/extract-service/src/main/java/com/lingyue/extract/entity/Variable.java

@@ -0,0 +1,126 @@
+package com.lingyue.extract.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.fasterxml.jackson.annotation.JsonFormat;
+import com.lingyue.extract.dto.config.VariableLocation;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+import java.util.Map;
+
+/**
+ * Template variable entity (table {@code variables}).
+ *
+ * <p>A variable is a piece of report content that is replaced dynamically and is bound to a
+ * concrete position in the document. Users create one by selecting text in the example
+ * report and marking it as a variable.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Data
+@TableName(value = "variables", autoResultMap = true)
+@Schema(description = "模板变量")
+public class Variable {
+    
+    /** Primary key. */
+    @Schema(description = "ID")
+    @TableId
+    private String id;
+    
+    /** ID of the owning template. */
+    @Schema(description = "模板ID")
+    @TableField("template_id")
+    private String templateId;
+    
+    // ==================== Variable identity ====================
+    
+    /** Programmatic variable name, unique within a template. */
+    @Schema(description = "变量名(程序用),模板内唯一")
+    @TableField("name")
+    private String name;
+    
+    /** Display name. */
+    @Schema(description = "显示名称")
+    @TableField("display_name")
+    private String displayName;
+    
+    /** Variable group. */
+    @Schema(description = "变量分组")
+    @TableField("variable_group")
+    private String variableGroup;
+    
+    // ==================== Position in the example report ====================
+    
+    /** Position of the variable in the document, stored as JSON. */
+    @Schema(description = "变量在文档中的位置")
+    @TableField(value = "location", typeHandler = JacksonTypeHandler.class)
+    private VariableLocation location;
+    
+    // ==================== Example value ====================
+    
+    /** Example value, i.e. the value found in the original document. */
+    @Schema(description = "示例值(原文档中的值)")
+    @TableField("example_value")
+    private String exampleValue;
+    
+    /** Value type; one of the {@code VALUE_TYPE_*} constants. */
+    @Schema(description = "值类型: text-文本, date-日期, number-数字, table-表格")
+    @TableField("value_type")
+    private String valueType;
+    
+    // ==================== Data source ====================
+    
+    /** Alias of the source file; corresponds to SourceFile.alias. */
+    @Schema(description = "来源文件别名,对应 SourceFile.alias")
+    @TableField("source_file_alias")
+    private String sourceFileAlias;
+    
+    /** Source type; one of the {@code SOURCE_TYPE_*} constants. */
+    @Schema(description = "来源类型: document-从来源文件提取, manual-手动输入, reference-引用其他变量, fixed-固定值")
+    @TableField("source_type")
+    private String sourceType;
+    
+    /**
+     * Source configuration, stored as JSON.
+     * NOTE(review): for source type "reference" this presumably has the shape of
+     * ReferenceSourceConfig - confirm against the code that reads it.
+     */
+    @Schema(description = "来源配置")
+    @TableField(value = "source_config", typeHandler = JacksonTypeHandler.class)
+    private Map<String, Object> sourceConfig;
+    
+    // ==================== Extraction method ====================
+    
+    /** Extraction type; one of the {@code EXTRACT_TYPE_*} constants. */
+    @Schema(description = "提取类型: direct-直接提取, ai_extract-AI字段提取, ai_summarize-AI总结")
+    @TableField("extract_type")
+    private String extractType;
+    
+    /** Extraction configuration, stored as JSON. */
+    @Schema(description = "提取配置")
+    @TableField(value = "extract_config", typeHandler = JacksonTypeHandler.class)
+    private Map<String, Object> extractConfig;
+    
+    // ==================== Miscellaneous ====================
+    
+    /** Display order. */
+    @Schema(description = "显示顺序")
+    @TableField("display_order")
+    private Integer displayOrder;
+    
+    /** Creation time. */
+    @Schema(description = "创建时间")
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    @TableField("create_time")
+    private Date createTime;
+    
+    /** Last update time. */
+    @Schema(description = "更新时间")
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    @TableField("update_time")
+    private Date updateTime;
+    
+    // ==================== Source type constants ====================
+    
+    public static final String SOURCE_TYPE_DOCUMENT = "document";
+    public static final String SOURCE_TYPE_MANUAL = "manual";
+    public static final String SOURCE_TYPE_REFERENCE = "reference";
+    public static final String SOURCE_TYPE_FIXED = "fixed";
+    
+    // ==================== Extraction type constants ====================
+    
+    public static final String EXTRACT_TYPE_DIRECT = "direct";
+    public static final String EXTRACT_TYPE_AI_EXTRACT = "ai_extract";
+    public static final String EXTRACT_TYPE_AI_SUMMARIZE = "ai_summarize";
+    
+    // ==================== Value type constants ====================
+    
+    public static final String VALUE_TYPE_TEXT = "text";
+    public static final String VALUE_TYPE_DATE = "date";
+    public static final String VALUE_TYPE_NUMBER = "number";
+    public static final String VALUE_TYPE_TABLE = "table";
+}

+ 74 - 0
backend/extract-service/src/main/java/com/lingyue/extract/repository/GenerationRepository.java

@@ -0,0 +1,74 @@
+package com.lingyue.extract.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.extract.entity.Generation;
+import org.apache.ibatis.annotations.Mapper;
+import org.apache.ibatis.annotations.Param;
+import org.apache.ibatis.annotations.Select;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MyBatis mapper for {@link Generation} (table {@code generations}).
+ *
+ * <p>Every entity-returning query references {@code mybatis-plus_Generation}, the result map
+ * MyBatis-Plus builds for entities declared with {@code autoResultMap = true}. Hand-written
+ * {@code @Select} statements otherwise fall back to plain auto-mapping, which does not apply
+ * the {@code typeHandler} declared on the JSON fields ({@code sourceFileMap},
+ * {@code variableValues}), so those fields would not be populated. The annotation is written
+ * fully qualified so that no additional import is needed.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Mapper
+public interface GenerationRepository extends BaseMapper<Generation> {
+    
+    /**
+     * Lists the generation tasks of a template, newest first.
+     *
+     * @param templateId template ID
+     * @return matching tasks ordered by {@code create_time} descending
+     */
+    @Select("SELECT * FROM generations WHERE template_id = #{templateId} ORDER BY create_time DESC")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Generation")
+    List<Generation> findByTemplateId(@Param("templateId") String templateId);
+    
+    /**
+     * Lists the generation tasks of a user, newest first.
+     *
+     * @param userId user ID
+     * @return matching tasks ordered by {@code create_time} descending
+     */
+    @Select("SELECT * FROM generations WHERE user_id = #{userId} ORDER BY create_time DESC")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Generation")
+    List<Generation> findByUserId(@Param("userId") String userId);
+    
+    /**
+     * Lists the generation tasks in a given status, newest first.
+     *
+     * @param status status value to match
+     * @return matching tasks ordered by {@code create_time} descending
+     */
+    @Select("SELECT * FROM generations WHERE status = #{status} ORDER BY create_time DESC")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Generation")
+    List<Generation> findByStatus(@Param("status") String status);
+    
+    /**
+     * Lists the generation tasks of a user in a given status, newest first.
+     *
+     * @param userId user ID
+     * @param status status value to match
+     * @return matching tasks ordered by {@code create_time} descending
+     */
+    @Select("SELECT * FROM generations WHERE user_id = #{userId} AND status = #{status} ORDER BY create_time DESC")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Generation")
+    List<Generation> findByUserIdAndStatus(@Param("userId") String userId, @Param("status") String status);
+    
+    /**
+     * Counts the generation tasks of a template.
+     *
+     * @param templateId template ID
+     * @return number of matching rows
+     */
+    @Select("SELECT COUNT(*) FROM generations WHERE template_id = #{templateId}")
+    int countByTemplateId(@Param("templateId") String templateId);
+    
+    /**
+     * Counts the generation tasks of a user.
+     *
+     * @param userId user ID
+     * @return number of matching rows
+     */
+    @Select("SELECT COUNT(*) FROM generations WHERE user_id = #{userId}")
+    int countByUserId(@Param("userId") String userId);
+    
+    /**
+     * Counts a user's generation tasks grouped by status.
+     *
+     * @param userId user ID
+     * @return one map per status with the selected columns {@code status} and {@code count}
+     */
+    @Select("SELECT status, COUNT(*) as count FROM generations WHERE user_id = #{userId} GROUP BY status")
+    List<Map<String, Object>> countByUserIdGroupByStatus(@Param("userId") String userId);
+    
+    /**
+     * Lists the tasks that are still to be processed, i.e. whose status is
+     * {@code pending} or {@code extracting}, oldest first.
+     *
+     * @return matching tasks ordered by {@code create_time} ascending
+     */
+    @Select("SELECT * FROM generations WHERE status IN ('pending', 'extracting') ORDER BY create_time")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Generation")
+    List<Generation> findPendingGenerations();
+    
+    /**
+     * Lists the most recent generation tasks of a user.
+     *
+     * @param userId user ID
+     * @param limit  maximum number of rows to return
+     * @return at most {@code limit} tasks ordered by {@code create_time} descending
+     */
+    @Select("SELECT * FROM generations WHERE user_id = #{userId} ORDER BY create_time DESC LIMIT #{limit}")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Generation")
+    List<Generation> findRecentByUserId(@Param("userId") String userId, @Param("limit") int limit);
+}

+ 56 - 0
backend/extract-service/src/main/java/com/lingyue/extract/repository/SourceFileRepository.java

@@ -0,0 +1,56 @@
+package com.lingyue.extract.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.extract.entity.SourceFile;
+import org.apache.ibatis.annotations.Delete;
+import org.apache.ibatis.annotations.Mapper;
+import org.apache.ibatis.annotations.Param;
+import org.apache.ibatis.annotations.Select;
+
+import java.util.List;
+
+/**
+ * MyBatis mapper for {@link SourceFile} (table {@code source_files}).
+ *
+ * <p>Every entity-returning query references {@code mybatis-plus_SourceFile}, the result map
+ * MyBatis-Plus builds for entities declared with {@code autoResultMap = true}. Hand-written
+ * {@code @Select} statements otherwise fall back to plain auto-mapping, which does not apply
+ * the {@code typeHandler} declared on the JSON field {@code fileTypes}, so it would not be
+ * populated. The annotation is written fully qualified so that no additional import is needed.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Mapper
+public interface SourceFileRepository extends BaseMapper<SourceFile> {
+    
+    /**
+     * Lists the source file definitions of a template.
+     *
+     * @param templateId template ID
+     * @return matching definitions ordered by {@code display_order}
+     */
+    @Select("SELECT * FROM source_files WHERE template_id = #{templateId} ORDER BY display_order")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_SourceFile")
+    List<SourceFile> findByTemplateId(@Param("templateId") String templateId);
+    
+    /**
+     * Finds a source file definition by template and alias.
+     *
+     * @param templateId template ID
+     * @param alias      file alias
+     * @return the matching definition, or {@code null} when no row matches
+     */
+    @Select("SELECT * FROM source_files WHERE template_id = #{templateId} AND alias = #{alias}")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_SourceFile")
+    SourceFile findByTemplateIdAndAlias(@Param("templateId") String templateId, @Param("alias") String alias);
+    
+    /**
+     * Deletes all source file definitions of a template.
+     *
+     * @param templateId template ID
+     * @return number of deleted rows
+     */
+    @Delete("DELETE FROM source_files WHERE template_id = #{templateId}")
+    int deleteByTemplateId(@Param("templateId") String templateId);
+    
+    /**
+     * Counts the source file definitions of a template.
+     *
+     * @param templateId template ID
+     * @return number of matching rows
+     */
+    @Select("SELECT COUNT(*) FROM source_files WHERE template_id = #{templateId}")
+    int countByTemplateId(@Param("templateId") String templateId);
+    
+    /**
+     * Returns the largest display order used within a template.
+     *
+     * @param templateId template ID
+     * @return the maximum {@code display_order}, or 0 when the template has no definitions
+     */
+    @Select("SELECT COALESCE(MAX(display_order), 0) FROM source_files WHERE template_id = #{templateId}")
+    int getMaxDisplayOrder(@Param("templateId") String templateId);
+    
+    /**
+     * Lists the mandatory source file definitions of a template.
+     *
+     * @param templateId template ID
+     * @return definitions with {@code required = true}, ordered by {@code display_order}
+     */
+    @Select("SELECT * FROM source_files WHERE template_id = #{templateId} AND required = true ORDER BY display_order")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_SourceFile")
+    List<SourceFile> findRequiredByTemplateId(@Param("templateId") String templateId);
+}

+ 84 - 0
backend/extract-service/src/main/java/com/lingyue/extract/repository/TemplateRepository.java

@@ -0,0 +1,84 @@
+package com.lingyue.extract.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.extract.entity.Template;
+import org.apache.ibatis.annotations.Mapper;
+import org.apache.ibatis.annotations.Param;
+import org.apache.ibatis.annotations.Select;
+import org.apache.ibatis.annotations.Update;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MyBatis mapper for {@link Template} (table {@code templates}).
+ *
+ * <p>Every entity-returning query references {@code mybatis-plus_Template}, the result map
+ * MyBatis-Plus builds for entities declared with {@code autoResultMap = true}. Hand-written
+ * {@code @Select} statements otherwise fall back to plain auto-mapping, which does not apply
+ * the {@code typeHandler} declared on the JSON field {@code config}, so it would not be
+ * populated. The annotation is written fully qualified so that no additional import is needed.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Mapper
+public interface TemplateRepository extends BaseMapper<Template> {
+    
+    /**
+     * Lists the templates owned by a user, newest first.
+     *
+     * @param userId user ID
+     * @return matching templates ordered by {@code create_time} descending
+     */
+    @Select("SELECT * FROM templates WHERE user_id = #{userId} ORDER BY create_time DESC")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Template")
+    List<Template> findByUserId(@Param("userId") String userId);
+    
+    /**
+     * Lists the templates in a given status, newest first.
+     *
+     * @param status status value to match
+     * @return matching templates ordered by {@code create_time} descending
+     */
+    @Select("SELECT * FROM templates WHERE status = #{status} ORDER BY create_time DESC")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Template")
+    List<Template> findByStatus(@Param("status") String status);
+    
+    /**
+     * Lists the templates owned by a user in a given status, newest first.
+     *
+     * @param userId user ID
+     * @param status status value to match
+     * @return matching templates ordered by {@code create_time} descending
+     */
+    @Select("SELECT * FROM templates WHERE user_id = #{userId} AND status = #{status} ORDER BY create_time DESC")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Template")
+    List<Template> findByUserIdAndStatus(@Param("userId") String userId, @Param("status") String status);
+    
+    /**
+     * Lists the public templates, i.e. published templates flagged as public.
+     *
+     * @return matching templates, most used first, then newest first
+     */
+    @Select("SELECT * FROM templates WHERE is_public = true AND status = 'published' ORDER BY use_count DESC, create_time DESC")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Template")
+    List<Template> findPublicTemplates();
+    
+    /**
+     * Lists the templates visible to a user: the user's own templates (in any status)
+     * plus all published public templates.
+     *
+     * @param userId user ID
+     * @return matching templates, most used first, then newest first
+     */
+    @Select("""
+            SELECT * FROM templates 
+            WHERE user_id = #{userId} OR (is_public = true AND status = 'published')
+            ORDER BY use_count DESC, create_time DESC
+            """)
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Template")
+    List<Template> findAccessibleByUserId(@Param("userId") String userId);
+    
+    /**
+     * Searches the templates visible to a user by a substring of the name.
+     *
+     * <p>NOTE(review): the keyword is bound as a parameter (no SQL injection), but
+     * {@code %} and {@code _} inside it still act as LIKE wildcards, and a {@code null}
+     * keyword makes the concatenated pattern NULL so nothing matches - confirm callers
+     * validate or escape the keyword if that matters.
+     *
+     * @param userId  user ID
+     * @param keyword substring to look for in the template name
+     * @return matching templates, most used first, then newest first
+     */
+    @Select("""
+            SELECT * FROM templates 
+            WHERE (user_id = #{userId} OR (is_public = true AND status = 'published'))
+            AND name LIKE '%' || #{keyword} || '%'
+            ORDER BY use_count DESC, create_time DESC
+            """)
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Template")
+    List<Template> searchByName(@Param("userId") String userId, @Param("keyword") String keyword);
+    
+    /**
+     * Increments the use counter of a template by one in a single UPDATE statement.
+     *
+     * @param id template ID
+     * @return number of updated rows
+     */
+    @Update("UPDATE templates SET use_count = use_count + 1 WHERE id = #{id}")
+    int incrementUseCount(@Param("id") String id);
+    
+    /**
+     * Counts the templates owned by a user.
+     *
+     * @param userId user ID
+     * @return number of matching rows
+     */
+    @Select("SELECT COUNT(*) FROM templates WHERE user_id = #{userId}")
+    int countByUserId(@Param("userId") String userId);
+    
+    /**
+     * Counts a user's templates grouped by status.
+     *
+     * @param userId user ID
+     * @return one map per status with the selected columns {@code status} and {@code count}
+     */
+    @Select("SELECT status, COUNT(*) as count FROM templates WHERE user_id = #{userId} GROUP BY status")
+    List<Map<String, Object>> countByUserIdGroupByStatus(@Param("userId") String userId);
+}

+ 87 - 0
backend/extract-service/src/main/java/com/lingyue/extract/repository/VariableRepository.java

@@ -0,0 +1,87 @@
+package com.lingyue.extract.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.extract.entity.Variable;
+import org.apache.ibatis.annotations.Delete;
+import org.apache.ibatis.annotations.Mapper;
+import org.apache.ibatis.annotations.Param;
+import org.apache.ibatis.annotations.Select;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * MyBatis mapper for {@link Variable} (table {@code variables}).
+ *
+ * <p>Every entity-returning query references {@code mybatis-plus_Variable}, the result map
+ * MyBatis-Plus builds for entities declared with {@code autoResultMap = true}. Hand-written
+ * {@code @Select} statements otherwise fall back to plain auto-mapping, which does not apply
+ * the {@code typeHandler} declared on the JSON fields ({@code location}, {@code sourceConfig},
+ * {@code extractConfig}), so those fields would not be populated. The annotation is written
+ * fully qualified so that no additional import is needed.
+ *
+ * @author lingyue
+ * @since 2026-01-23
+ */
+@Mapper
+public interface VariableRepository extends BaseMapper<Variable> {
+    
+    /**
+     * Lists the variables of a template in display order.
+     *
+     * @param templateId template ID
+     * @return matching variables ordered by {@code display_order}
+     */
+    @Select("SELECT * FROM variables WHERE template_id = #{templateId} ORDER BY display_order")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Variable")
+    List<Variable> findByTemplateId(@Param("templateId") String templateId);
+    
+    /**
+     * Finds a variable by template and variable name.
+     *
+     * @param templateId template ID
+     * @param name       variable name
+     * @return the matching variable, or {@code null} when no row matches
+     */
+    @Select("SELECT * FROM variables WHERE template_id = #{templateId} AND name = #{name}")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Variable")
+    Variable findByTemplateIdAndName(@Param("templateId") String templateId, @Param("name") String name);
+    
+    /**
+     * Lists the variables of a template that use a given source file alias.
+     *
+     * @param templateId template ID
+     * @param alias      source file alias
+     * @return matching variables ordered by {@code display_order}
+     */
+    @Select("SELECT * FROM variables WHERE template_id = #{templateId} AND source_file_alias = #{alias} ORDER BY display_order")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Variable")
+    List<Variable> findByTemplateIdAndSourceFileAlias(@Param("templateId") String templateId, @Param("alias") String alias);
+    
+    /**
+     * Lists the variables of a template that have a given source type.
+     *
+     * @param templateId template ID
+     * @param sourceType source type value to match
+     * @return matching variables ordered by {@code display_order}
+     */
+    @Select("SELECT * FROM variables WHERE template_id = #{templateId} AND source_type = #{sourceType} ORDER BY display_order")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Variable")
+    List<Variable> findByTemplateIdAndSourceType(@Param("templateId") String templateId, @Param("sourceType") String sourceType);
+    
+    /**
+     * Lists the variables of a template that belong to a given group.
+     *
+     * @param templateId template ID
+     * @param group      variable group
+     * @return matching variables ordered by {@code display_order}
+     */
+    @Select("SELECT * FROM variables WHERE template_id = #{templateId} AND variable_group = #{group} ORDER BY display_order")
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Variable")
+    List<Variable> findByTemplateIdAndGroup(@Param("templateId") String templateId, @Param("group") String group);
+    
+    /**
+     * Deletes all variables of a template.
+     *
+     * @param templateId template ID
+     * @return number of deleted rows
+     */
+    @Delete("DELETE FROM variables WHERE template_id = #{templateId}")
+    int deleteByTemplateId(@Param("templateId") String templateId);
+    
+    /**
+     * Counts the variables of a template.
+     *
+     * @param templateId template ID
+     * @return number of matching rows
+     */
+    @Select("SELECT COUNT(*) FROM variables WHERE template_id = #{templateId}")
+    int countByTemplateId(@Param("templateId") String templateId);
+    
+    /**
+     * Counts the variables of a template grouped by source type.
+     *
+     * @param templateId template ID
+     * @return one map per source type with the selected columns {@code source_type} and {@code count}
+     */
+    @Select("SELECT source_type, COUNT(*) as count FROM variables WHERE template_id = #{templateId} GROUP BY source_type")
+    List<Map<String, Object>> countByTemplateIdGroupBySourceType(@Param("templateId") String templateId);
+    
+    /**
+     * Returns the largest display order used within a template.
+     *
+     * @param templateId template ID
+     * @return the maximum {@code display_order}, or 0 when the template has no variables
+     */
+    @Select("SELECT COALESCE(MAX(display_order), 0) FROM variables WHERE template_id = #{templateId}")
+    int getMaxDisplayOrder(@Param("templateId") String templateId);
+    
+    /**
+     * Lists the {@code reference} variables of a template whose source configuration
+     * mentions a given variable name (used for dependency analysis).
+     *
+     * <p>The match is a substring search over the JSON text of {@code source_config}, so it
+     * can return false positives: searching for {@code name} also matches a configuration that
+     * mentions {@code project_name}, and {@code _} / {@code %} in the name act as LIKE wildcards.
+     * Callers that need exact dependencies should re-check the returned rows.
+     *
+     * @param templateId   template ID
+     * @param variableName name of the variable being referenced
+     * @return candidate variables whose source configuration contains {@code variableName}
+     */
+    @Select("""
+            SELECT * FROM variables 
+            WHERE template_id = #{templateId} 
+            AND source_type = 'reference'
+            AND source_config::text LIKE '%' || #{variableName} || '%'
+            """)
+    @org.apache.ibatis.annotations.ResultMap("mybatis-plus_Variable")
+    List<Variable> findVariablesReferencingVariable(@Param("templateId") String templateId, 
+                                                     @Param("variableName") String variableName);
+}

+ 159 - 0
database/migrations/V2026_01_23_01__refactor_extract_to_template.sql

@@ -0,0 +1,159 @@
+-- =====================================================
+-- 数据提取规则系统重构 v2.0
+-- 从「规则配置驱动」改为「示例文档驱动」
+-- =====================================================
+
+-- 1. Create the templates table (replaces extract_projects)
+-- PostgreSQL has no inline column COMMENT clause (that is MySQL syntax and makes
+-- CREATE TABLE fail here), so column descriptions are attached with
+-- COMMENT ON COLUMN after the table has been created.
+CREATE TABLE IF NOT EXISTS templates (
+    id VARCHAR(36) PRIMARY KEY,
+    user_id VARCHAR(36) NOT NULL,
+    name VARCHAR(255) NOT NULL,
+    description TEXT,
+    base_document_id VARCHAR(36) NOT NULL,
+    status VARCHAR(32) DEFAULT 'draft',
+    config JSONB,
+    is_public BOOLEAN DEFAULT FALSE,
+    use_count INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    create_by VARCHAR(36),
+    create_by_name VARCHAR(100),
+    update_by VARCHAR(36),
+    update_by_name VARCHAR(100)
+);
+
+CREATE INDEX IF NOT EXISTS idx_templates_user_id ON templates(user_id);
+CREATE INDEX IF NOT EXISTS idx_templates_status ON templates(status);
+CREATE INDEX IF NOT EXISTS idx_templates_is_public ON templates(is_public);
+
+COMMENT ON TABLE templates IS '报告模板';
+COMMENT ON COLUMN templates.base_document_id IS '示例报告文档ID,关联 documents 表';
+COMMENT ON COLUMN templates.status IS 'draft-草稿, published-已发布, archived-已归档';
+COMMENT ON COLUMN templates.config IS '模板配置,如默认AI模型等';
+COMMENT ON COLUMN templates.is_public IS '是否公开';
+COMMENT ON COLUMN templates.use_count IS '使用次数';
+
+-- 2. Create the source_files table (source file definitions, replaces extract_source_documents)
+-- PostgreSQL has no inline column COMMENT clause (that is MySQL syntax and makes
+-- CREATE TABLE fail here), so column descriptions are attached with
+-- COMMENT ON COLUMN after the table has been created.
+CREATE TABLE IF NOT EXISTS source_files (
+    id VARCHAR(36) PRIMARY KEY,
+    template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
+    alias VARCHAR(100) NOT NULL,
+    description TEXT,
+    file_types JSONB DEFAULT '["pdf", "docx"]',
+    required BOOLEAN DEFAULT TRUE,
+    example_document_id VARCHAR(36),
+    display_order INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    
+    -- an alias is unique within its template
+    CONSTRAINT uk_source_files_alias UNIQUE (template_id, alias)
+);
+
+CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files(template_id);
+
+COMMENT ON TABLE source_files IS '来源文件定义';
+COMMENT ON COLUMN source_files.alias IS '用户自定义的别名,用于引用';
+COMMENT ON COLUMN source_files.description IS '文件说明';
+COMMENT ON COLUMN source_files.file_types IS '允许上传的文件类型列表';
+COMMENT ON COLUMN source_files.required IS '是否必须';
+COMMENT ON COLUMN source_files.example_document_id IS '创建模板时使用的示例文件,用于预览';
+COMMENT ON COLUMN source_files.display_order IS '显示顺序';
+
+-- 3. Create the variables table (replaces extract_rules)
+-- PostgreSQL has no inline column COMMENT clause (that is MySQL syntax and makes
+-- CREATE TABLE fail here), so column descriptions are attached with
+-- COMMENT ON COLUMN after the table has been created.
+CREATE TABLE IF NOT EXISTS variables (
+    id VARCHAR(36) PRIMARY KEY,
+    template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
+    
+    -- Variable identity
+    name VARCHAR(100) NOT NULL,
+    display_name VARCHAR(200) NOT NULL,
+    variable_group VARCHAR(100),
+    
+    -- Position in the example report
+    location JSONB NOT NULL,
+    
+    -- Example value
+    example_value TEXT,
+    value_type VARCHAR(32) DEFAULT 'text',
+    
+    -- Data source
+    source_file_alias VARCHAR(100),
+    source_type VARCHAR(32) NOT NULL,
+    source_config JSONB,
+    
+    -- Extraction method
+    extract_type VARCHAR(32),
+    extract_config JSONB,
+    
+    display_order INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    
+    -- a variable name is unique within its template
+    CONSTRAINT uk_variables_name UNIQUE (template_id, name)
+);
+
+CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id);
+CREATE INDEX IF NOT EXISTS idx_variables_source_alias ON variables(source_file_alias);
+
+COMMENT ON TABLE variables IS '模板变量';
+COMMENT ON COLUMN variables.name IS '变量名,模板内唯一,用于程序引用';
+COMMENT ON COLUMN variables.display_name IS '显示名称';
+COMMENT ON COLUMN variables.variable_group IS '变量分组';
+COMMENT ON COLUMN variables.location IS '变量在文档中的位置,包含 element_id、偏移量等';
+COMMENT ON COLUMN variables.example_value IS '示例值(原文档中的值)';
+COMMENT ON COLUMN variables.value_type IS '值类型: text/date/number/table';
+COMMENT ON COLUMN variables.source_file_alias IS '来源文件别名';
+COMMENT ON COLUMN variables.source_type IS 'document-从来源文件提取, manual-手动输入, reference-引用其他变量, fixed-固定值';
+COMMENT ON COLUMN variables.source_config IS '来源配置';
+COMMENT ON COLUMN variables.extract_type IS 'direct-直接提取, ai_extract-AI字段提取, ai_summarize-AI总结';
+COMMENT ON COLUMN variables.extract_config IS '提取配置';
+COMMENT ON COLUMN variables.display_order IS '显示顺序';
+
+-- 4. Create the generations table (report generation tasks)
+-- PostgreSQL has no inline column COMMENT clause (that is MySQL syntax and makes
+-- CREATE TABLE fail here), so column descriptions are attached with
+-- COMMENT ON COLUMN after the table has been created.
+-- Note: unlike source_files and variables, template_id has no ON DELETE CASCADE, so a
+-- template that still has generation tasks cannot be deleted.
+CREATE TABLE IF NOT EXISTS generations (
+    id VARCHAR(36) PRIMARY KEY,
+    template_id VARCHAR(36) NOT NULL REFERENCES templates(id),
+    user_id VARCHAR(36) NOT NULL,
+    
+    name VARCHAR(255),
+    
+    -- Source file mapping: alias -> document ID
+    source_file_map JSONB NOT NULL,
+    
+    -- Variable extraction results
+    variable_values JSONB,
+    
+    -- Generated document
+    output_document_id VARCHAR(36),
+    output_file_path VARCHAR(500),
+    
+    status VARCHAR(32) DEFAULT 'pending',
+    error_message TEXT,
+    progress INT DEFAULT 0,
+    
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    completed_at TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_generations_template ON generations(template_id);
+CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id);
+CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status);
+
+COMMENT ON TABLE generations IS '报告生成任务';
+COMMENT ON COLUMN generations.name IS '任务名称';
+COMMENT ON COLUMN generations.source_file_map IS '来源文件映射,如 {"可研批复": "doc_123"}';
+COMMENT ON COLUMN generations.variable_values IS '变量提取结果,包含值、置信度、状态等';
+COMMENT ON COLUMN generations.output_document_id IS '输出文档ID';
+COMMENT ON COLUMN generations.output_file_path IS '输出文件路径';
+COMMENT ON COLUMN generations.status IS 'pending-待执行, extracting-提取中, review-待确认, completed-已完成, error-错误';
+COMMENT ON COLUMN generations.error_message IS '错误信息';
+COMMENT ON COLUMN generations.progress IS '进度百分比';
+COMMENT ON COLUMN generations.completed_at IS '完成时间';
+
+-- 5. Add update_time triggers
+-- update_timestamp() overwrites NEW.update_time with CURRENT_TIMESTAMP; attached as a
+-- BEFORE UPDATE row trigger it refreshes update_time on every row update.
+-- NOTE(review): CREATE OR REPLACE replaces an existing function of the same name and
+-- signature - confirm no earlier migration defines a different update_timestamp().
+CREATE OR REPLACE FUNCTION update_timestamp()
+RETURNS TRIGGER AS $$
+BEGIN
+    NEW.update_time = CURRENT_TIMESTAMP;
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Each trigger is dropped first so that this section can be re-run.
+-- Only templates and variables get a trigger: they are the only tables created in this
+-- migration that have an update_time column.
+DROP TRIGGER IF EXISTS trigger_templates_update_time ON templates;
+CREATE TRIGGER trigger_templates_update_time
+    BEFORE UPDATE ON templates
+    FOR EACH ROW
+    EXECUTE FUNCTION update_timestamp();
+
+DROP TRIGGER IF EXISTS trigger_variables_update_time ON variables;
+CREATE TRIGGER trigger_variables_update_time
+    BEFORE UPDATE ON variables
+    FOR EACH ROW
+    EXECUTE FUNCTION update_timestamp();
+
+-- 6. 注释:旧表可以保留用于数据迁移,或在确认无需后删除
+-- DROP TABLE IF EXISTS extract_rule_templates CASCADE;
+-- DROP TABLE IF EXISTS extract_results CASCADE;
+-- DROP TABLE IF EXISTS extract_rules CASCADE;
+-- DROP TABLE IF EXISTS extract_source_documents CASCADE;
+-- DROP TABLE IF EXISTS extract_projects CASCADE;