Browse Source

feat: 文档分块/章节、抽取规则与图谱实体等能力

- document: DocumentChunk/DocumentSection 及 Repository,Document 相关改造
- extract: Rule/RuleCondition/RuleAction、GenerationOutput、VariableValueItem 等
- graph: Entity/EntityRelation/Embedding/StandardEntity 等实体与 Repository
- common: BaseEntity 基类
- database: init.sql 更新
- parse/document/graph 服务联动调整

Co-authored-by: Cursor <cursoragent@cursor.com>
何文松 2 weeks ago
parent
commit
90f11dab10
40 changed files with 1633 additions and 614 deletions
  1. 33 0
      backend/common/src/main/java/com/lingyue/common/domain/entity/BaseEntity.java
  2. 5 5
      backend/document-service/src/main/java/com/lingyue/document/controller/DocumentController.java
  3. 52 49
      backend/document-service/src/main/java/com/lingyue/document/entity/Document.java
  4. 64 0
      backend/document-service/src/main/java/com/lingyue/document/entity/DocumentChunk.java
  5. 92 0
      backend/document-service/src/main/java/com/lingyue/document/entity/DocumentSection.java
  6. 12 0
      backend/document-service/src/main/java/com/lingyue/document/repository/DocumentChunkRepository.java
  7. 12 0
      backend/document-service/src/main/java/com/lingyue/document/repository/DocumentSectionRepository.java
  8. 89 44
      backend/document-service/src/main/java/com/lingyue/document/service/DocumentService.java
  9. 30 28
      backend/extract-service/src/main/java/com/lingyue/extract/dto/response/GenerationResponse.java
  10. 33 0
      backend/extract-service/src/main/java/com/lingyue/extract/dto/response/VariableValueItem.java
  11. 90 86
      backend/extract-service/src/main/java/com/lingyue/extract/entity/Generation.java
  12. 55 0
      backend/extract-service/src/main/java/com/lingyue/extract/entity/GenerationOutput.java
  13. 55 0
      backend/extract-service/src/main/java/com/lingyue/extract/entity/Rule.java
  14. 44 0
      backend/extract-service/src/main/java/com/lingyue/extract/entity/RuleAction.java
  15. 44 0
      backend/extract-service/src/main/java/com/lingyue/extract/entity/RuleCondition.java
  16. 12 0
      backend/extract-service/src/main/java/com/lingyue/extract/repository/GenerationOutputRepository.java
  17. 12 0
      backend/extract-service/src/main/java/com/lingyue/extract/repository/RuleActionRepository.java
  18. 12 0
      backend/extract-service/src/main/java/com/lingyue/extract/repository/RuleConditionRepository.java
  19. 12 0
      backend/extract-service/src/main/java/com/lingyue/extract/repository/RuleRepository.java
  20. 4 2
      backend/extract-service/src/main/java/com/lingyue/extract/service/ExtractionService.java
  21. 38 0
      backend/graph-service/src/main/java/com/lingyue/graph/entity/Embedding.java
  22. 56 0
      backend/graph-service/src/main/java/com/lingyue/graph/entity/Entity.java
  23. 40 0
      backend/graph-service/src/main/java/com/lingyue/graph/entity/EntityMergeRecord.java
  24. 49 0
      backend/graph-service/src/main/java/com/lingyue/graph/entity/EntityRelation.java
  25. 59 0
      backend/graph-service/src/main/java/com/lingyue/graph/entity/EntityType.java
  26. 53 0
      backend/graph-service/src/main/java/com/lingyue/graph/entity/RelationType.java
  27. 47 0
      backend/graph-service/src/main/java/com/lingyue/graph/entity/StandardEntity.java
  28. 50 0
      backend/graph-service/src/main/java/com/lingyue/graph/entity/StandardRelation.java
  29. 2 2
      backend/graph-service/src/main/java/com/lingyue/graph/listener/DocumentParsedEventListener.java
  30. 12 0
      backend/graph-service/src/main/java/com/lingyue/graph/repository/EmbeddingRepository.java
  31. 12 0
      backend/graph-service/src/main/java/com/lingyue/graph/repository/EntityMergeRecordRepository.java
  32. 12 0
      backend/graph-service/src/main/java/com/lingyue/graph/repository/EntityRelationRepository.java
  33. 12 0
      backend/graph-service/src/main/java/com/lingyue/graph/repository/EntityRepository.java
  34. 12 0
      backend/graph-service/src/main/java/com/lingyue/graph/repository/EntityTypeRepository.java
  35. 12 0
      backend/graph-service/src/main/java/com/lingyue/graph/repository/RelationTypeRepository.java
  36. 12 0
      backend/graph-service/src/main/java/com/lingyue/graph/repository/StandardEntityRepository.java
  37. 12 0
      backend/graph-service/src/main/java/com/lingyue/graph/repository/StandardRelationRepository.java
  38. 4 4
      backend/parse-service/src/main/java/com/lingyue/parse/controller/ParseController.java
  39. 4 5
      backend/parse-service/src/main/java/com/lingyue/parse/service/FileUploadService.java
  40. 373 389
      database/init.sql

+ 33 - 0
backend/common/src/main/java/com/lingyue/common/domain/entity/BaseEntity.java

@@ -0,0 +1,33 @@
+package com.lingyue.common.domain.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.fasterxml.jackson.annotation.JsonFormat;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Base entity for the new table layout: only id plus create_by/update_by and their timestamps, no create_by_name (id is expected to come from AssignUuidModel, which is not shown here).
+ */
+@Data // NOTE(review): no @EqualsAndHashCode(callSuper = ...) here; confirm whether inherited fields should take part in equals/hashCode
+public abstract class BaseEntity extends AssignUuidModel {
+
+    @Schema(description = "创建者ID")
+    @TableField("create_by")
+    private String createBy; // creator ID, column create_by
+
+    @Schema(description = "创建时间")
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    @TableField("create_time")
+    private Date createTime; // creation time, column create_time; formatted by Jackson as yyyy-MM-dd HH:mm:ss
+
+    @Schema(description = "更新者ID")
+    @TableField("update_by")
+    private String updateBy; // updater ID, column update_by
+
+    @Schema(description = "更新时间")
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    @TableField("update_time")
+    private Date updateTime; // update time, column update_time; formatted by Jackson as yyyy-MM-dd HH:mm:ss
+}

+ 5 - 5
backend/document-service/src/main/java/com/lingyue/document/controller/DocumentController.java

@@ -123,11 +123,11 @@ public class DocumentController {
         
         ParseStatusResponse response = new ParseStatusResponse();
         response.setDocumentId(documentId);
-        response.setStatus(document.getParseStatus());
-        response.setProgress(document.getParseProgress());
-        response.setError(document.getParseError());
-        response.setStartedAt(document.getParseStartedAt());
-        response.setCompletedAt(document.getParseCompletedAt());
+        response.setStatus(document.getStatus());
+        response.setProgress(null);
+        response.setError(null);
+        response.setStartedAt(null);
+        response.setCompletedAt(null);
         
         return AjaxResult.success(response);
     }

+ 52 - 49
backend/document-service/src/main/java/com/lingyue/document/entity/Document.java

@@ -2,75 +2,78 @@ package com.lingyue.document.entity;
 
 import com.baomidou.mybatisplus.annotation.TableField;
 import com.baomidou.mybatisplus.annotation.TableName;
-import com.lingyue.common.domain.entity.SimpleModel;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.lingyue.common.domain.entity.BaseEntity;
 import io.swagger.v3.oas.annotations.media.Schema;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 
-import java.util.Date;
-
 /**
- * 文档实体
+ * 文档实体(设计:documents 表)
  */
 @EqualsAndHashCode(callSuper = true)
 @Data
-@TableName("documents")
+@TableName(value = "documents", autoResultMap = true)
 @Schema(description = "文档实体")
-public class Document extends SimpleModel {
-    
+public class Document extends BaseEntity {
+
     @Schema(description = "用户ID")
     @TableField("user_id")
     private String userId;
-    
+
     @Schema(description = "文档名称")
     @TableField("name")
     private String name;
-    
-    @Schema(description = "文档类型")
-    @TableField("type")
-    private String type; // pdf/word/image/markdown/other
-    
-    @Schema(description = "状态")
-    @TableField("status")
-    private String status = "pending"; // pending/uploading/parsing/completed/failed
-    
+
+    @Schema(description = "原始文件名")
+    @TableField("file_name")
+    private String fileName;
+
+    @Schema(description = "存储路径")
+    @TableField("file_path")
+    private String filePath;
+
     @Schema(description = "文件大小")
     @TableField("file_size")
     private Long fileSize;
-    
-    @Schema(description = "文件URL")
-    @TableField("file_url")
-    private String fileUrl;
-    
-    @Schema(description = "缩略图URL")
-    @TableField("thumbnail_url")
-    private String thumbnailUrl;
-    
-    @Schema(description = "解析后的文")
+
+    @Schema(description = "文件类型:pdf/docx")
+    @TableField("file_type")
+    private String fileType;
+
+    @Schema(description = "状态:uploaded/parsing/parsed/ner_processing/completed/failed")
+    @TableField("status")
+    private String status = "uploaded";
+
+    @Schema(description = "解析后的文")
     @TableField("parsed_text")
     private String parsedText;
-    
-    @Schema(description = "解析状态")
-    @TableField("parse_status")
-    private String parseStatus; // pending/parsing/completed/failed
-    
-    @Schema(description = "解析进度")
-    @TableField("parse_progress")
-    private Integer parseProgress = 0; // 0-100
-    
-    @Schema(description = "解析错误")
-    @TableField("parse_error")
-    private String parseError;
-    
-    @Schema(description = "解析开始时间")
-    @TableField("parse_started_at")
-    private Date parseStartedAt;
-    
-    @Schema(description = "解析完成时间")
-    @TableField("parse_completed_at")
-    private Date parseCompletedAt;
-    
+
+    @Schema(description = "页数")
+    @TableField("page_count")
+    private Integer pageCount;
+
+    @Schema(description = "字数")
+    @TableField("word_count")
+    private Integer wordCount;
+
+    @Schema(description = "实体数量")
+    @TableField("entity_count")
+    private Integer entityCount = 0;
+
+    @Schema(description = "关系数量")
+    @TableField("relation_count")
+    private Integer relationCount = 0;
+
+    @Schema(description = "规则数量")
+    @TableField("rule_count")
+    private Integer ruleCount = 0;
+
     @Schema(description = "元数据")
-    @TableField(value = "metadata", typeHandler = com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler.class)
+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
     private Object metadata;
+
+    @Schema(description = "删除标记")
+    @TableField("del_flag")
+    private Boolean delFlag = false;
 }

+ 64 - 0
backend/document-service/src/main/java/com/lingyue/document/entity/DocumentChunk.java

@@ -0,0 +1,64 @@
+package com.lingyue.document.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Document chunk entity mapped to the document_chunks table (per the design, used for RAG); does not extend BaseEntity and carries only its own id and create_time.
+ */
+@Data
+@TableName(value = "document_chunks", autoResultMap = true)
+@Schema(description = "文档分块")
+public class DocumentChunk {
+
+    @Schema(description = "主键")
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id; // primary key, assigned by MyBatis-Plus (IdType.ASSIGN_UUID)
+
+    @Schema(description = "文档ID")
+    @TableField("document_id")
+    private String documentId; // owning document ID
+
+    @Schema(description = "章节ID")
+    @TableField("section_id")
+    private String sectionId; // section ID
+
+    @Schema(description = "分块序号")
+    @TableField("chunk_index")
+    private Integer chunkIndex; // chunk sequence number
+
+    @Schema(description = "分块内容")
+    @TableField("content")
+    private String content; // chunk text
+
+    @Schema(description = "起始字符位置")
+    @TableField("start_char")
+    private Integer startChar; // start character position
+
+    @Schema(description = "结束字符位置")
+    @TableField("end_char")
+    private Integer endChar; // end character position
+
+    @Schema(description = "页码")
+    @TableField("page_number")
+    private Integer pageNumber; // page number
+
+    @Schema(description = "Token 数量")
+    @TableField("token_count")
+    private Integer tokenCount; // token count
+
+    @Schema(description = "元数据")
+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
+    private Object metadata; // metadata, converted through JacksonTypeHandler
+
+    @Schema(description = "创建时间")
+    @TableField("create_time")
+    private Date createTime; // creation time
+}

+ 92 - 0
backend/document-service/src/main/java/com/lingyue/document/entity/DocumentSection.java

@@ -0,0 +1,92 @@
+package com.lingyue.document.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Document section structure entity mapped to the document_sections table (per the design); sections reference a parent through parent_id.
+ */
+@Data
+@TableName(value = "document_sections", autoResultMap = true)
+@Schema(description = "文档章节结构")
+public class DocumentSection {
+
+    @Schema(description = "主键")
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id; // primary key, assigned by MyBatis-Plus (IdType.ASSIGN_UUID)
+
+    @Schema(description = "文档ID")
+    @TableField("document_id")
+    private String documentId; // owning document ID
+
+    @Schema(description = "父章节ID")
+    @TableField("parent_id")
+    private String parentId; // parent section ID
+
+    @Schema(description = "章节序号")
+    @TableField("section_index")
+    private Integer sectionIndex; // section sequence number
+
+    @Schema(description = "层级")
+    @TableField("level")
+    private Integer level = 1; // nesting level, defaults to 1
+
+    @Schema(description = "章节标题")
+    @TableField("title")
+    private String title; // section title
+
+    @Schema(description = "章节内容")
+    @TableField("content")
+    private String content; // section text
+
+    @Schema(description = "起始页码")
+    @TableField("start_page")
+    private Integer startPage; // first page
+
+    @Schema(description = "结束页码")
+    @TableField("end_page")
+    private Integer endPage; // last page
+
+    @Schema(description = "起始字符位置")
+    @TableField("start_char")
+    private Integer startChar; // start character position
+
+    @Schema(description = "结束字符位置")
+    @TableField("end_char")
+    private Integer endChar; // end character position
+
+    @Schema(description = "类型:heading/paragraph/table/image/list")
+    @TableField("section_type")
+    private String sectionType = "heading"; // one of heading/paragraph/table/image/list, defaults to "heading"
+
+    @Schema(description = "表格数据")
+    @TableField(value = "table_data", typeHandler = JacksonTypeHandler.class)
+    private Object tableData; // table data, converted through JacksonTypeHandler
+
+    @Schema(description = "图片路径")
+    @TableField("image_path")
+    private String imagePath; // image path
+
+    @Schema(description = "图片说明")
+    @TableField("image_caption")
+    private String imageCaption; // image caption
+
+    @Schema(description = "元数据")
+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
+    private Object metadata; // metadata, converted through JacksonTypeHandler
+
+    @Schema(description = "排序")
+    @TableField("sort_order")
+    private Integer sortOrder = 0; // sort order, defaults to 0
+
+    @Schema(description = "创建时间")
+    @TableField("create_time")
+    private Date createTime; // creation time
+}

+ 12 - 0
backend/document-service/src/main/java/com/lingyue/document/repository/DocumentChunkRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.document.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.document.entity.DocumentChunk;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * MyBatis mapper for DocumentChunk (document_chunks per the design); declares no methods of its own and relies on what BaseMapper provides.
+ */
+@Mapper
+public interface DocumentChunkRepository extends BaseMapper<DocumentChunk> {
+}

+ 12 - 0
backend/document-service/src/main/java/com/lingyue/document/repository/DocumentSectionRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.document.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.document.entity.DocumentSection;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * MyBatis mapper for DocumentSection (document_sections per the design); declares no methods of its own and relies on what BaseMapper provides.
+ */
+@Mapper
+public interface DocumentSectionRepository extends BaseMapper<DocumentSection> {
+}

+ 89 - 44
backend/document-service/src/main/java/com/lingyue/document/service/DocumentService.java

@@ -91,20 +91,25 @@ public class DocumentService {
     
     /**
      * 获取文档提取的文本内容
-     * 
+     * 优先返回 documents.parsed_text,否则从文本文件读取(兼容旧存储)
+     *
      * @param documentId 文档ID
      * @return 文本内容
      */
     public String getDocumentText(String documentId) {
-        // 构建文本文件路径
-        String subDir = documentId.substring(0, 2);
+        Document document = documentRepository.selectById(documentId);
+        if (document == null) {
+            return null;
+        }
+        if (StringUtils.hasText(document.getParsedText())) {
+            return document.getParsedText();
+        }
+        String subDir = documentId.length() >= 2 ? documentId.substring(0, 2) : documentId;
         Path textFilePath = Path.of(textStoragePath, subDir, documentId + ".txt");
-        
         if (!Files.exists(textFilePath)) {
             log.warn("文档文本文件不存在: {}", textFilePath);
             return null;
         }
-        
         try {
             return Files.readString(textFilePath, StandardCharsets.UTF_8);
         } catch (Exception e) {
@@ -182,17 +187,15 @@ public class DocumentService {
     
     /**
      * 级联删除文档及其所有关联数据
-     * 
+     *
      * 删除顺序(遵循外键约束):
-     * 1. vector_embeddings (通过 chunk_id 关联)
-     * 2. text_chunks (document_id)
-     * 3. document_elements (document_id)
-     * 4. parse_tasks (document_id)
+     * 1. embeddings (通过 document_chunks.id)
+     * 2. entity_relations、entities (document_id)
+     * 3. document_chunks、document_sections (document_id)
+     * 4. document_elements、parse_tasks (document_id,表可能不存在时忽略)
      * 5. documents (主表)
-     * 6. 文本文件
-     * 7. 图片目录
-     * (graph_nodes / graph_relations 已移除)
-     * 
+     * 6. 文本文件、图片目录
+     *
      * @param documentId 文档ID
      */
     @Transactional
@@ -205,23 +208,37 @@ public class DocumentService {
         String userId = document.getUserId();
         log.info("开始级联删除文档: documentId={}, userId={}", documentId, userId);
         
-        // 1. 删除向量嵌入(通过 text_chunks 关联)
-        int vectorCount = deleteVectorEmbeddingsByDocumentId(documentId);
+        // 1. 删除向量嵌入(通过 document_chunks 关联)
+        int vectorCount = deleteEmbeddingsByDocumentId(documentId);
         log.debug("删除向量嵌入: count={}", vectorCount);
         
-        // 2. 删除文本分块
-        int chunkCount = deleteTextChunksByDocumentId(documentId);
-        log.debug("删除文本分块: count={}", chunkCount);
+        // 2. 删除实体关系、实体
+        int relationCount = deleteEntityRelationsByDocumentId(documentId);
+        int entityCount = deleteEntitiesByDocumentId(documentId);
+        log.debug("删除实体关系: count={}, 实体: count={}", relationCount, entityCount);
+        
+        // 3. 删除文档分块
+        int chunkCount = deleteDocumentChunksByDocumentId(documentId);
+        log.debug("删除文档分块: count={}", chunkCount);
         
-        // 3. 删除结构化元素
-        int elementCount = documentElementRepository.deleteByDocumentId(documentId);
-        log.debug("删除结构化元素: count={}", elementCount);
+        // 4. 删除文档章节
+        int sectionCount = deleteDocumentSectionsByDocumentId(documentId);
+        log.debug("删除文档章节: count={}", sectionCount);
         
-        // 4. 删除解析任务
+        // 5. 删除结构化元素(兼容旧表 document_elements,可能不存在)
+        int elementCount = 0;
+        try {
+            elementCount = documentElementRepository.deleteByDocumentId(documentId);
+            log.debug("删除结构化元素: count={}", elementCount);
+        } catch (Exception e) {
+            log.debug("删除结构化元素跳过(表可能不存在): {}", e.getMessage());
+        }
+        
+        // 6. 删除解析任务(兼容旧表 parse_tasks,可能不存在)
         int taskCount = deleteParseTasksByDocumentId(documentId);
         log.debug("删除解析任务: count={}", taskCount);
         
-        // 5. 删除文档记录
+        // 7. 删除文档记录
         documentRepository.deleteById(documentId);
         log.debug("删除文档记录");
         
@@ -231,25 +248,17 @@ public class DocumentService {
         // 7. 删除图片目录(不影响事务)
         deleteImageDirectory(userId, documentId);
         
-        log.info("级联删除文档完成: documentId={}, 删除向量={}, 分块={}, 元素={}, 任务={}",
-                documentId, vectorCount, chunkCount, elementCount, taskCount);
+        log.info("级联删除文档完成: documentId={}, 删除向量={}, 关系={}, 实体={}, 分块={}, 章节={}, 元素={}, 任务={}",
+                documentId, vectorCount, relationCount, entityCount, chunkCount, sectionCount, elementCount, taskCount);
     }
     
     /**
-     * 删除向量嵌入
+     * 删除向量嵌入(设计:embeddings 表,通过 document_chunks 关联)
      */
-    private int deleteVectorEmbeddingsByDocumentId(String documentId) {
-        if (jdbcTemplate == null) {
-            log.warn("JdbcTemplate 未注入,跳过向量嵌入删除");
-            return 0;
-        }
+    private int deleteEmbeddingsByDocumentId(String documentId) {
+        if (jdbcTemplate == null) return 0;
         try {
-            String sql = """
-                DELETE FROM vector_embeddings 
-                WHERE chunk_id IN (
-                    SELECT id FROM text_chunks WHERE document_id = ?
-                )
-                """;
+            String sql = "DELETE FROM embeddings WHERE chunk_id IN (SELECT id FROM document_chunks WHERE document_id = ?)";
             return jdbcTemplate.update(sql, documentId);
         } catch (Exception e) {
             log.warn("删除向量嵌入失败: {}", e.getMessage());
@@ -258,17 +267,53 @@ public class DocumentService {
     }
     
     /**
-     * 删除文本分块
+     * 删除实体关系(设计:entity_relations 表)
      */
-    private int deleteTextChunksByDocumentId(String documentId) {
-        if (jdbcTemplate == null) {
-            log.warn("JdbcTemplate 未注入,跳过文本分块删除");
+    private int deleteEntityRelationsByDocumentId(String documentId) { // best-effort delete from entity_relations; returns the affected row count
+        if (jdbcTemplate == null) return 0; // no JdbcTemplate available: nothing is deleted
+        try {
+            return jdbcTemplate.update("DELETE FROM entity_relations WHERE document_id = ?", documentId);
+        } catch (Exception e) { // NOTE(review): invoked from the @Transactional cascade delete; some databases (e.g. PostgreSQL) abort the transaction after a failed statement, so swallowing may not protect later deletes - confirm
+            log.warn("删除实体关系失败: {}", e.getMessage());
             return 0;
         }
+    }
+    
+    /**
+     * Deletes the document's rows from the entities table (per the design) and returns the affected row count; returns 0 when jdbcTemplate is null or the statement fails.
+     */
+    private int deleteEntitiesByDocumentId(String documentId) {
+        if (jdbcTemplate == null) return 0; // no JdbcTemplate available: nothing is deleted
+        try {
+            return jdbcTemplate.update("DELETE FROM entities WHERE document_id = ?", documentId);
+        } catch (Exception e) { // NOTE(review): invoked from the @Transactional cascade delete; some databases (e.g. PostgreSQL) abort the transaction after a failed statement, so swallowing may not protect later deletes - confirm
+            log.warn("删除实体失败: {}", e.getMessage());
+            return 0;
+        }
+    }
+    
+    /**
+     * Deletes the document's rows from the document_chunks table (per the design) and returns the affected row count; returns 0 when jdbcTemplate is null or the statement fails.
+     */
+    private int deleteDocumentChunksByDocumentId(String documentId) {
+        if (jdbcTemplate == null) return 0; // no JdbcTemplate available: nothing is deleted
+        try {
+            return jdbcTemplate.update("DELETE FROM document_chunks WHERE document_id = ?", documentId);
+        } catch (Exception e) { // NOTE(review): invoked from the @Transactional cascade delete; some databases (e.g. PostgreSQL) abort the transaction after a failed statement, so swallowing may not protect later deletes - confirm
+            log.warn("删除文档分块失败: {}", e.getMessage());
+            return 0;
+        }
+    }
+    
+    /**
+     * 删除文档章节(设计:document_sections 表)
+     */
+    private int deleteDocumentSectionsByDocumentId(String documentId) {
+        if (jdbcTemplate == null) return 0;
         try {
-            return jdbcTemplate.update("DELETE FROM text_chunks WHERE document_id = ?", documentId);
+            return jdbcTemplate.update("DELETE FROM document_sections WHERE document_id = ?", documentId);
         } catch (Exception e) {
-            log.warn("删除文本分块失败: {}", e.getMessage());
+            log.warn("删除文档章节失败: {}", e.getMessage());
             return 0;
         }
     }

+ 30 - 28
backend/extract-service/src/main/java/com/lingyue/extract/dto/response/GenerationResponse.java

@@ -9,64 +9,65 @@ import java.util.Map;
 
 /**
  * 生成任务响应
- * 
- * @author lingyue
- * @since 2026-01-24
  */
 @Data
 @Schema(description = "生成任务响应")
 public class GenerationResponse {
-    
+
     @Schema(description = "任务ID")
     private String id;
-    
+
     @Schema(description = "模板ID")
     private String templateId;
-    
-    @Schema(description = "模板名称")
+
+    @Schema(description = "模板名称(兼容)")
     private String templateName;
-    
+
+    @Schema(description = "文档ID")
+    private String documentId;
+
     @Schema(description = "用户ID")
     private String userId;
-    
+
     @Schema(description = "任务名称")
     private String name;
-    
+
     @Schema(description = "来源文件映射")
     private Map<String, String> sourceFileMap;
-    
+
     @Schema(description = "变量值")
     private Map<String, Generation.VariableValue> variableValues;
-    
+
     @Schema(description = "输出文档ID")
     private String outputDocumentId;
-    
+
     @Schema(description = "输出文件路径")
     private String outputFilePath;
-    
-    @Schema(description = "状态: pending/extracting/review/generating/completed/error")
+
+    @Schema(description = "输出文件名")
+    private String outputFileName;
+
+    @Schema(description = "状态")
     private String status;
-    
+
     @Schema(description = "错误信息")
     private String errorMessage;
-    
+
     @Schema(description = "进度 0-100")
     private Integer progress;
-    
+
     @Schema(description = "创建时间")
     private Date createTime;
-    
+
     @Schema(description = "完成时间")
     private Date completedAt;
-    
-    // ==================== 统计信息 ====================
-    
+
     @Schema(description = "变量总数")
     private Integer totalVariables;
-    
+
     @Schema(description = "已提取变量数")
     private Integer extractedVariables;
-    
+
     /**
      * 从实体转换
      */
@@ -74,19 +75,21 @@ public class GenerationResponse {
         GenerationResponse response = new GenerationResponse();
         response.setId(generation.getId());
         response.setTemplateId(generation.getTemplateId());
+        response.setTemplateName(null); // 由调用方按需填充
+        response.setDocumentId(generation.getDocumentId());
         response.setUserId(generation.getUserId());
         response.setName(generation.getName());
-        response.setSourceFileMap(generation.getSourceFileMap());
+        response.setSourceFileMap(generation.getSourceFileMap() instanceof Map
+                ? (Map<String, String>) generation.getSourceFileMap() : null);
         response.setVariableValues(generation.getVariableValues());
         response.setOutputDocumentId(generation.getOutputDocumentId());
         response.setOutputFilePath(generation.getOutputFilePath());
+        response.setOutputFileName(generation.getOutputFileName());
         response.setStatus(generation.getStatus());
         response.setErrorMessage(generation.getErrorMessage());
         response.setProgress(generation.getProgress());
         response.setCreateTime(generation.getCreateTime());
         response.setCompletedAt(generation.getCompletedAt());
-        
-        // 统计变量
         if (generation.getVariableValues() != null) {
             response.setTotalVariables(generation.getVariableValues().size());
             long extracted = generation.getVariableValues().values().stream()
@@ -94,7 +97,6 @@ public class GenerationResponse {
                     .count();
             response.setExtractedVariables((int) extracted);
         }
-        
         return response;
     }
 }

+ 33 - 0
backend/extract-service/src/main/java/com/lingyue/extract/dto/response/VariableValueItem.java

@@ -0,0 +1,33 @@
+package com.lingyue.extract.dto.response;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+/**
+ * Variable value item used to present variable values of a generation task; kept compatible with the legacy Generation.VariableValue.
+ */
+@Data
+@Schema(description = "变量值项")
+public class VariableValueItem {
+
+    @Schema(description = "变量名")
+    private String variableName; // variable name
+
+    @Schema(description = "显示名称")
+    private String displayName; // display name
+
+    @Schema(description = "提取的值")
+    private String value; // extracted value
+
+    @Schema(description = "置信度 0-1")
+    private Double confidence; // confidence, 0-1
+
+    @Schema(description = "来源内容预览")
+    private String sourcePreview; // preview of the source content
+
+    @Schema(description = "状态")
+    private String status; // status
+
+    @Schema(description = "错误信息")
+    private String errorMessage; // error message
+}

+ 90 - 86
backend/extract-service/src/main/java/com/lingyue/extract/entity/Generation.java

@@ -1,131 +1,135 @@
 package com.lingyue.extract.entity;
 
 import com.baomidou.mybatisplus.annotation.TableField;
-import com.baomidou.mybatisplus.annotation.TableId;
 import com.baomidou.mybatisplus.annotation.TableName;
 import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
-import com.fasterxml.jackson.annotation.JsonFormat;
+import com.lingyue.common.domain.entity.BaseEntity;
 import io.swagger.v3.oas.annotations.media.Schema;
 import lombok.Data;
+import lombok.EqualsAndHashCode;
 
+import java.util.Collections;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 /**
- * 报告生成任务实体
- * 
- * 使用模板生成新报告的一次任务
- * 记录来源文件映射、变量提取结果、生成的文档
- * 
- * @author lingyue
- * @since 2026-01-23
+ * 生成任务实体(设计:generations 表)
+ * variableValues 与 extractedData 双向同步,兼容旧业务逻辑。
  */
+@EqualsAndHashCode(callSuper = true)
 @Data
 @TableName(value = "generations", autoResultMap = true)
-@Schema(description = "报告生成任务")
-public class Generation {
-    
-    @Schema(description = "ID")
-    @TableId
-    private String id;
-    
-    @Schema(description = "模板ID")
-    @TableField("template_id")
-    private String templateId;
-    
-    @Schema(description = "用户ID")
+@Schema(description = "生成任务")
+public class Generation extends BaseEntity {
+
     @TableField("user_id")
     private String userId;
-    
-    @Schema(description = "任务名称")
+
+    @TableField("document_id")
+    private String documentId;
+
+    @TableField("template_id")
+    private String templateId;
+
     @TableField("name")
     private String name;
-    
-    // ==================== 来源文件映射 ====================
-    
-    @Schema(description = "来源文件映射:别名 → 文档ID,如 {\"可研批复\": \"doc_123\"}")
+
+    @TableField(value = "applied_rules", typeHandler = JacksonTypeHandler.class)
+    private Object appliedRules;
+
     @TableField(value = "source_file_map", typeHandler = JacksonTypeHandler.class)
-    private Map<String, String> sourceFileMap;
-    
-    // ==================== 变量提取结果 ====================
-    
-    @Schema(description = "变量值,包含值、置信度、状态等")
-    @TableField(value = "variable_values", typeHandler = JacksonTypeHandler.class)
-    private Map<String, VariableValue> variableValues;
-    
-    // ==================== 生成的文档 ====================
-    
-    @Schema(description = "输出文档ID")
-    @TableField("output_document_id")
-    private String outputDocumentId;
-    
-    @Schema(description = "输出文件路径")
+    private Object sourceFileMap;
+
+    @TableField(value = "extracted_data", typeHandler = JacksonTypeHandler.class)
+    private Object extractedData;
+
+    @TableField(value = "confirmed_data", typeHandler = JacksonTypeHandler.class)
+    private Object confirmedData;
+
     @TableField("output_file_path")
     private String outputFilePath;
-    
-    // ==================== 状态 ====================
-    
-    @Schema(description = "状态: pending-待执行, extracting-提取中, review-待确认, completed-已完成, error-错误")
+
+    @TableField("output_file_name")
+    private String outputFileName;
+
+    @TableField("output_document_id")
+    private String outputDocumentId;
+
+    @TableField("progress")
+    private Integer progress;
+
     @TableField("status")
-    private String status;
-    
-    @Schema(description = "错误信息")
+    private String status = "pending";
+
     @TableField("error_message")
     private String errorMessage;
-    
-    @Schema(description = "进度百分比 0-100")
-    @TableField("progress")
-    private Integer progress;
-    
-    // ==================== 时间 ====================
-    
-    @Schema(description = "创建时间")
-    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
-    @TableField("create_time")
-    private Date createTime;
-    
-    @Schema(description = "完成时间")
-    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+
+    @TableField("started_at")
+    private Date startedAt;
+
+    @TableField("reviewed_at")
+    private Date reviewedAt;
+
     @TableField("completed_at")
     private Date completedAt;
-    
-    // ==================== 状态常量 ====================
-    
+
+    @TableField("del_flag")
+    private Boolean delFlag = false;
+
     public static final String STATUS_PENDING = "pending";
     public static final String STATUS_EXTRACTING = "extracting";
-    public static final String STATUS_REVIEW = "review";
+    public static final String STATUS_REVIEWING = "reviewing";
+    /** @deprecated 使用 STATUS_REVIEWING */
+    public static final String STATUS_REVIEW = "reviewing";
     public static final String STATUS_GENERATING = "generating";
     public static final String STATUS_COMPLETED = "completed";
-    public static final String STATUS_ERROR = "error";
-    
+    public static final String STATUS_FAILED = "failed";
+    /** @deprecated 使用 STATUS_FAILED */
+    public static final String STATUS_ERROR = "failed";
+
+    /** 兼容旧逻辑:变量值视图,与 extractedData 同步,不单独持久化 */
+    @TableField(exist = false)
+    private Map<String, VariableValue> variableValues;
+
+    /**
+     * Returns the legacy variable-value view of this task.
+     *
+     * <p>When no view has been set yet and {@code extractedData} is a {@link Map}, the view is
+     * built once from it (each entry becomes a {@link VariableValue} in status
+     * {@code STATUS_EXTRACTED} whose value is the entry's {@code toString()}) and then cached.
+     *
+     * @return the cached view, or an immutable empty map when there is nothing to derive it from
+     */
+    public Map<String, VariableValue> getVariableValues() {
+        if (variableValues != null) {
+            return variableValues;
+        }
+        // instanceof is false for null, so this also covers a missing extractedData.
+        if (!(extractedData instanceof Map)) {
+            return Collections.emptyMap();
+        }
+        @SuppressWarnings("unchecked")
+        Map<String, Object> source = (Map<String, Object>) extractedData;
+        variableValues = new HashMap<>();
+        for (Map.Entry<String, Object> entry : source.entrySet()) {
+            Object raw = entry.getValue();
+            VariableValue item = new VariableValue();
+            item.setVariableName(entry.getKey());
+            item.setValue(raw != null ? raw.toString() : null);
+            item.setStatus(VariableValue.STATUS_EXTRACTED);
+            variableValues.put(entry.getKey(), item);
+        }
+        return variableValues;
+    }
+
+    /**
+     * Replaces the variable-value view and mirrors it into {@code extractedData}.
+     *
+     * <p>Only each item's {@code value} is copied into {@code extractedData}; a missing item or a
+     * missing value is stored as an empty string. Passing {@code null} clears the view but leaves
+     * {@code extractedData} untouched.
+     *
+     * @param variableValues the new view, may be {@code null}
+     */
+    public void setVariableValues(Map<String, VariableValue> variableValues) {
+        this.variableValues = variableValues;
+        if (variableValues == null) {
+            return;
+        }
+        // Plain loop instead of Collectors.toMap so that an entry holding a null
+        // VariableValue is stored as "" rather than raising a NullPointerException.
+        Map<String, Object> data = new HashMap<>();
+        for (Map.Entry<String, VariableValue> entry : variableValues.entrySet()) {
+            VariableValue item = entry.getValue();
+            String value = item != null ? item.getValue() : null;
+            data.put(entry.getKey(), value != null ? value : "");
+        }
+        this.extractedData = data;
+    }
+
     /**
-     * 变量值内部类
+     * 变量值项(兼容旧接口与业务逻辑)
      */
     @Data
+    @Schema(description = "变量值")
     public static class VariableValue {
-        
-        @Schema(description = "变量名")
         private String variableName;
-        
-        @Schema(description = "显示名称")
         private String displayName;
-        
-        @Schema(description = "提取的值")
         private String value;
-        
-        @Schema(description = "AI置信度 0-1")
         private Double confidence;
-        
-        @Schema(description = "来源内容预览")
         private String sourcePreview;
-        
-        @Schema(description = "状态: pending-待提取, extracted-已提取, manual-手动修改, error-错误")
         private String status;
-        
-        @Schema(description = "错误信息")
         private String errorMessage;
-        
-        // 状态常量
         public static final String STATUS_PENDING = "pending";
         public static final String STATUS_EXTRACTED = "extracted";
         public static final String STATUS_MANUAL = "manual";

+ 55 - 0
backend/extract-service/src/main/java/com/lingyue/extract/entity/GenerationOutput.java

@@ -0,0 +1,55 @@
+package com.lingyue.extract.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Generation output (design: {@code generation_outputs} table).
+ *
+ * <p>Plain data holder: Lombok {@code @Data} generates the accessors, and every
+ * field is bound to its column with an explicit MyBatis-Plus annotation. The
+ * primary key uses {@code IdType.ASSIGN_UUID}.
+ */
+@Data
+@TableName("generation_outputs")
+@Schema(description = "生成输出")
+public class GenerationOutput {
+
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id;
+
+    @TableField("generation_id")
+    private String generationId;
+
+    @TableField("version")
+    private Integer version = 1; // new instances start at version 1
+
+    @TableField("file_path")
+    private String filePath;
+
+    @TableField("file_name")
+    private String fileName;
+
+    @TableField("file_size")
+    private Long fileSize;
+
+    @TableField("file_type")
+    private String fileType = "docx"; // default file type for new instances
+
+    @TableField("share_token")
+    private String shareToken;
+
+    @TableField("share_expires_at")
+    private Date shareExpiresAt;
+
+    @TableField("download_count")
+    private Integer downloadCount = 0; // new instances start with zero downloads
+
+    @TableField("is_final")
+    private Boolean isFinal = false; // false unless explicitly set
+
+    @TableField("create_time")
+    private Date createTime;
+}

+ 55 - 0
backend/extract-service/src/main/java/com/lingyue/extract/entity/Rule.java

@@ -0,0 +1,55 @@
+package com.lingyue.extract.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.lingyue.common.domain.entity.BaseEntity;
+import com.lingyue.common.mybatis.PostgreSqlVectorTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+/**
+ * Rule entity (design: {@code rules} table).
+ *
+ * <p>Extends {@code BaseEntity}; the generated {@code equals}/{@code hashCode}
+ * include the superclass ({@code callSuper = true}). {@code autoResultMap} is
+ * enabled on the table mapping, which MyBatis-Plus requires for the field-level
+ * type handler on {@code embedding} to be applied to query results.
+ */
+@EqualsAndHashCode(callSuper = true)
+@Data
+@TableName(value = "rules", autoResultMap = true)
+@Schema(description = "规则")
+public class Rule extends BaseEntity {
+
+    @TableField("document_id")
+    private String documentId;
+
+    @TableField("user_id")
+    private String userId;
+
+    @TableField("name")
+    private String name;
+
+    @TableField("description")
+    private String description;
+
+    @TableField("category")
+    private String category;
+
+    @TableField("rule_type")
+    private String ruleType;
+
+    @TableField("source")
+    private String source = "auto"; // default source for new instances
+
+    @TableField("priority")
+    private Integer priority = 0; // default priority
+
+    @TableField("status")
+    private String status = "draft"; // new instances start as "draft"
+
+    @TableField(value = "embedding", typeHandler = PostgreSqlVectorTypeHandler.class)
+    private String embedding; // held as a String; converted by PostgreSqlVectorTypeHandler
+
+    @TableField("is_global")
+    private Boolean isGlobal = false; // false unless explicitly set
+
+    @TableField("del_flag")
+    private Boolean delFlag = false; // false unless explicitly set
+}

+ 44 - 0
backend/extract-service/src/main/java/com/lingyue/extract/entity/RuleAction.java

@@ -0,0 +1,44 @@
+package com.lingyue.extract.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Rule action (design: {@code rule_actions} table).
+ *
+ * <p>Plain data holder with Lombok-generated accessors. The primary key uses
+ * {@code IdType.ASSIGN_UUID}, and {@code ruleId} holds the {@code rule_id} column.
+ * {@code autoResultMap} is enabled so the Jackson type handler on {@code config}
+ * is applied to query results.
+ */
+@Data
+@TableName(value = "rule_actions", autoResultMap = true)
+@Schema(description = "规则动作")
+public class RuleAction {
+
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id;
+
+    @TableField("rule_id")
+    private String ruleId;
+
+    @TableField("action_index")
+    private Integer actionIndex;
+
+    @TableField("action_type")
+    private String actionType;
+
+    @TableField(value = "config", typeHandler = JacksonTypeHandler.class)
+    private Object config; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField("target_field")
+    private String targetField;
+
+    @TableField("sort_order")
+    private Integer sortOrder = 0; // default sort position
+
+    @TableField("create_time")
+    private Date createTime;
+}

+ 44 - 0
backend/extract-service/src/main/java/com/lingyue/extract/entity/RuleCondition.java

@@ -0,0 +1,44 @@
+package com.lingyue.extract.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Rule condition (design: {@code rule_conditions} table).
+ *
+ * <p>Plain data holder with Lombok-generated accessors. The primary key uses
+ * {@code IdType.ASSIGN_UUID}, and {@code ruleId} holds the {@code rule_id} column.
+ * {@code autoResultMap} is enabled so the Jackson type handler on {@code config}
+ * is applied to query results.
+ */
+@Data
+@TableName(value = "rule_conditions", autoResultMap = true)
+@Schema(description = "规则条件")
+public class RuleCondition {
+
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id;
+
+    @TableField("rule_id")
+    private String ruleId;
+
+    @TableField("condition_index")
+    private Integer conditionIndex;
+
+    @TableField("condition_type")
+    private String conditionType;
+
+    @TableField(value = "config", typeHandler = JacksonTypeHandler.class)
+    private Object config; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField("logic_operator")
+    private String logicOperator = "AND"; // default operator for new instances
+
+    @TableField("sort_order")
+    private Integer sortOrder = 0; // default sort position
+
+    @TableField("create_time")
+    private Date createTime;
+}

+ 12 - 0
backend/extract-service/src/main/java/com/lingyue/extract/repository/GenerationOutputRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.extract.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.extract.entity.GenerationOutput;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link GenerationOutput} (design: {@code generation_outputs}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface GenerationOutputRepository extends BaseMapper<GenerationOutput> {
+}

+ 12 - 0
backend/extract-service/src/main/java/com/lingyue/extract/repository/RuleActionRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.extract.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.extract.entity.RuleAction;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link RuleAction} (design: {@code rule_actions}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface RuleActionRepository extends BaseMapper<RuleAction> {
+}

+ 12 - 0
backend/extract-service/src/main/java/com/lingyue/extract/repository/RuleConditionRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.extract.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.extract.entity.RuleCondition;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link RuleCondition} (design: {@code rule_conditions}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface RuleConditionRepository extends BaseMapper<RuleCondition> {
+}

+ 12 - 0
backend/extract-service/src/main/java/com/lingyue/extract/repository/RuleRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.extract.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.extract.entity.Rule;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link Rule} (design: {@code rules}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface RuleRepository extends BaseMapper<Rule> {
+}

+ 4 - 2
backend/extract-service/src/main/java/com/lingyue/extract/service/ExtractionService.java

@@ -82,8 +82,10 @@ public class ExtractionService {
             try {
                 // 获取来源文档ID
                 String documentId = null;
-                if (variable.getSourceFileAlias() != null && generation.getSourceFileMap() != null) {
-                    documentId = generation.getSourceFileMap().get(variable.getSourceFileAlias());
+                if (variable.getSourceFileAlias() != null && generation.getSourceFileMap() != null && generation.getSourceFileMap() instanceof Map) {
+                    @SuppressWarnings("unchecked")
+                    Map<String, String> map = (Map<String, String>) generation.getSourceFileMap();
+                    documentId = map.get(variable.getSourceFileAlias());
                 }
                 
                 // 提取值

+ 38 - 0
backend/graph-service/src/main/java/com/lingyue/graph/entity/Embedding.java

@@ -0,0 +1,38 @@
+package com.lingyue.graph.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.lingyue.common.mybatis.PostgreSqlVectorTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Vector embedding (design: {@code embeddings} table, RAG).
+ *
+ * <p>Plain data holder with Lombok-generated accessors. The primary key uses
+ * {@code IdType.ASSIGN_UUID}, and {@code chunkId} holds the {@code chunk_id} column.
+ * {@code autoResultMap} is enabled so the vector type handler on
+ * {@code embedding} is applied to query results.
+ */
+@Data
+@TableName(value = "embeddings", autoResultMap = true)
+@Schema(description = "向量嵌入")
+public class Embedding {
+
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id;
+
+    @TableField("chunk_id")
+    private String chunkId;
+
+    @TableField(value = "embedding", typeHandler = PostgreSqlVectorTypeHandler.class)
+    private String embedding; // held as a String; converted by PostgreSqlVectorTypeHandler
+
+    @TableField("model_name")
+    private String modelName = "text-embedding-ada-002"; // default model name for new instances
+
+    @TableField("model_version")
+    private String modelVersion;
+
+    @TableField("create_time")
+    private Date createTime;
+}

+ 56 - 0
backend/graph-service/src/main/java/com/lingyue/graph/entity/Entity.java

@@ -0,0 +1,56 @@
+package com.lingyue.graph.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.lingyue.common.domain.entity.BaseEntity;
+import com.lingyue.common.mybatis.PostgreSqlVectorTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+/**
+ * Document entity (design: {@code entities} table, de-duplicated NER results).
+ *
+ * <p>Extends {@code BaseEntity}; the generated {@code equals}/{@code hashCode}
+ * include the superclass ({@code callSuper = true}). {@code autoResultMap} is
+ * enabled so the type handlers on {@code embedding} and {@code metadata} are
+ * applied to query results.
+ */
+@EqualsAndHashCode(callSuper = true)
+@Data
+@TableName(value = "entities", autoResultMap = true)
+@Schema(description = "实体")
+public class Entity extends BaseEntity {
+
+    @TableField("document_id")
+    private String documentId;
+
+    @TableField("entity_type")
+    private String entityType;
+
+    @TableField("entity_text")
+    private String entityText;
+
+    @TableField("normalized_text")
+    private String normalizedText;
+
+    @TableField("occurrence_count")
+    private Integer occurrenceCount = 1; // new instances start at one occurrence
+
+    @TableField("confidence")
+    private Float confidence = 1.0f; // default confidence for new instances
+
+    @TableField("business_label")
+    private String businessLabel;
+
+    @TableField(value = "embedding", typeHandler = PostgreSqlVectorTypeHandler.class)
+    private String embedding; // held as a String; converted by PostgreSqlVectorTypeHandler
+
+    @TableField("is_confirmed")
+    private Boolean isConfirmed = false; // false unless explicitly set
+
+    @TableField("is_merged")
+    private Boolean isMerged = false; // false unless explicitly set
+
+    @TableField("merged_to_id")
+    private String mergedToId;
+
+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
+    private Object metadata; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+}

+ 40 - 0
backend/graph-service/src/main/java/com/lingyue/graph/entity/EntityMergeRecord.java

@@ -0,0 +1,40 @@
+package com.lingyue.graph.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Entity merge record (design: {@code entity_merge_records} table).
+ *
+ * <p>Plain data holder with Lombok-generated accessors. The primary key uses
+ * {@code IdType.ASSIGN_UUID}; the source and target entity ids are stored as
+ * strings.
+ */
+@Data
+@TableName("entity_merge_records")
+@Schema(description = "实体合并记录")
+public class EntityMergeRecord {
+
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id;
+
+    @TableField("source_entity_id")
+    private String sourceEntityId;
+
+    @TableField("target_entity_id")
+    private String targetEntityId;
+
+    @TableField("similarity_score")
+    private Float similarityScore;
+
+    @TableField("merge_type")
+    private String mergeType = "auto"; // default merge type for new instances
+
+    @TableField("create_by")
+    private String createBy;
+
+    @TableField("create_time")
+    private Date createTime;
+}

+ 49 - 0
backend/graph-service/src/main/java/com/lingyue/graph/entity/EntityRelation.java

@@ -0,0 +1,49 @@
+package com.lingyue.graph.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.lingyue.common.domain.entity.BaseEntity;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+/**
+ * Entity relation (design: {@code entity_relations} table).
+ *
+ * <p>Extends {@code BaseEntity}; the generated {@code equals}/{@code hashCode}
+ * include the superclass ({@code callSuper = true}). {@code autoResultMap} is
+ * enabled so the Jackson type handler on {@code metadata} is applied to query
+ * results.
+ */
+@EqualsAndHashCode(callSuper = true)
+@Data
+@TableName(value = "entity_relations", autoResultMap = true)
+@Schema(description = "实体关系")
+public class EntityRelation extends BaseEntity {
+
+    @TableField("document_id")
+    private String documentId;
+
+    @TableField("source_entity_id")
+    private String sourceEntityId;
+
+    @TableField("target_entity_id")
+    private String targetEntityId;
+
+    @TableField("relation_type")
+    private String relationType;
+
+    @TableField("relation_text")
+    private String relationText;
+
+    @TableField("confidence")
+    private Float confidence = 1.0f; // default confidence for new instances
+
+    @TableField("extraction_method")
+    private String extractionMethod = "auto"; // default extraction method
+
+    @TableField("evidence_text")
+    private String evidenceText;
+
+    @TableField("is_confirmed")
+    private Boolean isConfirmed = false; // false unless explicitly set
+
+    @TableField(value = "metadata", typeHandler = JacksonTypeHandler.class)
+    private Object metadata; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+}

+ 59 - 0
backend/graph-service/src/main/java/com/lingyue/graph/entity/EntityType.java

@@ -0,0 +1,59 @@
+package com.lingyue.graph.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Entity type (design: {@code entity_types} table).
+ *
+ * <p>Plain data holder with Lombok-generated accessors. The primary key uses
+ * {@code IdType.ASSIGN_UUID}. {@code autoResultMap} is enabled so the Jackson
+ * type handlers on {@code patterns} and {@code examples} are applied to query
+ * results.
+ */
+@Data
+@TableName(value = "entity_types", autoResultMap = true)
+@Schema(description = "实体类型")
+public class EntityType {
+
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id;
+
+    @TableField("type_code")
+    private String typeCode;
+
+    @TableField("type_name")
+    private String typeName;
+
+    @TableField("category")
+    private String category;
+
+    @TableField("color")
+    private String color;
+
+    @TableField("icon")
+    private String icon;
+
+    @TableField(value = "patterns", typeHandler = JacksonTypeHandler.class)
+    private Object patterns; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField(value = "examples", typeHandler = JacksonTypeHandler.class)
+    private Object examples; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField("description")
+    private String description;
+
+    @TableField("is_active")
+    private Boolean isActive = true; // active unless explicitly cleared
+
+    @TableField("sort_order")
+    private Integer sortOrder = 0; // default sort position
+
+    @TableField("create_time")
+    private Date createTime;
+
+    @TableField("update_time")
+    private Date updateTime;
+}

+ 53 - 0
backend/graph-service/src/main/java/com/lingyue/graph/entity/RelationType.java

@@ -0,0 +1,53 @@
+package com.lingyue.graph.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Relation type (design: {@code relation_types} table).
+ *
+ * <p>Plain data holder with Lombok-generated accessors. The primary key uses
+ * {@code IdType.ASSIGN_UUID}. {@code autoResultMap} is enabled so the Jackson
+ * type handlers on {@code sourceEntityTypes} and {@code targetEntityTypes} are
+ * applied to query results.
+ */
+@Data
+@TableName(value = "relation_types", autoResultMap = true)
+@Schema(description = "关系类型")
+public class RelationType {
+
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id;
+
+    @TableField("type_code")
+    private String typeCode;
+
+    @TableField("type_name")
+    private String typeName;
+
+    @TableField(value = "source_entity_types", typeHandler = JacksonTypeHandler.class)
+    private Object sourceEntityTypes; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField(value = "target_entity_types", typeHandler = JacksonTypeHandler.class)
+    private Object targetEntityTypes; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField("is_symmetric")
+    private Boolean isSymmetric = false; // false unless explicitly set
+
+    @TableField("description")
+    private String description;
+
+    @TableField("is_active")
+    private Boolean isActive = true; // active unless explicitly cleared
+
+    @TableField("sort_order")
+    private Integer sortOrder = 0; // default sort position
+
+    @TableField("create_time")
+    private Date createTime;
+
+    @TableField("update_time")
+    private Date updateTime;
+}

+ 47 - 0
backend/graph-service/src/main/java/com/lingyue/graph/entity/StandardEntity.java

@@ -0,0 +1,47 @@
+package com.lingyue.graph.entity;
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.lingyue.common.domain.entity.BaseEntity;
+import com.lingyue.common.mybatis.PostgreSqlVectorTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+/**
+ * Standard entity (design: {@code standard_entities} table, persisted knowledge).
+ *
+ * <p>Extends {@code BaseEntity}; the generated {@code equals}/{@code hashCode}
+ * include the superclass ({@code callSuper = true}). {@code autoResultMap} is
+ * enabled so the type handlers on {@code aliases}, {@code attributes} and
+ * {@code embedding} are applied to query results.
+ */
+@EqualsAndHashCode(callSuper = true)
+@Data
+@TableName(value = "standard_entities", autoResultMap = true)
+@Schema(description = "标准实体")
+public class StandardEntity extends BaseEntity {
+
+    @TableField("entity_type")
+    private String entityType;
+
+    @TableField("name")
+    private String name;
+
+    @TableField(value = "aliases", typeHandler = JacksonTypeHandler.class)
+    private Object aliases; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField(value = "attributes", typeHandler = JacksonTypeHandler.class)
+    private Object attributes; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField(value = "embedding", typeHandler = PostgreSqlVectorTypeHandler.class)
+    private String embedding; // held as a String; converted by PostgreSqlVectorTypeHandler
+
+    @TableField("occurrence_count")
+    private Integer occurrenceCount = 1; // new instances start at one occurrence
+
+    @TableField("document_count")
+    private Integer documentCount = 1; // new instances start at one document
+
+    @TableField("is_verified")
+    private Boolean isVerified = false; // false unless explicitly set
+
+    @TableField("del_flag")
+    private Boolean delFlag = false; // false unless explicitly set
+}

+ 50 - 0
backend/graph-service/src/main/java/com/lingyue/graph/entity/StandardRelation.java

@@ -0,0 +1,50 @@
+package com.lingyue.graph.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * Standard relation (design: {@code standard_relations} table).
+ *
+ * <p>Plain data holder with Lombok-generated accessors. The primary key uses
+ * {@code IdType.ASSIGN_UUID}. {@code autoResultMap} is enabled so the Jackson
+ * type handler on {@code attributes} is applied to query results.
+ */
+@Data
+@TableName(value = "standard_relations", autoResultMap = true)
+@Schema(description = "标准关系")
+public class StandardRelation {
+
+    @TableId(type = IdType.ASSIGN_UUID)
+    private String id;
+
+    @TableField("source_entity_id")
+    private String sourceEntityId;
+
+    @TableField("target_entity_id")
+    private String targetEntityId;
+
+    @TableField("relation_type")
+    private String relationType;
+
+    @TableField(value = "attributes", typeHandler = JacksonTypeHandler.class)
+    private Object attributes; // (de)serialized by JacksonTypeHandler; no concrete shape is fixed here
+
+    @TableField("occurrence_count")
+    private Integer occurrenceCount = 1; // new instances start at one occurrence
+
+    @TableField("is_verified")
+    private Boolean isVerified = false; // false unless explicitly set
+
+    @TableField("del_flag")
+    private Boolean delFlag = false; // false unless explicitly set
+
+    @TableField("create_time")
+    private Date createTime;
+
+    @TableField("update_time")
+    private Date updateTime;
+}

+ 2 - 2
backend/graph-service/src/main/java/com/lingyue/graph/listener/DocumentParsedEventListener.java

@@ -69,8 +69,8 @@ public class DocumentParsedEventListener {
                 return;
             }
 
-            String docType = document.getType();
-            if (!"word".equalsIgnoreCase(docType)) {
+            String docType = document.getFileType();
+            if (!"docx".equalsIgnoreCase(docType) && !"word".equalsIgnoreCase(docType)) {
                 log.debug("非 Word 文档,跳过结构化解析: documentId={}, type={}", documentId, docType);
                 updateTaskProgress(documentId, "structured", "completed", 100, null);
                 return;

+ 12 - 0
backend/graph-service/src/main/java/com/lingyue/graph/repository/EmbeddingRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.graph.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.graph.entity.Embedding;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link Embedding} (design: {@code embeddings}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface EmbeddingRepository extends BaseMapper<Embedding> {
+}

+ 12 - 0
backend/graph-service/src/main/java/com/lingyue/graph/repository/EntityMergeRecordRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.graph.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.graph.entity.EntityMergeRecord;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link EntityMergeRecord} (design: {@code entity_merge_records}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface EntityMergeRecordRepository extends BaseMapper<EntityMergeRecord> {
+}

+ 12 - 0
backend/graph-service/src/main/java/com/lingyue/graph/repository/EntityRelationRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.graph.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.graph.entity.EntityRelation;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link EntityRelation} (design: {@code entity_relations}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface EntityRelationRepository extends BaseMapper<EntityRelation> {
+}

+ 12 - 0
backend/graph-service/src/main/java/com/lingyue/graph/repository/EntityRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.graph.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.graph.entity.Entity;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link Entity} (design: {@code entities}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface EntityRepository extends BaseMapper<Entity> {
+}

+ 12 - 0
backend/graph-service/src/main/java/com/lingyue/graph/repository/EntityTypeRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.graph.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.graph.entity.EntityType;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link EntityType} (design: {@code entity_types}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface EntityTypeRepository extends BaseMapper<EntityType> {
+}

+ 12 - 0
backend/graph-service/src/main/java/com/lingyue/graph/repository/RelationTypeRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.graph.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.graph.entity.RelationType;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link RelationType} (design: {@code relation_types}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface RelationTypeRepository extends BaseMapper<RelationType> {
+}

+ 12 - 0
backend/graph-service/src/main/java/com/lingyue/graph/repository/StandardEntityRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.graph.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.graph.entity.StandardEntity;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link StandardEntity} (design: {@code standard_entities}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface StandardEntityRepository extends BaseMapper<StandardEntity> {
+}

+ 12 - 0
backend/graph-service/src/main/java/com/lingyue/graph/repository/StandardRelationRepository.java

@@ -0,0 +1,12 @@
+package com.lingyue.graph.repository;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.lingyue.graph.entity.StandardRelation;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Repository for {@link StandardRelation} (design: {@code standard_relations}).
+ *
+ * <p>Declares no methods of its own; all operations are inherited from
+ * MyBatis-Plus {@code BaseMapper}.
+ */
+@Mapper
+public interface StandardRelationRepository extends BaseMapper<StandardRelation> {
+}

+ 4 - 4
backend/parse-service/src/main/java/com/lingyue/parse/controller/ParseController.java

@@ -71,7 +71,7 @@ public class ParseController {
             return AjaxResult.error("文档不存在: " + documentId);
         }
         
-        String filePath = document.getFileUrl();
+        String filePath = document.getFilePath();
         if (filePath == null || filePath.isEmpty()) {
             return AjaxResult.error("文档文件路径为空");
         }
@@ -123,14 +123,14 @@ public class ParseController {
             return AjaxResult.error("文档不存在: " + documentId);
         }
         
-        String filePath = document.getFileUrl();
+        String filePath = document.getFilePath();
         if (filePath == null || filePath.isEmpty()) {
             return AjaxResult.error("文档文件路径为空");
         }
         
         // 检查是否是 Word 文档
-        if (!"word".equalsIgnoreCase(document.getType())) {
-            return AjaxResult.error("仅支持 Word 文档的结构化解析,当前文档类型: " + document.getType());
+        if (!"docx".equalsIgnoreCase(document.getFileType()) && !"word".equalsIgnoreCase(document.getFileType())) {
+            return AjaxResult.error("仅支持 Word 文档的结构化解析,当前文档类型: " + document.getFileType());
         }
         
         // 提取结构化内容

+ 4 - 5
backend/parse-service/src/main/java/com/lingyue/parse/service/FileUploadService.java

@@ -72,12 +72,11 @@ public class FileUploadService {
         document.setId(documentId);
         document.setUserId(userId);
         document.setName(file.getOriginalFilename());
-        document.setType(mapFileTypeToDocType(fileType));
-        document.setStatus("uploaded");  // 状态改为已上传,等待解析
+        document.setFileName(file.getOriginalFilename());
+        document.setFileType(mapFileTypeToDocType(fileType));
+        document.setStatus("uploaded");
         document.setFileSize(file.getSize());
-        document.setFileUrl(filePath);
-        document.setParseStatus("pending");
-        document.setParseProgress(0);
+        document.setFilePath(filePath);
         documentService.saveDocument(document);
         log.info("文档记录创建成功, documentId={}", documentId);
 

+ 373 - 389
database/init.sql

@@ -1,509 +1,493 @@
 -- =====================================================
--- 灵越智报 v2.0 数据库初始化脚本(单文件)
--- PostgreSQL 15+
--- 包含所有表结构,不含 graph_nodes / graph_relations(已移除)
+-- 灵越智报 2.0 数据库初始化脚本
+-- PostgreSQL 15+ / pgvector
+-- 设计参考: a_docs/数据库设计文档.md
 -- =====================================================
 
 CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
 CREATE EXTENSION IF NOT EXISTS vector;
 
--- ============================================ 一、基础模块 ============================================
-
+-- ==================== 用户表 ====================
 CREATE TABLE IF NOT EXISTS users (
     id VARCHAR(36) PRIMARY KEY,
-    username VARCHAR(50) UNIQUE NOT NULL,
-    email VARCHAR(100) UNIQUE NOT NULL,
+    username VARCHAR(50) NOT NULL UNIQUE,
+    email VARCHAR(100) UNIQUE,
     password_hash VARCHAR(255) NOT NULL,
+    display_name VARCHAR(100),
     avatar_url VARCHAR(500),
-    role VARCHAR(20) NOT NULL DEFAULT 'user',
-    preferences TEXT DEFAULT '{}',
+    role VARCHAR(20) DEFAULT 'user',
+    preferences JSONB DEFAULT '{}',
+    is_active BOOLEAN DEFAULT TRUE,
     last_login_at TIMESTAMP,
     create_by VARCHAR(36),
     create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     update_by VARCHAR(36),
     update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
 CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
 CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);
 
+-- ==================== 文档表 ====================
 CREATE TABLE IF NOT EXISTS documents (
     id VARCHAR(36) PRIMARY KEY,
     user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
     name VARCHAR(255) NOT NULL,
-    type VARCHAR(20) NOT NULL,
-    status VARCHAR(20) NOT NULL DEFAULT 'pending',
+    file_name VARCHAR(500) NOT NULL,
+    file_path VARCHAR(500) NOT NULL,
     file_size BIGINT,
-    file_url VARCHAR(500),
-    thumbnail_url VARCHAR(500),
+    file_type VARCHAR(20) NOT NULL,
+    status VARCHAR(20) DEFAULT 'uploaded',
     parsed_text TEXT,
-    parse_status VARCHAR(20),
-    parse_progress INTEGER DEFAULT 0,
-    parse_error TEXT,
-    parse_started_at TIMESTAMP,
-    parse_completed_at TIMESTAMP,
+    page_count INT,
+    word_count INT,
+    entity_count INT DEFAULT 0,
+    relation_count INT DEFAULT 0,
+    rule_count INT DEFAULT 0,
     metadata JSONB DEFAULT '{}',
-    structured_status VARCHAR(20) DEFAULT 'pending',
-    image_count INT DEFAULT 0,
-    table_count INT DEFAULT 0,
-    element_count INT DEFAULT 0,
+    del_flag BOOLEAN DEFAULT FALSE,
     create_by VARCHAR(36),
-    create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     update_by VARCHAR(36),
-    update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id);
+CREATE INDEX IF NOT EXISTS idx_documents_user ON documents(user_id);
+CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(file_type);
 CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
-CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(type);
-CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(create_time DESC);
-CREATE INDEX IF NOT EXISTS idx_documents_metadata ON documents USING GIN(metadata);
 
-CREATE TABLE IF NOT EXISTS elements (
+-- ==================== 文档章节结构表 ====================
+CREATE TABLE IF NOT EXISTS document_sections (
     id VARCHAR(36) PRIMARY KEY,
     document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
-    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-    type VARCHAR(20) NOT NULL,
-    label VARCHAR(100) NOT NULL,
-    value TEXT NOT NULL,
-    position JSONB,
-    confidence DECIMAL(3,2),
-    extraction_method VARCHAR(20),
+    parent_id VARCHAR(36) REFERENCES document_sections(id) ON DELETE CASCADE,
+    section_index INT NOT NULL,
+    level INT NOT NULL DEFAULT 1,
+    title VARCHAR(500),
+    content TEXT,
+    start_page INT,
+    end_page INT,
+    start_char INT,
+    end_char INT,
+    section_type VARCHAR(32) DEFAULT 'heading',
+    table_data JSONB,
+    image_path VARCHAR(500),
+    image_caption VARCHAR(500),
     metadata JSONB DEFAULT '{}',
-    create_by VARCHAR(36),
-    create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(36),
-    update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    sort_order INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_elements_document_id ON elements(document_id);
-CREATE INDEX IF NOT EXISTS idx_elements_user_id ON elements(user_id);
-CREATE INDEX IF NOT EXISTS idx_elements_type ON elements(type);
-CREATE INDEX IF NOT EXISTS idx_elements_position ON elements USING GIN(position);
+CREATE INDEX IF NOT EXISTS idx_doc_sections_document ON document_sections(document_id);
+CREATE INDEX IF NOT EXISTS idx_doc_sections_parent ON document_sections(parent_id);
+CREATE INDEX IF NOT EXISTS idx_doc_sections_level ON document_sections(level);
+CREATE INDEX IF NOT EXISTS idx_doc_sections_type ON document_sections(section_type);
 
-CREATE TABLE IF NOT EXISTS annotations (
+-- ==================== 文档分块表 ====================
+CREATE TABLE IF NOT EXISTS document_chunks (
     id VARCHAR(36) PRIMARY KEY,
     document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
-    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-    text TEXT NOT NULL,
-    position JSONB NOT NULL,
-    type VARCHAR(20) NOT NULL,
-    suggestion TEXT,
-    ai_generated BOOLEAN DEFAULT FALSE,
-    confidence DECIMAL(3,2),
-    status VARCHAR(20) DEFAULT 'pending',
-    create_by VARCHAR(36),
-    create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(36),
-    update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
-);
-CREATE INDEX IF NOT EXISTS idx_annotations_document_id ON annotations(document_id);
-CREATE INDEX IF NOT EXISTS idx_annotations_user_id ON annotations(user_id);
-CREATE INDEX IF NOT EXISTS idx_annotations_type ON annotations(type);
-CREATE INDEX IF NOT EXISTS idx_annotations_status ON annotations(status);
-
-CREATE TABLE IF NOT EXISTS graphs (
-    id VARCHAR(36) PRIMARY KEY,
-    document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
-    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-    name VARCHAR(255) NOT NULL,
-    nodes JSONB NOT NULL DEFAULT '[]',
-    edges JSONB NOT NULL DEFAULT '[]',
-    calculation_result JSONB,
-    calculation_status VARCHAR(20),
+    section_id VARCHAR(36) REFERENCES document_sections(id) ON DELETE SET NULL,
+    chunk_index INT NOT NULL,
+    content TEXT NOT NULL,
+    start_char INT,
+    end_char INT,
+    page_number INT,
+    token_count INT,
     metadata JSONB DEFAULT '{}',
-    create_by VARCHAR(36),
-    create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(36),
-    update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_graphs_document_id ON graphs(document_id);
-CREATE INDEX IF NOT EXISTS idx_graphs_user_id ON graphs(user_id);
-CREATE INDEX IF NOT EXISTS idx_graphs_nodes ON graphs USING GIN(nodes);
-CREATE INDEX IF NOT EXISTS idx_graphs_edges ON graphs USING GIN(edges);
+CREATE INDEX IF NOT EXISTS idx_doc_chunks_document ON document_chunks(document_id);
+CREATE INDEX IF NOT EXISTS idx_doc_chunks_section ON document_chunks(section_id);
 
+-- ==================== 解析任务表(parse-service 依赖) ====================
 CREATE TABLE IF NOT EXISTS parse_tasks (
     id VARCHAR(36) PRIMARY KEY,
     document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
-    status VARCHAR(20) NOT NULL DEFAULT 'pending',
-    progress INTEGER DEFAULT 0,
-    current_step VARCHAR(100),
+    status VARCHAR(20) DEFAULT 'pending',
+    progress INT DEFAULT 0,
+    current_step VARCHAR(32),
     error_message TEXT,
     options JSONB DEFAULT '{}',
     started_at TIMESTAMP,
     completed_at TIMESTAMP,
     parse_status VARCHAR(20) DEFAULT 'pending',
-    parse_progress INTEGER DEFAULT 0,
+    parse_progress INT DEFAULT 0,
     rag_status VARCHAR(20) DEFAULT 'pending',
-    rag_progress INTEGER DEFAULT 0,
+    rag_progress INT DEFAULT 0,
     structured_status VARCHAR(20) DEFAULT 'pending',
-    structured_progress INTEGER DEFAULT 0,
-    structured_element_count INTEGER,
-    structured_image_count INTEGER,
-    structured_table_count INTEGER,
+    structured_progress INT DEFAULT 0,
+    structured_element_count INT,
+    structured_image_count INT,
+    structured_table_count INT,
     ner_status VARCHAR(20) DEFAULT 'pending',
-    ner_progress INTEGER DEFAULT 0,
-    ner_task_id VARCHAR(64),
-    ner_entity_count INTEGER,
-    ner_relation_count INTEGER,
-    ner_message VARCHAR(255),
+    ner_progress INT DEFAULT 0,
+    ner_task_id VARCHAR(100),
+    ner_entity_count INT,
+    ner_relation_count INT,
+    ner_message TEXT,
     graph_status VARCHAR(20) DEFAULT 'pending',
-    graph_progress INTEGER DEFAULT 0,
+    graph_progress INT DEFAULT 0,
     create_by VARCHAR(36),
     create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     update_by VARCHAR(36),
     update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
-);
-CREATE INDEX IF NOT EXISTS idx_parse_tasks_document_id ON parse_tasks(document_id);
-CREATE INDEX IF NOT EXISTS idx_parse_tasks_status ON parse_tasks(status);
-
-CREATE TABLE IF NOT EXISTS sessions (
-    id VARCHAR(36) PRIMARY KEY,
-    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-    token_hash VARCHAR(255) NOT NULL UNIQUE,
-    refresh_token_hash VARCHAR(255) NOT NULL UNIQUE,
-    expires_at TIMESTAMP NOT NULL,
-    ip_address VARCHAR(45),
-    user_agent TEXT,
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    last_used_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON sessions(user_id);
-CREATE INDEX IF NOT EXISTS idx_sessions_token_hash ON sessions(token_hash);
-CREATE INDEX IF NOT EXISTS idx_sessions_expires_at ON sessions(expires_at);
-
--- ============================================ 二、补充模块 ============================================
+CREATE INDEX IF NOT EXISTS idx_parse_tasks_document ON parse_tasks(document_id);
 
-CREATE TABLE IF NOT EXISTS rules (
+-- ==================== 文档结构化元素表(结构化解析 / 前端展示依赖) ====================
+CREATE TABLE IF NOT EXISTS document_elements (
     id VARCHAR(36) PRIMARY KEY,
-    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-    name VARCHAR(255) NOT NULL,
-    description TEXT,
-    entry_node_id VARCHAR(36),
-    exit_node_id VARCHAR(36),
-    rule_chain JSONB NOT NULL DEFAULT '[]',
-    status VARCHAR(20) DEFAULT 'active',
-    metadata JSONB DEFAULT '{}',
+    document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
+    element_index INT,
+    element_type VARCHAR(50),
+    content TEXT,
+    style JSONB DEFAULT '{}',
+    runs JSONB DEFAULT '[]',
+    image_url VARCHAR(500),
+    image_path VARCHAR(500),
+    image_alt VARCHAR(500),
+    image_width INT,
+    image_height INT,
+    image_format VARCHAR(20),
+    table_index INT,
+    table_data JSONB,
+    table_row_count INT,
+    table_col_count INT,
+    table_text TEXT,
     create_by VARCHAR(36),
     create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     update_by VARCHAR(36),
     update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_rules_user_id ON rules(user_id);
-CREATE INDEX IF NOT EXISTS idx_rules_status ON rules(status);
+CREATE INDEX IF NOT EXISTS idx_document_elements_document ON document_elements(document_id);
 
-CREATE TABLE IF NOT EXISTS data_sources (
+-- ==================== 向量嵌入表 ====================
+CREATE TABLE IF NOT EXISTS embeddings (
     id VARCHAR(36) PRIMARY KEY,
-    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-    document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
-    name VARCHAR(255) NOT NULL,
-    type VARCHAR(50) NOT NULL,
-    source_type VARCHAR(50) NOT NULL DEFAULT 'manual',
-    node_ids JSONB DEFAULT '{"refs": []}',
-    config JSONB DEFAULT '{}',
-    metadata JSONB DEFAULT '{}',
-    value_type VARCHAR(20) DEFAULT 'text',
-    aggregate_type VARCHAR(20) DEFAULT 'first',
-    separator VARCHAR(50) DEFAULT '',
-    create_by VARCHAR(36),
-    create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(36),
-    update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    chunk_id VARCHAR(36) NOT NULL REFERENCES document_chunks(id) ON DELETE CASCADE,
+    embedding vector(1536),
+    model_name VARCHAR(100) DEFAULT 'text-embedding-ada-002',
+    model_version VARCHAR(50),
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_data_sources_user_id ON data_sources(user_id);
-CREATE INDEX IF NOT EXISTS idx_data_sources_document_id ON data_sources(document_id);
-CREATE INDEX IF NOT EXISTS idx_data_sources_type ON data_sources(type);
+CREATE INDEX IF NOT EXISTS idx_embeddings_chunk ON embeddings(chunk_id);
+CREATE INDEX IF NOT EXISTS idx_embeddings_vector ON embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
 
-CREATE TABLE IF NOT EXISTS text_storage (
+-- ==================== 实体类型表 ====================
+CREATE TABLE IF NOT EXISTS entity_types (
     id VARCHAR(36) PRIMARY KEY,
-    document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
-    file_path VARCHAR(500) NOT NULL,
-    file_size BIGINT,
-    checksum VARCHAR(64),
-    create_by VARCHAR(36),
-    create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(36),
-    update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    type_code VARCHAR(50) NOT NULL UNIQUE,
+    type_name VARCHAR(100) NOT NULL,
+    category VARCHAR(32),
+    color VARCHAR(20),
+    icon VARCHAR(50),
+    patterns JSONB DEFAULT '[]',
+    examples JSONB DEFAULT '[]',
+    description TEXT,
+    is_active BOOLEAN DEFAULT TRUE,
+    sort_order INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_text_storage_document_id ON text_storage(document_id);
-CREATE UNIQUE INDEX IF NOT EXISTS idx_text_storage_document_unique ON text_storage(document_id);
-
--- ============================================ 三、RAG 模块 ============================================
+CREATE INDEX IF NOT EXISTS idx_entity_types_code ON entity_types(type_code);
 
-CREATE TABLE IF NOT EXISTS text_chunks (
+-- ==================== 实体表 ====================
+CREATE TABLE IF NOT EXISTS entities (
     id VARCHAR(36) PRIMARY KEY,
-    document_id VARCHAR(36) NOT NULL,
-    text_storage_id VARCHAR(36),
-    chunk_index INTEGER NOT NULL,
-    content TEXT NOT NULL,
-    token_count INTEGER,
+    document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
+    entity_type VARCHAR(50) NOT NULL,
+    entity_text VARCHAR(500) NOT NULL,
+    normalized_text VARCHAR(500),
+    occurrence_count INT DEFAULT 1,
+    confidence FLOAT DEFAULT 1.0,
+    business_label VARCHAR(100),
+    embedding vector(1536),
+    is_confirmed BOOLEAN DEFAULT FALSE,
+    is_merged BOOLEAN DEFAULT FALSE,
+    merged_to_id VARCHAR(36),
     metadata JSONB DEFAULT '{}',
-    create_by VARCHAR(36),
-    create_by_name VARCHAR(100),
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(36),
-    update_by_name VARCHAR(100),
-    update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    UNIQUE(document_id, entity_type, normalized_text)
 );
-CREATE INDEX IF NOT EXISTS idx_text_chunks_document_id ON text_chunks(document_id);
-CREATE INDEX IF NOT EXISTS idx_text_chunks_text_storage_id ON text_chunks(text_storage_id);
-CREATE UNIQUE INDEX IF NOT EXISTS idx_text_chunks_doc_index ON text_chunks(document_id, chunk_index);
-
-CREATE TABLE IF NOT EXISTS vector_embeddings (
+CREATE INDEX IF NOT EXISTS idx_entities_document ON entities(document_id);
+CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(entity_type);
+CREATE INDEX IF NOT EXISTS idx_entities_text ON entities(entity_text);
+CREATE INDEX IF NOT EXISTS idx_entities_normalized ON entities(normalized_text);
+CREATE INDEX IF NOT EXISTS idx_entities_label ON entities(business_label);
+
+-- ==================== 关系类型表 ====================
+CREATE TABLE IF NOT EXISTS relation_types (
     id VARCHAR(36) PRIMARY KEY,
-    chunk_id VARCHAR(36) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE,
-    embedding vector(768),
-    model_name VARCHAR(100) DEFAULT 'nomic-embed-text',
-    create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+    type_code VARCHAR(50) NOT NULL UNIQUE,
+    type_name VARCHAR(100) NOT NULL,
+    source_entity_types JSONB DEFAULT '[]',
+    target_entity_types JSONB DEFAULT '[]',
+    is_symmetric BOOLEAN DEFAULT FALSE,
+    description TEXT,
+    is_active BOOLEAN DEFAULT TRUE,
+    sort_order INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_vector_embeddings_chunk_id ON vector_embeddings(chunk_id);
-CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(model_name);
-CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings USING hnsw (embedding vector_cosine_ops);
-
--- ============================================ 四、文档结构化模块 ============================================
+CREATE INDEX IF NOT EXISTS idx_relation_types_code ON relation_types(type_code);
 
-CREATE TABLE IF NOT EXISTS document_blocks (
-    id VARCHAR(64) PRIMARY KEY,
-    document_id VARCHAR(64) NOT NULL,
-    parent_id VARCHAR(64),
-    children JSONB,
-    block_index INTEGER NOT NULL,
-    block_type VARCHAR(32) NOT NULL,
-    elements JSONB,
-    style JSONB,
-    metadata JSONB,
-    create_by VARCHAR(64),
-    create_by_name VARCHAR(128),
+-- ==================== 实体关系表 ====================
+CREATE TABLE IF NOT EXISTS entity_relations (
+    id VARCHAR(36) PRIMARY KEY,
+    document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
+    source_entity_id VARCHAR(36) NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
+    target_entity_id VARCHAR(36) NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
+    relation_type VARCHAR(50) NOT NULL,
+    relation_text VARCHAR(500),
+    confidence FLOAT DEFAULT 1.0,
+    extraction_method VARCHAR(32) DEFAULT 'auto',
+    evidence_text TEXT,
+    is_confirmed BOOLEAN DEFAULT FALSE,
+    metadata JSONB DEFAULT '{}',
     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(64),
-    update_by_name VARCHAR(128),
     update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_document_blocks_document_id ON document_blocks(document_id);
-CREATE INDEX IF NOT EXISTS idx_document_blocks_parent_id ON document_blocks(parent_id);
-CREATE INDEX IF NOT EXISTS idx_document_blocks_block_type ON document_blocks(block_type);
-CREATE INDEX IF NOT EXISTS idx_document_blocks_elements_gin ON document_blocks USING GIN (elements jsonb_path_ops);
+CREATE INDEX IF NOT EXISTS idx_relations_document ON entity_relations(document_id);
+CREATE INDEX IF NOT EXISTS idx_relations_source ON entity_relations(source_entity_id);
+CREATE INDEX IF NOT EXISTS idx_relations_target ON entity_relations(target_entity_id);
+CREATE INDEX IF NOT EXISTS idx_relations_type ON entity_relations(relation_type);
 
-CREATE TABLE IF NOT EXISTS document_entities (
-    id VARCHAR(64) PRIMARY KEY,
-    document_id VARCHAR(64) NOT NULL,
-    block_id VARCHAR(64),
-    name VARCHAR(512) NOT NULL,
-    entity_type VARCHAR(32) NOT NULL,
-    value TEXT,
-    block_char_start INTEGER,
-    block_char_end INTEGER,
-    global_char_start INTEGER,
-    global_char_end INTEGER,
-    anchor_before VARCHAR(100),
-    anchor_after VARCHAR(100),
-    source VARCHAR(16) DEFAULT 'auto',
-    confidence DECIMAL(5,4),
-    confirmed BOOLEAN DEFAULT FALSE,
-    metadata JSONB,
-    create_by VARCHAR(64),
-    create_by_name VARCHAR(128),
+-- ==================== 规则表 ====================
+CREATE TABLE IF NOT EXISTS rules (
+    id VARCHAR(36) PRIMARY KEY,
+    document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
+    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+    name VARCHAR(200) NOT NULL,
+    description TEXT,
+    category VARCHAR(50),
+    rule_type VARCHAR(32) NOT NULL,
+    source VARCHAR(32) DEFAULT 'auto',
+    priority INT DEFAULT 0,
+    status VARCHAR(20) DEFAULT 'draft',
+    embedding vector(1536),
+    is_global BOOLEAN DEFAULT FALSE,
+    del_flag BOOLEAN DEFAULT FALSE,
+    create_by VARCHAR(36),
     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(64),
-    update_by_name VARCHAR(128),
+    update_by VARCHAR(36),
     update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_document_entities_document_id ON document_entities(document_id);
-CREATE INDEX IF NOT EXISTS idx_document_entities_block_id ON document_entities(block_id);
-CREATE INDEX IF NOT EXISTS idx_document_entities_type ON document_entities(entity_type);
-CREATE INDEX IF NOT EXISTS idx_document_entities_name ON document_entities(name);
-CREATE INDEX IF NOT EXISTS idx_document_entities_global_char ON document_entities(document_id, global_char_start, global_char_end);
+CREATE INDEX IF NOT EXISTS idx_rules_document ON rules(document_id);
+CREATE INDEX IF NOT EXISTS idx_rules_user ON rules(user_id);
+CREATE INDEX IF NOT EXISTS idx_rules_type ON rules(rule_type);
+CREATE INDEX IF NOT EXISTS idx_rules_status ON rules(status);
+CREATE INDEX IF NOT EXISTS idx_rules_category ON rules(category);
 
-CREATE TABLE IF NOT EXISTS document_elements (
-    id VARCHAR(64) PRIMARY KEY,
-    document_id VARCHAR(64) NOT NULL,
-    element_index INT NOT NULL,
-    element_type VARCHAR(32) NOT NULL,
-    content TEXT,
-    style JSONB,
-    runs JSONB,
-    image_url VARCHAR(500),
-    image_path VARCHAR(500),
-    image_alt VARCHAR(255),
-    image_width INT,
-    image_height INT,
-    image_format VARCHAR(16),
-    table_index INT,
-    table_data JSONB,
-    table_row_count INT,
-    table_col_count INT,
-    table_text TEXT,
-    create_by VARCHAR(64),
-    create_by_name VARCHAR(128),
+-- ==================== 规则条件表 ====================
+CREATE TABLE IF NOT EXISTS rule_conditions (
+    id VARCHAR(36) PRIMARY KEY,
+    rule_id VARCHAR(36) NOT NULL REFERENCES rules(id) ON DELETE CASCADE,
+    condition_index INT NOT NULL,
+    condition_type VARCHAR(32) NOT NULL,
+    config JSONB NOT NULL,
+    logic_operator VARCHAR(10) DEFAULT 'AND',
+    sort_order INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_rule_conditions_rule ON rule_conditions(rule_id);
+
+-- ==================== 规则动作表 ====================
+CREATE TABLE IF NOT EXISTS rule_actions (
+    id VARCHAR(36) PRIMARY KEY,
+    rule_id VARCHAR(36) NOT NULL REFERENCES rules(id) ON DELETE CASCADE,
+    action_index INT NOT NULL,
+    action_type VARCHAR(32) NOT NULL,
+    config JSONB NOT NULL,
+    target_field VARCHAR(100),
+    sort_order INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_rule_actions_rule ON rule_actions(rule_id);
+
+-- ==================== 生成任务表 ====================
+CREATE TABLE IF NOT EXISTS generations (
+    id VARCHAR(36) PRIMARY KEY,
+    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+    document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
+    template_id VARCHAR(36),
+    name VARCHAR(200),
+    applied_rules JSONB DEFAULT '[]',
+    source_file_map JSONB DEFAULT '{}',
+    extracted_data JSONB DEFAULT '{}',
+    confirmed_data JSONB DEFAULT '{}',
+    output_file_path VARCHAR(500),
+    output_file_name VARCHAR(255),
+    output_document_id VARCHAR(36),
+    progress INTEGER,
+    status VARCHAR(20) DEFAULT 'pending',
+    error_message TEXT,
+    started_at TIMESTAMP,
+    reviewed_at TIMESTAMP,
+    completed_at TIMESTAMP,
+    del_flag BOOLEAN DEFAULT FALSE,
+    create_by VARCHAR(36),
     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    update_by VARCHAR(64),
-    update_by_name VARCHAR(128),
+    update_by VARCHAR(36),
     update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_document_elements_document_id ON document_elements(document_id);
-CREATE INDEX IF NOT EXISTS idx_document_elements_type ON document_elements(element_type);
-CREATE INDEX IF NOT EXISTS idx_document_elements_order ON document_elements(document_id, element_index);
+CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id);
+CREATE INDEX IF NOT EXISTS idx_generations_document ON generations(document_id);
+CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status);
 
--- ============================================ 五、模板系统模块 ============================================
+-- ==================== 生成输出表 ====================
+CREATE TABLE IF NOT EXISTS generation_outputs (
+    id VARCHAR(36) PRIMARY KEY,
+    generation_id VARCHAR(36) NOT NULL REFERENCES generations(id) ON DELETE CASCADE,
+    version INT NOT NULL DEFAULT 1,
+    file_path VARCHAR(500) NOT NULL,
+    file_name VARCHAR(255) NOT NULL,
+    file_size BIGINT,
+    file_type VARCHAR(20) DEFAULT 'docx',
+    share_token VARCHAR(100),
+    share_expires_at TIMESTAMP,
+    download_count INT DEFAULT 0,
+    is_final BOOLEAN DEFAULT FALSE,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_gen_outputs_generation ON generation_outputs(generation_id);
+CREATE INDEX IF NOT EXISTS idx_gen_outputs_share ON generation_outputs(share_token);
 
+-- ==================== 模板/变量/来源文件(兼容现有模板流程) ====================
 CREATE TABLE IF NOT EXISTS templates (
     id VARCHAR(36) PRIMARY KEY,
-    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-    name VARCHAR(255) NOT NULL,
+    user_id VARCHAR(36) NOT NULL,
+    name VARCHAR(200),
     description TEXT,
-    base_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
-    status VARCHAR(32) DEFAULT 'draft',
+    base_document_id VARCHAR(36),
+    status VARCHAR(20) DEFAULT 'draft',
     config JSONB DEFAULT '{}',
     is_public BOOLEAN DEFAULT FALSE,
     use_count INT DEFAULT 0,
-    rating DECIMAL(2,1) DEFAULT 0.0,
-    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    rating DOUBLE PRECISION,
     create_by VARCHAR(36),
     create_by_name VARCHAR(100),
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     update_by VARCHAR(36),
-    update_by_name VARCHAR(100)
+    update_by_name VARCHAR(100),
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_templates_user ON templates(user_id);
+
+CREATE TABLE IF NOT EXISTS variables (
+    id VARCHAR(36) PRIMARY KEY,
+    template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
+    name VARCHAR(100) NOT NULL,
+    display_name VARCHAR(200),
+    variable_group VARCHAR(100),
+    category VARCHAR(50),
+    location JSONB,
+    example_value TEXT,
+    value_type VARCHAR(20),
+    source_file_alias VARCHAR(100),
+    source_type VARCHAR(30),
+    source_config JSONB DEFAULT '{}',
+    extract_type VARCHAR(30),
+    extract_config JSONB DEFAULT '{}',
+    display_order INT DEFAULT 0,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_templates_user_id ON templates(user_id);
-CREATE INDEX IF NOT EXISTS idx_templates_status ON templates(status);
-CREATE INDEX IF NOT EXISTS idx_templates_is_public ON templates(is_public);
-CREATE INDEX IF NOT EXISTS idx_templates_base_document ON templates(base_document_id);
+CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id);
 
 CREATE TABLE IF NOT EXISTS source_files (
     id VARCHAR(36) PRIMARY KEY,
     template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
     alias VARCHAR(100) NOT NULL,
     description TEXT,
-    file_types JSONB DEFAULT '["pdf", "docx"]',
+    file_types JSONB DEFAULT '[]',
     required BOOLEAN DEFAULT TRUE,
-    example_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
+    example_document_id VARCHAR(36),
     display_order INT DEFAULT 0,
-    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    CONSTRAINT uk_source_files_alias UNIQUE (template_id, alias)
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
 CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files(template_id);
 
-CREATE TABLE IF NOT EXISTS variables (
+-- ==================== 标准实体表 ====================
+CREATE TABLE IF NOT EXISTS standard_entities (
     id VARCHAR(36) PRIMARY KEY,
-    template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
-    name VARCHAR(100) NOT NULL,
-    display_name VARCHAR(200) NOT NULL,
-    variable_group VARCHAR(100),
-    category VARCHAR(32),
-    location JSONB NOT NULL,
-    example_value TEXT,
-    value_type VARCHAR(32) DEFAULT 'text',
-    source_file_alias VARCHAR(100),
-    source_type VARCHAR(32) NOT NULL,
-    source_config JSONB,
-    extract_type VARCHAR(32),
-    extract_config JSONB,
-    display_order INT DEFAULT 0,
+    entity_type VARCHAR(50) NOT NULL,
+    name VARCHAR(500) NOT NULL,
+    aliases JSONB DEFAULT '[]',
+    attributes JSONB DEFAULT '{}',
+    embedding vector(1536),
+    occurrence_count INT DEFAULT 1,
+    document_count INT DEFAULT 1,
+    is_verified BOOLEAN DEFAULT FALSE,
+    del_flag BOOLEAN DEFAULT FALSE,
+    create_by VARCHAR(36),
     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    CONSTRAINT uk_variables_name UNIQUE (template_id, name)
+    update_by VARCHAR(36),
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 );
-CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id);
-CREATE INDEX IF NOT EXISTS idx_variables_source_alias ON variables(source_file_alias);
-CREATE INDEX IF NOT EXISTS idx_variables_source_type ON variables(source_type);
-CREATE INDEX IF NOT EXISTS idx_variables_category ON variables(category);
+CREATE INDEX IF NOT EXISTS idx_std_entities_type ON standard_entities(entity_type);
+CREATE INDEX IF NOT EXISTS idx_std_entities_name ON standard_entities(name);
+CREATE INDEX IF NOT EXISTS idx_std_entities_vector ON standard_entities USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
 
-CREATE TABLE IF NOT EXISTS generations (
+-- ==================== Standard relations table ====================
+CREATE TABLE IF NOT EXISTS standard_relations (  -- typed link between two standard_entities rows
     id VARCHAR(36) PRIMARY KEY,
-    template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE RESTRICT,
-    user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
-    name VARCHAR(255),
-    source_file_map JSONB NOT NULL,
-    variable_values JSONB,
-    output_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
-    output_file_path VARCHAR(500),
-    status VARCHAR(32) DEFAULT 'pending',
-    error_message TEXT,
-    progress INT DEFAULT 0,
+    source_entity_id VARCHAR(36) NOT NULL REFERENCES standard_entities(id) ON DELETE CASCADE,  -- deleting the source entity deletes this relation
+    target_entity_id VARCHAR(36) NOT NULL REFERENCES standard_entities(id) ON DELETE CASCADE,  -- deleting the target entity deletes this relation
+    relation_type VARCHAR(50) NOT NULL,  -- no FK declared; presumably holds a relation_types.type_code value -- confirm
+    attributes JSONB DEFAULT '{}',  -- defaults to an empty JSON object
+    occurrence_count INT DEFAULT 1,
+    is_verified BOOLEAN DEFAULT FALSE,
+    del_flag BOOLEAN DEFAULT FALSE,  -- presumably a soft-delete marker -- confirm
     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-    completed_at TIMESTAMP
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP  -- NOTE(review): unlike standard_entities, no create_by/update_by columns here -- confirm this is intentional
 );
-CREATE INDEX IF NOT EXISTS idx_generations_template ON generations(template_id);
-CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id);
-CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status);
-CREATE INDEX IF NOT EXISTS idx_generations_create_time ON generations(create_time DESC);
-
--- ============================================ 触发器 ============================================
-
-CREATE OR REPLACE FUNCTION update_update_time_column()
-RETURNS TRIGGER AS $$
-BEGIN
-    NEW.update_time = CURRENT_TIMESTAMP;
-    RETURN NEW;
-END;
-$$ LANGUAGE plpgsql;
-
-DO $$
-DECLARE
-    tbl TEXT;
-    tables TEXT[] := ARRAY[
-        'users', 'documents', 'elements', 'annotations', 'graphs',
-        'parse_tasks', 'sessions', 'rules', 'data_sources', 'text_storage',
-        'text_chunks', 'document_blocks', 'document_entities', 'document_elements',
-        'templates', 'source_files', 'variables', 'generations'
-    ];
-BEGIN
-    FOREACH tbl IN ARRAY tables LOOP
-        EXECUTE format('DROP TRIGGER IF EXISTS trigger_%s_update_time ON %I', tbl, tbl);
-        EXECUTE format('CREATE TRIGGER trigger_%s_update_time BEFORE UPDATE ON %I FOR EACH ROW EXECUTE FUNCTION update_update_time_column()', tbl, tbl);
-    END LOOP;
-END $$;
+CREATE INDEX IF NOT EXISTS idx_std_relations_source ON standard_relations(source_entity_id);  -- find relations by source entity
+CREATE INDEX IF NOT EXISTS idx_std_relations_target ON standard_relations(target_entity_id);  -- find relations by target entity
+CREATE INDEX IF NOT EXISTS idx_std_relations_type ON standard_relations(relation_type);  -- filter by relation type
 
--- ============================================ 向量检索函数 ============================================
-
-CREATE OR REPLACE FUNCTION search_similar_chunks(
-    query_embedding vector(768),
-    target_document_id VARCHAR(36),
-    result_limit INTEGER DEFAULT 3
-)
-RETURNS TABLE (chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT) AS $$
-BEGIN
-    RETURN QUERY
-    SELECT tc.id, tc.document_id, tc.content, tc.chunk_index,
-           1 - (ve.embedding <=> query_embedding) AS similarity
-    FROM text_chunks tc
-    JOIN vector_embeddings ve ON tc.id = ve.chunk_id
-    WHERE tc.document_id = target_document_id
-    ORDER BY ve.embedding <=> query_embedding
-    LIMIT result_limit;
-END;
-$$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION search_similar_chunks_global(
-    query_embedding vector(768),
-    result_limit INTEGER DEFAULT 5
-)
-RETURNS TABLE (chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT) AS $$
-BEGIN
-    RETURN QUERY
-    SELECT tc.id, tc.document_id, tc.content, tc.chunk_index,
-           1 - (ve.embedding <=> query_embedding) AS similarity
-    FROM text_chunks tc
-    JOIN vector_embeddings ve ON tc.id = ve.chunk_id
-    ORDER BY ve.embedding <=> query_embedding
-    LIMIT result_limit;
-END;
-$$ LANGUAGE plpgsql;
-
-SELECT '灵越智报 v2.0 数据库初始化完成' AS result;
+-- ==================== Entity merge records table ====================
+CREATE TABLE IF NOT EXISTS entity_merge_records (  -- links one entities row to one standard_entities row
+    id VARCHAR(36) PRIMARY KEY,
+    source_entity_id VARCHAR(36) NOT NULL REFERENCES entities(id) ON DELETE CASCADE,  -- the entities row; deleting it deletes this record
+    target_entity_id VARCHAR(36) NOT NULL REFERENCES standard_entities(id) ON DELETE CASCADE,  -- the standard_entities row; deleting it deletes this record
+    similarity_score FLOAT,  -- nullable; presumably the source/target similarity at merge time -- confirm
+    merge_type VARCHAR(32) DEFAULT 'auto',  -- other allowed values are not visible here
+    create_by VARCHAR(36),
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_merge_records_source ON entity_merge_records(source_entity_id);  -- find merge records by source entity
+CREATE INDEX IF NOT EXISTS idx_merge_records_target ON entity_merge_records(target_entity_id);  -- find merge records by target standard entity
+
+-- ==================== Seed data ====================
+INSERT INTO entity_types (id, type_code, type_name, category, color, icon, description, sort_order) VALUES  -- 'basic' rows use sort_order 1-5, 'domain' rows 10-15
+('et-001', 'PERSON', '人名', 'basic', '#1890ff', 'user', '人物姓名,如:张经理、李总', 1),
+('et-002', 'ORG', '机构', 'basic', '#faad14', 'bank', '组织机构,如:成都检测公司、环保局', 2),
+('et-003', 'LOC', '地点', 'basic', '#52c41a', 'environment', '地理位置,如:成都市高新区', 3),
+('et-004', 'DATE', '日期', 'basic', '#722ed1', 'calendar', '日期时间,如:2024年5月15日', 4),
+('et-005', 'NUMBER', '数值', 'basic', '#13c2c2', 'number', '数值数量,如:50分贝、100万元', 5),
+('et-006', 'DEVICE', '设备', 'domain', '#eb2f96', 'tool', '设备仪器,如:噪音检测设备、光谱仪', 10),
+('et-007', 'PROJECT', '项目', 'domain', '#f5222d', 'project', '项目名称,如:环境监测项目', 11),
+('et-008', 'TERM', '术语', 'domain', '#a0d911', 'book', '专业术语,如:COD、BOD、PM2.5', 12),
+('et-009', 'STANDARD', '标准', 'domain', '#2f54eb', 'file-text', '标准规范,如:GB 3096-2008', 13),
+('et-010', 'MATERIAL', '材料', 'domain', '#fa8c16', 'experiment', '材料物质,如:甲醛、苯', 14),
+('et-011', 'METHOD', '方法', 'domain', '#1890ff', 'api', '检测方法,如:气相色谱法', 15)  -- NOTE(review): same color as PERSON (et-001) -- confirm this is intentional
+ON CONFLICT (id) DO NOTHING;  -- rows whose id already exists are left untouched
+
+INSERT INTO relation_types (id, type_code, type_name, source_entity_types, target_entity_types, is_symmetric, description) VALUES  -- source/target lists are JSON-array text of the type_code values seeded into entity_types
+('rt-001', 'LOCATED_IN', '位于', '["ORG", "PERSON", "PROJECT"]', '["LOC"]', false, '实体位于某地'),
+('rt-002', 'WORKS_FOR', '任职于', '["PERSON"]', '["ORG"]', false, '人员任职于机构'),
+('rt-003', 'BELONGS_TO', '属于', '["ORG", "PROJECT"]', '["ORG"]', false, '隶属关系'),
+('rt-004', 'RESPONSIBLE_FOR', '负责', '["PERSON", "ORG"]', '["PROJECT", "LOC"]', false, '负责某事/某地'),
+('rt-005', 'USES', '使用', '["ORG", "PERSON", "PROJECT"]', '["DEVICE", "METHOD"]', false, '使用设备/方法'),
+('rt-006', 'DETECTS', '检测', '["ORG", "PERSON", "DEVICE"]', '["MATERIAL", "TERM"]', false, '检测某物质/指标'),
+('rt-007', 'CONTAINS', '包含', '["PROJECT", "ORG"]', '["PROJECT", "TERM", "MATERIAL"]', false, '包含关系'),
+('rt-008', 'COMPLIES_WITH', '符合', '["PROJECT", "ORG", "METHOD"]', '["STANDARD"]', false, '符合某标准'),
+('rt-009', 'PRODUCES', '产生', '["ORG", "PROJECT", "DEVICE"]', '["NUMBER", "MATERIAL"]', false, '产生数据/物质'),
+('rt-010', 'COOPERATES', '合作', '["ORG"]', '["ORG"]', true, '机构合作关系'),  -- the only row seeded with is_symmetric = true
+('rt-011', 'OCCURS_AT', '发生于', '["PROJECT"]', '["DATE"]', false, '发生在某时间')
+ON CONFLICT (id) DO NOTHING;  -- rows whose id already exists are left untouched