|
@@ -0,0 +1,159 @@
|
|
|
|
|
+-- =====================================================
|
|
|
|
|
+-- 数据提取规则系统重构 v2.0
|
|
|
|
|
+-- 从「规则配置驱动」改为「示例文档驱动」
|
|
|
|
|
+-- =====================================================
|
|
|
|
|
+
|
|
|
|
|
-- 1. templates: report templates (replaces extract_projects).
--
-- Column descriptions are attached with COMMENT ON COLUMN after the table is
-- created. PostgreSQL's CREATE TABLE has no inline COMMENT '...' column clause
-- (that is MySQL syntax), so the inline form is rejected as a syntax error.
CREATE TABLE IF NOT EXISTS templates (
    id               VARCHAR(36)  PRIMARY KEY,
    user_id          VARCHAR(36)  NOT NULL,
    name             VARCHAR(255) NOT NULL,
    description      TEXT,
    -- No foreign key is declared for this column; the column comment below
    -- says it relates to the documents table.
    base_document_id VARCHAR(36)  NOT NULL,
    status           VARCHAR(32)  DEFAULT 'draft',
    config           JSONB,
    is_public        BOOLEAN      DEFAULT FALSE,
    use_count        INT          DEFAULT 0,
    create_time      TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    update_time      TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    -- NOTE(review): presumably audit columns (creator / last editor) — confirm.
    create_by        VARCHAR(36),
    create_by_name   VARCHAR(100),
    update_by        VARCHAR(36),
    update_by_name   VARCHAR(100)
);

-- Lookup indexes; IF NOT EXISTS keeps the script re-runnable.
CREATE INDEX IF NOT EXISTS idx_templates_user_id   ON templates (user_id);
CREATE INDEX IF NOT EXISTS idx_templates_status    ON templates (status);
CREATE INDEX IF NOT EXISTS idx_templates_is_public ON templates (is_public);

-- Catalog descriptions (the stored text is kept exactly as authored).
COMMENT ON TABLE  templates                  IS '报告模板';
COMMENT ON COLUMN templates.base_document_id IS '示例报告文档ID,关联 documents 表';
COMMENT ON COLUMN templates.status           IS 'draft-草稿, published-已发布, archived-已归档';
COMMENT ON COLUMN templates.config           IS '模板配置,如默认AI模型等';
COMMENT ON COLUMN templates.is_public        IS '是否公开';
COMMENT ON COLUMN templates.use_count        IS '使用次数';
|
|
|
|
|
+
|
|
|
|
|
-- 2. source_files: source-file definitions per template
--    (replaces extract_source_documents).
--
-- Column descriptions are attached with COMMENT ON COLUMN after the table is
-- created. PostgreSQL's CREATE TABLE has no inline COMMENT '...' column clause
-- (that is MySQL syntax), so the inline form is rejected as a syntax error.
CREATE TABLE IF NOT EXISTS source_files (
    id                  VARCHAR(36)  PRIMARY KEY,
    -- Rows are removed together with their template (ON DELETE CASCADE).
    template_id         VARCHAR(36)  NOT NULL REFERENCES templates (id) ON DELETE CASCADE,
    alias               VARCHAR(100) NOT NULL,
    description         TEXT,
    file_types          JSONB        DEFAULT '["pdf", "docx"]',
    required            BOOLEAN      DEFAULT TRUE,
    example_document_id VARCHAR(36),
    display_order       INT          DEFAULT 0,
    create_time         TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,

    -- An alias may be used only once within a template.
    CONSTRAINT uk_source_files_alias UNIQUE (template_id, alias)
);

-- NOTE(review): the index backing uk_source_files_alias already leads with
-- template_id, so this index is likely redundant; kept to preserve the
-- existing schema objects.
CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files (template_id);

-- Catalog descriptions (the stored text is kept exactly as authored).
COMMENT ON TABLE  source_files                     IS '来源文件定义';
COMMENT ON COLUMN source_files.alias               IS '用户自定义的别名,用于引用';
COMMENT ON COLUMN source_files.description         IS '文件说明';
COMMENT ON COLUMN source_files.file_types          IS '允许上传的文件类型列表';
COMMENT ON COLUMN source_files.required            IS '是否必须';
COMMENT ON COLUMN source_files.example_document_id IS '创建模板时使用的示例文件,用于预览';
COMMENT ON COLUMN source_files.display_order       IS '显示顺序';
|
|
|
|
|
+
|
|
|
|
|
-- 3. variables: template variables (replaces extract_rules).
--
-- Column descriptions are attached with COMMENT ON COLUMN after the table is
-- created. PostgreSQL's CREATE TABLE has no inline COMMENT '...' column clause
-- (that is MySQL syntax), so the inline form is rejected as a syntax error.
CREATE TABLE IF NOT EXISTS variables (
    id                VARCHAR(36)  PRIMARY KEY,
    -- Rows are removed together with their template (ON DELETE CASCADE).
    template_id       VARCHAR(36)  NOT NULL REFERENCES templates (id) ON DELETE CASCADE,

    -- Variable identity
    name              VARCHAR(100) NOT NULL,
    display_name      VARCHAR(200) NOT NULL,
    variable_group    VARCHAR(100),

    -- Position inside the example report
    location          JSONB        NOT NULL,

    -- Example value
    example_value     TEXT,
    value_type        VARCHAR(32)  DEFAULT 'text',

    -- Data source
    -- NOTE(review): no foreign key is declared for source_file_alias; it
    -- presumably matches source_files.alias within the same template — confirm.
    source_file_alias VARCHAR(100),
    source_type       VARCHAR(32)  NOT NULL,
    source_config     JSONB,

    -- Extraction method
    extract_type      VARCHAR(32),
    extract_config    JSONB,

    display_order     INT          DEFAULT 0,
    create_time       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    update_time       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,

    -- A variable name may be used only once within a template.
    CONSTRAINT uk_variables_name UNIQUE (template_id, name)
);

CREATE INDEX IF NOT EXISTS idx_variables_template     ON variables (template_id);
CREATE INDEX IF NOT EXISTS idx_variables_source_alias ON variables (source_file_alias);

-- Catalog descriptions (the stored text is kept exactly as authored).
COMMENT ON TABLE  variables                   IS '模板变量';
COMMENT ON COLUMN variables.name              IS '变量名,模板内唯一,用于程序引用';
COMMENT ON COLUMN variables.display_name      IS '显示名称';
COMMENT ON COLUMN variables.variable_group    IS '变量分组';
COMMENT ON COLUMN variables.location          IS '变量在文档中的位置,包含 element_id、偏移量等';
COMMENT ON COLUMN variables.example_value     IS '示例值(原文档中的值)';
COMMENT ON COLUMN variables.value_type        IS '值类型: text/date/number/table';
COMMENT ON COLUMN variables.source_file_alias IS '来源文件别名';
COMMENT ON COLUMN variables.source_type       IS 'document-从来源文件提取, manual-手动输入, reference-引用其他变量, fixed-固定值';
COMMENT ON COLUMN variables.source_config     IS '来源配置';
COMMENT ON COLUMN variables.extract_type      IS 'direct-直接提取, ai_extract-AI字段提取, ai_summarize-AI总结';
COMMENT ON COLUMN variables.extract_config    IS '提取配置';
COMMENT ON COLUMN variables.display_order     IS '显示顺序';
|
|
|
|
|
+
|
|
|
|
|
-- 4. generations: report generation tasks.
--
-- Column descriptions are attached with COMMENT ON COLUMN after the table is
-- created. PostgreSQL's CREATE TABLE has no inline COMMENT '...' column clause
-- (that is MySQL syntax), so the inline form is rejected as a syntax error.
CREATE TABLE IF NOT EXISTS generations (
    id                 VARCHAR(36)  PRIMARY KEY,
    -- No ON DELETE action is given, so the default (NO ACTION) applies:
    -- a template that still has generation rows cannot be deleted.
    template_id        VARCHAR(36)  NOT NULL REFERENCES templates (id),
    user_id            VARCHAR(36)  NOT NULL,

    name               VARCHAR(255),

    -- Source file map: alias -> document ID
    source_file_map    JSONB        NOT NULL,

    -- Variable extraction results
    variable_values    JSONB,

    -- Generated document
    output_document_id VARCHAR(36),
    output_file_path   VARCHAR(500),

    status             VARCHAR(32)  DEFAULT 'pending',
    error_message      TEXT,
    progress           INT          DEFAULT 0,

    create_time        TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    completed_at       TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_generations_template ON generations (template_id);
CREATE INDEX IF NOT EXISTS idx_generations_user     ON generations (user_id);
CREATE INDEX IF NOT EXISTS idx_generations_status   ON generations (status);

-- Catalog descriptions (the stored text is kept exactly as authored).
COMMENT ON TABLE  generations                    IS '报告生成任务';
COMMENT ON COLUMN generations.name               IS '任务名称';
COMMENT ON COLUMN generations.source_file_map    IS '来源文件映射,如 {"可研批复": "doc_123"}';
COMMENT ON COLUMN generations.variable_values    IS '变量提取结果,包含值、置信度、状态等';
COMMENT ON COLUMN generations.output_document_id IS '输出文档ID';
COMMENT ON COLUMN generations.output_file_path   IS '输出文件路径';
COMMENT ON COLUMN generations.status             IS 'pending-待执行, extracting-提取中, review-待确认, completed-已完成, error-错误';
COMMENT ON COLUMN generations.error_message      IS '错误信息';
COMMENT ON COLUMN generations.progress           IS '进度百分比';
COMMENT ON COLUMN generations.completed_at       IS '完成时间';
|
|
|
|
|
+
|
|
|
|
|
-- 5. update_time maintenance.
--
-- Trigger function: overwrites the update_time field of the row being written
-- with the current transaction timestamp and returns the modified row, so the
-- table it is attached to must have an update_time column.
CREATE OR REPLACE FUNCTION update_timestamp()
    RETURNS trigger
    LANGUAGE plpgsql
AS $fn$
BEGIN
    NEW.update_time := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$fn$;
|
|
|
|
|
+
|
|
|
|
|
-- Attach update_timestamp() to the tables that carry an update_time column.
-- Each trigger is dropped first so the script can be re-run without an
-- "already exists" error.

-- templates
DROP TRIGGER IF EXISTS trigger_templates_update_time ON templates;
CREATE TRIGGER trigger_templates_update_time
    BEFORE UPDATE ON templates FOR EACH ROW
    EXECUTE FUNCTION update_timestamp();

-- variables
DROP TRIGGER IF EXISTS trigger_variables_update_time ON variables;
CREATE TRIGGER trigger_variables_update_time
    BEFORE UPDATE ON variables FOR EACH ROW
    EXECUTE FUNCTION update_timestamp();
|
|
|
|
|
+
|
|
|
|
|
+-- 6. 注释:旧表可以保留用于数据迁移,或在确认无需后删除
|
|
|
|
|
+-- DROP TABLE IF EXISTS extract_rule_templates CASCADE;
|
|
|
|
|
+-- DROP TABLE IF EXISTS extract_results CASCADE;
|
|
|
|
|
+-- DROP TABLE IF EXISTS extract_rules CASCADE;
|
|
|
|
|
+-- DROP TABLE IF EXISTS extract_source_documents CASCADE;
|
|
|
|
|
+-- DROP TABLE IF EXISTS extract_projects CASCADE;
|