-- ===================================================== -- 灵越智报 v2.0 数据库完整表结构 -- PostgreSQL 15+ -- -- 生成时间: 2026-01-23 -- -- 包含所有表(按模块分组): -- 一、基础模块 -- 1. users - 用户表 -- 2. documents - 文档表 -- 3. elements - 要素表 -- 4. annotations - 批注表 -- 5. graphs - 关系网络表 -- 6. parse_tasks - 解析任务表 -- 7. sessions - 会话表 -- -- 二、图谱模块 -- 8. graph_nodes - 图节点表 -- 9. graph_relations - 图关系表 -- -- 三、补充模块 -- 10. rules - 规则表 -- 11. data_sources - 数据源表 -- 12. text_storage - 文本存储表 -- -- 四、RAG 模块 -- 13. text_chunks - 文本分块表 -- 14. vector_embeddings - 向量嵌入表 -- -- 五、文档结构化模块 -- 15. document_blocks - 文档块表 -- 16. document_entities - 文档实体表 -- 17. document_elements - 文档元素表 -- -- 六、模板系统模块(v2.0) -- 18. templates - 报告模板表 -- 19. source_files - 来源文件定义表 -- 20. variables - 模板变量表 -- 21. generations - 生成任务表 -- ===================================================== -- 启用扩展 CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS vector; -- pgvector 用于向量检索 -- ============================================ -- 一、基础模块 -- ============================================ -- 1. 用户表 (users) CREATE TABLE IF NOT EXISTS users ( id VARCHAR(36) PRIMARY KEY, username VARCHAR(50) UNIQUE NOT NULL, email VARCHAR(100) UNIQUE NOT NULL, password_hash VARCHAR(255) NOT NULL, avatar_url VARCHAR(500), role VARCHAR(20) NOT NULL DEFAULT 'user', -- admin/user/guest preferences TEXT DEFAULT '{}', -- JSON字符串格式存储偏好设置 last_login_at TIMESTAMP, create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_users_username ON users(username); CREATE INDEX IF NOT EXISTS idx_users_email ON users(email); COMMENT ON TABLE users IS '用户表'; COMMENT ON COLUMN users.role IS '角色: admin-管理员, user-普通用户, guest-访客'; -- 2. 文档表 (documents) CREATE TABLE IF NOT EXISTS documents ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, type VARCHAR(20) NOT NULL, -- pdf/word/image/markdown/other status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending/uploading/parsing/completed/failed file_size BIGINT, file_url VARCHAR(500), thumbnail_url VARCHAR(500), parsed_text TEXT, parse_status VARCHAR(20), -- pending/parsing/completed/failed parse_progress INTEGER DEFAULT 0, -- 0-100 parse_error TEXT, parse_started_at TIMESTAMP, parse_completed_at TIMESTAMP, metadata JSONB DEFAULT '{}', -- pageCount, ocrConfidence, layoutStructure等 -- 结构化解析增强字段 structured_status VARCHAR(20) DEFAULT 'pending', image_count INT DEFAULT 0, table_count INT DEFAULT 0, element_count INT DEFAULT 0, create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id); CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status); CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(type); CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(create_time DESC); CREATE INDEX IF NOT EXISTS idx_documents_metadata ON documents USING GIN(metadata); COMMENT ON TABLE documents IS '文档表'; COMMENT ON COLUMN documents.type IS '文档类型: pdf/word/image/markdown/other'; COMMENT ON COLUMN documents.status IS '状态: pending/uploading/parsing/completed/failed'; COMMENT ON COLUMN documents.structured_status IS '结构化解析状态: pending/completed/failed'; -- 3. 要素表 (elements) CREATE TABLE IF NOT EXISTS elements ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, type VARCHAR(20) NOT NULL, -- amount/company/person/location/date/other label VARCHAR(100) NOT NULL, value TEXT NOT NULL, position JSONB, -- {page, x, y, width, height} confidence DECIMAL(3,2), -- 0.00-1.00 extraction_method VARCHAR(20), -- ai/regex/rule/manual graph_node_id VARCHAR(36), -- 关联的图节点ID metadata JSONB DEFAULT '{}', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_elements_document_id ON elements(document_id); CREATE INDEX IF NOT EXISTS idx_elements_user_id ON elements(user_id); CREATE INDEX IF NOT EXISTS idx_elements_type ON elements(type); CREATE INDEX IF NOT EXISTS idx_elements_graph_node_id ON elements(graph_node_id); CREATE INDEX IF NOT EXISTS idx_elements_position ON elements USING GIN(position); COMMENT ON TABLE elements IS '要素表 - 文档中提取的结构化要素'; COMMENT ON COLUMN elements.type IS '要素类型: amount/company/person/location/date/other'; -- 4. 批注表 (annotations) CREATE TABLE IF NOT EXISTS annotations ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, text TEXT NOT NULL, position JSONB NOT NULL, -- {page, start: {x, y}, end: {x, y}} type VARCHAR(20) NOT NULL, -- highlight/strikethrough/suggestion suggestion TEXT, ai_generated BOOLEAN DEFAULT FALSE, confidence DECIMAL(3,2), status VARCHAR(20) DEFAULT 'pending', -- pending/accepted/rejected create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_annotations_document_id ON annotations(document_id); CREATE INDEX IF NOT EXISTS idx_annotations_user_id ON annotations(user_id); CREATE INDEX IF NOT EXISTS idx_annotations_type ON annotations(type); CREATE INDEX IF NOT EXISTS idx_annotations_status ON annotations(status); COMMENT ON TABLE annotations IS '批注表'; COMMENT ON COLUMN annotations.type IS '批注类型: highlight/strikethrough/suggestion'; -- 5. 关系网络表 (graphs) CREATE TABLE IF NOT EXISTS graphs ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, nodes JSONB NOT NULL DEFAULT '[]', -- GraphNode数组 edges JSONB NOT NULL DEFAULT '[]', -- GraphEdge数组 calculation_result JSONB, calculation_status VARCHAR(20), -- pending/completed/failed metadata JSONB DEFAULT '{}', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_graphs_document_id ON graphs(document_id); CREATE INDEX IF NOT EXISTS idx_graphs_user_id ON graphs(user_id); CREATE INDEX IF NOT EXISTS idx_graphs_nodes ON graphs USING GIN(nodes); CREATE INDEX IF NOT EXISTS idx_graphs_edges ON graphs USING GIN(edges); COMMENT ON TABLE graphs IS '关系网络表'; -- 6. 解析任务表 (parse_tasks) CREATE TABLE IF NOT EXISTS parse_tasks ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending/processing/completed/failed progress INTEGER DEFAULT 0, -- 0-100 current_step VARCHAR(100), error_message TEXT, options JSONB DEFAULT '{}', -- 解析选项 started_at TIMESTAMP, completed_at TIMESTAMP, -- 多阶段进度跟踪 parse_status VARCHAR(20) DEFAULT 'pending', parse_progress INTEGER DEFAULT 0, rag_status VARCHAR(20) DEFAULT 'pending', rag_progress INTEGER DEFAULT 0, structured_status VARCHAR(20) DEFAULT 'pending', structured_progress INTEGER DEFAULT 0, structured_element_count INTEGER, structured_image_count INTEGER, structured_table_count INTEGER, ner_status VARCHAR(20) DEFAULT 'pending', ner_progress INTEGER DEFAULT 0, ner_task_id VARCHAR(64), ner_entity_count INTEGER, ner_relation_count INTEGER, graph_status VARCHAR(20) DEFAULT 'pending', graph_progress INTEGER DEFAULT 0, create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_parse_tasks_document_id ON parse_tasks(document_id); CREATE INDEX IF NOT EXISTS idx_parse_tasks_status ON parse_tasks(status); CREATE INDEX IF NOT EXISTS idx_parse_tasks_ner_task_id ON parse_tasks(ner_task_id); COMMENT ON TABLE parse_tasks IS '解析任务表'; COMMENT ON COLUMN parse_tasks.parse_status IS '解析阶段状态'; COMMENT ON COLUMN parse_tasks.rag_status IS 'RAG向量化阶段状态'; COMMENT ON COLUMN parse_tasks.structured_status IS '结构化解析阶段状态'; COMMENT ON COLUMN parse_tasks.ner_status IS 'NER实体提取阶段状态'; COMMENT ON COLUMN parse_tasks.graph_status IS '图构建阶段状态'; -- 7. 会话表 (sessions) CREATE TABLE IF NOT EXISTS sessions ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, token_hash VARCHAR(255) NOT NULL UNIQUE, refresh_token_hash VARCHAR(255) NOT NULL UNIQUE, expires_at TIMESTAMP NOT NULL, ip_address VARCHAR(45), user_agent TEXT, create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, last_used_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON sessions(user_id); CREATE INDEX IF NOT EXISTS idx_sessions_token_hash ON sessions(token_hash); CREATE INDEX IF NOT EXISTS idx_sessions_expires_at ON sessions(expires_at); COMMENT ON TABLE sessions IS '会话表'; -- ============================================ -- 二、图谱模块 -- ============================================ -- 8. 图节点表 (graph_nodes) CREATE TABLE IF NOT EXISTS graph_nodes ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL, user_id VARCHAR(36), -- 可为空,自动提取时可能没有用户上下文 name VARCHAR(255) NOT NULL, type VARCHAR(50) NOT NULL, -- text/table/image/number/date/ORG/PERSON/LOC/TIME/DEVICE/PROJECT/METHOD等 value TEXT, position JSONB, -- {charStart, charEnd, line} parent_id VARCHAR(36), level INTEGER DEFAULT 0, metadata JSONB DEFAULT '{}', create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_graph_nodes_document_id ON graph_nodes(document_id); CREATE INDEX IF NOT EXISTS idx_graph_nodes_user_id ON graph_nodes(user_id); CREATE INDEX IF NOT EXISTS idx_graph_nodes_type ON graph_nodes(type); CREATE INDEX IF NOT EXISTS idx_graph_nodes_parent_id ON graph_nodes(parent_id); CREATE INDEX IF NOT EXISTS idx_graph_nodes_position ON graph_nodes USING GIN(position); COMMENT ON TABLE graph_nodes IS '图节点表 - NER实体识别结果'; COMMENT ON COLUMN graph_nodes.type IS '节点类型: ORG/PERSON/LOC/TIME/DEVICE/PROJECT/METHOD等'; -- 9. 图关系表 (graph_relations) CREATE TABLE IF NOT EXISTS graph_relations ( id VARCHAR(36) PRIMARY KEY, from_node_id VARCHAR(36) NOT NULL REFERENCES graph_nodes(id) ON DELETE CASCADE, to_node_id VARCHAR(36) NOT NULL REFERENCES graph_nodes(id) ON DELETE CASCADE, relation_type VARCHAR(50) NOT NULL, -- BELONGS_TO/USES/LOCATED_IN/EXECUTES/MONITORS等 action_type VARCHAR(50), action_config JSONB, order_index INTEGER DEFAULT 0, condition_expr TEXT, metadata JSONB DEFAULT '{}', create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_graph_relations_from_node ON graph_relations(from_node_id); CREATE INDEX IF NOT EXISTS idx_graph_relations_to_node ON graph_relations(to_node_id); CREATE INDEX IF NOT EXISTS idx_graph_relations_type ON graph_relations(relation_type); COMMENT ON TABLE graph_relations IS '图关系表 - 实体之间的关系'; COMMENT ON COLUMN graph_relations.relation_type IS '关系类型: BELONGS_TO/USES/LOCATED_IN/EXECUTES/MONITORS等'; -- ============================================ -- 三、补充模块 -- ============================================ -- 10. 规则表 (rules) CREATE TABLE IF NOT EXISTS rules ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, description TEXT, entry_node_id VARCHAR(36) REFERENCES graph_nodes(id) ON DELETE SET NULL, exit_node_id VARCHAR(36) REFERENCES graph_nodes(id) ON DELETE SET NULL, rule_chain JSONB NOT NULL DEFAULT '[]', -- 规则链(节点ID序列) status VARCHAR(20) DEFAULT 'active', -- active/inactive metadata JSONB DEFAULT '{}', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_rules_user_id ON rules(user_id); CREATE INDEX IF NOT EXISTS idx_rules_entry_node ON rules(entry_node_id); CREATE INDEX IF NOT EXISTS idx_rules_exit_node ON rules(exit_node_id); CREATE INDEX IF NOT EXISTS idx_rules_status ON rules(status); COMMENT ON TABLE rules IS '规则表'; -- 11. 数据源表 (data_sources) CREATE TABLE IF NOT EXISTS data_sources ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL, name VARCHAR(255) NOT NULL, type VARCHAR(50) NOT NULL, -- table/text/image source_type VARCHAR(50) NOT NULL DEFAULT 'manual', -- file/manual/rule_result node_ids JSONB DEFAULT '{"refs": []}', -- 节点引用列表 config JSONB DEFAULT '{}', -- 数据源配置 metadata JSONB DEFAULT '{}', value_type VARCHAR(20) DEFAULT 'text', -- text/image/table/mixed aggregate_type VARCHAR(20) DEFAULT 'first', -- first/last/concat/sum/avg/list separator VARCHAR(50) DEFAULT '', -- 聚合分隔符 create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_data_sources_user_id ON data_sources(user_id); CREATE INDEX IF NOT EXISTS idx_data_sources_document_id ON data_sources(document_id); CREATE INDEX IF NOT EXISTS idx_data_sources_type ON data_sources(type); COMMENT ON TABLE data_sources IS '数据源表'; COMMENT ON COLUMN data_sources.value_type IS '值类型: text/image/table/mixed'; COMMENT ON COLUMN data_sources.aggregate_type IS '聚合方式: first/last/concat/sum/avg/list'; -- 12. 文本存储表 (text_storage) CREATE TABLE IF NOT EXISTS text_storage ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, file_path VARCHAR(500) NOT NULL, -- TXT文件路径 file_size BIGINT, checksum VARCHAR(64), -- 文件校验和 create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_text_storage_document_id ON text_storage(document_id); CREATE UNIQUE INDEX IF NOT EXISTS idx_text_storage_document_unique ON text_storage(document_id); COMMENT ON TABLE text_storage IS '文本存储表 - 存储文档解析后的TXT文件路径'; -- ============================================ -- 四、RAG 模块 -- ============================================ -- 13. 文本分块表 (text_chunks) CREATE TABLE IF NOT EXISTS text_chunks ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL, text_storage_id VARCHAR(36), chunk_index INTEGER NOT NULL, content TEXT NOT NULL, token_count INTEGER, metadata JSONB DEFAULT '{}', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_text_chunks_document_id ON text_chunks(document_id); CREATE INDEX IF NOT EXISTS idx_text_chunks_text_storage_id ON text_chunks(text_storage_id); CREATE INDEX IF NOT EXISTS idx_text_chunks_chunk_index ON text_chunks(document_id, chunk_index); COMMENT ON TABLE text_chunks IS '文本分块表 - RAG分块存储'; -- 14. 向量嵌入表 (vector_embeddings) CREATE TABLE IF NOT EXISTS vector_embeddings ( id VARCHAR(36) PRIMARY KEY, chunk_id VARCHAR(36) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE, embedding vector(768), -- nomic-embed-text 维度为 768 model_name VARCHAR(100) DEFAULT 'nomic-embed-text', create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_vector_embeddings_chunk_id ON vector_embeddings(chunk_id); CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(model_name); CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings USING hnsw (embedding vector_cosine_ops); COMMENT ON TABLE vector_embeddings IS '向量嵌入表 - RAG向量检索'; COMMENT ON COLUMN vector_embeddings.embedding IS '768维向量(nomic-embed-text)'; -- ============================================ -- 五、文档结构化模块 -- ============================================ -- 15. 文档块表 (document_blocks) CREATE TABLE IF NOT EXISTS document_blocks ( id VARCHAR(64) PRIMARY KEY, document_id VARCHAR(64) NOT NULL, parent_id VARCHAR(64), children JSONB, block_index INTEGER NOT NULL, block_type VARCHAR(32) NOT NULL, -- heading1/heading2/paragraph/table/list/image/quote elements JSONB, -- TextElement数组 style JSONB, metadata JSONB, create_by VARCHAR(64), create_by_name VARCHAR(128), create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(64), update_by_name VARCHAR(128), update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_document_blocks_document_id ON document_blocks(document_id); CREATE INDEX IF NOT EXISTS idx_document_blocks_parent_id ON document_blocks(parent_id); CREATE INDEX IF NOT EXISTS idx_document_blocks_block_type ON document_blocks(block_type); CREATE INDEX IF NOT EXISTS idx_document_blocks_elements_gin ON document_blocks USING GIN (elements jsonb_path_ops); COMMENT ON TABLE document_blocks IS '文档块表 - 参考飞书Block设计'; COMMENT ON COLUMN document_blocks.block_type IS '块类型: heading1/heading2/paragraph/table/list/image/quote'; COMMENT ON COLUMN document_blocks.elements IS 'TextElement数组,实体作为元素嵌入'; -- 16. 文档实体表 (document_entities) CREATE TABLE IF NOT EXISTS document_entities ( id VARCHAR(64) PRIMARY KEY, document_id VARCHAR(64) NOT NULL, block_id VARCHAR(64), name VARCHAR(512) NOT NULL, entity_type VARCHAR(32) NOT NULL, -- PERSON/ORG/LOC/DATE/NUMBER/MONEY/CONCEPT/DATA/DEVICE/TERM value TEXT, block_char_start INTEGER, block_char_end INTEGER, global_char_start INTEGER, global_char_end INTEGER, anchor_before VARCHAR(100), anchor_after VARCHAR(100), source VARCHAR(16) DEFAULT 'auto', -- auto/manual confidence DECIMAL(5,4), confirmed BOOLEAN DEFAULT FALSE, graph_node_id VARCHAR(64), metadata JSONB, create_by VARCHAR(64), create_by_name VARCHAR(128), create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(64), update_by_name VARCHAR(128), update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_document_entities_document_id ON document_entities(document_id); CREATE INDEX IF NOT EXISTS idx_document_entities_block_id ON document_entities(block_id); CREATE INDEX IF NOT EXISTS idx_document_entities_type ON document_entities(entity_type); CREATE INDEX IF NOT EXISTS idx_document_entities_name ON document_entities(name); CREATE INDEX IF NOT EXISTS idx_document_entities_global_char ON document_entities(document_id, global_char_start, global_char_end); CREATE INDEX IF NOT EXISTS idx_document_entities_graph_node ON document_entities(graph_node_id); COMMENT ON TABLE document_entities IS '文档实体表 - 文档中标记的实体/要素'; COMMENT ON COLUMN document_entities.entity_type IS '实体类型: PERSON/ORG/LOC/DATE/NUMBER/MONEY等'; COMMENT ON COLUMN document_entities.source IS '来源: auto=自动识别, manual=手动标注'; -- 17. 文档元素表 (document_elements) CREATE TABLE IF NOT EXISTS document_elements ( id VARCHAR(64) PRIMARY KEY, document_id VARCHAR(64) NOT NULL, element_index INT NOT NULL, -- 元素在文档中的顺序 element_type VARCHAR(32) NOT NULL, -- paragraph/heading/heading1-9/list_item/image/table/title/toc content TEXT, -- 文本内容(文本类型) style JSONB, -- 样式信息 -- 图片相关 image_url VARCHAR(500), image_path VARCHAR(500), image_alt VARCHAR(255), image_width INT, image_height INT, image_format VARCHAR(16), -- 表格相关 table_index INT, table_data JSONB, -- [[{row,col,text,colSpan},...],...] table_row_count INT, table_col_count INT, table_text TEXT, -- 表格文本(用于搜索) create_by VARCHAR(64), create_by_name VARCHAR(128), create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(64), update_by_name VARCHAR(128), update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_document_elements_document_id ON document_elements(document_id); CREATE INDEX IF NOT EXISTS idx_document_elements_type ON document_elements(element_type); CREATE INDEX IF NOT EXISTS idx_document_elements_order ON document_elements(document_id, element_index); COMMENT ON TABLE document_elements IS '文档元素表 - Word/PDF结构化提取'; COMMENT ON COLUMN document_elements.element_type IS '元素类型: paragraph/heading/image/table等'; COMMENT ON COLUMN document_elements.style IS '样式信息JSON: {alignment, fontSize, bold, color等}'; COMMENT ON COLUMN document_elements.table_data IS '表格数据JSON: [[{row,col,text,colSpan},...],...]'; -- ============================================ -- 六、模板系统模块(v2.0) -- ============================================ -- 18. 报告模板表 (templates) CREATE TABLE IF NOT EXISTS templates ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, description TEXT, base_document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE RESTRICT, status VARCHAR(32) DEFAULT 'draft', -- draft/published/archived config JSONB DEFAULT '{}', is_public BOOLEAN DEFAULT FALSE, use_count INT DEFAULT 0, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, create_by VARCHAR(36), create_by_name VARCHAR(100), update_by VARCHAR(36), update_by_name VARCHAR(100) ); CREATE INDEX IF NOT EXISTS idx_templates_user_id ON templates(user_id); CREATE INDEX IF NOT EXISTS idx_templates_status ON templates(status); CREATE INDEX IF NOT EXISTS idx_templates_is_public ON templates(is_public); CREATE INDEX IF NOT EXISTS idx_templates_base_document ON templates(base_document_id); COMMENT ON TABLE templates IS '报告模板表 - 示例文档驱动的模板'; COMMENT ON COLUMN templates.base_document_id IS '示例报告文档ID'; COMMENT ON COLUMN templates.status IS 'draft-草稿, published-已发布, archived-已归档'; COMMENT ON COLUMN templates.is_public IS '是否公开给其他用户使用'; -- 19. 来源文件定义表 (source_files) CREATE TABLE IF NOT EXISTS source_files ( id VARCHAR(36) PRIMARY KEY, template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE, alias VARCHAR(100) NOT NULL, -- 文件别名,如"可研批复" description TEXT, file_types JSONB DEFAULT '["pdf", "docx"]', -- 允许的文件类型 required BOOLEAN DEFAULT TRUE, example_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL, display_order INT DEFAULT 0, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, CONSTRAINT uk_source_files_alias UNIQUE (template_id, alias) ); CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files(template_id); COMMENT ON TABLE source_files IS '来源文件定义表'; COMMENT ON COLUMN source_files.alias IS '用户自定义的别名,用于引用'; COMMENT ON COLUMN source_files.file_types IS '允许上传的文件类型列表'; COMMENT ON COLUMN source_files.example_document_id IS '创建模板时使用的示例文件'; -- 20. 模板变量表 (variables) CREATE TABLE IF NOT EXISTS variables ( id VARCHAR(36) PRIMARY KEY, template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE, -- 变量标识 name VARCHAR(100) NOT NULL, -- 变量名(程序用) display_name VARCHAR(200) NOT NULL, -- 显示名称 variable_group VARCHAR(100), -- 变量分组 category VARCHAR(32), -- 变量类别(前端显示用): entity/concept/data/location/asset -- 在示例报告中的位置 location JSONB NOT NULL, -- {elementId, type, startOffset, endOffset, rowIndex, colIndex} -- 示例值 example_value TEXT, value_type VARCHAR(32) DEFAULT 'text', -- text/date/number/table -- 数据来源 source_file_alias VARCHAR(100), -- 来源文件别名 source_type VARCHAR(32) NOT NULL, -- document/manual/reference/fixed source_config JSONB, -- 提取方式 extract_type VARCHAR(32), -- direct/ai_extract/ai_summarize extract_config JSONB, display_order INT DEFAULT 0, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, CONSTRAINT uk_variables_name UNIQUE (template_id, name) ); CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id); CREATE INDEX IF NOT EXISTS idx_variables_source_alias ON variables(source_file_alias); CREATE INDEX IF NOT EXISTS idx_variables_source_type ON variables(source_type); CREATE INDEX IF NOT EXISTS idx_variables_category ON variables(category); COMMENT ON TABLE variables IS '模板变量表'; COMMENT ON COLUMN variables.name IS '变量名,模板内唯一'; COMMENT ON COLUMN variables.location IS '变量在文档中的位置'; COMMENT ON COLUMN variables.source_type IS 'document-从来源文件提取, manual-手动输入, reference-引用其他变量, fixed-固定值'; COMMENT ON COLUMN variables.extract_type IS 'direct-直接提取, ai_extract-AI字段提取, ai_summarize-AI总结'; COMMENT ON COLUMN variables.category IS 'entity-核心实体, concept-概念/技术, data-数据/指标, location-地点/组织, asset-资源模板'; -- 21. 生成任务表 (generations) CREATE TABLE IF NOT EXISTS generations ( id VARCHAR(36) PRIMARY KEY, template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE RESTRICT, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255), -- 来源文件映射 source_file_map JSONB NOT NULL, -- {"可研批复": "doc_123", ...} -- 变量提取结果 variable_values JSONB, -- {varName: {value, confidence, status, ...}} -- 生成的文档 output_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL, output_file_path VARCHAR(500), status VARCHAR(32) DEFAULT 'pending', -- pending/extracting/review/completed/error error_message TEXT, progress INT DEFAULT 0, -- 0-100 create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, completed_at TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_generations_template ON generations(template_id); CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id); CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status); CREATE INDEX IF NOT EXISTS idx_generations_create_time ON generations(create_time DESC); COMMENT ON TABLE generations IS '报告生成任务表'; COMMENT ON COLUMN generations.source_file_map IS '来源文件映射: {"别名": "文档ID"}'; COMMENT ON COLUMN generations.variable_values IS '变量提取结果'; COMMENT ON COLUMN generations.status IS 'pending-待执行, extracting-提取中, review-待确认, completed-已完成, error-错误'; -- ============================================ -- 七、触发器函数与触发器 -- ============================================ -- 更新时间触发器函数 CREATE OR REPLACE FUNCTION update_update_time_column() RETURNS TRIGGER AS $$ BEGIN NEW.update_time = CURRENT_TIMESTAMP; RETURN NEW; END; $$ LANGUAGE plpgsql; -- 为所有需要的表创建触发器 DO $$ DECLARE tbl TEXT; tables TEXT[] := ARRAY[ 'users', 'documents', 'elements', 'annotations', 'graphs', 'parse_tasks', 'sessions', 'graph_nodes', 'graph_relations', 'rules', 'data_sources', 'text_storage', 'text_chunks', 'document_blocks', 'document_entities', 'document_elements', 'templates', 'variables' ]; BEGIN FOREACH tbl IN ARRAY tables LOOP EXECUTE format('DROP TRIGGER IF EXISTS trigger_%s_update_time ON %I', tbl, tbl); EXECUTE format('CREATE TRIGGER trigger_%s_update_time BEFORE UPDATE ON %I FOR EACH ROW EXECUTE FUNCTION update_update_time_column()', tbl, tbl); END LOOP; END $$; -- ============================================ -- 八、辅助函数 -- ============================================ -- 向量相似度检索(按文档) CREATE OR REPLACE FUNCTION search_similar_chunks( query_embedding vector(768), target_document_id VARCHAR(36), result_limit INTEGER DEFAULT 3 ) RETURNS TABLE ( chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT ) AS $$ BEGIN RETURN QUERY SELECT tc.id AS chunk_id, tc.document_id, tc.content, tc.chunk_index, 1 - (ve.embedding <=> query_embedding) AS similarity FROM text_chunks tc JOIN vector_embeddings ve ON tc.id = ve.chunk_id WHERE tc.document_id = target_document_id ORDER BY ve.embedding <=> query_embedding LIMIT result_limit; END; $$ LANGUAGE plpgsql; -- 向量相似度检索(全局) CREATE OR REPLACE FUNCTION search_similar_chunks_global( query_embedding vector(768), result_limit INTEGER DEFAULT 5 ) RETURNS TABLE ( chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT ) AS $$ BEGIN RETURN QUERY SELECT tc.id AS chunk_id, tc.document_id, tc.content, tc.chunk_index, 1 - (ve.embedding <=> query_embedding) AS similarity FROM text_chunks tc JOIN vector_embeddings ve ON tc.id = ve.chunk_id ORDER BY ve.embedding <=> query_embedding LIMIT result_limit; END; $$ LANGUAGE plpgsql; -- ============================================ -- 完成 -- ============================================ SELECT '灵越智报 v2.0 数据库表创建完成' AS result; SELECT COUNT(*) AS "表总数" FROM pg_tables WHERE schemaname = 'public';