|
|
@@ -1,61 +1,22 @@
|
|
|
-- =====================================================
|
|
|
--- 灵越智报 v2.0 数据库完整表结构
|
|
|
+-- 灵越智报 v2.0 数据库初始化脚本(单文件)
|
|
|
-- PostgreSQL 15+
|
|
|
---
|
|
|
--- 生成时间: 2026-01-23
|
|
|
---
|
|
|
--- 包含所有表(按模块分组):
|
|
|
--- 一、基础模块
|
|
|
--- 1. users - 用户表
|
|
|
--- 2. documents - 文档表
|
|
|
--- 3. elements - 要素表
|
|
|
--- 4. annotations - 批注表
|
|
|
--- 5. graphs - 关系网络表
|
|
|
--- 6. parse_tasks - 解析任务表
|
|
|
--- 7. sessions - 会话表
|
|
|
---
|
|
|
--- 二、图谱模块
|
|
|
--- 8. graph_nodes - 图节点表
|
|
|
--- 9. graph_relations - 图关系表
|
|
|
---
|
|
|
--- 三、补充模块
|
|
|
--- 10. rules - 规则表
|
|
|
--- 11. data_sources - 数据源表
|
|
|
--- 12. text_storage - 文本存储表
|
|
|
---
|
|
|
--- 四、RAG 模块
|
|
|
--- 13. text_chunks - 文本分块表
|
|
|
--- 14. vector_embeddings - 向量嵌入表
|
|
|
---
|
|
|
--- 五、文档结构化模块
|
|
|
--- 15. document_blocks - 文档块表
|
|
|
--- 16. document_entities - 文档实体表
|
|
|
--- 17. document_elements - 文档元素表
|
|
|
---
|
|
|
--- 六、模板系统模块(v2.0)
|
|
|
--- 18. templates - 报告模板表
|
|
|
--- 19. source_files - 来源文件定义表
|
|
|
--- 20. variables - 模板变量表
|
|
|
--- 21. generations - 生成任务表
|
|
|
+-- 包含所有表结构,不含 graph_nodes / graph_relations(已移除)
|
|
|
-- =====================================================
|
|
|
|
|
|
--- 启用扩展
|
|
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
|
-CREATE EXTENSION IF NOT EXISTS vector; -- pgvector 用于向量检索
|
|
|
+CREATE EXTENSION IF NOT EXISTS vector;
|
|
|
|
|
|
--- ============================================
|
|
|
--- 一、基础模块
|
|
|
--- ============================================
|
|
|
+-- ============================================ 一、基础模块 ============================================
|
|
|
|
|
|
--- 1. 用户表 (users)
|
|
|
CREATE TABLE IF NOT EXISTS users (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
username VARCHAR(50) UNIQUE NOT NULL,
|
|
|
email VARCHAR(100) UNIQUE NOT NULL,
|
|
|
password_hash VARCHAR(255) NOT NULL,
|
|
|
avatar_url VARCHAR(500),
|
|
|
- role VARCHAR(20) NOT NULL DEFAULT 'user', -- admin/user/guest
|
|
|
- preferences TEXT DEFAULT '{}', -- JSON字符串格式存储偏好设置
|
|
|
+ role VARCHAR(20) NOT NULL DEFAULT 'user',
|
|
|
+ preferences TEXT DEFAULT '{}',
|
|
|
last_login_at TIMESTAMP,
|
|
|
create_by VARCHAR(36),
|
|
|
create_by_name VARCHAR(100),
|
|
|
@@ -64,31 +25,25 @@ CREATE TABLE IF NOT EXISTS users (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
|
|
|
CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);
|
|
|
|
|
|
-COMMENT ON TABLE users IS '用户表';
|
|
|
-COMMENT ON COLUMN users.role IS '角色: admin-管理员, user-普通用户, guest-访客';
|
|
|
-
|
|
|
--- 2. 文档表 (documents)
|
|
|
CREATE TABLE IF NOT EXISTS documents (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
name VARCHAR(255) NOT NULL,
|
|
|
- type VARCHAR(20) NOT NULL, -- pdf/word/image/markdown/other
|
|
|
- status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending/uploading/parsing/completed/failed
|
|
|
+ type VARCHAR(20) NOT NULL,
|
|
|
+ status VARCHAR(20) NOT NULL DEFAULT 'pending',
|
|
|
file_size BIGINT,
|
|
|
file_url VARCHAR(500),
|
|
|
thumbnail_url VARCHAR(500),
|
|
|
parsed_text TEXT,
|
|
|
- parse_status VARCHAR(20), -- pending/parsing/completed/failed
|
|
|
- parse_progress INTEGER DEFAULT 0, -- 0-100
|
|
|
+ parse_status VARCHAR(20),
|
|
|
+ parse_progress INTEGER DEFAULT 0,
|
|
|
parse_error TEXT,
|
|
|
parse_started_at TIMESTAMP,
|
|
|
parse_completed_at TIMESTAMP,
|
|
|
- metadata JSONB DEFAULT '{}', -- pageCount, ocrConfidence, layoutStructure等
|
|
|
- -- 结构化解析增强字段
|
|
|
+ metadata JSONB DEFAULT '{}',
|
|
|
structured_status VARCHAR(20) DEFAULT 'pending',
|
|
|
image_count INT DEFAULT 0,
|
|
|
table_count INT DEFAULT 0,
|
|
|
@@ -100,30 +55,22 @@ CREATE TABLE IF NOT EXISTS documents (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
|
|
|
CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(type);
|
|
|
CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(create_time DESC);
|
|
|
CREATE INDEX IF NOT EXISTS idx_documents_metadata ON documents USING GIN(metadata);
|
|
|
|
|
|
-COMMENT ON TABLE documents IS '文档表';
|
|
|
-COMMENT ON COLUMN documents.type IS '文档类型: pdf/word/image/markdown/other';
|
|
|
-COMMENT ON COLUMN documents.status IS '状态: pending/uploading/parsing/completed/failed';
|
|
|
-COMMENT ON COLUMN documents.structured_status IS '结构化解析状态: pending/completed/failed';
|
|
|
-
|
|
|
--- 3. 要素表 (elements)
|
|
|
CREATE TABLE IF NOT EXISTS elements (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
- type VARCHAR(20) NOT NULL, -- amount/company/person/location/date/other
|
|
|
+ type VARCHAR(20) NOT NULL,
|
|
|
label VARCHAR(100) NOT NULL,
|
|
|
value TEXT NOT NULL,
|
|
|
- position JSONB, -- {page, x, y, width, height}
|
|
|
- confidence DECIMAL(3,2), -- 0.00-1.00
|
|
|
- extraction_method VARCHAR(20), -- ai/regex/rule/manual
|
|
|
- graph_node_id VARCHAR(36), -- 关联的图节点ID
|
|
|
+ position JSONB,
|
|
|
+ confidence DECIMAL(3,2),
|
|
|
+ extraction_method VARCHAR(20),
|
|
|
metadata JSONB DEFAULT '{}',
|
|
|
create_by VARCHAR(36),
|
|
|
create_by_name VARCHAR(100),
|
|
|
@@ -132,28 +79,22 @@ CREATE TABLE IF NOT EXISTS elements (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_elements_document_id ON elements(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_elements_user_id ON elements(user_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_elements_type ON elements(type);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_elements_graph_node_id ON elements(graph_node_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_elements_position ON elements USING GIN(position);
|
|
|
|
|
|
-COMMENT ON TABLE elements IS '要素表 - 文档中提取的结构化要素';
|
|
|
-COMMENT ON COLUMN elements.type IS '要素类型: amount/company/person/location/date/other';
|
|
|
-
|
|
|
--- 4. 批注表 (annotations)
|
|
|
CREATE TABLE IF NOT EXISTS annotations (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
text TEXT NOT NULL,
|
|
|
- position JSONB NOT NULL, -- {page, start: {x, y}, end: {x, y}}
|
|
|
- type VARCHAR(20) NOT NULL, -- highlight/strikethrough/suggestion
|
|
|
+ position JSONB NOT NULL,
|
|
|
+ type VARCHAR(20) NOT NULL,
|
|
|
suggestion TEXT,
|
|
|
ai_generated BOOLEAN DEFAULT FALSE,
|
|
|
confidence DECIMAL(3,2),
|
|
|
- status VARCHAR(20) DEFAULT 'pending', -- pending/accepted/rejected
|
|
|
+ status VARCHAR(20) DEFAULT 'pending',
|
|
|
create_by VARCHAR(36),
|
|
|
create_by_name VARCHAR(100),
|
|
|
create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
|
@@ -161,25 +102,20 @@ CREATE TABLE IF NOT EXISTS annotations (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_annotations_document_id ON annotations(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_annotations_user_id ON annotations(user_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_annotations_type ON annotations(type);
|
|
|
CREATE INDEX IF NOT EXISTS idx_annotations_status ON annotations(status);
|
|
|
|
|
|
-COMMENT ON TABLE annotations IS '批注表';
|
|
|
-COMMENT ON COLUMN annotations.type IS '批注类型: highlight/strikethrough/suggestion';
|
|
|
-
|
|
|
--- 5. 关系网络表 (graphs)
|
|
|
CREATE TABLE IF NOT EXISTS graphs (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
name VARCHAR(255) NOT NULL,
|
|
|
- nodes JSONB NOT NULL DEFAULT '[]', -- GraphNode数组
|
|
|
- edges JSONB NOT NULL DEFAULT '[]', -- GraphEdge数组
|
|
|
+ nodes JSONB NOT NULL DEFAULT '[]',
|
|
|
+ edges JSONB NOT NULL DEFAULT '[]',
|
|
|
calculation_result JSONB,
|
|
|
- calculation_status VARCHAR(20), -- pending/completed/failed
|
|
|
+ calculation_status VARCHAR(20),
|
|
|
metadata JSONB DEFAULT '{}',
|
|
|
create_by VARCHAR(36),
|
|
|
create_by_name VARCHAR(100),
|
|
|
@@ -188,26 +124,21 @@ CREATE TABLE IF NOT EXISTS graphs (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_graphs_document_id ON graphs(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_graphs_user_id ON graphs(user_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_graphs_nodes ON graphs USING GIN(nodes);
|
|
|
CREATE INDEX IF NOT EXISTS idx_graphs_edges ON graphs USING GIN(edges);
|
|
|
|
|
|
-COMMENT ON TABLE graphs IS '关系网络表';
|
|
|
-
|
|
|
--- 6. 解析任务表 (parse_tasks)
|
|
|
CREATE TABLE IF NOT EXISTS parse_tasks (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
|
|
- status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending/processing/completed/failed
|
|
|
- progress INTEGER DEFAULT 0, -- 0-100
|
|
|
+ status VARCHAR(20) NOT NULL DEFAULT 'pending',
|
|
|
+ progress INTEGER DEFAULT 0,
|
|
|
current_step VARCHAR(100),
|
|
|
error_message TEXT,
|
|
|
- options JSONB DEFAULT '{}', -- 解析选项
|
|
|
+ options JSONB DEFAULT '{}',
|
|
|
started_at TIMESTAMP,
|
|
|
completed_at TIMESTAMP,
|
|
|
- -- 多阶段进度跟踪
|
|
|
parse_status VARCHAR(20) DEFAULT 'pending',
|
|
|
parse_progress INTEGER DEFAULT 0,
|
|
|
rag_status VARCHAR(20) DEFAULT 'pending',
|
|
|
@@ -222,6 +153,7 @@ CREATE TABLE IF NOT EXISTS parse_tasks (
|
|
|
ner_task_id VARCHAR(64),
|
|
|
ner_entity_count INTEGER,
|
|
|
ner_relation_count INTEGER,
|
|
|
+ ner_message VARCHAR(255),
|
|
|
graph_status VARCHAR(20) DEFAULT 'pending',
|
|
|
graph_progress INTEGER DEFAULT 0,
|
|
|
create_by VARCHAR(36),
|
|
|
@@ -231,19 +163,9 @@ CREATE TABLE IF NOT EXISTS parse_tasks (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_parse_tasks_document_id ON parse_tasks(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_parse_tasks_status ON parse_tasks(status);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_parse_tasks_ner_task_id ON parse_tasks(ner_task_id);
|
|
|
-
|
|
|
-COMMENT ON TABLE parse_tasks IS '解析任务表';
|
|
|
-COMMENT ON COLUMN parse_tasks.parse_status IS '解析阶段状态';
|
|
|
-COMMENT ON COLUMN parse_tasks.rag_status IS 'RAG向量化阶段状态';
|
|
|
-COMMENT ON COLUMN parse_tasks.structured_status IS '结构化解析阶段状态';
|
|
|
-COMMENT ON COLUMN parse_tasks.ner_status IS 'NER实体提取阶段状态';
|
|
|
-COMMENT ON COLUMN parse_tasks.graph_status IS '图构建阶段状态';
|
|
|
|
|
|
--- 7. 会话表 (sessions)
|
|
|
CREATE TABLE IF NOT EXISTS sessions (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
@@ -255,78 +177,21 @@ CREATE TABLE IF NOT EXISTS sessions (
|
|
|
create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
|
last_used_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON sessions(user_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_sessions_token_hash ON sessions(token_hash);
|
|
|
CREATE INDEX IF NOT EXISTS idx_sessions_expires_at ON sessions(expires_at);
|
|
|
|
|
|
-COMMENT ON TABLE sessions IS '会话表';
|
|
|
-
|
|
|
--- ============================================
|
|
|
--- 二、图谱模块
|
|
|
--- ============================================
|
|
|
-
|
|
|
--- 8. 图节点表 (graph_nodes)
|
|
|
-CREATE TABLE IF NOT EXISTS graph_nodes (
|
|
|
- id VARCHAR(36) PRIMARY KEY,
|
|
|
- document_id VARCHAR(36) NOT NULL,
|
|
|
- user_id VARCHAR(36), -- 可为空,自动提取时可能没有用户上下文
|
|
|
- name VARCHAR(255) NOT NULL,
|
|
|
- type VARCHAR(50) NOT NULL, -- text/table/image/number/date/ORG/PERSON/LOC/TIME/DEVICE/PROJECT/METHOD等
|
|
|
- value TEXT,
|
|
|
- position JSONB, -- {charStart, charEnd, line}
|
|
|
- parent_id VARCHAR(36),
|
|
|
- level INTEGER DEFAULT 0,
|
|
|
- metadata JSONB DEFAULT '{}',
|
|
|
- create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
|
- update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
-);
|
|
|
-
|
|
|
-CREATE INDEX IF NOT EXISTS idx_graph_nodes_document_id ON graph_nodes(document_id);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_graph_nodes_user_id ON graph_nodes(user_id);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_graph_nodes_type ON graph_nodes(type);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_graph_nodes_parent_id ON graph_nodes(parent_id);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_graph_nodes_position ON graph_nodes USING GIN(position);
|
|
|
-
|
|
|
-COMMENT ON TABLE graph_nodes IS '图节点表 - NER实体识别结果';
|
|
|
-COMMENT ON COLUMN graph_nodes.type IS '节点类型: ORG/PERSON/LOC/TIME/DEVICE/PROJECT/METHOD等';
|
|
|
-
|
|
|
--- 9. 图关系表 (graph_relations)
|
|
|
-CREATE TABLE IF NOT EXISTS graph_relations (
|
|
|
- id VARCHAR(36) PRIMARY KEY,
|
|
|
- from_node_id VARCHAR(36) NOT NULL REFERENCES graph_nodes(id) ON DELETE CASCADE,
|
|
|
- to_node_id VARCHAR(36) NOT NULL REFERENCES graph_nodes(id) ON DELETE CASCADE,
|
|
|
- relation_type VARCHAR(50) NOT NULL, -- BELONGS_TO/USES/LOCATED_IN/EXECUTES/MONITORS等
|
|
|
- action_type VARCHAR(50),
|
|
|
- action_config JSONB,
|
|
|
- order_index INTEGER DEFAULT 0,
|
|
|
- condition_expr TEXT,
|
|
|
- metadata JSONB DEFAULT '{}',
|
|
|
- create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
|
- update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
-);
|
|
|
+-- ============================================ 二、补充模块 ============================================
|
|
|
|
|
|
-CREATE INDEX IF NOT EXISTS idx_graph_relations_from_node ON graph_relations(from_node_id);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_graph_relations_to_node ON graph_relations(to_node_id);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_graph_relations_type ON graph_relations(relation_type);
|
|
|
-
|
|
|
-COMMENT ON TABLE graph_relations IS '图关系表 - 实体之间的关系';
|
|
|
-COMMENT ON COLUMN graph_relations.relation_type IS '关系类型: BELONGS_TO/USES/LOCATED_IN/EXECUTES/MONITORS等';
|
|
|
-
|
|
|
--- ============================================
|
|
|
--- 三、补充模块
|
|
|
--- ============================================
|
|
|
-
|
|
|
--- 10. 规则表 (rules)
|
|
|
CREATE TABLE IF NOT EXISTS rules (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
name VARCHAR(255) NOT NULL,
|
|
|
description TEXT,
|
|
|
- entry_node_id VARCHAR(36) REFERENCES graph_nodes(id) ON DELETE SET NULL,
|
|
|
- exit_node_id VARCHAR(36) REFERENCES graph_nodes(id) ON DELETE SET NULL,
|
|
|
- rule_chain JSONB NOT NULL DEFAULT '[]', -- 规则链(节点ID序列)
|
|
|
- status VARCHAR(20) DEFAULT 'active', -- active/inactive
|
|
|
+ entry_node_id VARCHAR(36),
|
|
|
+ exit_node_id VARCHAR(36),
|
|
|
+ rule_chain JSONB NOT NULL DEFAULT '[]',
|
|
|
+ status VARCHAR(20) DEFAULT 'active',
|
|
|
metadata JSONB DEFAULT '{}',
|
|
|
create_by VARCHAR(36),
|
|
|
create_by_name VARCHAR(100),
|
|
|
@@ -335,28 +200,22 @@ CREATE TABLE IF NOT EXISTS rules (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_rules_user_id ON rules(user_id);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_rules_entry_node ON rules(entry_node_id);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_rules_exit_node ON rules(exit_node_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_rules_status ON rules(status);
|
|
|
|
|
|
-COMMENT ON TABLE rules IS '规则表';
|
|
|
-
|
|
|
--- 11. 数据源表 (data_sources)
|
|
|
CREATE TABLE IF NOT EXISTS data_sources (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
|
|
|
name VARCHAR(255) NOT NULL,
|
|
|
- type VARCHAR(50) NOT NULL, -- table/text/image
|
|
|
- source_type VARCHAR(50) NOT NULL DEFAULT 'manual', -- file/manual/rule_result
|
|
|
- node_ids JSONB DEFAULT '{"refs": []}', -- 节点引用列表
|
|
|
- config JSONB DEFAULT '{}', -- 数据源配置
|
|
|
+ type VARCHAR(50) NOT NULL,
|
|
|
+ source_type VARCHAR(50) NOT NULL DEFAULT 'manual',
|
|
|
+ node_ids JSONB DEFAULT '{"refs": []}',
|
|
|
+ config JSONB DEFAULT '{}',
|
|
|
metadata JSONB DEFAULT '{}',
|
|
|
- value_type VARCHAR(20) DEFAULT 'text', -- text/image/table/mixed
|
|
|
- aggregate_type VARCHAR(20) DEFAULT 'first', -- first/last/concat/sum/avg/list
|
|
|
- separator VARCHAR(50) DEFAULT '', -- 聚合分隔符
|
|
|
+ value_type VARCHAR(20) DEFAULT 'text',
|
|
|
+ aggregate_type VARCHAR(20) DEFAULT 'first',
|
|
|
+ separator VARCHAR(50) DEFAULT '',
|
|
|
create_by VARCHAR(36),
|
|
|
create_by_name VARCHAR(100),
|
|
|
create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
|
@@ -364,22 +223,16 @@ CREATE TABLE IF NOT EXISTS data_sources (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_data_sources_user_id ON data_sources(user_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_data_sources_document_id ON data_sources(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_data_sources_type ON data_sources(type);
|
|
|
|
|
|
-COMMENT ON TABLE data_sources IS '数据源表';
|
|
|
-COMMENT ON COLUMN data_sources.value_type IS '值类型: text/image/table/mixed';
|
|
|
-COMMENT ON COLUMN data_sources.aggregate_type IS '聚合方式: first/last/concat/sum/avg/list';
|
|
|
-
|
|
|
--- 12. 文本存储表 (text_storage)
|
|
|
CREATE TABLE IF NOT EXISTS text_storage (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
|
|
- file_path VARCHAR(500) NOT NULL, -- TXT文件路径
|
|
|
+ file_path VARCHAR(500) NOT NULL,
|
|
|
file_size BIGINT,
|
|
|
- checksum VARCHAR(64), -- 文件校验和
|
|
|
+ checksum VARCHAR(64),
|
|
|
create_by VARCHAR(36),
|
|
|
create_by_name VARCHAR(100),
|
|
|
create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
|
@@ -387,17 +240,11 @@ CREATE TABLE IF NOT EXISTS text_storage (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_text_storage_document_id ON text_storage(document_id);
|
|
|
CREATE UNIQUE INDEX IF NOT EXISTS idx_text_storage_document_unique ON text_storage(document_id);
|
|
|
|
|
|
-COMMENT ON TABLE text_storage IS '文本存储表 - 存储文档解析后的TXT文件路径';
|
|
|
+-- ============================================ 三、RAG 模块 ============================================
|
|
|
|
|
|
--- ============================================
|
|
|
--- 四、RAG 模块
|
|
|
--- ============================================
|
|
|
-
|
|
|
--- 13. 文本分块表 (text_chunks)
|
|
|
CREATE TABLE IF NOT EXISTS text_chunks (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
document_id VARCHAR(36) NOT NULL,
|
|
|
@@ -413,42 +260,31 @@ CREATE TABLE IF NOT EXISTS text_chunks (
|
|
|
update_by_name VARCHAR(100),
|
|
|
update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_text_chunks_document_id ON text_chunks(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_text_chunks_text_storage_id ON text_chunks(text_storage_id);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_text_chunks_chunk_index ON text_chunks(document_id, chunk_index);
|
|
|
+CREATE UNIQUE INDEX IF NOT EXISTS idx_text_chunks_doc_index ON text_chunks(document_id, chunk_index);
|
|
|
|
|
|
-COMMENT ON TABLE text_chunks IS '文本分块表 - RAG分块存储';
|
|
|
-
|
|
|
--- 14. 向量嵌入表 (vector_embeddings)
|
|
|
CREATE TABLE IF NOT EXISTS vector_embeddings (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
chunk_id VARCHAR(36) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE,
|
|
|
- embedding vector(768), -- nomic-embed-text 维度为 768
|
|
|
+ embedding vector(768),
|
|
|
model_name VARCHAR(100) DEFAULT 'nomic-embed-text',
|
|
|
create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_vector_embeddings_chunk_id ON vector_embeddings(chunk_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(model_name);
|
|
|
CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings USING hnsw (embedding vector_cosine_ops);
|
|
|
|
|
|
-COMMENT ON TABLE vector_embeddings IS '向量嵌入表 - RAG向量检索';
|
|
|
-COMMENT ON COLUMN vector_embeddings.embedding IS '768维向量(nomic-embed-text)';
|
|
|
+-- ============================================ 四、文档结构化模块 ============================================
|
|
|
|
|
|
--- ============================================
|
|
|
--- 五、文档结构化模块
|
|
|
--- ============================================
|
|
|
-
|
|
|
--- 15. 文档块表 (document_blocks)
|
|
|
CREATE TABLE IF NOT EXISTS document_blocks (
|
|
|
id VARCHAR(64) PRIMARY KEY,
|
|
|
document_id VARCHAR(64) NOT NULL,
|
|
|
parent_id VARCHAR(64),
|
|
|
children JSONB,
|
|
|
block_index INTEGER NOT NULL,
|
|
|
- block_type VARCHAR(32) NOT NULL, -- heading1/heading2/paragraph/table/list/image/quote
|
|
|
- elements JSONB, -- TextElement数组
|
|
|
+ block_type VARCHAR(32) NOT NULL,
|
|
|
+ elements JSONB,
|
|
|
style JSONB,
|
|
|
metadata JSONB,
|
|
|
create_by VARCHAR(64),
|
|
|
@@ -458,23 +294,17 @@ CREATE TABLE IF NOT EXISTS document_blocks (
|
|
|
update_by_name VARCHAR(128),
|
|
|
update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_blocks_document_id ON document_blocks(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_blocks_parent_id ON document_blocks(parent_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_blocks_block_type ON document_blocks(block_type);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_blocks_elements_gin ON document_blocks USING GIN (elements jsonb_path_ops);
|
|
|
|
|
|
-COMMENT ON TABLE document_blocks IS '文档块表 - 参考飞书Block设计';
|
|
|
-COMMENT ON COLUMN document_blocks.block_type IS '块类型: heading1/heading2/paragraph/table/list/image/quote';
|
|
|
-COMMENT ON COLUMN document_blocks.elements IS 'TextElement数组,实体作为元素嵌入';
|
|
|
-
|
|
|
--- 16. 文档实体表 (document_entities)
|
|
|
CREATE TABLE IF NOT EXISTS document_entities (
|
|
|
id VARCHAR(64) PRIMARY KEY,
|
|
|
document_id VARCHAR(64) NOT NULL,
|
|
|
block_id VARCHAR(64),
|
|
|
name VARCHAR(512) NOT NULL,
|
|
|
- entity_type VARCHAR(32) NOT NULL, -- PERSON/ORG/LOC/DATE/NUMBER/MONEY/CONCEPT/DATA/DEVICE/TERM
|
|
|
+ entity_type VARCHAR(32) NOT NULL,
|
|
|
value TEXT,
|
|
|
block_char_start INTEGER,
|
|
|
block_char_end INTEGER,
|
|
|
@@ -482,10 +312,9 @@ CREATE TABLE IF NOT EXISTS document_entities (
|
|
|
global_char_end INTEGER,
|
|
|
anchor_before VARCHAR(100),
|
|
|
anchor_after VARCHAR(100),
|
|
|
- source VARCHAR(16) DEFAULT 'auto', -- auto/manual
|
|
|
+ source VARCHAR(16) DEFAULT 'auto',
|
|
|
confidence DECIMAL(5,4),
|
|
|
confirmed BOOLEAN DEFAULT FALSE,
|
|
|
- graph_node_id VARCHAR(64),
|
|
|
metadata JSONB,
|
|
|
create_by VARCHAR(64),
|
|
|
create_by_name VARCHAR(128),
|
|
|
@@ -494,39 +323,31 @@ CREATE TABLE IF NOT EXISTS document_entities (
|
|
|
update_by_name VARCHAR(128),
|
|
|
update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_entities_document_id ON document_entities(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_entities_block_id ON document_entities(block_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_entities_type ON document_entities(entity_type);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_entities_name ON document_entities(name);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_entities_global_char ON document_entities(document_id, global_char_start, global_char_end);
|
|
|
-CREATE INDEX IF NOT EXISTS idx_document_entities_graph_node ON document_entities(graph_node_id);
|
|
|
|
|
|
-COMMENT ON TABLE document_entities IS '文档实体表 - 文档中标记的实体/要素';
|
|
|
-COMMENT ON COLUMN document_entities.entity_type IS '实体类型: PERSON/ORG/LOC/DATE/NUMBER/MONEY等';
|
|
|
-COMMENT ON COLUMN document_entities.source IS '来源: auto=自动识别, manual=手动标注';
|
|
|
-
|
|
|
--- 17. 文档元素表 (document_elements)
|
|
|
CREATE TABLE IF NOT EXISTS document_elements (
|
|
|
id VARCHAR(64) PRIMARY KEY,
|
|
|
document_id VARCHAR(64) NOT NULL,
|
|
|
- element_index INT NOT NULL, -- 元素在文档中的顺序
|
|
|
- element_type VARCHAR(32) NOT NULL, -- paragraph/heading/heading1-9/list_item/image/table/title/toc
|
|
|
- content TEXT, -- 文本内容(文本类型)
|
|
|
- style JSONB, -- 样式信息
|
|
|
- -- 图片相关
|
|
|
+ element_index INT NOT NULL,
|
|
|
+ element_type VARCHAR(32) NOT NULL,
|
|
|
+ content TEXT,
|
|
|
+ style JSONB,
|
|
|
+ runs JSONB,
|
|
|
image_url VARCHAR(500),
|
|
|
image_path VARCHAR(500),
|
|
|
image_alt VARCHAR(255),
|
|
|
image_width INT,
|
|
|
image_height INT,
|
|
|
image_format VARCHAR(16),
|
|
|
- -- 表格相关
|
|
|
table_index INT,
|
|
|
- table_data JSONB, -- [[{row,col,text,colSpan},...],...]
|
|
|
+ table_data JSONB,
|
|
|
table_row_count INT,
|
|
|
table_col_count INT,
|
|
|
- table_text TEXT, -- 表格文本(用于搜索)
|
|
|
+ table_text TEXT,
|
|
|
create_by VARCHAR(64),
|
|
|
create_by_name VARCHAR(128),
|
|
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
@@ -534,31 +355,23 @@ CREATE TABLE IF NOT EXISTS document_elements (
|
|
|
update_by_name VARCHAR(128),
|
|
|
update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_elements_document_id ON document_elements(document_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_elements_type ON document_elements(element_type);
|
|
|
CREATE INDEX IF NOT EXISTS idx_document_elements_order ON document_elements(document_id, element_index);
|
|
|
|
|
|
-COMMENT ON TABLE document_elements IS '文档元素表 - Word/PDF结构化提取';
|
|
|
-COMMENT ON COLUMN document_elements.element_type IS '元素类型: paragraph/heading/image/table等';
|
|
|
-COMMENT ON COLUMN document_elements.style IS '样式信息JSON: {alignment, fontSize, bold, color等}';
|
|
|
-COMMENT ON COLUMN document_elements.table_data IS '表格数据JSON: [[{row,col,text,colSpan},...],...]';
|
|
|
+-- ============================================ 五、模板系统模块 ============================================
|
|
|
|
|
|
--- ============================================
|
|
|
--- 六、模板系统模块(v2.0)
|
|
|
--- ============================================
|
|
|
-
|
|
|
--- 18. 报告模板表 (templates)
|
|
|
CREATE TABLE IF NOT EXISTS templates (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
name VARCHAR(255) NOT NULL,
|
|
|
description TEXT,
|
|
|
base_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
|
|
|
- status VARCHAR(32) DEFAULT 'draft', -- draft/published/archived
|
|
|
+ status VARCHAR(32) DEFAULT 'draft',
|
|
|
config JSONB DEFAULT '{}',
|
|
|
is_public BOOLEAN DEFAULT FALSE,
|
|
|
use_count INT DEFAULT 0,
|
|
|
+ rating DECIMAL(2,1) DEFAULT 0.0,
|
|
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
create_by VARCHAR(36),
|
|
|
@@ -566,114 +379,73 @@ CREATE TABLE IF NOT EXISTS templates (
|
|
|
update_by VARCHAR(36),
|
|
|
update_by_name VARCHAR(100)
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_templates_user_id ON templates(user_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_templates_status ON templates(status);
|
|
|
CREATE INDEX IF NOT EXISTS idx_templates_is_public ON templates(is_public);
|
|
|
CREATE INDEX IF NOT EXISTS idx_templates_base_document ON templates(base_document_id);
|
|
|
|
|
|
-COMMENT ON TABLE templates IS '报告模板表 - 示例文档驱动的模板';
|
|
|
-COMMENT ON COLUMN templates.base_document_id IS '示例报告文档ID(可选,空白模板可为空)';
|
|
|
-COMMENT ON COLUMN templates.status IS 'draft-草稿, published-已发布, archived-已归档';
|
|
|
-COMMENT ON COLUMN templates.is_public IS '是否公开给其他用户使用';
|
|
|
-
|
|
|
--- 19. 来源文件定义表 (source_files)
|
|
|
CREATE TABLE IF NOT EXISTS source_files (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
|
|
|
- alias VARCHAR(100) NOT NULL, -- 文件别名,如"可研批复"
|
|
|
+ alias VARCHAR(100) NOT NULL,
|
|
|
description TEXT,
|
|
|
- file_types JSONB DEFAULT '["pdf", "docx"]', -- 允许的文件类型
|
|
|
+ file_types JSONB DEFAULT '["pdf", "docx"]',
|
|
|
required BOOLEAN DEFAULT TRUE,
|
|
|
example_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
|
|
|
display_order INT DEFAULT 0,
|
|
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
-
|
|
|
+ update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
CONSTRAINT uk_source_files_alias UNIQUE (template_id, alias)
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files(template_id);
|
|
|
|
|
|
-COMMENT ON TABLE source_files IS '来源文件定义表';
|
|
|
-COMMENT ON COLUMN source_files.alias IS '用户自定义的别名,用于引用';
|
|
|
-COMMENT ON COLUMN source_files.file_types IS '允许上传的文件类型列表';
|
|
|
-COMMENT ON COLUMN source_files.example_document_id IS '创建模板时使用的示例文件';
|
|
|
-
|
|
|
--- 20. 模板变量表 (variables)
|
|
|
CREATE TABLE IF NOT EXISTS variables (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
|
|
|
- -- 变量标识
|
|
|
- name VARCHAR(100) NOT NULL, -- 变量名(程序用)
|
|
|
- display_name VARCHAR(200) NOT NULL, -- 显示名称
|
|
|
- variable_group VARCHAR(100), -- 变量分组
|
|
|
- category VARCHAR(32), -- 变量类别(前端显示用): entity/concept/data/location/asset
|
|
|
- -- 在示例报告中的位置
|
|
|
- location JSONB NOT NULL, -- {elementId, type, startOffset, endOffset, rowIndex, colIndex}
|
|
|
- -- 示例值
|
|
|
+ name VARCHAR(100) NOT NULL,
|
|
|
+ display_name VARCHAR(200) NOT NULL,
|
|
|
+ variable_group VARCHAR(100),
|
|
|
+ category VARCHAR(32),
|
|
|
+ location JSONB NOT NULL,
|
|
|
example_value TEXT,
|
|
|
- value_type VARCHAR(32) DEFAULT 'text', -- text/date/number/table
|
|
|
- -- 数据来源
|
|
|
- source_file_alias VARCHAR(100), -- 来源文件别名
|
|
|
- source_type VARCHAR(32) NOT NULL, -- document/manual/reference/fixed
|
|
|
+ value_type VARCHAR(32) DEFAULT 'text',
|
|
|
+ source_file_alias VARCHAR(100),
|
|
|
+ source_type VARCHAR(32) NOT NULL,
|
|
|
source_config JSONB,
|
|
|
- -- 提取方式
|
|
|
- extract_type VARCHAR(32), -- direct/ai_extract/ai_summarize
|
|
|
+ extract_type VARCHAR(32),
|
|
|
extract_config JSONB,
|
|
|
display_order INT DEFAULT 0,
|
|
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
-
|
|
|
CONSTRAINT uk_variables_name UNIQUE (template_id, name)
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_variables_source_alias ON variables(source_file_alias);
|
|
|
CREATE INDEX IF NOT EXISTS idx_variables_source_type ON variables(source_type);
|
|
|
CREATE INDEX IF NOT EXISTS idx_variables_category ON variables(category);
|
|
|
|
|
|
-COMMENT ON TABLE variables IS '模板变量表';
|
|
|
-COMMENT ON COLUMN variables.name IS '变量名,模板内唯一';
|
|
|
-COMMENT ON COLUMN variables.location IS '变量在文档中的位置';
|
|
|
-COMMENT ON COLUMN variables.source_type IS 'document-从来源文件提取, manual-手动输入, reference-引用其他变量, fixed-固定值';
|
|
|
-COMMENT ON COLUMN variables.extract_type IS 'direct-直接提取, ai_extract-AI字段提取, ai_summarize-AI总结';
|
|
|
-COMMENT ON COLUMN variables.category IS 'entity-核心实体, concept-概念/技术, data-数据/指标, location-地点/组织, asset-资源模板';
|
|
|
-
|
|
|
--- 21. 生成任务表 (generations)
|
|
|
CREATE TABLE IF NOT EXISTS generations (
|
|
|
id VARCHAR(36) PRIMARY KEY,
|
|
|
template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE RESTRICT,
|
|
|
user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
|
|
name VARCHAR(255),
|
|
|
- -- 来源文件映射
|
|
|
- source_file_map JSONB NOT NULL, -- {"可研批复": "doc_123", ...}
|
|
|
- -- 变量提取结果
|
|
|
- variable_values JSONB, -- {varName: {value, confidence, status, ...}}
|
|
|
- -- 生成的文档
|
|
|
+ source_file_map JSONB NOT NULL,
|
|
|
+ variable_values JSONB,
|
|
|
output_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
|
|
|
output_file_path VARCHAR(500),
|
|
|
- status VARCHAR(32) DEFAULT 'pending', -- pending/extracting/review/completed/error
|
|
|
+ status VARCHAR(32) DEFAULT 'pending',
|
|
|
error_message TEXT,
|
|
|
- progress INT DEFAULT 0, -- 0-100
|
|
|
+ progress INT DEFAULT 0,
|
|
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
completed_at TIMESTAMP
|
|
|
);
|
|
|
-
|
|
|
CREATE INDEX IF NOT EXISTS idx_generations_template ON generations(template_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id);
|
|
|
CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status);
|
|
|
CREATE INDEX IF NOT EXISTS idx_generations_create_time ON generations(create_time DESC);
|
|
|
|
|
|
-COMMENT ON TABLE generations IS '报告生成任务表';
|
|
|
-COMMENT ON COLUMN generations.source_file_map IS '来源文件映射: {"别名": "文档ID"}';
|
|
|
-COMMENT ON COLUMN generations.variable_values IS '变量提取结果';
|
|
|
-COMMENT ON COLUMN generations.status IS 'pending-待执行, extracting-提取中, review-待确认, completed-已完成, error-错误';
|
|
|
-
|
|
|
--- ============================================
|
|
|
--- 七、触发器函数与触发器
|
|
|
--- ============================================
|
|
|
+-- ============================================ 触发器 ============================================
|
|
|
|
|
|
--- 更新时间触发器函数
|
|
|
CREATE OR REPLACE FUNCTION update_update_time_column()
|
|
|
RETURNS TRIGGER AS $$
|
|
|
BEGIN
|
|
|
@@ -682,16 +454,14 @@ BEGIN
|
|
|
END;
|
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
|
|
--- 为所有需要的表创建触发器
|
|
|
DO $$
|
|
|
DECLARE
|
|
|
tbl TEXT;
|
|
|
tables TEXT[] := ARRAY[
|
|
|
- 'users', 'documents', 'elements', 'annotations', 'graphs',
|
|
|
- 'parse_tasks', 'sessions', 'graph_nodes', 'graph_relations',
|
|
|
- 'rules', 'data_sources', 'text_storage', 'text_chunks',
|
|
|
- 'document_blocks', 'document_entities', 'document_elements',
|
|
|
- 'templates', 'variables'
|
|
|
+ 'users', 'documents', 'elements', 'annotations', 'graphs',
|
|
|
+ 'parse_tasks', 'sessions', 'rules', 'data_sources', 'text_storage',
|
|
|
+ 'text_chunks', 'document_blocks', 'document_entities', 'document_elements',
|
|
|
+ 'templates', 'source_files', 'variables', 'generations'
|
|
|
];
|
|
|
BEGIN
|
|
|
FOREACH tbl IN ARRAY tables LOOP
|
|
|
@@ -700,31 +470,18 @@ BEGIN
|
|
|
END LOOP;
|
|
|
END $$;
|
|
|
|
|
|
--- ============================================
|
|
|
--- 八、辅助函数
|
|
|
--- ============================================
|
|
|
+-- ============================================ 向量检索函数 ============================================
|
|
|
|
|
|
--- 向量相似度检索(按文档)
|
|
|
CREATE OR REPLACE FUNCTION search_similar_chunks(
|
|
|
query_embedding vector(768),
|
|
|
target_document_id VARCHAR(36),
|
|
|
result_limit INTEGER DEFAULT 3
|
|
|
)
|
|
|
-RETURNS TABLE (
|
|
|
- chunk_id VARCHAR(36),
|
|
|
- document_id VARCHAR(36),
|
|
|
- content TEXT,
|
|
|
- chunk_index INTEGER,
|
|
|
- similarity FLOAT
|
|
|
-) AS $$
|
|
|
+RETURNS TABLE (chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT) AS $$
|
|
|
BEGIN
|
|
|
RETURN QUERY
|
|
|
- SELECT
|
|
|
- tc.id AS chunk_id,
|
|
|
- tc.document_id,
|
|
|
- tc.content,
|
|
|
- tc.chunk_index,
|
|
|
- 1 - (ve.embedding <=> query_embedding) AS similarity
|
|
|
+ SELECT tc.id, tc.document_id, tc.content, tc.chunk_index,
|
|
|
+ 1 - (ve.embedding <=> query_embedding) AS similarity
|
|
|
FROM text_chunks tc
|
|
|
JOIN vector_embeddings ve ON tc.id = ve.chunk_id
|
|
|
WHERE tc.document_id = target_document_id
|
|
|
@@ -733,26 +490,15 @@ BEGIN
|
|
|
END;
|
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
|
|
--- 向量相似度检索(全局)
|
|
|
CREATE OR REPLACE FUNCTION search_similar_chunks_global(
|
|
|
query_embedding vector(768),
|
|
|
result_limit INTEGER DEFAULT 5
|
|
|
)
|
|
|
-RETURNS TABLE (
|
|
|
- chunk_id VARCHAR(36),
|
|
|
- document_id VARCHAR(36),
|
|
|
- content TEXT,
|
|
|
- chunk_index INTEGER,
|
|
|
- similarity FLOAT
|
|
|
-) AS $$
|
|
|
+RETURNS TABLE (chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT) AS $$
|
|
|
BEGIN
|
|
|
RETURN QUERY
|
|
|
- SELECT
|
|
|
- tc.id AS chunk_id,
|
|
|
- tc.document_id,
|
|
|
- tc.content,
|
|
|
- tc.chunk_index,
|
|
|
- 1 - (ve.embedding <=> query_embedding) AS similarity
|
|
|
+ SELECT tc.id, tc.document_id, tc.content, tc.chunk_index,
|
|
|
+ 1 - (ve.embedding <=> query_embedding) AS similarity
|
|
|
FROM text_chunks tc
|
|
|
JOIN vector_embeddings ve ON tc.id = ve.chunk_id
|
|
|
ORDER BY ve.embedding <=> query_embedding
|
|
|
@@ -760,8 +506,4 @@ BEGIN
|
|
|
END;
|
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
|
|
--- ============================================
|
|
|
--- 完成
|
|
|
--- ============================================
|
|
|
-SELECT '灵越智报 v2.0 数据库表创建完成' AS result;
|
|
|
-SELECT COUNT(*) AS "表总数" FROM pg_tables WHERE schemaname = 'public';
|
|
|
+SELECT '灵越智报 v2.0 数据库初始化完成' AS result;
|