-- ===================================================== -- 灵越智报 v2.0 数据库初始化脚本(单文件) -- PostgreSQL 15+ -- 包含所有表结构,不含 graph_nodes / graph_relations(已移除) -- ===================================================== CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS vector; -- ============================================ 一、基础模块 ============================================ CREATE TABLE IF NOT EXISTS users ( id VARCHAR(36) PRIMARY KEY, username VARCHAR(50) UNIQUE NOT NULL, email VARCHAR(100) UNIQUE NOT NULL, password_hash VARCHAR(255) NOT NULL, avatar_url VARCHAR(500), role VARCHAR(20) NOT NULL DEFAULT 'user', preferences TEXT DEFAULT '{}', last_login_at TIMESTAMP, create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_users_username ON users(username); CREATE INDEX IF NOT EXISTS idx_users_email ON users(email); CREATE TABLE IF NOT EXISTS documents ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, type VARCHAR(20) NOT NULL, status VARCHAR(20) NOT NULL DEFAULT 'pending', file_size BIGINT, file_url VARCHAR(500), thumbnail_url VARCHAR(500), parsed_text TEXT, parse_status VARCHAR(20), parse_progress INTEGER DEFAULT 0, parse_error TEXT, parse_started_at TIMESTAMP, parse_completed_at TIMESTAMP, metadata JSONB DEFAULT '{}', structured_status VARCHAR(20) DEFAULT 'pending', image_count INT DEFAULT 0, table_count INT DEFAULT 0, element_count INT DEFAULT 0, create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id); CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status); CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(type); CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(create_time DESC); CREATE INDEX IF NOT EXISTS idx_documents_metadata ON documents USING GIN(metadata); CREATE TABLE IF NOT EXISTS elements ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, type VARCHAR(20) NOT NULL, label VARCHAR(100) NOT NULL, value TEXT NOT NULL, position JSONB, confidence DECIMAL(3,2), extraction_method VARCHAR(20), metadata JSONB DEFAULT '{}', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_elements_document_id ON elements(document_id); CREATE INDEX IF NOT EXISTS idx_elements_user_id ON elements(user_id); CREATE INDEX IF NOT EXISTS idx_elements_type ON elements(type); CREATE INDEX IF NOT EXISTS idx_elements_position ON elements USING GIN(position); CREATE TABLE IF NOT EXISTS annotations ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, text TEXT NOT NULL, position JSONB NOT NULL, type VARCHAR(20) NOT NULL, suggestion TEXT, ai_generated BOOLEAN DEFAULT FALSE, confidence DECIMAL(3,2), status VARCHAR(20) DEFAULT 'pending', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_annotations_document_id ON annotations(document_id); CREATE INDEX IF NOT EXISTS idx_annotations_user_id ON annotations(user_id); CREATE INDEX IF NOT EXISTS idx_annotations_type ON annotations(type); CREATE INDEX IF NOT EXISTS idx_annotations_status ON annotations(status); CREATE TABLE IF NOT EXISTS graphs ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, nodes JSONB NOT NULL DEFAULT '[]', edges JSONB NOT NULL DEFAULT '[]', calculation_result JSONB, calculation_status VARCHAR(20), metadata JSONB DEFAULT '{}', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_graphs_document_id ON graphs(document_id); CREATE INDEX IF NOT EXISTS idx_graphs_user_id ON graphs(user_id); CREATE INDEX IF NOT EXISTS idx_graphs_nodes ON graphs USING GIN(nodes); CREATE INDEX IF NOT EXISTS idx_graphs_edges ON graphs USING GIN(edges); CREATE TABLE IF NOT EXISTS parse_tasks ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, status VARCHAR(20) NOT NULL DEFAULT 'pending', progress INTEGER DEFAULT 0, current_step VARCHAR(100), error_message TEXT, options JSONB DEFAULT '{}', started_at TIMESTAMP, completed_at TIMESTAMP, parse_status VARCHAR(20) DEFAULT 'pending', parse_progress INTEGER DEFAULT 0, rag_status VARCHAR(20) DEFAULT 'pending', rag_progress INTEGER DEFAULT 0, structured_status VARCHAR(20) DEFAULT 'pending', structured_progress INTEGER DEFAULT 0, structured_element_count INTEGER, structured_image_count INTEGER, structured_table_count INTEGER, ner_status VARCHAR(20) DEFAULT 'pending', ner_progress INTEGER DEFAULT 0, ner_task_id VARCHAR(64), ner_entity_count INTEGER, ner_relation_count INTEGER, ner_message VARCHAR(255), graph_status VARCHAR(20) DEFAULT 'pending', graph_progress INTEGER DEFAULT 0, create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_parse_tasks_document_id ON parse_tasks(document_id); CREATE INDEX IF NOT EXISTS idx_parse_tasks_status ON parse_tasks(status); CREATE TABLE IF NOT EXISTS sessions ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, token_hash VARCHAR(255) NOT NULL UNIQUE, refresh_token_hash VARCHAR(255) NOT NULL UNIQUE, expires_at TIMESTAMP NOT NULL, ip_address VARCHAR(45), user_agent TEXT, create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, last_used_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON sessions(user_id); CREATE INDEX IF NOT EXISTS idx_sessions_token_hash ON sessions(token_hash); CREATE INDEX IF NOT EXISTS idx_sessions_expires_at ON sessions(expires_at); -- ============================================ 二、补充模块 ============================================ CREATE TABLE IF NOT EXISTS rules ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, description TEXT, entry_node_id VARCHAR(36), exit_node_id VARCHAR(36), rule_chain JSONB NOT NULL DEFAULT '[]', status VARCHAR(20) DEFAULT 'active', metadata JSONB DEFAULT '{}', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_rules_user_id ON rules(user_id); CREATE INDEX IF NOT EXISTS idx_rules_status ON rules(status); CREATE TABLE IF NOT EXISTS data_sources ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL, name VARCHAR(255) NOT NULL, type VARCHAR(50) NOT NULL, source_type VARCHAR(50) NOT NULL DEFAULT 'manual', node_ids JSONB DEFAULT '{"refs": []}', config JSONB DEFAULT '{}', metadata JSONB DEFAULT '{}', value_type VARCHAR(20) DEFAULT 'text', aggregate_type VARCHAR(20) DEFAULT 'first', separator VARCHAR(50) DEFAULT '', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_data_sources_user_id ON data_sources(user_id); CREATE INDEX IF NOT EXISTS idx_data_sources_document_id ON data_sources(document_id); CREATE INDEX IF NOT EXISTS idx_data_sources_type ON data_sources(type); CREATE TABLE IF NOT EXISTS text_storage ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE, file_path VARCHAR(500) NOT NULL, file_size BIGINT, checksum VARCHAR(64), create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_text_storage_document_id ON text_storage(document_id); CREATE UNIQUE INDEX IF NOT EXISTS idx_text_storage_document_unique ON text_storage(document_id); -- ============================================ 三、RAG 模块 ============================================ CREATE TABLE IF NOT EXISTS text_chunks ( id VARCHAR(36) PRIMARY KEY, document_id VARCHAR(36) NOT NULL, text_storage_id VARCHAR(36), chunk_index INTEGER NOT NULL, content TEXT NOT NULL, token_count INTEGER, metadata JSONB DEFAULT '{}', create_by VARCHAR(36), create_by_name VARCHAR(100), create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(36), update_by_name VARCHAR(100), update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_text_chunks_document_id ON text_chunks(document_id); CREATE INDEX IF NOT EXISTS idx_text_chunks_text_storage_id ON text_chunks(text_storage_id); CREATE UNIQUE INDEX IF NOT EXISTS idx_text_chunks_doc_index ON text_chunks(document_id, chunk_index); CREATE TABLE IF NOT EXISTS vector_embeddings ( id VARCHAR(36) PRIMARY KEY, chunk_id VARCHAR(36) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE, embedding vector(768), model_name VARCHAR(100) DEFAULT 'nomic-embed-text', create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_vector_embeddings_chunk_id ON vector_embeddings(chunk_id); CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(model_name); CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings USING hnsw (embedding vector_cosine_ops); -- ============================================ 四、文档结构化模块 ============================================ CREATE TABLE IF NOT EXISTS document_blocks ( id VARCHAR(64) PRIMARY KEY, document_id VARCHAR(64) NOT NULL, parent_id VARCHAR(64), children JSONB, block_index INTEGER NOT NULL, block_type VARCHAR(32) NOT NULL, elements JSONB, style JSONB, metadata JSONB, create_by VARCHAR(64), create_by_name VARCHAR(128), create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(64), update_by_name VARCHAR(128), update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_document_blocks_document_id ON document_blocks(document_id); CREATE INDEX IF NOT EXISTS idx_document_blocks_parent_id ON document_blocks(parent_id); CREATE INDEX IF NOT EXISTS idx_document_blocks_block_type ON document_blocks(block_type); CREATE INDEX IF NOT EXISTS idx_document_blocks_elements_gin ON document_blocks USING GIN (elements jsonb_path_ops); CREATE TABLE IF NOT EXISTS document_entities ( id VARCHAR(64) PRIMARY KEY, document_id VARCHAR(64) NOT NULL, block_id VARCHAR(64), name VARCHAR(512) NOT NULL, entity_type VARCHAR(32) NOT NULL, value TEXT, block_char_start INTEGER, block_char_end INTEGER, global_char_start INTEGER, global_char_end INTEGER, anchor_before VARCHAR(100), anchor_after VARCHAR(100), source VARCHAR(16) DEFAULT 'auto', confidence DECIMAL(5,4), confirmed BOOLEAN DEFAULT FALSE, metadata JSONB, create_by VARCHAR(64), create_by_name VARCHAR(128), create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(64), update_by_name VARCHAR(128), update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_document_entities_document_id ON document_entities(document_id); CREATE INDEX IF NOT EXISTS idx_document_entities_block_id ON document_entities(block_id); CREATE INDEX IF NOT EXISTS idx_document_entities_type ON document_entities(entity_type); CREATE INDEX IF NOT EXISTS idx_document_entities_name ON document_entities(name); CREATE INDEX IF NOT EXISTS idx_document_entities_global_char ON document_entities(document_id, global_char_start, global_char_end); CREATE TABLE IF NOT EXISTS document_elements ( id VARCHAR(64) PRIMARY KEY, document_id VARCHAR(64) NOT NULL, element_index INT NOT NULL, element_type VARCHAR(32) NOT NULL, content TEXT, style JSONB, runs JSONB, image_url VARCHAR(500), image_path VARCHAR(500), image_alt VARCHAR(255), image_width INT, image_height INT, image_format VARCHAR(16), table_index INT, table_data JSONB, table_row_count INT, table_col_count INT, table_text TEXT, create_by VARCHAR(64), create_by_name VARCHAR(128), create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_by VARCHAR(64), update_by_name VARCHAR(128), update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_document_elements_document_id ON document_elements(document_id); CREATE INDEX IF NOT EXISTS idx_document_elements_type ON document_elements(element_type); CREATE INDEX IF NOT EXISTS idx_document_elements_order ON document_elements(document_id, element_index); -- ============================================ 五、模板系统模块 ============================================ CREATE TABLE IF NOT EXISTS templates ( id VARCHAR(36) PRIMARY KEY, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, description TEXT, base_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL, status VARCHAR(32) DEFAULT 'draft', config JSONB DEFAULT '{}', is_public BOOLEAN DEFAULT FALSE, use_count INT DEFAULT 0, rating DECIMAL(2,1) DEFAULT 0.0, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, create_by VARCHAR(36), create_by_name VARCHAR(100), update_by VARCHAR(36), update_by_name VARCHAR(100) ); CREATE INDEX IF NOT EXISTS idx_templates_user_id ON templates(user_id); CREATE INDEX IF NOT EXISTS idx_templates_status ON templates(status); CREATE INDEX IF NOT EXISTS idx_templates_is_public ON templates(is_public); CREATE INDEX IF NOT EXISTS idx_templates_base_document ON templates(base_document_id); CREATE TABLE IF NOT EXISTS source_files ( id VARCHAR(36) PRIMARY KEY, template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE, alias VARCHAR(100) NOT NULL, description TEXT, file_types JSONB DEFAULT '["pdf", "docx"]', required BOOLEAN DEFAULT TRUE, example_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL, display_order INT DEFAULT 0, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, CONSTRAINT uk_source_files_alias UNIQUE (template_id, alias) ); CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files(template_id); CREATE TABLE IF NOT EXISTS variables ( id VARCHAR(36) PRIMARY KEY, template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE, name VARCHAR(100) NOT NULL, display_name VARCHAR(200) NOT NULL, variable_group VARCHAR(100), category VARCHAR(32), location JSONB NOT NULL, example_value TEXT, value_type VARCHAR(32) DEFAULT 'text', source_file_alias VARCHAR(100), source_type VARCHAR(32) NOT NULL, source_config JSONB, extract_type VARCHAR(32), extract_config JSONB, display_order INT DEFAULT 0, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, CONSTRAINT uk_variables_name UNIQUE (template_id, name) ); CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id); CREATE INDEX IF NOT EXISTS idx_variables_source_alias ON variables(source_file_alias); CREATE INDEX IF NOT EXISTS idx_variables_source_type ON variables(source_type); CREATE INDEX IF NOT EXISTS idx_variables_category ON variables(category); CREATE TABLE IF NOT EXISTS generations ( id VARCHAR(36) PRIMARY KEY, template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE RESTRICT, user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE, name VARCHAR(255), source_file_map JSONB NOT NULL, variable_values JSONB, output_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL, output_file_path VARCHAR(500), status VARCHAR(32) DEFAULT 'pending', error_message TEXT, progress INT DEFAULT 0, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, completed_at TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_generations_template ON generations(template_id); CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id); CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status); CREATE INDEX IF NOT EXISTS idx_generations_create_time ON generations(create_time DESC); -- ============================================ 触发器 ============================================ CREATE OR REPLACE FUNCTION update_update_time_column() RETURNS TRIGGER AS $$ BEGIN NEW.update_time = CURRENT_TIMESTAMP; RETURN NEW; END; $$ LANGUAGE plpgsql; DO $$ DECLARE tbl TEXT; tables TEXT[] := ARRAY[ 'users', 'documents', 'elements', 'annotations', 'graphs', 'parse_tasks', 'sessions', 'rules', 'data_sources', 'text_storage', 'text_chunks', 'document_blocks', 'document_entities', 'document_elements', 'templates', 'source_files', 'variables', 'generations' ]; BEGIN FOREACH tbl IN ARRAY tables LOOP EXECUTE format('DROP TRIGGER IF EXISTS trigger_%s_update_time ON %I', tbl, tbl); EXECUTE format('CREATE TRIGGER trigger_%s_update_time BEFORE UPDATE ON %I FOR EACH ROW EXECUTE FUNCTION update_update_time_column()', tbl, tbl); END LOOP; END $$; -- ============================================ 向量检索函数 ============================================ CREATE OR REPLACE FUNCTION search_similar_chunks( query_embedding vector(768), target_document_id VARCHAR(36), result_limit INTEGER DEFAULT 3 ) RETURNS TABLE (chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT) AS $$ BEGIN RETURN QUERY SELECT tc.id, tc.document_id, tc.content, tc.chunk_index, 1 - (ve.embedding <=> query_embedding) AS similarity FROM text_chunks tc JOIN vector_embeddings ve ON tc.id = ve.chunk_id WHERE tc.document_id = target_document_id ORDER BY ve.embedding <=> query_embedding LIMIT result_limit; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION search_similar_chunks_global( query_embedding vector(768), result_limit INTEGER DEFAULT 5 ) RETURNS TABLE (chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT) AS $$ BEGIN RETURN QUERY SELECT tc.id, tc.document_id, tc.content, tc.chunk_index, 1 - (ve.embedding <=> query_embedding) AS similarity FROM text_chunks tc JOIN vector_embeddings ve ON tc.id = ve.chunk_id ORDER BY ve.embedding <=> query_embedding LIMIT result_limit; END; $$ LANGUAGE plpgsql; SELECT '灵越智报 v2.0 数据库初始化完成' AS result;