|
|
@@ -0,0 +1,115 @@
|
|
|
+-- ============================================
|
|
|
+-- RAG 向量化存储相关表(兼容 SimpleModel 版本)
|
|
|
+-- 灵越智报 v2.0
|
|
|
+-- 字段名与 SimpleModel 基类兼容
|
|
|
+-- ============================================
|
|
|
+
|
|
|
+-- 启用 pgvector 扩展(需要先安装:apt install postgresql-15-pgvector)
|
|
|
+CREATE EXTENSION IF NOT EXISTS vector;
|
|
|
+
|
|
|
+-- ============================================
|
|
|
+-- 1. 文本分块表 (text_chunks)
|
|
|
+-- ============================================
|
|
|
+CREATE TABLE IF NOT EXISTS text_chunks (
|
|
|
+ id VARCHAR(32) PRIMARY KEY,
|
|
|
+ document_id VARCHAR(32) NOT NULL,
|
|
|
+ text_storage_id VARCHAR(32),
|
|
|
+ chunk_index INTEGER NOT NULL,
|
|
|
+ content TEXT NOT NULL,
|
|
|
+ token_count INTEGER,
|
|
|
+ metadata JSONB DEFAULT '{}',
|
|
|
+ create_by VARCHAR(36),
|
|
|
+ create_by_name VARCHAR(100),
|
|
|
+ create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
|
|
+ update_by VARCHAR(36),
|
|
|
+ update_by_name VARCHAR(100),
|
|
|
+ update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
+);
|
|
|
+
|
|
|
+-- 索引
|
|
|
+CREATE INDEX IF NOT EXISTS idx_text_chunks_document_id ON text_chunks(document_id);
|
|
|
+CREATE INDEX IF NOT EXISTS idx_text_chunks_text_storage_id ON text_chunks(text_storage_id);
|
|
|
+CREATE INDEX IF NOT EXISTS idx_text_chunks_chunk_index ON text_chunks(document_id, chunk_index);
|
|
|
+
|
|
|
+-- ============================================
|
|
|
+-- 2. 向量嵌入表 (vector_embeddings)
|
|
|
+-- ============================================
|
|
|
+CREATE TABLE IF NOT EXISTS vector_embeddings (
|
|
|
+ id VARCHAR(32) PRIMARY KEY,
|
|
|
+ chunk_id VARCHAR(32) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE,
|
|
|
+ embedding vector(768), -- nomic-embed-text 维度为 768
|
|
|
+ model_name VARCHAR(100) DEFAULT 'nomic-embed-text',
|
|
|
+ create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
+);
|
|
|
+
|
|
|
+-- 普通索引
|
|
|
+CREATE INDEX IF NOT EXISTS idx_vector_embeddings_chunk_id ON vector_embeddings(chunk_id);
|
|
|
+CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(model_name);
|
|
|
+
|
|
|
+-- HNSW 向量索引(用于高效相似度检索)
|
|
|
+-- 使用余弦距离操作符
|
|
|
+CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings
|
|
|
+ USING hnsw (embedding vector_cosine_ops);
|
|
|
+
|
|
|
+-- ============================================
|
|
|
+-- 3. 辅助函数:向量相似度检索
|
|
|
+-- ============================================
|
|
|
+
|
|
|
+-- 按文档ID检索相似文本块
|
|
|
+CREATE OR REPLACE FUNCTION search_similar_chunks(
|
|
|
+ query_embedding vector(768),
|
|
|
+ target_document_id VARCHAR(32),
|
|
|
+ result_limit INTEGER DEFAULT 3
|
|
|
+)
|
|
|
+RETURNS TABLE (
|
|
|
+ chunk_id VARCHAR(32),
|
|
|
+ document_id VARCHAR(32),
|
|
|
+ content TEXT,
|
|
|
+ chunk_index INTEGER,
|
|
|
+ similarity FLOAT
|
|
|
+) AS $$
|
|
|
+BEGIN
|
|
|
+ RETURN QUERY
|
|
|
+ SELECT
|
|
|
+ tc.id AS chunk_id,
|
|
|
+ tc.document_id,
|
|
|
+ tc.content,
|
|
|
+ tc.chunk_index,
|
|
|
+ 1 - (ve.embedding <=> query_embedding) AS similarity
|
|
|
+ FROM text_chunks tc
|
|
|
+ JOIN vector_embeddings ve ON tc.id = ve.chunk_id
|
|
|
+ WHERE tc.document_id = target_document_id
|
|
|
+ ORDER BY ve.embedding <=> query_embedding
|
|
|
+ LIMIT result_limit;
|
|
|
+END;
|
|
|
+$$ LANGUAGE plpgsql;
|
|
|
+
|
|
|
+-- 全局检索相似文本块(不限制文档)
|
|
|
+CREATE OR REPLACE FUNCTION search_similar_chunks_global(
|
|
|
+ query_embedding vector(768),
|
|
|
+ result_limit INTEGER DEFAULT 5
|
|
|
+)
|
|
|
+RETURNS TABLE (
|
|
|
+ chunk_id VARCHAR(32),
|
|
|
+ document_id VARCHAR(32),
|
|
|
+ content TEXT,
|
|
|
+ chunk_index INTEGER,
|
|
|
+ similarity FLOAT
|
|
|
+) AS $$
|
|
|
+BEGIN
|
|
|
+ RETURN QUERY
|
|
|
+ SELECT
|
|
|
+ tc.id AS chunk_id,
|
|
|
+ tc.document_id,
|
|
|
+ tc.content,
|
|
|
+ tc.chunk_index,
|
|
|
+ 1 - (ve.embedding <=> query_embedding) AS similarity
|
|
|
+ FROM text_chunks tc
|
|
|
+ JOIN vector_embeddings ve ON tc.id = ve.chunk_id
|
|
|
+ ORDER BY ve.embedding <=> query_embedding
|
|
|
+ LIMIT result_limit;
|
|
|
+END;
|
|
|
+$$ LANGUAGE plpgsql;
|
|
|
+
|
|
|
+-- 显示创建结果
|
|
|
+SELECT 'RAG 表创建成功(兼容 SimpleModel)' AS result;
|