rag_tables_compatible.sql 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. -- ============================================
  2. -- RAG 向量化存储相关表(兼容 SimpleModel 版本)
  3. -- 灵越智报 v2.0
  4. -- 字段名与 SimpleModel 基类兼容
  5. -- ============================================
  6. -- 启用 pgvector 扩展(需要先安装:apt install postgresql-15-pgvector)
  7. CREATE EXTENSION IF NOT EXISTS vector;
  8. -- ============================================
  9. -- 1. 文本分块表 (text_chunks)
  10. -- ============================================
  11. CREATE TABLE IF NOT EXISTS text_chunks (
  12. id VARCHAR(36) PRIMARY KEY,
  13. document_id VARCHAR(36) NOT NULL,
  14. text_storage_id VARCHAR(36),
  15. chunk_index INTEGER NOT NULL,
  16. content TEXT NOT NULL,
  17. token_count INTEGER,
  18. metadata JSONB DEFAULT '{}',
  19. create_by VARCHAR(36),
  20. create_by_name VARCHAR(100),
  21. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  22. update_by VARCHAR(36),
  23. update_by_name VARCHAR(100),
  24. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  25. );
  26. -- 索引
  27. CREATE INDEX IF NOT EXISTS idx_text_chunks_document_id ON text_chunks(document_id);
  28. CREATE INDEX IF NOT EXISTS idx_text_chunks_text_storage_id ON text_chunks(text_storage_id);
  29. CREATE INDEX IF NOT EXISTS idx_text_chunks_chunk_index ON text_chunks(document_id, chunk_index);
  30. -- ============================================
  31. -- 2. 向量嵌入表 (vector_embeddings)
  32. -- ============================================
  33. CREATE TABLE IF NOT EXISTS vector_embeddings (
  34. id VARCHAR(36) PRIMARY KEY,
  35. chunk_id VARCHAR(36) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE,
  36. embedding vector(768), -- nomic-embed-text 维度为 768
  37. model_name VARCHAR(100) DEFAULT 'nomic-embed-text',
  38. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  39. );
  40. -- 普通索引
  41. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_chunk_id ON vector_embeddings(chunk_id);
  42. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(model_name);
  43. -- HNSW 向量索引(用于高效相似度检索)
  44. -- 使用余弦距离操作符
  45. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings
  46. USING hnsw (embedding vector_cosine_ops);
  47. -- ============================================
  48. -- 3. 辅助函数:向量相似度检索
  49. -- ============================================
  50. -- 按文档ID检索相似文本块
  51. CREATE OR REPLACE FUNCTION search_similar_chunks(
  52. query_embedding vector(768),
  53. target_document_id VARCHAR(36),
  54. result_limit INTEGER DEFAULT 3
  55. )
  56. RETURNS TABLE (
  57. chunk_id VARCHAR(36),
  58. document_id VARCHAR(36),
  59. content TEXT,
  60. chunk_index INTEGER,
  61. similarity FLOAT
  62. ) AS $$
  63. BEGIN
  64. RETURN QUERY
  65. SELECT
  66. tc.id AS chunk_id,
  67. tc.document_id,
  68. tc.content,
  69. tc.chunk_index,
  70. 1 - (ve.embedding <=> query_embedding) AS similarity
  71. FROM text_chunks tc
  72. JOIN vector_embeddings ve ON tc.id = ve.chunk_id
  73. WHERE tc.document_id = target_document_id
  74. ORDER BY ve.embedding <=> query_embedding
  75. LIMIT result_limit;
  76. END;
  77. $$ LANGUAGE plpgsql;
  78. -- 全局检索相似文本块(不限制文档)
  79. CREATE OR REPLACE FUNCTION search_similar_chunks_global(
  80. query_embedding vector(768),
  81. result_limit INTEGER DEFAULT 5
  82. )
  83. RETURNS TABLE (
  84. chunk_id VARCHAR(36),
  85. document_id VARCHAR(36),
  86. content TEXT,
  87. chunk_index INTEGER,
  88. similarity FLOAT
  89. ) AS $$
  90. BEGIN
  91. RETURN QUERY
  92. SELECT
  93. tc.id AS chunk_id,
  94. tc.document_id,
  95. tc.content,
  96. tc.chunk_index,
  97. 1 - (ve.embedding <=> query_embedding) AS similarity
  98. FROM text_chunks tc
  99. JOIN vector_embeddings ve ON tc.id = ve.chunk_id
  100. ORDER BY ve.embedding <=> query_embedding
  101. LIMIT result_limit;
  102. END;
  103. $$ LANGUAGE plpgsql;
  104. -- 显示创建结果
  105. SELECT 'RAG 表创建成功(兼容 SimpleModel)' AS result;