|
|
@@ -11,9 +11,9 @@ CREATE EXTENSION IF NOT EXISTS vector;
|
|
|
-- 1. 文本分块表 (text_chunks)
|
|
|
-- ============================================
|
|
|
CREATE TABLE IF NOT EXISTS text_chunks (
|
|
|
- id VARCHAR(32) PRIMARY KEY,
|
|
|
- document_id VARCHAR(32) NOT NULL,
|
|
|
- text_storage_id VARCHAR(32),
|
|
|
+ id VARCHAR(36) PRIMARY KEY,
|
|
|
+ document_id VARCHAR(36) NOT NULL,
|
|
|
+ text_storage_id VARCHAR(36),
|
|
|
chunk_index INTEGER NOT NULL,
|
|
|
content TEXT NOT NULL,
|
|
|
token_count INTEGER,
|
|
|
@@ -35,8 +35,8 @@ CREATE INDEX IF NOT EXISTS idx_text_chunks_chunk_index ON text_chunks(document_i
|
|
|
-- 2. 向量嵌入表 (vector_embeddings)
|
|
|
-- ============================================
|
|
|
CREATE TABLE IF NOT EXISTS vector_embeddings (
|
|
|
- id VARCHAR(32) PRIMARY KEY,
|
|
|
- chunk_id VARCHAR(32) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE,
|
|
|
+ id VARCHAR(36) PRIMARY KEY,
|
|
|
+ chunk_id VARCHAR(36) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE,
|
|
|
embedding vector(768), -- nomic-embed-text 维度为 768
|
|
|
model_name VARCHAR(100) DEFAULT 'nomic-embed-text',
|
|
|
create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
|
|
@@ -58,12 +58,12 @@ CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings
|
|
|
-- 按文档ID检索相似文本块
|
|
|
CREATE OR REPLACE FUNCTION search_similar_chunks(
|
|
|
query_embedding vector(768),
|
|
|
- target_document_id VARCHAR(32),
|
|
|
+ target_document_id VARCHAR(36),
|
|
|
result_limit INTEGER DEFAULT 3
|
|
|
)
|
|
|
RETURNS TABLE (
|
|
|
- chunk_id VARCHAR(32),
|
|
|
- document_id VARCHAR(32),
|
|
|
+ chunk_id VARCHAR(36),
|
|
|
+ document_id VARCHAR(36),
|
|
|
content TEXT,
|
|
|
chunk_index INTEGER,
|
|
|
similarity FLOAT
|
|
|
@@ -90,8 +90,8 @@ CREATE OR REPLACE FUNCTION search_similar_chunks_global(
|
|
|
result_limit INTEGER DEFAULT 5
|
|
|
)
|
|
|
RETURNS TABLE (
|
|
|
- chunk_id VARCHAR(32),
|
|
|
- document_id VARCHAR(32),
|
|
|
+ chunk_id VARCHAR(36),
|
|
|
+ document_id VARCHAR(36),
|
|
|
content TEXT,
|
|
|
chunk_index INTEGER,
|
|
|
similarity FLOAT
|