init.sql 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. -- =====================================================
  2. -- 灵越智报 v2.0 数据库初始化脚本(单文件)
  3. -- PostgreSQL 15+
  4. -- 包含所有表结构,不含 graph_nodes / graph_relations(已移除)
  5. -- =====================================================
  6. CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
  7. CREATE EXTENSION IF NOT EXISTS vector;
  8. -- ============================================ 一、基础模块 ============================================
  9. CREATE TABLE IF NOT EXISTS users (
  10. id VARCHAR(36) PRIMARY KEY,
  11. username VARCHAR(50) UNIQUE NOT NULL,
  12. email VARCHAR(100) UNIQUE NOT NULL,
  13. password_hash VARCHAR(255) NOT NULL,
  14. avatar_url VARCHAR(500),
  15. role VARCHAR(20) NOT NULL DEFAULT 'user',
  16. preferences TEXT DEFAULT '{}',
  17. last_login_at TIMESTAMP,
  18. create_by VARCHAR(36),
  19. create_by_name VARCHAR(100),
  20. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  21. update_by VARCHAR(36),
  22. update_by_name VARCHAR(100),
  23. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  24. );
  25. CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
  26. CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);
  27. CREATE TABLE IF NOT EXISTS documents (
  28. id VARCHAR(36) PRIMARY KEY,
  29. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  30. name VARCHAR(255) NOT NULL,
  31. type VARCHAR(20) NOT NULL,
  32. status VARCHAR(20) NOT NULL DEFAULT 'pending',
  33. file_size BIGINT,
  34. file_url VARCHAR(500),
  35. thumbnail_url VARCHAR(500),
  36. parsed_text TEXT,
  37. parse_status VARCHAR(20),
  38. parse_progress INTEGER DEFAULT 0,
  39. parse_error TEXT,
  40. parse_started_at TIMESTAMP,
  41. parse_completed_at TIMESTAMP,
  42. metadata JSONB DEFAULT '{}',
  43. structured_status VARCHAR(20) DEFAULT 'pending',
  44. image_count INT DEFAULT 0,
  45. table_count INT DEFAULT 0,
  46. element_count INT DEFAULT 0,
  47. create_by VARCHAR(36),
  48. create_by_name VARCHAR(100),
  49. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  50. update_by VARCHAR(36),
  51. update_by_name VARCHAR(100),
  52. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  53. );
  54. CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id);
  55. CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
  56. CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(type);
  57. CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(create_time DESC);
  58. CREATE INDEX IF NOT EXISTS idx_documents_metadata ON documents USING GIN(metadata);
  59. CREATE TABLE IF NOT EXISTS elements (
  60. id VARCHAR(36) PRIMARY KEY,
  61. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  62. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  63. type VARCHAR(20) NOT NULL,
  64. label VARCHAR(100) NOT NULL,
  65. value TEXT NOT NULL,
  66. position JSONB,
  67. confidence DECIMAL(3,2),
  68. extraction_method VARCHAR(20),
  69. metadata JSONB DEFAULT '{}',
  70. create_by VARCHAR(36),
  71. create_by_name VARCHAR(100),
  72. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  73. update_by VARCHAR(36),
  74. update_by_name VARCHAR(100),
  75. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  76. );
  77. CREATE INDEX IF NOT EXISTS idx_elements_document_id ON elements(document_id);
  78. CREATE INDEX IF NOT EXISTS idx_elements_user_id ON elements(user_id);
  79. CREATE INDEX IF NOT EXISTS idx_elements_type ON elements(type);
  80. CREATE INDEX IF NOT EXISTS idx_elements_position ON elements USING GIN(position);
  81. CREATE TABLE IF NOT EXISTS annotations (
  82. id VARCHAR(36) PRIMARY KEY,
  83. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  84. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  85. text TEXT NOT NULL,
  86. position JSONB NOT NULL,
  87. type VARCHAR(20) NOT NULL,
  88. suggestion TEXT,
  89. ai_generated BOOLEAN DEFAULT FALSE,
  90. confidence DECIMAL(3,2),
  91. status VARCHAR(20) DEFAULT 'pending',
  92. create_by VARCHAR(36),
  93. create_by_name VARCHAR(100),
  94. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  95. update_by VARCHAR(36),
  96. update_by_name VARCHAR(100),
  97. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  98. );
  99. CREATE INDEX IF NOT EXISTS idx_annotations_document_id ON annotations(document_id);
  100. CREATE INDEX IF NOT EXISTS idx_annotations_user_id ON annotations(user_id);
  101. CREATE INDEX IF NOT EXISTS idx_annotations_type ON annotations(type);
  102. CREATE INDEX IF NOT EXISTS idx_annotations_status ON annotations(status);
  103. CREATE TABLE IF NOT EXISTS graphs (
  104. id VARCHAR(36) PRIMARY KEY,
  105. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  106. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  107. name VARCHAR(255) NOT NULL,
  108. nodes JSONB NOT NULL DEFAULT '[]',
  109. edges JSONB NOT NULL DEFAULT '[]',
  110. calculation_result JSONB,
  111. calculation_status VARCHAR(20),
  112. metadata JSONB DEFAULT '{}',
  113. create_by VARCHAR(36),
  114. create_by_name VARCHAR(100),
  115. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  116. update_by VARCHAR(36),
  117. update_by_name VARCHAR(100),
  118. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  119. );
  120. CREATE INDEX IF NOT EXISTS idx_graphs_document_id ON graphs(document_id);
  121. CREATE INDEX IF NOT EXISTS idx_graphs_user_id ON graphs(user_id);
  122. CREATE INDEX IF NOT EXISTS idx_graphs_nodes ON graphs USING GIN(nodes);
  123. CREATE INDEX IF NOT EXISTS idx_graphs_edges ON graphs USING GIN(edges);
  124. CREATE TABLE IF NOT EXISTS parse_tasks (
  125. id VARCHAR(36) PRIMARY KEY,
  126. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  127. status VARCHAR(20) NOT NULL DEFAULT 'pending',
  128. progress INTEGER DEFAULT 0,
  129. current_step VARCHAR(100),
  130. error_message TEXT,
  131. options JSONB DEFAULT '{}',
  132. started_at TIMESTAMP,
  133. completed_at TIMESTAMP,
  134. parse_status VARCHAR(20) DEFAULT 'pending',
  135. parse_progress INTEGER DEFAULT 0,
  136. rag_status VARCHAR(20) DEFAULT 'pending',
  137. rag_progress INTEGER DEFAULT 0,
  138. structured_status VARCHAR(20) DEFAULT 'pending',
  139. structured_progress INTEGER DEFAULT 0,
  140. structured_element_count INTEGER,
  141. structured_image_count INTEGER,
  142. structured_table_count INTEGER,
  143. ner_status VARCHAR(20) DEFAULT 'pending',
  144. ner_progress INTEGER DEFAULT 0,
  145. ner_task_id VARCHAR(64),
  146. ner_entity_count INTEGER,
  147. ner_relation_count INTEGER,
  148. ner_message VARCHAR(255),
  149. graph_status VARCHAR(20) DEFAULT 'pending',
  150. graph_progress INTEGER DEFAULT 0,
  151. create_by VARCHAR(36),
  152. create_by_name VARCHAR(100),
  153. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  154. update_by VARCHAR(36),
  155. update_by_name VARCHAR(100),
  156. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  157. );
  158. CREATE INDEX IF NOT EXISTS idx_parse_tasks_document_id ON parse_tasks(document_id);
  159. CREATE INDEX IF NOT EXISTS idx_parse_tasks_status ON parse_tasks(status);
  160. CREATE TABLE IF NOT EXISTS sessions (
  161. id VARCHAR(36) PRIMARY KEY,
  162. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  163. token_hash VARCHAR(255) NOT NULL UNIQUE,
  164. refresh_token_hash VARCHAR(255) NOT NULL UNIQUE,
  165. expires_at TIMESTAMP NOT NULL,
  166. ip_address VARCHAR(45),
  167. user_agent TEXT,
  168. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  169. last_used_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  170. );
  171. CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON sessions(user_id);
  172. CREATE INDEX IF NOT EXISTS idx_sessions_token_hash ON sessions(token_hash);
  173. CREATE INDEX IF NOT EXISTS idx_sessions_expires_at ON sessions(expires_at);
  174. -- ============================================ 二、补充模块 ============================================
  175. CREATE TABLE IF NOT EXISTS rules (
  176. id VARCHAR(36) PRIMARY KEY,
  177. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  178. name VARCHAR(255) NOT NULL,
  179. description TEXT,
  180. entry_node_id VARCHAR(36),
  181. exit_node_id VARCHAR(36),
  182. rule_chain JSONB NOT NULL DEFAULT '[]',
  183. status VARCHAR(20) DEFAULT 'active',
  184. metadata JSONB DEFAULT '{}',
  185. create_by VARCHAR(36),
  186. create_by_name VARCHAR(100),
  187. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  188. update_by VARCHAR(36),
  189. update_by_name VARCHAR(100),
  190. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  191. );
  192. CREATE INDEX IF NOT EXISTS idx_rules_user_id ON rules(user_id);
  193. CREATE INDEX IF NOT EXISTS idx_rules_status ON rules(status);
  194. CREATE TABLE IF NOT EXISTS data_sources (
  195. id VARCHAR(36) PRIMARY KEY,
  196. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  197. document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  198. name VARCHAR(255) NOT NULL,
  199. type VARCHAR(50) NOT NULL,
  200. source_type VARCHAR(50) NOT NULL DEFAULT 'manual',
  201. node_ids JSONB DEFAULT '{"refs": []}',
  202. config JSONB DEFAULT '{}',
  203. metadata JSONB DEFAULT '{}',
  204. value_type VARCHAR(20) DEFAULT 'text',
  205. aggregate_type VARCHAR(20) DEFAULT 'first',
  206. separator VARCHAR(50) DEFAULT '',
  207. create_by VARCHAR(36),
  208. create_by_name VARCHAR(100),
  209. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  210. update_by VARCHAR(36),
  211. update_by_name VARCHAR(100),
  212. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  213. );
  214. CREATE INDEX IF NOT EXISTS idx_data_sources_user_id ON data_sources(user_id);
  215. CREATE INDEX IF NOT EXISTS idx_data_sources_document_id ON data_sources(document_id);
  216. CREATE INDEX IF NOT EXISTS idx_data_sources_type ON data_sources(type);
  217. CREATE TABLE IF NOT EXISTS text_storage (
  218. id VARCHAR(36) PRIMARY KEY,
  219. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  220. file_path VARCHAR(500) NOT NULL,
  221. file_size BIGINT,
  222. checksum VARCHAR(64),
  223. create_by VARCHAR(36),
  224. create_by_name VARCHAR(100),
  225. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  226. update_by VARCHAR(36),
  227. update_by_name VARCHAR(100),
  228. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  229. );
  230. CREATE INDEX IF NOT EXISTS idx_text_storage_document_id ON text_storage(document_id);
  231. CREATE UNIQUE INDEX IF NOT EXISTS idx_text_storage_document_unique ON text_storage(document_id);
  232. -- ============================================ 三、RAG 模块 ============================================
  233. CREATE TABLE IF NOT EXISTS text_chunks (
  234. id VARCHAR(36) PRIMARY KEY,
  235. document_id VARCHAR(36) NOT NULL,
  236. text_storage_id VARCHAR(36),
  237. chunk_index INTEGER NOT NULL,
  238. content TEXT NOT NULL,
  239. token_count INTEGER,
  240. metadata JSONB DEFAULT '{}',
  241. create_by VARCHAR(36),
  242. create_by_name VARCHAR(100),
  243. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  244. update_by VARCHAR(36),
  245. update_by_name VARCHAR(100),
  246. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  247. );
  248. CREATE INDEX IF NOT EXISTS idx_text_chunks_document_id ON text_chunks(document_id);
  249. CREATE INDEX IF NOT EXISTS idx_text_chunks_text_storage_id ON text_chunks(text_storage_id);
  250. CREATE UNIQUE INDEX IF NOT EXISTS idx_text_chunks_doc_index ON text_chunks(document_id, chunk_index);
  251. CREATE TABLE IF NOT EXISTS vector_embeddings (
  252. id VARCHAR(36) PRIMARY KEY,
  253. chunk_id VARCHAR(36) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE,
  254. embedding vector(768),
  255. model_name VARCHAR(100) DEFAULT 'nomic-embed-text',
  256. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  257. );
  258. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_chunk_id ON vector_embeddings(chunk_id);
  259. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(model_name);
  260. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings USING hnsw (embedding vector_cosine_ops);
  261. -- ============================================ 四、文档结构化模块 ============================================
  262. CREATE TABLE IF NOT EXISTS document_blocks (
  263. id VARCHAR(64) PRIMARY KEY,
  264. document_id VARCHAR(64) NOT NULL,
  265. parent_id VARCHAR(64),
  266. children JSONB,
  267. block_index INTEGER NOT NULL,
  268. block_type VARCHAR(32) NOT NULL,
  269. elements JSONB,
  270. style JSONB,
  271. metadata JSONB,
  272. create_by VARCHAR(64),
  273. create_by_name VARCHAR(128),
  274. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  275. update_by VARCHAR(64),
  276. update_by_name VARCHAR(128),
  277. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  278. );
  279. CREATE INDEX IF NOT EXISTS idx_document_blocks_document_id ON document_blocks(document_id);
  280. CREATE INDEX IF NOT EXISTS idx_document_blocks_parent_id ON document_blocks(parent_id);
  281. CREATE INDEX IF NOT EXISTS idx_document_blocks_block_type ON document_blocks(block_type);
  282. CREATE INDEX IF NOT EXISTS idx_document_blocks_elements_gin ON document_blocks USING GIN (elements jsonb_path_ops);
  283. CREATE TABLE IF NOT EXISTS document_entities (
  284. id VARCHAR(64) PRIMARY KEY,
  285. document_id VARCHAR(64) NOT NULL,
  286. block_id VARCHAR(64),
  287. name VARCHAR(512) NOT NULL,
  288. entity_type VARCHAR(32) NOT NULL,
  289. value TEXT,
  290. block_char_start INTEGER,
  291. block_char_end INTEGER,
  292. global_char_start INTEGER,
  293. global_char_end INTEGER,
  294. anchor_before VARCHAR(100),
  295. anchor_after VARCHAR(100),
  296. source VARCHAR(16) DEFAULT 'auto',
  297. confidence DECIMAL(5,4),
  298. confirmed BOOLEAN DEFAULT FALSE,
  299. metadata JSONB,
  300. create_by VARCHAR(64),
  301. create_by_name VARCHAR(128),
  302. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  303. update_by VARCHAR(64),
  304. update_by_name VARCHAR(128),
  305. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  306. );
  307. CREATE INDEX IF NOT EXISTS idx_document_entities_document_id ON document_entities(document_id);
  308. CREATE INDEX IF NOT EXISTS idx_document_entities_block_id ON document_entities(block_id);
  309. CREATE INDEX IF NOT EXISTS idx_document_entities_type ON document_entities(entity_type);
  310. CREATE INDEX IF NOT EXISTS idx_document_entities_name ON document_entities(name);
  311. CREATE INDEX IF NOT EXISTS idx_document_entities_global_char ON document_entities(document_id, global_char_start, global_char_end);
  312. CREATE TABLE IF NOT EXISTS document_elements (
  313. id VARCHAR(64) PRIMARY KEY,
  314. document_id VARCHAR(64) NOT NULL,
  315. element_index INT NOT NULL,
  316. element_type VARCHAR(32) NOT NULL,
  317. content TEXT,
  318. style JSONB,
  319. runs JSONB,
  320. image_url VARCHAR(500),
  321. image_path VARCHAR(500),
  322. image_alt VARCHAR(255),
  323. image_width INT,
  324. image_height INT,
  325. image_format VARCHAR(16),
  326. table_index INT,
  327. table_data JSONB,
  328. table_row_count INT,
  329. table_col_count INT,
  330. table_text TEXT,
  331. create_by VARCHAR(64),
  332. create_by_name VARCHAR(128),
  333. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  334. update_by VARCHAR(64),
  335. update_by_name VARCHAR(128),
  336. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  337. );
  338. CREATE INDEX IF NOT EXISTS idx_document_elements_document_id ON document_elements(document_id);
  339. CREATE INDEX IF NOT EXISTS idx_document_elements_type ON document_elements(element_type);
  340. CREATE INDEX IF NOT EXISTS idx_document_elements_order ON document_elements(document_id, element_index);
  341. -- ============================================ 五、模板系统模块 ============================================
  342. CREATE TABLE IF NOT EXISTS templates (
  343. id VARCHAR(36) PRIMARY KEY,
  344. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  345. name VARCHAR(255) NOT NULL,
  346. description TEXT,
  347. base_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  348. status VARCHAR(32) DEFAULT 'draft',
  349. config JSONB DEFAULT '{}',
  350. is_public BOOLEAN DEFAULT FALSE,
  351. use_count INT DEFAULT 0,
  352. rating DECIMAL(2,1) DEFAULT 0.0,
  353. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  354. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  355. create_by VARCHAR(36),
  356. create_by_name VARCHAR(100),
  357. update_by VARCHAR(36),
  358. update_by_name VARCHAR(100)
  359. );
  360. CREATE INDEX IF NOT EXISTS idx_templates_user_id ON templates(user_id);
  361. CREATE INDEX IF NOT EXISTS idx_templates_status ON templates(status);
  362. CREATE INDEX IF NOT EXISTS idx_templates_is_public ON templates(is_public);
  363. CREATE INDEX IF NOT EXISTS idx_templates_base_document ON templates(base_document_id);
  364. CREATE TABLE IF NOT EXISTS source_files (
  365. id VARCHAR(36) PRIMARY KEY,
  366. template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
  367. alias VARCHAR(100) NOT NULL,
  368. description TEXT,
  369. file_types JSONB DEFAULT '["pdf", "docx"]',
  370. required BOOLEAN DEFAULT TRUE,
  371. example_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  372. display_order INT DEFAULT 0,
  373. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  374. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  375. CONSTRAINT uk_source_files_alias UNIQUE (template_id, alias)
  376. );
  377. CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files(template_id);
  378. CREATE TABLE IF NOT EXISTS variables (
  379. id VARCHAR(36) PRIMARY KEY,
  380. template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
  381. name VARCHAR(100) NOT NULL,
  382. display_name VARCHAR(200) NOT NULL,
  383. variable_group VARCHAR(100),
  384. category VARCHAR(32),
  385. location JSONB NOT NULL,
  386. example_value TEXT,
  387. value_type VARCHAR(32) DEFAULT 'text',
  388. source_file_alias VARCHAR(100),
  389. source_type VARCHAR(32) NOT NULL,
  390. source_config JSONB,
  391. extract_type VARCHAR(32),
  392. extract_config JSONB,
  393. display_order INT DEFAULT 0,
  394. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  395. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  396. CONSTRAINT uk_variables_name UNIQUE (template_id, name)
  397. );
  398. CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id);
  399. CREATE INDEX IF NOT EXISTS idx_variables_source_alias ON variables(source_file_alias);
  400. CREATE INDEX IF NOT EXISTS idx_variables_source_type ON variables(source_type);
  401. CREATE INDEX IF NOT EXISTS idx_variables_category ON variables(category);
  402. CREATE TABLE IF NOT EXISTS generations (
  403. id VARCHAR(36) PRIMARY KEY,
  404. template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE RESTRICT,
  405. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  406. name VARCHAR(255),
  407. source_file_map JSONB NOT NULL,
  408. variable_values JSONB,
  409. output_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  410. output_file_path VARCHAR(500),
  411. status VARCHAR(32) DEFAULT 'pending',
  412. error_message TEXT,
  413. progress INT DEFAULT 0,
  414. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  415. completed_at TIMESTAMP
  416. );
  417. CREATE INDEX IF NOT EXISTS idx_generations_template ON generations(template_id);
  418. CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id);
  419. CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status);
  420. CREATE INDEX IF NOT EXISTS idx_generations_create_time ON generations(create_time DESC);
  421. -- ============================================ 触发器 ============================================
  422. CREATE OR REPLACE FUNCTION update_update_time_column()
  423. RETURNS TRIGGER AS $$
  424. BEGIN
  425. NEW.update_time = CURRENT_TIMESTAMP;
  426. RETURN NEW;
  427. END;
  428. $$ LANGUAGE plpgsql;
  429. DO $$
  430. DECLARE
  431. tbl TEXT;
  432. tables TEXT[] := ARRAY[
  433. 'users', 'documents', 'elements', 'annotations', 'graphs',
  434. 'parse_tasks', 'sessions', 'rules', 'data_sources', 'text_storage',
  435. 'text_chunks', 'document_blocks', 'document_entities', 'document_elements',
  436. 'templates', 'source_files', 'variables', 'generations'
  437. ];
  438. BEGIN
  439. FOREACH tbl IN ARRAY tables LOOP
  440. EXECUTE format('DROP TRIGGER IF EXISTS trigger_%s_update_time ON %I', tbl, tbl);
  441. EXECUTE format('CREATE TRIGGER trigger_%s_update_time BEFORE UPDATE ON %I FOR EACH ROW EXECUTE FUNCTION update_update_time_column()', tbl, tbl);
  442. END LOOP;
  443. END $$;
  444. -- ============================================ 向量检索函数 ============================================
  445. CREATE OR REPLACE FUNCTION search_similar_chunks(
  446. query_embedding vector(768),
  447. target_document_id VARCHAR(36),
  448. result_limit INTEGER DEFAULT 3
  449. )
  450. RETURNS TABLE (chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT) AS $$
  451. BEGIN
  452. RETURN QUERY
  453. SELECT tc.id, tc.document_id, tc.content, tc.chunk_index,
  454. 1 - (ve.embedding <=> query_embedding) AS similarity
  455. FROM text_chunks tc
  456. JOIN vector_embeddings ve ON tc.id = ve.chunk_id
  457. WHERE tc.document_id = target_document_id
  458. ORDER BY ve.embedding <=> query_embedding
  459. LIMIT result_limit;
  460. END;
  461. $$ LANGUAGE plpgsql;
  462. CREATE OR REPLACE FUNCTION search_similar_chunks_global(
  463. query_embedding vector(768),
  464. result_limit INTEGER DEFAULT 5
  465. )
  466. RETURNS TABLE (chunk_id VARCHAR(36), document_id VARCHAR(36), content TEXT, chunk_index INTEGER, similarity FLOAT) AS $$
  467. BEGIN
  468. RETURN QUERY
  469. SELECT tc.id, tc.document_id, tc.content, tc.chunk_index,
  470. 1 - (ve.embedding <=> query_embedding) AS similarity
  471. FROM text_chunks tc
  472. JOIN vector_embeddings ve ON tc.id = ve.chunk_id
  473. ORDER BY ve.embedding <=> query_embedding
  474. LIMIT result_limit;
  475. END;
  476. $$ LANGUAGE plpgsql;
  477. SELECT '灵越智报 v2.0 数据库初始化完成' AS result;