all_tables.sql 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767
  1. -- =====================================================
  2. -- 灵越智报 v2.0 数据库完整表结构
  3. -- PostgreSQL 15+
  4. --
  5. -- 生成时间: 2026-01-23
  6. --
  7. -- 包含所有表(按模块分组):
  8. -- 一、基础模块
  9. -- 1. users - 用户表
  10. -- 2. documents - 文档表
  11. -- 3. elements - 要素表
  12. -- 4. annotations - 批注表
  13. -- 5. graphs - 关系网络表
  14. -- 6. parse_tasks - 解析任务表
  15. -- 7. sessions - 会话表
  16. --
  17. -- 二、图谱模块
  18. -- 8. graph_nodes - 图节点表
  19. -- 9. graph_relations - 图关系表
  20. --
  21. -- 三、补充模块
  22. -- 10. rules - 规则表
  23. -- 11. data_sources - 数据源表
  24. -- 12. text_storage - 文本存储表
  25. --
  26. -- 四、RAG 模块
  27. -- 13. text_chunks - 文本分块表
  28. -- 14. vector_embeddings - 向量嵌入表
  29. --
  30. -- 五、文档结构化模块
  31. -- 15. document_blocks - 文档块表
  32. -- 16. document_entities - 文档实体表
  33. -- 17. document_elements - 文档元素表
  34. --
  35. -- 六、模板系统模块(v2.0)
  36. -- 18. templates - 报告模板表
  37. -- 19. source_files - 来源文件定义表
  38. -- 20. variables - 模板变量表
  39. -- 21. generations - 生成任务表
  40. -- =====================================================
  41. -- 启用扩展
  42. CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
  43. CREATE EXTENSION IF NOT EXISTS vector; -- pgvector 用于向量检索
  44. -- ============================================
  45. -- 一、基础模块
  46. -- ============================================
  47. -- 1. 用户表 (users)
  48. CREATE TABLE IF NOT EXISTS users (
  49. id VARCHAR(36) PRIMARY KEY,
  50. username VARCHAR(50) UNIQUE NOT NULL,
  51. email VARCHAR(100) UNIQUE NOT NULL,
  52. password_hash VARCHAR(255) NOT NULL,
  53. avatar_url VARCHAR(500),
  54. role VARCHAR(20) NOT NULL DEFAULT 'user', -- admin/user/guest
  55. preferences TEXT DEFAULT '{}', -- JSON字符串格式存储偏好设置
  56. last_login_at TIMESTAMP,
  57. create_by VARCHAR(36),
  58. create_by_name VARCHAR(100),
  59. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  60. update_by VARCHAR(36),
  61. update_by_name VARCHAR(100),
  62. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  63. );
  64. CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
  65. CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);
  66. COMMENT ON TABLE users IS '用户表';
  67. COMMENT ON COLUMN users.role IS '角色: admin-管理员, user-普通用户, guest-访客';
  68. -- 2. 文档表 (documents)
  69. CREATE TABLE IF NOT EXISTS documents (
  70. id VARCHAR(36) PRIMARY KEY,
  71. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  72. name VARCHAR(255) NOT NULL,
  73. type VARCHAR(20) NOT NULL, -- pdf/word/image/markdown/other
  74. status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending/uploading/parsing/completed/failed
  75. file_size BIGINT,
  76. file_url VARCHAR(500),
  77. thumbnail_url VARCHAR(500),
  78. parsed_text TEXT,
  79. parse_status VARCHAR(20), -- pending/parsing/completed/failed
  80. parse_progress INTEGER DEFAULT 0, -- 0-100
  81. parse_error TEXT,
  82. parse_started_at TIMESTAMP,
  83. parse_completed_at TIMESTAMP,
  84. metadata JSONB DEFAULT '{}', -- pageCount, ocrConfidence, layoutStructure等
  85. -- 结构化解析增强字段
  86. structured_status VARCHAR(20) DEFAULT 'pending',
  87. image_count INT DEFAULT 0,
  88. table_count INT DEFAULT 0,
  89. element_count INT DEFAULT 0,
  90. create_by VARCHAR(36),
  91. create_by_name VARCHAR(100),
  92. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  93. update_by VARCHAR(36),
  94. update_by_name VARCHAR(100),
  95. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  96. );
  97. CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id);
  98. CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
  99. CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(type);
  100. CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(create_time DESC);
  101. CREATE INDEX IF NOT EXISTS idx_documents_metadata ON documents USING GIN(metadata);
  102. COMMENT ON TABLE documents IS '文档表';
  103. COMMENT ON COLUMN documents.type IS '文档类型: pdf/word/image/markdown/other';
  104. COMMENT ON COLUMN documents.status IS '状态: pending/uploading/parsing/completed/failed';
  105. COMMENT ON COLUMN documents.structured_status IS '结构化解析状态: pending/completed/failed';
  106. -- 3. 要素表 (elements)
  107. CREATE TABLE IF NOT EXISTS elements (
  108. id VARCHAR(36) PRIMARY KEY,
  109. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  110. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  111. type VARCHAR(20) NOT NULL, -- amount/company/person/location/date/other
  112. label VARCHAR(100) NOT NULL,
  113. value TEXT NOT NULL,
  114. position JSONB, -- {page, x, y, width, height}
  115. confidence DECIMAL(3,2), -- 0.00-1.00
  116. extraction_method VARCHAR(20), -- ai/regex/rule/manual
  117. graph_node_id VARCHAR(36), -- 关联的图节点ID
  118. metadata JSONB DEFAULT '{}',
  119. create_by VARCHAR(36),
  120. create_by_name VARCHAR(100),
  121. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  122. update_by VARCHAR(36),
  123. update_by_name VARCHAR(100),
  124. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  125. );
  126. CREATE INDEX IF NOT EXISTS idx_elements_document_id ON elements(document_id);
  127. CREATE INDEX IF NOT EXISTS idx_elements_user_id ON elements(user_id);
  128. CREATE INDEX IF NOT EXISTS idx_elements_type ON elements(type);
  129. CREATE INDEX IF NOT EXISTS idx_elements_graph_node_id ON elements(graph_node_id);
  130. CREATE INDEX IF NOT EXISTS idx_elements_position ON elements USING GIN(position);
  131. COMMENT ON TABLE elements IS '要素表 - 文档中提取的结构化要素';
  132. COMMENT ON COLUMN elements.type IS '要素类型: amount/company/person/location/date/other';
  133. -- 4. 批注表 (annotations)
  134. CREATE TABLE IF NOT EXISTS annotations (
  135. id VARCHAR(36) PRIMARY KEY,
  136. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  137. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  138. text TEXT NOT NULL,
  139. position JSONB NOT NULL, -- {page, start: {x, y}, end: {x, y}}
  140. type VARCHAR(20) NOT NULL, -- highlight/strikethrough/suggestion
  141. suggestion TEXT,
  142. ai_generated BOOLEAN DEFAULT FALSE,
  143. confidence DECIMAL(3,2),
  144. status VARCHAR(20) DEFAULT 'pending', -- pending/accepted/rejected
  145. create_by VARCHAR(36),
  146. create_by_name VARCHAR(100),
  147. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  148. update_by VARCHAR(36),
  149. update_by_name VARCHAR(100),
  150. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  151. );
  152. CREATE INDEX IF NOT EXISTS idx_annotations_document_id ON annotations(document_id);
  153. CREATE INDEX IF NOT EXISTS idx_annotations_user_id ON annotations(user_id);
  154. CREATE INDEX IF NOT EXISTS idx_annotations_type ON annotations(type);
  155. CREATE INDEX IF NOT EXISTS idx_annotations_status ON annotations(status);
  156. COMMENT ON TABLE annotations IS '批注表';
  157. COMMENT ON COLUMN annotations.type IS '批注类型: highlight/strikethrough/suggestion';
  158. -- 5. 关系网络表 (graphs)
  159. CREATE TABLE IF NOT EXISTS graphs (
  160. id VARCHAR(36) PRIMARY KEY,
  161. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  162. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  163. name VARCHAR(255) NOT NULL,
  164. nodes JSONB NOT NULL DEFAULT '[]', -- GraphNode数组
  165. edges JSONB NOT NULL DEFAULT '[]', -- GraphEdge数组
  166. calculation_result JSONB,
  167. calculation_status VARCHAR(20), -- pending/completed/failed
  168. metadata JSONB DEFAULT '{}',
  169. create_by VARCHAR(36),
  170. create_by_name VARCHAR(100),
  171. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  172. update_by VARCHAR(36),
  173. update_by_name VARCHAR(100),
  174. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  175. );
  176. CREATE INDEX IF NOT EXISTS idx_graphs_document_id ON graphs(document_id);
  177. CREATE INDEX IF NOT EXISTS idx_graphs_user_id ON graphs(user_id);
  178. CREATE INDEX IF NOT EXISTS idx_graphs_nodes ON graphs USING GIN(nodes);
  179. CREATE INDEX IF NOT EXISTS idx_graphs_edges ON graphs USING GIN(edges);
  180. COMMENT ON TABLE graphs IS '关系网络表';
  181. -- 6. 解析任务表 (parse_tasks)
  182. CREATE TABLE IF NOT EXISTS parse_tasks (
  183. id VARCHAR(36) PRIMARY KEY,
  184. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  185. status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending/processing/completed/failed
  186. progress INTEGER DEFAULT 0, -- 0-100
  187. current_step VARCHAR(100),
  188. error_message TEXT,
  189. options JSONB DEFAULT '{}', -- 解析选项
  190. started_at TIMESTAMP,
  191. completed_at TIMESTAMP,
  192. -- 多阶段进度跟踪
  193. parse_status VARCHAR(20) DEFAULT 'pending',
  194. parse_progress INTEGER DEFAULT 0,
  195. rag_status VARCHAR(20) DEFAULT 'pending',
  196. rag_progress INTEGER DEFAULT 0,
  197. structured_status VARCHAR(20) DEFAULT 'pending',
  198. structured_progress INTEGER DEFAULT 0,
  199. structured_element_count INTEGER,
  200. structured_image_count INTEGER,
  201. structured_table_count INTEGER,
  202. ner_status VARCHAR(20) DEFAULT 'pending',
  203. ner_progress INTEGER DEFAULT 0,
  204. ner_task_id VARCHAR(64),
  205. ner_entity_count INTEGER,
  206. ner_relation_count INTEGER,
  207. graph_status VARCHAR(20) DEFAULT 'pending',
  208. graph_progress INTEGER DEFAULT 0,
  209. create_by VARCHAR(36),
  210. create_by_name VARCHAR(100),
  211. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  212. update_by VARCHAR(36),
  213. update_by_name VARCHAR(100),
  214. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  215. );
  216. CREATE INDEX IF NOT EXISTS idx_parse_tasks_document_id ON parse_tasks(document_id);
  217. CREATE INDEX IF NOT EXISTS idx_parse_tasks_status ON parse_tasks(status);
  218. CREATE INDEX IF NOT EXISTS idx_parse_tasks_ner_task_id ON parse_tasks(ner_task_id);
  219. COMMENT ON TABLE parse_tasks IS '解析任务表';
  220. COMMENT ON COLUMN parse_tasks.parse_status IS '解析阶段状态';
  221. COMMENT ON COLUMN parse_tasks.rag_status IS 'RAG向量化阶段状态';
  222. COMMENT ON COLUMN parse_tasks.structured_status IS '结构化解析阶段状态';
  223. COMMENT ON COLUMN parse_tasks.ner_status IS 'NER实体提取阶段状态';
  224. COMMENT ON COLUMN parse_tasks.graph_status IS '图构建阶段状态';
  225. -- 7. 会话表 (sessions)
  226. CREATE TABLE IF NOT EXISTS sessions (
  227. id VARCHAR(36) PRIMARY KEY,
  228. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  229. token_hash VARCHAR(255) NOT NULL UNIQUE,
  230. refresh_token_hash VARCHAR(255) NOT NULL UNIQUE,
  231. expires_at TIMESTAMP NOT NULL,
  232. ip_address VARCHAR(45),
  233. user_agent TEXT,
  234. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  235. last_used_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  236. );
  237. CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON sessions(user_id);
  238. CREATE INDEX IF NOT EXISTS idx_sessions_token_hash ON sessions(token_hash);
  239. CREATE INDEX IF NOT EXISTS idx_sessions_expires_at ON sessions(expires_at);
  240. COMMENT ON TABLE sessions IS '会话表';
  241. -- ============================================
  242. -- 二、图谱模块
  243. -- ============================================
  244. -- 8. 图节点表 (graph_nodes)
  245. CREATE TABLE IF NOT EXISTS graph_nodes (
  246. id VARCHAR(36) PRIMARY KEY,
  247. document_id VARCHAR(36) NOT NULL,
  248. user_id VARCHAR(36), -- 可为空,自动提取时可能没有用户上下文
  249. name VARCHAR(255) NOT NULL,
  250. type VARCHAR(50) NOT NULL, -- text/table/image/number/date/ORG/PERSON/LOC/TIME/DEVICE/PROJECT/METHOD等
  251. value TEXT,
  252. position JSONB, -- {charStart, charEnd, line}
  253. parent_id VARCHAR(36),
  254. level INTEGER DEFAULT 0,
  255. metadata JSONB DEFAULT '{}',
  256. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  257. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  258. );
  259. CREATE INDEX IF NOT EXISTS idx_graph_nodes_document_id ON graph_nodes(document_id);
  260. CREATE INDEX IF NOT EXISTS idx_graph_nodes_user_id ON graph_nodes(user_id);
  261. CREATE INDEX IF NOT EXISTS idx_graph_nodes_type ON graph_nodes(type);
  262. CREATE INDEX IF NOT EXISTS idx_graph_nodes_parent_id ON graph_nodes(parent_id);
  263. CREATE INDEX IF NOT EXISTS idx_graph_nodes_position ON graph_nodes USING GIN(position);
  264. COMMENT ON TABLE graph_nodes IS '图节点表 - NER实体识别结果';
  265. COMMENT ON COLUMN graph_nodes.type IS '节点类型: ORG/PERSON/LOC/TIME/DEVICE/PROJECT/METHOD等';
  266. -- 9. 图关系表 (graph_relations)
  267. CREATE TABLE IF NOT EXISTS graph_relations (
  268. id VARCHAR(36) PRIMARY KEY,
  269. from_node_id VARCHAR(36) NOT NULL REFERENCES graph_nodes(id) ON DELETE CASCADE,
  270. to_node_id VARCHAR(36) NOT NULL REFERENCES graph_nodes(id) ON DELETE CASCADE,
  271. relation_type VARCHAR(50) NOT NULL, -- BELONGS_TO/USES/LOCATED_IN/EXECUTES/MONITORS等
  272. action_type VARCHAR(50),
  273. action_config JSONB,
  274. order_index INTEGER DEFAULT 0,
  275. condition_expr TEXT,
  276. metadata JSONB DEFAULT '{}',
  277. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  278. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  279. );
  280. CREATE INDEX IF NOT EXISTS idx_graph_relations_from_node ON graph_relations(from_node_id);
  281. CREATE INDEX IF NOT EXISTS idx_graph_relations_to_node ON graph_relations(to_node_id);
  282. CREATE INDEX IF NOT EXISTS idx_graph_relations_type ON graph_relations(relation_type);
  283. COMMENT ON TABLE graph_relations IS '图关系表 - 实体之间的关系';
  284. COMMENT ON COLUMN graph_relations.relation_type IS '关系类型: BELONGS_TO/USES/LOCATED_IN/EXECUTES/MONITORS等';
  285. -- ============================================
  286. -- 三、补充模块
  287. -- ============================================
  288. -- 10. 规则表 (rules)
  289. CREATE TABLE IF NOT EXISTS rules (
  290. id VARCHAR(36) PRIMARY KEY,
  291. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  292. name VARCHAR(255) NOT NULL,
  293. description TEXT,
  294. entry_node_id VARCHAR(36) REFERENCES graph_nodes(id) ON DELETE SET NULL,
  295. exit_node_id VARCHAR(36) REFERENCES graph_nodes(id) ON DELETE SET NULL,
  296. rule_chain JSONB NOT NULL DEFAULT '[]', -- 规则链(节点ID序列)
  297. status VARCHAR(20) DEFAULT 'active', -- active/inactive
  298. metadata JSONB DEFAULT '{}',
  299. create_by VARCHAR(36),
  300. create_by_name VARCHAR(100),
  301. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  302. update_by VARCHAR(36),
  303. update_by_name VARCHAR(100),
  304. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  305. );
  306. CREATE INDEX IF NOT EXISTS idx_rules_user_id ON rules(user_id);
  307. CREATE INDEX IF NOT EXISTS idx_rules_entry_node ON rules(entry_node_id);
  308. CREATE INDEX IF NOT EXISTS idx_rules_exit_node ON rules(exit_node_id);
  309. CREATE INDEX IF NOT EXISTS idx_rules_status ON rules(status);
  310. COMMENT ON TABLE rules IS '规则表';
  311. -- 11. 数据源表 (data_sources)
  312. CREATE TABLE IF NOT EXISTS data_sources (
  313. id VARCHAR(36) PRIMARY KEY,
  314. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  315. document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  316. name VARCHAR(255) NOT NULL,
  317. type VARCHAR(50) NOT NULL, -- table/text/image
  318. source_type VARCHAR(50) NOT NULL DEFAULT 'manual', -- file/manual/rule_result
  319. node_ids JSONB DEFAULT '{"refs": []}', -- 节点引用列表
  320. config JSONB DEFAULT '{}', -- 数据源配置
  321. metadata JSONB DEFAULT '{}',
  322. value_type VARCHAR(20) DEFAULT 'text', -- text/image/table/mixed
  323. aggregate_type VARCHAR(20) DEFAULT 'first', -- first/last/concat/sum/avg/list
  324. separator VARCHAR(50) DEFAULT '', -- 聚合分隔符
  325. create_by VARCHAR(36),
  326. create_by_name VARCHAR(100),
  327. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  328. update_by VARCHAR(36),
  329. update_by_name VARCHAR(100),
  330. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  331. );
  332. CREATE INDEX IF NOT EXISTS idx_data_sources_user_id ON data_sources(user_id);
  333. CREATE INDEX IF NOT EXISTS idx_data_sources_document_id ON data_sources(document_id);
  334. CREATE INDEX IF NOT EXISTS idx_data_sources_type ON data_sources(type);
  335. COMMENT ON TABLE data_sources IS '数据源表';
  336. COMMENT ON COLUMN data_sources.value_type IS '值类型: text/image/table/mixed';
  337. COMMENT ON COLUMN data_sources.aggregate_type IS '聚合方式: first/last/concat/sum/avg/list';
  338. -- 12. 文本存储表 (text_storage)
  339. CREATE TABLE IF NOT EXISTS text_storage (
  340. id VARCHAR(36) PRIMARY KEY,
  341. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  342. file_path VARCHAR(500) NOT NULL, -- TXT文件路径
  343. file_size BIGINT,
  344. checksum VARCHAR(64), -- 文件校验和
  345. create_by VARCHAR(36),
  346. create_by_name VARCHAR(100),
  347. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  348. update_by VARCHAR(36),
  349. update_by_name VARCHAR(100),
  350. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  351. );
  352. CREATE INDEX IF NOT EXISTS idx_text_storage_document_id ON text_storage(document_id);
  353. CREATE UNIQUE INDEX IF NOT EXISTS idx_text_storage_document_unique ON text_storage(document_id);
  354. COMMENT ON TABLE text_storage IS '文本存储表 - 存储文档解析后的TXT文件路径';
  355. -- ============================================
  356. -- 四、RAG 模块
  357. -- ============================================
  358. -- 13. 文本分块表 (text_chunks)
  359. CREATE TABLE IF NOT EXISTS text_chunks (
  360. id VARCHAR(36) PRIMARY KEY,
  361. document_id VARCHAR(36) NOT NULL,
  362. text_storage_id VARCHAR(36),
  363. chunk_index INTEGER NOT NULL,
  364. content TEXT NOT NULL,
  365. token_count INTEGER,
  366. metadata JSONB DEFAULT '{}',
  367. create_by VARCHAR(36),
  368. create_by_name VARCHAR(100),
  369. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
  370. update_by VARCHAR(36),
  371. update_by_name VARCHAR(100),
  372. update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  373. );
  374. CREATE INDEX IF NOT EXISTS idx_text_chunks_document_id ON text_chunks(document_id);
  375. CREATE INDEX IF NOT EXISTS idx_text_chunks_text_storage_id ON text_chunks(text_storage_id);
  376. CREATE INDEX IF NOT EXISTS idx_text_chunks_chunk_index ON text_chunks(document_id, chunk_index);
  377. COMMENT ON TABLE text_chunks IS '文本分块表 - RAG分块存储';
  378. -- 14. 向量嵌入表 (vector_embeddings)
  379. CREATE TABLE IF NOT EXISTS vector_embeddings (
  380. id VARCHAR(36) PRIMARY KEY,
  381. chunk_id VARCHAR(36) NOT NULL REFERENCES text_chunks(id) ON DELETE CASCADE,
  382. embedding vector(768), -- nomic-embed-text 维度为 768
  383. model_name VARCHAR(100) DEFAULT 'nomic-embed-text',
  384. create_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  385. );
  386. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_chunk_id ON vector_embeddings(chunk_id);
  387. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(model_name);
  388. CREATE INDEX IF NOT EXISTS idx_vector_embeddings_hnsw ON vector_embeddings USING hnsw (embedding vector_cosine_ops);
  389. COMMENT ON TABLE vector_embeddings IS '向量嵌入表 - RAG向量检索';
  390. COMMENT ON COLUMN vector_embeddings.embedding IS '768维向量(nomic-embed-text)';
  391. -- ============================================
  392. -- 五、文档结构化模块
  393. -- ============================================
  394. -- 15. 文档块表 (document_blocks)
  395. CREATE TABLE IF NOT EXISTS document_blocks (
  396. id VARCHAR(64) PRIMARY KEY,
  397. document_id VARCHAR(64) NOT NULL,
  398. parent_id VARCHAR(64),
  399. children JSONB,
  400. block_index INTEGER NOT NULL,
  401. block_type VARCHAR(32) NOT NULL, -- heading1/heading2/paragraph/table/list/image/quote
  402. elements JSONB, -- TextElement数组
  403. style JSONB,
  404. metadata JSONB,
  405. create_by VARCHAR(64),
  406. create_by_name VARCHAR(128),
  407. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  408. update_by VARCHAR(64),
  409. update_by_name VARCHAR(128),
  410. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  411. );
  412. CREATE INDEX IF NOT EXISTS idx_document_blocks_document_id ON document_blocks(document_id);
  413. CREATE INDEX IF NOT EXISTS idx_document_blocks_parent_id ON document_blocks(parent_id);
  414. CREATE INDEX IF NOT EXISTS idx_document_blocks_block_type ON document_blocks(block_type);
  415. CREATE INDEX IF NOT EXISTS idx_document_blocks_elements_gin ON document_blocks USING GIN (elements jsonb_path_ops);
  416. COMMENT ON TABLE document_blocks IS '文档块表 - 参考飞书Block设计';
  417. COMMENT ON COLUMN document_blocks.block_type IS '块类型: heading1/heading2/paragraph/table/list/image/quote';
  418. COMMENT ON COLUMN document_blocks.elements IS 'TextElement数组,实体作为元素嵌入';
  419. -- 16. 文档实体表 (document_entities)
  420. CREATE TABLE IF NOT EXISTS document_entities (
  421. id VARCHAR(64) PRIMARY KEY,
  422. document_id VARCHAR(64) NOT NULL,
  423. block_id VARCHAR(64),
  424. name VARCHAR(512) NOT NULL,
  425. entity_type VARCHAR(32) NOT NULL, -- PERSON/ORG/LOC/DATE/NUMBER/MONEY/CONCEPT/DATA/DEVICE/TERM
  426. value TEXT,
  427. block_char_start INTEGER,
  428. block_char_end INTEGER,
  429. global_char_start INTEGER,
  430. global_char_end INTEGER,
  431. anchor_before VARCHAR(100),
  432. anchor_after VARCHAR(100),
  433. source VARCHAR(16) DEFAULT 'auto', -- auto/manual
  434. confidence DECIMAL(5,4),
  435. confirmed BOOLEAN DEFAULT FALSE,
  436. graph_node_id VARCHAR(64),
  437. metadata JSONB,
  438. create_by VARCHAR(64),
  439. create_by_name VARCHAR(128),
  440. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  441. update_by VARCHAR(64),
  442. update_by_name VARCHAR(128),
  443. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  444. );
  445. CREATE INDEX IF NOT EXISTS idx_document_entities_document_id ON document_entities(document_id);
  446. CREATE INDEX IF NOT EXISTS idx_document_entities_block_id ON document_entities(block_id);
  447. CREATE INDEX IF NOT EXISTS idx_document_entities_type ON document_entities(entity_type);
  448. CREATE INDEX IF NOT EXISTS idx_document_entities_name ON document_entities(name);
  449. CREATE INDEX IF NOT EXISTS idx_document_entities_global_char ON document_entities(document_id, global_char_start, global_char_end);
  450. CREATE INDEX IF NOT EXISTS idx_document_entities_graph_node ON document_entities(graph_node_id);
  451. COMMENT ON TABLE document_entities IS '文档实体表 - 文档中标记的实体/要素';
  452. COMMENT ON COLUMN document_entities.entity_type IS '实体类型: PERSON/ORG/LOC/DATE/NUMBER/MONEY等';
  453. COMMENT ON COLUMN document_entities.source IS '来源: auto=自动识别, manual=手动标注';
  454. -- 17. 文档元素表 (document_elements)
  455. CREATE TABLE IF NOT EXISTS document_elements (
  456. id VARCHAR(64) PRIMARY KEY,
  457. document_id VARCHAR(64) NOT NULL,
  458. element_index INT NOT NULL, -- 元素在文档中的顺序
  459. element_type VARCHAR(32) NOT NULL, -- paragraph/heading/heading1-9/list_item/image/table/title/toc
  460. content TEXT, -- 文本内容(文本类型)
  461. style JSONB, -- 样式信息
  462. -- 图片相关
  463. image_url VARCHAR(500),
  464. image_path VARCHAR(500),
  465. image_alt VARCHAR(255),
  466. image_width INT,
  467. image_height INT,
  468. image_format VARCHAR(16),
  469. -- 表格相关
  470. table_index INT,
  471. table_data JSONB, -- [[{row,col,text,colSpan},...],...]
  472. table_row_count INT,
  473. table_col_count INT,
  474. table_text TEXT, -- 表格文本(用于搜索)
  475. create_by VARCHAR(64),
  476. create_by_name VARCHAR(128),
  477. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  478. update_by VARCHAR(64),
  479. update_by_name VARCHAR(128),
  480. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  481. );
  482. CREATE INDEX IF NOT EXISTS idx_document_elements_document_id ON document_elements(document_id);
  483. CREATE INDEX IF NOT EXISTS idx_document_elements_type ON document_elements(element_type);
  484. CREATE INDEX IF NOT EXISTS idx_document_elements_order ON document_elements(document_id, element_index);
  485. COMMENT ON TABLE document_elements IS '文档元素表 - Word/PDF结构化提取';
  486. COMMENT ON COLUMN document_elements.element_type IS '元素类型: paragraph/heading/image/table等';
  487. COMMENT ON COLUMN document_elements.style IS '样式信息JSON: {alignment, fontSize, bold, color等}';
  488. COMMENT ON COLUMN document_elements.table_data IS '表格数据JSON: [[{row,col,text,colSpan},...],...]';
  489. -- ============================================
  490. -- 六、模板系统模块(v2.0)
  491. -- ============================================
  492. -- 18. 报告模板表 (templates)
  493. CREATE TABLE IF NOT EXISTS templates (
  494. id VARCHAR(36) PRIMARY KEY,
  495. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  496. name VARCHAR(255) NOT NULL,
  497. description TEXT,
  498. base_document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE RESTRICT,
  499. status VARCHAR(32) DEFAULT 'draft', -- draft/published/archived
  500. config JSONB DEFAULT '{}',
  501. is_public BOOLEAN DEFAULT FALSE,
  502. use_count INT DEFAULT 0,
  503. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  504. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  505. create_by VARCHAR(36),
  506. create_by_name VARCHAR(100),
  507. update_by VARCHAR(36),
  508. update_by_name VARCHAR(100)
  509. );
  510. CREATE INDEX IF NOT EXISTS idx_templates_user_id ON templates(user_id);
  511. CREATE INDEX IF NOT EXISTS idx_templates_status ON templates(status);
  512. CREATE INDEX IF NOT EXISTS idx_templates_is_public ON templates(is_public);
  513. CREATE INDEX IF NOT EXISTS idx_templates_base_document ON templates(base_document_id);
  514. COMMENT ON TABLE templates IS '报告模板表 - 示例文档驱动的模板';
  515. COMMENT ON COLUMN templates.base_document_id IS '示例报告文档ID';
  516. COMMENT ON COLUMN templates.status IS 'draft-草稿, published-已发布, archived-已归档';
  517. COMMENT ON COLUMN templates.is_public IS '是否公开给其他用户使用';
  518. -- 19. 来源文件定义表 (source_files)
  519. CREATE TABLE IF NOT EXISTS source_files (
  520. id VARCHAR(36) PRIMARY KEY,
  521. template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
  522. alias VARCHAR(100) NOT NULL, -- 文件别名,如"可研批复"
  523. description TEXT,
  524. file_types JSONB DEFAULT '["pdf", "docx"]', -- 允许的文件类型
  525. required BOOLEAN DEFAULT TRUE,
  526. example_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  527. display_order INT DEFAULT 0,
  528. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  529. CONSTRAINT uk_source_files_alias UNIQUE (template_id, alias)
  530. );
  531. CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files(template_id);
  532. COMMENT ON TABLE source_files IS '来源文件定义表';
  533. COMMENT ON COLUMN source_files.alias IS '用户自定义的别名,用于引用';
  534. COMMENT ON COLUMN source_files.file_types IS '允许上传的文件类型列表';
  535. COMMENT ON COLUMN source_files.example_document_id IS '创建模板时使用的示例文件';
  536. -- 20. 模板变量表 (variables)
  537. CREATE TABLE IF NOT EXISTS variables (
  538. id VARCHAR(36) PRIMARY KEY,
  539. template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
  540. -- 变量标识
  541. name VARCHAR(100) NOT NULL, -- 变量名(程序用)
  542. display_name VARCHAR(200) NOT NULL, -- 显示名称
  543. variable_group VARCHAR(100), -- 变量分组
  544. category VARCHAR(32), -- 变量类别(前端显示用): entity/concept/data/location/asset
  545. -- 在示例报告中的位置
  546. location JSONB NOT NULL, -- {elementId, type, startOffset, endOffset, rowIndex, colIndex}
  547. -- 示例值
  548. example_value TEXT,
  549. value_type VARCHAR(32) DEFAULT 'text', -- text/date/number/table
  550. -- 数据来源
  551. source_file_alias VARCHAR(100), -- 来源文件别名
  552. source_type VARCHAR(32) NOT NULL, -- document/manual/reference/fixed
  553. source_config JSONB,
  554. -- 提取方式
  555. extract_type VARCHAR(32), -- direct/ai_extract/ai_summarize
  556. extract_config JSONB,
  557. display_order INT DEFAULT 0,
  558. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  559. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  560. CONSTRAINT uk_variables_name UNIQUE (template_id, name)
  561. );
  562. CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id);
  563. CREATE INDEX IF NOT EXISTS idx_variables_source_alias ON variables(source_file_alias);
  564. CREATE INDEX IF NOT EXISTS idx_variables_source_type ON variables(source_type);
  565. CREATE INDEX IF NOT EXISTS idx_variables_category ON variables(category);
  566. COMMENT ON TABLE variables IS '模板变量表';
  567. COMMENT ON COLUMN variables.name IS '变量名,模板内唯一';
  568. COMMENT ON COLUMN variables.location IS '变量在文档中的位置';
  569. COMMENT ON COLUMN variables.source_type IS 'document-从来源文件提取, manual-手动输入, reference-引用其他变量, fixed-固定值';
  570. COMMENT ON COLUMN variables.extract_type IS 'direct-直接提取, ai_extract-AI字段提取, ai_summarize-AI总结';
  571. COMMENT ON COLUMN variables.category IS 'entity-核心实体, concept-概念/技术, data-数据/指标, location-地点/组织, asset-资源模板';
  572. -- 21. 生成任务表 (generations)
  573. CREATE TABLE IF NOT EXISTS generations (
  574. id VARCHAR(36) PRIMARY KEY,
  575. template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE RESTRICT,
  576. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  577. name VARCHAR(255),
  578. -- 来源文件映射
  579. source_file_map JSONB NOT NULL, -- {"可研批复": "doc_123", ...}
  580. -- 变量提取结果
  581. variable_values JSONB, -- {varName: {value, confidence, status, ...}}
  582. -- 生成的文档
  583. output_document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  584. output_file_path VARCHAR(500),
  585. status VARCHAR(32) DEFAULT 'pending', -- pending/extracting/review/completed/error
  586. error_message TEXT,
  587. progress INT DEFAULT 0, -- 0-100
  588. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  589. completed_at TIMESTAMP
  590. );
  591. CREATE INDEX IF NOT EXISTS idx_generations_template ON generations(template_id);
  592. CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id);
  593. CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status);
  594. CREATE INDEX IF NOT EXISTS idx_generations_create_time ON generations(create_time DESC);
  595. COMMENT ON TABLE generations IS '报告生成任务表';
  596. COMMENT ON COLUMN generations.source_file_map IS '来源文件映射: {"别名": "文档ID"}';
  597. COMMENT ON COLUMN generations.variable_values IS '变量提取结果';
  598. COMMENT ON COLUMN generations.status IS 'pending-待执行, extracting-提取中, review-待确认, completed-已完成, error-错误';
  599. -- ============================================
  600. -- 七、触发器函数与触发器
  601. -- ============================================
  602. -- 更新时间触发器函数
  603. CREATE OR REPLACE FUNCTION update_update_time_column()
  604. RETURNS TRIGGER AS $$
  605. BEGIN
  606. NEW.update_time = CURRENT_TIMESTAMP;
  607. RETURN NEW;
  608. END;
  609. $$ LANGUAGE plpgsql;
  610. -- 为所有需要的表创建触发器
  611. DO $$
  612. DECLARE
  613. tbl TEXT;
  614. tables TEXT[] := ARRAY[
  615. 'users', 'documents', 'elements', 'annotations', 'graphs',
  616. 'parse_tasks', 'sessions', 'graph_nodes', 'graph_relations',
  617. 'rules', 'data_sources', 'text_storage', 'text_chunks',
  618. 'document_blocks', 'document_entities', 'document_elements',
  619. 'templates', 'variables'
  620. ];
  621. BEGIN
  622. FOREACH tbl IN ARRAY tables LOOP
  623. EXECUTE format('DROP TRIGGER IF EXISTS trigger_%s_update_time ON %I', tbl, tbl);
  624. EXECUTE format('CREATE TRIGGER trigger_%s_update_time BEFORE UPDATE ON %I FOR EACH ROW EXECUTE FUNCTION update_update_time_column()', tbl, tbl);
  625. END LOOP;
  626. END $$;
  627. -- ============================================
  628. -- 八、辅助函数
  629. -- ============================================
  630. -- 向量相似度检索(按文档)
  631. CREATE OR REPLACE FUNCTION search_similar_chunks(
  632. query_embedding vector(768),
  633. target_document_id VARCHAR(36),
  634. result_limit INTEGER DEFAULT 3
  635. )
  636. RETURNS TABLE (
  637. chunk_id VARCHAR(36),
  638. document_id VARCHAR(36),
  639. content TEXT,
  640. chunk_index INTEGER,
  641. similarity FLOAT
  642. ) AS $$
  643. BEGIN
  644. RETURN QUERY
  645. SELECT
  646. tc.id AS chunk_id,
  647. tc.document_id,
  648. tc.content,
  649. tc.chunk_index,
  650. 1 - (ve.embedding <=> query_embedding) AS similarity
  651. FROM text_chunks tc
  652. JOIN vector_embeddings ve ON tc.id = ve.chunk_id
  653. WHERE tc.document_id = target_document_id
  654. ORDER BY ve.embedding <=> query_embedding
  655. LIMIT result_limit;
  656. END;
  657. $$ LANGUAGE plpgsql;
  658. -- 向量相似度检索(全局)
  659. CREATE OR REPLACE FUNCTION search_similar_chunks_global(
  660. query_embedding vector(768),
  661. result_limit INTEGER DEFAULT 5
  662. )
  663. RETURNS TABLE (
  664. chunk_id VARCHAR(36),
  665. document_id VARCHAR(36),
  666. content TEXT,
  667. chunk_index INTEGER,
  668. similarity FLOAT
  669. ) AS $$
  670. BEGIN
  671. RETURN QUERY
  672. SELECT
  673. tc.id AS chunk_id,
  674. tc.document_id,
  675. tc.content,
  676. tc.chunk_index,
  677. 1 - (ve.embedding <=> query_embedding) AS similarity
  678. FROM text_chunks tc
  679. JOIN vector_embeddings ve ON tc.id = ve.chunk_id
  680. ORDER BY ve.embedding <=> query_embedding
  681. LIMIT result_limit;
  682. END;
  683. $$ LANGUAGE plpgsql;
  684. -- ============================================
  685. -- 完成
  686. -- ============================================
  687. SELECT '灵越智报 v2.0 数据库表创建完成' AS result;
  688. SELECT COUNT(*) AS "表总数" FROM pg_tables WHERE schemaname = 'public';