init.sql 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. -- =====================================================
  2. -- 灵越智报 2.0 数据库初始化脚本
  3. -- PostgreSQL 15+ / pgvector
  4. -- 设计参考: a_docs/数据库设计文档.md
  5. -- =====================================================
  6. CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
  7. CREATE EXTENSION IF NOT EXISTS vector;
  8. -- ==================== 用户表 ====================
  9. CREATE TABLE IF NOT EXISTS users (
  10. id VARCHAR(36) PRIMARY KEY,
  11. username VARCHAR(50) NOT NULL UNIQUE,
  12. email VARCHAR(100) UNIQUE,
  13. password_hash VARCHAR(255) NOT NULL,
  14. display_name VARCHAR(100),
  15. avatar_url VARCHAR(500),
  16. role VARCHAR(20) DEFAULT 'user',
  17. preferences JSONB DEFAULT '{}',
  18. is_active BOOLEAN DEFAULT TRUE,
  19. last_login_at TIMESTAMP,
  20. create_by VARCHAR(36),
  21. create_by_name VARCHAR(100),
  22. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  23. update_by VARCHAR(36),
  24. update_by_name VARCHAR(100),
  25. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  26. );
  27. CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
  28. CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);
  29. -- ==================== 文档表 ====================
  30. CREATE TABLE IF NOT EXISTS documents (
  31. id VARCHAR(36) PRIMARY KEY,
  32. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  33. name VARCHAR(255) NOT NULL,
  34. file_name VARCHAR(500) NOT NULL,
  35. file_path VARCHAR(500) NOT NULL,
  36. file_size BIGINT,
  37. file_type VARCHAR(20) NOT NULL,
  38. status VARCHAR(20) DEFAULT 'uploaded',
  39. parsed_text TEXT,
  40. page_count INT,
  41. word_count INT,
  42. entity_count INT DEFAULT 0,
  43. relation_count INT DEFAULT 0,
  44. rule_count INT DEFAULT 0,
  45. metadata JSONB DEFAULT '{}',
  46. del_flag BOOLEAN DEFAULT FALSE,
  47. create_by VARCHAR(36),
  48. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  49. update_by VARCHAR(36),
  50. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  51. );
  52. CREATE INDEX IF NOT EXISTS idx_documents_user ON documents(user_id);
  53. CREATE INDEX IF NOT EXISTS idx_documents_type ON documents(file_type);
  54. CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(status);
  55. -- ==================== 文档章节结构表 ====================
  56. CREATE TABLE IF NOT EXISTS document_sections (
  57. id VARCHAR(36) PRIMARY KEY,
  58. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  59. parent_id VARCHAR(36) REFERENCES document_sections(id) ON DELETE CASCADE,
  60. section_index INT NOT NULL,
  61. level INT NOT NULL DEFAULT 1,
  62. title VARCHAR(500),
  63. content TEXT,
  64. start_page INT,
  65. end_page INT,
  66. start_char INT,
  67. end_char INT,
  68. section_type VARCHAR(32) DEFAULT 'heading',
  69. table_data JSONB,
  70. image_path VARCHAR(500),
  71. image_caption VARCHAR(500),
  72. metadata JSONB DEFAULT '{}',
  73. sort_order INT DEFAULT 0,
  74. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  75. );
  76. CREATE INDEX IF NOT EXISTS idx_doc_sections_document ON document_sections(document_id);
  77. CREATE INDEX IF NOT EXISTS idx_doc_sections_parent ON document_sections(parent_id);
  78. CREATE INDEX IF NOT EXISTS idx_doc_sections_level ON document_sections(level);
  79. CREATE INDEX IF NOT EXISTS idx_doc_sections_type ON document_sections(section_type);
  80. -- ==================== 文档分块表 ====================
  81. CREATE TABLE IF NOT EXISTS document_chunks (
  82. id VARCHAR(36) PRIMARY KEY,
  83. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  84. section_id VARCHAR(36) REFERENCES document_sections(id) ON DELETE SET NULL,
  85. chunk_index INT NOT NULL,
  86. content TEXT NOT NULL,
  87. start_char INT,
  88. end_char INT,
  89. page_number INT,
  90. token_count INT,
  91. metadata JSONB DEFAULT '{}',
  92. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  93. );
  94. CREATE INDEX IF NOT EXISTS idx_doc_chunks_document ON document_chunks(document_id);
  95. CREATE INDEX IF NOT EXISTS idx_doc_chunks_section ON document_chunks(section_id);
  96. -- ==================== 解析任务表(parse-service 依赖) ====================
  97. CREATE TABLE IF NOT EXISTS parse_tasks (
  98. id VARCHAR(36) PRIMARY KEY,
  99. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  100. status VARCHAR(20) DEFAULT 'pending',
  101. progress INT DEFAULT 0,
  102. current_step VARCHAR(32),
  103. error_message TEXT,
  104. options JSONB DEFAULT '{}',
  105. started_at TIMESTAMP,
  106. completed_at TIMESTAMP,
  107. parse_status VARCHAR(20) DEFAULT 'pending',
  108. parse_progress INT DEFAULT 0,
  109. rag_status VARCHAR(20) DEFAULT 'pending',
  110. rag_progress INT DEFAULT 0,
  111. structured_status VARCHAR(20) DEFAULT 'pending',
  112. structured_progress INT DEFAULT 0,
  113. structured_element_count INT,
  114. structured_image_count INT,
  115. structured_table_count INT,
  116. ner_status VARCHAR(20) DEFAULT 'pending',
  117. ner_progress INT DEFAULT 0,
  118. ner_task_id VARCHAR(100),
  119. ner_entity_count INT,
  120. ner_relation_count INT,
  121. ner_message TEXT,
  122. graph_status VARCHAR(20) DEFAULT 'pending',
  123. graph_progress INT DEFAULT 0,
  124. create_by VARCHAR(36),
  125. create_by_name VARCHAR(100),
  126. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  127. update_by VARCHAR(36),
  128. update_by_name VARCHAR(100),
  129. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  130. );
  131. CREATE INDEX IF NOT EXISTS idx_parse_tasks_document ON parse_tasks(document_id);
  132. -- ==================== 文档结构化元素表(结构化解析 / 前端展示依赖) ====================
  133. CREATE TABLE IF NOT EXISTS document_elements (
  134. id VARCHAR(36) PRIMARY KEY,
  135. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  136. element_index INT,
  137. element_type VARCHAR(50),
  138. content TEXT,
  139. style JSONB DEFAULT '{}',
  140. runs JSONB DEFAULT '[]',
  141. image_url VARCHAR(500),
  142. image_path VARCHAR(500),
  143. image_alt VARCHAR(500),
  144. image_width INT,
  145. image_height INT,
  146. image_format VARCHAR(20),
  147. table_index INT,
  148. table_data JSONB,
  149. table_row_count INT,
  150. table_col_count INT,
  151. table_text TEXT,
  152. create_by VARCHAR(36),
  153. create_by_name VARCHAR(100),
  154. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  155. update_by VARCHAR(36),
  156. update_by_name VARCHAR(100),
  157. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  158. );
  159. CREATE INDEX IF NOT EXISTS idx_document_elements_document ON document_elements(document_id);
  160. -- ==================== 向量嵌入表 ====================
  161. CREATE TABLE IF NOT EXISTS embeddings (
  162. id VARCHAR(36) PRIMARY KEY,
  163. chunk_id VARCHAR(36) NOT NULL REFERENCES document_chunks(id) ON DELETE CASCADE,
  164. embedding vector(1536),
  165. model_name VARCHAR(100) DEFAULT 'text-embedding-ada-002',
  166. model_version VARCHAR(50),
  167. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  168. );
  169. CREATE INDEX IF NOT EXISTS idx_embeddings_chunk ON embeddings(chunk_id);
  170. CREATE INDEX IF NOT EXISTS idx_embeddings_vector ON embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
  171. -- ==================== 实体类型表 ====================
  172. CREATE TABLE IF NOT EXISTS entity_types (
  173. id VARCHAR(36) PRIMARY KEY,
  174. type_code VARCHAR(50) NOT NULL UNIQUE,
  175. type_name VARCHAR(100) NOT NULL,
  176. category VARCHAR(32),
  177. color VARCHAR(20),
  178. icon VARCHAR(50),
  179. patterns JSONB DEFAULT '[]',
  180. examples JSONB DEFAULT '[]',
  181. description TEXT,
  182. is_active BOOLEAN DEFAULT TRUE,
  183. sort_order INT DEFAULT 0,
  184. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  185. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  186. );
  187. CREATE INDEX IF NOT EXISTS idx_entity_types_code ON entity_types(type_code);
  188. -- ==================== 实体表 ====================
  189. CREATE TABLE IF NOT EXISTS entities (
  190. id VARCHAR(36) PRIMARY KEY,
  191. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  192. entity_type VARCHAR(50) NOT NULL,
  193. entity_text VARCHAR(500) NOT NULL,
  194. normalized_text VARCHAR(500),
  195. occurrence_count INT DEFAULT 1,
  196. confidence FLOAT DEFAULT 1.0,
  197. business_label VARCHAR(100),
  198. embedding vector(1536),
  199. is_confirmed BOOLEAN DEFAULT FALSE,
  200. is_merged BOOLEAN DEFAULT FALSE,
  201. merged_to_id VARCHAR(36),
  202. metadata JSONB DEFAULT '{}',
  203. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  204. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  205. UNIQUE(document_id, entity_type, normalized_text)
  206. );
  207. CREATE INDEX IF NOT EXISTS idx_entities_document ON entities(document_id);
  208. CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(entity_type);
  209. CREATE INDEX IF NOT EXISTS idx_entities_text ON entities(entity_text);
  210. CREATE INDEX IF NOT EXISTS idx_entities_normalized ON entities(normalized_text);
  211. CREATE INDEX IF NOT EXISTS idx_entities_label ON entities(business_label);
  212. -- ==================== 关系类型表 ====================
  213. CREATE TABLE IF NOT EXISTS relation_types (
  214. id VARCHAR(36) PRIMARY KEY,
  215. type_code VARCHAR(50) NOT NULL UNIQUE,
  216. type_name VARCHAR(100) NOT NULL,
  217. source_entity_types JSONB DEFAULT '[]',
  218. target_entity_types JSONB DEFAULT '[]',
  219. is_symmetric BOOLEAN DEFAULT FALSE,
  220. description TEXT,
  221. is_active BOOLEAN DEFAULT TRUE,
  222. sort_order INT DEFAULT 0,
  223. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  224. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  225. );
  226. CREATE INDEX IF NOT EXISTS idx_relation_types_code ON relation_types(type_code);
  227. -- ==================== 实体关系表 ====================
  228. CREATE TABLE IF NOT EXISTS entity_relations (
  229. id VARCHAR(36) PRIMARY KEY,
  230. document_id VARCHAR(36) NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  231. source_entity_id VARCHAR(36) NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
  232. target_entity_id VARCHAR(36) NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
  233. relation_type VARCHAR(50) NOT NULL,
  234. relation_text VARCHAR(500),
  235. confidence FLOAT DEFAULT 1.0,
  236. extraction_method VARCHAR(32) DEFAULT 'auto',
  237. evidence_text TEXT,
  238. is_confirmed BOOLEAN DEFAULT FALSE,
  239. metadata JSONB DEFAULT '{}',
  240. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  241. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  242. );
  243. CREATE INDEX IF NOT EXISTS idx_relations_document ON entity_relations(document_id);
  244. CREATE INDEX IF NOT EXISTS idx_relations_source ON entity_relations(source_entity_id);
  245. CREATE INDEX IF NOT EXISTS idx_relations_target ON entity_relations(target_entity_id);
  246. CREATE INDEX IF NOT EXISTS idx_relations_type ON entity_relations(relation_type);
  247. -- ==================== 规则表 ====================
  248. CREATE TABLE IF NOT EXISTS rules (
  249. id VARCHAR(36) PRIMARY KEY,
  250. document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  251. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  252. name VARCHAR(200) NOT NULL,
  253. description TEXT,
  254. category VARCHAR(50),
  255. rule_type VARCHAR(32) NOT NULL,
  256. source VARCHAR(32) DEFAULT 'auto',
  257. priority INT DEFAULT 0,
  258. status VARCHAR(20) DEFAULT 'draft',
  259. embedding vector(1536),
  260. is_global BOOLEAN DEFAULT FALSE,
  261. del_flag BOOLEAN DEFAULT FALSE,
  262. create_by VARCHAR(36),
  263. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  264. update_by VARCHAR(36),
  265. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  266. );
  267. CREATE INDEX IF NOT EXISTS idx_rules_document ON rules(document_id);
  268. CREATE INDEX IF NOT EXISTS idx_rules_user ON rules(user_id);
  269. CREATE INDEX IF NOT EXISTS idx_rules_type ON rules(rule_type);
  270. CREATE INDEX IF NOT EXISTS idx_rules_status ON rules(status);
  271. CREATE INDEX IF NOT EXISTS idx_rules_category ON rules(category);
  272. -- ==================== 规则条件表 ====================
  273. CREATE TABLE IF NOT EXISTS rule_conditions (
  274. id VARCHAR(36) PRIMARY KEY,
  275. rule_id VARCHAR(36) NOT NULL REFERENCES rules(id) ON DELETE CASCADE,
  276. condition_index INT NOT NULL,
  277. condition_type VARCHAR(32) NOT NULL,
  278. config JSONB NOT NULL,
  279. logic_operator VARCHAR(10) DEFAULT 'AND',
  280. sort_order INT DEFAULT 0,
  281. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  282. );
  283. CREATE INDEX IF NOT EXISTS idx_rule_conditions_rule ON rule_conditions(rule_id);
  284. -- ==================== 规则动作表 ====================
  285. CREATE TABLE IF NOT EXISTS rule_actions (
  286. id VARCHAR(36) PRIMARY KEY,
  287. rule_id VARCHAR(36) NOT NULL REFERENCES rules(id) ON DELETE CASCADE,
  288. action_index INT NOT NULL,
  289. action_type VARCHAR(32) NOT NULL,
  290. config JSONB NOT NULL,
  291. target_field VARCHAR(100),
  292. sort_order INT DEFAULT 0,
  293. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  294. );
  295. CREATE INDEX IF NOT EXISTS idx_rule_actions_rule ON rule_actions(rule_id);
  296. -- ==================== 生成任务表 ====================
  297. CREATE TABLE IF NOT EXISTS generations (
  298. id VARCHAR(36) PRIMARY KEY,
  299. user_id VARCHAR(36) NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  300. document_id VARCHAR(36) REFERENCES documents(id) ON DELETE SET NULL,
  301. template_id VARCHAR(36),
  302. name VARCHAR(200),
  303. applied_rules JSONB DEFAULT '[]',
  304. source_file_map JSONB DEFAULT '{}',
  305. extracted_data JSONB DEFAULT '{}',
  306. confirmed_data JSONB DEFAULT '{}',
  307. output_file_path VARCHAR(500),
  308. output_file_name VARCHAR(255),
  309. output_document_id VARCHAR(36),
  310. progress INTEGER,
  311. status VARCHAR(20) DEFAULT 'pending',
  312. error_message TEXT,
  313. started_at TIMESTAMP,
  314. reviewed_at TIMESTAMP,
  315. completed_at TIMESTAMP,
  316. del_flag BOOLEAN DEFAULT FALSE,
  317. create_by VARCHAR(36),
  318. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  319. update_by VARCHAR(36),
  320. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  321. );
  322. CREATE INDEX IF NOT EXISTS idx_generations_user ON generations(user_id);
  323. CREATE INDEX IF NOT EXISTS idx_generations_document ON generations(document_id);
  324. CREATE INDEX IF NOT EXISTS idx_generations_status ON generations(status);
  325. -- ==================== 生成输出表 ====================
  326. CREATE TABLE IF NOT EXISTS generation_outputs (
  327. id VARCHAR(36) PRIMARY KEY,
  328. generation_id VARCHAR(36) NOT NULL REFERENCES generations(id) ON DELETE CASCADE,
  329. version INT NOT NULL DEFAULT 1,
  330. file_path VARCHAR(500) NOT NULL,
  331. file_name VARCHAR(255) NOT NULL,
  332. file_size BIGINT,
  333. file_type VARCHAR(20) DEFAULT 'docx',
  334. share_token VARCHAR(100),
  335. share_expires_at TIMESTAMP,
  336. download_count INT DEFAULT 0,
  337. is_final BOOLEAN DEFAULT FALSE,
  338. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  339. );
  340. CREATE INDEX IF NOT EXISTS idx_gen_outputs_generation ON generation_outputs(generation_id);
  341. CREATE INDEX IF NOT EXISTS idx_gen_outputs_share ON generation_outputs(share_token);
  342. -- ==================== 模板/变量/来源文件(兼容现有模板流程) ====================
  343. CREATE TABLE IF NOT EXISTS templates (
  344. id VARCHAR(36) PRIMARY KEY,
  345. user_id VARCHAR(36) NOT NULL,
  346. name VARCHAR(200),
  347. description TEXT,
  348. base_document_id VARCHAR(36),
  349. status VARCHAR(20) DEFAULT 'draft',
  350. config JSONB DEFAULT '{}',
  351. is_public BOOLEAN DEFAULT FALSE,
  352. use_count INT DEFAULT 0,
  353. rating DOUBLE PRECISION,
  354. create_by VARCHAR(36),
  355. create_by_name VARCHAR(100),
  356. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  357. update_by VARCHAR(36),
  358. update_by_name VARCHAR(100),
  359. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  360. );
  361. CREATE INDEX IF NOT EXISTS idx_templates_user ON templates(user_id);
  362. CREATE TABLE IF NOT EXISTS variables (
  363. id VARCHAR(36) PRIMARY KEY,
  364. template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
  365. name VARCHAR(100) NOT NULL,
  366. display_name VARCHAR(200),
  367. variable_group VARCHAR(100),
  368. category VARCHAR(50),
  369. location JSONB,
  370. example_value TEXT,
  371. value_type VARCHAR(20),
  372. source_file_alias VARCHAR(100),
  373. source_type VARCHAR(30),
  374. source_config JSONB DEFAULT '{}',
  375. extract_type VARCHAR(30),
  376. extract_config JSONB DEFAULT '{}',
  377. display_order INT DEFAULT 0,
  378. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  379. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  380. );
  381. CREATE INDEX IF NOT EXISTS idx_variables_template ON variables(template_id);
  382. CREATE TABLE IF NOT EXISTS source_files (
  383. id VARCHAR(36) PRIMARY KEY,
  384. template_id VARCHAR(36) NOT NULL REFERENCES templates(id) ON DELETE CASCADE,
  385. alias VARCHAR(100) NOT NULL,
  386. description TEXT,
  387. file_types JSONB DEFAULT '[]',
  388. required BOOLEAN DEFAULT TRUE,
  389. example_document_id VARCHAR(36),
  390. display_order INT DEFAULT 0,
  391. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  392. );
  393. CREATE INDEX IF NOT EXISTS idx_source_files_template ON source_files(template_id);
  394. -- ==================== 标准实体表 ====================
  395. CREATE TABLE IF NOT EXISTS standard_entities (
  396. id VARCHAR(36) PRIMARY KEY,
  397. entity_type VARCHAR(50) NOT NULL,
  398. name VARCHAR(500) NOT NULL,
  399. aliases JSONB DEFAULT '[]',
  400. attributes JSONB DEFAULT '{}',
  401. embedding vector(1536),
  402. occurrence_count INT DEFAULT 1,
  403. document_count INT DEFAULT 1,
  404. is_verified BOOLEAN DEFAULT FALSE,
  405. del_flag BOOLEAN DEFAULT FALSE,
  406. create_by VARCHAR(36),
  407. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  408. update_by VARCHAR(36),
  409. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  410. );
  411. CREATE INDEX IF NOT EXISTS idx_std_entities_type ON standard_entities(entity_type);
  412. CREATE INDEX IF NOT EXISTS idx_std_entities_name ON standard_entities(name);
  413. CREATE INDEX IF NOT EXISTS idx_std_entities_vector ON standard_entities USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
  414. -- ==================== 标准关系表 ====================
  415. CREATE TABLE IF NOT EXISTS standard_relations (
  416. id VARCHAR(36) PRIMARY KEY,
  417. source_entity_id VARCHAR(36) NOT NULL REFERENCES standard_entities(id) ON DELETE CASCADE,
  418. target_entity_id VARCHAR(36) NOT NULL REFERENCES standard_entities(id) ON DELETE CASCADE,
  419. relation_type VARCHAR(50) NOT NULL,
  420. attributes JSONB DEFAULT '{}',
  421. occurrence_count INT DEFAULT 1,
  422. is_verified BOOLEAN DEFAULT FALSE,
  423. del_flag BOOLEAN DEFAULT FALSE,
  424. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  425. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  426. );
  427. CREATE INDEX IF NOT EXISTS idx_std_relations_source ON standard_relations(source_entity_id);
  428. CREATE INDEX IF NOT EXISTS idx_std_relations_target ON standard_relations(target_entity_id);
  429. CREATE INDEX IF NOT EXISTS idx_std_relations_type ON standard_relations(relation_type);
  430. -- ==================== 实体合并记录表 ====================
  431. CREATE TABLE IF NOT EXISTS entity_merge_records (
  432. id VARCHAR(36) PRIMARY KEY,
  433. source_entity_id VARCHAR(36) NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
  434. target_entity_id VARCHAR(36) NOT NULL REFERENCES standard_entities(id) ON DELETE CASCADE,
  435. similarity_score FLOAT,
  436. merge_type VARCHAR(32) DEFAULT 'auto',
  437. create_by VARCHAR(36),
  438. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  439. );
  440. CREATE INDEX IF NOT EXISTS idx_merge_records_source ON entity_merge_records(source_entity_id);
  441. CREATE INDEX IF NOT EXISTS idx_merge_records_target ON entity_merge_records(target_entity_id);
  442. -- ==================== 预置数据 ====================
  443. INSERT INTO entity_types (id, type_code, type_name, category, color, icon, description, sort_order) VALUES
  444. ('et-001', 'PERSON', '人名', 'basic', '#1890ff', 'user', '人物姓名,如:张经理、李总', 1),
  445. ('et-002', 'ORG', '机构', 'basic', '#faad14', 'bank', '组织机构,如:成都检测公司、环保局', 2),
  446. ('et-003', 'LOC', '地点', 'basic', '#52c41a', 'environment', '地理位置,如:成都市高新区', 3),
  447. ('et-004', 'DATE', '日期', 'basic', '#722ed1', 'calendar', '日期时间,如:2024年5月15日', 4),
  448. ('et-005', 'NUMBER', '数值', 'basic', '#13c2c2', 'number', '数值数量,如:50分贝、100万元', 5),
  449. ('et-006', 'DEVICE', '设备', 'domain', '#eb2f96', 'tool', '设备仪器,如:噪音检测设备、光谱仪', 10),
  450. ('et-007', 'PROJECT', '项目', 'domain', '#f5222d', 'project', '项目名称,如:环境监测项目', 11),
  451. ('et-008', 'TERM', '术语', 'domain', '#a0d911', 'book', '专业术语,如:COD、BOD、PM2.5', 12),
  452. ('et-009', 'STANDARD', '标准', 'domain', '#2f54eb', 'file-text', '标准规范,如:GB 3096-2008', 13),
  453. ('et-010', 'MATERIAL', '材料', 'domain', '#fa8c16', 'experiment', '材料物质,如:甲醛、苯', 14),
  454. ('et-011', 'METHOD', '方法', 'domain', '#1890ff', 'api', '检测方法,如:气相色谱法', 15)
  455. ON CONFLICT (id) DO NOTHING;
  456. INSERT INTO relation_types (id, type_code, type_name, source_entity_types, target_entity_types, is_symmetric, description) VALUES
  457. ('rt-001', 'LOCATED_IN', '位于', '["ORG", "PERSON", "PROJECT"]', '["LOC"]', false, '实体位于某地'),
  458. ('rt-002', 'WORKS_FOR', '任职于', '["PERSON"]', '["ORG"]', false, '人员任职于机构'),
  459. ('rt-003', 'BELONGS_TO', '属于', '["ORG", "PROJECT"]', '["ORG"]', false, '隶属关系'),
  460. ('rt-004', 'RESPONSIBLE_FOR', '负责', '["PERSON", "ORG"]', '["PROJECT", "LOC"]', false, '负责某事/某地'),
  461. ('rt-005', 'USES', '使用', '["ORG", "PERSON", "PROJECT"]', '["DEVICE", "METHOD"]', false, '使用设备/方法'),
  462. ('rt-006', 'DETECTS', '检测', '["ORG", "PERSON", "DEVICE"]', '["MATERIAL", "TERM"]', false, '检测某物质/指标'),
  463. ('rt-007', 'CONTAINS', '包含', '["PROJECT", "ORG"]', '["PROJECT", "TERM", "MATERIAL"]', false, '包含关系'),
  464. ('rt-008', 'COMPLIES_WITH', '符合', '["PROJECT", "ORG", "METHOD"]', '["STANDARD"]', false, '符合某标准'),
  465. ('rt-009', 'PRODUCES', '产生', '["ORG", "PROJECT", "DEVICE"]', '["NUMBER", "MATERIAL"]', false, '产生数据/物质'),
  466. ('rt-010', 'COOPERATES', '合作', '["ORG"]', '["ORG"]', true, '机构合作关系'),
  467. ('rt-011', 'OCCURS_AT', '发生于', '["PROJECT"]', '["DATE"]', false, '发生在某时间')
  468. ON CONFLICT (id) DO NOTHING;