V2026_01_21__add_document_blocks_and_entities.sql 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. -- 文档块表:存储文档的结构化内容(参考飞书Block设计)
  2. -- 核心设计:块内容由 elements 数组(TextElement)组成,实体作为元素嵌入
  3. CREATE TABLE IF NOT EXISTS document_blocks (
  4. id VARCHAR(64) PRIMARY KEY,
  5. document_id VARCHAR(64) NOT NULL,
  6. parent_id VARCHAR(64),
  7. children JSONB,
  8. block_index INTEGER NOT NULL,
  9. block_type VARCHAR(32) NOT NULL,
  10. elements JSONB,
  11. style JSONB,
  12. metadata JSONB,
  13. create_by VARCHAR(64),
  14. create_by_name VARCHAR(128),
  15. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  16. update_by VARCHAR(64),
  17. update_by_name VARCHAR(128),
  18. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  19. );
  20. -- 索引
  21. CREATE INDEX IF NOT EXISTS idx_document_blocks_document_id ON document_blocks(document_id);
  22. CREATE INDEX IF NOT EXISTS idx_document_blocks_parent_id ON document_blocks(parent_id);
  23. CREATE INDEX IF NOT EXISTS idx_document_blocks_block_type ON document_blocks(block_type);
  24. -- 全文搜索索引(对elements中的文本内容)
  25. CREATE INDEX IF NOT EXISTS idx_document_blocks_elements_gin ON document_blocks USING GIN (elements jsonb_path_ops);
  26. -- 文档实体标注表:存储文档中的实体/要素标记
  27. CREATE TABLE IF NOT EXISTS document_entities (
  28. id VARCHAR(64) PRIMARY KEY,
  29. document_id VARCHAR(64) NOT NULL,
  30. block_id VARCHAR(64),
  31. name VARCHAR(512) NOT NULL,
  32. entity_type VARCHAR(32) NOT NULL,
  33. value TEXT,
  34. block_char_start INTEGER,
  35. block_char_end INTEGER,
  36. global_char_start INTEGER,
  37. global_char_end INTEGER,
  38. anchor_before VARCHAR(100),
  39. anchor_after VARCHAR(100),
  40. source VARCHAR(16) DEFAULT 'auto',
  41. confidence DECIMAL(5,4),
  42. confirmed BOOLEAN DEFAULT FALSE,
  43. graph_node_id VARCHAR(64),
  44. metadata JSONB,
  45. create_by VARCHAR(64),
  46. create_by_name VARCHAR(128),
  47. create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  48. update_by VARCHAR(64),
  49. update_by_name VARCHAR(128),
  50. update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  51. );
  52. -- 索引
  53. CREATE INDEX IF NOT EXISTS idx_document_entities_document_id ON document_entities(document_id);
  54. CREATE INDEX IF NOT EXISTS idx_document_entities_block_id ON document_entities(block_id);
  55. CREATE INDEX IF NOT EXISTS idx_document_entities_type ON document_entities(entity_type);
  56. CREATE INDEX IF NOT EXISTS idx_document_entities_name ON document_entities(name);
  57. CREATE INDEX IF NOT EXISTS idx_document_entities_global_char ON document_entities(document_id, global_char_start, global_char_end);
  58. CREATE INDEX IF NOT EXISTS idx_document_entities_graph_node ON document_entities(graph_node_id);
  59. -- 注释
  60. COMMENT ON TABLE document_blocks IS '文档块 - 存储文档的结构化内容块';
  61. COMMENT ON TABLE document_entities IS '文档实体 - 存储文档中标记的实体/要素';
  62. COMMENT ON COLUMN document_blocks.block_type IS '块类型: heading1/heading2/heading3/paragraph/table/list/image/quote';
  63. COMMENT ON COLUMN document_entities.entity_type IS '实体类型: PERSON/ORG/LOC/DATE/NUMBER/MONEY/CONCEPT/DATA/DEVICE/TERM';
  64. COMMENT ON COLUMN document_entities.source IS '来源: auto=自动识别, manual=手动标注';