V2026_01_21__add_document_blocks_and_entities.sql 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. -- 文档块表:存储文档的结构化内容(参考飞书Block设计)
  2. -- 核心设计:块内容由 elements 数组(TextElement)组成,实体作为元素嵌入
  3. CREATE TABLE IF NOT EXISTS document_blocks (
  4. id VARCHAR(64) PRIMARY KEY,
  5. document_id VARCHAR(64) NOT NULL,
  6. parent_id VARCHAR(64),
  7. children JSONB,
  8. block_index INTEGER NOT NULL,
  9. block_type VARCHAR(32) NOT NULL,
  10. elements JSONB,
  11. style JSONB,
  12. metadata JSONB,
  13. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  14. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  15. );
  16. -- 索引
  17. CREATE INDEX IF NOT EXISTS idx_document_blocks_document_id ON document_blocks(document_id);
  18. CREATE INDEX IF NOT EXISTS idx_document_blocks_parent_id ON document_blocks(parent_id);
  19. CREATE INDEX IF NOT EXISTS idx_document_blocks_block_type ON document_blocks(block_type);
  20. -- 全文搜索索引(对elements中的文本内容)
  21. CREATE INDEX IF NOT EXISTS idx_document_blocks_elements_gin ON document_blocks USING GIN (elements jsonb_path_ops);
  22. -- 文档实体标注表:存储文档中的实体/要素标记
  23. CREATE TABLE IF NOT EXISTS document_entities (
  24. id VARCHAR(64) PRIMARY KEY,
  25. document_id VARCHAR(64) NOT NULL,
  26. block_id VARCHAR(64),
  27. name VARCHAR(512) NOT NULL,
  28. entity_type VARCHAR(32) NOT NULL,
  29. value TEXT,
  30. block_char_start INTEGER,
  31. block_char_end INTEGER,
  32. global_char_start INTEGER,
  33. global_char_end INTEGER,
  34. anchor_before VARCHAR(100),
  35. anchor_after VARCHAR(100),
  36. source VARCHAR(16) DEFAULT 'auto',
  37. confidence DECIMAL(5,4),
  38. confirmed BOOLEAN DEFAULT FALSE,
  39. graph_node_id VARCHAR(64),
  40. metadata JSONB,
  41. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  42. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  43. );
  44. -- 索引
  45. CREATE INDEX IF NOT EXISTS idx_document_entities_document_id ON document_entities(document_id);
  46. CREATE INDEX IF NOT EXISTS idx_document_entities_block_id ON document_entities(block_id);
  47. CREATE INDEX IF NOT EXISTS idx_document_entities_type ON document_entities(entity_type);
  48. CREATE INDEX IF NOT EXISTS idx_document_entities_name ON document_entities(name);
  49. CREATE INDEX IF NOT EXISTS idx_document_entities_global_char ON document_entities(document_id, global_char_start, global_char_end);
  50. CREATE INDEX IF NOT EXISTS idx_document_entities_graph_node ON document_entities(graph_node_id);
  51. -- 注释
  52. COMMENT ON TABLE document_blocks IS '文档块 - 存储文档的结构化内容块';
  53. COMMENT ON TABLE document_entities IS '文档实体 - 存储文档中标记的实体/要素';
  54. COMMENT ON COLUMN document_blocks.block_type IS '块类型: heading1/heading2/heading3/paragraph/table/list/image/quote';
  55. COMMENT ON COLUMN document_entities.entity_type IS '实体类型: PERSON/ORG/LOC/DATE/NUMBER/MONEY/CONCEPT/DATA/DEVICE/TERM';
  56. COMMENT ON COLUMN document_entities.source IS '来源: auto=自动识别, manual=手动标注';