进度报告.html 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666
  1. <!DOCTYPE html>
  2. <html lang="zh-CN">
  3. <head>
  4. <meta charset="UTF-8">
  5. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6. <title>进度报告</title>
  7. <style>
  8. body {
  9. font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  10. line-height: 1.6;
  11. color: #333;
  12. max-width: 1200px;
  13. margin: 0 auto;
  14. padding: 20px;
  15. background-color: #fff;
  16. }
  17. h1 {
  18. color: #2c3e50;
  19. border-bottom: 3px solid #3498db;
  20. padding-bottom: 10px;
  21. }
  22. h2 {
  23. color: #34495e;
  24. border-bottom: 2px solid #ecf0f1;
  25. padding-bottom: 8px;
  26. margin-top: 30px;
  27. }
  28. h3 {
  29. color: #555;
  30. margin-top: 25px;
  31. }
  32. img {
  33. max-width: 100%;
  34. height: auto;
  35. border: 1px solid #ddd;
  36. border-radius: 4px;
  37. margin: 10px 0;
  38. box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  39. }
  40. code {
  41. background-color: #f4f4f4;
  42. padding: 2px 6px;
  43. border-radius: 3px;
  44. font-family: "Courier New", monospace;
  45. font-size: 0.9em;
  46. }
  47. pre {
  48. background-color: #f4f4f4;
  49. padding: 15px;
  50. border-radius: 5px;
  51. overflow-x: auto;
  52. border-left: 4px solid #3498db;
  53. }
  54. blockquote {
  55. border-left: 4px solid #3498db;
  56. margin: 0;
  57. padding-left: 20px;
  58. color: #666;
  59. }
  60. table {
  61. border-collapse: collapse;
  62. width: 100%;
  63. margin: 20px 0;
  64. }
  65. th, td {
  66. border: 1px solid #ddd;
  67. padding: 12px;
  68. text-align: left;
  69. }
  70. th {
  71. background-color: #3498db;
  72. color: white;
  73. }
  74. tr:nth-child(even) {
  75. background-color: #f9f9f9;
  76. }
  77. ul, ol {
  78. padding-left: 30px;
  79. }
  80. li {
  81. margin: 8px 0;
  82. }
  83. /* Nested list spacing */
  84. li ul, li ol {
  85. margin-top: 8px;
  86. margin-bottom: 8px;
  87. }
  88. /* Paragraphs inside list items: inline by default, block when followed by more content */
  89. li > p {
  90. margin: 0;
  91. display: inline;
  92. }
  93. li > p:not(:last-child) {
  94. margin-bottom: 8px;
  95. display: block;
  96. }
  97. strong {
  98. color: #2c3e50;
  99. }
  100. a {
  101. color: #3498db;
  102. text-decoration: none;
  103. }
  104. a:hover {
  105. text-decoration: underline;
  106. }
  107. hr {
  108. border: none;
  109. border-top: 2px solid #ecf0f1;
  110. margin: 30px 0;
  111. }
  112. @media (max-width: 480px) { /* Small-screen overrides: must stay after the base rules so they win the cascade at equal specificity */
  113. body {
  114. padding: 12px;
  115. font-size: 14px;
  116. }
  117. h1 {
  118. font-size: 24px;
  119. padding-bottom: 8px;
  120. }
  121. h2 {
  122. font-size: 20px;
  123. margin-top: 20px;
  124. padding-bottom: 6px;
  125. }
  126. h3 {
  127. font-size: 18px;
  128. margin-top: 18px;
  129. }
  130. pre {
  131. padding: 10px;
  132. font-size: 12px;
  133. }
  134. code {
  135. font-size: 12px;
  136. }
  137. table {
  138. font-size: 12px;
  139. }
  140. th, td {
  141. padding: 8px;
  142. }
  143. ul, ol {
  144. padding-left: 20px;
  145. }
  146. }
  147. </style>
  148. </head>
  149. <body>
  150. <h1 id="20-">📊 灵越智报 2.0 - 当前进度总结</h1>
  151. <p><strong>整体进度:90%</strong> | <strong>报告日期:2026-01-24</strong></p>
  152. <hr />
  153. <h2 id="90">✅ 已完成(90%)</h2>
  154. <h3 id="_1">基础设施</h3>
  155. <ul>
  156. <li>Spring Boot 3.1.5 单体应用架构(lingyue-starter)</li>
  157. <li>数据库(PostgreSQL + pgvector)、缓存(Redis)、消息队列(RabbitMQ)配置完成</li>
  158. <li>6大服务模块框架搭建完成</li>
  159. <li><strong>数据库重建脚本</strong> → ✅ 完整的 <code>rebuild_all.sh</code> 脚本</li>
  160. </ul>
  161. <h3 id="_2">核心模块现状</h3>
  162. <table>
  163. <thead>
  164. <tr>
  165. <th>模块</th>
  166. <th>功能</th>
  167. <th style="text-align: center;">状态</th>
  168. <th>说明</th>
  169. </tr>
  170. </thead>
  171. <tbody>
  172. <tr>
  173. <td><strong>认证服务</strong></td>
  174. <td>注册/登录/Token</td>
  175. <td style="text-align: center;">✅</td>
  176. <td>JWT 认证,密码修改</td>
  177. </tr>
  178. <tr>
  179. <td><strong>文档管理</strong></td>
  180. <td>上传/列表/删除</td>
  181. <td style="text-align: center;">✅</td>
  182. <td>级联删除,批量操作</td>
  183. </tr>
  184. <tr>
  185. <td><strong>文档解析</strong></td>
  186. <td>OCR/文本提取</td>
  187. <td style="text-align: center;">✅</td>
  188. <td>Word/PDF/图片</td>
  189. </tr>
  190. <tr>
  191. <td><strong>结构化解析</strong></td>
  192. <td>段落/图片/表格</td>
  193. <td style="text-align: center;">✅</td>
  194. <td>保留样式和顺序</td>
  195. </tr>
  196. <tr>
  197. <td><strong>RAG 向量化</strong></td>
  198. <td>分块/嵌入/检索</td>
  199. <td style="text-align: center;">✅</td>
  200. <td>pgvector + Ollama</td>
  201. </tr>
  202. <tr>
  203. <td><strong>NER 实体识别</strong></td>
  204. <td>实体/关系提取</td>
  205. <td style="text-align: center;">✅</td>
  206. <td>DeepSeek API</td>
  207. </tr>
  208. <tr>
  209. <td><strong>图数据库</strong></td>
  210. <td>节点/关系存储</td>
  211. <td style="text-align: center;">✅</td>
  212. <td>PostgreSQL + Neo4j</td>
  213. </tr>
  214. <tr>
  215. <td><strong>数据源管理</strong></td>
  216. <td>CRUD/取值/聚合</td>
  217. <td style="text-align: center;">✅</td>
  218. <td>支持多种值类型</td>
  219. </tr>
  220. <tr>
  221. <td><strong>任务中心</strong></td>
  222. <td>多阶段进度跟踪</td>
  223. <td style="text-align: center;">✅</td>
  224. <td>6阶段流水线</td>
  225. </tr>
  226. <tr>
  227. <td><strong>模板系统</strong></td>
  228. <td>数据模型</td>
  229. <td style="text-align: center;">✅</td>
  230. <td>v2.0 重构完成</td>
  231. </tr>
  232. <tr>
  233. <td><strong>模板系统</strong></td>
  234. <td>Service/Controller</td>
  235. <td style="text-align: center;">✅</td>
  236. <td>完整 CRUD + 状态管理</td>
  237. </tr>
  238. <tr>
  239. <td><strong>报告生成</strong></td>
  240. <td>变量提取/文档生成</td>
  241. <td style="text-align: center;">✅</td>
  242. <td>基础流程实现完成</td>
  243. </tr>
  244. <tr>
  245. <td><strong>前端界面</strong></td>
  246. <td>所有页面</td>
  247. <td style="text-align: center;">⏳</td>
  248. <td>Flutter Web 待开发</td>
  249. </tr>
  250. </tbody>
  251. </table>
  252. <h3 id="_3">模块完成度</h3>
  253. <div class="codehilite"><pre><span></span><code>认证服务 ████████████████████ 100%
  254. 文档管理 ████████████████████ 100%
  255. 文档解析 ████████████████████ 100%
  256. RAG服务 ████████████████████ 100%
  257. NER服务 ████████████████████ 100%
  258. 图数据库 ████████████████████ 100%
  259. 数据源管理 ████████████████████ 100%
  260. 任务中心 ████████████████████ 100%
  261. 模板系统 ████████████████████ 100% (完整实现)
  262. 报告生成 ████████████████░░░░ 80% (基础流程)
  263. 前端界面 ████░░░░░░░░░░░░░░░░ 20% (HTML原型)
  264. </code></pre></div>
  265. <h3 id="2026-01-24">新增功能(2026-01-24)✅ 模板系统完整实现 + 原型适配</h3>
  266. <ul>
  267. <li>✅ <strong>Service 层(6个服务)</strong></li>
  268. <li><code>TemplateService</code> - 模板 CRUD、发布、归档、复制</li>
  269. <li><code>SourceFileService</code> - 来源文件定义管理、重排序</li>
  270. <li><code>VariableService</code> - 变量管理、重排序、预览提取、<strong>按类别分组查询</strong></li>
  271. <li><code>GenerationService</code> - 生成任务管理、执行、进度</li>
  272. <li><code>ExtractionService</code> - 变量提取核心逻辑</li>
  273. <li>
  274. <p><code>DocumentGenerationService</code> - 文档生成、文件下载</p>
  275. </li>
  276. <li>
  277. <p>✅ <strong>Controller 层(2个控制器)</strong></p>
  278. </li>
  279. <li><code>TemplateController</code> - 模板/来源文件/变量统一管理、<strong>变量分组API</strong></li>
  280. <li>
  281. <p><code>GenerationController</code> - 生成任务 CRUD 和执行</p>
  282. </li>
  283. <li>
  284. <p>✅ <strong>DTO(14个)</strong></p>
  285. </li>
  286. <li>请求:CreateTemplateRequest, UpdateTemplateRequest, AddSourceFileRequest, AddVariableRequest, CreateGenerationRequest, UpdateVariableValueRequest, ReorderRequest</li>
  287. <li>
  288. <p>响应:TemplateDetailResponse, TemplateListResponse, SourceFileResponse, VariableResponse, GenerationResponse, GenerationProgressResponse, VariablePreviewResponse</p>
  289. </li>
  290. <li>
  291. <p>✅ <strong>工具类</strong></p>
  292. </li>
  293. <li>
  294. <p><code>SecurityUtils</code> - 从 JWT 获取当前用户ID</p>
  295. </li>
  296. <li>
  297. <p>✅ <strong>原型适配 - Variable.category 字段(新增)</strong></p>
  298. </li>
  299. <li>变量类别用于前端显示分类(与 <code>灵越智报_完整交互版.html</code> 原型对齐)</li>
  300. <li>类别定义:<ul>
  301. <li><code>entity</code> - 核心实体(蓝色):智慧园区、公司名等</li>
  302. <li><code>concept</code> - 概念/技术(紫色):产业升级、智能化管理等</li>
  303. <li><code>data</code> - 数据/指标(绿色):金额、面积、增长率等</li>
  304. <li><code>location</code> - 地点/组织(橙色):华南地区、华南事业部等</li>
  305. <li><code>asset</code> - 资源模板(粉色):图表、结论模板等</li>
  306. </ul>
  307. </li>
  308. <li>新增 API:<code>GET /api/v1/templates/{templateId}/variables/grouped</code> - 按类别分组返回变量</li>
  309. <li>数据库迁移:<code>V2026_01_24_01__add_variable_category.sql</code></li>
  310. </ul>
  311. <h3 id="2026-01-23-v20">新增功能(2026-01-23)✅ 模板系统 v2.0 数据模型重构</h3>
  312. <ul>
  313. <li>✅ <strong>设计理念升级</strong></li>
  314. <li>从「规则配置驱动」改为「示例文档驱动」</li>
  315. <li>
  316. <p>用户上传完整报告 → 标记变量 → 保存为模板 → 生成新报告</p>
  317. </li>
  318. <li>
  319. <p>✅ <strong>数据库重构</strong></p>
  320. </li>
  321. <li>新增 <code>all_tables.sql</code> 完整数据库表结构(21张表)</li>
  322. <li>新增 <code>template_tables.sql</code> 模板系统专用表</li>
  323. <li>
  324. <p><code>rebuild_all.sh</code> 增加 <code>--simple</code> 模式(单文件初始化)</p>
  325. </li>
  326. <li>
  327. <p>✅ <strong>新实体类(4个)</strong></p>
  328. </li>
  329. <li><code>Template</code> - 报告模板(替代 Project)</li>
  330. <li><code>SourceFile</code> - 来源文件定义(用别名标识)</li>
  331. <li><code>Variable</code> - 模板变量(绑定到文档位置)</li>
  332. <li>
  333. <p><code>Generation</code> - 生成任务</p>
  334. </li>
  335. <li>
  336. <p>✅ <strong>新 Repository(4个)</strong></p>
  337. </li>
  338. <li>
  339. <p><code>TemplateRepository</code>, <code>SourceFileRepository</code>, <code>VariableRepository</code>, <code>GenerationRepository</code></p>
  340. </li>
  341. <li>
  342. <p>✅ <strong>新 DTO 类</strong></p>
  343. </li>
  344. <li><code>VariableLocation</code> - 变量在文档中的位置</li>
  345. <li><code>ReferenceSourceConfig</code> - 引用来源配置</li>
  346. </ul>
  347. <h3 id="2026-01-22-">新增功能(2026-01-22 晚)✅ 数据提取规则系统 - 阶段一、二(已重构)</h3>
  348. <ul>
  349. <li>✅ <strong>第一阶段:基础设施搭建</strong></li>
  350. <li>Maven 模块 <code>extract-service</code> 框架搭建完成</li>
  351. <li>启动类 <code>ExtractServiceApplication.java</code></li>
  352. <li>配置文件 <code>application.properties</code>(端口 8086)</li>
  353. <li>健康检查接口 <code>/api/v1/extract/health</code></li>
  354. <li><del>数据库表(5张核心表)</del> → 已重构为模板系统 v2.0</li>
  355. <li>
  356. <p>集成配置:父 pom.xml、lingyue-starter 依赖</p>
  357. </li>
  358. <li>
  359. <p>✅ <strong>第二阶段:核心实体与 Repository 层</strong>(已重构为 v2.0)</p>
  360. </li>
  361. <li><del>实体类(5个):Project, SourceDocument, ExtractRule...</del></li>
  362. <li>→ 新实体:Template, SourceFile, Variable, Generation</li>
  363. </ul>
  364. <h3 id="2026-01-22">新增功能(2026-01-22 下午)✅ 补充缺失接口</h3>
  365. <ul>
  366. <li>✅ <strong>认证服务接口完善</strong></li>
  367. <li><code>PUT /auth/profile</code> - 用户信息修改(用户名、邮箱、头像)</li>
  368. <li>
  369. <p><code>PUT /auth/password</code> - 密码修改(验证旧密码、新密码确认)</p>
  370. </li>
  371. <li>
  372. <p>✅ <strong>文档管理接口完善</strong></p>
  373. </li>
  374. <li><code>PUT /api/v1/documents/{id}</code> - 文档更新(名称、状态、元数据)</li>
  375. <li><code>DELETE /api/v1/documents/{id}</code> - <strong>级联删除增强</strong><ul>
  376. <li>删除向量嵌入、文本分块、图关系、图节点</li>
  377. <li>删除结构化元素、解析任务、文档记录</li>
  378. <li>删除文本文件和图片目录</li>
  379. </ul>
  380. </li>
  381. <li><code>POST /api/v1/documents/batch-delete</code> - 批量删除(返回成功/失败列表)</li>
  382. </ul>
  383. <h3 id="2026-01-22_1">新增功能(2026-01-22 上午)✅ 一键上传全自动处理 + 数据源管理</h3>
  384. <ul>
  385. <li>✅ <strong>一键上传全自动处理流程</strong></li>
  386. <li>上传接口:<code>POST /api/v1/parse/upload</code>(唯一入口)</li>
  387. <li>自动触发:RAG向量化 → 结构化解析 → NER实体提取 → 图数据库存储</li>
  388. <li>
  389. <p>手动重处理 API 保留,支持单独重新生成</p>
  390. </li>
  391. <li>
  392. <p>✅ <strong>数据源管理系统</strong></p>
  393. </li>
  394. <li>支持绑定 <code>GraphNode</code> 和 <code>DocumentElement</code> 两种类型</li>
  395. <li>支持多种值类型:text/image/table/mixed</li>
  396. <li>支持多种聚合方式:first/last/concat/sum/avg/list</li>
  397. <li>
  398. <p>完整 CRUD + 取值 + 批量取值 API</p>
  399. </li>
  400. <li>
  401. <p>✅ <strong>任务中心进度跟踪</strong></p>
  402. </li>
  403. <li>6阶段进度:upload → parse → rag → structured → ner → graph</li>
  404. <li>阶段权重:parse(15%) + rag(10%) + structured(15%) + ner(50%) + graph(10%)</li>
  405. <li>
  406. <p>实时进度查询 API + 结果摘要展示</p>
  407. </li>
  408. <li>
  409. <p>✅ <strong>端到端测试脚本完善</strong></p>
  410. </li>
  411. <li><code>test_upload_api.sh</code> 支持完整流程测试</li>
  412. <li>自动轮询等待 NER 完成(基于 GraphNode 数量稳定检测)</li>
  413. <li>支持任务中心查询、数据源操作等</li>
  414. </ul>
  415. <h3 id="2026-01-21">新增功能(2026-01-21)✅ 文档结构化解析完成</h3>
  416. <ul>
  417. <li>✅ <strong>Word 文档结构化提取</strong></li>
  418. <li>完整提取段落、标题、图片、表格等元素</li>
  419. <li>保留元素在文档中的原始顺序和位置</li>
  420. <li>图片自动提取并保存到独立目录</li>
  421. <li>支持图片尺寸、格式、描述信息提取</li>
  422. <li>✅ <strong>DocumentElement 实体存储</strong></li>
  423. <li>新增 <code>document_elements</code> 表存储结构化内容</li>
  424. <li>支持 JSONB 字段存储样式和表格数据</li>
  425. <li>自定义 <code>PostgreSqlJsonbTypeHandler</code> 处理 JSONB 类型</li>
  426. <li>✅ <strong>结构化解析 API</strong></li>
  427. <li><code>GET /parse/structured/{documentId}</code> - 触发结构化解析</li>
  428. <li><code>GET /parse/elements/{documentId}</code> - 获取所有元素</li>
  429. <li><code>GET /parse/elements/{documentId}/images</code> - 获取图片列表</li>
  430. <li><code>GET /parse/elements/{documentId}/tables</code> - 获取表格列表</li>
  431. <li>✅ <strong>数据库脚本完善</strong></li>
  432. <li>完整的数据库重建脚本 <code>rebuild_all.sh</code></li>
  433. <li>修复 UUID/VARCHAR 类型不兼容问题</li>
  434. <li>统一表结构字段命名规范</li>
  435. </ul>
  436. <h3 id="2026-01-20-ner-demo">新增功能(2026-01-20)✅ NER Demo 测试完成</h3>
  437. <ul>
  438. <li>✅ <strong>DeepSeek API 集成</strong></li>
  439. <li>接入阿里云百炼平台 DeepSeek V3.2 实验版</li>
  440. <li>支持长文本分块处理(每块 2000 字符)</li>
  441. <li>完整的错误处理和重试机制</li>
  442. <li>✅ <strong>异步任务轮询模式</strong></li>
  443. <li>Python 端:<code>/ner/extract/async</code> 提交异步任务</li>
  444. <li>Python 端:<code>/ner/task/{task_id}</code> 查询任务状态和进度</li>
  445. <li>Java 端:轮询模式(每 3 秒)查询进度,实时日志反馈</li>
  446. <li>最大等待时间 10 分钟,失败自动回退到同步 API</li>
  447. <li>✅ <strong>图数据库存储验证</strong></li>
  448. <li>实体成功存储到 <code>graph_nodes</code> 表(测试文档:188 个实体)</li>
  449. <li>关系成功存储到 <code>graph_relations</code> 表(测试文档:82 个关系)</li>
  450. <li>支持按文档 ID 导出 CSV/JSON 数据</li>
  451. <li>✅ <strong>Pydantic 序列化修复</strong></li>
  452. <li>修复 Python 模型 alias 序列化问题(snake_case → camelCase)</li>
  453. <li>确保 Java 端正确解析关系数据中的 <code>fromEntityId</code>、<code>toEntityId</code></li>
  454. </ul>
  455. <h3 id="2026-01-19">新增功能(2026-01-19)</h3>
  456. <ul>
  457. <li>✅ <strong>NER 服务完整实现</strong></li>
  458. <li>Python FastAPI NER 服务(规则模式,支持扩展 spaCy/Transformers/API)</li>
  459. <li>Java NER 客户端(PythonNerClient)</li>
  460. <li>NER DTO 类(NerRequest, NerResponse, EntityInfo, RelationInfo 等)</li>
  461. <li>NER API 接口(/api/ner/extract, /api/ner/document/{id})</li>
  462. <li>✅ <strong>关系抽取服务</strong></li>
  463. <li>基于规则的关系抽取(位置邻近性、语义模式匹配)</li>
  464. <li>关系抽取 API(/api/ner/relations)</li>
  465. <li>✅ <strong>图数据库服务扩展</strong></li>
  466. <li>GraphNodeService(节点/关系 CRUD、批量操作)</li>
  467. <li>图数据库 API(/api/graph/nodes, /api/graph/relations)</li>
  468. <li>文档节点统计接口</li>
  469. <li>✅ <strong>解析流程集成</strong></li>
  470. <li>文档解析完成事件(DocumentParsedEvent)</li>
  471. <li>NER 自动触发监听器</li>
  472. <li>解析 → RAG → NER → 图数据库完整链路</li>
  473. <li>NER 服务配置项(ner.python-service.url 等)</li>
  474. </ul>
  475. <h3 id="2026-01-17">已完成功能(2026-01-17)</h3>
  476. <ul>
  477. <li>✅ <strong>单体应用架构重构</strong> - 统一到 lingyue-starter 模块</li>
  478. <li>✅ <strong>配置文件统一</strong> - 全部使用 .properties 格式</li>
  479. <li>✅ <strong>接口测试完成</strong> - 核心接口验证通过</li>
  480. </ul>
  481. <h3 id="2026-01-16">已完成功能(2026-01-16)</h3>
  482. <ul>
  483. <li>✅ pgvector 向量数据库集成</li>
  484. <li>✅ 文本分块服务(智能句子边界切分)</li>
  485. <li>✅ Ollama Embedding 向量化服务</li>
  486. <li>✅ RAG 核心服务(索引、检索、问答)</li>
  487. </ul>
  488. <hr />
  489. <h2 id="_4">🎯 第一阶段完成情况</h2>
  490. <p>根据设计文档 <strong>6.2 第一阶段:核心数据流程(2月10日前)</strong></p>
  491. <h3 id="week-1-2-95">Week 1-2 任务完成率:95%</h3>
  492. <table>
  493. <thead>
  494. <tr>
  495. <th>任务</th>
  496. <th>状态</th>
  497. <th>说明</th>
  498. </tr>
  499. </thead>
  500. <tbody>
  501. <tr>
  502. <td>文件上传接口</td>
  503. <td>✅</td>
  504. <td>支持 PDF、Word、图片</td>
  505. </tr>
  506. <tr>
  507. <td>文件类型识别</td>
  508. <td>✅</td>
  509. <td>MIME 类型检测</td>
  510. </tr>
  511. <tr>
  512. <td>OCR 集成(飞桨OCR)</td>
  513. <td>✅</td>
  514. <td>Python 服务调用</td>
  515. </tr>
  516. <tr>
  517. <td>文本提取(Word文档)</td>
  518. <td>✅</td>
  519. <td>Apache POI</td>
  520. </tr>
  521. <tr>
  522. <td><strong>Word 结构化提取</strong></td>
  523. <td>✅</td>
  524. <td>段落、图片、表格、样式</td>
  525. </tr>
  526. <tr>
  527. <td>TXT 文件存储</td>
  528. <td>✅</td>
  529. <td>SSD 硬盘存储</td>
  530. </tr>
  531. <tr>
  532. <td><strong>图片资源提取存储</strong></td>
  533. <td>✅</td>
  534. <td>自动提取并保存到独立目录</td>
  535. </tr>
  536. <tr>
  537. <td>解析任务管理</td>
  538. <td>✅</td>
  539. <td>异步处理、状态跟踪</td>
  540. </tr>
  541. <tr>
  542. <td>NER 服务集成</td>
  543. <td>✅</td>
  544. <td>DeepSeek API 集成</td>
  545. </tr>
  546. <tr>
  547. <td>实体提取接口</td>
  548. <td>✅</td>
  549. <td>完整实体列表输出</td>
  550. </tr>
  551. <tr>
  552. <td>关系分析接口</td>
  553. <td>✅</td>
  554. <td>规则模式关系抽取</td>
  555. </tr>
  556. <tr>
  557. <td>实体存储到图节点表</td>
  558. <td>✅</td>
  559. <td>PostgreSQL 存储验证</td>
  560. </tr>
  561. <tr>
  562. <td>图节点 CRUD 接口</td>
  563. <td>✅</td>
  564. <td>完整 CRUD</td>
  565. </tr>
  566. <tr>
  567. <td>图关系 CRUD 接口</td>
  568. <td>✅</td>
  569. <td>完整 CRUD</td>
  570. </tr>
  571. <tr>
  572. <td>图查询接口</td>
  573. <td>✅</td>
  574. <td>按文档、按用户查询</td>
  575. </tr>
  576. <tr>
  577. <td><strong>Neo4j 集成</strong></td>
  578. <td>✅</td>
  579. <td>本地部署,双写同步</td>
  580. </tr>
  581. </tbody>
  582. </table>
  583. <h3 id="week-3-4">Week 3-4 任务完成情况:</h3>
  584. <table>
  585. <thead>
  586. <tr>
  587. <th>任务</th>
  588. <th>状态</th>
  589. <th>说明</th>
  590. </tr>
  591. </thead>
  592. <tbody>
  593. <tr>
  594. <td>数据源创建接口</td>
  595. <td>✅</td>
  596. <td><code>POST /api/v1/datasource</code></td>
  597. </tr>
  598. <tr>
  599. <td>数据源绑定节点</td>
  600. <td>✅</td>
  601. <td><code>PUT /api/v1/datasource/{id}/refs</code></td>
  602. </tr>
  603. <tr>
  604. <td>数据源查询接口</td>
  605. <td>✅</td>
  606. <td>按ID/文档/用户/类型查询</td>
  607. </tr>
  608. <tr>
  609. <td>数据源取值接口</td>
  610. <td>✅</td>
  611. <td><code>GET /api/v1/datasource/{id}/value</code></td>
  612. </tr>
  613. <tr>
  614. <td>批量取值接口</td>
  615. <td>✅</td>
  616. <td><code>POST /api/v1/datasource/batch-value</code></td>
  617. </tr>
  618. <tr>
  619. <td>任务中心进度跟踪</td>
  620. <td>✅</td>
  621. <td>6阶段进度 + 结果摘要</td>
  622. </tr>
  623. <tr>
  624. <td>端到端流程测试</td>
  625. <td>✅</td>
  626. <td>完整测试脚本</td>
  627. </tr>
  628. <tr>
  629. <td>模板创建接口</td>
  630. <td>⏳</td>
  631. <td>下一步重点</td>
  632. </tr>
  633. <tr>
  634. <td>占位符绑定数据源</td>
  635. <td>⏳</td>
  636. <td>下一步重点</td>
  637. </tr>
  638. <tr>
  639. <td>模板渲染接口</td>
  640. <td>⏳</td>
  641. <td>下一步重点</td>
  642. </tr>
  643. </tbody>
  644. </table>
  645. <hr />
  646. <h2 id="v20week-4-5">📋 下一步计划:模板系统 v2.0(Week 4-5)</h2>
  647. <p><strong>计划周期:2026-01-23 ~ 2026-01-31</strong></p>
  648. <p><strong>目标:完成「示例文档驱动」的模板生成系统</strong></p>
  649. <h3 id="1">1. 数据模型重构 ✅ 已完成</h3>
  650. <ul>
  651. <li>✅ 数据库表重构(templates, source_files, variables, generations)</li>
  652. <li>✅ 实体类创建(Template, SourceFile, Variable, Generation)</li>
  653. <li>✅ Repository 接口(4个)</li>
  654. <li>✅ all_tables.sql 完整表结构</li>
  655. <li>✅ rebuild_all.sh --simple 模式</li>
  656. </ul>
  657. <h3 id="2">2. 模板管理(下一步重点)</h3>
  658. <p><strong>核心用户流程:</strong></p>
  659. <ol><li>用户上传示例报告(Word 文档)</li>
  660. <li>在报告中选中文本,标记为"变量"</li>
  661. <li>为每个变量配置数据来源(从哪个来源文件提取)</li>
  662. <li>保存为模板</li></ol>
  663. <p><strong>待实现 API:</strong></p>
  664. <table>
  665. <thead>
  666. <tr>
  667. <th>接口</th>
  668. <th>方法</th>
  669. <th>说明</th>
  670. <th>状态</th>
  671. </tr>
  672. </thead>
  673. <tbody>
  674. <tr>
  675. <td><code>/api/v1/templates</code></td>
  676. <td>POST</td>
  677. <td>创建模板</td>
  678. <td>⏳</td>
  679. </tr>
  680. <tr>
  681. <td><code>/api/v1/templates/{id}</code></td>
  682. <td>GET</td>
  683. <td>获取模板详情</td>
  684. <td>⏳</td>
  685. </tr>
  686. <tr>
  687. <td><code>/api/v1/templates</code></td>
  688. <td>GET</td>
  689. <td>模板列表</td>
  690. <td>⏳</td>
  691. </tr>
  692. <tr>
  693. <td><code>/api/v1/templates/{id}</code></td>
  694. <td>PUT</td>
  695. <td>更新模板</td>
  696. <td>⏳</td>
  697. </tr>
  698. <tr>
  699. <td><code>/api/v1/templates/{id}/publish</code></td>
  700. <td>POST</td>
  701. <td>发布模板</td>
  702. <td>⏳</td>
  703. </tr>
  704. <tr>
  705. <td><code>/api/v1/templates/{id}/source-files</code></td>
  706. <td>POST</td>
  707. <td>添加来源文件定义</td>
  708. <td>⏳</td>
  709. </tr>
  710. <tr>
  711. <td><code>/api/v1/templates/{id}/variables</code></td>
  712. <td>POST</td>
  713. <td>添加变量</td>
  714. <td>⏳</td>
  715. </tr>
  716. </tbody>
  717. </table>
  718. <h3 id="3">3. 报告生成</h3>
  719. <p><strong>核心用户流程:</strong></p>
  720. <ol><li>选择模板</li>
  721. <li>上传新的来源文件(按模板定义的别名)</li>
  722. <li>系统自动提取变量值</li>
  723. <li>用户确认/修改提取结果</li>
  724. <li>生成新报告</li></ol>
  725. <p><strong>待实现 API:</strong></p>
  726. <table>
  727. <thead>
  728. <tr>
  729. <th>接口</th>
  730. <th>方法</th>
  731. <th>说明</th>
  732. <th>状态</th>
  733. </tr>
  734. </thead>
  735. <tbody>
  736. <tr>
  737. <td><code>/api/v1/generations</code></td>
  738. <td>POST</td>
  739. <td>创建生成任务</td>
  740. <td>⏳</td>
  741. </tr>
  742. <tr>
  743. <td><code>/api/v1/generations/{id}</code></td>
  744. <td>GET</td>
  745. <td>获取生成任务详情</td>
  746. <td>⏳</td>
  747. </tr>
  748. <tr>
  749. <td><code>/api/v1/generations/{id}/extract</code></td>
  750. <td>POST</td>
  751. <td>执行变量提取</td>
  752. <td>⏳</td>
  753. </tr>
  754. <tr>
  755. <td><code>/api/v1/generations/{id}/confirm</code></td>
  756. <td>POST</td>
  757. <td>确认提取结果</td>
  758. <td>⏳</td>
  759. </tr>
  760. <tr>
  761. <td><code>/api/v1/generations/{id}/generate</code></td>
  762. <td>POST</td>
  763. <td>生成最终报告</td>
  764. <td>⏳</td>
  765. </tr>
  766. </tbody>
  767. </table>
  768. <h3 id="4-api">4. 已实现的数据源 API(保留)</h3>
  769. <table>
  770. <thead>
  771. <tr>
  772. <th>接口</th>
  773. <th>方法</th>
  774. <th>说明</th>
  775. <th>状态</th>
  776. </tr>
  777. </thead>
  778. <tbody>
  779. <tr>
  780. <td><code>/api/v1/datasource</code></td>
  781. <td>POST</td>
  782. <td>创建数据源</td>
  783. <td>✅</td>
  784. </tr>
  785. <tr>
  786. <td><code>/api/v1/datasource/{id}</code></td>
  787. <td>GET</td>
  788. <td>获取数据源</td>
  789. <td>✅</td>
  790. </tr>
  791. <tr>
  792. <td><code>/api/v1/datasource/{id}/value</code></td>
  793. <td>GET</td>
  794. <td>获取数据源值</td>
  795. <td>✅</td>
  796. </tr>
  797. <tr>
  798. <td><code>/api/v1/datasource/batch-value</code></td>
  799. <td>POST</td>
  800. <td>批量获取值</td>
  801. <td>✅</td>
  802. </tr>
  803. </tbody>
  804. </table>
  805. <hr />
  806. <h2 id="_5">⚠️ 关键缺失(对照技术预研表)</h2>
  807. <table>
  808. <thead>
  809. <tr>
  810. <th>预研项</th>
  811. <th>进度</th>
  812. <th>已完成 ✅</th>
  813. <th>待实现 ❌</th>
  814. </tr>
  815. </thead>
  816. <tbody>
  817. <tr>
  818. <td><strong>1️⃣ 规则"智能体"设计</strong></td>
  819. <td>45%</td>
  820. <td>Graph Service 架构<br>规则、模板数据模型<br>RAG 问答服务<br><strong>NER 实体提取</strong><br><strong>Neo4j 图数据库</strong></td>
  821. <td>规则 DSL 定义与解析<br>规则执行引擎<br>多层计算算法</td>
  822. </tr>
  823. <tr>
  824. <td><strong>2️⃣ 产品定位与功能逻辑</strong></td>
  825. <td>45%</td>
  826. <td>6大后端服务框架<br><strong>前端交互原型</strong><br>基础组件<br><strong>结构化文档展示</strong></td>
  827. <td>所有前端页面 UI<br>智能体集群架构<br>规则校验功能</td>
  828. </tr>
  829. <tr>
  830. <td><strong>3️⃣ 规则智能体模拟</strong></td>
  831. <td>50%</td>
  832. <td>TextStorage(文本存储)<br>GraphNode、GraphRelation<br><strong>向量相似度检索</strong><br><strong>NER + 关系构建</strong><br><strong>文档结构化存储</strong></td>
  833. <td>规则逻辑树算法<br>单规则校验引擎<br>知识图谱构建算法</td>
  834. </tr>
  835. <tr>
  836. <td><strong>4️⃣ AI多模态模型/OCR</strong></td>
  837. <td>70%</td>
  838. <td>PaddleOCR Client<br>PDF/Word/Excel 提取<br><strong>DeepSeek API</strong><br><strong>Ollama Embedding</strong><br><strong>Word 结构化提取</strong></td>
  839. <td>AI多模态模型接入<br>NSDK集成<br>OCR后处理优化</td>
  840. </tr>
  841. <tr>
  842. <td><strong>5️⃣ 前端交互设计</strong></td>
  843. <td>20%</td>
  844. <td><strong>前端交互原型 HTML</strong><br>文档编辑预览<br>实体高亮展示</td>
  845. <td>所有核心页面 UI<br>与后端 API 对接</td>
  846. </tr>
  847. </tbody>
  848. </table>
  849. <hr />
  850. <h2 id="_6">📁 项目结构</h2>
  851. <div class="codehilite"><pre><span></span><code><span class="nx">backend</span><span class="o">/</span>
  852. <span class="err">├──</span><span class="w"> </span><span class="nx">common</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">公共模块</span>
  853. <span class="err">│</span><span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">mybatis</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">PostgreSqlJsonbTypeHandler</span>
  854. <span class="err">├──</span><span class="w"> </span><span class="nx">auth</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">认证服务</span>
  855. <span class="err">├──</span><span class="w"> </span><span class="nx">document</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">文档管理服务</span>
  856. <span class="err">│</span><span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">entity</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">Document</span><span class="p">,</span><span class="w"> </span><span class="nx">DocumentElement</span><span class="p">,</span><span class="w"> </span><span class="nx">DocumentBlock</span>
  857. <span class="err">├──</span><span class="w"> </span><span class="nx">parse</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">解析服务</span>
  858. <span class="err">│</span><span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">WordStructuredExtractionService</span>
  859. <span class="err">├──</span><span class="w"> </span><span class="nx">ai</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">AI服务</span>
  860. <span class="err">├──</span><span class="w"> </span><span class="nx">graph</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">图谱服务</span><span class="err">(</span><span class="nx">RAG</span><span class="err">、</span><span class="nx">NER</span><span class="err">、</span><span class="nx">图数据库</span><span class="err">)</span>
  861. <span class="err">│</span><span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">neo4j</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">Neo4j</span><span class="w"> </span><span class="nx">集成服务</span>
  862. <span class="err">├──</span><span class="w"> </span><span class="nx">extract</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">模板系统服务</span><span class="err">(</span><span class="nx">v2</span><span class="m m-Double">.0</span><span class="err">)</span>
  863. <span class="err">│</span><span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">entity</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">Template</span><span class="p">,</span><span class="w"> </span><span class="nx">SourceFile</span><span class="p">,</span><span class="w"> </span><span class="nx">Variable</span><span class="p">,</span><span class="w"> </span><span class="nx">Generation</span>
  864. <span class="err">│</span><span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">repository</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="mi">4</span><span class="nx">个</span><span class="w"> </span><span class="nx">Repository</span>
  865. <span class="err">│</span><span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">dto</span><span class="o">/</span><span class="nx">config</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">VariableLocation</span><span class="p">,</span><span class="w"> </span><span class="nx">ReferenceSourceConfig</span>
  866. <span class="err">├──</span><span class="w"> </span><span class="nx">notification</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">通知服务</span>
  867. <span class="err">├──</span><span class="w"> </span><span class="nx">gateway</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">网关服务</span>
  868. <span class="err">├──</span><span class="w"> </span><span class="nx">lingyue</span><span class="o">-</span><span class="nx">starter</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">单体应用启动器</span>
  869. <span class="err">└──</span><span class="w"> </span><span class="nx">sql</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">数据库脚本</span>
  870. <span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">rebuild_all</span><span class="p">.</span><span class="nx">sh</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">完整重建脚本</span><span class="err">(</span><span class="nx">支持</span><span class="w"> </span><span class="o">--</span><span class="nx">simple</span><span class="err">)</span>
  871. <span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">all_tables</span><span class="p">.</span><span class="nx">sql</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">完整表结构</span><span class="err">(</span><span class="mi">21</span><span class="nx">张表</span><span class="err">)</span>
  872. <span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">template_tables</span><span class="p">.</span><span class="nx">sql</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">模板系统表</span><span class="err">(</span><span class="nx">v2</span><span class="m m-Double">.0</span><span class="err">)</span>
  873. <span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">init</span><span class="p">.</span><span class="nx">sql</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">基础表</span>
  874. <span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">graph_tables</span><span class="p">.</span><span class="nx">sql</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">图谱表</span>
  875. <span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">supplement_tables</span><span class="p">.</span><span class="nx">sql</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">补充表</span>
  876. <span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">rag_tables_compatible</span><span class="p">.</span><span class="nx">sql</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">RAG</span><span class="w"> </span><span class="nx">表</span>
  877. <span class="nx">python</span><span class="o">-</span><span class="nx">services</span><span class="o">/</span>
  878. <span class="err">└──</span><span class="w"> </span><span class="nx">ner</span><span class="o">-</span><span class="nx">service</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">NER</span><span class="w"> </span><span class="nx">Python</span><span class="w"> </span><span class="nx">服务</span>
  879. <span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">app</span><span class="o">/</span>
  880. <span class="w"> </span><span class="err">│</span><span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">models</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">数据模型</span>
  881. <span class="w"> </span><span class="err">│</span><span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">routers</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">API</span><span class="w"> </span><span class="nx">路由</span>
  882. <span class="w"> </span><span class="err">│</span><span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">services</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">业务服务</span>
  883. <span class="w"> </span><span class="err">│</span><span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">ner_service</span><span class="p">.</span><span class="nx">py</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">NER</span><span class="w"> </span><span class="nx">调度</span>
  884. <span class="w"> </span><span class="err">│</span><span class="w"> </span><span class="err">├──</span><span class="w"> </span><span class="nx">deepseek_service</span><span class="p">.</span><span class="nx">py</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">DeepSeek</span><span class="w"> </span><span class="nx">API</span>
  885. <span class="w"> </span><span class="err">│</span><span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">relation_service</span><span class="p">.</span><span class="nx">py</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">关系抽取</span>
  886. <span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">requirements</span><span class="p">.</span><span class="nx">txt</span>
  887. <span class="nx">frontend</span><span class="o">/</span>
  888. <span class="err">└──</span><span class="w"> </span><span class="nx">灵越智报_完整交互版</span><span class="p">.</span><span class="nx">html</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">前端交互原型</span>
  889. <span class="nx">database</span><span class="o">/</span>
  890. <span class="err">└──</span><span class="w"> </span><span class="nx">migrations</span><span class="o">/</span><span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="nx">数据库迁移脚本</span>
  891. <span class="w"> </span><span class="err">└──</span><span class="w"> </span><span class="nx">V2026_01_21_02__add_document_elements</span><span class="p">.</span><span class="nx">sql</span>
  892. </code></pre></div>
  893. <hr />
  894. <h2 id="_7">🔧 技术栈确认</h2>
  895. <table>
  896. <thead>
  897. <tr>
  898. <th>组件</th>
  899. <th>技术选型</th>
  900. <th>状态</th>
  901. </tr>
  902. </thead>
  903. <tbody>
  904. <tr>
  905. <td>后端框架</td>
  906. <td>Spring Boot 3.1.5</td>
  907. <td>✅</td>
  908. </tr>
  909. <tr>
  910. <td>数据库</td>
  911. <td>PostgreSQL + pgvector</td>
  912. <td>✅</td>
  913. </tr>
  914. <tr>
  915. <td>缓存</td>
  916. <td>Redis</td>
  917. <td>✅</td>
  918. </tr>
  919. <tr>
  920. <td>消息队列</td>
  921. <td>RabbitMQ</td>
  922. <td>✅</td>
  923. </tr>
  924. <tr>
  925. <td>NER 模型</td>
  926. <td>DeepSeek API(内测)</td>
  927. <td>✅</td>
  928. </tr>
  929. <tr>
  930. <td>Embedding</td>
  931. <td>Ollama nomic-embed-text</td>
  932. <td>✅</td>
  933. </tr>
  934. <tr>
  935. <td>OCR</td>
  936. <td>PaddleOCR</td>
  937. <td>✅</td>
  938. </tr>
  939. <tr>
  940. <td>图数据库</td>
  941. <td>PostgreSQL + <strong>Neo4j 4.4</strong></td>
  942. <td>✅</td>
  943. </tr>
  944. <tr>
  945. <td>文档解析</td>
  946. <td>Apache POI(Word 结构化)</td>
  947. <td>✅</td>
  948. </tr>
  949. <tr>
  950. <td>前端</td>
  951. <td>HTML 交互原型</td>
  952. <td>✅</td>
  953. </tr>
  954. <tr>
  955. <td>前端框架</td>
  956. <td>Flutter Web(待开发)</td>
  957. <td>⏳</td>
  958. </tr>
  959. </tbody>
  960. </table>
  961. <hr />
  962. <h2 id="_8">📅 里程碑</h2>
  963. <ul>
  964. <li><strong>M1(2月10日)</strong>:第一阶段完成,核心流程跑通<ul>
  965. <li>✅ 文件上传解析</li>
  966. <li>✅ RAG 向量检索</li>
  967. <li>✅ NER 实体提取</li>
  968. <li>✅ 文档结构化解析(图片、表格)</li>
  969. <li>✅ Neo4j 图数据库集成</li>
  970. <li>✅ <strong>数据源管理</strong>(已完成)</li>
  971. <li>✅ <strong>一键上传全自动处理</strong>(已完成)</li>
  972. <li>✅ <strong>任务中心进度跟踪</strong>(已完成)</li>
  973. <li>⏳ 模板基础(下一步)</li></ul></li>
  974. </ul>
  975. <hr />
  976. <h2 id="api">📋 API 接口清单</h2>
  977. <h3 id="auth-service">认证服务(auth-service)</h3>
  978. <table>
  979. <thead>
  980. <tr>
  981. <th>接口</th>
  982. <th>方法</th>
  983. <th>说明</th>
  984. <th>状态</th>
  985. </tr>
  986. </thead>
  987. <tbody>
  988. <tr>
  989. <td><code>/auth/register</code></td>
  990. <td>POST</td>
  991. <td>用户注册</td>
  992. <td>✅</td>
  993. </tr>
  994. <tr>
  995. <td><code>/auth/login</code></td>
  996. <td>POST</td>
  997. <td>用户登录</td>
  998. <td>✅</td>
  999. </tr>
  1000. <tr>
  1001. <td><code>/auth/logout</code></td>
  1002. <td>POST</td>
  1003. <td>用户登出</td>
  1004. <td>✅</td>
  1005. </tr>
  1006. <tr>
  1007. <td><code>/auth/refresh</code></td>
  1008. <td>POST</td>
  1009. <td>刷新Token</td>
  1010. <td>✅</td>
  1011. </tr>
  1012. <tr>
  1013. <td><code>/auth/me</code></td>
  1014. <td>GET</td>
  1015. <td>获取当前用户</td>
  1016. <td>✅</td>
  1017. </tr>
  1018. <tr>
  1019. <td><code>/auth/profile</code></td>
  1020. <td>PUT</td>
  1021. <td><strong>更新用户资料</strong></td>
  1022. <td>✅</td>
  1023. </tr>
  1024. <tr>
  1025. <td><code>/auth/password</code></td>
  1026. <td>PUT</td>
  1027. <td><strong>修改密码</strong></td>
  1028. <td>✅</td>
  1029. </tr>
  1030. </tbody>
  1031. </table>
  1032. <h3 id="document-service">文档管理服务(document-service)</h3>
  1033. <table>
  1034. <thead>
  1035. <tr>
  1036. <th>接口</th>
  1037. <th>方法</th>
  1038. <th>说明</th>
  1039. <th>状态</th>
  1040. </tr>
  1041. </thead>
  1042. <tbody>
  1043. <tr>
  1044. <td><code>/api/v1/documents</code></td>
  1045. <td>GET</td>
  1046. <td>文档列表(分页)</td>
  1047. <td>✅</td>
  1048. </tr>
  1049. <tr>
  1050. <td><code>/api/v1/documents/{id}</code></td>
  1051. <td>GET</td>
  1052. <td>文档详情</td>
  1053. <td>✅</td>
  1054. </tr>
  1055. <tr>
  1056. <td><code>/api/v1/documents/{id}</code></td>
  1057. <td>PUT</td>
  1058. <td><strong>更新文档</strong></td>
  1059. <td>✅</td>
  1060. </tr>
  1061. <tr>
  1062. <td><code>/api/v1/documents/{id}</code></td>
  1063. <td>DELETE</td>
  1064. <td><strong>级联删除</strong></td>
  1065. <td>✅</td>
  1066. </tr>
  1067. <tr>
  1068. <td><code>/api/v1/documents/batch-delete</code></td>
  1069. <td>POST</td>
  1070. <td><strong>批量删除</strong></td>
  1071. <td>✅</td>
  1072. </tr>
  1073. <tr>
  1074. <td><code>/api/v1/documents/{id}/text</code></td>
  1075. <td>GET</td>
  1076. <td>获取文档文本</td>
  1077. <td>✅</td>
  1078. </tr>
  1079. <tr>
  1080. <td><code>/api/v1/documents/{id}/parse-status</code></td>
  1081. <td>GET</td>
  1082. <td>解析状态</td>
  1083. <td>✅</td>
  1084. </tr>
  1085. <tr>
  1086. <td><code>/api/v1/documents/{id}/elements</code></td>
  1087. <td>GET</td>
  1088. <td>结构化元素</td>
  1089. <td>✅</td>
  1090. </tr>
  1091. <tr>
  1092. <td><code>/api/v1/documents/{id}/images</code></td>
  1093. <td>GET</td>
  1094. <td>图片列表</td>
  1095. <td>✅</td>
  1096. </tr>
  1097. <tr>
  1098. <td><code>/api/v1/documents/{id}/tables</code></td>
  1099. <td>GET</td>
  1100. <td>表格列表</td>
  1101. <td>✅</td>
  1102. </tr>
  1103. </tbody>
  1104. </table>
  1105. <h3 id="_9">文件上传(唯一入口)</h3>
  1106. <table>
  1107. <thead>
  1108. <tr>
  1109. <th>接口</th>
  1110. <th>方法</th>
  1111. <th>说明</th>
  1112. <th>状态</th>
  1113. </tr>
  1114. </thead>
  1115. <tbody>
  1116. <tr>
  1117. <td><code>/api/v1/parse/upload</code></td>
  1118. <td>POST</td>
  1119. <td><strong>一键上传,自动触发全流程</strong></td>
  1120. <td>✅</td>
  1121. </tr>
  1122. </tbody>
  1123. </table>
  1124. <h3 id="parse-service">文档解析服务(parse-service)</h3>
  1125. <table>
  1126. <thead>
  1127. <tr>
  1128. <th>接口</th>
  1129. <th>方法</th>
  1130. <th>说明</th>
  1131. <th>状态</th>
  1132. </tr>
  1133. </thead>
  1134. <tbody>
  1135. <tr>
  1136. <td><code>/parse/structured/{documentId}</code></td>
  1137. <td>GET</td>
  1138. <td>手动触发结构化解析</td>
  1139. <td>✅</td>
  1140. </tr>
  1141. <tr>
  1142. <td><code>/parse/elements/{documentId}</code></td>
  1143. <td>GET</td>
  1144. <td>获取所有结构化元素</td>
  1145. <td>✅</td>
  1146. </tr>
  1147. <tr>
  1148. <td><code>/parse/elements/{documentId}/images</code></td>
  1149. <td>GET</td>
  1150. <td>获取图片列表</td>
  1151. <td>✅</td>
  1152. </tr>
  1153. <tr>
  1154. <td><code>/parse/elements/{documentId}/tables</code></td>
  1155. <td>GET</td>
  1156. <td>获取表格列表</td>
  1157. <td>✅</td>
  1158. </tr>
  1159. <tr>
  1160. <td><code>/api/v1/files/images/{docId}/{filename}</code></td>
  1161. <td>GET</td>
  1162. <td>获取图片资源</td>
  1163. <td>✅</td>
  1164. </tr>
  1165. </tbody>
  1166. </table>
  1167. <h3 id="parse-service_1">任务中心(parse-service)</h3>
  1168. <table>
  1169. <thead>
  1170. <tr>
  1171. <th>接口</th>
  1172. <th>方法</th>
  1173. <th>说明</th>
  1174. <th>状态</th>
  1175. </tr>
  1176. </thead>
  1177. <tbody>
  1178. <tr>
  1179. <td><code>/api/v1/tasks/list</code></td>
  1180. <td>GET</td>
  1181. <td>获取任务列表</td>
  1182. <td>✅</td>
  1183. </tr>
  1184. <tr>
  1185. <td><code>/api/v1/tasks/{taskId}/detail</code></td>
  1186. <td>GET</td>
  1187. <td>获取任务详情</td>
  1188. <td>✅</td>
  1189. </tr>
  1190. <tr>
  1191. <td><code>/api/v1/tasks/by-document/{documentId}</code></td>
  1192. <td>GET</td>
  1193. <td>按文档ID查询任务</td>
  1194. <td>✅</td>
  1195. </tr>
  1196. <tr>
  1197. <td><code>/api/v1/tasks/statistics</code></td>
  1198. <td>GET</td>
  1199. <td>获取任务统计</td>
  1200. <td>✅</td>
  1201. </tr>
  1202. </tbody>
  1203. </table>
  1204. <h3 id="ner-ner-service">NER 服务(ner-service)</h3>
  1205. <table>
  1206. <thead>
  1207. <tr>
  1208. <th>接口</th>
  1209. <th>方法</th>
  1210. <th>说明</th>
  1211. <th>状态</th>
  1212. </tr>
  1213. </thead>
  1214. <tbody>
  1215. <tr>
  1216. <td><code>/api/ner/extract</code></td>
  1217. <td>POST</td>
  1218. <td>文本实体抽取</td>
  1219. <td>✅</td>
  1220. </tr>
  1221. <tr>
  1222. <td><code>/api/ner/document/{id}</code></td>
  1223. <td>POST</td>
  1224. <td>手动触发文档实体抽取</td>
  1225. <td>✅</td>
  1226. </tr>
  1227. <tr>
  1228. <td><code>/api/ner/relations</code></td>
  1229. <td>POST</td>
  1230. <td>关系抽取</td>
  1231. <td>✅</td>
  1232. </tr>
  1233. </tbody>
  1234. </table>
  1235. <h3 id="graph-service">图谱服务(graph-service)</h3>
  1236. <table>
  1237. <thead>
  1238. <tr>
  1239. <th>接口</th>
  1240. <th>方法</th>
  1241. <th>说明</th>
  1242. <th>状态</th>
  1243. </tr>
  1244. </thead>
  1245. <tbody>
  1246. <tr>
  1247. <td><code>/api/graph/nodes</code></td>
  1248. <td>POST</td>
  1249. <td>创建节点</td>
  1250. <td>✅</td>
  1251. </tr>
  1252. <tr>
  1253. <td><code>/api/graph/nodes/{nodeId}</code></td>
  1254. <td>GET/PUT/DELETE</td>
  1255. <td>节点 CRUD</td>
  1256. <td>✅</td>
  1257. </tr>
  1258. <tr>
  1259. <td><code>/api/graph/documents/{documentId}/nodes</code></td>
  1260. <td>GET</td>
  1261. <td>按文档查询节点</td>
  1262. <td>✅</td>
  1263. </tr>
  1264. <tr>
  1265. <td><code>/api/graph/relations</code></td>
  1266. <td>POST</td>
  1267. <td>创建关系</td>
  1268. <td>✅</td>
  1269. </tr>
  1270. </tbody>
  1271. </table>
  1272. <h3 id="graph-service_1">数据源服务(graph-service)</h3>
  1273. <table>
  1274. <thead>
  1275. <tr>
  1276. <th>接口</th>
  1277. <th>方法</th>
  1278. <th>说明</th>
  1279. <th>状态</th>
  1280. </tr>
  1281. </thead>
  1282. <tbody>
  1283. <tr>
  1284. <td><code>/api/v1/datasource</code></td>
  1285. <td>POST</td>
  1286. <td>创建数据源</td>
  1287. <td>✅</td>
  1288. </tr>
  1289. <tr>
  1290. <td><code>/api/v1/datasource/{id}</code></td>
  1291. <td>GET</td>
  1292. <td>获取数据源</td>
  1293. <td>✅</td>
  1294. </tr>
  1295. <tr>
  1296. <td><code>/api/v1/datasource/document/{documentId}</code></td>
  1297. <td>GET</td>
  1298. <td>按文档查询</td>
  1299. <td>✅</td>
  1300. </tr>
  1301. <tr>
  1302. <td><code>/api/v1/datasource/{id}/refs</code></td>
  1303. <td>PUT</td>
  1304. <td>更新绑定节点</td>
  1305. <td>✅</td>
  1306. </tr>
  1307. <tr>
  1308. <td><code>/api/v1/datasource/{id}/value</code></td>
  1309. <td>GET</td>
  1310. <td><strong>获取数据源值</strong></td>
  1311. <td>✅</td>
  1312. </tr>
  1313. <tr>
  1314. <td><code>/api/v1/datasource/batch-value</code></td>
  1315. <td>POST</td>
  1316. <td><strong>批量获取值</strong></td>
  1317. <td>✅</td>
  1318. </tr>
  1319. </tbody>
  1320. </table>
  1321. <h3 id="extract-service">模板系统(extract-service)</h3>
  1322. <table>
  1323. <thead>
  1324. <tr>
  1325. <th>接口</th>
  1326. <th>方法</th>
  1327. <th>说明</th>
  1328. <th>状态</th>
  1329. </tr>
  1330. </thead>
  1331. <tbody>
  1332. <tr>
  1333. <td><code>/api/v1/templates</code></td>
  1334. <td>POST</td>
  1335. <td>创建模板</td>
  1336. <td>✅</td>
  1337. </tr>
  1338. <tr>
  1339. <td><code>/api/v1/templates/{id}</code></td>
  1340. <td>GET</td>
  1341. <td>获取模板详情</td>
  1342. <td>✅</td>
  1343. </tr>
  1344. <tr>
  1345. <td><code>/api/v1/templates</code></td>
  1346. <td>GET</td>
  1347. <td>模板列表</td>
  1348. <td>✅</td>
  1349. </tr>
  1350. <tr>
  1351. <td><code>/api/v1/templates/{id}</code></td>
  1352. <td>PUT</td>
  1353. <td>更新模板</td>
  1354. <td>✅</td>
  1355. </tr>
  1356. <tr>
  1357. <td><code>/api/v1/templates/{id}</code></td>
  1358. <td>DELETE</td>
  1359. <td>删除模板</td>
  1360. <td>✅</td>
  1361. </tr>
  1362. <tr>
  1363. <td><code>/api/v1/templates/{id}/publish</code></td>
  1364. <td>POST</td>
  1365. <td>发布模板</td>
  1366. <td>✅</td>
  1367. </tr>
  1368. <tr>
  1369. <td><code>/api/v1/templates/{id}/archive</code></td>
  1370. <td>POST</td>
  1371. <td>归档模板</td>
  1372. <td>✅</td>
  1373. </tr>
  1374. <tr>
  1375. <td><code>/api/v1/templates/{id}/duplicate</code></td>
  1376. <td>POST</td>
  1377. <td>复制模板</td>
  1378. <td>✅</td>
  1379. </tr>
  1380. <tr>
  1381. <td><code>/api/v1/templates/{id}/source-files</code></td>
  1382. <td>POST</td>
  1383. <td>添加来源文件定义</td>
  1384. <td>✅</td>
  1385. </tr>
  1386. <tr>
  1387. <td><code>/api/v1/templates/{id}/source-files</code></td>
  1388. <td>GET</td>
  1389. <td>获取来源文件列表</td>
  1390. <td>✅</td>
  1391. </tr>
  1392. <tr>
  1393. <td><code>/api/v1/templates/{id}/variables</code></td>
  1394. <td>POST</td>
  1395. <td>添加变量</td>
  1396. <td>✅</td>
  1397. </tr>
  1398. <tr>
  1399. <td><code>/api/v1/templates/{id}/variables</code></td>
  1400. <td>GET</td>
  1401. <td>获取变量列表</td>
  1402. <td>✅</td>
  1403. </tr>
  1404. <tr>
  1405. <td><code>/api/v1/templates/{id}/variables/grouped</code></td>
  1406. <td>GET</td>
  1407. <td><strong>变量按类别分组</strong></td>
  1408. <td>✅</td>
  1409. </tr>
  1410. <tr>
  1411. <td><code>/api/v1/templates/variables/{id}/preview</code></td>
  1412. <td>POST</td>
  1413. <td>预览提取结果</td>
  1414. <td>✅</td>
  1415. </tr>
  1416. </tbody>
  1417. </table>
  1418. <h3 id="extract-service_1">生成任务(extract-service)</h3>
  1419. <table>
  1420. <thead>
  1421. <tr>
  1422. <th>接口</th>
  1423. <th>方法</th>
  1424. <th>说明</th>
  1425. <th>状态</th>
  1426. </tr>
  1427. </thead>
  1428. <tbody>
  1429. <tr>
  1430. <td><code>/api/v1/generations</code></td>
  1431. <td>POST</td>
  1432. <td>创建生成任务</td>
  1433. <td>✅</td>
  1434. </tr>
  1435. <tr>
  1436. <td><code>/api/v1/generations/{id}</code></td>
  1437. <td>GET</td>
  1438. <td>获取任务详情</td>
  1439. <td>✅</td>
  1440. </tr>
  1441. <tr>
  1442. <td><code>/api/v1/generations</code></td>
  1443. <td>GET</td>
  1444. <td>获取任务列表</td>
  1445. <td>✅</td>
  1446. </tr>
  1447. <tr>
  1448. <td><code>/api/v1/generations/{id}/execute</code></td>
  1449. <td>POST</td>
  1450. <td>执行变量提取</td>
  1451. <td>✅</td>
  1452. </tr>
  1453. <tr>
  1454. <td><code>/api/v1/generations/{id}/progress</code></td>
  1455. <td>GET</td>
  1456. <td>获取执行进度</td>
  1457. <td>✅</td>
  1458. </tr>
  1459. <tr>
  1460. <td><code>/api/v1/generations/{id}/variables/{varName}</code></td>
  1461. <td>PUT</td>
  1462. <td>修改变量值</td>
  1463. <td>✅</td>
  1464. </tr>
  1465. <tr>
  1466. <td><code>/api/v1/generations/{id}/confirm</code></td>
  1467. <td>POST</td>
  1468. <td>确认并生成文档</td>
  1469. <td>✅</td>
  1470. </tr>
  1471. <tr>
  1472. <td><code>/api/v1/generations/{id}/download</code></td>
  1473. <td>GET</td>
  1474. <td>下载生成文档</td>
  1475. <td>✅</td>
  1476. </tr>
  1477. </tbody>
  1478. </table>
  1479. <hr />
  1480. <h2 id="21">🗄️ 数据库表清单(21张)</h2>
  1481. <h3 id="_10">基础模块</h3>
  1482. <table>
  1483. <thead>
  1484. <tr>
  1485. <th>表名</th>
  1486. <th>说明</th>
  1487. <th>状态</th>
  1488. </tr>
  1489. </thead>
  1490. <tbody>
  1491. <tr>
  1492. <td><code>users</code></td>
  1493. <td>用户表</td>
  1494. <td>✅</td>
  1495. </tr>
  1496. <tr>
  1497. <td><code>documents</code></td>
  1498. <td>文档表</td>
  1499. <td>✅</td>
  1500. </tr>
  1501. <tr>
  1502. <td><code>elements</code></td>
  1503. <td>要素表</td>
  1504. <td>✅</td>
  1505. </tr>
  1506. <tr>
  1507. <td><code>annotations</code></td>
  1508. <td>批注表</td>
  1509. <td>✅</td>
  1510. </tr>
  1511. <tr>
  1512. <td><code>graphs</code></td>
  1513. <td>关系网络表</td>
  1514. <td>✅</td>
  1515. </tr>
  1516. <tr>
  1517. <td><code>parse_tasks</code></td>
  1518. <td>解析任务(多阶段进度)</td>
  1519. <td>✅</td>
  1520. </tr>
  1521. <tr>
  1522. <td><code>sessions</code></td>
  1523. <td>会话表</td>
  1524. <td>✅</td>
  1525. </tr>
  1526. </tbody>
  1527. </table>
  1528. <h3 id="_11">图谱模块</h3>
  1529. <table>
  1530. <thead>
  1531. <tr>
  1532. <th>表名</th>
  1533. <th>说明</th>
  1534. <th>状态</th>
  1535. </tr>
  1536. </thead>
  1537. <tbody>
  1538. <tr>
  1539. <td><code>graph_nodes</code></td>
  1540. <td>图节点(NER实体)</td>
  1541. <td>✅</td>
  1542. </tr>
  1543. <tr>
  1544. <td><code>graph_relations</code></td>
  1545. <td>图关系</td>
  1546. <td>✅</td>
  1547. </tr>
  1548. </tbody>
  1549. </table>
  1550. <h3 id="_12">补充模块</h3>
  1551. <table>
  1552. <thead>
  1553. <tr>
  1554. <th>表名</th>
  1555. <th>说明</th>
  1556. <th>状态</th>
  1557. </tr>
  1558. </thead>
  1559. <tbody>
  1560. <tr>
  1561. <td><code>rules</code></td>
  1562. <td>规则表</td>
  1563. <td>✅</td>
  1564. </tr>
  1565. <tr>
  1566. <td><code>data_sources</code></td>
  1567. <td>数据源表</td>
  1568. <td>✅</td>
  1569. </tr>
  1570. <tr>
  1571. <td><code>text_storage</code></td>
  1572. <td>文本存储</td>
  1573. <td>✅</td>
  1574. </tr>
  1575. </tbody>
  1576. </table>
  1577. <h3 id="rag">RAG 模块</h3>
  1578. <table>
  1579. <thead>
  1580. <tr>
  1581. <th>表名</th>
  1582. <th>说明</th>
  1583. <th>状态</th>
  1584. </tr>
  1585. </thead>
  1586. <tbody>
  1587. <tr>
  1588. <td><code>text_chunks</code></td>
  1589. <td>文本分块</td>
  1590. <td>✅</td>
  1591. </tr>
  1592. <tr>
  1593. <td><code>vector_embeddings</code></td>
  1594. <td>向量嵌入(pgvector)</td>
  1595. <td>✅</td>
  1596. </tr>
  1597. </tbody>
  1598. </table>
  1599. <h3 id="_13">文档结构化模块</h3>
  1600. <table>
  1601. <thead>
  1602. <tr>
  1603. <th>表名</th>
  1604. <th>说明</th>
  1605. <th>状态</th>
  1606. </tr>
  1607. </thead>
  1608. <tbody>
  1609. <tr>
  1610. <td><code>document_blocks</code></td>
  1611. <td>文档块(TextElement)</td>
  1612. <td>✅</td>
  1613. </tr>
  1614. <tr>
  1615. <td><code>document_entities</code></td>
  1616. <td>文档实体标注</td>
  1617. <td>✅</td>
  1618. </tr>
  1619. <tr>
  1620. <td><code>document_elements</code></td>
  1621. <td>文档结构化元素</td>
  1622. <td>✅</td>
  1623. </tr>
  1624. </tbody>
  1625. </table>
  1626. <h3 id="v20">模板系统 v2.0(新增)</h3>
  1627. <table>
  1628. <thead>
  1629. <tr>
  1630. <th>表名</th>
  1631. <th>说明</th>
  1632. <th>状态</th>
  1633. </tr>
  1634. </thead>
  1635. <tbody>
  1636. <tr>
  1637. <td><code>templates</code></td>
  1638. <td>报告模板</td>
  1639. <td>✅</td>
  1640. </tr>
  1641. <tr>
  1642. <td><code>source_files</code></td>
  1643. <td>来源文件定义</td>
  1644. <td>✅</td>
  1645. </tr>
  1646. <tr>
  1647. <td><code>variables</code></td>
  1648. <td>模板变量</td>
  1649. <td>✅</td>
  1650. </tr>
  1651. <tr>
  1652. <td><code>generations</code></td>
  1653. <td>生成任务</td>
  1654. <td>✅</td>
  1655. </tr>
  1656. </tbody>
  1657. </table>
  1658. </body>
  1659. </html>