Kaynağa Gözat

fix: 改进目录(TOC)渲染样式

后端改进:
- cleanTocTextWithPage() 分别返回标题和页码
- 目录项 style 中存储 tocPageNum 页码信息

前端改进:
1. 目录项结构改为三部分:
   - .toc-title: 章节标题
   - .toc-dots: 虚线连接符
   - .toc-page: 页码
2. 根据章节号层级自动计算缩进
3. CSS 样式优化:
   - flex 布局实现标题-虚线-页码
   - 虚线使用 border-bottom 实现
   - 悬停高亮效果
何文松 4 hafta önce
ebeveyn
işleme
9d425e82c4

+ 38 - 8
backend/parse-service/src/main/java/com/lingyue/parse/service/WordStructuredExtractionService.java

@@ -112,15 +112,21 @@ public class WordStructuredExtractionService {
                         String paragraphType = detectParagraphType(paragraph, text);
                         textElement.setType(paragraphType);
                         
-                        // 如果是目录项,清理域代码
+                        // 如果是目录项,清理域代码并提取页码
                         String cleanedText = text.trim();
+                        Map<String, Object> style = extractParagraphStyle(paragraph);
+                        
                         if ("toc_item".equals(paragraphType) || isTocEntry(cleanedText)) {
-                            cleanedText = cleanTocText(cleanedText);
+                            String[] tocParts = cleanTocTextWithPage(cleanedText);
+                            cleanedText = tocParts[0];
+                            if (!tocParts[1].isEmpty()) {
+                                style.put("tocPageNum", tocParts[1]);
+                            }
                             textElement.setType("toc_item");
                         }
                         
                         textElement.setContent(cleanedText);
-                        textElement.setStyle(extractParagraphStyle(paragraph));
+                        textElement.setStyle(style);
                         // 逐 Run 提取格式(目录项也提取,但文本会被清理)
                         textElement.setRuns(extractTextRunsWithClean(paragraph.getRuns(), "toc_item".equals(textElement.getType())));
                         
@@ -334,10 +340,10 @@ public class WordStructuredExtractionService {
     }
     
     /**
-     * 清理目录文本,移除域代码
+     * 清理目录文本,移除域代码,返回 [标题, 页码]
      */
-    private String cleanTocText(String text) {
-        if (text == null) return "";
+    private String[] cleanTocTextWithPage(String text) {
+        if (text == null) return new String[]{"", ""};
         
         String result = text;
         
@@ -350,9 +356,17 @@ public class WordStructuredExtractionService {
         result = result.replaceAll("HYPERLINK\\s+\\\\l\\s+\"[^\"]*\"\\s*", "");
         result = result.replaceAll("HYPERLINK\\s+\"[^\"]*\"\\s*", "");
         
+        // 提取页码(在 PAGEREF 后面)
+        String pageNum = "";
+        java.util.regex.Pattern pagePattern = java.util.regex.Pattern.compile("PAGEREF\\s+[^\\s]+\\s+\\\\h\\s*(\\d+)");
+        java.util.regex.Matcher pageMatcher = pagePattern.matcher(result);
+        if (pageMatcher.find()) {
+            pageNum = pageMatcher.group(1);
+        }
+        
         // 移除 PAGEREF 域代码
         // 格式: PAGEREF _Toc176869144 \h 1
-        result = result.replaceAll("PAGEREF\\s+[^\\s]+\\s+\\\\h\\s*", "");
+        result = result.replaceAll("PAGEREF\\s+[^\\s]+\\s+\\\\h\\s*\\d*", "");
         result = result.replaceAll("PAGEREF\\s+[^\\s]+\\s*", "");
         
         // 移除其他常见域代码标记
@@ -361,7 +375,23 @@ public class WordStructuredExtractionService {
         // 清理多余空格
         result = result.replaceAll("\\s+", " ").trim();
         
-        return result;
+        // 如果页码为空,尝试从末尾提取数字(有些目录格式是 "标题 页码")
+        if (pageNum.isEmpty() && !result.isEmpty()) {
+            java.util.regex.Matcher endNumMatcher = java.util.regex.Pattern.compile("\\s+(\\d+)$").matcher(result);
+            if (endNumMatcher.find()) {
+                pageNum = endNumMatcher.group(1);
+                result = result.substring(0, endNumMatcher.start()).trim();
+            }
+        }
+        
+        return new String[]{result, pageNum};
+    }
+    
+    /**
+     * Cleans TOC entry text by removing field codes (simplified variant that returns only the title).
+     *
+     * Delegates to cleanTocTextWithPage(text), which returns a two-element
+     * [title, page number] array, and keeps only element 0; the page number is discarded.
+     *
+     * @param text raw TOC entry text; a null value yields an empty string, because
+     *             cleanTocTextWithPage returns {"", ""} for null input
+     * @return the cleaned title portion of the TOC entry
+     */
+    private String cleanTocText(String text) {
+        return cleanTocTextWithPage(text)[0];
     }
     
     /**

+ 31 - 9
frontend/vue-demo/src/views/Editor.vue

@@ -925,6 +925,19 @@ function wrapWithParagraphTag(content, type, style) {
   
   const styleAttr = styleAttrs.length > 0 ? ` style="${styleAttrs.join(';')}"` : ''
   
+  // 目录项特殊处理
+  if (type === 'toc_item') {
+    const pageNum = style?.tocPageNum || ''
+    // 计算缩进级别(根据章节号判断)
+    let level = 0
+    const levelMatch = content.match(/^(\d+(?:\.\d+)*)/)
+    if (levelMatch) {
+      level = (levelMatch[1].match(/\./g) || []).length
+    }
+    const indentStyle = level > 0 ? ` style="padding-left:${level * 20}px"` : ''
+    return `<div class="doc-toc-item"${indentStyle}><span class="toc-title">${content}</span><span class="toc-dots"></span><span class="toc-page">${pageNum}</span></div>`
+  }
+  
   switch (type) {
     case 'heading1':
       return `<h1${styleAttr}>${content}</h1>`
@@ -936,8 +949,6 @@ function wrapWithParagraphTag(content, type, style) {
       return `<h2${styleAttr}>${content}</h2>`
     case 'toc':
       return `<div class="doc-toc-title"${styleAttr}>${content}</div>`
-    case 'toc_item':
-      return `<div class="doc-toc-item"${styleAttr}>${content}</div>`
     case 'bullet':
     case 'list_item':
       return `<div class="doc-list-item bullet"${styleAttr}>${content}</div>`
@@ -1494,10 +1505,9 @@ onUnmounted(() => {
     
     :deep(.doc-toc-item) {
       display: flex;
-      justify-content: space-between;
       align-items: baseline;
-      padding: 4px 0;
-      border-bottom: 1px dotted #ccc;
+      padding: 6px 0;
+      line-height: 1.6;
       cursor: pointer;
       transition: background-color 0.2s;
       
@@ -1505,12 +1515,24 @@ onUnmounted(() => {
         background-color: #f5f5f5;
       }
       
-      // 页码样式(如果有的话)
-      &::after {
-        content: attr(data-page);
+      .toc-title {
+        flex-shrink: 0;
+        white-space: nowrap;
+      }
+      
+      .toc-dots {
+        flex: 1;
+        border-bottom: 1px dotted #999;
+        margin: 0 8px;
+        min-width: 20px;
+        height: 0.6em;
+      }
+      
+      .toc-page {
         flex-shrink: 0;
-        margin-left: 8px;
         color: #666;
+        min-width: 20px;
+        text-align: right;
       }
     }