1 сар өмнө · 8684762da5
--- a/backend/document-service/src/main/java/com/lingyue/document/dto/StructuredDocumentDTO.java
+++ b/backend/document-service/src/main/java/com/lingyue/document/dto/StructuredDocumentDTO.java
@@ -44,6 +44,9 @@ public class StructuredDocumentDTO {
 
															     @Schema(description = "内容块列表")
														
 
															     private List<BlockDTO> blocks;
														
 
															+    @Schema(description = "图片列表（从 document_elements 中提取）")
														
 
															+    private List<ImageDTO> images;
														
 
															+    
														
 
															     @Schema(description = "实体统计")
														
 
															     private EntityStats entityStats;
														
@@ -88,6 +91,35 @@ public class StructuredDocumentDTO {
 
															         private String markedHtml;
														
 
															     }
														
 
															+    /**
														
 
															+     * 图片 DTO
														
 
															+     */
														
 
															+    @Data
														
 
															+    @Builder
														
 
															+    @NoArgsConstructor
														
 
															+    @AllArgsConstructor
														
 
															+    @Schema(description = "图片信息")
														
 
															+    public static class ImageDTO {
														
 
															+        
														
 
															+        @Schema(description = "图片在文档中的顺序索引")
														
 
															+        private Integer index;
														
 
															+        
														
 
															+        @Schema(description = "图片访问 URL")
														
 
															+        private String url;
														
 
															+        
														
 
															+        @Schema(description = "图片描述/替代文本")
														
 
															+        private String alt;
														
 
															+        
														
 
															+        @Schema(description = "图片宽度（像素）")
														
 
															+        private Integer width;
														
 
															+        
														
 
															+        @Schema(description = "图片高度（像素）")
														
 
															+        private Integer height;
														
 
															+        
														
 
															+        @Schema(description = "图片格式")
														
 
															+        private String format;
														
 
															+    }
														
 
															+    
														
 
															     /**
														
 
															      * 实体统计
														
 
															      */
														
--- a/backend/document-service/src/main/java/com/lingyue/document/service/StructuredDocumentService.java
+++ b/backend/document-service/src/main/java/com/lingyue/document/service/StructuredDocumentService.java
@@ -6,6 +6,7 @@ import com.lingyue.document.dto.StructuredDocumentDTO.*;
 
															 import com.lingyue.document.entity.Document;
														
 
															 import com.lingyue.document.entity.DocumentBlock;
														
 
															 import com.lingyue.document.entity.DocumentBlock.TextElement;
														
 
															+import com.lingyue.document.entity.DocumentElement;
														
 
															 import com.lingyue.document.repository.DocumentBlockRepository;
														
 
															 import com.lingyue.document.repository.DocumentRepository;
														
 
															 import lombok.RequiredArgsConstructor;
														
@@ -34,6 +35,7 @@ public class StructuredDocumentService {
 
															     private final DocumentRepository documentRepository;
														
 
															     private final DocumentBlockRepository blockRepository;
														
 
															+    private final DocumentElementService documentElementService;
														
 
															     /**
														
 
															      * 获取结构化文档（用于编辑器渲染）
														
@@ -56,17 +58,39 @@ public class StructuredDocumentService {
 
															         // 4. 统计实体
														
 
															         EntityStats stats = buildEntityStats(blocks);
														
 
															+        // 5. 获取图片列表
														
 
															+        List<ImageDTO> images = buildImageList(documentId);
														
 
															+        
														
 
															         return StructuredDocumentDTO.builder()
														
 
															                 .documentId(documentId)
														
 
															                 .revision(1) // TODO: 实现版本控制
														
 
															                 .title(document.getName())
														
 
															                 .status(document.getStatus())
														
 
															                 .blocks(blockDTOs)
														
 
															+                .images(images)
														
 
															                 .entityStats(stats)
														
 
															                 .updatedAt(document.getUpdateTime())
														
 
															                 .build();
														
 
															     }
														
 
															+    /**
														
 
															+     * 构建图片列表（从 document_elements 表获取）
														
 
															+     */
														
 
															+    private List<ImageDTO> buildImageList(String documentId) {
														
 
															+        List<DocumentElement> imageElements = documentElementService.getImagesByDocumentId(documentId);
														
 
															+        
														
 
															+        return imageElements.stream()
														
 
															+                .map(el -> ImageDTO.builder()
														
 
															+                        .index(el.getElementIndex())
														
 
															+                        .url(el.getImageUrl())
														
 
															+                        .alt(el.getImageAlt())
														
 
															+                        .width(el.getImageWidth())
														
 
															+                        .height(el.getImageHeight())
														
 
															+                        .format(el.getImageFormat())
														
 
															+                        .build())
														
 
															+                .collect(Collectors.toList());
														
 
															+    }
														
 
															+    
														
 
															     /**
														
 
															      * 构建块 DTO
														
 
															      */
														
--- a/backend/parse-service/src/main/java/com/lingyue/parse/service/WordTextExtractionService.java
+++ b/backend/parse-service/src/main/java/com/lingyue/parse/service/WordTextExtractionService.java
@@ -4,13 +4,14 @@ import com.lingyue.common.exception.ServiceException;
 
															 import lombok.extern.slf4j.Slf4j;
														
 
															 import org.apache.poi.hwpf.HWPFDocument;
														
 
															 import org.apache.poi.hwpf.extractor.WordExtractor;
														
 
															-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
														
 
															 import org.apache.poi.xwpf.usermodel.XWPFDocument;
														
 
															+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
														
 
															 import org.springframework.stereotype.Service;
														
 
															 import java.io.File;
														
 
															 import java.io.FileInputStream;
														
 
															 import java.io.IOException;
														
 
															+import java.util.regex.Pattern;
														
 
															 /**
														
 
															  * Word文档文本提取服务
														
@@ -23,6 +24,14 @@ import java.io.IOException;
 
															 @Service
														
 
															 public class WordTextExtractionService {
														
 
															+    // 匹配 Word 批注格式: "Comment by xxx: ..." 或 "批注 [xxx]: ..."
														
 
															+    private static final Pattern COMMENT_PATTERN = Pattern.compile(
														
 
															+            "(?m)^\\s*Comment by [^:]+:.*$|" +          // English format
														
 
															+            "(?m)^\\s*批注\\s*\\[[^\\]]+\\]:.*$|" +     // Chinese format
														
 
															+            "\\[Comment by [^\\]]+\\]|" +                // Inline comment marker
														
 
															+            "\\[批注[^\\]]*\\]"                          // Inline Chinese comment
														
 
															+    );
														
 
															+    
														
 
															     /**
														
 
															      * 提取Word文档文本
														
 
															      * 
														
@@ -52,18 +61,34 @@ public class WordTextExtractionService {
 
															     }
														
 
															     /**
														
 
															-     * 从.docx文件提取文本
														
 
															+     * 从.docx文件提取文本（不包含批注）
														
 
															+     * 
														
 
															+     * 注意：XWPFWordExtractor.getText() 会包含批注内容，
														
 
															+     * 所以我们直接遍历段落提取文本，跳过批注。
														
 
															      */
														
 
															     private String extractFromDocx(String filePath) throws IOException {
														
 
															         log.info("提取.docx文件文本: {}", filePath);
														
 
															         try (FileInputStream fis = new FileInputStream(filePath);
														
 
															-             XWPFDocument document = new XWPFDocument(fis);
														
 
															-             XWPFWordExtractor extractor = new XWPFWordExtractor(document)) {
														
 
															+             XWPFDocument document = new XWPFDocument(fis)) {
														
 
															-            String text = extractor.getText();
														
 
															-            log.debug("提取到文本长度: {}", text != null ? text.length() : 0);
														
 
															-            return text != null ? text : "";
														
 
															+            StringBuilder textBuilder = new StringBuilder();
														
 
															+            
														
 
															+            // 遍历所有段落，只提取正文文本（不包含批注）
														
 
															+            for (XWPFParagraph paragraph : document.getParagraphs()) {
														
 
															+                String paragraphText = paragraph.getText();
														
 
															+                if (paragraphText != null && !paragraphText.isEmpty()) {
														
 
															+                    textBuilder.append(paragraphText).append("\n");
														
 
															+                }
														
 
															+            }
														
 
															+            
														
 
															+            String text = textBuilder.toString();
														
 
															+            
														
 
															+            // 过滤掉可能残留的批注文本
														
 
															+            text = removeCommentText(text);
														
 
															+            
														
 
															+            log.debug("提取到文本长度: {}", text.length());
														
 
															+            return text;
														
 
															         }
														
 
															     }
														
@@ -78,8 +103,34 @@ public class WordTextExtractionService {
 
															              WordExtractor extractor = new WordExtractor(document)) {
														
 
															             String text = extractor.getText();
														
 
															+            
														
 
															+            // 过滤批注
														
 
															+            if (text != null) {
														
 
															+                text = removeCommentText(text);
														
 
															+            }
														
 
															+            
														
 
															             log.debug("提取到文本长度: {}", text != null ? text.length() : 0);
														
 
															             return text != null ? text : "";
														
 
															         }
														
 
															     }
														
 
															+    
														
 
															+    /**
														
 
															+     * 移除文本中的批注内容
														
 
															+     */
														
 
															+    private String removeCommentText(String text) {
														
 
															+        if (text == null || text.isEmpty()) {
														
 
															+            return text;
														
 
															+        }
														
 
															+        
														
 
															+        String cleaned = COMMENT_PATTERN.matcher(text).replaceAll("");
														
 
															+        
														
 
															+        // 清理多余的空行
														
 
															+        cleaned = cleaned.replaceAll("\\n{3,}", "\n\n");
														
 
															+        
														
 
															+        if (cleaned.length() < text.length()) {
														
 
															+            log.debug("移除批注文本: 原长度={}, 新长度={}", text.length(), cleaned.length());
														
 
															+        }
														
 
															+        
														
 
															+        return cleaned.trim();
														
 
															+    }
														
 
															 }
														
--- a/frontend/vue-demo/src/views/Editor.vue
+++ b/frontend/vue-demo/src/views/Editor.vue
@@ -365,12 +365,9 @@ async function fetchTemplateData() {
 
															     if (baseDocumentId) {
														
 
															       try {
														
 
															         const structuredDoc = await documentApi.getStructured(baseDocumentId)
														
 
															-        // 将结构化文档的 blocks 转换为 HTML 内容
														
 
															+        // 将结构化文档的 blocks 和 images 合并渲染
														
 
															         if (structuredDoc && structuredDoc.blocks && structuredDoc.blocks.length > 0) {
														
 
															-          // 优先使用 markedHtml（带实体标注），其次使用 html
														
 
															-          documentContent.value = structuredDoc.blocks
														
 
															-            .map(block => block.markedHtml || block.html || block.plainText || '')
														
 
															-            .join('')
														
 
															+          documentContent.value = renderStructuredDocument(structuredDoc)
														
 
															         } else {
														
 
															           documentContent.value = emptyPlaceholder
														
 
															         }
														
@@ -427,6 +424,48 @@ const emptyPlaceholder = `
 
															   </div>
														
 
															 `
														
 
															/**
															 * Renders a structured document into a single HTML string, merging its
															 * blocks and images.
															 *
															 * Without images, the blocks are concatenated in the order received. With
															 * images, blocks and images are merged and sorted by `index` (a missing
															 * index counts as 0) so each image lands at its position in the document.
															 *
															 * Image fields (`url`, `alt`, `width`, `height`) are HTML-escaped before
															 * being interpolated, so a value containing quotes or angle brackets cannot
															 * break out of its attribute or inject markup. Block HTML is passed through
															 * unchanged.
															 *
															 * @param {Object} structuredDoc structured document with optional `blocks` and `images` arrays
															 * @returns {string} concatenated HTML
															 */
															function renderStructuredDocument(structuredDoc) {
															  const blocks = structuredDoc.blocks || []
															  const images = structuredDoc.images || []

															  // Escapes the characters that are significant in HTML text and attribute values.
															  const escapeHtml = value => String(value)
															    .replace(/&/g, '&amp;')
															    .replace(/</g, '&lt;')
															    .replace(/>/g, '&gt;')
															    .replace(/"/g, '&quot;')
															    .replace(/'/g, '&#39;')

															  // Prefer the entity-annotated HTML, then plain HTML, then raw text.
															  const blockHtml = block => block.markedHtml || block.html || block.plainText || ''

															  // No images: render the blocks directly.
															  if (images.length === 0) {
															    return blocks.map(blockHtml).join('')
															  }

															  const imageHtml = img => {
															    const widthAttr = img.width ? ` width="${escapeHtml(img.width)}"` : ''
															    const heightAttr = img.height ? ` height="${escapeHtml(img.height)}"` : ''
															    const caption = img.alt
															      ? `<p class="image-caption" style="color: #666; font-size: 12px; margin-top: 8px;">${escapeHtml(img.alt)}</p>`
															      : ''
															    return '<div class="doc-image" style="text-align: center; margin: 16px 0;">' +
															      `<img src="${escapeHtml(img.url)}" alt="${escapeHtml(img.alt || '图片')}"` +
															      ` style="max-width: 100%; height: auto;"${widthAttr}${heightAttr} />` +
															      caption +
															      '</div>'
															  }

															  // Merge blocks and images into one list keyed by index.
															  const allElements = [
															    ...blocks.map(block => ({ type: 'block', index: block.index, html: blockHtml(block) })),
															    ...images.map(img => ({ type: 'image', index: img.index, html: imageHtml(img) }))
															  ]

															  // Sort by index; a missing index is treated as 0.
															  allElements.sort((a, b) => (a.index || 0) - (b.index || 0))

															  return allElements.map(el => el.html).join('')
															}
														
 
															+
														
 
															 // 计算属性
														
 
															 const groupedVariables = computed(() => {
														
 
															   const groups = {}
														
@@ -464,9 +503,7 @@ async function handleRegenerateBlocks() {
 
															     // 重新加载文档内容
														
 
															     const structuredDoc = await documentApi.getStructured(baseDocumentId)
														
 
															     if (structuredDoc && structuredDoc.blocks && structuredDoc.blocks.length > 0) {
														
 
															-      documentContent.value = structuredDoc.blocks
														
 
															-        .map(block => block.markedHtml || block.html || block.plainText || '')
														
 
															-        .join('')
														
 
															+      documentContent.value = renderStructuredDocument(structuredDoc)
														
 
															     }
														
 
															   } catch (error) {
														
 
															     console.error('重新生成失败:', error)