1 ماه پیش · 8684762da5
--- a/backend/document-service/src/main/java/com/lingyue/document/dto/StructuredDocumentDTO.java
+++ b/backend/document-service/src/main/java/com/lingyue/document/dto/StructuredDocumentDTO.java
@@ -44,6 +44,9 @@ public class StructuredDocumentDTO {
 
				     @Schema(description = "内容块列表")
			
 
				     private List<BlockDTO> blocks;
			
 
				     
			
 
				+    @Schema(description = "图片列表（从 document_elements 中提取）")
			
 
				+    private List<ImageDTO> images;
			
 
				+    
			
 
				     @Schema(description = "实体统计")
			
 
				     private EntityStats entityStats;
			
 
				     
			
@@ -88,6 +91,35 @@ public class StructuredDocumentDTO {
 
				         private String markedHtml;
			
 
				     }
			
 
				     
			
 
				+    /**
			
 
				+     * Image DTO: describes a single image of the document.
				+     * <p>
				+     * Keep the field declaration order stable: Lombok's {@code @AllArgsConstructor}
				+     * derives its parameter order from it.
			
 
				+     */
			
 
				+    @Data
			
 
				+    @Builder
			
 
				+    @NoArgsConstructor
			
 
				+    @AllArgsConstructor
			
 
				+    @Schema(description = "图片信息")
			
 
				+    public static class ImageDTO {
			
 
				+        
			
 
				+        /** Sequential index of the image within the document. */
				+        @Schema(description = "图片在文档中的顺序索引")
			
 
				+        private Integer index;
			
 
				+        
			
 
				+        /** URL under which the image can be fetched. */
				+        @Schema(description = "图片访问 URL")
			
 
				+        private String url;
			
 
				+        
			
 
				+        /** Description / alternative text of the image. */
				+        @Schema(description = "图片描述/替代文本")
			
 
				+        private String alt;
			
 
				+        
			
 
				+        /** Image width in pixels. */
				+        @Schema(description = "图片宽度（像素）")
			
 
				+        private Integer width;
			
 
				+        
			
 
				+        /** Image height in pixels. */
				+        @Schema(description = "图片高度（像素）")
			
 
				+        private Integer height;
			
 
				+        
			
 
				+        /** Image format. */
				+        @Schema(description = "图片格式")
			
 
				+        private String format;
			
 
				+    }
			
 
				+    
			
 
				     /**
			
 
				      * 实体统计
			
 
				      */
			
--- a/backend/document-service/src/main/java/com/lingyue/document/service/StructuredDocumentService.java
+++ b/backend/document-service/src/main/java/com/lingyue/document/service/StructuredDocumentService.java
@@ -6,6 +6,7 @@ import com.lingyue.document.dto.StructuredDocumentDTO.*;
 
				 import com.lingyue.document.entity.Document;
			
 
				 import com.lingyue.document.entity.DocumentBlock;
			
 
				 import com.lingyue.document.entity.DocumentBlock.TextElement;
			
 
				+import com.lingyue.document.entity.DocumentElement;
			
 
				 import com.lingyue.document.repository.DocumentBlockRepository;
			
 
				 import com.lingyue.document.repository.DocumentRepository;
			
 
				 import lombok.RequiredArgsConstructor;
			
@@ -34,6 +35,7 @@ public class StructuredDocumentService {
 
				     
			
 
				     private final DocumentRepository documentRepository;
			
 
				     private final DocumentBlockRepository blockRepository;
			
 
				+    private final DocumentElementService documentElementService;
			
 
				     
			
 
				     /**
			
 
				      * 获取结构化文档（用于编辑器渲染）
			
@@ -56,17 +58,39 @@ public class StructuredDocumentService {
 
				         // 4. 统计实体
			
 
				         EntityStats stats = buildEntityStats(blocks);
			
 
				         
			
 
				+        // 5. 获取图片列表
			
 
				+        List<ImageDTO> images = buildImageList(documentId);
			
 
				+        
			
 
				         return StructuredDocumentDTO.builder()
			
 
				                 .documentId(documentId)
			
 
				                 .revision(1) // TODO: 实现版本控制
			
 
				                 .title(document.getName())
			
 
				                 .status(document.getStatus())
			
 
				                 .blocks(blockDTOs)
			
 
				+                .images(images)
			
 
				                 .entityStats(stats)
			
 
				                 .updatedAt(document.getUpdateTime())
			
 
				                 .build();
			
 
				     }
			
 
				     
			
 
				+    /**
				+     * Builds the image list of a document from its {@code document_elements} rows.
				+     *
				+     * @param documentId ID of the document whose images are requested
				+     * @return one {@link ImageDTO} per image element, in the order the service returns them
				+     */
				+    private List<ImageDTO> buildImageList(String documentId) {
				+        return documentElementService.getImagesByDocumentId(documentId)
				+                .stream()
				+                .map(this::imageElementToDto)
				+                .collect(Collectors.toList());
				+    }
				+    
				+    /**
				+     * Copies the image-related attributes of one element into an {@link ImageDTO}.
				+     */
				+    private ImageDTO imageElementToDto(DocumentElement element) {
				+        return ImageDTO.builder()
				+                .index(element.getElementIndex())
				+                .url(element.getImageUrl())
				+                .alt(element.getImageAlt())
				+                .width(element.getImageWidth())
				+                .height(element.getImageHeight())
				+                .format(element.getImageFormat())
				+                .build();
				+    }
			
 
				+    
			
 
				     /**
			
 
				      * 构建块 DTO
			
 
				      */
			
--- a/backend/parse-service/src/main/java/com/lingyue/parse/service/WordTextExtractionService.java
+++ b/backend/parse-service/src/main/java/com/lingyue/parse/service/WordTextExtractionService.java
@@ -4,13 +4,14 @@ import com.lingyue.common.exception.ServiceException;
 
				 import lombok.extern.slf4j.Slf4j;
			
 
				 import org.apache.poi.hwpf.HWPFDocument;
			
 
				 import org.apache.poi.hwpf.extractor.WordExtractor;
			
 
				-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
			
 
				 import org.apache.poi.xwpf.usermodel.XWPFDocument;
			
 
				+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
			
 
				 import org.springframework.stereotype.Service;
			
 
				 
			
 
				 import java.io.File;
			
 
				 import java.io.FileInputStream;
			
 
				 import java.io.IOException;
			
 
				+import java.util.regex.Pattern;
			
 
				 
			
 
				 /**
			
 
				  * Word文档文本提取服务
			
@@ -23,6 +24,14 @@ import java.io.IOException;
 
				 @Service
			
 
				 public class WordTextExtractionService {
			
 
				     
			
 
				+    /**
				+     * Matches Word review-comment text: whole lines of the form
				+     * "Comment by xxx: ..." or "批注 [xxx]: ...", plus inline "[Comment by xxx]" and
				+     * "[批注...]" markers.
				+     * <p>
				+     * Every alternative is confined to a single line. {@code \s} and a negated class
				+     * such as {@code [^:]} also match line terminators, so leading whitespace is limited
				+     * to spaces/tabs and the negated classes exclude CR/LF; otherwise one match could
				+     * swallow several lines of body text. The leading {@code (?m)} applies to the whole
				+     * pattern.
				+     */
				+    private static final Pattern COMMENT_PATTERN = Pattern.compile(
				+            "(?m)^[ \\t]*Comment by [^:\\r\\n]+:.*$|" +            // English format
				+            "^[ \\t]*批注[ \\t]*\\[[^\\]\\r\\n]+\\]:.*$|" +          // Chinese format
				+            "\\[Comment by [^\\]\\r\\n]+\\]|" +                    // Inline comment marker
				+            "\\[批注[^\\]\\r\\n]*\\]"                              // Inline Chinese comment
				+    );
			
 
				+    
			
 
				     /**
			
 
				      * 提取Word文档文本
			
 
				      * 
			
@@ -52,18 +61,34 @@ public class WordTextExtractionService {
 
				     }
			
 
				     
			
 
				     /**
				-     * 从.docx文件提取文本
				+     * Extracts the text of a .docx file without review comments.
				+     * <p>
				+     * {@code XWPFWordExtractor.getText()} includes comment text, so the body is walked
				+     * directly instead. Body elements are visited in document order and both paragraphs
				+     * and tables are extracted; iterating {@code getParagraphs()} alone would silently
				+     * drop all table content.
				+     *
				+     * @param filePath path of the .docx file to read
				+     * @return the extracted text after comment filtering; never {@code null}
				+     * @throws IOException if the file cannot be opened or parsed
				      */
				     private String extractFromDocx(String filePath) throws IOException {
				         log.info("提取.docx文件文本: {}", filePath);
				         
				         try (FileInputStream fis = new FileInputStream(filePath);
				-             XWPFDocument document = new XWPFDocument(fis);
				-             XWPFWordExtractor extractor = new XWPFWordExtractor(document)) {
				+             XWPFDocument document = new XWPFDocument(fis)) {
				             
				-            String text = extractor.getText();
				-            log.debug("提取到文本长度: {}", text != null ? text.length() : 0);
				-            return text != null ? text : "";
				+            StringBuilder textBuilder = new StringBuilder();
				+            
				+            // Fully qualified POI types keep this method independent of the import block.
				+            for (org.apache.poi.xwpf.usermodel.IBodyElement element : document.getBodyElements()) {
				+                if (element instanceof XWPFParagraph) {
				+                    String paragraphText = ((XWPFParagraph) element).getText();
				+                    if (paragraphText != null && !paragraphText.isEmpty()) {
				+                        textBuilder.append(paragraphText).append("\n");
				+                    }
				+                } else if (element instanceof org.apache.poi.xwpf.usermodel.XWPFTable) {
				+                    String tableText = ((org.apache.poi.xwpf.usermodel.XWPFTable) element).getText();
				+                    if (tableText != null && !tableText.isEmpty()) {
				+                        textBuilder.append(tableText);
				+                        // Make sure the next element starts on a new line.
				+                        if (!tableText.endsWith("\n")) {
				+                            textBuilder.append("\n");
				+                        }
				+                    }
				+                }
				+            }
				+            
				+            // Strip any comment markers that still made it into the text.
				+            String text = removeCommentText(textBuilder.toString());
				+            
				+            log.debug("提取到文本长度: {}", text.length());
				+            return text;
				         }
				     }
			
 
				     
			
@@ -78,8 +103,34 @@ public class WordTextExtractionService {
 
				              WordExtractor extractor = new WordExtractor(document)) {
			
 
				             
			
 
				             String text = extractor.getText();
			
 
				+            
			
 
				+            // 过滤批注
			
 
				+            if (text != null) {
			
 
				+                text = removeCommentText(text);
			
 
				+            }
			
 
				+            
			
 
				             log.debug("提取到文本长度: {}", text != null ? text.length() : 0);
			
 
				             return text != null ? text : "";
			
 
				         }
			
 
				     }
			
 
				+    
			
 
				+    /**
			
 
				+     * 移除文本中的批注内容
			
 
				+     */
			
 
				+    private String removeCommentText(String text) {
			
 
				+        if (text == null || text.isEmpty()) {
			
 
				+            return text;
			
 
				+        }
			
 
				+        
			
 
				+        String cleaned = COMMENT_PATTERN.matcher(text).replaceAll("");
			
 
				+        
			
 
				+        // 清理多余的空行
			
 
				+        cleaned = cleaned.replaceAll("\\n{3,}", "\n\n");
			
 
				+        
			
 
				+        if (cleaned.length() < text.length()) {
			
 
				+            log.debug("移除批注文本: 原长度={}, 新长度={}", text.length(), cleaned.length());
			
 
				+        }
			
 
				+        
			
 
				+        return cleaned.trim();
			
 
				+    }
			
 
				 }
			
--- a/frontend/vue-demo/src/views/Editor.vue
+++ b/frontend/vue-demo/src/views/Editor.vue
@@ -365,12 +365,9 @@ async function fetchTemplateData() {
 
				     if (baseDocumentId) {
			
 
				       try {
			
 
				         const structuredDoc = await documentApi.getStructured(baseDocumentId)
			
 
				-        // 将结构化文档的 blocks 转换为 HTML 内容
			
 
				+        // 将结构化文档的 blocks 和 images 合并渲染
			
 
				         if (structuredDoc && structuredDoc.blocks && structuredDoc.blocks.length > 0) {
			
 
				-          // 优先使用 markedHtml（带实体标注），其次使用 html
			
 
				-          documentContent.value = structuredDoc.blocks
			
 
				-            .map(block => block.markedHtml || block.html || block.plainText || '')
			
 
				-            .join('')
			
 
				+          documentContent.value = renderStructuredDocument(structuredDoc)
			
 
				         } else {
			
 
				           documentContent.value = emptyPlaceholder
			
 
				         }
			
@@ -427,6 +424,48 @@ const emptyPlaceholder = `
 
				   </div>
			
 
				 `
			
 
				 
			
 
				+/**
			
 
				+ * 渲染结构化文档（合并 blocks 和 images）
			
 
				+ * 根据 index 排序，将图片插入到正确的位置
			
 
				+ */
			
 
				+function renderStructuredDocument(structuredDoc) {
			
 
				+  const blocks = structuredDoc.blocks || []
			
 
				+  const images = structuredDoc.images || []
			
 
				+  
			
 
				+  // 如果没有图片，直接渲染 blocks
			
 
				+  if (images.length === 0) {
			
 
				+    return blocks
			
 
				+      .map(block => block.markedHtml || block.html || block.plainText || '')
			
 
				+      .join('')
			
 
				+  }
			
 
				+  
			
 
				+  // 将 blocks 和 images 合并，按 index 排序
			
 
				+  const allElements = [
			
 
				+    ...blocks.map(block => ({
			
 
				+      type: 'block',
			
 
				+      index: block.index,
			
 
				+      html: block.markedHtml || block.html || block.plainText || ''
			
 
				+    })),
			
 
				+    ...images.map(img => ({
			
 
				+      type: 'image',
			
 
				+      index: img.index,
			
 
				+      html: `<div class="doc-image" style="text-align: center; margin: 16px 0;">
			
 
				+        <img src="${img.url}" alt="${img.alt || '图片'}" 
			
 
				+             style="max-width: 100%; height: auto;"
			
 
				+             ${img.width ? `width="${img.width}"` : ''}
			
 
				+             ${img.height ? `height="${img.height}"` : ''} />
			
 
				+        ${img.alt ? `<p class="image-caption" style="color: #666; font-size: 12px; margin-top: 8px;">${img.alt}</p>` : ''}
			
 
				+      </div>`
			
 
				+    }))
			
 
				+  ]
			
 
				+  
			
 
				+  // 按 index 排序
			
 
				+  allElements.sort((a, b) => (a.index || 0) - (b.index || 0))
			
 
				+  
			
 
				+  // 合并 HTML
			
 
				+  return allElements.map(el => el.html).join('')
			
 
				+}
			
 
				+
			
 
				 // 计算属性
			
 
				 const groupedVariables = computed(() => {
			
 
				   const groups = {}
			
@@ -464,9 +503,7 @@ async function handleRegenerateBlocks() {
 
				     // 重新加载文档内容
			
 
				     const structuredDoc = await documentApi.getStructured(baseDocumentId)
			
 
				     if (structuredDoc && structuredDoc.blocks && structuredDoc.blocks.length > 0) {
			
 
				-      documentContent.value = structuredDoc.blocks
			
 
				-        .map(block => block.markedHtml || block.html || block.plainText || '')
			
 
				-        .join('')
			
 
				+      documentContent.value = renderStructuredDocument(structuredDoc)
			
 
				     }
			
 
				   } catch (error) {
			
 
				     console.error('重新生成失败:', error)