| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517 |
- package com.lingyue.document.service;
- import com.lingyue.common.exception.ServiceException;
- import com.lingyue.document.dto.StructuredDocumentDTO;
- import com.lingyue.document.dto.StructuredDocumentDTO.*;
- import com.lingyue.document.entity.Document;
- import com.lingyue.document.entity.DocumentBlock;
- import com.lingyue.document.entity.DocumentBlock.TextElement;
- import com.lingyue.document.entity.DocumentElement;
- import com.lingyue.document.repository.DocumentBlockRepository;
- import com.lingyue.document.repository.DocumentRepository;
- import lombok.RequiredArgsConstructor;
- import lombok.extern.slf4j.Slf4j;
- import org.springframework.stereotype.Service;
- import org.springframework.transaction.annotation.Transactional;
- import java.util.*;
- import java.util.stream.Collectors;
- /**
- * 结构化文档服务(参考飞书设计)
- *
- * 核心设计:
- * - 文档由 Block 树组成,每个 Block 包含 elements 数组
- * - 实体作为 TextElement(type=entity)嵌入块中
- * - 编辑时修改 elements 数组,无需处理字符偏移
- *
- * @author lingyue
- * @since 2026-01-21
- */
- @Slf4j
- @Service
- @RequiredArgsConstructor
- public class StructuredDocumentService {
-
- private final DocumentRepository documentRepository;
- private final DocumentBlockRepository blockRepository;
- private final DocumentElementService documentElementService;
-
- /**
- * 获取结构化文档(用于编辑器渲染)
- */
- public StructuredDocumentDTO getStructuredDocument(String documentId) {
- // 1. 获取文档基本信息
- Document document = documentRepository.selectById(documentId);
- if (document == null) {
- return null;
- }
-
- // 2. 获取所有块
- List<DocumentBlock> blocks = blockRepository.findByDocumentId(documentId);
-
- // 3. 构建块 DTO 列表
- List<BlockDTO> blockDTOs = blocks.stream()
- .map(this::buildBlockDTO)
- .collect(Collectors.toList());
-
- // 4. 统计实体
- EntityStats stats = buildEntityStats(blocks);
-
- // 5. 获取图片列表
- List<ImageDTO> images = buildImageList(documentId);
-
- return StructuredDocumentDTO.builder()
- .documentId(documentId)
- .revision(1) // TODO: 实现版本控制
- .title(document.getName())
- .status(document.getStatus())
- .blocks(blockDTOs)
- .images(images)
- .entityStats(stats)
- .updatedAt(document.getUpdateTime())
- .build();
- }
-
- /**
- * 构建图片列表(从 document_elements 表获取)
- */
- private List<ImageDTO> buildImageList(String documentId) {
- List<DocumentElement> imageElements = documentElementService.getImagesByDocumentId(documentId);
-
- return imageElements.stream()
- .map(el -> ImageDTO.builder()
- .index(el.getElementIndex())
- .url(el.getImageUrl())
- .alt(el.getImageAlt())
- .width(el.getImageWidth())
- .height(el.getImageHeight())
- .format(el.getImageFormat())
- .build())
- .collect(Collectors.toList());
- }
-
- /**
- * 构建块 DTO
- */
- private BlockDTO buildBlockDTO(DocumentBlock block) {
- return BlockDTO.builder()
- .id(block.getId())
- .parentId(block.getParentId())
- .children(block.getChildren())
- .index(block.getBlockIndex())
- .type(block.getBlockType())
- .elements(block.getElements())
- .plainText(block.getPlainText())
- .html(block.toHtml())
- .markedHtml(block.toMarkedHtml())
- .build();
- }
-
- /**
- * 构建实体统计(从块的 elements 中提取)
- */
- private EntityStats buildEntityStats(List<DocumentBlock> blocks) {
- int total = 0;
- int confirmed = 0;
- Map<String, Integer> byType = new HashMap<>();
-
- for (DocumentBlock block : blocks) {
- if (block.getElements() == null) continue;
-
- for (TextElement el : block.getElements()) {
- if ("entity".equals(el.getType())) {
- total++;
- if (Boolean.TRUE.equals(el.getConfirmed())) {
- confirmed++;
- }
- String entityType = el.getEntityType();
- if (entityType != null) {
- byType.merge(entityType, 1, Integer::sum);
- }
- }
- }
- }
-
- return EntityStats.builder()
- .total(total)
- .confirmed(confirmed)
- .byType(byType)
- .build();
- }
-
- // ==================== 块操作 ====================
-
- /**
- * 更新块的 elements
- */
- @Transactional
- public void updateBlockElements(String blockId, List<TextElement> elements) {
- DocumentBlock block = blockRepository.selectById(blockId);
- if (block == null) {
- throw new ServiceException("块不存在: " + blockId);
- }
-
- block.setElements(elements);
- block.setUpdateTime(new Date());
- blockRepository.updateById(block);
-
- log.info("更新块元素: blockId={}, elementCount={}", blockId, elements.size());
- }
-
- /**
- * 在块内添加实体(将文本片段转为实体元素)
- *
- * @param blockId 块ID
- * @param elementIndex 要转换的元素索引
- * @param startOffset 在该元素文本中的起始位置
- * @param endOffset 在该元素文本中的结束位置
- * @param entityType 实体类型
- * @return 新创建的实体ID
- */
- @Transactional
- public String markEntity(String blockId, int elementIndex, int startOffset, int endOffset, String entityType) {
- DocumentBlock block = blockRepository.selectById(blockId);
- if (block == null || block.getElements() == null) {
- throw new ServiceException("块不存在或没有内容");
- }
-
- List<TextElement> elements = new ArrayList<>(block.getElements());
- if (elementIndex >= elements.size()) {
- throw new ServiceException("元素索引越界");
- }
-
- TextElement targetElement = elements.get(elementIndex);
- if (!"text_run".equals(targetElement.getType()) || targetElement.getContent() == null) {
- throw new ServiceException("只能在文本元素上标记实体");
- }
-
- String content = targetElement.getContent();
- if (startOffset < 0 || endOffset > content.length() || startOffset >= endOffset) {
- throw new ServiceException("偏移量无效");
- }
-
- String entityId = UUID.randomUUID().toString().replace("-", "");
- String entityText = content.substring(startOffset, endOffset);
-
- // 拆分元素:前段文本 + 实体 + 后段文本
- List<TextElement> newElements = new ArrayList<>();
-
- // 前段文本
- if (startOffset > 0) {
- TextElement before = new TextElement();
- before.setType("text_run");
- before.setContent(content.substring(0, startOffset));
- before.setStyle(targetElement.getStyle());
- newElements.add(before);
- }
-
- // 实体元素
- TextElement entity = new TextElement();
- entity.setType("entity");
- entity.setEntityId(entityId);
- entity.setEntityText(entityText);
- entity.setEntityType(entityType);
- entity.setConfirmed(true); // 手动标记的直接确认
- newElements.add(entity);
-
- // 后段文本
- if (endOffset < content.length()) {
- TextElement after = new TextElement();
- after.setType("text_run");
- after.setContent(content.substring(endOffset));
- after.setStyle(targetElement.getStyle());
- newElements.add(after);
- }
-
- // 替换原元素
- elements.remove(elementIndex);
- elements.addAll(elementIndex, newElements);
-
- block.setElements(elements);
- block.setUpdateTime(new Date());
- blockRepository.updateById(block);
-
- log.info("标记实体: blockId={}, entityId={}, text={}, type={}", blockId, entityId, entityText, entityType);
-
- return entityId;
- }
-
- /**
- * 删除实体标记(将实体元素还原为文本)
- */
- @Transactional
- public void unmarkEntity(String blockId, String entityId) {
- DocumentBlock block = blockRepository.selectById(blockId);
- if (block == null || block.getElements() == null) {
- return;
- }
-
- List<TextElement> elements = new ArrayList<>(block.getElements());
-
- for (int i = 0; i < elements.size(); i++) {
- TextElement el = elements.get(i);
- if ("entity".equals(el.getType()) && entityId.equals(el.getEntityId())) {
- // 将实体还原为文本
- TextElement textEl = new TextElement();
- textEl.setType("text_run");
- textEl.setContent(el.getEntityText());
- elements.set(i, textEl);
- break;
- }
- }
-
- // 合并相邻的文本元素
- elements = mergeAdjacentTextRuns(elements);
-
- block.setElements(elements);
- block.setUpdateTime(new Date());
- blockRepository.updateById(block);
-
- log.info("取消实体标记: blockId={}, entityId={}", blockId, entityId);
- }
-
- /**
- * 更新实体类型
- */
- @Transactional
- public void updateEntityType(String blockId, String entityId, String newType) {
- DocumentBlock block = blockRepository.selectById(blockId);
- if (block == null || block.getElements() == null) {
- return;
- }
-
- for (TextElement el : block.getElements()) {
- if ("entity".equals(el.getType()) && entityId.equals(el.getEntityId())) {
- el.setEntityType(newType);
- break;
- }
- }
-
- block.setUpdateTime(new Date());
- blockRepository.updateById(block);
- }
-
- /**
- * 确认实体
- */
- @Transactional
- public void confirmEntity(String blockId, String entityId) {
- DocumentBlock block = blockRepository.selectById(blockId);
- if (block == null || block.getElements() == null) {
- return;
- }
-
- for (TextElement el : block.getElements()) {
- if ("entity".equals(el.getType()) && entityId.equals(el.getEntityId())) {
- el.setConfirmed(true);
- break;
- }
- }
-
- block.setUpdateTime(new Date());
- blockRepository.updateById(block);
- }
-
- /**
- * 合并相邻的文本元素
- */
- private List<TextElement> mergeAdjacentTextRuns(List<TextElement> elements) {
- if (elements.size() <= 1) {
- return elements;
- }
-
- List<TextElement> merged = new ArrayList<>();
- TextElement current = null;
-
- for (TextElement el : elements) {
- if ("text_run".equals(el.getType())) {
- if (current == null) {
- current = new TextElement();
- current.setType("text_run");
- current.setContent(el.getContent());
- current.setStyle(el.getStyle());
- } else {
- // 合并文本
- current.setContent(current.getContent() + el.getContent());
- }
- } else {
- if (current != null) {
- merged.add(current);
- current = null;
- }
- merged.add(el);
- }
- }
-
- if (current != null) {
- merged.add(current);
- }
-
- return merged;
- }
-
- // ==================== 块增删操作 ====================
-
- /**
- * 创建新块
- */
- @Transactional
- public DocumentBlock createBlock(String documentId, String parentId, int index,
- String blockType, List<TextElement> elements) {
- DocumentBlock block = new DocumentBlock();
- block.setId(UUID.randomUUID().toString().replace("-", ""));
- block.setDocumentId(documentId);
- block.setParentId(parentId);
- block.setBlockIndex(index);
- block.setBlockType(blockType);
- block.setElements(elements);
- block.setCreateTime(new Date());
- block.setUpdateTime(new Date());
-
- blockRepository.insert(block);
-
- // 更新父块的 children
- if (parentId != null) {
- DocumentBlock parent = blockRepository.selectById(parentId);
- if (parent != null) {
- List<String> children = parent.getChildren();
- if (children == null) {
- children = new ArrayList<>();
- }
- children.add(block.getId());
- parent.setChildren(children);
- blockRepository.updateById(parent);
- }
- }
-
- log.info("创建块: documentId={}, blockId={}, type={}", documentId, block.getId(), blockType);
- return block;
- }
-
- /**
- * 删除块
- */
- @Transactional
- public void deleteBlock(String blockId) {
- DocumentBlock block = blockRepository.selectById(blockId);
- if (block == null) {
- return;
- }
-
- // 递归删除子块
- if (block.getChildren() != null) {
- for (String childId : block.getChildren()) {
- deleteBlock(childId);
- }
- }
-
- // 从父块的 children 中移除
- if (block.getParentId() != null) {
- DocumentBlock parent = blockRepository.selectById(block.getParentId());
- if (parent != null && parent.getChildren() != null) {
- parent.getChildren().remove(block.getId());
- blockRepository.updateById(parent);
- }
- }
-
- blockRepository.deleteById(blockId);
- log.info("删除块: blockId={}", blockId);
- }
-
- // ==================== 批量操作 ====================
-
- /**
- * 批量保存文档块(用于 NER 完成后生成结构化文档)
- *
- * @param documentId 文档ID
- * @param blocks 块列表(来自 DocumentBlockGeneratorService)
- * @return 保存的块数量
- */
- @Transactional
- public int saveBlocksBatch(String documentId, List<Map<String, Object>> blocks) {
- if (blocks == null || blocks.isEmpty()) {
- log.warn("批量保存块: 块列表为空, documentId={}", documentId);
- return 0;
- }
-
- // 先删除该文档的旧块
- int deleted = blockRepository.deleteByDocumentId(documentId);
- if (deleted > 0) {
- log.info("删除旧块: documentId={}, count={}", documentId, deleted);
- }
-
- // 保存新块
- int savedCount = 0;
- for (Map<String, Object> blockMap : blocks) {
- try {
- DocumentBlock block = convertMapToBlock(blockMap);
- block.setDocumentId(documentId);
- block.setCreateTime(new Date());
- block.setUpdateTime(new Date());
-
- blockRepository.insert(block);
- savedCount++;
- } catch (Exception e) {
- log.error("保存块失败: documentId={}, block={}, error={}",
- documentId, blockMap, e.getMessage());
- }
- }
-
- log.info("批量保存块完成: documentId={}, savedCount={}", documentId, savedCount);
- return savedCount;
- }
-
- /**
- * 将 Map 转换为 DocumentBlock
- */
- @SuppressWarnings("unchecked")
- private DocumentBlock convertMapToBlock(Map<String, Object> blockMap) {
- DocumentBlock block = new DocumentBlock();
-
- block.setId((String) blockMap.get("blockId"));
- block.setDocumentId((String) blockMap.get("documentId"));
- block.setParentId((String) blockMap.get("parentId"));
- block.setBlockIndex(getIntValue(blockMap, "blockIndex", 0));
- block.setBlockType((String) blockMap.get("blockType"));
-
- // 转换 children
- Object childrenObj = blockMap.get("children");
- if (childrenObj instanceof List) {
- block.setChildren((List<String>) childrenObj);
- }
-
- // 转换 elements
- Object elementsObj = blockMap.get("elements");
- if (elementsObj instanceof List) {
- List<Map<String, Object>> elementMaps = (List<Map<String, Object>>) elementsObj;
- List<TextElement> elements = new ArrayList<>();
-
- for (Map<String, Object> elMap : elementMaps) {
- TextElement el = new TextElement();
- el.setType((String) elMap.get("type"));
- el.setContent((String) elMap.get("content"));
- el.setEntityId((String) elMap.get("entityId"));
- el.setEntityText((String) elMap.get("entityText"));
- el.setEntityType((String) elMap.get("entityType"));
- el.setConfirmed((Boolean) elMap.get("confirmed"));
- el.setUrl((String) elMap.get("url"));
- el.setRefDocId((String) elMap.get("refDocId"));
- el.setRefDocTitle((String) elMap.get("refDocTitle"));
- elements.add(el);
- }
-
- block.setElements(elements);
- }
-
- return block;
- }
-
- private int getIntValue(Map<String, Object> map, String key, int defaultValue) {
- Object value = map.get(key);
- if (value instanceof Number) {
- return ((Number) value).intValue();
- }
- return defaultValue;
- }
- }
|