|
|
@@ -3,17 +3,27 @@ package com.lingyue.document.service;
|
|
|
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
|
|
import com.baomidou.mybatisplus.core.metadata.IPage;
|
|
|
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
|
|
+import com.lingyue.common.exception.ServiceException;
|
|
|
import com.lingyue.document.entity.Document;
|
|
|
+import com.lingyue.document.repository.DocumentElementRepository;
|
|
|
import com.lingyue.document.repository.DocumentRepository;
|
|
|
import lombok.RequiredArgsConstructor;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
+import org.springframework.jdbc.core.JdbcTemplate;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.transaction.annotation.Transactional;
|
|
|
import org.springframework.util.StringUtils;
|
|
|
|
|
|
+import java.io.IOException;
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
+import java.nio.file.FileVisitResult;
|
|
|
import java.nio.file.Files;
|
|
|
import java.nio.file.Path;
|
|
|
+import java.nio.file.SimpleFileVisitor;
|
|
|
+import java.nio.file.attribute.BasicFileAttributes;
|
|
|
+import java.util.Date;
|
|
|
|
|
|
/**
|
|
|
* 文档服务
|
|
|
@@ -27,10 +37,17 @@ import java.nio.file.Path;
|
|
|
public class DocumentService {
|
|
|
|
|
|
private final DocumentRepository documentRepository;
|
|
|
+ private final DocumentElementRepository documentElementRepository;
|
|
|
+
|
|
|
+ @Autowired(required = false)
|
|
|
+ private JdbcTemplate jdbcTemplate;
|
|
|
|
|
|
@Value("${file.storage.text-path:/data/lingyue/texts}")
|
|
|
private String textStoragePath;
|
|
|
|
|
|
+ @Value("${file.storage.base-path:/data/lingyue/files}")
|
|
|
+ private String fileStoragePath;
|
|
|
+
|
|
|
/**
|
|
|
* 根据ID获取文档
|
|
|
*/
|
|
|
@@ -121,12 +138,249 @@ public class DocumentService {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 删除文档
|
|
|
+ * 更新文档信息
|
|
|
+ *
|
|
|
+ * @param documentId 文档ID
|
|
|
+ * @param name 文档名称(可选)
|
|
|
+ * @param status 状态(可选)
|
|
|
+ * @param metadata 元数据(可选)
|
|
|
+ * @return 更新后的文档
|
|
|
+ */
|
|
|
+ @Transactional
|
|
|
+ public Document updateDocument(String documentId, String name, String status, Object metadata) {
|
|
|
+ Document document = documentRepository.selectById(documentId);
|
|
|
+ if (document == null) {
|
|
|
+ throw new ServiceException("文档不存在: " + documentId, 404);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (StringUtils.hasText(name)) {
|
|
|
+ document.setName(name);
|
|
|
+ }
|
|
|
+ if (StringUtils.hasText(status)) {
|
|
|
+ document.setStatus(status);
|
|
|
+ }
|
|
|
+ if (metadata != null) {
|
|
|
+ document.setMetadata(metadata);
|
|
|
+ }
|
|
|
+
|
|
|
+ document.setUpdateTime(new Date());
|
|
|
+ documentRepository.updateById(document);
|
|
|
+
|
|
|
+ log.info("更新文档: documentId={}, name={}", documentId, name);
|
|
|
+ return document;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除文档(简单删除,不级联)
|
|
|
+ * @deprecated 使用 {@link #deleteDocumentCascade(String)} 代替
|
|
|
*/
|
|
|
+ @Deprecated
|
|
|
public void deleteDocument(String documentId) {
|
|
|
documentRepository.deleteById(documentId);
|
|
|
log.info("删除文档: documentId={}", documentId);
|
|
|
- // TODO: 同时删除关联的文本文件、图节点等
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 级联删除文档及其所有关联数据
|
|
|
+ *
|
|
|
+ * 删除顺序(遵循外键约束):
|
|
|
+ * 1. vector_embeddings (通过 chunk_id 关联)
|
|
|
+ * 2. text_chunks (document_id)
|
|
|
+ * 3. graph_relations (通过 node_id 关联)
|
|
|
+ * 4. graph_nodes (document_id)
|
|
|
+ * 5. document_elements (document_id)
|
|
|
+ * 6. parse_tasks (document_id)
|
|
|
+ * 7. documents (主表)
|
|
|
+ * 8. 文本文件
|
|
|
+ * 9. 图片目录
|
|
|
+ *
|
|
|
+ * @param documentId 文档ID
|
|
|
+ */
|
|
|
+ @Transactional
|
|
|
+ public void deleteDocumentCascade(String documentId) {
|
|
|
+ Document document = documentRepository.selectById(documentId);
|
|
|
+ if (document == null) {
|
|
|
+ throw new ServiceException("文档不存在: " + documentId, 404);
|
|
|
+ }
|
|
|
+
|
|
|
+ String userId = document.getUserId();
|
|
|
+ log.info("开始级联删除文档: documentId={}, userId={}", documentId, userId);
|
|
|
+
|
|
|
+ // 1. 删除向量嵌入(通过 text_chunks 关联)
|
|
|
+ int vectorCount = deleteVectorEmbeddingsByDocumentId(documentId);
|
|
|
+ log.debug("删除向量嵌入: count={}", vectorCount);
|
|
|
+
|
|
|
+ // 2. 删除文本分块
|
|
|
+ int chunkCount = deleteTextChunksByDocumentId(documentId);
|
|
|
+ log.debug("删除文本分块: count={}", chunkCount);
|
|
|
+
|
|
|
+ // 3. 删除图关系(通过 graph_nodes 关联)
|
|
|
+ int relationCount = deleteGraphRelationsByDocumentId(documentId);
|
|
|
+ log.debug("删除图关系: count={}", relationCount);
|
|
|
+
|
|
|
+ // 4. 删除图节点
|
|
|
+ int nodeCount = deleteGraphNodesByDocumentId(documentId);
|
|
|
+ log.debug("删除图节点: count={}", nodeCount);
|
|
|
+
|
|
|
+ // 5. 删除结构化元素
|
|
|
+ int elementCount = documentElementRepository.deleteByDocumentId(documentId);
|
|
|
+ log.debug("删除结构化元素: count={}", elementCount);
|
|
|
+
|
|
|
+ // 6. 删除解析任务
|
|
|
+ int taskCount = deleteParseTasksByDocumentId(documentId);
|
|
|
+ log.debug("删除解析任务: count={}", taskCount);
|
|
|
+
|
|
|
+ // 7. 删除文档记录
|
|
|
+ documentRepository.deleteById(documentId);
|
|
|
+ log.debug("删除文档记录");
|
|
|
+
|
|
|
+ // 8. 删除文本文件(不影响事务)
|
|
|
+ deleteTextFile(documentId);
|
|
|
+
|
|
|
+ // 9. 删除图片目录(不影响事务)
|
|
|
+ deleteImageDirectory(userId, documentId);
|
|
|
+
|
|
|
+ log.info("级联删除文档完成: documentId={}, 删除向量={}, 分块={}, 关系={}, 节点={}, 元素={}, 任务={}",
|
|
|
+ documentId, vectorCount, chunkCount, relationCount, nodeCount, elementCount, taskCount);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除向量嵌入
|
|
|
+ */
|
|
|
+ private int deleteVectorEmbeddingsByDocumentId(String documentId) {
|
|
|
+ if (jdbcTemplate == null) {
|
|
|
+ log.warn("JdbcTemplate 未注入,跳过向量嵌入删除");
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ String sql = """
|
|
|
+ DELETE FROM vector_embeddings
|
|
|
+ WHERE chunk_id IN (
|
|
|
+ SELECT id FROM text_chunks WHERE document_id = ?
|
|
|
+ )
|
|
|
+ """;
|
|
|
+ return jdbcTemplate.update(sql, documentId);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("删除向量嵌入失败: {}", e.getMessage());
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除文本分块
|
|
|
+ */
|
|
|
+ private int deleteTextChunksByDocumentId(String documentId) {
|
|
|
+ if (jdbcTemplate == null) {
|
|
|
+ log.warn("JdbcTemplate 未注入,跳过文本分块删除");
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ return jdbcTemplate.update("DELETE FROM text_chunks WHERE document_id = ?", documentId);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("删除文本分块失败: {}", e.getMessage());
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除图关系(通过节点关联)
|
|
|
+ */
|
|
|
+ private int deleteGraphRelationsByDocumentId(String documentId) {
|
|
|
+ if (jdbcTemplate == null) {
|
|
|
+ log.warn("JdbcTemplate 未注入,跳过图关系删除");
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ String sql = """
|
|
|
+ DELETE FROM graph_relations
|
|
|
+ WHERE from_node_id IN (
|
|
|
+ SELECT id FROM graph_nodes WHERE document_id = ?
|
|
|
+ )
|
|
|
+ OR to_node_id IN (
|
|
|
+ SELECT id FROM graph_nodes WHERE document_id = ?
|
|
|
+ )
|
|
|
+ """;
|
|
|
+ return jdbcTemplate.update(sql, documentId, documentId);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("删除图关系失败: {}", e.getMessage());
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除图节点
|
|
|
+ */
|
|
|
+ private int deleteGraphNodesByDocumentId(String documentId) {
|
|
|
+ if (jdbcTemplate == null) {
|
|
|
+ log.warn("JdbcTemplate 未注入,跳过图节点删除");
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ return jdbcTemplate.update("DELETE FROM graph_nodes WHERE document_id = ?", documentId);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("删除图节点失败: {}", e.getMessage());
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除解析任务
|
|
|
+ */
|
|
|
+ private int deleteParseTasksByDocumentId(String documentId) {
|
|
|
+ if (jdbcTemplate == null) {
|
|
|
+ log.warn("JdbcTemplate 未注入,跳过解析任务删除");
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ return jdbcTemplate.update("DELETE FROM parse_tasks WHERE document_id = ?", documentId);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("删除解析任务失败: {}", e.getMessage());
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除文本文件
|
|
|
+ */
|
|
|
+ private void deleteTextFile(String documentId) {
|
|
|
+ try {
|
|
|
+ String subDir = documentId.substring(0, 2);
|
|
|
+ Path textFilePath = Path.of(textStoragePath, subDir, documentId + ".txt");
|
|
|
+ if (Files.exists(textFilePath)) {
|
|
|
+ Files.delete(textFilePath);
|
|
|
+ log.debug("删除文本文件: {}", textFilePath);
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("删除文本文件失败: documentId={}, error={}", documentId, e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 删除图片目录
|
|
|
+ */
|
|
|
+ private void deleteImageDirectory(String userId, String documentId) {
|
|
|
+ try {
|
|
|
+ Path imageDir = Path.of(fileStoragePath, userId, documentId);
|
|
|
+ if (Files.exists(imageDir) && Files.isDirectory(imageDir)) {
|
|
|
+ // 递归删除目录及其内容
|
|
|
+ Files.walkFileTree(imageDir, new SimpleFileVisitor<Path>() {
|
|
|
+ @Override
|
|
|
+ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
|
|
|
+ Files.delete(file);
|
|
|
+ return FileVisitResult.CONTINUE;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
|
|
|
+ Files.delete(dir);
|
|
|
+ return FileVisitResult.CONTINUE;
|
|
|
+ }
|
|
|
+ });
|
|
|
+ log.debug("删除图片目录: {}", imageDir);
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("删除图片目录失败: documentId={}, error={}", documentId, e.getMessage());
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|