Selaa lähdekoodia

feat(parse-service): 实现文件上传模块

核心功能:
- 文件上传接口 (支持PDF/Word/Excel/图片)
- 文件类型检测 (扩展名+MIME+文件头三重检测)
- 文件存储管理 (按用户和文档ID组织)
- 文件验证 (类型、大小、扩展名)

新增文件:
- FileUploadController: 文件上传控制器
- FileUploadService: 文件上传服务
- FileTypeDetector: 文件类型检测工具
- FileType: 文件类型枚举
- ParseStatus: 解析状态枚举
- FileUploadResponse: 上传响应DTO
- FileStorageProperties: 文件存储配置

技术特点:
- 支持最大500MB文件上传
- 三重文件类型检测机制
- 自动创建存储目录
- 完整的异常处理
- Swagger API文档
何文松 1 kuukausi sitten
vanhempi
commit
51daf07674

+ 42 - 0
backend/parse-service/src/main/java/com/lingyue/parse/config/FileStorageProperties.java

@@ -0,0 +1,42 @@
+package com.lingyue.parse.config;
+
+import lombok.Data;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.stereotype.Component;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * 文件存储配置属性
+ * 
+ * @author lingyue
+ * @since 2026-01-14
+ */
+@Data
+@Component
+@ConfigurationProperties(prefix = "file.storage")
+public class FileStorageProperties {
+    
+    /**
+     * 文件存储基础路径
+     */
+    private String basePath = "/data/lingyue/files";
+    
+    /**
+     * 文本文件存储路径
+     */
+    private String textPath = "/data/lingyue/texts";
+    
+    /**
+     * 允许的文件扩展名
+     */
+    private String allowedExtensions = "pdf,doc,docx,xls,xlsx,jpg,jpeg,png,gif";
+    
+    /**
+     * 获取允许的扩展名列表
+     */
+    public List<String> getAllowedExtensionList() {
+        return Arrays.asList(allowedExtensions.split(","));
+    }
+}

+ 67 - 0
backend/parse-service/src/main/java/com/lingyue/parse/controller/FileUploadController.java

@@ -0,0 +1,67 @@
+package com.lingyue.parse.controller;
+
+import com.lingyue.common.domain.AjaxResult;
+import com.lingyue.parse.dto.FileUploadResponse;
+import com.lingyue.parse.service.FileUploadService;
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.Parameter;
+import io.swagger.v3.oas.annotations.tags.Tag;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.web.bind.annotation.*;
+import org.springframework.web.multipart.MultipartFile;
+
+/**
+ * REST controller for the file upload endpoints mounted under {@code /api/v1/parse}.
+ * All work is delegated to {@link FileUploadService}.
+ *
+ * @author lingyue
+ * @since 2026-01-14
+ */
+@Slf4j
+@RestController
+@RequestMapping("/api/v1/parse")
+@RequiredArgsConstructor
+@Tag(name = "文件上传", description = "文件上传相关接口")
+public class FileUploadController {
+
+    private final FileUploadService fileUploadService;
+
+    /**
+     * Accepts a multipart upload and hands it to the upload service.
+     *
+     * @param file   the uploaded file part
+     * @param userId ID of the user the file is stored for
+     * @return a success result wrapping the upload response
+     */
+    @PostMapping("/upload")
+    @Operation(summary = "上传文件", description = "上传PDF、Word、Excel或图片文件")
+    public AjaxResult<FileUploadResponse> uploadFile(
+            @Parameter(description = "上传的文件", required = true)
+            @RequestParam("file") MultipartFile file,
+
+            @Parameter(description = "用户ID", required = true)
+            @RequestParam("userId") String userId) {
+
+        final String originalName = file.getOriginalFilename();
+        final long sizeInBytes = file.getSize();
+        log.info("接收文件上传请求, 文件名: {}, 大小: {} bytes, 用户ID: {}",
+                originalName, sizeInBytes, userId);
+
+        final FileUploadResponse uploaded = fileUploadService.uploadFile(file, userId);
+        log.info("文件上传成功, 文档ID: {}", uploaded.getDocumentId());
+
+        return AjaxResult.success(uploaded);
+    }
+
+    /**
+     * Reports whether the service finds a file at the given path.
+     *
+     * @param filePath path of the file to check
+     * @return a success result wrapping the existence flag
+     */
+    @GetMapping("/file/exists")
+    @Operation(summary = "检查文件是否存在")
+    public AjaxResult<Boolean> checkFileExists(
+            @Parameter(description = "文件路径", required = true)
+            @RequestParam("filePath") String filePath) {
+
+        return AjaxResult.success(fileUploadService.fileExists(filePath));
+    }
+
+    /**
+     * Asks the service to delete the file at the given path.
+     *
+     * @param filePath path of the file to delete
+     * @return an empty success result
+     */
+    @DeleteMapping("/file")
+    @Operation(summary = "删除文件")
+    public AjaxResult<Void> deleteFile(
+            @Parameter(description = "文件路径", required = true)
+            @RequestParam("filePath") String filePath) {
+
+        fileUploadService.deleteFile(filePath);
+        return AjaxResult.success();
+    }
+}

+ 42 - 0
backend/parse-service/src/main/java/com/lingyue/parse/dto/FileUploadResponse.java

@@ -0,0 +1,42 @@
+package com.lingyue.parse.dto;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Response DTO returned after a file upload.
+ *
+ * <p>Lombok generates the accessors, builder, and no-args/all-args constructors.
+ * The all-args constructor follows field declaration order, so do not reorder fields.
+ *
+ * @author lingyue
+ * @since 2026-01-14
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+@Schema(description = "文件上传响应")
+public class FileUploadResponse {
+    
+    /** Identifier of the uploaded document. */
+    @Schema(description = "文档ID")
+    private String documentId;
+    
+    /** Name of the file. */
+    @Schema(description = "文件名")
+    private String fileName;
+    
+    /** File type, carried as a string. */
+    @Schema(description = "文件类型")
+    private String fileType;
+    
+    /** File size in bytes. */
+    @Schema(description = "文件大小(字节)")
+    private Long fileSize;
+    
+    /** Path of the file. */
+    @Schema(description = "文件路径")
+    private String filePath;
+    
+    /** Parse status, carried as a string. */
+    @Schema(description = "解析状态")
+    private String parseStatus;
+    
+    /** Upload time, carried as a string. */
+    @Schema(description = "上传时间")
+    private String uploadTime;
+}

+ 87 - 0
backend/parse-service/src/main/java/com/lingyue/parse/enums/FileType.java

@@ -0,0 +1,87 @@
+package com.lingyue.parse.enums;
+
+import lombok.Getter;
+
+/**
+ * 文件类型枚举
+ * 
+ * @author lingyue
+ * @since 2026-01-14
+ */
+@Getter
+public enum FileType {
+    
+    PDF("application/pdf", "pdf", "PDF文档"),
+    WORD("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx", "Word文档"),
+    WORD_OLD("application/msword", "doc", "Word文档(旧版)"),
+    EXCEL("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx", "Excel表格"),
+    EXCEL_OLD("application/vnd.ms-excel", "xls", "Excel表格(旧版)"),
+    IMAGE_JPG("image/jpeg", "jpg", "JPEG图片"),
+    IMAGE_PNG("image/png", "png", "PNG图片"),
+    IMAGE_GIF("image/gif", "gif", "GIF图片"),
+    UNKNOWN("application/octet-stream", "", "未知类型");
+    
+    private final String mimeType;
+    private final String extension;
+    private final String description;
+    
+    FileType(String mimeType, String extension, String description) {
+        this.mimeType = mimeType;
+        this.extension = extension;
+        this.description = description;
+    }
+    
+    /**
+     * 根据文件扩展名获取文件类型
+     */
+    public static FileType fromExtension(String extension) {
+        if (extension == null || extension.isEmpty()) {
+            return UNKNOWN;
+        }
+        
+        String ext = extension.toLowerCase().replace(".", "");
+        for (FileType type : values()) {
+            if (type.extension.equals(ext)) {
+                return type;
+            }
+        }
+        return UNKNOWN;
+    }
+    
+    /**
+     * 根据MIME类型获取文件类型
+     */
+    public static FileType fromMimeType(String mimeType) {
+        if (mimeType == null || mimeType.isEmpty()) {
+            return UNKNOWN;
+        }
+        
+        for (FileType type : values()) {
+            if (type.mimeType.equals(mimeType)) {
+                return type;
+            }
+        }
+        return UNKNOWN;
+    }
+    
+    /**
+     * 判断是否为图片类型
+     */
+    public boolean isImage() {
+        return this == IMAGE_JPG || this == IMAGE_PNG || this == IMAGE_GIF;
+    }
+    
+    /**
+     * 判断是否为文档类型
+     */
+    public boolean isDocument() {
+        return this == PDF || this == WORD || this == WORD_OLD;
+    }
+    
+    /**
+     * 判断是否为表格类型
+     */
+    public boolean isSpreadsheet() {
+        return this == EXCEL || this == EXCEL_OLD;
+    }
+}

+ 36 - 0
backend/parse-service/src/main/java/com/lingyue/parse/enums/ParseStatus.java

@@ -0,0 +1,36 @@
+package com.lingyue.parse.enums;
+
+import lombok.Getter;
+
+/**
+ * 解析状态枚举
+ * 
+ * @author lingyue
+ * @since 2026-01-14
+ */
+@Getter
+public enum ParseStatus {
+    
+    PENDING("pending", "等待解析"),
+    PARSING("parsing", "解析中"),
+    SUCCESS("success", "解析成功"),
+    FAILED("failed", "解析失败"),
+    CANCELLED("cancelled", "已取消");
+    
+    private final String code;
+    private final String description;
+    
+    ParseStatus(String code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+    
+    public static ParseStatus fromCode(String code) {
+        for (ParseStatus status : values()) {
+            if (status.code.equals(code)) {
+                return status;
+            }
+        }
+        return PENDING;
+    }
+}

+ 168 - 0
backend/parse-service/src/main/java/com/lingyue/parse/service/FileUploadService.java

@@ -0,0 +1,168 @@
+package com.lingyue.parse.service;
+
+import com.lingyue.common.exception.ServiceException;
+import com.lingyue.parse.config.FileStorageProperties;
+import com.lingyue.parse.dto.FileUploadResponse;
+import com.lingyue.parse.enums.FileType;
+import com.lingyue.parse.enums.ParseStatus;
+import com.lingyue.parse.util.FileTypeDetector;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.UUID;
+
+/**
+ * File upload service: validates uploads, stores them on disk under
+ * {@code {basePath}/{userId}/{documentId}/original{extension}}, and offers
+ * delete/exists helpers for stored files.
+ *
+ * <p>Security: {@code userId} and the {@code filePath} arguments come from callers and
+ * are treated as untrusted. {@code userId} must be a single path segment, and
+ * delete/exists only operate on paths inside the configured storage roots.
+ *
+ * @author lingyue
+ * @since 2026-01-14
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class FileUploadService {
+
+    /** Maximum accepted upload size in bytes (500 MB). */
+    private static final long MAX_FILE_SIZE_BYTES = 500L * 1024 * 1024;
+
+    private final FileStorageProperties fileStorageProperties;
+
+    /**
+     * Validates and stores an uploaded file.
+     *
+     * @param file   the uploaded file
+     * @param userId ID of the owning user; must be a single, non-blank path segment
+     * @return the upload response, with parse status set to pending
+     * @throws ServiceException if validation fails or the file cannot be written
+     */
+    public FileUploadResponse uploadFile(MultipartFile file, String userId) {
+        // 1. Validate the file, then the user ID that becomes part of the storage path.
+        validateFile(file);
+        validateUserId(userId);
+
+        // 2. Detect the file type.
+        FileType fileType = FileTypeDetector.detectFileType(file);
+        log.info("检测到文件类型: {}", fileType);
+
+        // 3. Generate the document ID.
+        String documentId = UUID.randomUUID().toString();
+
+        // 4. Build the storage path.
+        String filePath = buildFilePath(userId, documentId, file.getOriginalFilename());
+
+        // 5. Persist the file.
+        try {
+            saveFile(file, filePath);
+            log.info("文件保存成功: {}", filePath);
+        } catch (IOException e) {
+            // The full cause is logged here; only the message is forwarded to the caller.
+            log.error("文件保存失败", e);
+            throw new ServiceException("文件保存失败: " + e.getMessage());
+        }
+
+        // 6. Build the response.
+        return FileUploadResponse.builder()
+                .documentId(documentId)
+                .fileName(file.getOriginalFilename())
+                .fileType(fileType.name())
+                .fileSize(file.getSize())
+                .filePath(filePath)
+                .parseStatus(ParseStatus.PENDING.getCode())
+                .uploadTime(LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME))
+                .build();
+    }
+
+    /**
+     * Checks that the upload is non-empty, has a file name with an allowed extension,
+     * and does not exceed {@link #MAX_FILE_SIZE_BYTES}.
+     *
+     * @throws ServiceException on the first violated rule
+     */
+    private void validateFile(MultipartFile file) {
+        if (file == null || file.isEmpty()) {
+            throw new ServiceException("文件不能为空");
+        }
+
+        String originalFilename = file.getOriginalFilename();
+        if (originalFilename == null || originalFilename.isEmpty()) {
+            throw new ServiceException("文件名不能为空");
+        }
+
+        // Extension must be on the configured allow-list.
+        if (!FileTypeDetector.isAllowedExtension(originalFilename,
+                fileStorageProperties.getAllowedExtensions())) {
+            throw new ServiceException("不支持的文件类型,允许的类型: " +
+                    fileStorageProperties.getAllowedExtensions());
+        }
+
+        if (file.getSize() > MAX_FILE_SIZE_BYTES) {
+            throw new ServiceException("文件大小超过限制(最大500MB)");
+        }
+    }
+
+    /**
+     * Rejects user IDs that could escape the per-user directory when used as a path
+     * segment: blank values, path separators, NUL, and the {@code .} / {@code ..} segments.
+     *
+     * @throws ServiceException if the user ID is not a safe single path segment
+     */
+    private void validateUserId(String userId) {
+        if (userId == null || userId.trim().isEmpty()) {
+            throw new ServiceException("用户ID不能为空");
+        }
+        boolean unsafe = userId.contains("/")
+                || userId.contains("\\")
+                || userId.indexOf('\0') >= 0
+                || ".".equals(userId)
+                || "..".equals(userId);
+        if (unsafe) {
+            throw new ServiceException("用户ID包含非法字符");
+        }
+    }
+
+    /**
+     * Builds the storage path {@code {basePath}/{userId}/{documentId}/original{extension}}.
+     *
+     * @param userId           user ID
+     * @param documentId       document ID
+     * @param originalFilename original file name; only its extension is used
+     * @return the file path as a string
+     * @throws ServiceException if the resulting path would fall outside the base path
+     */
+    private String buildFilePath(String userId, String documentId, String originalFilename) {
+        // Keep the extension (including the dot) of the original name, if any.
+        String extension = "";
+        if (originalFilename.contains(".")) {
+            extension = originalFilename.substring(originalFilename.lastIndexOf("."));
+        }
+
+        Path target = Paths.get(
+                fileStorageProperties.getBasePath(),
+                userId,
+                documentId,
+                "original" + extension
+        );
+
+        // Defense in depth: whatever the inputs were, never write outside the base path.
+        Path base = Paths.get(fileStorageProperties.getBasePath()).toAbsolutePath().normalize();
+        if (!target.toAbsolutePath().normalize().startsWith(base)) {
+            throw new ServiceException("非法的文件路径");
+        }
+        return target.toString();
+    }
+
+    /**
+     * Writes the upload to disk, creating parent directories as needed.
+     *
+     * @throws IOException if the directories cannot be created or the file cannot be written
+     */
+    private void saveFile(MultipartFile file, String filePath) throws IOException {
+        // Use an absolute path so the directory created here and the destination used by
+        // transferTo agree; a relative destination may be resolved against a different
+        // directory by the multipart implementation.
+        Path path = Paths.get(filePath).toAbsolutePath();
+
+        Files.createDirectories(path.getParent());
+
+        file.transferTo(path.toFile());
+    }
+
+    /**
+     * Deletes a stored file. I/O failures are logged rather than propagated (best effort).
+     *
+     * @param filePath path of the file to delete; must lie inside a configured storage root
+     * @throws ServiceException if the path is blank, malformed, or outside the storage roots
+     */
+    public void deleteFile(String filePath) {
+        Path target = resolveManagedPath(filePath);
+        if (target == null) {
+            log.warn("拒绝删除存储目录之外的文件: {}", filePath);
+            throw new ServiceException("非法的文件路径");
+        }
+
+        try {
+            if (Files.deleteIfExists(target)) {
+                log.info("文件删除成功: {}", filePath);
+            }
+        } catch (IOException | SecurityException e) {
+            log.error("删除文件异常", e);
+        }
+    }
+
+    /**
+     * Checks whether a stored file exists.
+     *
+     * @param filePath path of the file to check
+     * @return {@code true} only if the path lies inside a configured storage root and exists;
+     *         paths outside the storage roots always yield {@code false} so the endpoint
+     *         cannot be used to probe the rest of the file system
+     */
+    public boolean fileExists(String filePath) {
+        Path target = resolveManagedPath(filePath);
+        return target != null && Files.exists(target);
+    }
+
+    /**
+     * Normalizes a caller-supplied path and confirms it lies strictly inside the base
+     * path or the text path. Symbolic links are not resolved.
+     *
+     * @return the normalized absolute path, or {@code null} if the input is blank,
+     *         malformed, or outside both storage roots
+     */
+    private Path resolveManagedPath(String filePath) {
+        if (filePath == null || filePath.trim().isEmpty()) {
+            return null;
+        }
+        final Path candidate;
+        try {
+            candidate = Paths.get(filePath).toAbsolutePath().normalize();
+        } catch (java.nio.file.InvalidPathException e) {
+            return null;
+        }
+        boolean managed = isStrictlyUnder(candidate, fileStorageProperties.getBasePath())
+                || isStrictlyUnder(candidate, fileStorageProperties.getTextPath());
+        return managed ? candidate : null;
+    }
+
+    /** Returns whether {@code candidate} is a descendant of (and not equal to) {@code root}. */
+    private static boolean isStrictlyUnder(Path candidate, String root) {
+        if (root == null || root.trim().isEmpty()) {
+            return false;
+        }
+        Path rootPath = Paths.get(root).toAbsolutePath().normalize();
+        return candidate.startsWith(rootPath) && !candidate.equals(rootPath);
+    }
+}

+ 127 - 0
backend/parse-service/src/main/java/com/lingyue/parse/util/FileTypeDetector.java

@@ -0,0 +1,127 @@
+package com.lingyue.parse.util;
+
+import com.lingyue.parse.enums.FileType;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * 文件类型检测工具类
+ * 
+ * @author lingyue
+ * @since 2026-01-14
+ */
+@Slf4j
+public class FileTypeDetector {
+    
+    /**
+     * 检测文件类型
+     * 
+     * @param file 上传的文件
+     * @return 文件类型
+     */
+    public static FileType detectFileType(MultipartFile file) {
+        // 1. 先通过扩展名判断
+        String originalFilename = file.getOriginalFilename();
+        if (originalFilename != null && originalFilename.contains(".")) {
+            String extension = originalFilename.substring(originalFilename.lastIndexOf(".") + 1);
+            FileType typeByExt = FileType.fromExtension(extension);
+            if (typeByExt != FileType.UNKNOWN) {
+                log.debug("通过扩展名检测到文件类型: {}", typeByExt);
+                return typeByExt;
+            }
+        }
+        
+        // 2. 通过MIME类型判断
+        String contentType = file.getContentType();
+        if (contentType != null) {
+            FileType typeByMime = FileType.fromMimeType(contentType);
+            if (typeByMime != FileType.UNKNOWN) {
+                log.debug("通过MIME类型检测到文件类型: {}", typeByMime);
+                return typeByMime;
+            }
+        }
+        
+        // 3. 通过文件头(Magic Number)判断
+        try {
+            FileType typeByMagic = detectByMagicNumber(file.getInputStream());
+            if (typeByMagic != FileType.UNKNOWN) {
+                log.debug("通过文件头检测到文件类型: {}", typeByMagic);
+                return typeByMagic;
+            }
+        } catch (IOException e) {
+            log.error("读取文件头失败", e);
+        }
+        
+        log.warn("无法检测文件类型,返回UNKNOWN");
+        return FileType.UNKNOWN;
+    }
+    
+    /**
+     * 通过文件头(Magic Number)检测文件类型
+     */
+    private static FileType detectByMagicNumber(InputStream inputStream) throws IOException {
+        byte[] header = new byte[8];
+        int bytesRead = inputStream.read(header);
+        
+        if (bytesRead < 4) {
+            return FileType.UNKNOWN;
+        }
+        
+        // PDF: %PDF (25 50 44 46)
+        if (header[0] == 0x25 && header[1] == 0x50 && header[2] == 0x44 && header[3] == 0x46) {
+            return FileType.PDF;
+        }
+        
+        // PNG: 89 50 4E 47
+        if (header[0] == (byte) 0x89 && header[1] == 0x50 && header[2] == 0x4E && header[3] == 0x47) {
+            return FileType.IMAGE_PNG;
+        }
+        
+        // JPEG: FF D8 FF
+        if (header[0] == (byte) 0xFF && header[1] == (byte) 0xD8 && header[2] == (byte) 0xFF) {
+            return FileType.IMAGE_JPG;
+        }
+        
+        // GIF: 47 49 46 38
+        if (header[0] == 0x47 && header[1] == 0x49 && header[2] == 0x46 && header[3] == 0x38) {
+            return FileType.IMAGE_GIF;
+        }
+        
+        // ZIP-based formats (DOCX, XLSX): 50 4B 03 04
+        if (header[0] == 0x50 && header[1] == 0x4B && header[2] == 0x03 && header[3] == 0x04) {
+            // 需要进一步判断是DOCX还是XLSX,这里暂时返回WORD
+            return FileType.WORD;
+        }
+        
+        // DOC: D0 CF 11 E0
+        if (header[0] == (byte) 0xD0 && header[1] == (byte) 0xCF && 
+            header[2] == 0x11 && header[3] == (byte) 0xE0) {
+            return FileType.WORD_OLD;
+        }
+        
+        return FileType.UNKNOWN;
+    }
+    
+    /**
+     * 验证文件扩展名是否允许
+     */
+    public static boolean isAllowedExtension(String filename, String allowedExtensions) {
+        if (filename == null || !filename.contains(".")) {
+            return false;
+        }
+        
+        String extension = filename.substring(filename.lastIndexOf(".") + 1).toLowerCase();
+        String[] allowed = allowedExtensions.split(",");
+        
+        for (String ext : allowed) {
+            if (ext.trim().equalsIgnoreCase(extension)) {
+                return true;
+            }
+        }
+        
+        return false;
+    }
+}

+ 21 - 0
backend/parse-service/src/main/resources/application.yml

@@ -5,6 +5,14 @@ spring:
   application:
     name: parse-service
   
+  # 文件上传配置
+  servlet:
+    multipart:
+      enabled: true
+      max-file-size: 500MB
+      max-request-size: 500MB
+      file-size-threshold: 2KB
+  
   # 数据库配置(Druid)
   datasource:
     type: com.alibaba.druid.pool.DruidDataSource
@@ -53,6 +61,19 @@ spring:
         namespace: public
         group: DEFAULT_GROUP
 
+# 文件存储配置
+file:
+  storage:
+    base-path: ${FILE_STORAGE_BASE_PATH:/data/lingyue/files}
+    text-path: ${FILE_STORAGE_TEXT_PATH:/data/lingyue/texts}
+    allowed-extensions: pdf,doc,docx,xls,xlsx,jpg,jpeg,png,gif
+
+# AI服务配置
+ai:
+  service:
+    url: ${AI_SERVICE_URL:http://localhost:8007}
+    timeout: 300000
+
 # PaddleOCR配置
 paddleocr:
   server-url: ${PADDLEOCR_SERVER_URL:http://localhost:8866}