|
|
@@ -0,0 +1,388 @@
|
|
|
+package com.lingyue.project.attachment.service;
|
|
|
+
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.poi.xwpf.usermodel.*;
|
|
|
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+
|
|
|
+import java.io.InputStream;
|
|
|
+import java.math.BigInteger;
|
|
|
+import java.util.Base64;
|
|
|
+import java.util.List;
|
|
|
+
|
|
|
+/**
|
|
|
+ * DOCX 解析服务:将 DOCX 文件转为 HTML,保留格式、表格、图片
|
|
|
+ */
|
|
|
+@Slf4j
|
|
|
+@Service
|
|
|
+public class DocxParseService {
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 解析 DOCX 文件为 HTML 字符串
|
|
|
+ * @param inputStream DOCX 文件输入流
|
|
|
+ * @return 完整的 HTML 字符串
|
|
|
+ */
|
|
|
+ public String parseToHtml(InputStream inputStream) throws Exception {
|
|
|
+ try (XWPFDocument document = new XWPFDocument(inputStream)) {
|
|
|
+ StringBuilder html = new StringBuilder();
|
|
|
+ html.append("<!DOCTYPE html><html><head><meta charset=\"UTF-8\">");
|
|
|
+ html.append("<style>");
|
|
|
+ html.append(getDefaultStyles());
|
|
|
+ html.append("</style>");
|
|
|
+ html.append("</head><body>");
|
|
|
+
|
|
|
+ for (IBodyElement element : document.getBodyElements()) {
|
|
|
+ if (element instanceof XWPFParagraph) {
|
|
|
+ html.append(convertParagraph((XWPFParagraph) element, document));
|
|
|
+ } else if (element instanceof XWPFTable) {
|
|
|
+ html.append(convertTable((XWPFTable) element, document));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ html.append("</body></html>");
|
|
|
+ return html.toString();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 解析 DOCX 文件为纯 HTML body 内容(不含 html/head 标签,适合前端内嵌展示)
|
|
|
+ */
|
|
|
+ public String parseToHtmlBody(InputStream inputStream) throws Exception {
|
|
|
+ try (XWPFDocument document = new XWPFDocument(inputStream)) {
|
|
|
+ StringBuilder html = new StringBuilder();
|
|
|
+
|
|
|
+ for (IBodyElement element : document.getBodyElements()) {
|
|
|
+ if (element instanceof XWPFParagraph) {
|
|
|
+ html.append(convertParagraph((XWPFParagraph) element, document));
|
|
|
+ } else if (element instanceof XWPFTable) {
|
|
|
+ html.append(convertTable((XWPFTable) element, document));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return html.toString();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private String convertParagraph(XWPFParagraph paragraph, XWPFDocument document) {
|
|
|
+ if (paragraph.getRuns().isEmpty()) {
|
|
|
+ // 空段落
|
|
|
+ return "<p> </p>\n";
|
|
|
+ }
|
|
|
+
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
+ String tag = "p";
|
|
|
+ String styleAttr = "";
|
|
|
+
|
|
|
+ // 检查标题级别
|
|
|
+ String styleName = paragraph.getStyle();
|
|
|
+ if (styleName != null) {
|
|
|
+ String lower = styleName.toLowerCase();
|
|
|
+ if (lower.startsWith("heading") || lower.startsWith("标题")) {
|
|
|
+ // 尝试从样式名提取级别
|
|
|
+ String num = lower.replaceAll("[^0-9]", "");
|
|
|
+ if (!num.isEmpty()) {
|
|
|
+ int level = Integer.parseInt(num);
|
|
|
+ if (level >= 1 && level <= 6) {
|
|
|
+ tag = "h" + level;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 对齐方式
|
|
|
+ ParagraphAlignment alignment = paragraph.getAlignment();
|
|
|
+ if (alignment != null) {
|
|
|
+ switch (alignment) {
|
|
|
+ case CENTER:
|
|
|
+ styleAttr = " style=\"text-align:center\"";
|
|
|
+ break;
|
|
|
+ case RIGHT:
|
|
|
+ styleAttr = " style=\"text-align:right\"";
|
|
|
+ break;
|
|
|
+ case BOTH:
|
|
|
+ case DISTRIBUTE:
|
|
|
+ styleAttr = " style=\"text-align:justify\"";
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ sb.append("<").append(tag).append(styleAttr).append(">");
|
|
|
+
|
|
|
+ for (XWPFRun run : paragraph.getRuns()) {
|
|
|
+ sb.append(convertRun(run, document));
|
|
|
+ }
|
|
|
+
|
|
|
+ sb.append("</").append(tag).append(">\n");
|
|
|
+ return sb.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ private String convertRun(XWPFRun run, XWPFDocument document) {
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
+
|
|
|
+ // 处理内嵌图片
|
|
|
+ List<XWPFPicture> pictures = run.getEmbeddedPictures();
|
|
|
+ for (XWPFPicture picture : pictures) {
|
|
|
+ sb.append(convertPicture(picture, document));
|
|
|
+ }
|
|
|
+
|
|
|
+ // 处理文本
|
|
|
+ String text = run.getText(0);
|
|
|
+ if (text != null && !text.isEmpty()) {
|
|
|
+ text = escapeHtml(text);
|
|
|
+
|
|
|
+ boolean bold = run.isBold();
|
|
|
+ boolean italic = run.isItalic();
|
|
|
+ boolean underline = run.getUnderline() != UnderlinePatterns.NONE;
|
|
|
+ boolean strike = run.isStrikeThrough();
|
|
|
+
|
|
|
+ // 构建内联样式
|
|
|
+ StringBuilder style = new StringBuilder();
|
|
|
+ String color = run.getColor();
|
|
|
+ if (color != null && !color.isEmpty() && !"000000".equals(color)) {
|
|
|
+ style.append("color:#").append(color).append(";");
|
|
|
+ }
|
|
|
+ int fontSize = run.getFontSize();
|
|
|
+ if (fontSize > 0) {
|
|
|
+ style.append("font-size:").append(fontSize).append("pt;");
|
|
|
+ }
|
|
|
+ String fontFamily = run.getFontFamily();
|
|
|
+ if (fontFamily != null && !fontFamily.isEmpty()) {
|
|
|
+ style.append("font-family:'").append(fontFamily).append("';");
|
|
|
+ }
|
|
|
+
|
|
|
+ if (bold) sb.append("<strong>");
|
|
|
+ if (italic) sb.append("<em>");
|
|
|
+ if (underline) sb.append("<u>");
|
|
|
+ if (strike) sb.append("<s>");
|
|
|
+
|
|
|
+ if (style.length() > 0) {
|
|
|
+ sb.append("<span style=\"").append(style).append("\">");
|
|
|
+ sb.append(text);
|
|
|
+ sb.append("</span>");
|
|
|
+ } else {
|
|
|
+ sb.append(text);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (strike) sb.append("</s>");
|
|
|
+ if (underline) sb.append("</u>");
|
|
|
+ if (italic) sb.append("</em>");
|
|
|
+ if (bold) sb.append("</strong>");
|
|
|
+ }
|
|
|
+
|
|
|
+ // 处理换行
|
|
|
+ if (run.getCTR() != null) {
|
|
|
+ int brCount = run.getCTR().getBrList().size();
|
|
|
+ for (int i = 0; i < brCount; i++) {
|
|
|
+ sb.append("<br/>");
|
|
|
+ }
|
|
|
+
|
|
|
+ // 处理制表符
|
|
|
+ int tabCount = run.getCTR().getTabList().size();
|
|
|
+ for (int i = 0; i < tabCount; i++) {
|
|
|
+ sb.append(" ");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return sb.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ private String convertPicture(XWPFPicture picture, XWPFDocument document) {
|
|
|
+ try {
|
|
|
+ XWPFPictureData pictureData = picture.getPictureData();
|
|
|
+ if (pictureData == null) return "";
|
|
|
+
|
|
|
+ byte[] data = pictureData.getData();
|
|
|
+ String mimeType = pictureData.getPackagePart().getContentType();
|
|
|
+ String base64 = Base64.getEncoder().encodeToString(data);
|
|
|
+
|
|
|
+ // 获取图片尺寸
|
|
|
+ String widthStyle = "";
|
|
|
+ try {
|
|
|
+ var ctPic = picture.getCTPicture();
|
|
|
+ if (ctPic != null && ctPic.getSpPr() != null
|
|
|
+ && ctPic.getSpPr().getXfrm() != null
|
|
|
+ && ctPic.getSpPr().getXfrm().getExt() != null) {
|
|
|
+ long cx = ctPic.getSpPr().getXfrm().getExt().getCx();
|
|
|
+ // EMU to pixels: 1 px = 9525 EMU
|
|
|
+ int widthPx = (int) (cx / 9525);
|
|
|
+ if (widthPx > 0 && widthPx < 2000) {
|
|
|
+ widthStyle = " style=\"max-width:" + widthPx + "px;height:auto\"";
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ // 忽略尺寸获取失败
|
|
|
+ }
|
|
|
+
|
|
|
+ return "<img src=\"data:" + mimeType + ";base64," + base64 + "\""
|
|
|
+ + widthStyle + " alt=\"image\"/>";
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("转换图片失败", e);
|
|
|
+ return "<span>[图片加载失败]</span>";
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private String convertTable(XWPFTable table, XWPFDocument document) {
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
+ sb.append("<table border=\"1\" cellpadding=\"6\" cellspacing=\"0\" style=\"border-collapse:collapse;width:100%\">\n");
|
|
|
+
|
|
|
+ List<XWPFTableRow> rows = table.getRows();
|
|
|
+ for (int rowIdx = 0; rowIdx < rows.size(); rowIdx++) {
|
|
|
+ XWPFTableRow row = rows.get(rowIdx);
|
|
|
+ sb.append("<tr>");
|
|
|
+
|
|
|
+ for (XWPFTableCell cell : row.getTableCells()) {
|
|
|
+ // 第一行默认作为表头
|
|
|
+ String cellTag = (rowIdx == 0) ? "th" : "td";
|
|
|
+
|
|
|
+ // 单元格样式
|
|
|
+ StringBuilder cellStyle = new StringBuilder();
|
|
|
+
|
|
|
+ // 垂直对齐
|
|
|
+ XWPFTableCell.XWPFVertAlign vAlign = cell.getVerticalAlignment();
|
|
|
+ if (vAlign != null) {
|
|
|
+ switch (vAlign) {
|
|
|
+ case CENTER:
|
|
|
+ cellStyle.append("vertical-align:middle;");
|
|
|
+ break;
|
|
|
+ case BOTTOM:
|
|
|
+ cellStyle.append("vertical-align:bottom;");
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 单元格宽度
|
|
|
+ try {
|
|
|
+ CTTcPr tcPr = cell.getCTTc().getTcPr();
|
|
|
+ if (tcPr != null && tcPr.getTcW() != null) {
|
|
|
+ Object wObj = tcPr.getTcW().getW();
|
|
|
+ if (wObj != null) {
|
|
|
+ BigInteger w = (wObj instanceof BigInteger) ? (BigInteger) wObj : BigInteger.valueOf(Long.parseLong(wObj.toString()));
|
|
|
+ // DXA to px: 1px ≈ 15 DXA
|
|
|
+ int widthPx = w.intValue() / 15;
|
|
|
+ if (widthPx > 0) {
|
|
|
+ cellStyle.append("width:").append(widthPx).append("px;");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ // 忽略
|
|
|
+ }
|
|
|
+
|
|
|
+ // 背景色
|
|
|
+ try {
|
|
|
+ String bgColor = cell.getColor();
|
|
|
+ if (bgColor != null && !bgColor.isEmpty()) {
|
|
|
+ cellStyle.append("background-color:#").append(bgColor).append(";");
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ // 忽略
|
|
|
+ }
|
|
|
+
|
|
|
+ // 合并列 (colspan)
|
|
|
+ String colspan = "";
|
|
|
+ try {
|
|
|
+ CTTcPr tcPr = cell.getCTTc().getTcPr();
|
|
|
+ if (tcPr != null && tcPr.getGridSpan() != null) {
|
|
|
+ BigInteger span = tcPr.getGridSpan().getVal();
|
|
|
+ if (span != null && span.intValue() > 1) {
|
|
|
+ colspan = " colspan=\"" + span.intValue() + "\"";
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ // 忽略
|
|
|
+ }
|
|
|
+
|
|
|
+ // 合并行 (rowspan) - 需要检查 vMerge
|
|
|
+ String rowspan = "";
|
|
|
+ try {
|
|
|
+ CTTcPr tcPr = cell.getCTTc().getTcPr();
|
|
|
+ if (tcPr != null && tcPr.getVMerge() != null) {
|
|
|
+ CTVMerge vMerge = tcPr.getVMerge();
|
|
|
+ if (vMerge.getVal() != null && vMerge.getVal() == STMerge.RESTART) {
|
|
|
+ // 计算 rowspan
|
|
|
+ int span = 1;
|
|
|
+ int cellIdx = row.getTableCells().indexOf(cell);
|
|
|
+ for (int r = rowIdx + 1; r < rows.size(); r++) {
|
|
|
+ XWPFTableRow nextRow = rows.get(r);
|
|
|
+ if (cellIdx < nextRow.getTableCells().size()) {
|
|
|
+ XWPFTableCell nextCell = nextRow.getTableCells().get(cellIdx);
|
|
|
+ CTTcPr nextTcPr = nextCell.getCTTc().getTcPr();
|
|
|
+ if (nextTcPr != null && nextTcPr.getVMerge() != null
|
|
|
+ && nextTcPr.getVMerge().getVal() == null) {
|
|
|
+ span++;
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (span > 1) {
|
|
|
+ rowspan = " rowspan=\"" + span + "\"";
|
|
|
+ }
|
|
|
+ } else if (vMerge.getVal() == null) {
|
|
|
+ // 被合并的单元格,跳过
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ // 忽略
|
|
|
+ }
|
|
|
+
|
|
|
+ String styleStr = cellStyle.length() > 0
|
|
|
+ ? " style=\"" + cellStyle + "\""
|
|
|
+ : "";
|
|
|
+
|
|
|
+ sb.append("<").append(cellTag).append(styleStr).append(colspan).append(rowspan).append(">");
|
|
|
+
|
|
|
+ // 单元格内容(可能有多个段落)
|
|
|
+ List<XWPFParagraph> paragraphs = cell.getParagraphs();
|
|
|
+ if (paragraphs.size() == 1) {
|
|
|
+ // 单段落直接输出内联内容,避免多余的 <p> 标签
|
|
|
+ XWPFParagraph p = paragraphs.get(0);
|
|
|
+ for (XWPFRun run : p.getRuns()) {
|
|
|
+ sb.append(convertRun(run, document));
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ for (XWPFParagraph p : paragraphs) {
|
|
|
+ sb.append(convertParagraph(p, document));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ sb.append("</").append(cellTag).append(">");
|
|
|
+ }
|
|
|
+
|
|
|
+ sb.append("</tr>\n");
|
|
|
+ }
|
|
|
+
|
|
|
+ sb.append("</table>\n");
|
|
|
+ return sb.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ private String escapeHtml(String text) {
|
|
|
+ if (text == null) return "";
|
|
|
+ return text
|
|
|
+ .replace("&", "&")
|
|
|
+ .replace("<", "<")
|
|
|
+ .replace(">", ">")
|
|
|
+ .replace("\"", """)
|
|
|
+ .replace("'", "'");
|
|
|
+ }
|
|
|
+
|
|
|
+ private String getDefaultStyles() {
|
|
|
+ return "body { font-family: 'SimSun', 'Microsoft YaHei', serif; font-size: 12pt; line-height: 1.6; color: #333; }"
|
|
|
+ + "table { border-collapse: collapse; width: 100%; margin: 10px 0; }"
|
|
|
+ + "th, td { border: 1px solid #999; padding: 6px 10px; text-align: left; }"
|
|
|
+ + "th { background-color: #f0f0f0; font-weight: bold; }"
|
|
|
+ + "img { max-width: 100%; height: auto; margin: 8px 0; }"
|
|
|
+ + "h1 { font-size: 22pt; }"
|
|
|
+ + "h2 { font-size: 16pt; }"
|
|
|
+ + "h3 { font-size: 14pt; }"
|
|
|
+ + "h4 { font-size: 12pt; }"
|
|
|
+ + "p { margin: 4px 0; }";
|
|
|
+ }
|
|
|
+}
|