常见的Word文档格式有doc、docx和wps:doc是比较老的Word文档格式,其内部为二进制文件;docx是比较新的Word文档格式,将其扩展名修改为zip并解压后,可以看到其内部文件组成,其中关键文件是document.xml;wps则是金山WPS特有的格式。不同类库对Word文档格式的支持有所不同,选择类库时需要根据需求分析哪种类库能够满足开发需要。

| 类库 | docx | doc | wps | 付费 |
| --- | --- | --- | --- | --- |
| poi | 不支持 | 支持,HWPFDocument | 支持,HWPFDocument | 不需要 |
| poi-ooxml | 支持,XWPFParagraph | 不支持 | 不支持 | 不需要 |
| Aspose.Words for Java | 支持 | 支持 | 不支持 | 需要,有免费版本 |
| Spire.Doc for Java | 支持 | 支持 | 支持 | 需要,有免费版本 |

一、POI-OOXML操作docx文档

XWPFDocument仅支持处理.docx文档,其功能强大,灵活好用!

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
/**
 * Cleans up a .docx file with Apache POI (XWPF) and writes the result to
 * {@code tempFile/deal-<wjmc>}.
 *
 * <p>The paragraph containing "【名称】" becomes the title: it is centred and its runs are
 * replaced by a single 22pt "方正小标宋简体" run holding the space-stripped text without the
 * marker, followed by a line break. Every other paragraph containing "【" is removed.
 *
 * @param wjmc            file name; only names ending in ".docx" are processed
 * @param fileInputStream content of the .docx file; this method does not explicitly close it
 * @return the output file, or {@code null} when {@code wjmc} does not end in ".docx". The file
 *         object is returned even when processing failed (the failure is only logged).
 */
public java.io.File method(String wjmc, FileInputStream fileInputStream) {
    if (!StringUtils.endsWith(wjmc, ".docx")) {
        return null;
    }
    String outputPath = "tempFile/deal-" + wjmc;
    // The document is a resource too: closing it releases the underlying OPC package.
    try (OutputStream fileOutputStream = Files.newOutputStream(Paths.get(outputPath));
         XWPFDocument document = new XWPFDocument(fileInputStream)) {
        // Walk backwards so that removing a paragraph never shifts an index still to be visited.
        for (int i = document.getParagraphs().size() - 1; i >= 0; i--) {
            XWPFParagraph paragraph = document.getParagraphs().get(i);
            // Strips ASCII, no-break and ideographic spaces.
            // NOTE(review): the earlier code repeated the same space removal twice, so the second
            // call presumably targeted a non-ASCII space - confirm which one was intended.
            String text = paragraph.getText()
                    .replace(" ", "")
                    .replace("\u00A0", "")
                    .replace("\u3000", "");
            if (text.contains("【名称】")) {
                paragraph.setAlignment(ParagraphAlignment.CENTER);
                while (!paragraph.getRuns().isEmpty()) {
                    paragraph.removeRun(0);
                }
                XWPFRun run = paragraph.createRun();
                run.setFontSize(22);
                run.setFontFamily("方正小标宋简体");
                run.setText(text.replace("【名称】", ""));
                run.addCarriageReturn();
            } else if (text.contains("【")) {
                // removeBodyElement expects a position among ALL body elements (paragraphs and
                // tables), which differs from the paragraph-list index once a table is present.
                document.removeBodyElement(document.getPosOfParagraph(paragraph));
            }
        }
        document.write(fileOutputStream);
    } catch (Exception e) {
        log.error("文件处理发生异常!", e);
    }
    return new java.io.File(outputPath);
}

二、POI操作doc或wps文档

HWPFDocument支持处理.doc、.wps文档,由于doc文档版本较老,其仅支持文本、段落、文字处理等基础的功能,无法设置字体样式、文本居中等。

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
/**
 * Cleans up a .doc or .wps file with Apache POI (HWPF) and writes the result to
 * {@code tempFile/deal-<wjmc>}.
 *
 * <p>For each paragraph containing "【名称】", its space-stripped text without the marker is
 * inserted at the very start of the document. Every other paragraph containing "【" is
 * deleted.
 *
 * <p>NOTE(review): the paragraph that carries "【名称】" is itself left in place, marker
 * included - confirm whether it should be removed once its text has been copied to the top.
 *
 * @param wjmc            file name; only names ending in ".doc" or ".wps" are processed
 * @param fileInputStream content of the file; this method does not explicitly close it
 * @return the output file, or {@code null} for any other extension. The file object is
 *         returned even when processing failed (the failure is only logged).
 */
public java.io.File method(String wjmc, FileInputStream fileInputStream) {
    if (!StringUtils.endsWith(wjmc, ".doc") && !StringUtils.endsWith(wjmc, ".wps")) {
        return null;
    }
    String outputPath = "tempFile/deal-" + wjmc;
    try (OutputStream fileOutputStream = Files.newOutputStream(Paths.get(outputPath));
         HWPFDocument document = new HWPFDocument(fileInputStream)) {
        // Title text is collected here and inserted only after the scan: inserting at the
        // document start while still iterating by index would shift the unvisited paragraphs
        // and cause the one just before the title paragraph to be skipped.
        StringBuilder titlePrefix = new StringBuilder();
        // Walk backwards so that deleting a paragraph never shifts an index still to be visited.
        for (int i = document.getRange().numParagraphs() - 1; i >= 0; i--) {
            // The overall range is re-read on every pass because the loop body edits the document.
            Range range = document.getRange();
            Paragraph paragraph = range.getParagraph(i);
            // Strips ASCII, no-break and ideographic spaces.
            // NOTE(review): the earlier code repeated the same space removal twice, so the second
            // call presumably targeted a non-ASCII space - confirm which one was intended.
            String text = paragraph.text()
                    .replace(" ", "")
                    .replace("\u00A0", "")
                    .replace("\u3000", "");
            if (text.contains("【名称】")) {
                // Prepending keeps the result identical to inserting each title at the document
                // start as soon as it is found during this backward walk.
                titlePrefix.insert(0, text.replace("【名称】", ""));
            } else if (text.contains("【")) {
                paragraph.delete();
            }
        }
        if (titlePrefix.length() > 0) {
            document.getRange().insertBefore(titlePrefix.toString());
        }
        document.write(fileOutputStream);
    } catch (Exception e) {
        log.error("文件处理发生异常!", e);
    }
    return new java.io.File(outputPath);
}

三、Aspose处理doc和docx文档

Aspose.Words for Java 官方文档

aspose-words组件仅支持处理.doc、.docx文档,不支持处理.wps文档。

<dependency>
    <groupId>com.aspose</groupId>
    <artifactId>aspose-words</artifactId>
    <version>20.8</version>
</dependency>
/**
 * Cleans up a .doc or .docx file with Aspose.Words and saves the result to
 * {@code tempFile/deal-<wjmc>}.
 *
 * <p>Only the paragraphs directly inside the body of the first section are examined. The
 * paragraph containing "【名称】" becomes the title: its runs are replaced by a single 22pt
 * "方正小标宋简体" run holding the space-stripped text without the marker. Every other
 * paragraph containing "【" is removed.
 *
 * @param wjmc            file name; only names ending in ".doc" or ".docx" are processed
 * @param fileInputStream content of the file; this method does not explicitly close it
 * @return the output file, or {@code null} for any other extension. The file object is
 *         returned even when processing failed (the failure is only logged).
 */
public java.io.File method(String wjmc, FileInputStream fileInputStream) {
    if (!StringUtils.endsWith(wjmc, ".doc") && !StringUtils.endsWith(wjmc, ".docx")) {
        return null;
    }
    String outputPath = "tempFile/deal-" + wjmc;
    try {
        Document document = new Document(fileInputStream);
        ParagraphCollection paragraphs = document.getFirstSection().getBody().getParagraphs();
        // Walk backwards so that removing a paragraph never shifts an index still to be visited.
        for (int i = paragraphs.getCount() - 1; i >= 0; i--) {
            Paragraph paragraph = paragraphs.get(i);
            // Strips ASCII, no-break and ideographic spaces.
            // NOTE(review): the earlier code repeated the same space removal twice, so the second
            // call presumably targeted a non-ASCII space - confirm which one was intended.
            String text = paragraph.getText()
                    .replace(" ", "")
                    .replace("\u00A0", "")
                    .replace("\u3000", "");
            if (text.contains("【名称】")) {
                // getText() includes the end-of-paragraph control character; trim it so it
                // does not end up inside the run text.
                String title = text.replace("【名称】", "").trim();
                // Replace every run rather than overwriting only the first one: a title split
                // across several runs would otherwise keep its remaining fragments, and a
                // paragraph with no direct runs would yield a null first run.
                paragraph.getRuns().clear();
                Run run = new Run(document, title);
                run.getFont().setName("方正小标宋简体");
                run.getFont().setSize(22);
                paragraph.appendChild(run);
            } else if (text.contains("【")) {
                paragraph.remove();
            }
        }
        document.save(outputPath);
    } catch (Exception e) {
        log.error("文件处理发生异常!", e);
    }
    return new java.io.File(outputPath);
}

四、Spire处理doc、docx和wps文档

Spire.Doc for Java 官方文档

目前这几个类库中,仅Spire.Doc for Java可以同时处理.doc、.docx、.wps三类文档。

<dependency>
    <groupId>e-iceblue</groupId>
    <artifactId>spire.doc.free</artifactId>
    <version>5.2.0</version>
</dependency>
/**
 * Cleans up a Word file with Spire.Doc and writes the result to
 * {@code tempFile/deal-<wjmc>}. No extension check is performed.
 *
 * <p>Only the paragraphs of the first section are examined. The paragraph containing
 * "【名称】" becomes the title: its text is replaced by the space-stripped text without the
 * marker, its text ranges are set to 22pt "方正小标宋简体", and it is centred horizontally.
 * Every other paragraph containing "【" is removed.
 *
 * @param wjmc            file name, used to build the output path
 * @param fileInputStream content of the file; this method does not explicitly close it
 * @return the output file; it is returned even when processing failed (the failure is only
 *         logged)
 */
public java.io.File method(String wjmc, FileInputStream fileInputStream) {
    String outputPath = "tempFile/deal-" + wjmc;
    try (OutputStream fileOutputStream = Files.newOutputStream(Paths.get(outputPath))) {
        Document document = new Document(fileInputStream);
        Section section = document.getSections().get(0);
        ParagraphCollection paragraphs = section.getParagraphs();
        // Walk backwards so that removing a paragraph never shifts an index still to be visited.
        for (int i = paragraphs.getCount() - 1; i >= 0; i--) {
            Paragraph paragraph = paragraphs.get(i);
            // Strips ASCII, no-break and ideographic spaces.
            // NOTE(review): the earlier code repeated the same space removal twice, so the second
            // call presumably targeted a non-ASCII space - confirm which one was intended.
            String text = paragraph.getText()
                    .replace(" ", "")
                    .replace("\u00A0", "")
                    .replace("\u3000", "");
            if (text.contains("【名称】")) {
                paragraph.setText(text.replace("【名称】", ""));
                // Index the children with j (the inner loop variable), not the paragraph index.
                for (int j = 0; j < paragraph.getChildObjects().getCount(); j++) {
                    Object child = paragraph.getChildObjects().get(j);
                    // Skip anything that is not a text range instead of failing the cast.
                    if (child instanceof TextRange) {
                        TextRange textRange = (TextRange) child;
                        textRange.getCharacterFormat().setFontSize(22);
                        textRange.getCharacterFormat().setFontName("方正小标宋简体");
                    }
                }
                ParagraphFormat format = paragraph.getFormat();
                format.setHorizontalAlignment(HorizontalAlignment.Center);
            } else if (text.contains("【")) {
                paragraphs.remove(paragraph);
            }
        }
        document.saveToStream(fileOutputStream, FileFormat.Auto);
    } catch (Exception e) {
        log.error("文件处理发生异常!", e);
    }
    return new java.io.File(outputPath);
}