word转html

依赖包版本:
compile("org.apache.poi:poi:3.17")
compile("org.apache.poi:poi-ooxml:3.17")
compile("org.apache.poi:poi-ooxml-schemas:3.17")
compile("org.apache.poi:poi-scratchpad:3.17")
compile("org.apache.poi:ooxml-schemas:1.4")
compile("fr.opensagres.xdocreport:fr.opensagres.xdocreport.converter.docx.xwpf:2.0.1")
maven库:
https://mvnrepository.com/artifact/fr.opensagres.xdocreport/fr.opensagres.xdocreport.converter.docx.xwpf
https://mvnrepository.com/artifact/org.apache.poi

import com.google.common.io.Files;
import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.UUID;

/**
 * word文档转html文件
 * 兼容.doc、.docx格式转换
 */

public class WordToHtml {
    private final static String SEPARATOR = "/";
    public static String format(InputStream in, String wordFilename, String htmlRootPath) throws Exception {
        if(!htmlRootPath.endsWith(SEPARATOR)){
            htmlRootPath += SEPARATOR;
        }
        String uuid = UUID.randomUUID().toString().trim().replaceAll("-", "");
        final String htmlName = uuid + Constants.SUFFIX_HTML;
        OutputStream htmlOut = null;
        OutputStreamWriter writer = null;
        try{
            File htmlFile = new File(htmlRootPath + htmlName);
            Files.createParentDirs(htmlFile);
            htmlOut = new FileOutputStream(htmlFile);
            writer = new OutputStreamWriter(htmlOut, StandardCharsets.UTF_8);
            if (wordFilename.toLowerCase().endsWith(Constants.SUFFIX_DOCX)) {
                XWPFDocument document = new XWPFDocument(in);
                //保存图片
                XHTMLOptions options = XHTMLOptions.create();
                options.setIgnoreStylesIfUnused(false);
                options.setFragment(true);
                //图片保存文件夹路径
                options.setImageManager(new ImageManager(new File(htmlRootPath), uuid));
                //解析word文档
                XHTMLConverter instance = (XHTMLConverter) XHTMLConverter.getInstance();
                instance.convert(document, writer, options);

            } else if (wordFilename.toLowerCase().endsWith(Constants.SUFFIX_DOC)) {
                HWPFDocument wordDocument = new HWPFDocument(in);
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
                //设置图片位置,也可在此方法中保存图片
                wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                    public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
                        return uuid + SEPARATOR +suggestedName;
                    }
                });
                // 保存图片
                List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
                if (pics != null) {
                    for (Picture pic : pics) {
                        File imagePath = new File(htmlRootPath + uuid + SEPARATOR + pic.suggestFullFileName());
                        Files.createParentDirs(imagePath);
                        OutputStream imageOut = new FileOutputStream(imagePath);
                        pic.writeImageContent(imageOut);
            if(null != imageOut){
              imageOut.clode();
            }
                    }
                }
                //解析word文档
                wordToHtmlConverter.processDocument(wordDocument);
                Document htmlDocument = wordToHtmlConverter.getDocument();

                Transformer serializer = TransformerFactory.newInstance().newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
                serializer.setOutputProperty(OutputKeys.INDENT, Boolean.toString(true));
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(writer));
            }
            String wrapHtml = wrap(FileUtils.readFileToString(htmlFile, StandardCharsets.UTF_8), Files.getNameWithoutExtension(wordFilename), StandardCharsets.UTF_8);
            FileUtils.write(htmlFile, wrapHtml, StandardCharsets.UTF_8);
        }catch(Exception e){
            throw e;
        }finally {
      if(null != writer){
        writer.clode();
      }
      if(null != htmlOut){
        htmlOut.clode();
      }
        }
        return htmlName;
    }


    private static String wrap(String contentBody, String title, final Charset encoding) {
        StringBuilder wrapHtmlStringBuilder = new StringBuilder("<!DOCTYPE html>\n<html>\n<head>\n\t<title>");
        wrapHtmlStringBuilder.append(title).append("</title>\n\t<meta http-equiv="Content-Type" content="text/html; charset=").append(encoding).append("">\n</head>\n<body>\n\n").append(contentBody)
                .append("</body>\n</html>");
        return wrapHtmlStringBuilder.toString();
    }
 
    interface Constants{
        /**
         * 后缀:docx
         */

        String SUFFIX_DOCX = ".docx";

        /**
         * 后缀:doc
         */

        String SUFFIX_DOC = ".doc";

        /**
         * 后缀:html
         */

        String SUFFIX_HTML = ".html";
    }
}

Jsoup模拟发起restful请求

  • Jsoup版本:compile("org.jsoup:jsoup:1.11.2")
  • httpclient 版本:compile("org.apache.httpcomponents:httpclient:4.5.1")
  • httpcore 版本:compile("org.apache.httpcomponents:httpcore:4.4.3")
  • 文件上传须添加 httpmime包: compile("org.apache.httpcomponents:httpmime:4.5.1")

1、模拟post请求:

/**
 * POST json
 */

public void postJsonTest(String url) {
  try {
    Connection connect = Jsoup.connect(url);//请求链接
    //以requestBody的形式发送请求 非表单形式提交,表单形式须修改"Content-Type"
    VO vo= new VO();
    System.out.println("请求报文:" + JSON.toJSONString(vo));
    Connection requestBody = connect.requestBody(JSON.toJSONString(vo));
    requestBody.ignoreHttpErrors(true);
    requestBody.validateTLSCertificates(false);
    requestBody.method(Connection.Method.POST);
    requestBody.ignoreContentType(true);
    requestBody.postDataCharset("utf-8");
    // Content-Type=application/json
    requestBody.header("Content-Type", "application/json");
    Connection.Response execute = requestBody.execute();
    String body1 = execute.body();
    System.out.println("返回报文:" + body1);
    JSONObject js = JSON.parseObject(body1);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}

2、模拟文件上传:

@RequestMapping(value = "/fileUpload", method = RequestMethod.POST)
public ResponseEntity fileUpload2(@RequestParam("file") MultipartFile file) {
  logger.info("OprUploadFileRestApiController -> fileUpload");
  String localDir = "${OPR_UPLOAD_DIR}";//文件上传的目录设置
  if (StringUtils.isBlank(localDir)) {
    return new ResponseEntity<>(new ResponseEnvelope<>("请检查文件上传的目录设置"), HttpStatus.OK);
  }
  String fileName = null;
  String filePath = null;
  String newFileName = UUID.randomUUID().toString().trim().replaceAll("-", "");

  // 判断文件是否为空
  if (!file.isEmpty()) {
    try {
      fileName = file.getOriginalFilename();
      filePath = localDir + "/" + newFileName;
      if (fileName.indexOf(".") < 0) {
        return new ResponseEntity<>(new ResponseEnvelope<>("文件类型错误"), HttpStatus.OK);
      }
      // 文件保存路径
      // 转存文件
      logger.info("文件名:{} 文件路径:{} 保存到硬盘",fileName,filePath);
      file.transferTo(new File(filePath));
      return new ResponseEntity<>(new ResponseEnvelope<>(newFileName), HttpStatus.OK);
    } catch (Exception e) {
      return new ResponseEntity<>(new ResponseEnvelope<>("文件保存失败"), HttpStatus.OK);
    }
  } else {
    return new ResponseEntity<>(new ResponseEnvelope<>("上传文件为null"), HttpStatus.OK);
  }
}
/**
 * POST MultipartFile文件上传
 */

public void uploadFileByPost(String url, String fileLocation) throws Exception {
  CloseableHttpClient httpclient = HttpClients.createDefault();
  HttpPost httppost = new HttpPost(url);
  File file = new File(fileLocation);//WebFileUtils.createFileByUrl(url, "jpg");
  System.out.println("executing request " + httppost.getURI());
  //setConnectTimeout:设置连接超时时间,单位毫秒。
  // setConnectionRequestTimeout:设置从connect Manager获取Connection 超时时间,单位毫秒。
  // setSocketTimeout:请求获取数据的超时时间,单位毫秒。 如果访问一个接口,多少时间内无法返回数据,就直接放弃此次调用。
  RequestConfig defaultRequestConfig = RequestConfig.custom()
      .setConnectTimeout(5000)
      .setConnectionRequestTimeout(5000)
      .setSocketTimeout(15000)
      .build();
  httppost.setConfig(defaultRequestConfig);
  Map<String,ContentBody> reqParam = new HashMap<String,ContentBody>();
  reqParam.put("filename", new StringBody("Hydrangeas.jpg", ContentType.MULTIPART_FORM_DATA));
  reqParam.put("file", new FileBody(file));
  MultipartEntityBuilder multipartEntityBuilder = MultipartEntityBuilder.create();
  multipartEntityBuilder.setCharset(Charset.forName("utf-8"));
  multipartEntityBuilder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE);
  for(Map.Entry<String,ContentBody> param : reqParam.entrySet()){
    multipartEntityBuilder.addPart(param.getKey(), param.getValue());
  }
  HttpEntity reqEntity = multipartEntityBuilder.build();
  httppost.setEntity(reqEntity);
  // 执行post请求.
  CloseableHttpResponse response = httpclient.execute(httppost);
  System.out.println("got response");
  try {
    // 获取响应实体
    HttpEntity entity = response.getEntity();
    //System.out.println("--------------------------------------");
    // 打印响应状态
    //System.out.println(response.getStatusLine());
    if (entity != null) {
      System.out.println(EntityUtils.toString(entity, Charset.forName("UTF-8")));
    }
    //System.out.println("------------------------------------");
  } finally {
    response.close();
  }
}