POI实现word文档转html文件

POI word文件转html

package com.feiruo.officeConvert;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.OutputStreamWriter;

import java.util.List;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.TransformerException;

import org.apache.poi.hwpf.usermodel.Picture;

/**
 * Base class for document converters: holds the converted content together
 * with the output locations and the output encoding, and knows how to write
 * the content to disk. Subclasses supply the actual conversion in
 * {@link #convert(String)}.
 */
public abstract class OfficeConvert {

    // Directory (path prefix) under which extracted images are stored.
    private String imgPath = null;

    // Directory in which the converted file is stored.
    private String parentPath = null;

    // Content of the converted file.
    private String fileContent = null;

    // Character encoding used when the content is written to disk.
    private String encode = "UTF-8";

    /**
     * Converts the given document.
     *
     * @param docPath
     *            location of the source document (e.g. a *.doc file)
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
     */
    public abstract void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException;

    /**
     * Writes the current file content to disk using the configured encoding.
     * The parent directory (if one has been set) is created when missing.
     * I/O failures are reported on stderr and do not propagate.
     *
     * @param filepath
     *            location of the file to write
     * @throws IllegalStateException
     *             if no content has been set yet (for example because the
     *             conversion did not run or failed)
     */
    public void writeFile(String filepath) {
        // Fail before touching the disk; otherwise an empty file would be
        // created and the write would die with a bare NullPointerException.
        if (fileContent == null) {
            throw new IllegalStateException(
                    "No file content to write to " + filepath);
        }

        // parentPath is optional; only create it when it has been configured.
        if (parentPath != null) {
            File dir = new File(parentPath);
            if (!dir.exists()) {
                dir.mkdirs();
            }
        }

        // The stream is declared separately so that it is still closed when
        // the writer cannot be created (unsupported encoding).
        try (FileOutputStream fos = new FileOutputStream(new File(filepath));
                BufferedWriter bw = new BufferedWriter(
                        new OutputStreamWriter(fos, encode))) {
            bw.write(fileContent);
        } catch (IOException ioe) {
            // Covers FileNotFoundException and failures on close as well.
            ioe.printStackTrace();
        }
    }

    /**
     * Normalizes a directory path: trims it, makes sure a non-empty path ends
     * with "/", removes double quotes and replaces "&gt;" and "&lt;" characters
     * by their HTML entities.
     *
     * @param path
     *            the raw path, may be {@code null}
     * @return the normalized path, or {@code null} when {@code path} is
     *         {@code null}
     */
    public String checkSetPath(String path) {
        if (path == null) {
            return null;
        }
        String cleaned = path.trim();
        if (cleaned.lastIndexOf('/') < cleaned.length() - 1) {
            cleaned += "/";
        }
        // Literal replacements over the whole string, so a character at
        // index 0 is handled like any other occurrence.
        cleaned = cleaned.replace("\"", "");
        cleaned = cleaned.replace(">", "&gt;");
        cleaned = cleaned.replace("<", "&lt;");
        // TODO strip '*' characters as well
        return cleaned;
    }

    /**
     * @return the name of the encoding used when writing the file
     */
    public String getEncode() {
        return encode;
    }

    /**
     * @param encode
     *            name of the encoding used when writing the file
     */
    public void setEncode(String encode) {
        this.encode = encode;
    }

    /**
     * Returns the image directory.
     *
     * @return the normalized image directory path
     */
    public String getImgPath() {
        return imgPath;
    }

    /**
     * Sets the image directory; the value is normalized with
     * {@link #checkSetPath(String)}.
     *
     * @param imgPath
     *            name of the directory that stores the images
     */
    public void setImgPath(String imgPath) {
        this.imgPath = checkSetPath(imgPath);
    }

    /**
     * Returns the directory in which the converted file is stored.
     *
     * @return the normalized parent directory path
     */
    public String getParentPath() {
        return parentPath;
    }

    /**
     * Sets the directory in which the converted file is stored; the value is
     * normalized with {@link #checkSetPath(String)}.
     *
     * @param parentPath
     *            directory of the converted file
     */
    public void setParentPath(String parentPath) {
        this.parentPath = checkSetPath(parentPath);
    }

    /**
     * Returns the converted file content.
     *
     * @return the content, or {@code null} when nothing has been set
     */
    public String getFileContent() {
        return fileContent;
    }

    /**
     * @param content
     *            the converted file content
     */
    public void setFileContent(String content) {
        this.fileContent = content;
    }

}

package com.feiruo.officeConvert;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.w3c.dom.Document;

/**
 * Converts a *.doc document into an *.html file.
 *
 * @author Jdk.feiruo.
 * @since JDK 1.7 POI 3.8
 * @version 1.0
 */
public class DocToHtml extends OfficeConvert implements IOfficeConvert {

    // Pictures of the most recently converted document; null until
    // convert() has processed a document.
    private List<Picture> pics = null;

    /**
     * @param parentPath
     *            directory in which the html file is stored
     * @param imageppth
     *            directory (below the parent directory) in which the html
     *            images are stored
     * @param encoding
     *            encoding of the generated html
     */
    public DocToHtml(String parentPath, String imageppth, String encoding) {
        // The setters normalize the paths through checkSetPath themselves.
        setParentPath(parentPath);
        setImgPath(imageppth);
        this.setEncode(encoding);
    }

    public DocToHtml() {
    }

    /**
     * Converts a *.doc document to html and stores the result as the file
     * content; the document's pictures are remembered so that
     * {@link #writeWithName(String)} can save them.
     *
     * @param docPath
     *            location of the *.doc document
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
     */
    @Override
    public void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument;
        // Close the input stream as soon as the document has been loaded.
        // NOTE(review): relies on HWPFDocument reading the whole stream in
        // its constructor - confirm against the POI version in use.
        try (FileInputStream in = new FileInputStream(docPath)) {
            wordDocument = new HWPFDocument(in);
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        // The html references each picture by its suggested name only; the
        // image directory is prefixed further down.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                return suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);
        pics = wordDocument.getPicturesTable().getAllPictures();

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance()
                .newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, this.getEncode());
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // Decode with the same encoding the serializer wrote; the platform
        // default charset would corrupt non-ASCII text.
        String htmlContent = out.toString(this.getEncode());

        // Point the image references at the image directory. Literal
        // replace, so '$' or '\' in the path are not treated as regex syntax.
        String imgPrefix = getImgPath();
        if (imgPrefix != null) {
            htmlContent = htmlContent.replace("<img src=\"",
                    "<img src=\"" + imgPrefix);
        }
        setFileContent(htmlContent);
    }

    /**
     * Saves the pictures of the converted document into the image directory
     * and then writes the html source as {@code fileName + ".html"} into the
     * parent directory. A picture that cannot be written is reported on
     * stderr and skipped.
     *
     * @param fileName
     *            name of the html file, without extension
     */
    @Override
    public void writeWithName(String fileName) {
        // Save the pictures of the document first.
        if (pics != null) {
            // Without an image directory the html references the pictures
            // next to the html file, so store them there.
            String imgDirName = getImgPath() == null ? "" : getImgPath();
            File imgDir = new File(this.getParentPath() + imgDirName);
            // Create the directory if it does not exist yet.
            if (!imgDir.exists()) {
                imgDir.mkdirs();
            }
            for (Picture pic : pics) {
                File target = new File(imgDir, pic.suggestFullFileName());
                try (FileOutputStream imgOut = new FileOutputStream(target)) {
                    pic.writeImageContent(imgOut);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        // Save the html source file.
        this.writeFile(getParentPath() + fileName + ".html");
    }

}

package com.feiruo.Test;

import java.io.FileNotFoundException;

import java.io.IOException;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.TransformerException;

import com.yinhai.officeConvert.DocToHtml;

/**
 * Manual smoke test: converts a sample *.doc file to html and writes the
 * result, including its pictures, to disk.
 *
 * NOTE(review): the import in this file names com.yinhai.officeConvert.DocToHtml
 * while DocToHtml is declared in com.feiruo.officeConvert - confirm the package.
 */
public class Test {

    public static void main(String[] args) {
        new Test();
    }

    /**
     * Runs the conversion; failures are reported on stderr.
     */
    public Test() {
        DocToHtml dth = new DocToHtml("C://test", "f", "UTF-8");
        try {
            dth.convert("D://test//test.doc");
            // Write the output only after a successful conversion; after a
            // failure there is no content to write.
            dth.writeWithName("feiruo");
        } catch (IOException | ParserConfigurationException
                | TransformerException e) {
            // IOException also covers FileNotFoundException.
            e.printStackTrace();
        }
    }

}

package com.feiruo.officeConvert;

public interface IOfficeConvert {

    /**
     * Writes the converted file to disk.
     *
     * @param fileName
     *            name of the file to write
     */
    void writeWithName(String fileName);

}

POI实现word文档转html文件的更多相关文章

POI生成WORD文档
h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h ...
POI生成word文档完整案例及讲解
一,网上的API讲解其实POI的生成Word文档的规则就是先把获取到的数据转成xml格式的数据,然后通过xpath解析表单式的应用取值,判断等等,然后再把取到的值放到word文档中,最后再输出来. ...
用java语言通过POI实现word文档的按标题提取
最近有一个项目需要将一个word文档中的数据提取到数据库中.就去网上查了好多资料,最靠谱的就是用poi实现word文档的提取. 喝水不忘挖井人,我查了好多资料就这个最靠谱,我的这篇博客主要是借鉴htt ...
POI 生成 word 文档简单版（包括文字、表格、图片、字体样式设置等）
POI 生成word 文档一般有两种方法: ① word模板生成word 文档 : ② 写代码直接生成 word 文档: 我这里演示的是第二种方法,即写代码生成 word文档,不多说废话,直接 ...
Poi之Word文档结构介绍
1.poi之word文档结构介绍之正文段落一个文档包含多个段落,一个段落包含多个Runs,一个Runs包含多个Run,Run是文档的最小单元获取所有段落:List<XWPFParagraph ...
微信公众号怎么添加附件？比如word文档，pdf文件等
微信公众号怎么添加附件?比如word文档,pdf文件等我们都知道创建一个微信公众号,在公众号中发布一些文章是非常简单的,但公众号添加附件下载的功能却被限制,如今可以使用小程序“微附件”进行在公众 ...
Java POI 解析word文档
实现步骤: 1.poi实现word转html 2.模型化解析html 3.html转Map数组 Map数组(数组的操作处理不做说明) 1.导jar包. 2.代码实现 package com.web.o ...
java word文档转 html文件
一.简介一般word文件后缀有doc.docx两种.docx是office word 2007以及以后版本文档的扩展名:doc是office word 2003文档保存的扩展名.对于这两种格式的wo ...
poi 读取word文档
1.导入jar包官网下载地址: https://www.apache.org/dyn/closer.lua/poi/release/bin/poi-bin-3.17-20170915.zip 最开始 ...

随机推荐

(转)WEB第三方打印控件[ASP.NET常用工具]
本文转载自:http://blog.csdn.net/chz_cslg/article/details/25415347 在B/S模式开发中,打印是个很大的困扰.无论是采用页面直接输出或者引用WORD ...
mysql的主从配置以及主主配置
基础环境系统:linuxmysql版本:5.5主服务器IP:192.168.1.101从服务器IP:192.168.1.102 1.主服务器(master)要打开二进制日志2.从服务器(slave) ...
MySQL运行出错：无法连接驱动、无root访问权限解决办法
按照疯狂java讲义的13.3的程序,发现程序运行出错. 1.点开runConnMySql.cmd运行文件,出现如下结果: 2.用Editplus进行编译运行,如下结果: 报错定位到程序第18行,而第 ...
System.ArgumentOutOfRangeException: 年、月和日参数描述无法表示的 DateTime。
c#日期控件格式设为 yyyy-MM,通过updown 方式调整日期. 当为月度最后一天,且要调整月没有当前月的最后一天时,就会报标题错误. 如:当前为1月31日,要调整为2月时,就会报错.因为2月 ...
用C#将输入的小写字母转化为大写字母
string A = "adsaf"; string B =""; B=A.ToUper();
(C++) 基本面试题（整理）
1.new.delete.malloc.free关系 new/delete是C++的运算符.new 调用构造函数用于动态申请内存,delete调用对象的析构函数,用于释放内存. malloc与free ...
Spark1.6 DataSets简介
Apache Spark提供了强大的API,以便使开发者为使用复杂的分析成为了可能.通过引入SparkSQL,让开发者可以使用这些高级API接口来从事结构化数据的工作(例如数据库表,JSON文件),并 ...
[实变函数]3.2 可测集 (measurable set)
1 $\bbR^n$ 中集合 $E$ 称为可测的 (measurable), 如果 $$\bee\label{3.2:Caratheodory} m^*T=m^*(T\cap E)+m^*(T\cap ...
防篡改php文件校验程序
<?php /** * 校验线上源文件是否和本地的一致 * User: Administrator * Date: 2015/11/26 * Time: 9:30 */ include_once ...
创建MySQL用户赋予某指定库表的权限 flush privileges才能生效！！！！;@'localhost'授权本地，@'%'授权远程
update ERROR 1364 (HY000): Field 'ssl_cipher' doesn't have a default value 建议使用GRANT语句进行授权,语句如下: gra ...

POI实现word文档转html文件

POI实现word文档转html文件的更多相关文章

随机推荐

热门专题