POI实现word文档转html文件

使用 POI 将 Word 文件转换为 HTML

package com.feiruo.officeConvert;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.OutputStreamWriter;

import java.util.List;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.TransformerException;

import org.apache.poi.hwpf.usermodel.Picture;

/**
 * Base class for office-document converters.
 *
 * <p>Holds the output directory, the image directory, the converted content
 * and the output encoding, and knows how to write the converted content to
 * disk. Subclasses supply the actual conversion in {@link #convert(String)}.
 */
public abstract class OfficeConvert {

    // Directory fragment where extracted images are stored.
    private String imgPath = null;

    // Directory where the converted file is stored.
    private String parentPath = null;

    // Converted file content.
    private String fileContent = null;

    // Charset name used when writing the converted content (defaults to UTF-8).
    private String encode = "UTF-8";

    /**
     * Converts the given document.
     *
     * @param docPath
     *            location of the source document
     * @throws FileNotFoundException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
     */
    public abstract void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException;

    /**
     * Writes the current file content to disk using the configured encoding.
     *
     * <p>The configured parent directory is created first if it does not
     * exist; when no parent directory has been configured, the target file's
     * own parent directory is used instead. I/O failures (including a failed
     * flush on close) are reported with {@code printStackTrace()} rather than
     * thrown.
     *
     * @param filepath
     *            location of the file to write
     */
    public void writeFile(String filepath) {
        File target = new File(filepath);

        // parentPath is null when the no-argument construction path is used;
        // new File((String) null) would throw, so fall back to the target's parent.
        File dir = (this.parentPath != null) ? new File(this.parentPath)
                : target.getParentFile();
        if (dir != null && !dir.exists()) {
            dir.mkdirs();
        }

        // try-with-resources closes the writer (flushing it) and the stream even
        // on failure, and routes close/flush errors to the catch blocks below.
        try (FileOutputStream fos = new FileOutputStream(target);
                BufferedWriter bw = new BufferedWriter(
                        new OutputStreamWriter(fos, encode))) {
            bw.write(fileContent);
        } catch (FileNotFoundException fnfe) {
            fnfe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    /**
     * Normalises a directory path before it is stored.
     *
     * <p>The path is trimmed, a trailing {@code /} is appended when a
     * non-empty path does not already end with one, every double quote is
     * removed, and every {@code >} / {@code <} is replaced by {@code &gt;} /
     * {@code &lt;}. Replacements apply at every position, including the first
     * character.
     *
     * @param path
     *            the raw path
     * @return the normalised path
     */
    public String checkSetPath(String path) {
        path = path.trim();
        if (path.lastIndexOf("/") < path.length() - 1) {
            path += "/";
        }
        // Literal replacements: String.replace does not interpret regex syntax
        // and, unlike an indexOf(..) > 0 guard, also covers index 0.
        path = path.replace("\"", "");
        path = path.replace(">", "&gt;");
        path = path.replace("<", "&lt;");
        // TODO decide whether '*' should also be stripped from the path.
        return path;
    }

    /**
     * @return the charset name used when writing the converted content
     */
    public String getEncode() {
        return encode;
    }

    /**
     * @param encode
     *            the charset name to use when writing the converted content
     */
    public void setEncode(String encode) {
        this.encode = encode;
    }

    /**
     * Returns the image directory.
     *
     * @return the image directory as normalised by {@link #checkSetPath(String)},
     *         or {@code null} if it was never set
     */
    public String getImgPath() {
        return imgPath;
    }

    /**
     * Sets the image directory; the value is normalised with
     * {@link #checkSetPath(String)} before being stored.
     *
     * @param imgPath
     *            name of the directory that holds the images
     */
    public void setImgPath(String imgPath) {
        this.imgPath = checkSetPath(imgPath);
    }

    /**
     * Returns the directory in which the converted file is stored.
     *
     * @return the output directory as normalised by
     *         {@link #checkSetPath(String)}, or {@code null} if it was never set
     */
    public String getParentPath() {
        return parentPath;
    }

    /**
     * Sets the directory in which the converted file is stored; the value is
     * normalised with {@link #checkSetPath(String)} before being stored.
     *
     * @param parentPath
     *            the output directory
     */
    public void setParentPath(String parentPath) {
        this.parentPath = checkSetPath(parentPath);
    }

    /**
     * Returns the converted file content.
     *
     * @return the content, or {@code null} if none has been set
     */
    public String getFileContent() {
        return fileContent;
    }

    /**
     * @param content
     *            the converted file content to store
     */
    public void setFileContent(String content) {
        this.fileContent = content;
    }

}

package com.feiruo.officeConvert;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.w3c.dom.Document;

/**
 * Converts a *.doc document into an *.html file.
 *
 * @author Jdk.feiruo.
 * @since JDK 1.7 POI 3.8
 * @version 1.0
 */
public class DocToHtml extends OfficeConvert implements IOfficeConvert {

    // Pictures of the most recently converted document; null until convert() has read them.
    private List<Picture> pics = null;

    /**
     * @param parentPath
     *            directory in which the html file is stored
     * @param imageppth
     *            directory in which the html images are stored
     * @param encoding
     *            encoding of the generated html
     */
    public DocToHtml(String parentPath, String imageppth, String encoding) {
        // The setters already normalise their argument through checkSetPath,
        // so it is not applied a second time here.
        setParentPath(parentPath);
        setImgPath(imageppth);
        this.setEncode(encoding);
    }

    public DocToHtml() {
    }

    /**
     * Converts a *.doc document to html.
     *
     * <p>The document is rendered with {@code WordToHtmlConverter}, serialised
     * as html in the configured encoding, every {@code <img src="} is prefixed
     * with {@link #getImgPath()}, and the result is stored with
     * {@link #setFileContent(String)}. The document's pictures are remembered
     * so that {@link #writeWithName(String)} can save them later.
     *
     * @param docPath
     *            location of the *.doc document
     * @throws FileNotFoundException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
     */
    public void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument;
        // Close the input stream once the document has been built from it.
        // NOTE(review): relies on HWPFDocument reading the whole stream inside
        // its constructor - confirm for the POI version in use.
        try (FileInputStream in = new FileInputStream(docPath)) {
            wordDocument = new HWPFDocument(in);
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                // Nothing is saved here; the pictures are written later by
                // writeWithName. Only the name used in the html is returned.
                return suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);
        pics = wordDocument.getPicturesTable().getAllPictures();

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, this.getEncode());
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // Decode with the same charset the serializer was told to emit; the
        // platform default charset would corrupt non-ASCII text when it differs.
        String htmlContent = out.toString(this.getEncode());

        // Literal replacement: the image path must not be interpreted as a
        // regex replacement string ('$' and '\' are special to replaceAll).
        htmlContent = htmlContent.replace("<img src=\"",
                "<img src=\"" + getImgPath());
        setFileContent(htmlContent);
    }

    /**
     * Saves the pictures of the converted document into
     * {@code getParentPath() + getImgPath()} and then writes the html to
     * {@code getParentPath() + fileName + ".html"}.
     *
     * <p>A picture that cannot be written is reported with
     * {@code printStackTrace()} and the remaining pictures are still processed.
     *
     * @param fileName
     *            name of the html file, without extension
     */
    @Override
    public void writeWithName(String fileName) {
        // Save the document's pictures first.
        if (pics != null) {
            File imgDir = new File(this.getParentPath() + this.getImgPath());
            // Create the image directory if it does not exist yet.
            if (!imgDir.exists()) {
                imgDir.mkdirs();
            }
            for (Picture pic : pics) {
                File target = new File(imgDir, pic.suggestFullFileName());
                // One stream per picture, closed as soon as it has been written.
                try (FileOutputStream imgOut = new FileOutputStream(target)) {
                    pic.writeImageContent(imgOut);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        // Save the html source file.
        this.writeFile(getParentPath() + fileName + ".html");
    }

}

package com.feiruo.Test;

import java.io.FileNotFoundException;

import java.io.IOException;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.TransformerException;

import com.yinhai.officeConvert.DocToHtml;

/**
 * Small manual driver: converts one fixed *.doc file to html.
 */
public class Test {

    public static void main(String[] args) {
        new Test();
    }

    /**
     * Converts {@code D://test//test.doc} and writes the result as
     * {@code feiruo.html} under {@code C://test}, with images in the
     * {@code f} sub-directory. Failures are reported with
     * {@code printStackTrace()}.
     */
    public Test() {
        DocToHtml dth = new DocToHtml("C://test", "f", "UTF-8");
        try {
            dth.convert("D://test//test.doc");
            // Write only after a successful conversion; after a failed one
            // there is no content to write.
            dth.writeWithName("feiruo");
        } catch (IOException | ParserConfigurationException
                | TransformerException e) {
            e.printStackTrace();
        }
    }

}

package com.feiruo.officeConvert;

public interface IOfficeConvert {

    /**
     * Writes the converted file to disk.
     *
     * @param fileName name of the file to write
     */
    void writeWithName(String fileName);

}

POI实现word文档转html文件的更多相关文章

POI生成WORD文档
h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h ...
POI生成word文档完整案例及讲解
一,网上的API讲解其实POI的生成Word文档的规则就是先把获取到的数据转成xml格式的数据,然后通过xpath解析表单式的应用取值,判断等等,然后在把取到的值放到word文档中,最后在输出来. ...
用java语言通过POI实现word文档的按标题提取
最近有一个项目需要将一个word文档中的数据提取到数据库中.就去网上查了好多资料,最靠谱的就是用poi实现word文档的提取. 喝水不忘挖井人,我查了好多资料就这个最靠谱,我的这篇博客主要是借鉴htt ...
POI 生成 word 文档简单版（包括文字、表格、图片、字体样式设置等）
POI 生成word 文档一般有两种方法: ① word模板生成word 文档 : ② 写代码直接生成 word 文档: 我这里演示的是第二种方法,即写代码生成 word文档,不多说废话,直接 ...
Poi之Word文档结构介绍
1.poi之word文档结构介绍之正文段落一个文档包含多个段落,一个段落包含多个Runs,一个Runs包含多个Run,Run是文档的最小单元获取所有段落:List<XWPFParagraph ...
微信公众号怎么添加附件？比如word文档，pdf文件等
微信公众号怎么添加附件?比如word文档,pdf文件等我们都知道创建一个微信公众号,在公众号中发布一些文章是非常简单的,但公众号添加附件下载的功能却被限制,如今可以使用小程序“微附件”进行在公众 ...
Java POI 解析word文档
实现步骤: 1.poi实现word转html 2.模型化解析html 3.html转Map数组 Map数组(数组的操作处理不做说明) 1.导jar包. 2.代码实现 package com.web.o ...
java word文档转 html文件
一.简介一般word文件后缀有doc.docx两种.docx是office word 2007以及以后版本文档的扩展名:doc是office word 2003文档保存的扩展名.对于这两种格式的wo ...
poi 读取word文档
1.导入jar包官网下载地址: https://www.apache.org/dyn/closer.lua/poi/release/bin/poi-bin-3.17-20170915.zip 最开始 ...

随机推荐

Oracle中in和exists的选择
在ORACLE 11G大行其道的今天,还有很多人受早期版本的影响,记住一些既定的规则, 1.子查询结果集小,用IN 2.外表小,子查询表大,用EXISTS 摘自:http://blog.chi ...
u-boot启动流程分析(2)_板级(board)部分
转自:http://www.wowotech.net/u-boot/boot_flow_2.html 目录: 1. 前言 2. Generic Board 3. _main 4. global dat ...
Linux系统默认服务建议开启关闭说明列表
服务名称功能简介建议 acpid 电源管理接口.如果是笔记本用户建议开启,可以监听内核层的相关电源事件. 开启 anacron 系统的定时任务程序.cron的一个子系统,如果定时任务错过了执行时间 ...
我的Android最佳实践之—— Android更新UI的两种方法:handler与runOnUiThread()
在Android开发过程中,常需要更新界面的UI.而更新UI是要主线程来更新的,即UI线程更新.如果在主线线程之外的线程中直接更新页面显示常会报错.抛出异常:android.view.ViewRoo ...
event 关键字
event(C# 参考) event 关键字用于在发行者类中声明事件.下面的示例演示如何声明和引发将 EventHandler 用作基础委托类型的事件. C# public class SampleE ...
(WPF) 文件和文件夹选择对话框。
点击button,选择一个excel文件,并将文件名显示在textbox上. private void btnSelectErrorTableFile_Click(object sender, Rou ...
用aauto做游戏修改器
import win.ui; import process; //以下自动生成,不用看. /*DSG{{*/ ;bottom=) winform.add( button={cls=;top=;righ ...
HelloHibernate详解
1. Configuration管理读取配置文件 //读取src下hibernate.properties,不推荐使用 Configuration cfg = new Configuration(); ...
Eclipse中构建Fluent风格到Formatter
The place to set this is on the "Line Wrapping" tab of the code formatting preferences pag ...
帝国CMS 6.0功能解密之新版结合项功能，帝国结合项使用
可以用来做A-Z信息检索某字段等于多少,输出等等帝国CMS6.0在继承以往版本结合项功能的基础上又新增很多特性,更强大.今天我们就专门来讲解6.0的结合项改进. 回顾下以往版本的结合项语 ...

POI实现word文档转html文件

POI实现word文档转html文件的更多相关文章

随机推荐

热门专题