使用POI把Word Excel转为HTML
此方法是针对Office2003的,但是word中如果有图片,图片能够解析出来但是HTML文件中不显示。也不支持excel中的图片解析。
所需jar包如下如下:
1:PoiUtil.java
package com.wzh.poi; import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document; /**
* @date 2015-3-16 17:22:05
* @author y
* @desc
*/
public class PoiUtil { /**
* Excel 转为 HTML
* @param fileName
* @param outputFile
* @throws FileNotFoundException
* @throws IOException
* @throws ParserConfigurationException
* @throws TransformerConfigurationException
* @throws TransformerException
*/
public static void excelToHtml(String fileName, String outputFile)
throws FileNotFoundException, IOException, ParserConfigurationException,
TransformerConfigurationException, TransformerException {
InputStream is = new FileInputStream(fileName); HSSFWorkbook excelBook = new HSSFWorkbook(is); ExcelToHtmlConverter ethc = new ExcelToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
ethc.setOutputColumnHeaders(false);
ethc.setOutputRowNumbers(false); ethc.processWorkbook(excelBook); Document htmlDocument = ethc.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.close(); String htmlStr = new String(out.toByteArray()); htmlStr = htmlStr.replace("<h2>Sheet1</h2>", "")
.replace("<h2>Sheet2</h2>", "")
.replace("<h2>Sheet3</h2>", "")
.replace("<h2>Sheet4</h2>", "")
.replace("<h2>Sheet5</h2>", ""); writeFile(htmlStr, outputFile);
} /**
* Word 转为 HTML
*
* @param fileName
* @param outputFile
* @throws IOException
* @throws ParserConfigurationException
* @throws TransformerException
*/
public static void wordToHtml(String fileName, String outputFile) throws
IOException, ParserConfigurationException, TransformerException {
HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(fileName)); WordToHtmlConverter wthc = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); wthc.setPicturesManager(new PicturesManager() { @Override
public String savePicture(byte[] bytes, PictureType pt, String string, float f, float f1) {
return string;
} }); wthc.processDocument(wordDoc); List<Picture> pics = wordDoc.getPicturesTable().getAllPictures();
if (null != pics && pics.size() > 0) {
for (Picture pic : pics) {
pic.writeImageContent(new FileOutputStream(pic.suggestFullFileName()));
}
} Document htmlDocument = wthc.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult); out.close(); String htmlStr = new String(out.toByteArray());
writeFile(htmlStr, outputFile);
} public static void writeFile(String content, String path) {
FileOutputStream fos = null;
BufferedWriter bw = null; File file = new File(path); try {
fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
bw.write(content);
} catch (FileNotFoundException ex) {
Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
} catch (UnsupportedEncodingException ex) {
Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
} finally {
try {
if (null != bw) {
bw.close();
}
if (null != fos) {
fos.close();
}
} catch (IOException ex) {
Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
} }
}
}
2.Test.java
import com.wzh.poi.PoiUtil;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException; /*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/ /**
*
* @author y
*/
public class Test { /**
* @param args the command line arguments
*/
public static void main(String[] args) {
try {
PoiUtil.excelToHtml("t2.xls", "test.html");
} catch (IOException ex) {
Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, ex);
} catch (ParserConfigurationException ex) {
Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, ex);
} catch (TransformerException ex) {
Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, ex);
} } }
使用POI把Word Excel转为HTML的更多相关文章
- Java操作word文档使用JACOB和POI操作word,Excel,PPT需要的jar包
可参考文档: http://wibiline.iteye.com/blog/1725492 下载jar包 http://download.csdn.net/download/javashixiaofe ...
- java实现在线预览--poi实现word、excel、ppt转html
java实现在线预览 - -之poi实现word.excel.ppt转html 简介 java实现在线预览功能是一个大家在工作中也许会遇到的需求,如果公司有钱,直接使用付费的第三方软件或者云在线预览服 ...
- java实现在线预览 - -之poi实现word、excel、ppt转html
简介 java实现在线预览功能是一个大家在工作中也许会遇到的需求,如果公司有钱,直接使用付费的第三方软件或者云在线预览服务就可以了,例如永中office.office web 365(http://w ...
- Java实现windows,linux服务器word,excel转为PDF;aspose-words,Documents4j
Java实现windows,linux服务器word,excel转为PDF:aspose-words,Documents4j 一.通过aspose-words将word,Excel文档转为PDF 1. ...
- java中使用poi导入导出excel文件_并自定义日期格式
Apache POI项目的使命是创造和保持java API操纵各种文件格式基于Office Open XML标准(OOXML)和微软的OLE复合文档格式(OLE2)2.总之,你可以读写Excel文件使 ...
- POI生成WORD文档
h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h ...
- Apache POI 实现对 Excel 文件读写
1. Apache POI 简介 Apache POI是Apache软件基金会的开放源码函式库. 提供API给Java应用程序对Microsoft Office格式档案读和写的功能. 老外起名字总是很 ...
- Word,Excel,pdf,txt等文件上传并提取内容
近期项目需求:1.要用到各种文件上传,下载. 2.并对文件进行搜索. 3.仅仅要文件里包括有搜索的内容,所有显示出来. 今天正好有时间整理一下,方便以后阅读,及对须要用到的朋友提供微薄之力.首先在实现 ...
- Java解析OFFICE(word,excel,powerpoint)以及PDF的实现方案及开发中的点滴分享
Java解析OFFICE(word,excel,powerpoint)以及PDF的实现方案及开发中的点滴分享 在此,先分享下写此文前的经历与感受,我所有的感觉浓缩到一个字,那就是:"坑&qu ...
随机推荐
- BZOJ3398: [Usaco2009 Feb]Bullcow 牡牛和牝牛
3398: [Usaco2009 Feb]Bullcow 牡牛和牝牛 Time Limit: 1 Sec Memory Limit: 128 MBSubmit: 30 Solved: 17[Sub ...
- Android视图框架
Android视图框架 Android的UI系统是android应用系统框架最核心,最基础的内容! 1. Android视图系统.层次关系 Android应用设计和Web应用设计类似,也分前端和后端设 ...
- ACM2039_三角形三边关系
#include <iostream> using namespace std; int main(int argc, char* argv[]) { double a,b,c; int ...
- Remove Node in Binary Search Tree 解答
从BST中移除一个节点是比较复杂的问题,需要分好几种情况讨论. 如这篇文章,就讨论了删除节点 1.有无左右子树 2.只有右子树 3.只有左子树 三种情况. 一种简单些的思维是只考虑删除节点是否有右子树 ...
- DPDK2.1 linux上开发入门手册
1引言 本文档主要包含INTEL DPDK安装和配置说明.目的是让用户快速的开发和运行程序.文档描述了如何在不深入细节的情况下在linux应用开发环境上编译和运行一个DPDK应用程序. 1.1文档总览 ...
- C++Memset误区
Memset的原型是void *memset(void *s, char ch, size_t n); Memset是按字节赋值的,对char以外的类型赋0(00000000) -1(11111111 ...
- 使用MapReduce查询Hbase表指定列簇的全部数据输出到HDFS(一)
package com.bank.service; import java.io.IOException; import org.apache.hadoop.conf.Configuration;im ...
- wpf下拉框不能多选的原因
<dxe:ComboBoxEdit Margin="0" Height="25" Width="65" VerticalAlignm ...
- 遍历Jenkins全部项目的配置
随着任务的增多.须要一个脚本能够检查全部的jenkins project的配置.比方提取任务计划配置,开发人员信息等. 首先要能够得到全部的project名称. 能够通过REST API实现: htt ...
- [React] React Router: Named Components
In this lesson we'll learn how to render multiple component children from a single route. Define a n ...