使用POI把Word Excel转为HTML
此方法是针对Office2003的,但是word中如果有图片,图片能够解析出来但是HTML文件中不显示。也不支持excel中的图片解析。
所需jar包如下如下:

1:PoiUtil.java
package com.wzh.poi; import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document; /**
* @date 2015-3-16 17:22:05
* @author y
* @desc
*/
public class PoiUtil { /**
* Excel 转为 HTML
* @param fileName
* @param outputFile
* @throws FileNotFoundException
* @throws IOException
* @throws ParserConfigurationException
* @throws TransformerConfigurationException
* @throws TransformerException
*/
public static void excelToHtml(String fileName, String outputFile)
throws FileNotFoundException, IOException, ParserConfigurationException,
TransformerConfigurationException, TransformerException {
InputStream is = new FileInputStream(fileName); HSSFWorkbook excelBook = new HSSFWorkbook(is); ExcelToHtmlConverter ethc = new ExcelToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
ethc.setOutputColumnHeaders(false);
ethc.setOutputRowNumbers(false); ethc.processWorkbook(excelBook); Document htmlDocument = ethc.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.close(); String htmlStr = new String(out.toByteArray()); htmlStr = htmlStr.replace("<h2>Sheet1</h2>", "")
.replace("<h2>Sheet2</h2>", "")
.replace("<h2>Sheet3</h2>", "")
.replace("<h2>Sheet4</h2>", "")
.replace("<h2>Sheet5</h2>", ""); writeFile(htmlStr, outputFile);
} /**
* Word 转为 HTML
*
* @param fileName
* @param outputFile
* @throws IOException
* @throws ParserConfigurationException
* @throws TransformerException
*/
public static void wordToHtml(String fileName, String outputFile) throws
IOException, ParserConfigurationException, TransformerException {
HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(fileName)); WordToHtmlConverter wthc = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); wthc.setPicturesManager(new PicturesManager() { @Override
public String savePicture(byte[] bytes, PictureType pt, String string, float f, float f1) {
return string;
} }); wthc.processDocument(wordDoc); List<Picture> pics = wordDoc.getPicturesTable().getAllPictures();
if (null != pics && pics.size() > 0) {
for (Picture pic : pics) {
pic.writeImageContent(new FileOutputStream(pic.suggestFullFileName()));
}
} Document htmlDocument = wthc.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult); out.close(); String htmlStr = new String(out.toByteArray());
writeFile(htmlStr, outputFile);
} public static void writeFile(String content, String path) {
FileOutputStream fos = null;
BufferedWriter bw = null; File file = new File(path); try {
fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
bw.write(content);
} catch (FileNotFoundException ex) {
Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
} catch (UnsupportedEncodingException ex) {
Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
} finally {
try {
if (null != bw) {
bw.close();
}
if (null != fos) {
fos.close();
}
} catch (IOException ex) {
Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
} }
}
}
2.Test.java
import com.wzh.poi.PoiUtil;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException; /*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/ /**
*
* @author y
*/
public class Test { /**
* @param args the command line arguments
*/
public static void main(String[] args) {
try {
PoiUtil.excelToHtml("t2.xls", "test.html");
} catch (IOException ex) {
Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, ex);
} catch (ParserConfigurationException ex) {
Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, ex);
} catch (TransformerException ex) {
Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, ex);
} } }
使用POI把Word Excel转为HTML的更多相关文章
- Java操作word文档使用JACOB和POI操作word,Excel,PPT需要的jar包
可参考文档: http://wibiline.iteye.com/blog/1725492 下载jar包 http://download.csdn.net/download/javashixiaofe ...
- java实现在线预览--poi实现word、excel、ppt转html
java实现在线预览 - -之poi实现word.excel.ppt转html 简介 java实现在线预览功能是一个大家在工作中也许会遇到的需求,如果公司有钱,直接使用付费的第三方软件或者云在线预览服 ...
- java实现在线预览 - -之poi实现word、excel、ppt转html
简介 java实现在线预览功能是一个大家在工作中也许会遇到的需求,如果公司有钱,直接使用付费的第三方软件或者云在线预览服务就可以了,例如永中office.office web 365(http://w ...
- Java实现windows,linux服务器word,excel转为PDF;aspose-words,Documents4j
Java实现windows,linux服务器word,excel转为PDF:aspose-words,Documents4j 一.通过aspose-words将word,Excel文档转为PDF 1. ...
- java中使用poi导入导出excel文件_并自定义日期格式
Apache POI项目的使命是创造和保持java API操纵各种文件格式基于Office Open XML标准(OOXML)和微软的OLE复合文档格式(OLE2)2.总之,你可以读写Excel文件使 ...
- POI生成WORD文档
h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h ...
- Apache POI 实现对 Excel 文件读写
1. Apache POI 简介 Apache POI是Apache软件基金会的开放源码函式库. 提供API给Java应用程序对Microsoft Office格式档案读和写的功能. 老外起名字总是很 ...
- Word,Excel,pdf,txt等文件上传并提取内容
近期项目需求:1.要用到各种文件上传,下载. 2.并对文件进行搜索. 3.仅仅要文件里包括有搜索的内容,所有显示出来. 今天正好有时间整理一下,方便以后阅读,及对须要用到的朋友提供微薄之力.首先在实现 ...
- Java解析OFFICE(word,excel,powerpoint)以及PDF的实现方案及开发中的点滴分享
Java解析OFFICE(word,excel,powerpoint)以及PDF的实现方案及开发中的点滴分享 在此,先分享下写此文前的经历与感受,我所有的感觉浓缩到一个字,那就是:"坑&qu ...
随机推荐
- android R.id.转化为view
LayoutInflater inflater=(LayoutInflater)getSystemService(Context.LAYOUT_INFLATER_SERVICE); View view ...
- python通过代理刷网页点击量
python通过代理刷网页点击量 更新异常处理情况 @time 2013-0803 更新循环里计数问题和随机等待时间问题 #!/usr/bin/python #-*- coding:utf-8 -*- ...
- Android studio 开发中 用git实现批量忽略特定文件的方法
git实现批量忽略特定文件的方法 在用AndroidStudio开发项目的时候,3个人协同开发,那么用Git同步代码,会将模块中的大量iml文件同步,每次都会提交和更新,一个一个的去忽略他们,显然是最 ...
- SQL with as 替代临时表的用法
原文地址:http://www.cnblogs.com/zerocc/archive/2011/11/28/2266180.html SQL中 WITH AS 的用法和注意事项 1.为什么使用with ...
- 学习DNS路上之CloudXNS
使用CloudXNS已经有两年了,趁着他们现在做活动的机会也发表一下这两年来使用感受与CloudXNS的变化,也对我学习使用的一次总结. 简介 CloudXNS是北京快网开发的一套授权DNS系统, 用 ...
- JS-Math内置对象
<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title> ...
- 关于phpmyadmin中添加外键的做法
今天想加个外键,又不想用命令行,打开PHPMYADMIN看怎么弄,找了半天没有找到添加外键的地方,然后上网搜了一会,发现目前的PHPMYADMIN确实没有这个设置,所以只能手动命令行添加了. 语法 ...
- HDU 1501 & POJ 2192 Zipper(dp记忆化搜索)
题意:给定三个串,问c串是否能由a,b串任意组合在一起组成,但注意a,b串任意组合需要保证a,b原串的顺序 例如ab,cd可组成acbd,但不能组成adcb. 分析:对字符串上的dp还是不敏感啊,虽然 ...
- [RxJS] Observables can throw errors
Whenever we are writing code, we need to remember that things may go wrong. If an error happens in a ...
- CareerCup Chapter 9 Sorting and Searching
9.1 You are given two sorted arrays, A and B, and A has a large enough buffer at the end to hold B. ...