使用POI把Word Excel转为HTML
此方法是针对Office2003的,但是word中如果有图片,图片能够解析出来但是HTML文件中不显示。也不支持excel中的图片解析。
所需jar包如下(原文此处的jar包列表缺失;从代码的import来看,需要Apache POI的poi与poi-scratchpad及其依赖):
1.PoiUtil.java
package com.wzh.poi; import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document; /**
* @date 2015-3-16 17:22:05
* @author y
* @desc
*/
public class PoiUtil {

    /** Charset name used both when serializing the HTML and when writing it to disk. */
    private static final String HTML_ENCODING = "UTF-8";

    /**
     * Converts a binary Excel workbook to an HTML file.
     *
     * <p>Column headers and row numbers are not emitted, and the headings
     * {@code <h2>Sheet1</h2>} .. {@code <h2>Sheet5</h2>} are stripped from the result.
     *
     * @param fileName path of the workbook to read
     * @param outputFile path of the HTML file to write
     * @throws FileNotFoundException if {@code fileName} cannot be opened
     * @throws IOException if reading the workbook fails
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerConfigurationException if no transformer can be created
     * @throws TransformerException if serializing the HTML document fails
     */
    public static void excelToHtml(String fileName, String outputFile)
            throws FileNotFoundException, IOException, ParserConfigurationException,
            TransformerConfigurationException, TransformerException {
        HSSFWorkbook excelBook;
        InputStream is = new FileInputStream(fileName);
        try {
            // NOTE(review): relies on POI having fully read the stream inside the
            // constructor - confirm for the POI version in use.
            excelBook = new HSSFWorkbook(is);
        } finally {
            is.close();
        }

        ExcelToHtmlConverter ethc = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        ethc.setOutputColumnHeaders(false);
        ethc.setOutputRowNumbers(false);
        ethc.processWorkbook(excelBook);

        String htmlStr = serializeToHtml(ethc.getDocument());

        // Drop the headings of sheets that still carry the default names Sheet1..Sheet5.
        for (int i = 1; i <= 5; i++) {
            htmlStr = htmlStr.replace("<h2>Sheet" + i + "</h2>", "");
        }

        writeFile(htmlStr, outputFile);
    }

    /**
     * Converts a binary Word document to an HTML file.
     *
     * <p>Every picture in the document's picture table is written, under its suggested
     * file name, into the directory of {@code outputFile} (the working directory when
     * {@code outputFile} has no directory part).
     *
     * @param fileName path of the Word document to read
     * @param outputFile path of the HTML file to write
     * @throws IOException if reading the document or writing a picture fails
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException if serializing the HTML document fails
     */
    public static void wordToHtml(String fileName, String outputFile) throws
            IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDoc;
        InputStream is = new FileInputStream(fileName);
        try {
            // NOTE(review): relies on POI having fully read the stream inside the
            // constructor - confirm for the POI version in use.
            wordDoc = new HWPFDocument(is);
        } finally {
            is.close();
        }

        WordToHtmlConverter wthc = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wthc.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] bytes, PictureType pt, String string, float f, float f1) {
                // The suggested name is handed back unchanged; presumably the converter
                // uses it as the image reference in the generated HTML.
                return string;
            }
        });
        wthc.processDocument(wordDoc);

        // Save pictures beside the HTML file so that bare file names resolve relative to
        // it. A null parent makes File(parent, child) fall back to the plain child path.
        File pictureDir = new File(outputFile).getParentFile();
        List<Picture> pics = wordDoc.getPicturesTable().getAllPictures();
        if (null != pics) {
            for (Picture pic : pics) {
                FileOutputStream picOut =
                        new FileOutputStream(new File(pictureDir, pic.suggestFullFileName()));
                try {
                    pic.writeImageContent(picOut);
                } finally {
                    picOut.close();
                }
            }
        }

        writeFile(serializeToHtml(wthc.getDocument()), outputFile);
    }

    /**
     * Writes {@code content} to {@code path} encoded as UTF-8.
     *
     * <p>I/O failures are logged at {@code SEVERE} level and not propagated.
     *
     * @param content text to write
     * @param path path of the file to create or overwrite
     */
    public static void writeFile(String content, String path) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            fos = new FileOutputStream(new File(path));
            bw = new BufferedWriter(new OutputStreamWriter(fos, HTML_ENCODING));
            bw.write(content);
        } catch (IOException ex) {
            // FileNotFoundException and UnsupportedEncodingException are IOExceptions too.
            Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            try {
                if (null != bw) {
                    bw.close();
                }
                if (null != fos) {
                    fos.close();
                }
            } catch (IOException ex) {
                Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

    /** Serializes a DOM document as indented HTML and returns it decoded from UTF-8. */
    private static String serializeToHtml(Document htmlDocument)
            throws TransformerException, IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        // Decode with the charset the bytes were produced in, not the platform default.
        return out.toString(HTML_ENCODING);
    }
}
2.Test.java
import com.wzh.poi.PoiUtil;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException; /*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/ /**
*
* @author y
*/
public class Test {

    /**
     * Converts the workbook {@code t2.xls} to {@code test.html} and logs any failure
     * at {@code SEVERE} level.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        final Logger logger = Logger.getLogger(Test.class.getName());
        try {
            PoiUtil.excelToHtml("t2.xls", "test.html");
        } catch (IOException ioFailure) {
            logger.log(Level.SEVERE, null, ioFailure);
        } catch (ParserConfigurationException parserFailure) {
            logger.log(Level.SEVERE, null, parserFailure);
        } catch (TransformerException transformFailure) {
            logger.log(Level.SEVERE, null, transformFailure);
        }
    }
}
使用POI把Word Excel转为HTML的更多相关文章
- Java操作word文档使用JACOB和POI操作word,Excel,PPT需要的jar包
可参考文档: http://wibiline.iteye.com/blog/1725492 下载jar包 http://download.csdn.net/download/javashixiaofe ...
- java实现在线预览--poi实现word、excel、ppt转html
java实现在线预览 - -之poi实现word.excel.ppt转html 简介 java实现在线预览功能是一个大家在工作中也许会遇到的需求,如果公司有钱,直接使用付费的第三方软件或者云在线预览服 ...
- java实现在线预览 - -之poi实现word、excel、ppt转html
简介 java实现在线预览功能是一个大家在工作中也许会遇到的需求,如果公司有钱,直接使用付费的第三方软件或者云在线预览服务就可以了,例如永中office.office web 365(http://w ...
- Java实现windows,linux服务器word,excel转为PDF;aspose-words,Documents4j
Java实现windows,linux服务器word,excel转为PDF:aspose-words,Documents4j 一.通过aspose-words将word,Excel文档转为PDF 1. ...
- java中使用poi导入导出excel文件_并自定义日期格式
Apache POI项目的使命是创造和保持java API操纵各种文件格式基于Office Open XML标准(OOXML)和微软的OLE复合文档格式(OLE2)2.总之,你可以读写Excel文件使 ...
- POI生成WORD文档
h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h ...
- Apache POI 实现对 Excel 文件读写
1. Apache POI 简介 Apache POI是Apache软件基金会的开放源码函式库. 提供API给Java应用程序对Microsoft Office格式档案读和写的功能. 老外起名字总是很 ...
- Word,Excel,pdf,txt等文件上传并提取内容
近期项目需求:1.要用到各种文件上传,下载. 2.并对文件进行搜索. 3.仅仅要文件里包括有搜索的内容,所有显示出来. 今天正好有时间整理一下,方便以后阅读,及对须要用到的朋友提供微薄之力.首先在实现 ...
- Java解析OFFICE(word,excel,powerpoint)以及PDF的实现方案及开发中的点滴分享
Java解析OFFICE(word,excel,powerpoint)以及PDF的实现方案及开发中的点滴分享 在此,先分享下写此文前的经历与感受,我所有的感觉浓缩到一个字,那就是:"坑&qu ...
随机推荐
- COJ 0802 非传统题(二)
(颓了这么多天是时候干点正事了QAQ) 非传统题(二) 难度级别:B: 运行时间限制:1000ms: 运行空间限制:51200KB: 代码长度限制:2000000B 试题描述 还是很久很久以前,chx ...
- PCanywhere/teamviewer/RDP/ultraVNC/tigerVNC/realVNC/Xmanager
PCanywhere/teamviewer/RDP/ultraVNC/tigerVNC/realVNC/Xmanager 1, 通常应用场景一般CentOS/RHEL等linux系统不配置安装Desk ...
- spring注解理解
步骤一:编写web.xml文件,主要代码如下:<servlet> Java代码 <servlet-name>spmvc</servlet-name> <ser ...
- mac 环境下使用virtual box 虚拟机(win7)与主机之间互相ping通
首先选择virtual box设置网络连接方式为网桥 混杂模式设置为全部允许 如下图: 进入虚拟机把虚拟机IP设置和主机在一个网段.如主机是192.168.1.100虚拟机可以设置为192.168.1 ...
- 转载:mybatis和hibernate 解析
第一章 Hibernate与MyBatis Hibernate 是当前最流行的O/R mapping框架,它出身于sf.net,现在已经成为Jboss的一部分. Mybatis 是另外一种优秀 ...
- ETL-Career RoadMap
RoadMap: 1.Tester:sql的单体或批处理测试: 2. Application Developer 2.1 批处理手动工具(如何使用.如何调度批处理.如何生成批处理脚本): 2.2 批处 ...
- 修改Android中strings.xml文件, 动态改变数据
有些朋友可能会动态的修改Android中strings.xml文件中的值,在这里给大家推荐一种简单的方法.strings.xml中节点是支持占位符的,如下所示: <string name=&qu ...
- Nuget找不到服务器
Nuget的新地址 http://nuget-prod-v2gallery.trafficmanager.net/api/v2/
- Unity 制作虚拟手柄例子
Unity不愧是收费开发软件,有写好的Joystick(虚拟手柄),使用起来很简单,我们一起来学习一下哈!! 本文源代码Win版的 :http://vdisk.weibo.com/s/BDn59yfn ...
- Entrez检索实例 - NCBI
题目:已知来豆荚斑驳病毒(bean pod mottle virus,BPMV)的名字,查询BPMV基因组信息.核酸序列信息.蛋白序列信息和结构信息 解答: 1.直接搜索,点genome,即可看到病毒 ...