使用itext直接替换PDF中的文本

直接说问题，itext没有直接提供替换PDF中文本的接口（查看资料得到的结论是PDF不支持这种操作），不过存在解决思路：在需要替换的文本上覆盖新的文本。按照这个思路我们需要解决以下几个问题：

itext怎样增加白色底的覆盖层
找到覆盖层的位置（左顶点的位置）和高度与宽度

这样做的目的是什么呢？也告诉下大家，比如：现在要你将业务数据导出成PDF存档，且PDF的模板是现成的。对我们写程序的来说，变化的只是部分数据，假如我们可以直接替换里面的数据，是不是就可以节省我们的开发时间。

1、itext怎样增加覆盖层？

itext在自己的Demo中提供了很多案例代码，从中我们可以看到高亮的案例

查看itext代码

/*
* This example was written in answer to the question:
* http://stackoverflow.com/questions/33952183
*/
package sandbox.stamper;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Highlights a fixed region of the first page of a PDF by painting a yellow
 * rectangle into the layer returned by {@code PdfStamper.getUnderContent(1)}.
 *
 * @author Bruno Lowagie (iText Software)
 */
public class HighLightByAddingContent {
    /** Path of the PDF that is read. */
    public static final String SRC = "resources/pdfs/hello.pdf";
    /** Path of the PDF that is written. */
    public static final String DEST = "results/stamper/hello_highlighted.pdf";

    /**
     * Creates the output directory if necessary and runs the example on
     * {@link #SRC}, writing to {@link #DEST}.
     *
     * @param args ignored
     * @throws IOException if reading or writing a file fails
     * @throws DocumentException if iText fails while stamping the document
     */
    public static void main(String[] args) throws IOException, DocumentException {
        File file = new File(DEST);
        File parent = file.getParentFile();
        // getParentFile() returns null for a bare file name; nothing to create then.
        if (parent != null) {
            parent.mkdirs();
        }
        new HighLightByAddingContent().manipulatePdf(SRC, DEST);
    }

    /**
     * Copies {@code src} to {@code dest}, adding a filled yellow rectangle at
     * (36, 786) with size 66 x 16 on page 1.
     *
     * @param src path of the PDF to read
     * @param dest path of the PDF to write
     * @throws IOException if reading or writing a file fails
     * @throws DocumentException if iText fails while stamping the document
     */
    public void manipulatePdf(String src, String dest) throws IOException, DocumentException {
        PdfReader reader = new PdfReader(src);
        try {
            FileOutputStream out = new FileOutputStream(dest);
            try {
                PdfStamper stamper = new PdfStamper(reader, out);
                try {
                    PdfContentByte canvas = stamper.getUnderContent(1);
                    canvas.saveState();
                    canvas.setColorFill(BaseColor.YELLOW);
                    canvas.rectangle(36, 786, 66, 16);
                    canvas.fill();
                    canvas.restoreState();
                } finally {
                    stamper.close();
                }
            } finally {
                // Guarantees the file handle is released even when the stamper
                // could not be created; closing a FileOutputStream twice is harmless.
                out.close();
            }
        } finally {
            reader.close();
        }
    }
}

这里可以在任意位置产生一个层，符合我们的“遮盖层”的要求，不过，通过测试发现此段代码存在一个问题点，它无法遮挡住文字，只是添加了一个背景层。为了达到我们的要求，我们只需要修改一处地方：

PdfContentByte canvas = stamper.getUnderContent(1); //变成 PdfContentByte canvas = stamper.getOverContent(1);

到目前为止，我们的遮盖层已添加，后面我们还需要的就是在新的遮盖层上写上自己的文字，代码如下：

/**********************************************************************
* <pre>
* FILE : HighLightByAddingContent.java
* CLASS : HighLightByAddingContent
*
*
* FUNCTION : TODO
*
*
*======================================================================
* CHANGE HISTORY LOG
*----------------------------------------------------------------------
* MOD. NO.| DATE | NAME | REASON | CHANGE REQ.
*----------------------------------------------------------------------
*
* DESCRIPTION:
* </pre>
***********************************************************************/
package com.cx.itext;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URLDecoder;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
/**
 * Covers a fixed region on page 1 of a PDF with a white rectangle and writes
 * replacement text on top of it.
 */
public class HighLightByAddingContent {
    /** Decoded file-system path of the {@code ticket.pdf} resource next to this class. */
    @SuppressWarnings("deprecation")
    public static final String SRC = URLDecoder.decode(HighLightByAddingContent.class.getResource("ticket.pdf").getFile());
    /** Path of the PDF that is written. */
    public static final String DEST = "I://ticket.pdf";

    /**
     * Creates the output directory if necessary and processes {@link #SRC}
     * into {@link #DEST}.
     *
     * @param args ignored
     * @throws IOException if reading or writing a file fails
     * @throws DocumentException if iText fails while stamping the document
     */
    public static void main(String[] args) throws IOException, DocumentException {
        File parent = new File(DEST).getParentFile();
        // getParentFile() returns null for a bare file name; nothing to create then.
        if (parent != null) {
            parent.mkdirs();
        }
        new HighLightByAddingContent().manipulatePdf(SRC, DEST);
    }

    /**
     * Copies {@code src} to {@code dest} with the cover rectangle and the new
     * text drawn over the content of page 1, then prints {@code complete}.
     *
     * @param src path of the PDF to read
     * @param dest path of the PDF to write
     * @throws IOException if reading or writing a file, or loading the font, fails
     * @throws DocumentException if iText fails while stamping the document
     */
    public void manipulatePdf(String src, String dest) throws IOException, DocumentException {
        PdfReader reader = new PdfReader(src);
        try {
            FileOutputStream out = new FileOutputStream(dest);
            try {
                PdfStamper stamper = new PdfStamper(reader, out);
                try {
                    coverAndWrite(stamper.getOverContent(1));
                } finally {
                    stamper.close();
                }
            } finally {
                // Releases the file handle even when the stamper could not be
                // created; closing a FileOutputStream twice is harmless.
                out.close();
            }
        } finally {
            reader.close();
        }
        System.out.println("complete");
    }

    /** Paints the white cover rectangle and then the replacement text onto {@code canvas}. */
    private static void coverAndWrite(PdfContentByte canvas) throws IOException, DocumentException {
        // NOTE(review): hard-coded page height in points; assumes the template
        // page is 595pt tall - confirm against the actual PDF.
        float height = 595;
        System.out.println(canvas.getHorizontalScaling());
        float x = 216;
        // Converts a distance measured from the top of the page into a
        // coordinate measured from the bottom.
        float y = height - 49.09F;

        canvas.saveState();
        canvas.setColorFill(BaseColor.WHITE);
        canvas.rectangle(x, y - 5, 43, 15);
        canvas.fill();
        canvas.restoreState();

        // Load the font before opening the text object so a font failure
        // cannot leave an unbalanced beginText() behind. An OTF file could be
        // used instead via BaseFont.createFont(path, BaseFont.IDENTITY_H, BaseFont.EMBEDDED).
        BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);

        canvas.beginText();
        // First line: 10pt text on top of the cover rectangle.
        canvas.setFontAndSize(bf, 10);
        canvas.setTextMatrix(x, y);
        canvas.showText("多退少补" );
        // Second line: the same text at 20pt, 90 points further down the page.
        canvas.setFontAndSize(bf, 20);
        canvas.setTextMatrix(x, y - 90);
        canvas.showText("多退少补" );
        canvas.endText();
    }
}

2、找到覆盖层的位置（左顶点的位置）和高度与宽度

我的第一个想法是通过工具得到替换文本的具体位置，虽然这个方法不怎么好，不过确实可行。使用到的工具是常用的Adobe Reader,以下是正常页面（PDF是网上搜的,百度key：“申请 filetype:pdf”）：

Adobe提供了测量工具，我们可以通过“编辑-->分析-->测量工具”看到如下页面：

此时，我们虽然可以直接测量，但是测量默认显示的单位是厘米，与itext需要设置的单位不一致，我们需要手工换算下（1英寸=72点）。不过，adobe可以帮我们省掉换算的工作，右键点击，出现以下选项（需要在测量功能下右键）：

“更改比例”可以帮助我们完成换算工作。（ps：“显示标尺”是一个不错的选项）。最后的画面如下：

最后，需要提醒下，itext的Y是从下往上算的。

这样得到位置是不是太不方便了。那我们是否可以通过itext自动计算出我们需要的位置？代码如下（从网上COPY，不记得具体来源，支持作者）

/**********************************************************************
* <pre>
* FILE : Demo.java
* CLASS : Demo
*
* AUTHOR : caoxu-yiyang@qq.com
*
* FUNCTION : TODO
*
*
*======================================================================
* CHANGE HISTORY LOG
*----------------------------------------------------------------------
* MOD. NO.| DATE | NAME | REASON | CHANGE REQ.
*----------------------------------------------------------------------
* |2016年11月9日|caoxu-yiyang@qq.com| Created |
* DESCRIPTION:
* </pre>
***********************************************************************/
package com.cx.itext;
import java.io.IOException;
import com.itextpdf.awt.geom.Rectangle2D.Float;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
/**
 * Scans every page of a PDF for text chunks containing a keyword and reports
 * the position of the chunk's baseline bounding rectangle.
 */
public class Demo {
    /** Keyword to look for. */
    private static final String KEY_WORD = "结算区分";

    public static void main(String[] args) {
        getKeyWords("I://ticket_in.pdf");
    }

    /**
     * Returns the position of the last text chunk that contains the keyword.
     *
     * @param filePath path of the PDF to scan
     * @return {@code {x, y, page}} of the last match, or {@code null} when no
     *         chunk matched; if an {@link IOException} interrupts the scan,
     *         whatever was found before the failure is returned
     */
    private static float[] getKeyWords(String filePath) {
        // The listener is local so that a result from an earlier call can
        // never leak into this one.
        KeywordLocator locator = new KeywordLocator(KEY_WORD);
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(filePath);
            int pageNum = pdfReader.getNumberOfPages();
            PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);
            // Page numbers start at 1.
            for (int page = 1; page <= pageNum; page++) {
                locator.setPage(page);
                pdfReaderContentParser.processContent(page, locator);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
        return locator.getResult();
    }

    /** Render listener that records where the keyword was last seen. */
    private static final class KeywordLocator implements RenderListener {
        private final String keyWord;
        private int page;
        private float[] result;

        KeywordLocator(String keyWord) {
            this.keyWord = keyWord;
        }

        /** Sets the page number stored with any match found from now on. */
        void setPage(int page) {
            this.page = page;
        }

        /** Returns {@code {x, y, page}} of the last match, or {@code null} if none. */
        float[] getResult() {
            return result;
        }

        @Override
        public void renderText(TextRenderInfo textRenderInfo) {
            String text = textRenderInfo.getText();
            if (null != text && text.contains(keyWord)) {
                Float boundingRectange = textRenderInfo
                        .getBaseline().getBoundingRectange();
                System.out.println("======="+text);
                System.out.println("h:"+boundingRectange.getHeight());
                System.out.println("w:"+boundingRectange.width);
                System.out.println("centerX:"+boundingRectange.getCenterX());
                System.out.println("centerY:"+boundingRectange.getCenterY());
                System.out.println("x:"+boundingRectange.getX());
                System.out.println("y:"+boundingRectange.getY());
                System.out.println("maxX:"+boundingRectange.getMaxX());
                System.out.println("maxY:"+boundingRectange.getMaxY());
                System.out.println("minX:"+boundingRectange.getMinX());
                System.out.println("minY:"+boundingRectange.getMinY());
                // A later match replaces an earlier one.
                result = new float[] { boundingRectange.x, boundingRectange.y, page };
            }
        }

        @Override
        public void renderImage(ImageRenderInfo arg0) {
            // Images are irrelevant for text lookup.
        }

        @Override
        public void endTextBlock() {
            // Nothing to do.
        }

        @Override
        public void beginTextBlock() {
            // Nothing to do.
        }
    }
}

结合以上的，我们就可以写一个自动替换PDF文本的类，具体使用如下：

/**
 * Usage example: registers three text replacements on {@code I://test.pdf}
 * and writes the result to {@code I://ticket_out.pdf}.
 */
public static void main(String[] args) throws IOException, DocumentException {
    final PdfReplacer replacer = new PdfReplacer("I://test.pdf");

    // Each call maps the text to look for onto the text to draw in its place.
    replacer.replaceText("陈坤", "小白");
    replacer.replaceText("本科", "社会大学");
    replacer.replaceText("0755-29493863", "15112345678");

    // Performs the replacements and writes the output file.
    replacer.toPdf("I://ticket_out.pdf");
}

原始PDF:

替换之后的(红色背景只是方便大家看到差别)：

（第一次认真写博客，感觉好花时间，佩服那些坚持写博客的人~~）

补上相关代码（还在完善中），总共4个类

代码中有几个地方要说明下：

1、由于自动计算得到的高度都是0，所以我这边默认的都是12，大家要根据实际情况来设

2、除了可以让代码自己计算位置之外，也可以通过replaceText的重载方法强制指定替换区域。

/**********************************************************************
* <pre>
* FILE : PdfTextReplacer.java
* CLASS : PdfTextReplacer
*
* AUTHOR : caoxu-yiyang@qq.com
*
* FUNCTION : TODO
*
*
*======================================================================
* CHANGE HISTORY LOG
*----------------------------------------------------------------------
* MOD. NO.| DATE | NAME | REASON | CHANGE REQ.
*----------------------------------------------------------------------
* |2016年11月8日|caoxu-yiyang@qq.com| Created |
* DESCRIPTION:
* </pre>
***********************************************************************/
package com.cx.itext;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.log.Logger;
import com.itextpdf.text.log.LoggerFactory;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
/**
 * Replaces text on the first page of a PDF by painting a filled rectangle over
 * each target region and drawing the replacement text on top of it.
 *
 * <p>Typical use: construct, call one of the {@code replaceText} methods one or
 * more times, then call {@link #toPdf(String)} or {@link #toBytes()} once. Both
 * output methods release the underlying resources, so an instance produces its
 * output only once.
 *
 * <p>Author: caoxu-yiyang@qq.com, created 2016-11-08.
 */
public class PdfReplacer {
    private static final Logger logger = LoggerFactory.getLogger(PdfReplacer.class);

    private int fontSize;
    /** Regions to cover, keyed by alias name. */
    private Map<String, ReplaceRegion> replaceRegionMap = new HashMap<String, ReplaceRegion>();
    /** Replacement text, keyed by region alias name / text to search for. */
    private Map<String, String> replaceTextMap = new HashMap<String, String>();
    /** Receives the stamped PDF; {@code null} once the output has been produced. */
    private ByteArrayOutputStream output;
    private PdfReader reader;
    private PdfStamper stamper;
    private PdfContentByte canvas;
    private Font font;

    /**
     * Creates a replacer for an in-memory PDF.
     *
     * @param pdfBytes content of the PDF
     * @throws DocumentException if iText cannot set up the stamper or the font
     * @throws IOException if the PDF cannot be parsed or the font cannot be loaded
     */
    public PdfReplacer(byte[] pdfBytes) throws DocumentException, IOException{
        init(pdfBytes);
    }

    /**
     * Creates a replacer for a PDF file.
     *
     * @param fileName path of the PDF to read
     * @throws IOException if the file cannot be read or parsed
     * @throws DocumentException if iText cannot set up the stamper or the font
     */
    public PdfReplacer(String fileName) throws IOException, DocumentException{
        init(readFile(fileName));
    }

    /** Reads the whole file into memory, coping with reads that return fewer bytes than requested. */
    private static byte[] readFile(String fileName) throws IOException {
        FileInputStream in = new FileInputStream(fileName);
        try {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int count;
            while ((count = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, count);
            }
            return buffer.toByteArray();
        } finally {
            in.close();
        }
    }

    /** Opens the reader and stamper, grabs the over-content layer of page 1 and sets the default 10pt font. */
    private void init(byte[] pdfBytes) throws DocumentException, IOException{
        logger.info("初始化开始");
        reader = new PdfReader(pdfBytes);
        boolean initialized = false;
        try {
            output = new ByteArrayOutputStream();
            stamper = new PdfStamper(reader, output);
            canvas = stamper.getOverContent(1);
            setFont(10);
            initialized = true;
        } finally {
            // Do not keep the reader open when construction fails half-way.
            if (!initialized) {
                reader.close();
            }
        }
        logger.info("初始化成功");
    }

    /** Releases the reader and the output buffer and forgets all registered replacements. */
    private void close() throws DocumentException, IOException{
        if(reader != null){
            reader.close();
        }
        if(output != null){
            output.close();
        }
        // A null output marks this instance as finished; see process().
        output = null;
        replaceRegionMap.clear();
        replaceTextMap.clear();
    }

    /**
     * Registers a replacement at an explicitly given region. The text doubles
     * as the alias name of the region.
     *
     * @param x x coordinate handed to {@code PdfContentByte.rectangle}
     * @param y y coordinate handed to {@code PdfContentByte.rectangle}
     * @param w width of the cover rectangle
     * @param h height of the cover rectangle
     * @param text text to draw inside the region
     */
    public void replaceText(float x, float y, float w,float h, String text){
        ReplaceRegion region = new ReplaceRegion(text); // the text serves as the alias
        region.setH(h);
        region.setW(w);
        region.setX(x);
        region.setY(y);
        addReplaceRegion(region);
        this.replaceText(text, text);
    }

    /**
     * Registers a replacement by name. If no region with this alias has been
     * added, {@code name} is searched for in the PDF text when the output is
     * produced.
     *
     * @param name alias of a registered region, or the text to search for
     * @param text text to draw in its place
     */
    public void replaceText(String name, String text){
        this.replaceTextMap.put(name, text);
    }

    /**
     * Resolves missing regions, paints the cover rectangles and draws the
     * replacement text. Always closes the stamper, which finishes the PDF in
     * the output buffer.
     *
     * @throws IllegalStateException if the output was already produced
     * @throws DocumentException if iText fails while finishing the document
     * @throws IOException if iText fails while writing the document
     */
    private void process() throws DocumentException, IOException{
        if (output == null) {
            throw new IllegalStateException("PdfReplacer output has already been produced");
        }
        try{
            parseReplaceText();

            canvas.saveState();
            // The blog text this class accompanies states that the red fill is
            // only there to make the covered areas easy to spot.
            canvas.setColorFill(BaseColor.RED);
            for (ReplaceRegion region : replaceRegionMap.values()) {
                canvas.rectangle(region.getX(), region.getY(), region.getW(), region.getH());
            }
            canvas.fill();
            canvas.restoreState();

            canvas.beginText();
            // NOTE(review): the size comes from the int fontSize field, so a size
            // carried by a Font passed to setFont(Font) is not used here.
            canvas.setFontAndSize(font.getBaseFont(), getFontSize());
            for (ReplaceRegion region : replaceRegionMap.values()) {
                String text = replaceTextMap.get(region.getAliasName());
                if (text == null) {
                    // Region added without replacement text: leave it covered only.
                    continue;
                }
                // +2 lifts the text relative to the bottom edge of the cover rectangle.
                canvas.setTextMatrix(region.getX(), region.getY() + 2);
                canvas.showText(text);
            }
            canvas.endText();
        }finally{
            if(stamper != null){
                stamper.close();
            }
        }
    }

    /**
     * Looks up the position of every replacement that has no explicitly
     * registered region. Texts that cannot be found are skipped; a parse
     * failure is logged and otherwise ignored.
     */
    private void parseReplaceText() {
        PdfPositionParse parse = new PdfPositionParse(reader);
        boolean lookupNeeded = false;
        for (String name : this.replaceTextMap.keySet()) {
            if(this.replaceRegionMap.get(name) == null){
                parse.addFindText(name);
                lookupNeeded = true;
            }
        }
        if (!lookupNeeded) {
            // parse() rejects an empty search list, so skip it when every
            // region was supplied by the caller.
            return;
        }
        try {
            Map<String, ReplaceRegion> parseResult = parse.parse();
            for (Entry<String, ReplaceRegion> entry : parseResult.entrySet()) {
                if(entry.getValue() != null){
                    this.replaceRegionMap.put(entry.getKey(), entry.getValue());
                }
            }
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
    }

    /**
     * Performs the replacements and writes the resulting PDF to a file.
     *
     * @param fileName path of the PDF to write
     * @throws DocumentException if iText fails while finishing the document
     * @throws IOException if stamping or writing the file fails
     */
    public void toPdf(String fileName) throws DocumentException, IOException{
        try{
            process();
            FileOutputStream fileOutputStream = new FileOutputStream(fileName);
            try {
                fileOutputStream.write(output.toByteArray());
                fileOutputStream.flush();
            } finally {
                fileOutputStream.close();
            }
        }catch(IOException e){
            logger.error(e.getMessage(), e);
            throw e;
        }finally{
            // Runs even when closing the file stream failed.
            close();
        }
        logger.info("文件生成成功");
    }

    /**
     * Performs the replacements and returns the resulting PDF.
     *
     * @return content of the stamped PDF
     * @throws DocumentException if iText fails while finishing the document
     * @throws IOException if stamping fails
     */
    public byte[] toBytes() throws DocumentException, IOException{
        try{
            process();
            logger.info("二进制数据生成成功");
            return output.toByteArray();
        }finally{
            close();
        }
    }

    /**
     * Registers a region under its alias name, replacing any region that uses
     * the same alias.
     *
     * @param replaceRegion region to cover
     */
    public void addReplaceRegion(ReplaceRegion replaceRegion){
        this.replaceRegionMap.put(replaceRegion.getAliasName(), replaceRegion);
    }

    /**
     * Returns the region registered under an alias.
     *
     * @param aliasName alias of the region
     * @return the region, or {@code null} if none is registered under that alias
     */
    public ReplaceRegion getReplaceRegion(String aliasName){
        return this.replaceRegionMap.get(aliasName);
    }

    /** Returns the font size used when drawing replacement text. */
    public int getFontSize() {
        return fontSize;
    }

    /**
     * Sets the font size. When the size changes, the font is re-created as
     * bold STSong-Light with the UniGB-UCS2-H encoding.
     *
     * @param fontSize size in points
     * @throws DocumentException if the font cannot be created
     * @throws IOException if the font cannot be loaded
     */
    public void setFont(int fontSize) throws DocumentException, IOException{
        if(fontSize != this.fontSize){
            this.fontSize = fontSize;
            BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);
            font = new Font(bf,this.fontSize,Font.BOLD);
        }
    }

    /**
     * Sets the font whose base font is used for drawing. The drawing size
     * still comes from {@link #getFontSize()}.
     *
     * @param font font to use; must not be {@code null}
     * @throws NullPointerException if {@code font} is {@code null}
     */
    public void setFont(Font font){
        if(font == null){
            throw new NullPointerException("font is null");
        }
        this.font = font;
    }

    public static void main(String[] args) throws IOException, DocumentException {
        PdfReplacer textReplacer = new PdfReplacer("I://test.pdf");
        textReplacer.replaceText("陈坤", "小白");
        textReplacer.replaceText("本科", "社会大学");
        textReplacer.replaceText("0755-29493863", "15112345678");
        textReplacer.toPdf("I://ticket_out.pdf");
    }
}

/**********************************************************************
* <pre>
* FILE : ReplaceRegion.java
* CLASS : ReplaceRegion
*
* AUTHOR : caoxu-yiyang@qq.com
*
* FUNCTION : TODO
*
*
*======================================================================
* CHANGE HISTORY LOG
*----------------------------------------------------------------------
* MOD. NO.| DATE | NAME | REASON | CHANGE REQ.
*----------------------------------------------------------------------
* |2016年11月9日|caoxu-yiyang@qq.com| Created |
* DESCRIPTION:
* </pre>
***********************************************************************/
package com.cx.itext;
/**
 * A rectangular region of a page whose content is to be replaced, identified
 * by an alias name. All four geometry values are {@code null} until set.
 *
 * <p>Author: caoxu-yiyang@qq.com, created 2016-11-09.
 */
public class ReplaceRegion {

    /** Name under which the region is looked up. */
    private String aliasName;

    /** Horizontal position of the region. */
    private Float x;

    /** Vertical position of the region. */
    private Float y;

    /** Width of the region. */
    private Float w;

    /** Height of the region. */
    private Float h;

    /**
     * Creates a region without geometry.
     *
     * @param aliasName name under which the region is looked up
     */
    public ReplaceRegion(String aliasName) {
        this.aliasName = aliasName;
    }

    /**
     * Returns the alias name of this region.
     *
     * @return the alias name
     */
    public String getAliasName() {
        return this.aliasName;
    }

    /** Changes the alias name of this region. */
    public void setAliasName(String aliasName) {
        this.aliasName = aliasName;
    }

    /** Returns the horizontal position, or {@code null} if unset. */
    public Float getX() {
        return this.x;
    }

    /** Sets the horizontal position. */
    public void setX(Float x) {
        this.x = x;
    }

    /** Returns the vertical position, or {@code null} if unset. */
    public Float getY() {
        return this.y;
    }

    /** Sets the vertical position. */
    public void setY(Float y) {
        this.y = y;
    }

    /** Returns the width, or {@code null} if unset. */
    public Float getW() {
        return this.w;
    }

    /** Sets the width. */
    public void setW(Float w) {
        this.w = w;
    }

    /** Returns the height, or {@code null} if unset. */
    public Float getH() {
        return this.h;
    }

    /** Sets the height. */
    public void setH(Float h) {
        this.h = h;
    }
}

/**********************************************************************
* <pre>
* FILE : PdfPositionParse.java
* CLASS : PdfPositionParse
*
* AUTHOR : caoxu-yiyang@qq.com
*
* FUNCTION : TODO
*
*
*======================================================================
* CHANGE HISTORY LOG
*----------------------------------------------------------------------
* MOD. NO.| DATE | NAME | REASON | CHANGE REQ.
*----------------------------------------------------------------------
* |2016年11月9日|caoxu-yiyang@qq.com| Created |
* DESCRIPTION:
* </pre>
***********************************************************************/
package com.cx.itext;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.cx.itext.listener.PositionRenderListener;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
/**
 * Finds the position (x, y) and size of given texts on the first page of a PDF.
 *
 * <p>Author: caoxu-yiyang@qq.com, created 2016-11-09.
 */
public class PdfPositionParse {
    private PdfReader reader;
    /** Texts to search for. */
    private List<String> findText = new ArrayList<String>();
    private PdfReaderContentParser parser;
    /** Whether {@link #parse()} closes the reader; false when the reader was supplied by the caller. */
    private boolean needClose = true;

    /**
     * Creates a parser for a PDF file. The reader opened for it is closed at
     * the end of {@link #parse()}.
     *
     * @param fileName path of the PDF to read
     * @throws IOException if the file cannot be read or parsed
     */
    public PdfPositionParse(String fileName) throws IOException{
        FileInputStream in = new FileInputStream(fileName);
        try{
            // Read in a loop: a single read() is not guaranteed to fill the
            // buffer, and available() is only an estimate of the file size.
            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int count;
            while ((count = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, count);
            }
            init(buffer.toByteArray());
        }finally{
            in.close();
        }
    }

    /**
     * Creates a parser for an in-memory PDF. The reader opened for it is
     * closed at the end of {@link #parse()}.
     *
     * @param bytes content of the PDF
     * @throws IOException if the PDF cannot be parsed
     */
    public PdfPositionParse(byte[] bytes) throws IOException{
        init(bytes);
    }

    /**
     * Creates a parser on top of an existing reader. The reader is NOT closed
     * when {@link #parse()} finishes; it stays owned by the caller.
     *
     * @param reader reader of the PDF to search
     */
    public PdfPositionParse(PdfReader reader){
        this.reader = reader;
        parser = new PdfReaderContentParser(reader);
        needClose = false;
    }

    /**
     * Adds a text to search for.
     *
     * @param text text to find
     */
    public void addFindText(String text){
        this.findText.add(text);
    }

    /** Opens a reader and content parser over the given PDF bytes. */
    private void init(byte[] bytes) throws IOException {
        reader = new PdfReader(bytes);
        parser = new PdfReaderContentParser(reader);
    }

    /**
     * Searches page 1 for the registered texts. If this instance opened the
     * reader itself, the reader is closed afterwards, so the method is meant
     * to be called once in that case.
     *
     * @return the listener's result map, keyed by search text
     * @throws NullPointerException if no search text has been added
     * @throws IOException if the page content cannot be processed
     */
    public Map<String, ReplaceRegion> parse() throws IOException{
        try{
            if(this.findText.isEmpty()){
                throw new NullPointerException("没有需要查找的文本");
            }
            PositionRenderListener listener = new PositionRenderListener(this.findText);
            parser.processContent(1, listener);
            return listener.getResult();
        }finally{
            if(reader != null && needClose){
                reader.close();
            }
        }
    }
}

/**********************************************************************
* <pre>
* FILE : PositionRenderListener.java
* CLASS : PositionRenderListener
*
* AUTHOR : caoxu-yiyang@qq.com
*
* FUNCTION : TODO
*
*
*======================================================================
* CHANGE HISTORY LOG
*----------------------------------------------------------------------
* MOD. NO.| DATE | NAME | REASON | CHANGE REQ.
*----------------------------------------------------------------------
* |2016年11月9日|caoxu-yiyang@qq.com| Created |
* DESCRIPTION:
* </pre>
***********************************************************************/
package com.cx.itext.listener;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.cx.itext.ReplaceRegion;
import com.itextpdf.awt.geom.Rectangle2D.Float;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
/**
 * Render listener that records the x, y, width and height of every rendered
 * text chunk that equals one of the searched texts.
 *
 * <p>Author: caoxu-yiyang@qq.com, created 2016-11-09.
 */
public class PositionRenderListener implements RenderListener{
    /** Texts to look for. */
    private List<String> findText;
    /** Height used when the measured height is 0. */
    private float defaultH;
    /** Amount subtracted from the measured y so the region fully covers the text. */
    private float fixHeight;
    /** Regions found so far, keyed by searched text. */
    private Map<String, ReplaceRegion> result = new HashMap<String, ReplaceRegion>();

    /**
     * @param findText texts to look for
     * @param defaultH height used when the measured height is 0
     * @param fixHeight amount subtracted from the measured y
     */
    public PositionRenderListener(List<String> findText, float defaultH,float fixHeight) {
        this.findText = findText;
        this.defaultH = defaultH;
        this.fixHeight = fixHeight;
    }

    /**
     * Uses a default height of 12 and a y correction of 2.
     *
     * @param findText texts to look for
     */
    public PositionRenderListener(List<String> findText) {
        this(findText, 12, 2);
    }

    @Override
    public void beginTextBlock() {
        // Not needed for position lookup.
    }

    @Override
    public void endTextBlock() {
        // Not needed for position lookup.
    }

    @Override
    public void renderImage(ImageRenderInfo imageInfo) {
        // Images are ignored.
    }

    /**
     * Stores a region for each searched text that the rendered chunk equals
     * exactly. A later chunk with the same text replaces the earlier region.
     */
    @Override
    public void renderText(TextRenderInfo textInfo) {
        String rendered = textInfo.getText();
        for (String keyWord : findText) {
            if (rendered == null || !rendered.equals(keyWord)) {
                continue;
            }
            Float bound = textInfo.getBaseline().getBoundingRectange();
            float regionHeight;
            if (bound.height == 0) {
                regionHeight = defaultH;
            } else {
                regionHeight = bound.height;
            }
            ReplaceRegion region = new ReplaceRegion(keyWord);
            region.setX(bound.x);
            region.setY(bound.y - this.fixHeight);
            region.setW(bound.width);
            region.setH(regionHeight);
            result.put(keyWord, region);
        }
    }

    /**
     * Returns the regions found so far. Searched texts without a match are
     * present in the map with a {@code null} value.
     */
    public Map<String, ReplaceRegion> getResult() {
        for (String key : findText) {
            // Make sure every searched text has an entry, even when not found.
            if (!this.result.containsKey(key)) {
                this.result.put(key, null);
            }
        }
        return this.result;
    }
}

我用到的jar包如下：

大家可以从官网下载，可以构建maven项目省去自己找包的麻烦。如果没有用maven又想下载具体的jar包，可以直接访问maven仓库下载：http://mvnrepository.com/