JAVA PDF 截取N页，生成新文件，转图片，多个PDF 合并

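<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13</version>
</dependency>

<!-- pdfToImage 使用了 PDFBox（PDDocument.load / PDFRenderer），需要 2.x 版本 -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.27</version>
</dependency>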

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.rendering.PDFRenderer;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.List;

/**
 * Static helpers for common PDF chores: extracting a page range into a new PDF
 * (iText), rendering pages to JPEG images (PDFBox) and concatenating several
 * PDFs into one (iText).
 *
 * <p>Page numbers in this API are 1-based and inclusive.
 */
public class PdfUtil {

    /** Resolution, in dots per inch, used when rasterising PDF pages. */
    private static final int RENDER_DPI = 200;

    /**
     * Copies pages {@code from}..{@code end} of {@code pdfFile} into a new PDF.
     *
     * <p>This method never throws for I/O or PDF errors: its signature has no
     * {@code throws} clause and existing callers rely on that, so failures
     * (including an empty page range) are reported on standard error and the
     * output file may then be missing or incomplete.
     *
     * @param pdfFile path of the PDF to cut
     * @param newFile path of the PDF to create
     * @param from    first page to copy; values below 1 are treated as 1,
     *                values above the page count as the last page
     * @param end     last page to copy; 0 or a value above the page count
     *                means "up to the last page"
     */
    public static void partitionPdf(String pdfFile, String newFile, int from, int end) {
        try {
            copyPageRange(pdfFile, newFile, from, end);
        } catch (Exception e) {
            // Best-effort contract kept for backward compatibility (see Javadoc).
            e.printStackTrace();
        }
    }

    /**
     * Renders pages {@code from}..{@code end} of a PDF as JPEG images.
     *
     * <p>When exactly one page is rendered the image is written to
     * {@code imageFile} as given. Otherwise each page gets its own file with
     * {@code _<pageNumber>} inserted before the file extension (or appended
     * when the name has no extension), e.g. {@code out_3.jpg}. Missing parent
     * directories are created.
     *
     * @param pdfFile   path of the PDF to render
     * @param imageFile path of the image to write (template for multi-page output)
     * @param from      first page, starting at 1; values below 1 are treated as 1,
     *                  values above the page count as the last page
     * @param end       last page; 0 or a value above the page count means
     *                  "up to the last page"
     * @throws IOException              if the PDF cannot be read or an image cannot be written
     * @throws IllegalArgumentException if the resolved page range is empty
     * @throws Exception                declared for backward compatibility with existing callers
     */
    public static void pdfToImage(String pdfFile, String imageFile, int from, int end) throws Exception {
        try (InputStream stream = new FileInputStream(pdfFile);
             PDDocument doc = PDDocument.load(stream)) {
            PDFRenderer pdfRenderer = new PDFRenderer(doc);
            int[] range = resolvePageRange(from, end, doc.getPages().getCount());
            int first = range[0];
            int last = range[1];

            for (int page = first; page <= last; page++) {
                // PDFBox page indexes are 0-based while this API is 1-based.
                BufferedImage image = pdfRenderer.renderImageWithDPI(page - 1, RENDER_DPI);

                File target = new File(first == last ? imageFile : numberedFileName(imageFile, page));
                File parent = target.getParentFile(); // null for a bare file name
                if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
                    throw new IOException("Cannot create directory " + parent);
                }
                // ImageIO returns false (instead of throwing) when no writer matches the format.
                if (!ImageIO.write(image, "jpg", target)) {
                    throw new IOException("No JPEG writer available for " + target);
                }
            }
        }
    }

    /**
     * Concatenates all pages of the given PDFs, in list order, into one PDF.
     *
     * @param pdfFiles  paths of the PDFs to merge; must contain at least one entry
     * @param outputPdf path of the merged PDF to create
     * @throws IOException              if an input cannot be read, the output cannot be
     *                                  written, or iText rejects the document
     * @throws IllegalArgumentException if {@code pdfFiles} is {@code null} or empty
     */
    public static void mergePDFFiles(List<String> pdfFiles, String outputPdf) throws IOException {
        if (pdfFiles == null || pdfFiles.isEmpty()) {
            throw new IllegalArgumentException("pdfFiles must contain at least one PDF");
        }
        try (OutputStream out = new FileOutputStream(outputPdf)) {
            Document document = new Document();
            try {
                PdfCopy copy = new PdfCopy(document, out);
                document.open();
                for (String pdfFile : pdfFiles) {
                    PdfReader reader = new PdfReader(pdfFile);
                    try {
                        appendPages(copy, reader, 1, reader.getNumberOfPages());
                        // Pages are already copied; let the writer drop its reference to this reader.
                        copy.freeReader(reader);
                    } finally {
                        reader.close();
                    }
                }
                // Closing the document finalises the PDF; errors here must reach the caller.
                document.close();
            } catch (DocumentException e) {
                throw new IOException("Failed to merge PDFs into " + outputPdf, e);
            } finally {
                closeAfterFailure(document);
            }
        }
    }

    /** Copies the resolved page range of {@code pdfFile} into {@code newFile}, propagating failures. */
    private static void copyPageRange(String pdfFile, String newFile, int from, int end)
            throws IOException, DocumentException {
        PdfReader reader = new PdfReader(pdfFile);
        try {
            // Resolve the range before touching the output so a bad range never truncates newFile.
            int[] range = resolvePageRange(from, end, reader.getNumberOfPages());
            try (OutputStream out = new FileOutputStream(newFile)) {
                Document document = new Document(reader.getPageSize(1));
                try {
                    PdfCopy copy = new PdfCopy(document, out);
                    document.open();
                    appendPages(copy, reader, range[0], range[1]);
                    document.close();
                } finally {
                    closeAfterFailure(document);
                }
            }
        } finally {
            reader.close();
        }
    }

    /** Appends pages {@code first}..{@code last} (1-based, inclusive) of {@code reader} to {@code copy}. */
    private static void appendPages(PdfCopy copy, PdfReader reader, int first, int last)
            throws IOException, DocumentException {
        for (int page = first; page <= last; page++) {
            PdfImportedPage imported = copy.getImportedPage(reader, page);
            copy.addPage(imported);
        }
    }

    /**
     * Closes a document that is still open because an earlier step failed. Any error raised
     * by this close is dropped so that it cannot mask the exception already propagating.
     */
    private static void closeAfterFailure(Document document) {
        if (document.isOpen()) {
            try {
                document.close();
            } catch (RuntimeException ignored) {
                // Secondary failure (e.g. "document has no pages"); the primary error wins.
            }
        }
    }

    /**
     * Clamps a requested page range to the document and returns {@code {first, last}}.
     *
     * @throws IllegalArgumentException if the document has no pages or the range is empty
     */
    private static int[] resolvePageRange(int from, int end, int pageCount) {
        int first = Math.min(Math.max(from, 1), pageCount);
        int last = (end == 0 || end > pageCount) ? pageCount : end;
        if (pageCount < 1 || first > last) {
            throw new IllegalArgumentException(
                    "Empty page range: from=" + from + ", end=" + end + ", pageCount=" + pageCount);
        }
        return new int[] {first, last};
    }

    /** Inserts {@code _<pageNumber>} before the extension of {@code imageFile}, or appends it if there is none. */
    private static String numberedFileName(String imageFile, int pageNumber) {
        int nameStart = Math.max(imageFile.lastIndexOf('/'), imageFile.lastIndexOf('\\')) + 1;
        int dot = imageFile.lastIndexOf('.');
        String suffix = "_" + pageNumber;
        if (dot <= nameStart) {
            // No extension in the file name itself (the dot, if any, belongs to a directory or a dot-file).
            return imageFile + suffix;
        }
        return imageFile.substring(0, dot) + suffix + imageFile.substring(dot);
    }

}



@Test
void pdf() throws Exception {
    // Convert a single page (from == end == 1) of the sample PDF into a JPEG.
    final int onlyPage = 1;
    final String source = "D:\\Desktop\\20220117.pdf";
    final String target = "D:\\Desktop\\20220117.jpg";

    PdfUtil.pdfToImage(source, target, onlyPage, onlyPage);
}

@Test
void testMerge() throws IOException {
    // Input PDFs, in the order their pages should appear in the merged file.
    String[] sources = {
        "D:\\Projects\\20231225180735.pdf",
        "D:\\Projects\\20231225182535.pdf",
        "D:\\Projects\\20231225184135.pdf"
    };

    List<String> inputs = new ArrayList<>();
    for (String source : sources) {
        inputs.add(source);
    }

    PdfUtil.mergePDFFiles(inputs, "D:\\Projects\\New.pdf");
}

JAVA PDF 截取N页，生成新文件，转图片，多个PDF 合并的更多相关文章

java 写 Excel（不生成实体文件，写为流的形式）
java 写 Excel(不生成实体文件,写为流的形式) public String exportReportExcel(String mediaCode, List<SimpleMediaRe ...
Java写Excel（不生成实体文件，写为流的形式）
java 写 Excel(不生成实体文件,写为流的形式) public String exportReportExcel(String mediaCode, List<SimpleMediaRe ...
JAVA - SpringBoot项目引用generator生成 Mybatis文件
JAVA - SpringBoot项目引用generator生成 Mybatis文件在spring官网https://start.spring.io/自动生成springboot项目,这里选择项目 ...
Java中使用DOM4J来生成xml文件和解析xml文件
一.前言现在有不少需求,是需要我们解析xml文件中的数据,然后导入到数据库中,当然解析xml文件也有好多种方法,小编觉得还是DOM4J用的最多最广泛也最好理解的吧.小编也是最近需求里遇到了,就来整理 ...
[.Net] - 使用 iTextSharp 生成基于模板的 PDF，生成新文件并保留表单域
背景基于 PDF Template 预填充表单项,生成一份新的 PDF 文件,并保留表单域允许继续修改. 代码段 using iTextSharp.text.pdf; /* Code Snippet ...
linux提取指定字符的行列并生成新文件（awk命令）
如图所示,命名为file文件的表头有BP.A1.TEST等假如想提取含有"ADD"的行和该行对应列的"BP"和"P"值,则需要用到以下命令 ...
转载：C#保存文件时重名自动生成新文件的方法
/// <summary> /// Generates a new path for duplicate filenames. /// </summary> /// <p ...
shell脚本选择LOG里面特定的行，生成新文件并rsync上传
rsync.sh #!/bin/bash tool_path=$(cd `dirname $`; pwd) eval `cat ${tool_path}/conf.properties` rsync_ ...
根据html生成Word文件,包含图片
根据html内容生成word,并自动下载下来.使用到了itext-1.4.6.jar import java.io.File; import java.io.FileInputStream; impo ...
java 在MySQL中存储文件，读取文件(包括图片，word文档，excel表格，ppt,zip文件等)
转自:https://blog.csdn.net/u014475796/article/details/49893261 在设计到数据库的开发中,难免要将图片或文档文件(如word)插入到数据库中的情 ...

随机推荐

怎样给边框添加阴影？CSS3属性box-shadow帮你搞定！
作者:WangMin 格言:努力做好自己喜欢的每一件事关于box-shadow属性,有的小伙伴可能用的时候直接复制已有的,并没有仔细了解过box-shadow属性的参数分别是什么含义,最后导致阴影的 ...
DP：摆动序列
问题描述如果一个序列满足下面的性质,我们就将它称为摆动序列: 1. 序列中的所有数都是不大于k的正整数: 2. 序列中至少有两个数. 3. 序列中的数两两不相等: 4. 如果第i – 1个数比第i ...
深入理解 BigBird 的块稀疏注意力
引言基于 transformer 的模型已被证明对很多 NLP 任务都非常有用.然而,$O(n^2)$ 的时间和内存复杂度 (其中 $n$ 是序列长度) 使得在长序列 (\(n > 5 ...
总结（3）--- 知识总结（内存管理、线程阻塞、GIL锁）
一.Python中是如何进行内存管理的? 垃圾回收:Python不像C++,Java等语言一样,他们可以不用事先声明变量类型而直接对变量进行赋值.对Python而言,对象的类型和内存都是在运行时确定的 ...
IIS安装与配置
一.环境介绍 Windows Server 2019 64位标准版二.IIS安装 2.1.打开服务器管理器,单击添加角色和功能在Windows Server 2019 服务器管理中,点击角色和功 ...
Centos8.4自定义离线安装Nginx
一.简介 Nginx是一个web服务器也可以用来做负载均衡及反向代理使用. 目前使用最多的就是负载均衡,这篇文章主要介绍了centos8 安装 nginx. Nginx是一种开源的高性能HTTP和反向 ...
Cocos-JS HTTP网络请求
网络结构网络结构是网络构建方式,目前流行的有客户端服务器结构(C/S结构)和点对点(P2P)结构网络. 客户端服务器结构(C/S结构) 这种结构又被称为Clicent/Server结构,它是一种主从 ...
开源不挣钱？这个项目上线半年月入超 30w
很兴奋的告诉大家,Sealos 自从 6 月份上线以来,仅半年时间注册用户已经突破 7万,月收入超过 30w,本文来向大家介绍我们是怎么做开源商业化的.每月平均增速超过 40%,而且这些收入大部分来自 ...
冲刺秋招之牛客刷Java记录第二天
第一题下列代码输入什么? public class Test { public static Test t1 = new Test(); { System.out.println("blo ...
ASR项目实战-产品分析
分析Google.讯飞.百度.阿里.QQ.搜狗等大厂的ASR服务,可以罗列出一款ASR服务所需要具备的能力. 产品分类 ASR云服务产品,从用户体验.时效性.音频时长,可以划分为如下几类: 实时短音频 ...

JAVA PDF 截取N页，生成新文件，转图片，多个PDF 合并

JAVA PDF 截取N页，生成新文件，转图片，多个PDF 合并的更多相关文章

随机推荐

热门专题