python实现excel转换成pdf

1、安装

需要安装pywin32包，以实现对Office文件的操作，可以批量转换为pdf文件。支持 doc, docx, ppt, pptx, xls, xlsx 等格式。

pip install pywin32

2、office文件 (word, ppt, excel等) 转为pdf

#-*- coding:utf-8 -*-

import os

from win32com.client import Dispatch, constants, gencache, DispatchEx

class PDFConverter:
    '''
    Batch-convert Office documents (doc, docx, ppt, pptx, xls, xlsx) to PDF
    by driving the installed Office applications through COM (pywin32).

    The PDFs are written to a ``pdfconver`` folder created under ``export``.
    '''

    def __init__(self, pathname, export='.'):
        '''
        Collect the files to convert and make sure the output folder exists.

        pathname: a single Office file, or a folder that is walked recursively.
        export:   folder under which the ``pdfconver`` output folder is
                  created; defaults to the current working directory.

        Raises TypeError when ``pathname`` does not exist, or is a single
        file whose extension is not supported.
        '''
        self._handle_postfix = ['doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx']
        self._filename_list = list()
        # Honour ``export`` (it used to be ignored); the default '.' still
        # resolves to <cwd>/pdfconver exactly as before.
        self._export_folder = os.path.join(os.path.abspath(export), 'pdfconver')
        # makedirs also creates missing parent folders of a nested export path.
        os.makedirs(self._export_folder, exist_ok=True)
        self._enumerate_filename(pathname)

    @staticmethod
    def _postfix(filename):
        '''Return the lower-cased extension of ``filename`` without the dot.'''
        return os.path.splitext(filename)[1][1:].lower()

    def _pdf_path(self, filename):
        '''
        Return the output PDF path for ``filename``.

        Only the last extension is replaced, so ``report.v2.xlsx`` becomes
        ``report.v2.pdf`` instead of colliding with other ``report.*`` files.
        '''
        stem = os.path.splitext(os.path.basename(filename))[0]
        return os.path.join(self._export_folder, stem + '.pdf')

    def _enumerate_filename(self, pathname):
        '''
        Fill ``self._filename_list`` with the absolute paths to convert.

        A single file must have a supported extension (TypeError otherwise);
        a folder is walked recursively and unsupported files are skipped.
        '''
        full_pathname = os.path.abspath(pathname)
        if os.path.isfile(full_pathname):
            if not self._is_legal_postfix(full_pathname):
                raise TypeError('文件 {} 后缀名不合法！仅支持如下文件类型：{}。'.format(pathname, '、'.join(self._handle_postfix)))
            self._filename_list.append(full_pathname)
        elif os.path.isdir(full_pathname):
            # os.walk already yields absolute directory paths here because
            # full_pathname is absolute.
            for dirpath, _, files in os.walk(full_pathname):
                for name in files:
                    filename = os.path.join(dirpath, name)
                    if self._is_legal_postfix(filename):
                        self._filename_list.append(filename)
        else:
            raise TypeError('文件/文件夹 {} 不存在或不合法！'.format(pathname))

    def _is_legal_postfix(self, filename):
        '''
        Return True when ``filename`` has a supported extension and its base
        name does not start with '~' (e.g. the ``~$`` lock files Office
        leaves next to an open document).
        '''
        if os.path.basename(filename).startswith('~'):
            return False
        return self._postfix(filename) in self._handle_postfix

    def run_conver(self):
        '''
        Convert every collected file, dispatching on the file extension to
        the method of the same name (doc, docx, xls, ...).
        '''
        print('需要转换的文件数：', len(self._filename_list))
        for filename in self._filename_list:
            funcCall = getattr(self, self._postfix(filename))
            print('原文件：', filename)
            funcCall(filename)
        print('转换完成！')

    def doc(self, filename):
        '''
        Convert a doc / docx file to PDF with Word.
        '''
        exportfile = self._pdf_path(filename)
        print('保存 PDF 文件：', exportfile)
        # Load the generated type library wrapper so the wd* constants used
        # below are available on ``constants``.
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        w = Dispatch("Word.Application")
        try:
            doc = w.Documents.Open(filename)
            doc.ExportAsFixedFormat(exportfile, constants.wdExportFormatPDF,
                                    Item=constants.wdExportDocumentWithMarkup,
                                    CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        finally:
            # Always shut Word down, even when opening/exporting failed, so
            # no orphaned WINWORD process is left behind.
            w.Quit(constants.wdDoNotSaveChanges)

    def docx(self, filename):
        '''Convert a docx file; same procedure as ``doc``.'''
        self.doc(filename)

    def xls(self, filename):
        '''
        Convert an xls / xlsx workbook to PDF with Excel.
        '''
        exportfile = self._pdf_path(filename)
        xlApp = DispatchEx("Excel.Application")
        try:
            xlApp.Visible = False
            xlApp.DisplayAlerts = 0
            # Second argument is UpdateLinks: do not refresh external links.
            books = xlApp.Workbooks.Open(filename, False)
            try:
                # 0 == xlTypePDF
                books.ExportAsFixedFormat(0, exportfile)
            finally:
                books.Close(False)
            print('保存 PDF 文件：', exportfile)
        finally:
            # Quit Excel on every path so a failed export leaves no process.
            xlApp.Quit()

    def xlsx(self, filename):
        '''Convert an xlsx file; same procedure as ``xls``.'''
        self.xls(filename)

    def ppt(self, filename):
        '''
        Convert a ppt / pptx presentation to PDF with PowerPoint.
        '''
        exportfile = self._pdf_path(filename)
        # NOTE(review): this GUID is the same one ``doc`` loads for Word;
        # kept as in the original — confirm whether it is needed here.
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        p = Dispatch("PowerPoint.Application")
        try:
            # Positional flags: ReadOnly, Untitled, WithWindow (all off).
            ppt = p.Presentations.Open(filename, False, False, False)
            # 2 == ppFixedFormatTypePDF. PrintRange=None is kept from the
            # original call. NOTE(review): presumably a workaround for a
            # COM argument-marshalling error — confirm before removing.
            ppt.ExportAsFixedFormat(exportfile, 2, PrintRange=None)
            print('保存 PDF 文件：', exportfile)
        finally:
            # Quit PowerPoint on every path so a failure leaves no process.
            p.Quit()

    def pptx(self, filename):
        '''Convert a pptx file; same procedure as ``ppt``.'''
        self.ppt(filename)

if __name__ == "__main__":
    # Batch mode: convert every supported file found under ./tmp.
    # A single file can be passed instead, e.g. 'test.doc'.
    source_path = os.path.join(os.path.abspath('.'), 'tmp')
    PDFConverter(source_path).run_conver()

转自 https://blog.csdn.net/XnCSD/article/details/85208303

3、excel的不同sheet存为pdf

#-*- coding:utf-8 -*-

import os

from win32com.client import Dispatch, constants, gencache, DispatchEx

import xlrd

class PDFConverter:
    '''
    Export the worksheets of Excel workbooks (xls, xlsx) to one PDF per
    sheet by driving Excel through COM (pywin32).

    The PDFs are written to a ``pdfconver`` folder created under ``export``
    and are named after the worksheet, so sheets with the same name in
    different workbooks overwrite each other.
    '''

    def __init__(self, pathname, sheetnum=None, export='.'):
        '''
        Collect the workbooks to convert and make sure the output folder
        exists.

        pathname: a single workbook, or a folder that is walked recursively.
        sheetnum: number of leading worksheets to export from each workbook;
                  None exports every worksheet.
        export:   folder under which the ``pdfconver`` output folder is
                  created; defaults to the current working directory.

        Raises TypeError when ``pathname`` does not exist, or is a single
        file whose extension is not supported.
        '''
        self.sheetnum = sheetnum
        # Only list the formats this class has a conversion method for;
        # run_conver dispatches on the extension and would otherwise fail
        # with AttributeError on doc/ppt files.
        self._handle_postfix = ['xls', 'xlsx']
        self._filename_list = list()
        # Honour ``export`` (it used to be ignored); the default '.' still
        # resolves to <cwd>/pdfconver exactly as before.
        self._export_folder = os.path.join(os.path.abspath(export), 'pdfconver')
        # makedirs also creates missing parent folders of a nested export path.
        os.makedirs(self._export_folder, exist_ok=True)
        self._enumerate_filename(pathname)

    @staticmethod
    def _postfix(filename):
        '''Return the lower-cased extension of ``filename`` without the dot.'''
        return os.path.splitext(filename)[1][1:].lower()

    def _enumerate_filename(self, pathname):
        '''
        Fill ``self._filename_list`` with the absolute paths to convert.

        A single file must have a supported extension (TypeError otherwise);
        a folder is walked recursively and unsupported files are skipped.
        '''
        full_pathname = os.path.abspath(pathname)
        if os.path.isfile(full_pathname):
            if not self._is_legal_postfix(full_pathname):
                raise TypeError('文件 {} 后缀名不合法！仅支持如下文件类型：{}。'.format(pathname, '、'.join(self._handle_postfix)))
            self._filename_list.append(full_pathname)
        elif os.path.isdir(full_pathname):
            # os.walk already yields absolute directory paths here because
            # full_pathname is absolute.
            for dirpath, _, files in os.walk(full_pathname):
                for name in files:
                    filename = os.path.join(dirpath, name)
                    if self._is_legal_postfix(filename):
                        self._filename_list.append(filename)
        else:
            raise TypeError('文件/文件夹 {} 不存在或不合法！'.format(pathname))

    def _is_legal_postfix(self, filename):
        '''
        Return True when ``filename`` has a supported extension and its base
        name does not start with '~' (e.g. the ``~$`` lock files Office
        leaves next to an open document).
        '''
        if os.path.basename(filename).startswith('~'):
            return False
        return self._postfix(filename) in self._handle_postfix

    def run_conver(self):
        '''
        Convert every collected workbook, dispatching on the file extension
        to the method of the same name (xls, xlsx).
        '''
        print('需要转换的文件数：', len(self._filename_list))
        for filename in self._filename_list:
            funcCall = getattr(self, self._postfix(filename))
            print('原文件：', filename)
            funcCall(filename)
        print('转换完成！')

    def xls(self, filename):
        '''
        Export the first ``self.sheetnum`` worksheets of an xls / xlsx
        workbook, each to ``<sheet name>.pdf`` in the output folder.
        '''
        xlApp = DispatchEx("Excel.Application")
        try:
            xlApp.Visible = False
            xlApp.DisplayAlerts = 0
            # Second argument is UpdateLinks: do not refresh external links.
            books = xlApp.Workbooks.Open(filename, False)
            try:
                # Never index past the last worksheet: a too-large sheetnum
                # would otherwise raise a COM error half-way through.
                total = books.Worksheets.Count
                if self.sheetnum is None:
                    wanted = total
                else:
                    wanted = min(self.sheetnum, total)
                # Excel collections are 1-based. Index Worksheets directly
                # so chart sheets in the workbook cannot break the lookup.
                for i in range(1, wanted + 1):
                    xlSheet = books.Worksheets(i)
                    exportfile = os.path.join(self._export_folder, xlSheet.Name + '.pdf')
                    # 0 == xlTypePDF
                    xlSheet.ExportAsFixedFormat(0, exportfile)
                    print('保存 PDF 文件：', exportfile)
            finally:
                books.Close(False)
        finally:
            # Quit Excel on every path so a failed export leaves no process.
            xlApp.Quit()

    def xlsx(self, filename):
        '''Convert an xlsx file; same procedure as ``xls``.'''
        self.xls(filename)

if __name__ == "__main__":
    # Convert a single workbook, one PDF per worksheet.
    pathname = u'原始数据.xlsx'

    # Ask Excel itself for the worksheet count. xlrd 2.0+ only reads legacy
    # .xls files and raises on .xlsx, so it cannot be relied on here.
    excel = DispatchEx("Excel.Application")
    try:
        excel.Visible = False
        excel.DisplayAlerts = 0
        # COM needs an absolute path; a relative one would be resolved
        # against Excel's own working directory.
        workbook = excel.Workbooks.Open(os.path.abspath(pathname), False)
        try:
            sheetnum = workbook.Worksheets.Count
        finally:
            workbook.Close(False)
    finally:
        # Quit Excel even if the workbook could not be opened.
        excel.Quit()

    pdfConverter = PDFConverter(pathname, sheetnum)
    pdfConverter.run_conver()

python实现excel转换成pdf的更多相关文章

多页Excel转换成PDF时如何保存为单独文件
通过ABBYY PDF Transformer+图文识别软件,使用PDF-XChange打印机将多页Excel工作簿转换成PDF文档(相关文章请参考ABBYY PDF Transformer+从MS ...
excel 转换成pdf 总结
excel 转换成pdf 1.freespire 只能转换前三页 // 使用此组件只能转换前3页 //需要引用如下命名空间 //using Spire.Doc; //Document doc = ...
Excel转换成PDF
public class Office2Pdf { public bool DOCConvertToPDF(string sourcePath, string targetPath) { //Stre ...
python 将excel转换成字典，并且将字典写到txt文件里
# -*- coding: utf-8 -*- #python2.7 import sys reload(sys) sys.setdefaultencoding('utf-8') from pyexc ...
word ppt excel文档转换成pdf
1.把word文档转换成pdf (1).添加引用 using Microsoft.Office.Interop.Word; 添加引用 (2).转换方法 /// <summary> /// ...
Python 爬虫：把廖雪峰教程转换成 PDF 电子书
写爬虫似乎没有比用 Python 更合适了,Python 社区提供的爬虫工具多得让你眼花缭乱,各种拿来就可以直接用的 library 分分钟就可以写出一个爬虫出来,今天尝试写一个爬虫,将廖雪峰老师的 ...
C#.net word excel powerpoint (ppt) 转换成 pdf 文件
using System;using System.Collections.Generic;using System.ComponentModel;using System.Data;using Sy ...
C#实现文档转换成PDF
网上有很多将doc.ppt.xls等类型的文档转换成pdf的方法,目前了解到的有两大类: 1.使用虚拟打印机将doc.ppt.xls等类型的文档 2.使用OFFICE COM组件我采用了第二种方法实 ...
c# office转换成pdf
下载地址 [url]http://www.microsoft.com/downloads/details.aspx?FamilyId=4D951911-3E7E-4AE6-B059-A2E79ED87 ...

随机推荐

## ucore Lab0 一些杂记
ucore Lab0 一些杂记前一阵子开始做 MIT 6.828,做了两三个实验才发现清华的 ucore 貌似更友好一些,再加上前几个实验也与6.828 有所重叠,于是决定迁移阵地. 文章计划分两类 ...
使nginx归于systemd管理
[root@centos7 ~]# vim /usr/lib/systemd/system/nginx.service [Unit] Description=The Nginx HTTP Server ...
Linux下载：wget、yum与apt-get用法及区别
一般来说著名的linux系统基本上分两大类: RedHat系列:Redhat.Centos.Fedora等 Debian系列:Debian.Ubuntu等 RedHat 系列常见的安装包格式 rpm ...
什么是UAT
基本概念 UAT,英文User Acceptance Test的简写,也就是用户验收测试,或用户可接受测试,系统开发生命周期方法论的一个阶段,这时相关的用户或独立测试人员根据测试计划和结果对系统进行测 ...
bzoj4819 [Sdoi2017]新生舞会分数规划+最大费用最大流
题目传送门 https://lydsy.com/JudgeOnline/problem.php?id=4819 题解首先上面说, \[ C = \frac{\sum\limits_{i=1}^n a ...
elementui多个文件上传问题
我认为绑定一个值然后把值改变不同的名字即可
Java面试之集合框架篇（3）
21.ArrayList和Vector的区别这两个类都实现了List接口(List接口继承了Collection接口),他们都是有序集合,即存储在这两个集合中的元素的位置都是有顺序的,相当于一种动态 ...
面试题常考&必考之--js中的call()和apply()
apply: 接受两个参数,第一个参数是要绑定给this的值,第二个参数是一个参数数组.当第一个参数为null.undefined的时候,默认指向window. call: 第一个参数是要绑定给thi ...
Javascript获取页面元素相对和绝对位置
制作网页的过程中,你有时候需要知道某个元素在网页上的确切位置. 下面的教程总结了Javascript在网页定位方面的相关知识. 一.网页的大小和浏览器窗口的大小首先,要明确两个基本概念. 一张网页的 ...
接口返回[object,Object]解决方法
1.我们请求接口时有时会返回[object,Object],[object,Object],[object,Object].... 这个我们使用JSON.stringify(data),就可以解决.

python实现excel转换成pdf

1、安装

2、office文件 (word, ppt, excel等) 转为pdf

3、excel的不同sheet存为pdf

python实现excel转换成pdf的更多相关文章

随机推荐

热门专题