LibreOffice 各类文件转换的 FilterName
# Document families used as the second-level keys of
# LIBREOFFICE_EXPORT_TYPES. The export table refers to these entries by
# index, so the order of this list must not change.
LIBREOFFICE_DOC_FAMILIES = [
    "TextDocument",
    "WebDocument",
    "Spreadsheet",
    "Presentation",
    "Graphics",
]

# Input file extension -> properties applied when loading that file type.
# NOTE(review): some values (e.g. "PDF - Portable Document Format",
# "HTML Document") look like UI display names rather than internal filter
# names -- confirm against the filter list of the installed LibreOffice.
LIBREOFFICE_IMPORT_TYPES = {
    "docx": {
        "FilterName": "MS Word 2007 XML",
    },
    "pdf": {
        "FilterName": "PDF - Portable Document Format",
    },
    "jpg": {
        "FilterName": "JPEG - Joint Photographic Experts Group",
    },
    "html": {
        "FilterName": "HTML Document",
    },
    "odp": {
        "FilterName": "OpenDocument Presentation (Flat XML)",
    },
    "pptx": {
        "FilterName": "Microsoft PowerPoint 2007 XML",
    },
}

# Output format -> document family -> properties applied when storing.
# A family missing under a format means this table defines no export
# filter for that combination.
LIBREOFFICE_EXPORT_TYPES = {
    "pdf": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "writer_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "writer_web_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "calc_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_pdf_Export"},
    },
    "jpg": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_jpg_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_jpg_Export"},
    },
    "html": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "HTML (StarWriter)"},
        LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "HTML"},
        LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "HTML (StarCalc)"},
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_html_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_html_Export"},
    },
    "docx": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "MS Word 2007 XML"},
    },
    "odp": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress8"},
    },
    "pptx": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "Impress MS PowerPoint 2007 XML"},
    },
}
转:
convert_test
#!/usr/bin/env python3
"""
Convert office documents between formats through a headless LibreOffice.

VIEW COMPLETE CODE AT
=====================
* https://github.com/six519/libreoffice_convert

THANKS
======
* Thanks to: Mirko Nasato for his PyODConverter http://www.artofsolving.com/opensource/pyodconverter

TESTED USING
============
* Fedora release 20 (Heisenbug)
* Python 3.3.2

INSTALL DEPENDENCIES
====================
* yum install libreoffice-sdk
"""

import os
import subprocess
import time

# ``uno`` must be imported before anything from ``com.sun.star``.
import uno
from com.sun.star.beans import PropertyValue

LIBREOFFICE_DEFAULT_PORT = 6519
LIBREOFFICE_DEFAULT_HOST = "localhost"

# Document families used as the second-level keys of
# LIBREOFFICE_EXPORT_TYPES. The export table and getDocumentFamily() refer
# to these entries by index, so the order of this list must not change.
LIBREOFFICE_DOC_FAMILIES = [
    "TextDocument",
    "WebDocument",
    "Spreadsheet",
    "Presentation",
    "Graphics",
]

# Input file extension -> properties passed to loadComponentFromURL().
# NOTE(review): some values (e.g. "PDF - Portable Document Format",
# "HTML Document") look like UI display names rather than internal filter
# names -- confirm against the filter list of the installed LibreOffice.
LIBREOFFICE_IMPORT_TYPES = {
    "docx": {
        "FilterName": "MS Word 2007 XML",
    },
    "pdf": {
        "FilterName": "PDF - Portable Document Format",
    },
    "jpg": {
        "FilterName": "JPEG - Joint Photographic Experts Group",
    },
    "html": {
        "FilterName": "HTML Document",
    },
    "odp": {
        "FilterName": "OpenDocument Presentation (Flat XML)",
    },
    "pptx": {
        "FilterName": "Microsoft PowerPoint 2007 XML",
    },
}

# Output format -> document family -> properties passed to storeToURL().
# A family missing under a format means convertFile() cannot produce that
# format from that kind of document.
LIBREOFFICE_EXPORT_TYPES = {
    "pdf": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "writer_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "writer_web_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "calc_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_pdf_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_pdf_Export"},
    },
    "jpg": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_jpg_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_jpg_Export"},
    },
    "html": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "HTML (StarWriter)"},
        LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "HTML"},
        LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "HTML (StarCalc)"},
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_html_Export"},
        LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_html_Export"},
    },
    "docx": {
        LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "MS Word 2007 XML"},
    },
    "odp": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress8"},
    },
    "pptx": {
        LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "Impress MS PowerPoint 2007 XML"},
    },
}


class PythonLibreOffice(object):
    """Start a headless ``soffice`` and convert files through its UNO desktop.

    Methods report failure by returning ``False``; the text of the most
    recent recorded failure is available through ``lastError``.
    """

    def __init__(self, host=LIBREOFFICE_DEFAULT_HOST, port=LIBREOFFICE_DEFAULT_PORT):
        """Launch ``soffice`` and try to connect to it.

        Args:
            host: Host name used in the UNO accept/connect string.
            port: TCP port used in the UNO accept/connect string.

        A launch or connection failure does not raise: it is stored in
        ``lastError`` and ``desktop`` stays ``None``.
        """
        self.host = host
        self.port = port
        self.__lastErrorMessage = ""
        self.local_context = uno.getComponentContext()
        self.resolver = self.local_context.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", self.local_context)
        # Built from the instance's host/port so the constructor arguments
        # actually take effect (previously the module defaults were used).
        self.connectionString = "socket,host=%s,port=%s;urp;StarOffice.ComponentContext" % (
            self.host, self.port)
        self.context = None
        self.desktop = None

        try:
            self.runUnoProcess()
            self.context = self.resolver.resolve("uno:%s" % self.connectionString)
            self.desktop = self.context.ServiceManager.createInstanceWithContext(
                "com.sun.star.frame.Desktop", self.context)
        except Exception as e:
            self.__lastErrorMessage = str(e)

    @property
    def lastError(self):
        """Message of the most recently recorded failure ("" if none)."""
        return self.__lastErrorMessage

    def terminateProcess(self):
        """Ask the connected desktop, if any, to terminate.

        Returns:
            ``True`` when no exception was raised (including when there is
            no desktop), ``False`` otherwise with ``lastError`` updated.
        """
        try:
            if self.desktop:
                self.desktop.terminate()
        except Exception as e:
            self.__lastErrorMessage = str(e)
            return False

        return True

    def convertFile(self, outputFormat, inputFilename):
        """Convert ``inputFilename`` to ``outputFormat`` next to the input.

        The output path is the input path with its extension replaced by
        ``outputFormat``.

        Args:
            outputFormat: Key of LIBREOFFICE_EXPORT_TYPES, e.g. ``"pdf"``.
            inputFilename: Path of the file to convert; its extension must
                be a key of LIBREOFFICE_IMPORT_TYPES.

        Returns:
            ``True`` when the document was stored and closed. On any
            failure ``terminateProcess()`` is called and ``False`` is
            returned.
        """
        if self.desktop and self._convert(outputFormat, inputFilename):
            return True

        self.terminateProcess()
        return False

    def _convert(self, outputFormat, inputFilename):
        """Load, export and close one document; return ``True`` on success."""
        baseName, extension = os.path.splitext(inputFilename)
        inputFormat = extension.replace(".", "")
        if inputFormat not in LIBREOFFICE_IMPORT_TYPES:
            self.__lastErrorMessage = "Unsupported input format: %r" % inputFormat
            return False

        outputFilename = "%s.%s" % (baseName, outputFormat)
        inputUrl = uno.systemPathToFileUrl(os.path.abspath(inputFilename))
        outputUrl = uno.systemPathToFileUrl(os.path.abspath(outputFilename))

        inputProperties = {"Hidden": True}
        inputProperties.update(LIBREOFFICE_IMPORT_TYPES[inputFormat])
        doc = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self.propertyTuple(inputProperties))
        if doc is None:
            self.__lastErrorMessage = "Could not load %s" % inputFilename
            return False

        try:
            doc.refresh()
        except Exception:
            # Best effort: a failing refresh() must not abort the conversion.
            pass

        docFamily = self.getDocumentFamily(doc)
        outputProperties = LIBREOFFICE_EXPORT_TYPES.get(outputFormat, {}).get(docFamily)
        if outputProperties is None:
            self.__lastErrorMessage = "Cannot export %s as %r" % (
                docFamily or "an unrecognised document", outputFormat)
            return False

        try:
            doc.storeToURL(outputUrl, self.propertyTuple(outputProperties))
            doc.close(True)
        except Exception as e:
            self.__lastErrorMessage = str(e)
            return False

        return True

    def propertyTuple(self, propDict):
        """Return ``propDict`` as a tuple of ``PropertyValue`` objects."""
        properties = []
        for name, value in propDict.items():
            prop = PropertyValue()
            prop.Name = name
            prop.Value = value
            properties.append(prop)

        return tuple(properties)

    def getDocumentFamily(self, doc):
        """Return the LIBREOFFICE_DOC_FAMILIES entry matching ``doc``.

        Returns ``None`` when no known service is supported or when the
        query itself raises (for example ``doc`` is ``None``).
        """
        try:
            # WebDocument must be tested before GenericTextDocument: the
            # WebDocument service includes GenericTextDocument, so the
            # generic test would otherwise always win.
            if doc.supportsService("com.sun.star.text.WebDocument"):
                return LIBREOFFICE_DOC_FAMILIES[1]
            if doc.supportsService("com.sun.star.text.GenericTextDocument"):
                return LIBREOFFICE_DOC_FAMILIES[0]
            if doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
                return LIBREOFFICE_DOC_FAMILIES[2]
            if doc.supportsService("com.sun.star.presentation.PresentationDocument"):
                return LIBREOFFICE_DOC_FAMILIES[3]
            if doc.supportsService("com.sun.star.drawing.DrawingDocument"):
                return LIBREOFFICE_DOC_FAMILIES[4]
        except Exception:
            pass

        return None

    def runUnoProcess(self):
        """Spawn a headless ``soffice`` accepting ``connectionString``.

        Raises:
            OSError: if the ``soffice`` executable cannot be started.
        """
        # Argument list instead of a shell string: host/port end up in the
        # --accept value and must not be interpreted by a shell.
        subprocess.Popen(
            ["soffice", "--headless", "--norestore",
             "--accept=%s" % self.connectionString],
            stdin=None, stdout=None, stderr=None)
        # Fixed pause before the caller tries to connect.
        time.sleep(3)


if __name__ == "__main__":
    test_libreoffice = PythonLibreOffice()
    # convert MS Word Document file (docx) to PDF
    test_libreoffice.convertFile("pdf", "document.docx")
LibreOffice 各类文件转换的 FilterName 的更多相关文章
- C# 将多个office文件转换及合并为一个PDF文件
PDF文件介绍 PDF(Portable Document Format)文件源于20世纪90年代初期,如今早已成为最流行的文件格式之一.因为PDF文件有很多优点: 支持跨平台和跨设备共享 ...
- mpp文件转换成jpg图片,可以用pdf文件做中转站
用project软件做了一个表,发现不能转换成图片,先把mpp文件转换成pdf文件,然后用PS打开pdf文件,存储为jpg格式就行了
- php将文件转换成二进制输出[转]
header( "Content-type: image/jpeg"); $PSize = filesize('1.jpg'); $picturedata = fread(fope ...
- ocx文件转换成C#程序引用的DLL
将ocx文件转换成C#程序引用的DLL文件的办法 将ocx文件转换成C#程序引用的DLL文件的办法,需要的朋友可以参考一下 1.打开VS2008或VS2010命令提示符(此例用VS2008) 将o ...
- nodejs将PDF文件转换成txt文本,并利用python处理转换后的文本文件
目前公司Web服务端的开发是用Nodejs,所以开发功能的话首先使用Nodejs,这也是为什么不直接用python转换的原因. 由于node对文本的处理(提取所需信息)的能力不强,类似于npm上的包: ...
- Python:将utf-8格式的文件转换成gbk格式的文件
需求:将utf-8格式的文件转换成gbk格式的文件 实现代码如下: def ReadFile(filePath,encoding="utf-8"): with codecs.ope ...
- 15个最好的PDF转word的在线转换器,将PDF文件转换成doc文件
PDF是一种文件格式,包含文本,图像,数据等,这是独立于操作系统的文件类型.它是一个开放的标准,压缩,另一方面DOC文件和矢量图形是由微软文字处理文件.该文件格式将纯文本格式转换为格式化文档.它支持几 ...
- Marvel – 将图像和源文件转换成互动,共享的原型
Marvel 是一款非常简单的工具,将图像和设计源文件转换成互动,共享的原型,无需任何编码.原型可以通过点击几下鼠标就创建出来,能工作在任何设备上的浏览器,包括移动设备,台式机.Marvel 的一个特 ...
- 文件转换神器Pandoc使用
最近记录笔记,改用Markdown格式.但有时需要分享下笔记,对于不懂markdown格式的同学来说阅读感觉不是那么友好.因此就一直在寻找一款文件转换的软件,之前因为用markdownpad来编写,可 ...
随机推荐
- Hadoop |集群的搭建
Hadoop组成 HDFS(Hadoop Distributed File System)架构概述 NameNode目录--主刀医生(nn): DataNode(dn)数据: Secondary N ...
- day66 模板小结 [母板继承,块,组件]
小结: day65 1. 老师编辑功能写完 1. magic2函数 --> 用两层for循环解决 全栈8期之殇 问题 2. 模板语言 in 语法 {% if xx in xx_list %} { ...
- HDU 4185 Oil Skimming 【最大匹配】
<题目链接> 题目大意: 给你一张图,图中有 '*' , '.' 两点,现在每次覆盖相邻的两个 '#' ,问最多能够覆盖几次. 解题分析: 无向图二分匹配的模板题,每个'#'点与周围四个方 ...
- RIPng 配置
一.拓扑图 二.配置过程 1)首先将各个端口的IP地址按照如上图所示配置完毕 2)在全局模式下创建rip ipv6 router rip [name] 3)在各个路由器上进入所连接的端口启用RIPng ...
- windows系统nexus3安装和配置
一.前言 为什么要在本地开发机器上安装nexus?首先声明公司内部是有自己的nexus仓库,但是对上传jar包做了限制,不能畅快的上传自己测试包依赖.于是就自己在本地搭建了一个nexus私服,即可以使 ...
- spring之基础知识总结
spring是轻量级的(非侵入式,不用继承spring中的父类等).Spring框架主要提供了IoC容器.AOP.数据访问.Web开发.消息.测试等相关技术.本文主要介绍Spring中的一些小知识点, ...
- sql - 递归update
declare v_rlt ):; l_sql ); -- variable that contains a query l_c sys_refcursor; -- cursor variable(w ...
- 安装win7出现安装程序无法创建新的系统分区
安装win7的时候出现“安装程序无法创建新的系统分区 也无法定位系统分区”! 我是直接把一个系统碟里面的安装文件全部拷出来.放到要安装系统的机器(D盘).用的是老毛桃的winpe已经安装好了.我的安装 ...
- z-index使用及一定要加backgroun
代码: <div> <span style="display:block;width:40px;height:20px;border:1px solid red;posit ...
- Codeforces Round #541 (Div. 2) (A~F)
目录 Codeforces 1131 A.Sea Battle B.Draw! C.Birthday D.Gourmet choice(拓扑排序) E.String Multiplication(思路 ...