LZ77.py
import math
from bitarray import bitarray class LZ77Compressor:
"""
A simplified implementation of the LZ77 Compression Algorithm
"""
MAX_WINDOW_SIZE = 400 def __init__(self, window_size=20):
self.window_size = min(window_size, self.MAX_WINDOW_SIZE)
self.lookahead_buffer_size = 15 # length of match is at most 4 bits def compress(self, input_file_path, output_file_path=None, verbose=False):
"""
Given the path of an input file, its content is compressed by applying a simple
LZ77 compression algorithm. The compressed format is:
0 bit followed by 8 bits (1 byte character) when there are no previous matches
within window
1 bit followed by 12 bits pointer (distance to the start of the match from the
current position) and 4 bits (length of the match) If a path to the output file is provided, the compressed data is written into
a binary file. Otherwise, it is returned as a bitarray if verbose is enabled, the compression description is printed to standard output
"""
data = None
i = 0
output_buffer = bitarray(endian='big') # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data = input_file.read()
except IOError:
print 'Could not open input file ...'
raise while i < len(data):
#print i match = self.findLongestMatch(data, i) if match:
# Add 1 bit flag, followed by 12 bit for distance, and 4 bit for the length
# of the match
(bestMatchDistance, bestMatchLength) = match output_buffer.append(True)
output_buffer.frombytes(chr(bestMatchDistance >> 4))
output_buffer.frombytes(chr(((bestMatchDistance & 0xf) << 4) | bestMatchLength)) if verbose:
print "<1, %i, %i>" % (bestMatchDistance, bestMatchLength), i += bestMatchLength else:
# No useful match was found. Add 0 bit flag, followed by 8 bit for the character
output_buffer.append(False)
output_buffer.frombytes(data[i]) if verbose:
print "<0, %s>" % data[i], i += 1 # fill the buffer with zeros if the number of bits is not a multiple of 8
output_buffer.fill() # write the compressed data into a binary file if a path is provided
if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(output_buffer.tobytes())
print "File was compressed successfully and saved to output path ..."
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise # an output file path was not provided, return the compressed data
return output_buffer def decompress(self, input_file_path, output_file_path=None):
"""
Given a string of the compressed file path, the data is decompressed back to its
original form, and written into the output file path if provided. If no output
file path is provided, the decompressed data is returned as a string
"""
data = bitarray(endian='big')
output_buffer = [] # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data.fromfile(input_file)
except IOError:
print 'Could not open input file ...'
raise while len(data) >= 9: flag = data.pop(0) if not flag:
byte = data[0:8].tobytes() output_buffer.append(byte)
del data[0:8]
else:
byte1 = ord(data[0:8].tobytes())
byte2 = ord(data[8:16].tobytes()) del data[0:16]
distance = (byte1 << 4) | (byte2 >> 4)
length = (byte2 & 0xf) for i in range(length):
output_buffer.append(output_buffer[-distance])
out_data = ''.join(output_buffer) if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(out_data)
print 'File was decompressed successfully and saved to output path ...'
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise
return out_data def findLongestMatch(self, data, current_position):
"""
Finds the longest match to a substring starting at the current_position
in the lookahead buffer from the history window
"""
end_of_buffer = min(current_position + self.lookahead_buffer_size, len(data) + 1) best_match_distance = -1
best_match_length = -1 # Optimization: Only consider substrings of length 2 and greater, and just
# output any substring of length 1 (8 bits uncompressed is better than 13 bits
# for the flag, distance, and length)
for j in range(current_position + 2, end_of_buffer): start_index = max(0, current_position - self.window_size)
substring = data[current_position:j] for i in range(start_index, current_position): repetitions = len(substring) / (current_position - i) last = len(substring) % (current_position - i) matched_string = data[i:current_position] * repetitions + data[i:i+last] if matched_string == substring and len(substring) > best_match_length:
best_match_distance = current_position - i
best_match_length = len(substring) if best_match_distance > 0 and best_match_length > 0:
return (best_match_distance, best_match_length)
return None
LZ77.py的更多相关文章
- python调用py中rar的路径问题。
1.python调用py,在py中的os.getcwd()获取的不是py的路径,可以通过os.path.split(os.path.realpath(__file__))[0]来获取py的路径. 2. ...
- Python导入其他文件中的.py文件 即模块
import sys sys.path.append("路径") import .py文件
- LZ77压缩算法编码原理详解(结合图片和简单代码)
前言 LZ77算法是无损压缩算法,由以色列人Abraham Lempel发表于1977年.LZ77是典型的基于字典的压缩算法,现在很多压缩技术都是基于LZ77.鉴于其在数据压缩领域的地位,本文将结合图 ...
- import renumber.py in pymol
cp renumber.py /usr/local/lib/python2.7/dist-packages/pymol import renumber or run /path/to/renumber ...
- python gettitle.py
#!/usr/bin/env python # coding=utf-8 import threading import requests import Queue import sys import ...
- 解决 odoo.py: error: option --addons-path: The addons-path 'local-addons/' does not seem to a be a valid Addons Directory!
情况说明 odoo源文件路径-/odoo-dev/odoo/: 我的模块插件路径 ~/odoo-dev/local-addons/my-module 在my-module中创建了__init__.py ...
- caffe机器学习自带图片分类器classify.py实现输出预测结果的概率及caffe的web_demo例子运行实例
caffe机器学习环境搭建及python接口编译参见我的上一篇博客:机器学习caffe环境搭建--redhat7.1和caffe的python接口编译 1.运行caffe图片分类器python接口 还 ...
- 【转】Windows下使用libsvm中的grid.py和easy.py进行参数调优
libsvm中有进行参数调优的工具grid.py和easy.py可以使用,这些工具可以帮助我们选择更好的参数,减少自己参数选优带来的烦扰. 所需工具:libsvm.gnuplot 本机环境:Windo ...
- MySqlNDB使用自带的ndb_setup.py安装集群
在用Mysql做集群时,使用Mysql的NDB版本更易于集群的扩展,稳定和数据的实时性. 我们可以使用Mysql自带的工具进行集群安装与管理:ndb_setup.py.位于Mysql的安装目录bin下 ...
随机推荐
- 前端学HTTP之WEB服务器
前面的话 Web服务器每天会分发出数以亿计的Web页面,它是万维网的骨干.本文主要介绍WEB服务器的相关内容 总括 Web服务器会对HTTP请求进行处理并提供响应.术语“Web服务器”可以用来表示We ...
- CE修改器修改DNF 测试视频 阿修罗提升智力增加攻击力
使用CE修改器来修改网络游戏,如DNF 测试视频: CE修改器:指的是Cheat Engine,字面上的意思指的是作弊引擎的意思,是一款内存修改编辑工具.通过修改游戏的内存数据来得到一些原本无法实现的 ...
- 自己动手之使用反射和泛型,动态读取XML创建类实例并赋值
前言: 最近小匹夫参与的游戏项目到了需要读取数据的阶段了,那么觉得自己业余时间也该实践下数据相关的内容.那么从哪入手呢?因为用的是Unity3d的游戏引擎,思来想去就选择了C#读取XML文件这个小功能 ...
- SparkStreaming实现Exactly-Once语义
作者:Syn良子 出处:http://www.cnblogs.com/cssdongl 转载请注明出处 译自:http://blog.cloudera.com/blog/2015/03/exactly ...
- .NET正则表达式基础入门
这是我第一次写的博客,个人觉得十分不容易.以前看别人写的博客文字十分流畅,到自己来写却发现十分困难,还是感谢那些为技术而奉献自己力量的人吧. 本教程编写之前,博主阅读了<正则指引>这本入门 ...
- "检索COM类工厂中 CLSID为 {00024500-0000-0000-C000-000000000046}的组件时失败,原因是出现以下错误: 80070005" 问题的解决
一.故障环境 Windows 2008 .net 3.0 二.故障描述 调用excel组件生成excel文档时页面报错.报错内容一大串,核心是"检索COM类工厂中 CLSID为 {000 ...
- 记录一次bug解决过程:resultType和手动开启事务
一.总结 二.BUG描述:MyBatis中resultType使用 MyBatis中的resultType类似于入参:parameterType.先看IDCM项目中的实际使用案例代码,如下: // L ...
- JSONP实现
使用jsonp实现跨域获取数据. js部分(旧): (function(window, document) { 'use strict'; var jsonp = function(url, data ...
- Fresco从配置到使用(最高效的图片加载框架)
Frescoj说明: facebook开源的针对android应用的图片加载框架,高效和功能齐全. 支持加载网络,本地存储和资源图片: 提供三级缓存(二级memory和一级internal ...
- iOS网络1——NSURLConnection使用详解
原文在此 一.整体介绍 NSURLConnection是苹果提供的原生网络访问类,但是苹果很快会将其废弃,且由NSURLSession(iOS7以后)来替代.目前使用最广泛的第三方网络框架AFNetw ...