import math
from bitarray import bitarray class LZ77Compressor:
"""
A simplified implementation of the LZ77 Compression Algorithm
"""
MAX_WINDOW_SIZE = 400 def __init__(self, window_size=20):
self.window_size = min(window_size, self.MAX_WINDOW_SIZE)
self.lookahead_buffer_size = 15 # length of match is at most 4 bits def compress(self, input_file_path, output_file_path=None, verbose=False):
"""
Given the path of an input file, its content is compressed by applying a simple
LZ77 compression algorithm. The compressed format is:
0 bit followed by 8 bits (1 byte character) when there are no previous matches
within window
1 bit followed by 12 bits pointer (distance to the start of the match from the
current position) and 4 bits (length of the match) If a path to the output file is provided, the compressed data is written into
a binary file. Otherwise, it is returned as a bitarray if verbose is enabled, the compression description is printed to standard output
"""
data = None
i = 0
output_buffer = bitarray(endian='big') # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data = input_file.read()
except IOError:
print 'Could not open input file ...'
raise while i < len(data):
#print i match = self.findLongestMatch(data, i) if match:
# Add 1 bit flag, followed by 12 bit for distance, and 4 bit for the length
# of the match
(bestMatchDistance, bestMatchLength) = match output_buffer.append(True)
output_buffer.frombytes(chr(bestMatchDistance >> 4))
output_buffer.frombytes(chr(((bestMatchDistance & 0xf) << 4) | bestMatchLength)) if verbose:
print "<1, %i, %i>" % (bestMatchDistance, bestMatchLength), i += bestMatchLength else:
# No useful match was found. Add 0 bit flag, followed by 8 bit for the character
output_buffer.append(False)
output_buffer.frombytes(data[i]) if verbose:
print "<0, %s>" % data[i], i += 1 # fill the buffer with zeros if the number of bits is not a multiple of 8
output_buffer.fill() # write the compressed data into a binary file if a path is provided
if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(output_buffer.tobytes())
print "File was compressed successfully and saved to output path ..."
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise # an output file path was not provided, return the compressed data
return output_buffer def decompress(self, input_file_path, output_file_path=None):
"""
Given a string of the compressed file path, the data is decompressed back to its
original form, and written into the output file path if provided. If no output
file path is provided, the decompressed data is returned as a string
"""
data = bitarray(endian='big')
output_buffer = [] # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data.fromfile(input_file)
except IOError:
print 'Could not open input file ...'
raise while len(data) >= 9: flag = data.pop(0) if not flag:
byte = data[0:8].tobytes() output_buffer.append(byte)
del data[0:8]
else:
byte1 = ord(data[0:8].tobytes())
byte2 = ord(data[8:16].tobytes()) del data[0:16]
distance = (byte1 << 4) | (byte2 >> 4)
length = (byte2 & 0xf) for i in range(length):
output_buffer.append(output_buffer[-distance])
out_data = ''.join(output_buffer) if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(out_data)
print 'File was decompressed successfully and saved to output path ...'
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise
return out_data def findLongestMatch(self, data, current_position):
"""
Finds the longest match to a substring starting at the current_position
in the lookahead buffer from the history window
"""
end_of_buffer = min(current_position + self.lookahead_buffer_size, len(data) + 1) best_match_distance = -1
best_match_length = -1 # Optimization: Only consider substrings of length 2 and greater, and just
# output any substring of length 1 (8 bits uncompressed is better than 13 bits
# for the flag, distance, and length)
for j in range(current_position + 2, end_of_buffer): start_index = max(0, current_position - self.window_size)
substring = data[current_position:j] for i in range(start_index, current_position): repetitions = len(substring) / (current_position - i) last = len(substring) % (current_position - i) matched_string = data[i:current_position] * repetitions + data[i:i+last] if matched_string == substring and len(substring) > best_match_length:
best_match_distance = current_position - i
best_match_length = len(substring) if best_match_distance > 0 and best_match_length > 0:
return (best_match_distance, best_match_length)
return None

  

LZ77.py的更多相关文章

  1. python调用py中rar的路径问题。

    1.python调用py,在py中的os.getcwd()获取的不是py的路径,可以通过os.path.split(os.path.realpath(__file__))[0]来获取py的路径. 2. ...

  2. Python导入其他文件中的.py文件 即模块

    import sys sys.path.append("路径") import .py文件

  3. LZ77压缩算法编码原理详解(结合图片和简单代码)

    前言 LZ77算法是无损压缩算法,由以色列人Abraham Lempel发表于1977年.LZ77是典型的基于字典的压缩算法,现在很多压缩技术都是基于LZ77.鉴于其在数据压缩领域的地位,本文将结合图 ...

  4. import renumber.py in pymol

    cp renumber.py /usr/local/lib/python2.7/dist-packages/pymol import renumber or run /path/to/renumber ...

  5. python gettitle.py

    #!/usr/bin/env python # coding=utf-8 import threading import requests import Queue import sys import ...

  6. 解决 odoo.py: error: option --addons-path: The addons-path 'local-addons/' does not seem to a be a valid Addons Directory!

    情况说明 odoo源文件路径-/odoo-dev/odoo/: 我的模块插件路径 ~/odoo-dev/local-addons/my-module 在my-module中创建了__init__.py ...

  7. caffe机器学习自带图片分类器classify.py实现输出预测结果的概率及caffe的web_demo例子运行实例

    caffe机器学习环境搭建及python接口编译参见我的上一篇博客:机器学习caffe环境搭建--redhat7.1和caffe的python接口编译 1.运行caffe图片分类器python接口 还 ...

  8. 【转】Windows下使用libsvm中的grid.py和easy.py进行参数调优

    libsvm中有进行参数调优的工具grid.py和easy.py可以使用,这些工具可以帮助我们选择更好的参数,减少自己参数选优带来的烦扰. 所需工具:libsvm.gnuplot 本机环境:Windo ...

  9. MySqlNDB使用自带的ndb_setup.py安装集群

    在用Mysql做集群时,使用Mysql的NDB版本更易于集群的扩展,稳定和数据的实时性. 我们可以使用Mysql自带的工具进行集群安装与管理:ndb_setup.py.位于Mysql的安装目录bin下 ...

随机推荐

  1. .NET Core采用的全新配置系统[8]: 如何实现配置与源文件的同步

    配置的同步涉及到两个方面:第一,对原始的配置文件实施监控并在其发生变化之后从新加载配置:第二,配置重新加载之后及时通知应用程序进而使后者能够使用最新的配置.接下来我们利用一个简单的.NET Core控 ...

  2. SQL 性能调优中可参考的几类Lock Wait

    在我们的系统出现性能问题时,往往避不开调查各种类型 Lock Wait,如Row Lock Wait.Page Lock Wait.Page IO Latch Wait等.从中找出可能的异常等待,为性 ...

  3. 使用MATLAB对图像处理的几种方法(上)

    实验一图像的滤波处理 一.实验目的 使用MATLAB处理图像,掌握均值滤波器和加权均值滤波器的使用,对比两种滤波器对图像处理结果及系统自带函数和自定义函数性能的比较,体会不同大小的掩模对图像细节的影响 ...

  4. Java多种方式动态生成doc文档

    转载请注明出处:http://www.cnblogs.com/Joanna-Yan/p/5280272.html 本来是要在Android端生成doc的(这需求...),最后方法没有好的方法能够在An ...

  5. C# 复制指定节点的所有子孙节点到新建的节点下

    XML结构: 新建一个mask_list节点,一个procedure节点,将上面的mask_list和procedure节点的所有子孙节点添加到新建的mask_list和procedure节点 Xml ...

  6. ASP.NET的视图(Razor)循环产生html代码

    需要要视图中Razor语法,循环产生一些html代码. 产生后的html是这样的: <li data-transition="> <img src="~/Cont ...

  7. [函數] Firemonkey Android 取得系统参数设定的字型大小

    Android 系统参数设定内,可以设定字型大小: 可以透过下面代码来取得字型大小比例: function FontScale: Single; var Resources: JResources; ...

  8. Java集合-Python数据结构比较

    Java list与Python list相比较 Java List:有序的,可重复的.(有序指的是集合中对象的顺序与添加顺序相同) Python list(列表)是有序的,可变的. Java Lis ...

  9. C标准头文件<math.h>

    定义域错误可以理解为超出了函数的适用范围,如果发生了定义域错误,设errno为EDOM 如果结果不能表示为double值,则发生值域错误,如果结果上溢,则函数返回HUGE_VAL的值,设errno为E ...

  10. View and Data API Tips : Conversion between DbId and node

    By Daniel Du In View and Data client side API, The assets in the Autodesk Viewer have an object tree ...