import math
from bitarray import bitarray class LZ77Compressor:
"""
A simplified implementation of the LZ77 Compression Algorithm
"""
MAX_WINDOW_SIZE = 400 def __init__(self, window_size=20):
self.window_size = min(window_size, self.MAX_WINDOW_SIZE)
self.lookahead_buffer_size = 15 # length of match is at most 4 bits def compress(self, input_file_path, output_file_path=None, verbose=False):
"""
Given the path of an input file, its content is compressed by applying a simple
LZ77 compression algorithm. The compressed format is:
0 bit followed by 8 bits (1 byte character) when there are no previous matches
within window
1 bit followed by 12 bits pointer (distance to the start of the match from the
current position) and 4 bits (length of the match) If a path to the output file is provided, the compressed data is written into
a binary file. Otherwise, it is returned as a bitarray if verbose is enabled, the compression description is printed to standard output
"""
data = None
i = 0
output_buffer = bitarray(endian='big') # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data = input_file.read()
except IOError:
print 'Could not open input file ...'
raise while i < len(data):
#print i match = self.findLongestMatch(data, i) if match:
# Add 1 bit flag, followed by 12 bit for distance, and 4 bit for the length
# of the match
(bestMatchDistance, bestMatchLength) = match output_buffer.append(True)
output_buffer.frombytes(chr(bestMatchDistance >> 4))
output_buffer.frombytes(chr(((bestMatchDistance & 0xf) << 4) | bestMatchLength)) if verbose:
print "<1, %i, %i>" % (bestMatchDistance, bestMatchLength), i += bestMatchLength else:
# No useful match was found. Add 0 bit flag, followed by 8 bit for the character
output_buffer.append(False)
output_buffer.frombytes(data[i]) if verbose:
print "<0, %s>" % data[i], i += 1 # fill the buffer with zeros if the number of bits is not a multiple of 8
output_buffer.fill() # write the compressed data into a binary file if a path is provided
if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(output_buffer.tobytes())
print "File was compressed successfully and saved to output path ..."
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise # an output file path was not provided, return the compressed data
return output_buffer def decompress(self, input_file_path, output_file_path=None):
"""
Given a string of the compressed file path, the data is decompressed back to its
original form, and written into the output file path if provided. If no output
file path is provided, the decompressed data is returned as a string
"""
data = bitarray(endian='big')
output_buffer = [] # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data.fromfile(input_file)
except IOError:
print 'Could not open input file ...'
raise while len(data) >= 9: flag = data.pop(0) if not flag:
byte = data[0:8].tobytes() output_buffer.append(byte)
del data[0:8]
else:
byte1 = ord(data[0:8].tobytes())
byte2 = ord(data[8:16].tobytes()) del data[0:16]
distance = (byte1 << 4) | (byte2 >> 4)
length = (byte2 & 0xf) for i in range(length):
output_buffer.append(output_buffer[-distance])
out_data = ''.join(output_buffer) if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(out_data)
print 'File was decompressed successfully and saved to output path ...'
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise
return out_data def findLongestMatch(self, data, current_position):
"""
Finds the longest match to a substring starting at the current_position
in the lookahead buffer from the history window
"""
end_of_buffer = min(current_position + self.lookahead_buffer_size, len(data) + 1) best_match_distance = -1
best_match_length = -1 # Optimization: Only consider substrings of length 2 and greater, and just
# output any substring of length 1 (8 bits uncompressed is better than 13 bits
# for the flag, distance, and length)
for j in range(current_position + 2, end_of_buffer): start_index = max(0, current_position - self.window_size)
substring = data[current_position:j] for i in range(start_index, current_position): repetitions = len(substring) / (current_position - i) last = len(substring) % (current_position - i) matched_string = data[i:current_position] * repetitions + data[i:i+last] if matched_string == substring and len(substring) > best_match_length:
best_match_distance = current_position - i
best_match_length = len(substring) if best_match_distance > 0 and best_match_length > 0:
return (best_match_distance, best_match_length)
return None

  

LZ77.py的更多相关文章

  1. python调用py中rar的路径问题。

    1.python调用py,在py中的os.getcwd()获取的不是py的路径,可以通过os.path.split(os.path.realpath(__file__))[0]来获取py的路径. 2. ...

  2. Python导入其他文件中的.py文件 即模块

    import sys sys.path.append("路径") import .py文件

  3. LZ77压缩算法编码原理详解(结合图片和简单代码)

    前言 LZ77算法是无损压缩算法,由以色列人Abraham Lempel发表于1977年.LZ77是典型的基于字典的压缩算法,现在很多压缩技术都是基于LZ77.鉴于其在数据压缩领域的地位,本文将结合图 ...

  4. import renumber.py in pymol

    cp renumber.py /usr/local/lib/python2.7/dist-packages/pymol import renumber or run /path/to/renumber ...

  5. python gettitle.py

    #!/usr/bin/env python # coding=utf-8 import threading import requests import Queue import sys import ...

  6. 解决 odoo.py: error: option --addons-path: The addons-path 'local-addons/' does not seem to a be a valid Addons Directory!

    情况说明 odoo源文件路径-/odoo-dev/odoo/: 我的模块插件路径 ~/odoo-dev/local-addons/my-module 在my-module中创建了__init__.py ...

  7. caffe机器学习自带图片分类器classify.py实现输出预测结果的概率及caffe的web_demo例子运行实例

    caffe机器学习环境搭建及python接口编译参见我的上一篇博客:机器学习caffe环境搭建--redhat7.1和caffe的python接口编译 1.运行caffe图片分类器python接口 还 ...

  8. 【转】Windows下使用libsvm中的grid.py和easy.py进行参数调优

    libsvm中有进行参数调优的工具grid.py和easy.py可以使用,这些工具可以帮助我们选择更好的参数,减少自己参数选优带来的烦扰. 所需工具:libsvm.gnuplot 本机环境:Windo ...

  9. MySqlNDB使用自带的ndb_setup.py安装集群

    在用Mysql做集群时,使用Mysql的NDB版本更易于集群的扩展,稳定和数据的实时性. 我们可以使用Mysql自带的工具进行集群安装与管理:ndb_setup.py.位于Mysql的安装目录bin下 ...

随机推荐

  1. ASP.NET MVC5+EF6+EasyUI 后台管理系统(17)-LinQ动态排序

    系列目录 首先修复程序中的一个BUG这个BUG在GridPager类中,把sord修改为sort这个名称填写错误,会导致后台一直无法获取datagrid的排序字段 本来是没有这一讲的,为了使20行的代 ...

  2. C#的扩展方法解析

    在使用面向对象的语言进行项目开发的过程中,较多的会使用到“继承”的特性,但是并非所有的场景都适合使用“继承”特性,在设计模式的一些基本原则中也有较多的提到. 继承的有关特性的使用所带来的问题:对象的继 ...

  3. 13.JAVA之GUI编程将程序打包jar

    jar基本命令: 目标:将下列MyMenuDemo.java代码打包成jar. 方法如下: 1.把java代码放到d:\myclass目录下. 2.按下快捷键ctrl+r,打开运行窗口,输入cmd后回 ...

  4. Signalr系列之虚拟目录详解与应用中的CDN加速实战

    目录 对SignalR不了解的人可以直接移步下面的目录 SignalR系列目录 前言 前段时间一直有人问我 在用SignalR 2.0开发客服系统[系列1:实现群发通讯]这篇文章中的"/Si ...

  5. PID控制

    PID解释: 位置式:      可以看出,比例部分只与当前的偏差有关,而积分部分则是系统过去所有偏差的累积.位置式PI调节器的结构清晰,P和I两部分作用分明,参数调整简单明了.但直观上看,要计算第拍 ...

  6. 我看不下去鸟。。。。Java和C#的socket通信真的简单吗?

    这几天在博客园上看到好几个写Java和C#的socket通信的帖子.但是都为指出其中关键点. C# socket通信组件有很多,在vs 使用nuget搜索socket组件有很多类似的.本人使用的是自己 ...

  7. Servlet的生命周期及工作原理

    Servlet生命周期分为三个阶段: 1,初始化阶段  调用init()方法 2,响应客户请求阶段 调用service()方法 3,终止阶段 调用destroy()方法 Servlet初始化阶段: 在 ...

  8. php实现设计模式之代理模式

    <?php /* * 代理模式 * 为其他对象提供一种代理以控制对这个对象的访问. * 在某些情况下,一个对象不适合或者不能直接引用另一个对象,而代理对象可以在客户端和目标对象之间起到中介的作用 ...

  9. 百度地图隐藏BMKAnnotationView

    BMKAnnotationview.hidden 失效,只能移除Annotation BMKPinAnnotationView设置setHidden为yes时不能隐藏

  10. Android Support 包知识

    Android Support Library包是一组代码库, 它提供了向后版本的framework API的兼容, 这些代码库实现的效果和只能在指定版本中使用的API一样好. 每个Support L ...