LZ77.py
import math
from bitarray import bitarray class LZ77Compressor:
"""
A simplified implementation of the LZ77 Compression Algorithm
"""
MAX_WINDOW_SIZE = 400 def __init__(self, window_size=20):
self.window_size = min(window_size, self.MAX_WINDOW_SIZE)
self.lookahead_buffer_size = 15 # length of match is at most 4 bits def compress(self, input_file_path, output_file_path=None, verbose=False):
"""
Given the path of an input file, its content is compressed by applying a simple
LZ77 compression algorithm. The compressed format is:
0 bit followed by 8 bits (1 byte character) when there are no previous matches
within window
1 bit followed by 12 bits pointer (distance to the start of the match from the
current position) and 4 bits (length of the match) If a path to the output file is provided, the compressed data is written into
a binary file. Otherwise, it is returned as a bitarray if verbose is enabled, the compression description is printed to standard output
"""
data = None
i = 0
output_buffer = bitarray(endian='big') # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data = input_file.read()
except IOError:
print 'Could not open input file ...'
raise while i < len(data):
#print i match = self.findLongestMatch(data, i) if match:
# Add 1 bit flag, followed by 12 bit for distance, and 4 bit for the length
# of the match
(bestMatchDistance, bestMatchLength) = match output_buffer.append(True)
output_buffer.frombytes(chr(bestMatchDistance >> 4))
output_buffer.frombytes(chr(((bestMatchDistance & 0xf) << 4) | bestMatchLength)) if verbose:
print "<1, %i, %i>" % (bestMatchDistance, bestMatchLength), i += bestMatchLength else:
# No useful match was found. Add 0 bit flag, followed by 8 bit for the character
output_buffer.append(False)
output_buffer.frombytes(data[i]) if verbose:
print "<0, %s>" % data[i], i += 1 # fill the buffer with zeros if the number of bits is not a multiple of 8
output_buffer.fill() # write the compressed data into a binary file if a path is provided
if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(output_buffer.tobytes())
print "File was compressed successfully and saved to output path ..."
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise # an output file path was not provided, return the compressed data
return output_buffer def decompress(self, input_file_path, output_file_path=None):
"""
Given a string of the compressed file path, the data is decompressed back to its
original form, and written into the output file path if provided. If no output
file path is provided, the decompressed data is returned as a string
"""
data = bitarray(endian='big')
output_buffer = [] # read the input file
try:
with open(input_file_path, 'rb') as input_file:
data.fromfile(input_file)
except IOError:
print 'Could not open input file ...'
raise while len(data) >= 9: flag = data.pop(0) if not flag:
byte = data[0:8].tobytes() output_buffer.append(byte)
del data[0:8]
else:
byte1 = ord(data[0:8].tobytes())
byte2 = ord(data[8:16].tobytes()) del data[0:16]
distance = (byte1 << 4) | (byte2 >> 4)
length = (byte2 & 0xf) for i in range(length):
output_buffer.append(output_buffer[-distance])
out_data = ''.join(output_buffer) if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(out_data)
print 'File was decompressed successfully and saved to output path ...'
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise
return out_data def findLongestMatch(self, data, current_position):
"""
Finds the longest match to a substring starting at the current_position
in the lookahead buffer from the history window
"""
end_of_buffer = min(current_position + self.lookahead_buffer_size, len(data) + 1) best_match_distance = -1
best_match_length = -1 # Optimization: Only consider substrings of length 2 and greater, and just
# output any substring of length 1 (8 bits uncompressed is better than 13 bits
# for the flag, distance, and length)
for j in range(current_position + 2, end_of_buffer): start_index = max(0, current_position - self.window_size)
substring = data[current_position:j] for i in range(start_index, current_position): repetitions = len(substring) / (current_position - i) last = len(substring) % (current_position - i) matched_string = data[i:current_position] * repetitions + data[i:i+last] if matched_string == substring and len(substring) > best_match_length:
best_match_distance = current_position - i
best_match_length = len(substring) if best_match_distance > 0 and best_match_length > 0:
return (best_match_distance, best_match_length)
return None
LZ77.py的更多相关文章
- python调用py中rar的路径问题。
1.python调用py,在py中的os.getcwd()获取的不是py的路径,可以通过os.path.split(os.path.realpath(__file__))[0]来获取py的路径. 2. ...
- Python导入其他文件中的.py文件 即模块
import sys sys.path.append("路径") import .py文件
- LZ77压缩算法编码原理详解(结合图片和简单代码)
前言 LZ77算法是无损压缩算法,由以色列人Abraham Lempel发表于1977年.LZ77是典型的基于字典的压缩算法,现在很多压缩技术都是基于LZ77.鉴于其在数据压缩领域的地位,本文将结合图 ...
- import renumber.py in pymol
cp renumber.py /usr/local/lib/python2.7/dist-packages/pymol import renumber or run /path/to/renumber ...
- python gettitle.py
#!/usr/bin/env python # coding=utf-8 import threading import requests import Queue import sys import ...
- 解决 odoo.py: error: option --addons-path: The addons-path 'local-addons/' does not seem to a be a valid Addons Directory!
情况说明 odoo源文件路径-/odoo-dev/odoo/: 我的模块插件路径 ~/odoo-dev/local-addons/my-module 在my-module中创建了__init__.py ...
- caffe机器学习自带图片分类器classify.py实现输出预测结果的概率及caffe的web_demo例子运行实例
caffe机器学习环境搭建及python接口编译参见我的上一篇博客:机器学习caffe环境搭建--redhat7.1和caffe的python接口编译 1.运行caffe图片分类器python接口 还 ...
- 【转】Windows下使用libsvm中的grid.py和easy.py进行参数调优
libsvm中有进行参数调优的工具grid.py和easy.py可以使用,这些工具可以帮助我们选择更好的参数,减少自己参数选优带来的烦扰. 所需工具:libsvm.gnuplot 本机环境:Windo ...
- MySqlNDB使用自带的ndb_setup.py安装集群
在用Mysql做集群时,使用Mysql的NDB版本更易于集群的扩展,稳定和数据的实时性. 我们可以使用Mysql自带的工具进行集群安装与管理:ndb_setup.py.位于Mysql的安装目录bin下 ...
随机推荐
- Android数据存储之SQLCipher数据库加密
前言: 最近研究了Android Sqlite数据库(文章地址:Android数据存储之Sqlite的介绍及使用)以及ContentProvider程序间数据共享(Android探索之ContentP ...
- “前.NET Core时代”如何实现跨平台代码重用 ——程序集重用
除了在源代码层面实现共享("前.NET Core时代"如何实现跨平台代码重用 --源文件重用)之外,我们还可以跨平台共享同一个程序集,这种独立于具体平台的"中性" ...
- 常用原生JS方法总结(兼容性写法)
经常会用到原生JS来写前端...但是原生JS的一些方法在适应各个浏览器的时候写法有的也不怎么一样的... 今天下班有点累... 就来总结一下简单的东西吧…… 备注:一下的方法都是包裹在一个EventU ...
- Linux同平台数据库整体物理迁移
Linux同平台数据库整体物理迁移 需求:A机器不再使用,要将A机器的Oracle迁移到B机器. 之前写过类似需求的文章:http://www.cnblogs.com/jyzhao/p/3968504 ...
- OWIN与Katana详解
前言 我胡汉三又回来了,!!!!, 最近忙成狗,实在没空写博文,实在对不起自己,博客园上逛了逛发现 我大微软还是很给力的 asp.net core 1.0 .net core 1.0 即将发布,虽然. ...
- Django models对象的select_related方法(减少查询次数)
表结构 先创建一个新的app python manage.py startapp test01 在settings.py注册一下app INSTALLED_APPS = ( 'django.contr ...
- .Net语言 APP开发平台——Smobiler学习日志:如何快速实现类似于微信的悬浮显示二维码效果
最前面的话:Smobiler是一个在VS环境中使用.Net语言来开发APP的开发平台,也许比Xamarin更方便 样式一 一.目标样式 我们要实现上图中的效果,需要如下的操作: 1.从工具栏上的&qu ...
- 设计模式02迭代器(java)
先贴代码,有空来写内容. 1.定义集合 import java.util.List; import java.util.ArrayList; //coollection是我自己定义的一个集合,因为要写 ...
- 9.2.4 .net core 通过ViewComponent封装控件
我们在.net core中还使用了ViewComponent方式生成控件.ViewComponent也是asp.net core的新特性,是对页面部分的渲染,以前PartialView的功能,可以使用 ...
- SSH框架整合
SSH框架整合 一.原理图 action:(struts2) 1.获取表单的数据 2.表单的验证,例如非空验证,email验证等 3.调用service,并把数据传递给service Service: ...