huffman 编码

Huffman 压缩是一种压缩算法，其中最经典的部分就是根据字符出现的频率建立 Huffman 树，然后根据 Huffman 树的构建结果为每个字符编码。Huffman 编码也称为前缀编码，即任何一个字符的编码都不是另一个字符编码的前缀。如果学过 C 语言版的数据结构，就会知道教材上给出的算法的时间复杂度是 O(N^2)，效率比较低。下面首先贴上这个版本算法的代码：

#include<iostream>

#include<string>

using namespace std;

/*
 * One slot of the array-based Huffman tree.
 *
 * Nodes are addressed by their 1-based position in the array; the value 0 in
 * parent / lchild / rchild means "no such node".
 */
struct huffman_node_s {
	int weight;  // frequency of the symbol, or sum of both children for an inner node
	int parent;  // index of the parent node, 0 while the node is still unmerged
	int lchild;  // index of the left child, 0 for a leaf
	int rchild;  // index of the right child, 0 for a leaf
};

typedef struct huffman_node_s huffman_node_t;
typedef huffman_node_t* HuffmanTree;

/* Table of NUL-terminated code strings; HuffmanEncode fills entries 1..n. */
typedef char** HuffmanCode;

/*
 * Picks the two lightest nodes that have no parent yet among ht[1..n].
 *
 * ht - 1-based node array; a node whose parent field is 0 is still unmerged.
 * n  - highest index to examine.
 * s1 - receives the index of the lightest candidate (0 if there is none).
 * s2 - receives the index of the second lightest candidate
 *      (0 if fewer than two candidates exist).
 *
 * When both results are non-zero, ht[*s1].weight <= ht[*s2].weight.
 */
void select(HuffmanTree ht, int n, int* s1, int* s2) {
	int i;

	*s1 = *s2 = 0;
	for (i = 1; i <= n; i++) {
		if (0 != ht[i].parent)
			continue;  // already merged into a subtree

		if (0 == *s1 || ht[i].weight < ht[*s1].weight) {
			// New overall minimum: the previous minimum becomes the runner-up.
			*s2 = *s1;
			*s1 = i;
		} else if (0 == *s2 || ht[i].weight < ht[*s2].weight) {
			// Also taken when the weight ties with ht[*s1]; the previous
			// implementation skipped that case and could keep a heavier
			// runner-up, yielding a non-optimal tree.
			*s2 = i;
		}
	}
}

/*
 * Builds a Huffman tree for n weights and derives the code of every symbol.
 *
 * ht     - out: newly allocated node array with 2n-1 nodes stored at indices
 *          1..2n-1 (index 0 is unused because 0 means "no node").
 * hc     - out: newly allocated table; (*hc)[i] for i in 1..n is a
 *          NUL-terminated string of '0'/'1' for weight[i - 1], (*hc)[0] is NULL.
 * weight - the n symbol weights.
 * n      - number of symbols.
 *
 * The caller owns *ht, *hc and every (*hc)[i] and releases them with free().
 * If n <= 0, weight is NULL, or an allocation fails, both *ht and *hc are set
 * to NULL and nothing is left allocated.
 */
void HuffmanEncode(HuffmanTree* ht, HuffmanCode* hc, int* weight, int n) {
	int i, start;
	int s1, s2;
	int c, f;
	int m = 2 * n - 1;
	char* temp;

	*ht = NULL;
	*hc = NULL;
	if (n <= 0 || NULL == weight)
		return;

	*ht = (huffman_node_t*)malloc((m + 1) * sizeof(huffman_node_t));
	*hc = (char**)malloc((n + 1) * sizeof(char*));
	// A code has at most n-1 bits, so n chars hold any code plus its terminator.
	temp = (char*)malloc(n * sizeof(char));
	if (NULL == *ht || NULL == *hc || NULL == temp) {
		free(*ht);
		free(*hc);
		free(temp);
		*ht = NULL;
		*hc = NULL;
		return;
	}

	// Leaves occupy 1..n; the inner nodes n+1..m start out empty.
	for (i = 1; i <= m; i++) {
		(*ht)[i].weight = (i <= n) ? weight[i - 1] : 0;
		(*ht)[i].parent = 0;
		(*ht)[i].lchild = 0;
		(*ht)[i].rchild = 0;
	}

	// Each new inner node joins the two lightest parentless nodes created so far.
	for (i = n + 1; i <= m; i++) {
		select(*ht, i - 1, &s1, &s2);
		(*ht)[i].lchild = s1;
		(*ht)[i].rchild = s2;
		(*ht)[i].weight = (*ht)[s1].weight + (*ht)[s2].weight;
		(*ht)[s1].parent = (*ht)[s2].parent = i;
	}

	for (i = 0; i <= n; i++)
		(*hc)[i] = NULL;

	temp[n - 1] = '\0';
	for (i = 1; i <= n; i++) {
		// Walk from the leaf up to the root, writing the bits back to front.
		start = n - 1;
		for (c = i, f = (*ht)[i].parent; f != 0; c = f, f = (*ht)[f].parent) {
			if (c == (*ht)[f].lchild)
				temp[--start] = '0';
			else
				temp[--start] = '1';
		}

		(*hc)[i] = (char*)malloc(n - start);
		if (NULL == (*hc)[i]) {
			// Roll back everything so the caller sees a clean failure.
			for (c = 1; c < i; c++)
				free((*hc)[c]);
			free(*hc);
			free(*ht);
			free(temp);
			*hc = NULL;
			*ht = NULL;
			return;
		}
		strcpy((*hc)[i], temp + start);
	}

	free(temp);  // scratch buffer was previously leaked
}

/*
 * Demo driver: encodes a fixed weight table and prints one code per line,
 * in the same order as the weights.
 */
int main(int argc, char* argv[]) {
	int weight[] = {5, 29, 7, 8, 14, 23, 3, 11};
	int length = sizeof(weight) / sizeof(int);
	HuffmanTree ht = NULL;
	HuffmanCode hc = NULL;
	int i;

	HuffmanEncode(&ht, &hc, weight, length);
	if (NULL == hc) {
		// Nothing to print; still release whatever tree storage came back.
		free(ht);
		return 1;
	}

	for (i = 1; i <= length; i++)
		cout << hc[i] << endl;

	for (i = 1; i <= length; i++)
		free(hc[i]);
	free(hc);
	free(ht);  // the node array was previously never released

	cin.get();  // keep the console window open until Enter is pressed
	return 0;
}

还有另外一种算法，就是用双队列的形式，可以把时间复杂度降到 O(N*logN)（主要开销是先把字符按频率排序，建树过程本身是线性的）。算法的核心思想是：

1，建立两个空的队列

2，为每一个字符建立一个节点，并按照字符出现的频率以非递减的方式放入第一个队列

3，每步要找出出现频率最小的两个字符，那么可以根据以下方法进行查找：

a，如果第二个队列为空，那么使第一个队列的头结点出列

b，如果第一个队列为空，那么使第二个队列的头结点出列

c，如果两个队列都不为空，那比较两个队列头结点字符出现的频率，使出现频率较小的头结点出列

4，创建一个新的临时节点，它的频率是第三步中出列的两个节点的频率之和，然后将临时节点压入第二个队列。当第一个队列为空且第二个队列中只剩一个节点时，算法结束，该节点就是 Huffman 树的根。下面给出代码：

#include<iostream>

#include<string>

using namespace std;

/*
 * Node of the pointer-based Huffman tree; the same nodes are also the
 * elements stored in the work queues.
 */
struct queue_node_s {
	char data;                    // symbol for a leaf; inner nodes are created with '$'
	int frequent;                 // frequency of the symbol, or sum of both children
	struct queue_node_s* lchild;  // left subtree, NULL for a leaf
	struct queue_node_s* rchild;  // right subtree, NULL for a leaf
};

typedef struct queue_node_s queue_node_t;

/*
 * Fixed-capacity, array-backed FIFO of tree-node pointers.
 * front and rear are both -1 while the queue is empty.
 */
struct queue_s {
	int front;           // index of the oldest element, -1 when empty
	int rear;            // index of the newest element, -1 when empty
	int capcity;         // number of slots in arr
	queue_node_t** arr;  // storage for the queued node pointers
};

typedef struct queue_s queue_t;

/*
 * Allocates a childless tree node.
 *
 * data     - symbol stored in the node.
 * frequent - frequency stored in the node.
 *
 * Returns the new node, which the caller releases with free(), or NULL when
 * the allocation fails (previously the NULL result was dereferenced).
 */
queue_node_t* createNode(char data, int frequent) {
	queue_node_t* node = (queue_node_t*)malloc(sizeof(queue_node_t));
	if (NULL == node)
		return NULL;

	node->data = data;
	node->frequent = frequent;
	node->lchild = NULL;
	node->rchild = NULL;
	return node;
}

/*
 * Allocates an empty queue with room for `size` node pointers.
 *
 * Returns the queue, or NULL if either allocation fails. The caller releases
 * it with free(queue->arr) followed by free(queue).
 */
queue_t* createQueue(int size)  {
	queue_t* queue = (queue_t*)malloc(sizeof(queue_t));
	if (NULL == queue)
		return NULL;  // previously dereferenced without a check

	queue->capcity = size;
	queue->front = queue->rear = -1;
	// The slots hold pointers, so size them as pointers rather than whole nodes.
	queue->arr = (queue_node_t**)malloc(size * sizeof(queue_node_t*));
	if (NULL == queue->arr) {
		free(queue);
		return NULL;
	}
	return queue;
}

/* True when the queue holds no element, i.e. both indices are at -1. */
bool isQueueEmpty(queue_t* queue) {
	return queue->front == -1 && queue->rear == -1;
}

/* True when the queue holds exactly one element (front and rear coincide on a valid slot). */
bool isContainOne(queue_t* queue) {
	return queue->front != -1 && queue->front == queue->rear;
}

/* True when rear has reached the last slot of the backing array. */
bool isQueueFull(queue_t* queue) {
	const int lastSlot = queue->capcity - 1;
	return lastSlot == queue->rear;
}

/*
 * Appends item at the rear of the queue.
 * If the queue is full the call does nothing and the item is not stored.
 */
void enQueue(queue_t* queue, queue_node_t* item) {
	if (!isQueueFull(queue)) {
		queue->rear += 1;
		queue->arr[queue->rear] = item;
		// The first element of an empty queue is also its front.
		if (queue->front == -1)
			queue->front = 0;
	}
}

/*
 * Removes and returns the element at the front of the queue,
 * or returns NULL when the queue is empty.
 */
queue_node_t* deQueue(queue_t* queue) {
	queue_node_t* head = NULL;

	if (!isQueueEmpty(queue)) {
		head = queue->arr[queue->front];
		if (queue->front != queue->rear) {
			queue->front += 1;
		} else {
			// The last element just left: return to the empty state.
			queue->front = -1;
			queue->rear = -1;
		}
	}
	return head;
}

/* Returns the front element without removing it, or NULL when the queue is empty. */
queue_node_t* getFront(queue_t* queue) {
	queue_node_t* head = NULL;

	if (!isQueueEmpty(queue))
		head = queue->arr[queue->front];
	return head;
}

/*
 * Dequeues the node with the smaller frequency among the two queue fronts.
 *
 * An empty queue is skipped in favour of the other one; on equal frequencies
 * the node from queueTwo is taken. Returns NULL when both queues are empty.
 */
queue_node_t* findMin(queue_t* queueOne, queue_t* queueTwo) {
	// queueTwo is the default: it covers an empty queueOne as well as ties.
	queue_t* source = queueTwo;

	if (!isQueueEmpty(queueOne)) {
		if (isQueueEmpty(queueTwo) ||
		    getFront(queueOne)->frequent < getFront(queueTwo)->frequent) {
			source = queueOne;
		}
	}
	return deQueue(source);
}

/* Prints the first n characters of arr followed by a newline. */
void printArr(char* arr, int n) {
	int idx = 0;

	while (idx < n) {
		printf("%c", arr[idx]);
		++idx;
	}
	cout << endl;
}

/* True when the node has neither a left nor a right child. */
bool isLeaf(queue_node_t* node) {
	return !node->lchild && !node->rchild;
}

/*
 * Builds a Huffman tree with the two-queue method.
 *
 * data      - the symbols.
 * frequents - frequency of each symbol; the entries must already be in
 *             non-decreasing order, because the leaves are queued in the
 *             order given and only queue fronts are ever compared.
 * size      - number of symbols.
 *
 * Returns the root of the tree (for size == 1 this is the single leaf), or
 * NULL when size <= 0, an argument is NULL, or a queue cannot be allocated.
 * The tree nodes belong to the caller; the helper queues are released here.
 * A failed node allocation inside createNode is not handled by this function.
 */
queue_node_t* buildHuffmanTree(char* data, int* frequents, int size) {
	queue_node_t* root = NULL;
	queue_t* queueOne;
	queue_t* queueTwo;
	int i;

	if (NULL == data || NULL == frequents || size <= 0)
		return NULL;

	queueOne = createQueue(size);  // leaves, in input order
	queueTwo = createQueue(size);  // merged subtrees, in creation order

	if (NULL != queueOne && NULL != queueTwo) {
		for (i = 0; i < size; i++)
			enQueue(queueOne, createNode(data[i], frequents[i]));

		// Keep joining the two lightest nodes. When no second node is left,
		// the one in hand is the root; this also covers a single symbol,
		// which used to end in a NULL dereference.
		root = findMin(queueOne, queueTwo);
		for (;;) {
			queue_node_t* rchild = findMin(queueOne, queueTwo);
			queue_node_t* top;

			if (NULL == rchild)
				break;

			top = createNode('$', root->frequent + rchild->frequent);
			top->lchild = root;
			top->rchild = rchild;
			enQueue(queueTwo, top);

			root = findMin(queueOne, queueTwo);
		}
	}

	// The queues only hold pointers; the nodes themselves live on in the tree.
	if (NULL != queueOne) {
		free(queueOne->arr);
		free(queueOne);
	}
	if (NULL != queueTwo) {
		free(queueTwo->arr);
		free(queueTwo);
	}
	return root;
}

/*
 * Prints "<symbol>:<code>" for every leaf below node, left subtree first.
 *
 * node - subtree to walk.
 * arr  - scratch buffer holding the bits of the path from the root to node.
 * top  - number of bits currently stored in arr.
 *
 * A step to the left child appends '0', a step to the right child appends '1'.
 */
void printCodes(queue_node_t* node, char* arr, int top) {
	// A leaf has no children to descend into, so it can be handled up front.
	if (isLeaf(node)) {
		printf("%c:", node->data);
		printArr(arr, top);
		return;
	}

	if (node->lchild) {
		arr[top] = '0';
		printCodes(node->lchild, arr, top + 1);
	}

	if (node->rchild) {
		arr[top] = '1';
		printCodes(node->rchild, arr, top + 1);
	}
}

void HuffmanCodes(char* data, int* frequents, int size) {

	queue_node_t* root = buildHuffmanTree(data, frequents, size);

	char* arr = (char*)malloc(size * sizeof(char));

	int top = 0;

	printCodes(root, arr, top);

	free(arr);

}

/*
 * Demo driver: prints the Huffman codes of six symbols whose frequencies
 * are listed in ascending order.
 */
int main(int argc, char* argv[]) {
	char symbols[] = {'a', 'b', 'c', 'd', 'e', 'f'};
	int counts[] = {5, 9, 12, 13, 16, 45};
	const int symbolCount = sizeof(symbols) / sizeof(symbols[0]);

	HuffmanCodes(symbols, counts, symbolCount);

	cin.get();  // keep the console window open until Enter is pressed
	return 0;
}

huffman 编码的更多相关文章

[老文章搬家] 关于 Huffman 编码
按:去年接手一个项目,涉及到一个一个叫做Mxpeg的非主流视频编码格式,编解码器是厂商以源代码形式提供的,但是可能代码写的不算健壮,以至于我们tcp直连设备很正常,但是经过一个UDP数据分发服务器之后 ...
Huffman编码
#define _CRT_SECURE_NO_WARNINGS #include <iostream> #include <cstdio> #include <cstri ...
【数据压缩】Huffman编码
1. 压缩编码概述数据压缩在日常生活极为常见,平常所用到jpg.mp3均采用数据压缩(采用Huffman编码)以减少占用空间.编码\(C\)是指从字符空间\(A\)到码字表\(X\)的映射.数据压缩 ...
优先队列求解Huffman编码 c++
优先队列小析优先队列的模板: template <class T, class Container = vector<T>,class Compare = less< ...
Huffman编码实现电文的转码与译码
//first thing:thanks to my teacher---chenrong Dalian Maritime university /* 构造Huffman Tree思路: ( ...
基于二叉树和数组实现限制长度的最优Huffman编码
具体介绍详见上篇博客:基于二叉树和双向链表实现限制长度的最优Huffman编码基于数组和基于链表的实现方式在效率上有明显区别: 编码256个符号,符号权重为1...256,限制长度为16,循环编码1 ...
uvalive 2088 - Entropy(huffman编码）
题目连接:2088 - Entropy 题目大意:给出一个字符串, 包括A~Z和_, 现在要根据字符出现的频率为他们进行编码,要求编码后字节最小, 然后输出字符均为8字节表示时的总字节数, 以及最小的 ...
Jcompress: 一款基于huffman编码和最小堆的压缩、解压缩小程序
前言最近基于huffman编码和最小堆排序算法实现了一个压缩.解压缩的小程序.其源代码已经上传到github上面: Jcompress下载地址 .在本人的github上面有一个叫Utility的re ...
DS二叉树--Huffman编码与解码
题目描述 1.问题描述给定n个字符及其对应的权值,构造Huffman树,并进行huffman编码和译(解)码. 构造Huffman树时,要求左子树根的权值小于.等于右子树根的权值. 进行Huffma ...

随机推荐

HTTP学习笔记
最近在看HTTP权威指南, 然后准备从Python的request库入手,看它的源代码实现 http://cn.python-requests.org/zh_CN/latest/ 挖坑今年准备在gi ...
Unity 4.6 uGUI的点击事件
因为Unity 4.6刚刚发布,自带的uGUI功能的相关资料还不是很完善,今天刚装的Unity 4.6,想看一下uGUI是否好用,那么开始就今天的学习吧啊! 1,新建一个空的工程.
LINQ 基本子句之二 join
Join子句据说可以实现3中连接关系. 1.内部连接——元素的连接关系必须同时满足被连接的两个数据源 2.分组连接 3.左外连接 1.最基本的,内部连接,类似于sql中inner join. 由于st ...
Jquery时间段选择器
效果(有给小bug, 在时间的大小比较上.): HTML: <html> <head> <title>测试DatePicker</title> < ...
js中slice(),splice(),split(),substring(),substr()的使用方法和区别
1.slice(): Array和String对象都有在Array中 slice(i,[j]) i为开始截取的索引值,负数代表从末尾算起的索引值,-1为倒数第一个元素j为结束的索引值,缺省时则获取 ...
GPS数据处理 - 字符串函数的灵活应用
题目内容: NMEA- 0183协议是为了在不同的GPS(全球定位系统)导航设备中建立统一的BTCM(海事无线电技术委员会)标准,由美国国家海洋电子协会(NMEA- The National Mari ...
正确合理的建立MYSQL数据库索引
写在前面:索引对查询的速度有着至关重要的影响,理解索引也是进行数据库调优的起点.考虑如下情况,假设数据库中一个表有10^6条记录,DBMS的页面大小为4K,并存储100条记录.如果没有索引,查询将对整 ...
uboot相关命令及用法
进入uboot时,在命令行上敲“?” ,回车就会打印出在uboot里可用的命令: #?? - alias for 'help'base - print or set address ...
Python学习笔记(六)Python的列表生成式、生成器
列表生成式 List Comprehensions 列表生成式是Python内置的非常简单却强大的可以用来创建list的生成式. 简单的数值范围的list可以使用一下方式生成: >>> ...
禁止Chrome浏览器缓存的方法
web开发的人经常chrome和firefox作为开发调试工具,有些时候需要禁止chrome浏览器缓存,最近也用到禁止缓存,以下介绍几种禁止chrome浏览器缓存的方法作为记录. HTML: < ...

huffman 编码

huffman 编码的更多相关文章

随机推荐

热门专题