该代码采用二叉树结合双向链表实现了限制长度的最优Huffman编码,本文代码中的权重全部采用整数值表示。http://pan.baidu.com/s/1mgHn8lq

算法原理详见:A fast algorithm for optimal length-limited Huffman codes.pdf

示例:符号A、B、C、D、E的权重分别为10、6、2、1、1

   不限制长度的最优Huffman编码为A:0,B:10,C:110,D:1110,E:1111,平均码长为1.8bits/symbol;

   限制长度3的最优Huffman编码为  A:0,B:100,C:101,D:110,E:111,  平均码长为2.0bits/symbol;

限制长度最优Huffman编码的实现代码如下:

//Reference:A fast algorithm for optimal length-limited Huffman codes.pdf,http://pan.baidu.com/s/1o6E19Bs
//author:by Pan Yumin.2014-06-18
//with the method of BinaryTree and linked-list
#include <stdio.h>
#include <memory.h>
#include <malloc.h>

#define MaxSymbols 256 //the Maximum Number of Symbols
#define MaxHuffLen 16 //the Limited Length

typedef unsigned char boolean;
#ifndef FALSE //in case these macros already exist
#define FALSE 0 //values of boolean
#endif
#ifndef TRUE
#define TRUE 1
#endif

//One entry of the package-merge work list. Leaves describe a single symbol at a
//single level; package nodes own the two nodes they combine through left/right.
typedef struct __Node{
    int width;  //item width; leaves are created with 1<<(MaxHuffLen-depth-1)
    int weight; //symbol frequency, or the sum of both children for a package
    int index;  //position of the symbol in the sorted frequency order (leaves only)
    int depth;  //level the leaf was created for, 0..MaxHuffLen-1 (leaves only)

    struct __Node *prev;  //double linked list
    struct __Node *next;  //double linked list
    struct __Node *left;  //left child
    struct __Node *right; //right child
}Node;

//Resulting code table entry for one symbol.
typedef struct __HuffTable{
    unsigned int index; //original (pre-sort) symbol index
    unsigned int len;   //code length in bits
    unsigned int code;  //code bits, right aligned
}HuffTable;

//Test memory leak
/*int g_malloc = 0,g_free = 0; void* my_malloc(int size){
g_malloc++;
return malloc(size);
}
void my_free(void *ptr){
if(ptr){
g_free++;
free(ptr);
ptr = NULL;
}
}
#define malloc my_malloc
#define free my_free*/ //Get the smallest term in the diadic expansion of X
//Return the smallest term in the dyadic (power-of-two) expansion of X,
//i.e. the value of the lowest set bit of X (12 -> 4, 1 -> 1).
//X: value to inspect.
//Returns 0 when X is 0: there is no term to report, and the former
//shift-and-count loop never terminated for that input.
int GetSmallestTerm(int X)
{
    //Work on the unsigned representation so negation is well defined.
    unsigned int bits = (unsigned int)X;

    //bits & -bits isolates the lowest set bit (and yields 0 for 0).
    return (int)(bits & (0u - bits));
}
//Release the subtree rooted at head and record the fate of every leaf in it.
//A leaf (node without children) writes Flag[depth*Symbols+index]:
//0 when isDelete is set, 1 otherwise. Children are visited before the node
//itself is freed. head must not be NULL.
void deleteNode(Node *head,unsigned char *Flag,int Symbols,boolean isDelete)
{
    Node *lhs = head->left;
    Node *rhs = head->right;

    if(lhs == NULL && rhs == NULL){
        Flag[head->depth*Symbols+head->index] = isDelete ? 0 : 1;
    }
    if(lhs != NULL){
        deleteNode(lhs,Flag,Symbols,isDelete);
    }
    if(rhs != NULL){
        deleteNode(rhs,Flag,Symbols,isDelete);
    }
    free(head);
} //N:the Num of node
//Package_Merge: one package-and-merge step on the doubly linked work list.
//Starting at the tail, neighbouring pairs of nodes whose width equals minWidth
//are combined into freshly allocated package nodes of width 2*minWidth whose
//weight is the sum of the pair. A leftover unpaired minWidth node is released
//through deleteNode(...,TRUE). The packages are then spliced into the trailing
//run of 2*minWidth nodes, ordered by the weight comparison in the merge loop.
//  head     : list head; only used here as a stop marker, never packaged
//  tail     : in/out, address of the caller's tail pointer; updated to the new last node
//  minWidth : width of the nodes to be packaged
//  Flag     : forwarded to deleteNode
//  Symbols  : forwarded to deleteNode
//NOTE(review): malloc results are used without a NULL check, and package nodes
//are not zeroed, so their index/depth fields stay uninitialized.
void Package_Merge(Node *head,Node **tail,int minWidth,unsigned char * Flag,int Symbols)
{
Node *tmp = NULL,*node_1 = NULL,*node_2 = NULL;
Node *node_P_head = NULL,*node_P_tail = NULL; //node_P_tail not store data,node_P_head store data
Node *node_head = head; //the head of 2*minWidth
//package
//node_P_tail is a zeroed end marker for the temporary package list; node_2
//tracks the current front of that list, node_1 the node just before the tail.
node_P_tail = (Node *)malloc(sizeof(Node));
memset(node_P_tail,0,sizeof(Node)); node_2 = node_P_tail; node_1 = (*tail)->prev;
//Each pass packages node_1 together with its successor (the current tail),
//then re-reads node_1 from the new tail.
for(;node_1 != NULL && node_1 != head; node_1=(*tail)->prev){
if(node_1->width == minWidth){
tmp = (Node*)malloc(sizeof(Node));
tmp->right = node_1->next; //insert from right to left,so the weight from small to large
tmp->left = node_1;
tmp->width = 2*minWidth;
tmp->weight = node_1->weight+node_1->next->weight;
//Prepend the new package to the package list.
tmp->next = node_2;
tmp->prev = NULL; node_2->prev = tmp;
node_2 = tmp;
//Step the tail back past the two nodes that were just packaged.
*tail = node_1->prev; (*tail)->next = NULL; //two intervals
}else{
break;
}
}
//node_P_head is the first package, or the end marker itself when no package was built.
node_P_head = node_2; if(*tail != head && (*tail)->width == minWidth){ //if the number of minwidth is odd,delete the max weight item of minwidth
*tail = (*tail)->prev;
deleteNode((*tail)->next,Flag,Symbols,TRUE);
(*tail)->next = NULL;
} //find the range of 2*minWidth
//Walk backwards over the trailing nodes whose width is already 2*minWidth.
node_1 = *tail;
for(;node_1 != head && node_1->width == 2*minWidth;node_1 = node_1->prev){
}
node_head = node_1; //the head of 2*minWidth, node_head not store 2*minWidth //merge
node_1 = node_head->next; node_2 = node_P_head;
//Two-way merge: keep advancing in the main list while its node is at least
//as heavy as the current package; otherwise splice the package in front of node_1.
for(;node_1 != NULL && node_2 != node_P_tail;){
if(node_1->weight >= node_2->weight){
node_1 = node_1->next;
}else{ //insert to the major list
node_1->prev->next = node_2;
node_2->prev = node_1->prev;
//After advancing, node_2->prev is the package that was just inserted.
node_1->prev = node_2; node_2 = node_2->next;
node_2->prev->next = node_1; node_2->prev = NULL;
}
}
if(node_1 == NULL){ //insert list 2 to the major list
//Main list exhausted: append whatever is left of the package list.
(*tail)->next = node_2;
node_2->prev = *tail;
//node_P_tail->prev is the last package, or the old tail when node_2 is the end marker.
*tail = node_P_tail->prev;
(*tail)->next = NULL;
free(node_P_tail); node_P_tail = NULL;
}else{
//Every package was spliced in; only the end marker remains to be released.
free(node_P_tail); node_P_tail = NULL;
}
} //N:the Num of node
//Drive the selection loop until the requested total width X is used up.
//Each round looks at the smallest term of X and at the width of the tail node:
//  - equal   : the tail node is chosen (deleteNode with isDelete FALSE) and the
//              term is subtracted from X;
//  - smaller : the tail-width nodes are packaged and merged (Package_Merge);
//  - larger  : the request cannot be met.
//Returns 0 once X reaches 0, -1 when the list holds no data node while X is
//still positive, -2 when the tail width exceeds the required term.
int LengthLimitedHuffmanCode(Node *head,Node *tail,int X,unsigned char * Flag,int Symbols)
{
    while(X > 0){
        const int term = GetSmallestTerm(X);
        Node *before;
        int tailWidth;

        if(head->next == NULL){ //I empty
            return -1;
        }

        tailWidth = tail->width; //Just for Huffman Code,else r = GetMinWidth(head);
        if(tailWidth > term){
            return -2;
        }
        if(tailWidth < term){
            Package_Merge(head,&tail,tailWidth,Flag,Symbols);
            continue;
        }

        //tailWidth == term: take the tail node and unlink it from the list.
        before = tail->prev;
        deleteNode(before->next,Flag,Symbols,FALSE);
        before->next = NULL;
        tail = before;
        X -= term;
    }
    return 0;
}
//Print the code word of one table entry to stdout as binary digits,
//most significant of the Huffcode.len bits first. No newline is written.
void PrintHuffCode(HuffTable Huffcode)
{
    int bit = Huffcode.len-1;

    while(bit >= 0){
        printf("%d",(Huffcode.code>>bit) & 0x01);
        bit--;
    }
}
//Turn the selection table produced by the package-merge pass into code words.
//  HuffCode  : output table with at least Symbols entries; index, len and code are written
//  Flag      : L rows of Symbols bytes; Flag[j*Symbols+i] is added to the length of symbol i
//  L         : number of rows in Flag; must lie in 0..MaxHuffLen because the
//              per-length tables below are sized by MaxHuffLen
//  Symbols   : number of symbols
//  SortIndex : SortIndex[i] is stored as HuffCode[i].index
//Codes are assigned per length in increasing symbol order, starting from
//codes[len], where codes[k+1] = (codes[k] + count of length-k symbols) << 1.
//An out-of-range L leaves HuffCode untouched instead of overrunning the tables.
void GenerateHuffmanCode(HuffTable *HuffCode,unsigned char *Flag,int L,int Symbols,int *SortIndex)
{
    unsigned int codes[MaxHuffLen+2]={0},rank[MaxHuffLen+1] = {0}; //rank: the number of symbols in every length
    int i=0,j=0;

    if(L < 0 || L > MaxHuffLen){
        return;
    }

    //code length of every symbol, and how many symbols share each length
    for(i=0;i<Symbols;i++){
        unsigned int length = 0; //start from zero rather than from whatever the caller left in len

        for(j=0;j<L;j++){
            length += Flag[j*Symbols+i];
        }
        HuffCode[i].len = length;
        if(length != 0 && length <= (unsigned int)MaxHuffLen)
            rank[length]++;
        HuffCode[i].index = SortIndex[i];
    }

    //find the first code of every length; rank is reset to serve as a running counter
    for(i=0;i<=L;i++){
        codes[i+1] = (codes[i]+rank[i])<<1;
        rank[i] = 0;
    }

    //code
    for(i=0;i<Symbols;i++){
        unsigned int length = HuffCode[i].len;

        if(length > (unsigned int)MaxHuffLen){
            continue; //Flag held values other than 0/1; no valid slot in the tables
        }
        HuffCode[i].code = codes[length] + rank[length]++;
    }
}
float BitsPerSymbol(HuffTable *HuffCode,int *weight,int Symbols,int WeightSum)
{
float bitspersymbol = 0.0;
int i;
for(i=0;i<Symbols;i++){
bitspersymbol += (float)HuffCode[i].len*weight[i];
}
return bitspersymbol/WeightSum;
} void FreqSort(int *Freq,int *SortIndex,int Symbols)
{
int i,j,tmp;
for(i=0;i<Symbols;i++){
for(j=i+1;j<Symbols;j++){
if(Freq[i]<Freq[j]){
tmp = Freq[i];
Freq[i] = Freq[j];
Freq[j] = tmp; tmp = SortIndex[i];
SortIndex[i] = SortIndex[j];
SortIndex[j] = tmp;
}
}
}
} int GenLenLimitedOptHuffCode(int *Freq,int Symbols)
{
int i,j;
unsigned char *Flag = NULL; //record the state of the node
unsigned int rank[MaxHuffLen];
Node *node = NULL,*head = NULL,*tail = NULL,*tmp = NULL; //head not store data,just a head,tail store data
int Ret = 0;
HuffTable HuffCode[MaxSymbols];
float bitspersymbols = 0.0;
int WeightSum = 0;
int SortIndex[MaxSymbols]; if(Symbols > (1<<MaxHuffLen)){
printf("Symbols > (1<<MaxHuffLen)\n");
return -1;
} for(i=0;i<MaxSymbols;i++){
SortIndex[i] = i;
}
FreqSort(Freq,SortIndex,Symbols); //sort for(i=0;i<Symbols;i++){
WeightSum += Freq[i];
} head = (Node*)malloc(sizeof(Node));
memset(head,0,sizeof(Node));
Flag = (unsigned char*)malloc(MaxHuffLen*Symbols*sizeof(unsigned char));
memset(Flag,1,MaxHuffLen*Symbols*sizeof(unsigned char)); memset(HuffCode,0,sizeof(HuffCode));
node = head; for(i=0;i<MaxHuffLen;i++){
for(j=0;j<Symbols;j++){
tmp = (Node*)malloc(sizeof(Node));
tmp->prev = node; tmp->next = NULL;
tmp->left = NULL; tmp->right = NULL;
tmp->width = 1<<(MaxHuffLen-i-1);
tmp->weight = Freq[j];
tmp->index = j; tmp->depth = i;
node->next = tmp;
node = tmp;
}
}
tail = node; //tail
Ret = LengthLimitedHuffmanCode(head,tail,(Symbols-1)<<MaxHuffLen,Flag,Symbols); GenerateHuffmanCode(HuffCode,Flag,MaxHuffLen,Symbols,SortIndex); //print HuffCode
for(i=0;i<Symbols;i++){
printf("%03d weight:%04d Code:",HuffCode[i].index,Freq[i]);
PrintHuffCode(HuffCode[i]);
printf("\tCodeLen:%02d",HuffCode[i].len);
printf("\n");
}
bitspersymbols = BitsPerSymbol(HuffCode,Freq,Symbols,WeightSum);
printf("average code length:%f bits/symbol.\n",bitspersymbols); free(head); head = NULL;
free(Flag); Flag = NULL; return Ret;
}
#include <time.h>
//Demo driver: encodes the five-symbol example and prints the code table.
int main(void)
{
    //alternative data set: {1,25,3,4,9,6,4,6,26,15,234,4578}; weight is not zero.
    int Freq[MaxSymbols] = {10,6,2,1,1}; //weight is not zero.
    const int SymbolCount = 5;

    GenLenLimitedOptHuffCode(Freq,SymbolCount);
    return 0;
}

执行上述程序,输出结果如下所示:





基于二叉树和双向链表实现限制长度的最优Huffman编码的更多相关文章

  1. 基于二叉树和数组实现限制长度的最优Huffman编码

    具体介绍详见上篇博客:基于二叉树和双向链表实现限制长度的最优Huffman编码 基于数组和基于链表的实现方式在效率上有明显区别: 编码256个符号,符号权重为1...256,限制长度为16,循环编码1 ...

  2. Jcompress: 一款基于huffman编码和最小堆的压缩、解压缩小程序

    前言 最近基于huffman编码和最小堆排序算法实现了一个压缩.解压缩的小程序.其源代码已经上传到github上面: Jcompress下载地址 .在本人的github上面有一个叫Utility的re ...

  3. DS二叉树--Huffman编码与解码

    题目描述 1.问题描述 给定n个字符及其对应的权值,构造Huffman树,并进行huffman编码和译(解)码. 构造Huffman树时,要求左子树根的权值小于.等于右子树根的权值. 进行Huffma ...

  4. 如何处理加括号的四则混合运算表达式——基于二叉树的实现(Eclipse平台 Java版)

    记得上<数据结构>课程时,利用栈的特性解决过四则混合运算表达式.而如今在编写小型关系数据库的时候,编译部分要处理where后面的逻辑表达式——检查语法正确与否的同时,还要将信息传给下一个接 ...

  5. 基于ZYNQ的uart传输任意长度的数据

    1.参考 UG585 网络笔记 参考:ZYNQ进阶之路14–PS端uart串口接收不定长数据 2.理论知识 参见上一次实验:基于ZYNQ 的UART中断实验之串口写数据到DDR3中 3.实验目的 基于 ...

  6. 基于GO语言实现的固定长度邀请码

    1. 选取数字加英文字母组成32个字符的字符串,用于表示32进制数. 2. 用一个特定的字符比如`G`作为分隔符,解析的时候字符`G`后面的字符不参与运算. 3. LEN表示邀请码长度,默认为6. g ...

  7. 数据结构-二叉树(6)哈夫曼树(Huffman树)/最优二叉树

    树的路径长度是从树根到每一个结点的路径长度(经过的边数)之和. n个结点的一般二叉树,为完全二叉树时取最小路径长度PL=0+1+1+2+2+2+2+… 带权路径长度=根结点到任意结点的路径长度*该结点 ...

  8. TLV(类型—长度—值)格式及编码

    转自: http://www.cnblogs.com/tml839720759/archive/2014/07/13/3841820.html 引子: 前段时间在项目中第一次接触TLV,项目中用这种格 ...

  9. 基于JVM(内存)和Tomcat性能调优

    一.总结前一天的学习 从“第三天”的性能测试一节中,我们得知了决定性能测试的几个重要指标,它们是: ü   吞吐量 ü   Responsetime ü   Cpuload ü   MemoryUsa ...

随机推荐

  1. Car Talk1

    This question is based on a Puzzler that was broadcast on the radioprogram Car Talk1: “I was driving ...

  2. BZOJ 1251 Splay维护序列

    思路: splay维护序列的裸题 啊woc调了一天 感谢yzy大佬的模板-- //By SiriusRen #include <cstdio> #include <cstring&g ...

  3. spring boot自动配置之jdbc

    1.DataSource配置 1.1 默认配置application.xml spring.datasource.url=jdbc:mysql://localhost/test spring.data ...

  4. Spring SpEL in JSP and Assign SpEL value to Java variable in JSP

    Spring SpEL in JSP and Assign SpEL value to Java variable in JSP method 1 use----ServletContextAttri ...

  5. day01-Python介绍,安装,idea

    一. python 简介 Python,读作['paɪθɑn],翻译成汉语是蟒蛇的意思,Python 的 logo 也是两条缠绕在一起的蟒蛇的样子,然而 Python 语言和蟒蛇实际上并没有一毛钱关系 ...

  6. NodeJS学习笔记 (13)数据加密-crypto(OK)

    写在前面 本章节写得差不多了,不过还需要再整理一下(TODO). hash例子 hash.digest([encoding]):计算摘要.encoding可以是hex.latin1或者base64.如 ...

  7. 紫书 例题 10-17 UVa 1639(数学期望+分数处理+处理溢出)

    设当前有k个,那么也就是说拿到其他图案的可能是(n-k)/n 那么要拿到一个就要拿n/(n-k)次 所以答案就是n(1/n + 1/(n-1) ......1/2 + 1 / 1) 看起来很简单,但是 ...

  8. OpenCv 人脸检測的学习

    近期公司要组织开发分享,可是自己还是新手真的不知道分享啥了,然后看了看前段时间研究过OpenCv,那么就分享他把. openCv就不介绍了,说下人脸检測.事实上是通过openCv里边已经训练好的xml ...

  9. 【python下使用OpenCV实现计算机视觉读书笔记2】图像与字节的变换

    import cv2 import numpy import os # Make an array of 120,000 random bytes. randomByteArray = bytearr ...

  10. 《机器学习系统设计》之应用scikit-learn做文本分类(上)

    前言: 本系列是在作者学习<机器学习系统设计>([美] WilliRichert)过程中的思考与实践,全书通过Python从数据处理.到特征project,再到模型选择,把机器学习解决这个 ...