哈夫曼树
哈夫曼树又被称为最优二叉树,是一类带权(权值就是定义的路径上面的值,哈夫曼树中的权值可以理解为:权值大表明出现概率大)路径最短的二叉树。哈夫曼树是二叉树的一种应用,在信息检索中很常用
路径:树中一个节点到另一个节点之间的分支构成这两个节点之间的路径;
节点之间的路径长度(不带权):从一个节点到另一个节点之间的分支数量称为两个节点之间的路径长度。
树的路径长度:从根节点到树中每一个节点的路径之和。
节点的带权路径长度:从该节点到根节点之间的路径长度与节点上权的乘积。
树的带权路径长度:树中所有叶子节点的带权路径长度之和
带权路径最小的二叉树被称为哈夫曼树或最优二叉树。
哈夫曼树的重要定理
对于具有n个叶子节点的哈夫曼树,一共需要2*n-1个节点。。
因为对于二叉树来说,有3种类型节点,即度数(节点拥有的子树的个数被称为节点的度)为2的节点,和度数为1的节点和度数为0的节点。而哈夫曼树的非叶子节点都是由两个节点合并产生,所以不会出现度数为1的节点。而生成的非叶子节点的个数为叶子节点个数-1,因此n个叶子节点的哈夫曼树,一共需要2*n-1个节点。
哈夫曼树和哈夫曼编码的存储表示
//------哈夫曼树和哈夫曼编码的存储表示--------
typedef struct{
unsigned int weight;
unsigned int parent, lcd, rcd;
} HTNode, *HuffmanTree;
typedef char **HuffmanCode;
在HT[1…i-1]选择parent=0且weight最小的两个结点
int min1(HuffmanTree t, int i)
{
/* 函数void select()调用 */
int j, flag;
unsigned int k = 1e9; /* 取k为不小于可能的值 */
for (j = 1; j <= i; j++)
if (t[j].weight < k&&t[j].parent == 0)
k = t[j].weight, flag = j;
t[flag].parent = 1;
return flag;
}
void Select(HuffmanTree t, int i, int &s1, int &s2)
{
/* s1为最小的两个值中序号小的那个 */
int j;
s1 = min1(t, i);
s2 = min1(t, i);
if (s1 > s2)
{
j = s1;
s1 = s2;
s2 = j;
}
}
算法6.12
void HuffmanCoding(HuffmanTree &HT,HuffmanCode &HC,int *w,int n){
//w存放n个字符的权值(均>0)构造哈夫曼树HT,并求出n个字符的哈夫曼树编码HC
if(n<=1)
return;
int m = 2 * n - 1;
int i, s1, s2;
HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
HuffmanTree p;
for (p = HT + 1, i = 1; i <= n; ++i, ++p, ++w){
p->weight = *w;
cout<<*w<<" ";
p->parent = 0;
p->lcd = 0;
p->rcd = 0;
}
cout<<endl;
for (i = n + 1; i <= m; ++i, ++p){
p->weight = 0;
p->parent = 0;
p->lcd = 0;
p->rcd = 0;
}
cout<<"Try to print the initial huffman Tree table."<<endl;
for(int i = 1;i<=m;++i){
cout<<HT[i].weight<<" "<<HT[i].parent<<" "<<HT[i].lcd<<" "<<HT[i].rcd<<endl;
}
for (i = n + 1; i <= m;i++){
Select(HT, i - 1, s1, s2);
HT[s1].parent = i;
HT[s2].parent = i;
HT[i].lcd = s1;
HT[i].rcd = s2;
HT[i].weight = HT[s1].weight + HT[s2].weight;
}
cout<<"Try to print the created huffman Tree table."<<endl;
for(int i = 1;i<=m;++i){
cout<<HT[i].weight<<" "<<HT[i].parent<<" "<<HT[i].lcd<<" "<<HT[i].rcd<<endl;
}
HC = (HuffmanCode)malloc((n + 1) * sizeof(char *));
char* cd = (char *)malloc(n * sizeof(char));
cd[n - 1] = '\0';
int start, c, f;
for (i = 1; i <= n;i++){
start = n - 1;
for (c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent){
if (HT[f].lcd == c)
cd[--start] = '0';
else
cd[--start] = '1';
}
HC[i] = (char *)malloc((n - start) * sizeof(char));
strcpy(HC[i], &cd[start]);
free(cd);
}
}
算法6.13
void HuffmanCoding2(HuffmanTree &HT,HuffmanCode &HC,int *w,int n){
//w存放n个字符的权值(均>0)构造哈夫曼树HT,并求出n个字符的哈夫曼树编码HC
if(n<=1)
return;
int m = 2 * n - 1;
int i, s1, s2;
HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
HuffmanTree p;
for (p = HT + 1, i = 1; i <= n; i++, p++, w++){
p->weight = *w;
p->parent = 0;
p->lcd = 0;
p->rcd = 0;
}
for (i = n + 1; i <= m; i++, p++){
p->weight = 0;
p->parent = 0;
p->lcd = 0;
p->rcd = 0;
}
for (i = n + 1; i <= m;i++){
Select(HT, i - 1, s1, s2);
HT[s1].parent = i;
HT[s2].parent = i;
HT[i].lcd = s1;
HT[i].rcd = s2;
HT[i].weight = HT[s1].weight + HT[s2].weight;
}
HC = (HuffmanCode)malloc((n + 1) * sizeof(char *));
int q = m;
char *cd = (char *)malloc(n * sizeof(char));
int cdlen=0;
for(i=1;i<=m;++i)
HT[i].weight=0;
while(q){
if(HT[q].weight==0){
HT[q].weight = 1;
if (HT[q].lcd != 0){
q = HT[q].lcd;
cd[cdlen++] = '0';
}
else if(HT[q].rcd==0){
HC[q] = (char *)malloc((cdlen + 1) * sizeof(char));
cd[cdlen] = '\0';
strcpy(HC[q], cd);
}
}
else if(HT[q].weight==1){
HT[q].weight = 2;
if(HT[q].rcd!=0){
q=HT[q].rcd;
cd[cdlen++]='1';
}
}
else{
HT[q].weight=0;
q=HT[q].parent;
--cdlen;
}
}
}
完整代码
#include<bits/stdc++.h>
using namespace std;
//------哈夫曼树和哈夫曼编码的存储表示--------
typedef struct{
unsigned int weight;
unsigned int parent, lcd, rcd;
} HTNode, *HuffmanTree;
typedef char **HuffmanCode;
//-----哈夫曼树-----------------------------
int min1(HuffmanTree t, int i)
{
/* 函数void select()调用 */
int j, flag;
unsigned int k = 1e9; /* 取k为不小于可能的值 */
for (j = 1; j <= i; j++)
if (t[j].weight < k&&t[j].parent == 0)
k = t[j].weight, flag = j;
t[flag].parent = 1;
return flag;
}
void Select(HuffmanTree t, int i, int &s1, int &s2)
{
/* s1为最小的两个值中序号小的那个 */
int j;
s1 = min1(t, i);
s2 = min1(t, i);
if (s1 > s2)
{
j = s1;
s1 = s2;
s2 = j;
}
}
//算法6.12
void HuffmanCoding(HuffmanTree &HT,HuffmanCode &HC,int *w,int n){
//w存放n个字符的权值(均>0)构造哈夫曼树HT,并求出n个字符的哈夫曼树编码HC
if(n<=1)
return;
int m = 2 * n - 1;
int i, s1, s2;
HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
HuffmanTree p;
for (p = HT + 1, i = 1; i <= n; ++i, ++p, ++w){
p->weight = *w;
cout<<*w<<" ";
p->parent = 0;
p->lcd = 0;
p->rcd = 0;
}
cout<<endl;
for (i = n + 1; i <= m; ++i, ++p){
p->weight = 0;
p->parent = 0;
p->lcd = 0;
p->rcd = 0;
}
cout<<"Try to print the initial huffman Tree table."<<endl;
for(int i = 1;i<=m;++i){
cout<<HT[i].weight<<" "<<HT[i].parent<<" "<<HT[i].lcd<<" "<<HT[i].rcd<<endl;
}
for (i = n + 1; i <= m;i++){
Select(HT, i - 1, s1, s2);
HT[s1].parent = i;
HT[s2].parent = i;
HT[i].lcd = s1;
HT[i].rcd = s2;
HT[i].weight = HT[s1].weight + HT[s2].weight;
}
cout<<"Try to print the created huffman Tree table."<<endl;
for(int i = 1;i<=m;++i){
cout<<HT[i].weight<<" "<<HT[i].parent<<" "<<HT[i].lcd<<" "<<HT[i].rcd<<endl;
}
HC = (HuffmanCode)malloc((n + 1) * sizeof(char *));
char* cd = (char *)malloc(n * sizeof(char));
cd[n - 1] = '\0';
int start, c, f;
for (i = 1; i <= n;i++){
start = n - 1;
for (c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent){
if (HT[f].lcd == c)
cd[--start] = '0';
else
cd[--start] = '1';
}
HC[i] = (char *)malloc((n - start) * sizeof(char));
strcpy(HC[i], &cd[start]);
free(cd);
}
}
//算法6.13
void HuffmanCoding2(HuffmanTree &HT,HuffmanCode &HC,int *w,int n){
//w存放n个字符的权值(均>0)构造哈夫曼树HT,并求出n个字符的哈夫曼树编码HC
if(n<=1)
return;
int m = 2 * n - 1;
int i, s1, s2;
HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
HuffmanTree p;
for (p = HT + 1, i = 1; i <= n; i++, p++, w++){
p->weight = *w;
p->parent = 0;
p->lcd = 0;
p->rcd = 0;
}
for (i = n + 1; i <= m; i++, p++){
p->weight = 0;
p->parent = 0;
p->lcd = 0;
p->rcd = 0;
}
for (i = n + 1; i <= m;i++){
Select(HT, i - 1, s1, s2);
HT[s1].parent = i;
HT[s2].parent = i;
HT[i].lcd = s1;
HT[i].rcd = s2;
HT[i].weight = HT[s1].weight + HT[s2].weight;
}
HC = (HuffmanCode)malloc((n + 1) * sizeof(char *));
int q = m;
char *cd = (char *)malloc(n * sizeof(char));
int cdlen=0;
for(i=1;i<=m;++i)
HT[i].weight=0;
while(q){
if(HT[q].weight==0){
HT[q].weight = 1;
if (HT[q].lcd != 0){
q = HT[q].lcd;
cd[cdlen++] = '0';
}
else if(HT[q].rcd==0){
HC[q] = (char *)malloc((cdlen + 1) * sizeof(char));
cd[cdlen] = '\0';
strcpy(HC[q], cd);
}
}
else if(HT[q].weight==1){
HT[q].weight = 2;
if(HT[q].rcd!=0){
q=HT[q].rcd;
cd[cdlen++]='1';
}
}
else{
HT[q].weight=0;
q=HT[q].parent;
--cdlen;
}
}
}
int main()
{
HuffmanTree HT;
HuffmanCode HC;
int n;
int data[1000];
while(scanf("%d", &n) != EOF){
for (int i = 1; i <= n; ++i) {
scanf("%d", &data[i]);
}
cout<<endl;
HuffmanCoding2(HT, HC, data+1, n);
for (int i = 1; i <= n; ++i) {
printf("%s\n", HC[i]);
}
delete(HC);
delete(HT);
}
return 0;
}
测试样例
input:
8
5 29 7 8 14 23 3 11
output:
0110
10
1110
1111
110
00
0111
010