# Computer science Topic:Huffman decoding

8 replies to this topic

### #1 linuxuser

linuxuser

Honourable Member

• 1,147 posts

Posted 09 April 2004 - 03:26 PM

Huffman is an algorithm for coding( compressing ) or decoding ( decompressing) a binary file....Actuallly there r 2 kinds...static Huffman algorithm..adaptic or dynamic huffman algorithm
I have decided to post simple decoding algorithm ( may be someone finds it useful ) and also my code to do that..( the code does not work well,,i am still working on that part..may be some one can explore it , play with it and fix it )

Algorithm
Before decompressing the file, we have to first regenerate the huffman tree from the table that was saved along with the compressed file.

To do this we use the following algorithm
1.We initialize an array having 511 elements to zero.
2.We take one element from the array and make it the root node.The root node is initially the current node.
3.We then read the ith (initially the first) bit of the code for the jth(initially the first) character in the table.
4.If the ith bit of the code read is a '1', we take another element from the array and make it the right child node of the current node, if it is a '0' then the node is made the left child node of the current node. If the current node already has a left or right child node we skip this step.
5.If the bit currently read is the last bit of the code then this node holds the character represented by the code.
6.This node is then made the current node.
7.i is incremented
8.Steps 3 to 5 are repeated until all the bits of the code for the jth character are read.
9.The current node is reset to the root node.
10.j is incremented and i reset to 0
11.steps 3 to 10 are repeated until the codes for all the characters are read.
Once this process is completed we will obtain the complete Huffman tree that was used to compress the file. This tree is used search for the character that was represented by the code in the compressed file.

Now the actual decompression process begins.
1.The current node is set to the root node.
2.A sequence of zeros and ones are read from the compressed file. For every '0' read we move to the left child node of the current node and for every '1' read we move to the right child node of the current node and set it as the new current node.
3.If the current node is a leaf node then we output the character that is represented by this node. The current node is reset to the root node.
4.Steps 2 to 3 are repeated until all the bytes in the file are read.
Once this process is complete the output file contains the decompressed data.

### #2 linuxuser

linuxuser

Honourable Member

• 1,147 posts

Posted 09 April 2004 - 03:28 PM

C++ Code ( Coded by me )

struct storage
{
unsigned int letter;
int repeat;
storage* left;
storage* right;
};

int main( int argc, void ** argv )
{
//It has to be 3 params
if( argc != 3 ){
printf("Error occurred. Invalid parameters");
return 0;
}
//Open files
char* temp = (char*)argv[1];
sf=fopen((char*)argv[1],"rb");
of=fopen((char*)argv[2],"wb");
if( sf == NULL ) return 1;
if( of == NULL ) return 1;

storage* st = new storage[256];
//Initialize
for( int i=0;i <256;i++)
{
st[i].letter = 0;
st[i].repeat = 0;
st[i].left = NULL;
st[i].right = NULL;

}

BuildTree( st, 0);
return 0;
}

void BuildTree( storage* storage1, int idx )
{
int i = 0, bitCount = 0, increment = 0, val = 0;
unsigned char achar;
storage *rootnode, *currentnode;

rootnode = &storage1[i];
currentnode = &storage1[i];
while( !feof( sf )){

achar = 0;
fread( (unsigned char*)&achar, sizeof( char ), 1, sf );
unsigned int ret= (unsigned int)achar;

bitCount = BitCount( ret );
for( int num=0;num<bitCount; num++ ){
/// for eq if we have 00000000 00000000 00000000 0000 0111 or simply 111
//to get first bit we need 111 & 100 ) so we r doing 1<<2, for second
//time we need to do 111 &010 or 1<<1..etc
val = ret& (1<< ((bitCount-1)-num));
val = val>>((bitCount-1)-num);
if( val == 1 ){
if( currentnode->right == NULL ){
i++;
currentnode->right = &storage1[i];
}
}
else{
if( currentnode->right == NULL ){
i++;
currentnode->left = &storage1[i];
}
}
if( increment == bitCount-1 ){
storage1[i].letter = ret;
}
currentnode = &storage1[i];
increment++;
}//end of for
currentnode = rootnode;

}//end of while

//Move at the beginning of the file
fseek( sf, 0L, SEEK_SET);

//Build the table
currentnode= rootnode;
while( !feof( sf )){

achar = 0;
fread( (unsigned char*)&achar, sizeof( char ), 1, sf );
unsigned int ret= (unsigned int)achar;

bitCount = BitCount( ret );
for( int num=0;num<bitCount; num++ ){
val = ret& (1<< ((bitCount-1)-num));
val = val>>((bitCount-1)-num);
if( currentnode->left == NULL && currentnode->right == NULL ){
printf("%c", (char)currentnode->letter );
currentnode = rootnode;//reset to the root node
continue;
}
if( val == 0 ){
currentnode = currentnode->left;
}
else{
currentnode = currentnode->right;
}
}//end of for
}//end of while

}

/* Returns bit count .Eg 1110 returns 4 */
int BitCount( unsigned int byte )
{
if( byte == 0 ) return 1;
int index = 0;
char bitbuffer[8 + 1];
int count = 8;

//Basically here we r comparing 1000 0000 and for eg 1110 and populating to the char array bitbuffer
//keep shifting 1110 to the left when,,it becomes 1
while(count--)
{
bitbuffer[8-(count+1)] = ( byte & MASK ) ? '1' : '0';
byte <<= 1;//Move all bit one left
}
bitbuffer[8] = '\0';

for( int i=0;i < 8;i++)
{
if( bitbuffer[i] == '1') break;
}
return 8-i;
}

Edited by rs_1915, 09 April 2004 - 03:29 PM.

### #3 Limitation//Moon

Limitation//Moon

• National Committee
• 2,810 posts

Posted 13 April 2004 - 03:27 PM

Thanks, any lynux source code

### #4 linuxuser

linuxuser

Honourable Member

• 1,147 posts

Posted 14 April 2004 - 05:40 PM

lots of linux source code should be in linux installation CD 2. Since linux is open source, source code for almost everything is available freely. Also some websites such as linux.org is good place for source codes.

### #5 linuxuser

linuxuser

Honourable Member

• 1,147 posts

Posted 15 April 2004 - 08:13 PM

LZW Compression
LZW is yet another famous compression method used to compress binary or text data.
For those who r interested, please check out web for detailed information on the algorithm. There is no point me typing here since u can easily get it somewhere else. Let me just show u the Psuedocode and my code to do the job. I think someone can benefit from the code. And this time the code is complete, it actually works,,may be there are couple of things u guys can work on
1) check for memory leak
2) enhance the error handling mechanism.

Psuedocode

STRING = get input character
WHILE there are still input characters DO
CHARACTER = get input character
IF STRING+CHARACTER is in the string table then
STRING = STRING+character
ELSE
output the code for STRING
add STRING+CHARACTER to the string table
STRING = CHARACTER
END of IF
END of WHILE
output the code for STRING

### #6 linuxuser

linuxuser

Honourable Member

• 1,147 posts

Posted 15 April 2004 - 08:16 PM

My C/C++ Code

////lzw.cpp
//Data Compression
#include<stdio.h>
#include<stdlib.h>
#include<string.h>

//Struct
struct strTable{
char* str;
unsigned int code;
};

//Define
#define MAX 5021
#define BITS 12

//Globals
FILE *sf, *df;

//Prototypes
int checkinTable( strTable* table, char* str );
int addinTable( strTable* table, char* str );
unsigned int getCodefromTable( strTable* table, char* str );
int lzwCompress( strTable* table );
int writeCode( unsigned int code );

int main( int argc, void** argv ){

if( argc != 3 ) {
printf("Error occurred. Invalid arguments\n");
return 0;
}

sf = fopen( (char*) argv[1], "rb");
df = fopen( (char*) argv[2], "wb");
if( sf == NULL ) {
printf("Error occurred. Invalid source file");
return 0;
}
if( df == NULL ){
printf("Error occurred. Invalid destination file");
return 0;
}

//Initialze the string table
int i = 0;
strTable* table = new strTable[ MAX ];
for( ; i < MAX; i++){
table[i].str = "";
table[i].code = 256 + i;
}

//lzw Compression
int ret = lzwCompress( table );
//Error code
switch( ret ){
case 1: printf("Error occurred. String could not be added in string table.\n");break;
case 2: printf("Error occurred. Write to destination file failed.\n");break;
//......More Error Messages
default: break;
}

//Cleanup
delete [] table;

fclose( sf );
fclose( df );
return 1;
}

int checkinTable( strTable* table, char* str )
{
int iFound = 0;
for(int i=0;i< MAX;i++){
if( strcmp( str, table[i].str ) == 0 ){
iFound = 1;
break;
}
}
return iFound;
}

int addinTable( strTable* table, char* str )
{
for(int i=0;i< MAX;i++){
if( strlen( table[i].str ) == 0 ){
table[i].str = new char[ strlen(str) + 1 ];
strcpy( table[i].str, str );
return 0;
}
}
return 1;
}

unsigned int getCodefromTable( strTable* table, char* str )
{
int size = strlen( str );
if( size == 1){

char achar = *str;
return (unsigned int)achar;
}
else{
//look up the table, only if string is more than one letter
for(int i=0;i< MAX;i++){
if( strcmp( str, table[i].str ) == 0 ){
return table[i].code;
}
}
}//end of else
return MAX + 1;
}

int lzwCompress( strTable* table )
{

char baseStr[256+ 1 ];
char* temp;
unsigned int code;
unsigned char achar;
if( !feof( sf )){
achar = 0;
fread((unsigned char*)&achar, sizeof( char), 1, sf );
code = (unsigned int)achar;
temp = (char*)&achar;
*(temp + 1) = 0;
strcpy( baseStr, temp );
while( !feof( sf )){
fread((unsigned char*)&achar, sizeof( char), 1, sf );
temp = 0;
temp = (char*)&achar;
*(temp + 1) = 0;
int ret = checkinTable( table, addStr );
if( ret ){
//Found the string in the string table
strcat( baseStr, temp );
}
else{
code = getCodefromTable( table, baseStr );
//Write to destination file
if( writeCode( code ) == 1 ){
return 2;
}

return 1;
}
memset( baseStr, 0, sizeof( baseStr ));
strcpy( baseStr, temp );
}
}//end of while
code = (unsigned int)baseStr;
}
return 0;
}

int writeCode( unsigned int code )
{
static int output_bit_count=0;
static unsigned long output_bit_buffer=0L;

output_bit_buffer |= (unsigned long) code << (32-BITS-output_bit_count);
output_bit_count += BITS;
while (output_bit_count >= 8)
{
putc( output_bit_buffer >> 24,df );
output_bit_buffer <<= 8;
output_bit_count -= 8;
}
return 0;
}

### #7 linuxuser

linuxuser

Honourable Member

• 1,147 posts

Posted 19 May 2004 - 08:51 PM

Run Length Encoding
RLE is another way of compressing file. I think the algorithm used for RLE is very simple,,therefore makes it easy to code. Here is the description of the algorithm

RLE is probably the easiest compression algorithm there is. It replaces sequences of the same data values within a file by a count number and a single value.

Suppose the following string of data (17 bytes) has to be compressed:
ABBBBBBBBBCDEEEEF
Using RLE compression, the compressed file takes up 10 bytes and could look like this:
A *8B C D *4E F

As you can see, repetitive strings of data are replaced by a control character (*) followed by the number of repeated characters and the repetitive character itself. The control character is not fixed, it can differ from implementation to implementation.
If the control character itself appears in the file then one extra character is coded.
Als you can see, RLE encoding is only effective if there are sequences of 4 or more repeating characters because three characters are used to conduct RLE so coding two repeating characters would even lead to an increase in file size.

It is important to know that there are many different run-length encoding schemes. The above example has just been used to demonstrate the basic principle of RLE encoding. Sometimes the implementation of RLE is adapted to the type of data that are being compressed.

### #8 linuxuser

linuxuser

Honourable Member

• 1,147 posts

Posted 19 May 2004 - 08:53 PM

My code to compress
//RLE.cpp
#include<stdio.h>
#include<stdlib.h>
#include<string.h>

char main( char argc, char** argv )
{
if( argc != 3 ){
printf("Invalid argument.");
return 0;
}
FILE *sf, *df;
sf = fopen( argv[1], "rb");
if( sf == NULL ){
printf("Invalid source file.");
return 0;
}
df = fopen( argv[2], "wb");
if( df == NULL){
printf("Invalid destination file.");
return 0;
}
char count = 0;
char sCount[100];
unsigned char achar = 0,store = 0;
if(!feof( sf ) ){
fread( (unsigned char*)&achar,sizeof( unsigned char ),1, sf );
}
while( !feof( sf )){
fread( (unsigned char*)&store,sizeof( unsigned char ),1, sf );
if( store == achar ) count++;
else{

if( count > 0 ){
unsigned char controlchar = '*';
fwrite( (unsigned char*)&controlchar, sizeof( unsigned char),1, df );
itoa( count, sCount, 10 );
fwrite( (unsigned char*)sCount, sizeof( unsigned char),strlen( sCount ), df );
fwrite( (unsigned char*)&achar, sizeof( unsigned char),1, df );
}
else{
fwrite( (unsigned char*) &achar, sizeof( unsigned char ), 1, df );
}
count = 0;
}
achar = store;

}//end of while

//Close the files
fclose( sf );
fclose( df );
return 0;
}

### #9 linuxuser

linuxuser

Honourable Member

• 1,147 posts

Posted 19 May 2004 - 08:54 PM

Decompress
//DRLE.cpp
#include<stdio.h>
#include<stdlib.h>
#include<string.h>

char main( char argc, char** argv )
{
if( argc != 3 ){
printf("Invalid argument.");
return 0;
}
FILE *sf, *df;
sf = fopen( argv[1], "rb");
if( sf == NULL ){
printf("Invalid source file.");
return 0;
}
df = fopen( argv[2], "wb");
if( df == NULL){
printf("Invalid destination file.");
return 0;
}

char str[100];
int value = 0;
unsigned char achar = 0;
while(!feof( sf )){
memset( str, 0, 100 );
fread((unsigned char*)&achar, sizeof( unsigned char), 1, sf );
if( achar == '*'){
int count = 0;
do{
fread((unsigned char*)&achar, sizeof( unsigned char), 1, sf );
value = achar;
//Only numbers from 0 to 9
if( value >= 48 && value <= 57 ){
str[ count ] = achar;
count++;
}

}while( value >= 48 && value <= 57);
str[ count ] = '\0';
count = atoi( str );
if( count > 0 ){
for( int i =0;i<= count;i++){
fwrite((unsigned char*)&achar, sizeof( unsigned char ), 1, df );
}//end of for
}//end of if

}
else{
fwrite((unsigned char*)&achar, sizeof( unsigned char ), 1, df );
}
}//end of while

//Close the files
fclose( sf );
fclose( df );
return 0;
}

#### 0 user(s) are reading this topic

0 members, 0 guests, 0 anonymous users