Jump to content
Sign in to follow this  
linuxuser

Computer science Topic:Huffman decoding

Recommended Posts

Huffman is an algorithm for coding( compressing ) or decoding ( decompressing) a binary file....Actuallly there r 2 kinds...static Huffman algorithm..adaptic or dynamic huffman algorithm

I have decided to post simple decoding algorithm ( may be someone finds it useful ) and also my code to do that..( the code does not work well,,i am still working on that part..may be some one can explore it , play with it and fix it ) smile.gif

 

Algorithm

Before decompressing the file, we have to first regenerate the huffman tree from the table that was saved along with the compressed file.

 

To do this we use the following algorithm

1.We initialize an array having 511 elements to zero.

2.We take one element from the array and make it the root node.The root node is initially the current node.

3.We then read the ith (initially the first) bit of the code for the jth(initially the first) character in the table.

4.If the ith bit of the code read is a '1', we take another element from the array and make it the right child node of the current node, if it is a '0' then the node is made the left child node of the current node. If the current node already has a left or right child node we skip this step.

5.If the bit currently read is the last bit of the code then this node holds the character represented by the code.

6.This node is then made the current node.

7.i is incremented

8.Steps 3 to 5 are repeated until all the bits of the code for the jth character are read.

9.The current node is reset to the root node.

10.j is incremented and i reset to 0

11.steps 3 to 10 are repeated until the codes for all the characters are read.

Once this process is completed we will obtain the complete Huffman tree that was used to compress the file. This tree is used search for the character that was represented by the code in the compressed file.

 

Now the actual decompression process begins.

1.The current node is set to the root node.

2.A sequence of zeros and ones are read from the compressed file. For every '0' read we move to the left child node of the current node and for every '1' read we move to the right child node of the current node and set it as the new current node.

3.If the current node is a leaf node then we output the character that is represented by this node. The current node is reset to the root node.

4.Steps 2 to 3 are repeated until all the bytes in the file are read.

Once this process is complete the output file contains the decompressed data.

 

Share this post


Link to post
Share on other sites

C++ Code ( Coded by me )

 

struct storage

{

unsigned int letter;

int repeat;

storage* left;

storage* right;

};

 

int main( int argc, void ** argv )

{

//It has to be 3 params

if( argc != 3 ){

printf("Error occurred. Invalid parameters");

return 0;

}

//Open files

char* temp = (char*)argv[1];

sf=fopen((char*)argv[1],"rb");

of=fopen((char*)argv[2],"wb");

if( sf == NULL ) return 1;

if( of == NULL ) return 1;

 

storage* st = new storage[256];

//Initialize

for( int i=0;i <256;i++)

{

st.letter = 0;

st.repeat = 0;

st.left = NULL;

st.right = NULL;

 

}

 

BuildTree( st, 0);

return 0;

}

 

void BuildTree( storage* storage1, int idx )

{

int i = 0, bitCount = 0, increment = 0, val = 0;

unsigned char achar;

storage *rootnode, *currentnode;

 

rootnode = &storage1;

currentnode = &storage1;

while( !feof( sf )){

 

achar = 0;

fread( (unsigned char*)&achar, sizeof( char ), 1, sf );

unsigned int ret= (unsigned int)achar;

 

bitCount = BitCount( ret );

for( int num=0;num<bitCount; num++ ){

/// for eq if we have 00000000 00000000 00000000 0000 0111 or simply 111

//to get first bit we need 111 & 100 ) so we r doing 1<<2, for second

//time we need to do 111 &010 or 1<<1..etc

val = ret& (1<< ((bitCount-1)-num));

val = val>>((bitCount-1)-num);

if( val == 1 ){

if( currentnode->right == NULL ){

i++;

currentnode->right = &storage1;

}

}

else{

if( currentnode->right == NULL ){

i++;

currentnode->left = &storage1;

}

}

if( increment == bitCount-1 ){

storage1.letter = ret;

}

currentnode = &storage1;

increment++;

}//end of for

currentnode = rootnode;

 

}//end of while

 

//Move at the beginning of the file

fseek( sf, 0L, SEEK_SET);

 

//Build the table

currentnode= rootnode;

while( !feof( sf )){

 

achar = 0;

fread( (unsigned char*)&achar, sizeof( char ), 1, sf );

unsigned int ret= (unsigned int)achar;

 

bitCount = BitCount( ret );

for( int num=0;num<bitCount; num++ ){

val = ret& (1<< ((bitCount-1)-num));

val = val>>((bitCount-1)-num);

if( currentnode->left == NULL && currentnode->right == NULL ){

printf("%c", (char)currentnode->letter );

currentnode = rootnode;//reset to the root node

continue;

}

if( val == 0 ){

currentnode = currentnode->left;

}

else{

currentnode = currentnode->right;

}

}//end of for

}//end of while

 

}

 

 

/* Returns bit count .Eg 1110 returns 4 */

int BitCount( unsigned int byte )

{

if( byte == 0 ) return 1;

int index = 0;

char bitbuffer[8 + 1];

int count = 8;

int MASK = 1<<(count-1);//2^7=1000 0000

 

//Basically here we r comparing 1000 0000 and for eg 1110 and populating to the char array bitbuffer

//keep shifting 1110 to the left when,,it becomes 1

while(count--)

{

bitbuffer[8-(count+1)] = ( byte & MASK ) ? '1' : '0';

byte <<= 1;//Move all bit one left

}

bitbuffer[8] = '\0';

 

for( int i=0;i < 8;i++)

{

if( bitbuffer == '1') break;

}

return 8-i;

}

Edited by rs_1915

Share this post


Link to post
Share on other sites

lots of linux source code should be in linux installation CD 2. Since linux is open source, source code for almost everything is available freely. Also some websites such as linux.org is good place for source codes.

Share this post


Link to post
Share on other sites

LZW Compression

LZW is yet another famous compression method used to compress binary or text data.

For those who r interested, please check out web for detailed information on the algorithm. There is no point me typing here since u can easily get it somewhere else. Let me just show u the Psuedocode and my code to do the job. I think someone can benefit from the code. And this time the code is complete, it actually works,,may be there are couple of things u guys can work on

1) check for memory leak

2) enhance the error handling mechanism.

 

Psuedocode

 

STRING = get input character

WHILE there are still input characters DO

CHARACTER = get input character

IF STRING+CHARACTER is in the string table then

STRING = STRING+character

ELSE

output the code for STRING

add STRING+CHARACTER to the string table

STRING = CHARACTER

END of IF

END of WHILE

output the code for STRING

 

Share this post


Link to post
Share on other sites

My C/C++ Code

 

////lzw.cpp

//Data Compression

#include<stdio.h>

#include<stdlib.h>

#include<string.h>

 

//Struct

struct strTable{

char* str;

unsigned int code;

};

 

//Define

#define MAX 5021

#define BITS 12

 

//Globals

FILE *sf, *df;

 

//Prototypes

int checkinTable( strTable* table, char* str );

int addinTable( strTable* table, char* str );

unsigned int getCodefromTable( strTable* table, char* str );

int lzwCompress( strTable* table );

int writeCode( unsigned int code );

 

int main( int argc, void** argv ){

 

if( argc != 3 ) {

printf("Error occurred. Invalid arguments\n");

return 0;

}

 

sf = fopen( (char*) argv[1], "rb");

df = fopen( (char*) argv[2], "wb");

if( sf == NULL ) {

printf("Error occurred. Invalid source file");

return 0;

}

if( df == NULL ){

printf("Error occurred. Invalid destination file");

return 0;

}

 

//Initialze the string table

int i = 0;

strTable* table = new strTable[ MAX ];

for( ; i < MAX; i++){

table.str = "";

table.code = 256 + i;

}

 

//lzw Compression

int ret = lzwCompress( table );

//Error code

switch( ret ){

case 1: printf("Error occurred. String could not be added in string table.\n");break;

case 2: printf("Error occurred. Write to destination file failed.\n");break;

//......More Error Messages

default: break;

}

 

//Cleanup

delete [] table;

 

fclose( sf );

fclose( df );

return 1;

}

 

int checkinTable( strTable* table, char* str )

{

int iFound = 0;

for(int i=0;i< MAX;i++){

if( strcmp( str, table.str ) == 0 ){

iFound = 1;

break;

}

}

return iFound;

}

 

int addinTable( strTable* table, char* str )

{

for(int i=0;i< MAX;i++){

if( strlen( table.str ) == 0 ){

table.str = new char[ strlen(str) + 1 ];

strcpy( table.str, str );

return 0;

}

}

return 1;

}

 

unsigned int getCodefromTable( strTable* table, char* str )

{

int size = strlen( str );

if( size == 1){

 

char achar = *str;

return (unsigned int)achar;

}

else{

//look up the table, only if string is more than one letter

for(int i=0;i< MAX;i++){

if( strcmp( str, table.str ) == 0 ){

return table.code;

}

}

}//end of else

return MAX + 1;

}

 

int lzwCompress( strTable* table )

{

 

char baseStr[256+ 1 ];

char addStr[256+ 1 ];

char* temp;

unsigned int code;

unsigned char achar;

if( !feof( sf )){

achar = 0;

fread((unsigned char*)&achar, sizeof( char), 1, sf );

code = (unsigned int)achar;

temp = (char*)&achar;

*(temp + 1) = 0;

strcpy( baseStr, temp );

strcpy( addStr, temp );

while( !feof( sf )){

fread((unsigned char*)&achar, sizeof( char), 1, sf );

temp = 0;

temp = (char*)&achar;

*(temp + 1) = 0;

strcat( addStr, temp );

int ret = checkinTable( table, addStr );

if( ret ){

//Found the string in the string table

strcat( baseStr, temp );

}

else{

code = getCodefromTable( table, baseStr );

//Write to destination file

if( writeCode( code ) == 1 ){

return 2;

}

 

if( addinTable( table, addStr ) == 1 ){

return 1;

}

memset( baseStr, 0, sizeof( baseStr ));

memset( addStr, 0, sizeof( addStr ));

strcpy( baseStr, temp );

strcpy( addStr, temp );

}

}//end of while

code = (unsigned int)baseStr;

}

return 0;

}

 

int writeCode( unsigned int code )

{

static int output_bit_count=0;

static unsigned long output_bit_buffer=0L;

 

output_bit_buffer |= (unsigned long) code << (32-BITS-output_bit_count);

output_bit_count += BITS;

while (output_bit_count >= 8)

{

putc( output_bit_buffer >> 24,df );

output_bit_buffer <<= 8;

output_bit_count -= 8;

}

return 0;

}

Share this post


Link to post
Share on other sites

Run Length Encoding

RLE is another way of compressing file. I think the algorithm used for RLE is very simple,,therefore makes it easy to code. Here is the description of the algorithm

 

RLE is probably the easiest compression algorithm there is. It replaces sequences of the same data values within a file by a count number and a single value.

 

Suppose the following string of data (17 bytes) has to be compressed:

ABBBBBBBBBCDEEEEF

Using RLE compression, the compressed file takes up 10 bytes and could look like this:

A *8B C D *4E F

 

As you can see, repetitive strings of data are replaced by a control character (*) followed by the number of repeated characters and the repetitive character itself. The control character is not fixed, it can differ from implementation to implementation.

If the control character itself appears in the file then one extra character is coded.

Als you can see, RLE encoding is only effective if there are sequences of 4 or more repeating characters because three characters are used to conduct RLE so coding two repeating characters would even lead to an increase in file size.

 

It is important to know that there are many different run-length encoding schemes. The above example has just been used to demonstrate the basic principle of RLE encoding. Sometimes the implementation of RLE is adapted to the type of data that are being compressed.

 

 

Share this post


Link to post
Share on other sites

My code to compress

//RLE.cpp

#include<stdio.h>

#include<stdlib.h>

#include<string.h>

 

char main( char argc, char** argv )

{

if( argc != 3 ){

printf("Invalid argument.");

return 0;

}

FILE *sf, *df;

sf = fopen( argv[1], "rb");

if( sf == NULL ){

printf("Invalid source file.");

return 0;

}

df = fopen( argv[2], "wb");

if( df == NULL){

printf("Invalid destination file.");

return 0;

}

char count = 0;

char sCount[100];

unsigned char achar = 0,store = 0;

if(!feof( sf ) ){

fread( (unsigned char*)&achar,sizeof( unsigned char ),1, sf );

}

while( !feof( sf )){

fread( (unsigned char*)&store,sizeof( unsigned char ),1, sf );

if( store == achar ) count++;

else{

 

if( count > 0 ){

unsigned char controlchar = '*';

fwrite( (unsigned char*)&controlchar, sizeof( unsigned char),1, df );

itoa( count, sCount, 10 );

fwrite( (unsigned char*)sCount, sizeof( unsigned char),strlen( sCount ), df );

fwrite( (unsigned char*)&achar, sizeof( unsigned char),1, df );

}

else{

fwrite( (unsigned char*) &achar, sizeof( unsigned char ), 1, df );

}

count = 0;

}

achar = store;

 

}//end of while

 

//Close the files

fclose( sf );

fclose( df );

return 0;

}

 

Share this post


Link to post
Share on other sites

Decompress

//DRLE.cpp

#include<stdio.h>

#include<stdlib.h>

#include<string.h>

 

char main( char argc, char** argv )

{

if( argc != 3 ){

printf("Invalid argument.");

return 0;

}

FILE *sf, *df;

sf = fopen( argv[1], "rb");

if( sf == NULL ){

printf("Invalid source file.");

return 0;

}

df = fopen( argv[2], "wb");

if( df == NULL){

printf("Invalid destination file.");

return 0;

}

 

char str[100];

int value = 0;

unsigned char achar = 0;

while(!feof( sf )){

memset( str, 0, 100 );

fread((unsigned char*)&achar, sizeof( unsigned char), 1, sf );

if( achar == '*'){

int count = 0;

do{

fread((unsigned char*)&achar, sizeof( unsigned char), 1, sf );

value = achar;

//Only numbers from 0 to 9

if( value >= 48 && value <= 57 ){

str[ count ] = achar;

count++;

}

 

}while( value >= 48 && value <= 57);

str[ count ] = '\0';

count = atoi( str );

if( count > 0 ){

for( int i =0;i<= count;i++){

fwrite((unsigned char*)&achar, sizeof( unsigned char ), 1, df );

}//end of for

}//end of if

 

 

}

else{

fwrite((unsigned char*)&achar, sizeof( unsigned char ), 1, df );

}

}//end of while

 

//Close the files

fclose( sf );

fclose( df );

return 0;

}

 

Share this post


Link to post
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

Loading...
Sign in to follow this  

×
×
  • Create New...

Important Information

By using this site, you agree to our Terms of Use.