LZW Compression
 
#include <stdio.h>
char *currchararray,decode_stack[4000],*string,curr; 
int next_code,prev,index,i,*previntarray,*codeintarray,new,old;
unsigned int output_bit_count,output_bit_buffer,return_value,input_bit_count,input_bit_buffer;
FILE *fp;
char *decode_string(unsigned char *buffer,unsigned int code)
{
while (code > 255)
{
*buffer++ = currchararray[code];
code=previntarray[code];
}
*buffer=code;
return(buffer);
}
input_code()
{
while (input_bit_count <= 24)
{
input_bit_buffer |= getc(fp) << (24-input_bit_count);
input_bit_count += 8;
}
return_value=input_bit_buffer >> (32-12);
input_bit_buffer <<= 12;
input_bit_count -= 12;
return(return_value);
}
output_code(unsigned int code)
{
printf("function output_code code=%x %c output_bit_count=%d output_bit_buffer=%x left shift bits=%d code shifted=%08x ",code,code,output_bit_count,output_bit_buffer,32-12-output_bit_count,code << (32-12-output_bit_count));
output_bit_buffer = output_bit_buffer | code << (32-12-output_bit_count);
output_bit_count = output_bit_count + 12;
printf("After output_bit_count=%d output_bit_buffer=%x\n",output_bit_count,output_bit_buffer);
while (output_bit_count >= 8)
{
printf("In while output_bit_count=%d output_bit_buffer=%x output_bit_buffer >> 24=%x ",output_bit_count,output_bit_buffer,output_bit_buffer >> 24);
putc(output_bit_buffer >> 24,fp);
output_bit_buffer = output_bit_buffer << 8;
printf("at end output_bit_buffer=%x\n",output_bit_buffer);
output_bit_count -= 8;
}
}
main()
{
char *p="ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDABCD vijay mukhi is shit and so is his wife";
codeintarray=(int *)malloc(5021*4);
previntarray=(int *)malloc(5021*4);
currchararray=(char *)malloc(5021);
fp=fopen("vijay.txt","wb");
next_code=256;       
for (i=0;i<5021;i++) 
codeintarray[i]= -1;
prev=*p++;
while ((curr=*p++) != 0)
{
printf("Start of while prev=%c %x curr=%c %x curr << 4=%d ",prev,prev,curr,curr,curr << 4);
index = (curr << 4) ^ prev;
printf("index=%d \n",index);
while (1)
{
if (codeintarray[index] == -1)
{
printf("1st break index=%d\n",index);
break;
}
if (previntarray[index]==prev && currchararray[index]==curr)
{
printf("2st break index=%d previntarray[index]=%x currchararray[index]=%x\n",index,previntarray[index],currchararray[index]);
break;
}
printf("In while old index=%d ",index);
index = (5021 - index) - index;
printf("New index=%d\n",index);
if (index < 0)
{
printf("3rd if index=%d\n",index);
index += 5021;
}
}
if (codeintarray[index] == -1)
{
printf("Last if index=%d next_code=%d prev=%c %x curr=%c %x\n",index,next_code,prev,prev,curr,curr);
codeintarray[index]=next_code++;
previntarray[index]=prev;
currchararray[index]=curr;
output_code(prev);
prev=curr;
}
else                            
{
prev=codeintarray[index];
printf("Last Else index=%d prev=%d\n",index,prev);
}
printf("\n");
}
output_code(prev);
output_code(4095);
output_code(0);
fclose(fp);
fp=fopen("vijay.txt","rb");
next_code=256;
old=input_code();
curr=old;
printf("%c",old);
while ((new=input_code()) != (4095))
{
if (new>=next_code)
{
*decode_stack=curr;
string=decode_string(decode_stack+1,old);
}
else
string=decode_string(decode_stack,new);
curr=*string;
while (string >= decode_stack)
printf("%c",*string--);
previntarray[next_code]=old;
currchararray[next_code]=curr;
next_code++;
old=new;
}
}
 
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121 
1st break index=1121
Last if index=1121 next_code=256 prev=65 41 curr=B 42
function output_code code=41 A output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=04100000 After output_bit_count=12 output_bit_buffer=4100000
In while output_bit_count=12 output_bit_buffer=4100000 output_bit_buffer >> 24=4 at end output_bit_buffer=10000000
 
Start of while prev=B 66 curr=C 43 curr << 4=1072 index=1138 
1st break index=1138
Last if index=1138 next_code=257 prev=66 42 curr=C 43
function output_code code=42 B output_bit_count=4 output_bit_buffer=10000000 left shift bits=16 code shifted=00420000 After output_bit_count=16 output_bit_buffer=10420000
In while output_bit_count=16 output_bit_buffer=10420000 output_bit_buffer >> 24=10 at end output_bit_buffer=42000000
In while output_bit_count=8 output_bit_buffer=42000000 output_bit_buffer >> 24=42 at end output_bit_buffer=0
 
Start of while prev=C 67 curr=D 44 curr << 4=1088 index=1027 
1st break index=1027
Last if index=1027 next_code=258 prev=67 43 curr=D 44
function output_code code=43 C output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=04300000 After output_bit_count=12 output_bit_buffer=4300000
In while output_bit_count=12 output_bit_buffer=4300000 output_bit_buffer >> 24=4 at end output_bit_buffer=30000000
 
Start of while prev=D 68 curr=A 41 curr << 4=1040 index=1108 
1st break index=1108
Last if index=1108 next_code=259 prev=68 44 curr=A 41
function output_code code=44 D output_bit_count=4 output_bit_buffer=30000000 left shift bits=16 code shifted=00440000 After output_bit_count=16 output_bit_buffer=30440000
In while output_bit_count=16 output_bit_buffer=30440000 output_bit_buffer >> 24=30 at end output_bit_buffer=44000000
In while output_bit_count=8 output_bit_buffer=44000000 output_bit_buffer >> 24=44 at end output_bit_buffer=0
 
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121 
2st break index=1121 previntarray[index]=41 currchararray[index]=42
Last Else index=1121 prev=256
 
Start of while prev=  256 curr=C 43 curr << 4=1072 index=1328 
1st break index=1328
Last if index=1328 next_code=260 prev=256 100 curr=C 43
function output_code code=100   output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10000000 After output_bit_count=12 output_bit_buffer=10000000
In while output_bit_count=12 output_bit_buffer=10000000 output_bit_buffer >> 24=10 at end output_bit_buffer=0
 
Start of while prev=C 67 curr=D 44 curr << 4=1088 index=1027 
2st break index=1027 previntarray[index]=43 currchararray[index]=44
Last Else index=1027 prev=258
 
Start of while prev=[1] 258 curr=A 41 curr << 4=1040 index=1298 
1st break index=1298
Last if index=1298 next_code=261 prev=258 102 curr=A 41
function output_code code=102 [1] output_bit_count=4 output_bit_buffer=0 left shift bits=16 code shifted=01020000 After output_bit_count=16 output_bit_buffer=1020000
In while output_bit_count=16 output_bit_buffer=1020000 output_bit_buffer >> 24=1 at end output_bit_buffer=2000000
In while output_bit_count=8 output_bit_buffer=2000000 output_bit_buffer >> 24=2 at end output_bit_buffer=0
 
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121 
2st break index=1121 previntarray[index]=41 currchararray[index]=42
Last Else index=1121 prev=256
 
Start of while prev=  256 curr=C 43 curr << 4=1072 index=1328 
2st break index=1328 previntarray[index]=100 currchararray[index]=43
Last Else index=1328 prev=260
 
Start of while prev=


 260 curr=D 44 curr << 4=1088 index=1348
1st break index=1348
Last if index=1348 next_code=262 prev=260 104 curr=D 44
function output_code code=104 


 output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10400000 After output_bit_count=12 output_bit_buffer=10400000
In while output_bit_count=12 output_bit_buffer=10400000 output_bit_buffer >> 24=10 at end output_bit_buffer=40000000
 
Start of while prev=D 68 curr=A 41 curr << 4=1040 index=1108 
2st break index=1108 previntarray[index]=44 currchararray[index]=41
Last Else index=1108 prev=259
 
Start of while prev=


 259 curr=B 42 curr << 4=1056 index=1315
1st break index=1315
Last if index=1315 next_code=263 prev=259 103 curr=B 42
function output_code code=103 


 output_bit_count=4 output_bit_buffer=40000000 left shift bits=16 code shifted=01030000 After output_bit_count=16 output_bit_buffer=41030000
In while output_bit_count=16 output_bit_buffer=41030000 output_bit_buffer >> 24=41 at end output_bit_buffer=3000000
In while output_bit_count=8 output_bit_buffer=3000000 output_bit_buffer >> 24=3 at end output_bit_buffer=0
 
Start of while prev=B 66 curr=C 43 curr << 4=1072 index=1138 
2st break index=1138 previntarray[index]=42 currchararray[index]=43
Last Else index=1138 prev=257
 
Start of while prev= 257 curr=D 44 curr << 4=1088 index=1345 
1st break index=1345
Last if index=1345 next_code=264 prev=257 101 curr=D 44
function output_code code=101  output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10100000 After output_bit_count=12 output_bit_buffer=10100000
In while output_bit_count=12 output_bit_buffer=10100000 output_bit_buffer >> 24=10 at end output_bit_buffer=10000000
 
Start of while prev=D 68 curr=A 41 curr << 4=1040 index=1108 
2st break index=1108 previntarray[index]=44 currchararray[index]=41
Last Else index=1108 prev=259
 
Start of while prev=


 259 curr=B 42 curr << 4=1056 index=1315
2st break index=1315 previntarray[index]=103 currchararray[index]=42
Last Else index=1315 prev=263
 
Start of while prev= 263 curr=C 43 curr << 4=1072 index=1335 
1st break index=1335
Last if index=1335 next_code=265 prev=263 107 curr=C 43
function output_code code=107  output_bit_count=4 output_bit_buffer=10000000 left shift bits=16 code shifted=01070000 After output_bit_count=16 output_bit_buffer=11070000
In while output_bit_count=16 output_bit_buffer=11070000 output_bit_buffer >> 24=11 at end output_bit_buffer=7000000
In while output_bit_count=8 output_bit_buffer=7000000 output_bit_buffer >> 24=7 at end output_bit_buffer=0
 
Start of while prev=C 67 curr=D 44 curr << 4=1088 index=1027 
2st break index=1027 previntarray[index]=43 currchararray[index]=44
Last Else index=1027 prev=258
 
Start of while prev=[1] 258 curr=A 41 curr << 4=1040 index=1298 
2st break index=1298 previntarray[index]=102 currchararray[index]=41
Last Else index=1298 prev=261
 
Start of while prev= 261 curr=B 42 curr << 4=1056 index=1317 
1st break index=1317
Last if index=1317 next_code=266 prev=261 105 curr=B 42
function output_code code=105  output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10500000 After output_bit_count=12 output_bit_buffer=10500000
In while output_bit_count=12 output_bit_buffer=10500000 output_bit_buffer >> 24=10 at end output_bit_buffer=50000000
 
Start of while prev=B 66 curr=C 43 curr << 4=1072 index=1138 
2st break index=1138 previntarray[index]=42 currchararray[index]=43
Last Else index=1138 prev=257
 
Start of while prev= 257 curr=D 44 curr << 4=1088 index=1345 
2st break index=1345 previntarray[index]=101 currchararray[index]=44
Last Else index=1345 prev=264
 
Start of while prev= 264 curr=A 41 curr << 4=1040 index=1304 
1st break index=1304
Last if index=1304 next_code=267 prev=264 108 curr=A 41
function output_code code=108  output_bit_count=4 output_bit_buffer=50000000 left shift bits=16 code shifted=01080000 After output_bit_count=16 output_bit_buffer=51080000
In while output_bit_count=16 output_bit_buffer=51080000 output_bit_buffer >> 24=51 at end output_bit_buffer=8000000
In while output_bit_count=8 output_bit_buffer=8000000 output_bit_buffer >> 24=8 at end output_bit_buffer=0
 
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121 
2st break index=1121 previntarray[index]=41 currchararray[index]=42
Last Else index=1121 prev=256
 
Start of while prev=  256 curr=C 43 curr << 4=1072 index=1328 
2st break index=1328 previntarray[index]=100 currchararray[index]=43
Last Else index=1328 prev=260
 
Start of while prev=


 260 curr=D 44 curr << 4=1088 index=1348
2st break index=1348 previntarray[index]=104 currchararray[index]=44
Last Else index=1348 prev=262
 
Start of while prev= 262 curr=A 41 curr << 4=1040 index=1302 
1st break index=1302
Last if index=1302 next_code=268 prev=262 106 curr=A 41
function output_code code=106  output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10600000 After output_bit_count=12 output_bit_buffer=10600000
In while output_bit_count=12 output_bit_buffer=10600000 output_bit_buffer >> 24=10 at end output_bit_buffer=60000000
 
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121 
2st break index=1121 previntarray[index]=41 currchararray[index]=42
Last Else index=1121 prev=256
 
Start of while prev=  256 curr=C 43 curr << 4=1072 index=1328 
2st break index=1328 previntarray[index]=100 currchararray[index]=43
Last Else index=1328 prev=260
 
Start of while prev=


 260 curr=D 44 curr << 4=1088 index=1348
2st break index=1348 previntarray[index]=104 currchararray[index]=44
Last Else index=1348 prev=262
 
function output_code code=106  output_bit_count=4 output_bit_buffer=60000000 left shift bits=16 code shifted=01060000 After output_bit_count=16 output_bit_buffer=61060000
In while output_bit_count=16 output_bit_buffer=61060000 output_bit_buffer >> 24=61 at end output_bit_buffer=6000000
In while output_bit_count=8 output_bit_buffer=6000000 output_bit_buffer >> 24=6 at end output_bit_buffer=0
function output_code code=fff ÿ output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=fff00000 After output_bit_count=12 output_bit_buffer=fff00000
In while output_bit_count=12 output_bit_buffer=fff00000 output_bit_buffer >> 24=ff at end output_bit_buffer=f0000000
function output_code code=0   output_bit_count=4 output_bit_buffer=f0000000 left shift bits=16 code shifted=00000000 After output_bit_count=16 output_bit_buffer=f0000000
In while output_bit_count=16 output_bit_buffer=f0000000 output_bit_buffer >> 24=f0 at end output_bit_buffer=0
In while output_bit_count=8 output_bit_buffer=0 output_bit_buffer >> 24=0 at end output_bit_buffer=0
 
 
Function input_code start input_bit_count=0 input_bit_buffer=00000000
Function input_code While loop char c=04 input_bit_count=8 input_bit_buffer=04000000 24-input_bit_count=16
Function input_code While loop char c=10 input_bit_count=16 input_bit_buffer=04100000 24-input_bit_count=8
Function input_code While loop char c=42 input_bit_count=24 input_bit_buffer=04104200 24-input_bit_count=0
Function input_code While loop char c=04 input_bit_count=32 input_bit_buffer=04104204 24-input_bit_count=-8
Function input_code input_bit_buffer=04104204 input_bit_buffer >> 20=00000041 Function input_code end input_bit_buffer <<= 12=04204000 input_bit_count=20 return_value=65 A
Function input_code start input_bit_count=20 input_bit_buffer=04204000
Function input_code While loop char c=30 input_bit_count=28 input_bit_buffer=04204300 24-input_bit_count=-4
Function input_code input_bit_buffer=04204300 input_bit_buffer >> 20=00000042 Function input_code end input_bit_buffer <<= 12=04300000 input_bit_count=16 return_value=66 B
In main Start of while new=66 B next_code=256 if new=66 B
Function Decode_String start code=66 B
Function Decode_String end Code=66 B
if string=B
In main inner while *string=66 B
In main end of while old=65 A curr=66 B next_code=256 previntarray[next_code]=65 currchararray[next_code]=66
 
Function input_code start input_bit_count=16 input_bit_buffer=04300000
Function input_code While loop char c=44 input_bit_count=24 input_bit_buffer=04304400 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=04304410 24-input_bit_count=-8
Function input_code input_bit_buffer=04304410 input_bit_buffer >> 20=00000043 Function input_code end input_bit_buffer <<= 12=04410000 input_bit_count=20 return_value=67 C
In main Start of while new=67 C next_code=257 if new=67 C
Function Decode_String start code=67 C
Function Decode_String end Code=67 C
if string=C
In main inner while *string=67 C
In main end of while old=66 B curr=67 C next_code=257 previntarray[next_code]=66 currchararray[next_code]=67
 
Function input_code start input_bit_count=20 input_bit_buffer=04410000
Function input_code While loop char c=01 input_bit_count=28 input_bit_buffer=04410010 24-input_bit_count=-4
Function input_code input_bit_buffer=04410010 input_bit_buffer >> 20=00000044 Function input_code end input_bit_buffer <<= 12=10010000 input_bit_count=16 return_value=68 D
In main Start of while new=68 D next_code=258 if new=68 D
Function Decode_String start code=68 D
Function Decode_String end Code=68 D
if string=D
In main inner while *string=68 D
In main end of while old=67 C curr=68 D next_code=258 previntarray[next_code]=67 currchararray[next_code]=68
 
Function input_code start input_bit_count=16 input_bit_buffer=10010000
Function input_code While loop char c=02 input_bit_count=24 input_bit_buffer=10010200 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=10010210 24-input_bit_count=-8
Function input_code input_bit_buffer=10010210 input_bit_buffer >> 20=00000100 Function input_code end input_bit_buffer <<= 12=10210000 input_bit_count=20 return_value=256  
In main Start of while new=256   next_code=259 if new=256  
Function Decode_String start code=256  
Function Decode_String while currchararray[code]=66 B previntarray[code]=65 A code=256
Function Decode_String end Code=65 A
if string=A
In main inner while *string=65 A
In main inner while *string=66 B
In main end of while old=68 D curr=65 A next_code=259 previntarray[next_code]=68 currchararray[next_code]=65
 
Function input_code start input_bit_count=20 input_bit_buffer=10210000
Function input_code While loop char c=41 input_bit_count=28 input_bit_buffer=10210410 24-input_bit_count=-4
Function input_code input_bit_buffer=10210410 input_bit_buffer >> 20=00000102 Function input_code end input_bit_buffer <<= 12=10410000 input_bit_count=16 return_value=258 [1]
In main Start of while new=258 [1] next_code=260 if new=258 [1]
Function Decode_String start code=258 [1]
Function Decode_String while currchararray[code]=68 D previntarray[code]=67 C code=258
Function Decode_String end Code=67 C
if string=C
In main inner while *string=67 C
In main inner while *string=68 D
In main end of while old=256   curr=67 C next_code=260 previntarray[next_code]=256 currchararray[next_code]=67
 
Function input_code start input_bit_count=16 input_bit_buffer=10410000
Function input_code While loop char c=03 input_bit_count=24 input_bit_buffer=10410300 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=10410310 24-input_bit_count=-8
Function input_code input_bit_buffer=10410310 input_bit_buffer >> 20=00000104 Function input_code end input_bit_buffer <<= 12=10310000 input_bit_count=20 return_value=260 


In main Start of while new=260 


 next_code=261 if new=260
Function Decode_String start code=260 


Function Decode_String while currchararray[code]=67 C previntarray[code]=256   code=260
Function Decode_String while currchararray[code]=66 B previntarray[code]=65 A code=256
Function Decode_String end Code=65 A
if string=A
In main inner while *string=65 A
In main inner while *string=66 B
In main inner while *string=67 C
In main end of while old=258 [1] curr=65 A next_code=261 previntarray[next_code]=258 currchararray[next_code]=65
 
Function input_code start input_bit_count=20 input_bit_buffer=10310000
Function input_code While loop char c=11 input_bit_count=28 input_bit_buffer=10310110 24-input_bit_count=-4
Function input_code input_bit_buffer=10310110 input_bit_buffer >> 20=00000103 Function input_code end input_bit_buffer <<= 12=10110000 input_bit_count=16 return_value=259 


In main Start of while new=259 


 next_code=262 if new=259
Function Decode_String start code=259 


Function Decode_String while currchararray[code]=65 A previntarray[code]=68 D code=259
Function Decode_String end Code=68 D
if string=DA
In main inner while *string=68 D
In main inner while *string=65 A
In main end of while old=260 


 curr=68 D next_code=262 previntarray[next_code]=260 currchararray[next_code]=68
 
Function input_code start input_bit_count=16 input_bit_buffer=10110000
Function input_code While loop char c=07 input_bit_count=24 input_bit_buffer=10110700 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=10110710 24-input_bit_count=-8
Function input_code input_bit_buffer=10110710 input_bit_buffer >> 20=00000101 Function input_code end input_bit_buffer <<= 12=10710000 input_bit_count=20 return_value=257 
In main Start of while new=257  next_code=263 if new=257 
Function Decode_String start code=257 
Function Decode_String while currchararray[code]=67 C previntarray[code]=66 B code=257
Function Decode_String end Code=66 B
if string=BA
In main inner while *string=66 B
In main inner while *string=67 C
In main end of while old=259 


 curr=66 B next_code=263 previntarray[next_code]=259 currchararray[next_code]=66
 
Function input_code start input_bit_count=20 input_bit_buffer=10710000
Function input_code While loop char c=51 input_bit_count=28 input_bit_buffer=10710510 24-input_bit_count=-4
Function input_code input_bit_buffer=10710510 input_bit_buffer >> 20=00000107 Function input_code end input_bit_buffer <<= 12=10510000 input_bit_count=16 return_value=263 
In main Start of while new=263  next_code=264 if new=263 
Function Decode_String start code=263 
Function Decode_String while currchararray[code]=66 B previntarray[code]=259 


 code=263
Function Decode_String while currchararray[code]=65 A previntarray[code]=68 D code=259
Function Decode_String end Code=68 D
if string=D
In main inner while *string=68 D
In main inner while *string=65 A
In main inner while *string=66 B
In main end of while old=257  curr=68 D next_code=264 previntarray[next_code]=257 currchararray[next_code]=68
 
Function input_code start input_bit_count=16 input_bit_buffer=10510000
Function input_code While loop char c=08 input_bit_count=24 input_bit_buffer=10510800 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=10510810 24-input_bit_count=-8
Function input_code input_bit_buffer=10510810 input_bit_buffer >> 20=00000105 Function input_code end input_bit_buffer <<= 12=10810000 input_bit_count=20 return_value=261 
In main Start of while new=261  next_code=265 if new=261 
Function Decode_String start code=261 
Function Decode_String while currchararray[code]=65 A previntarray[code]=258 [1] code=261
Function Decode_String while currchararray[code]=68 D previntarray[code]=67 C code=258
Function Decode_String end Code=67 C
if string=C
In main inner while *string=67 C
In main inner while *string=68 D
In main inner while *string=65 A
In main end of while old=263  curr=67 C next_code=265 previntarray[next_code]=263 currchararray[next_code]=67
 
Function input_code start input_bit_count=20 input_bit_buffer=10810000
Function input_code While loop char c=61 input_bit_count=28 input_bit_buffer=10810610 24-input_bit_count=-4
Function input_code input_bit_buffer=10810610 input_bit_buffer >> 20=00000108 Function input_code end input_bit_buffer <<= 12=10610000 input_bit_count=16 return_value=264 
In main Start of while new=264  next_code=266 if new=264 
Function Decode_String start code=264 
Function Decode_String while currchararray[code]=68 D previntarray[code]=257  code=264
Function Decode_String while currchararray[code]=67 C previntarray[code]=66 B code=257
Function Decode_String end Code=66 B
if string=B
In main inner while *string=66 B
In main inner while *string=67 C
In main inner while *string=68 D
In main end of while old=261  curr=66 B next_code=266 previntarray[next_code]=261 currchararray[next_code]=66
 
Function input_code start input_bit_count=16 input_bit_buffer=10610000
Function input_code While loop char c=06 input_bit_count=24 input_bit_buffer=10610600 24-input_bit_count=0
Function input_code While loop char c=ff input_bit_count=32 input_bit_buffer=106106ff 24-input_bit_count=-8
Function input_code input_bit_buffer=106106ff input_bit_buffer >> 20=00000106 Function input_code end input_bit_buffer <<= 12=106ff000 input_bit_count=20 return_value=262 
In main Start of while new=262  next_code=267 if new=262 
Function Decode_String start code=262 
Function Decode_String while currchararray[code]=68 D previntarray[code]=260 


 code=262
Function Decode_String while currchararray[code]=67 C previntarray[code]=256   code=260
Function Decode_String while currchararray[code]=66 B previntarray[code]=65 A code=256
Function Decode_String end Code=65 A
if string=A
In main inner while *string=65 A
In main inner while *string=66 B
In main inner while *string=67 C
In main inner while *string=68 D
In main end of while old=264  curr=65 A next_code=267 previntarray[next_code]=264 currchararray[next_code]=65
 
Function input_code start input_bit_count=20 input_bit_buffer=106ff000
Function input_code While loop char c=f0 input_bit_count=28 input_bit_buffer=106fff00 24-input_bit_count=-4
Function input_code input_bit_buffer=106fff00 input_bit_buffer >> 20=00000106 Function input_code end input_bit_buffer <<= 12=fff00000 input_bit_count=16 return_value=262 
In main Start of while new=262  next_code=268 if new=262 
Function Decode_String start code=262 
Function Decode_String while currchararray[code]=68 D previntarray[code]=260 


 code=262
Function Decode_String while currchararray[code]=67 C previntarray[code]=256   code=260
Function Decode_String while currchararray[code]=66 B previntarray[code]=65 A code=256
Function Decode_String end Code=65 A
if string=A
In main inner while *string=65 A
In main inner while *string=66 B
In main inner while *string=67 C
In main inner while *string=68 D
In main end of while old=262  curr=65 A next_code=268 previntarray[next_code]=262 currchararray[next_code]=65
 
Function input_code start input_bit_count=16 input_bit_buffer=fff00000
Function input_code While loop char c=00 input_bit_count=24 input_bit_buffer=fff00000 24-input_bit_count=0
Function input_code While loop char c=ff input_bit_count=32 input_bit_buffer=fff000ff 24-input_bit_count=-8
Function input_code input_bit_buffer=fff000ff input_bit_buffer >> 20=00000fff Function input_code end input_bit_buffer <<= 12=000ff000 input_bit_count=20 return_value=4095 ÿ
 

The above program uses the LZW compression to compress data for us. The LZW algorithm is the compression method used for compressing gif and tiff files. Along with the Huffman algorithm it is the most widely used compression method used today.  Lets start with the function main.

We store the string to be compressed in the character pointer p. On purpose we have started our string with five ABC’s. The reason being that the LZW compression is very efficient when it finds the same data repeated over and over again. We create two arrays of int’s codeintarray and previntarray of size 20084 bytes or space for 5021 ints. Why a size of 5021 bytes is used will be explained later.

Then we also create a char array currchararray of the same size 5021. The string in the pointer p will be compressed and stored in the file vijay.txt and by using the w mode, so that this file is created from scratch each time. The variable next_code is set to 256 as a char can hold values from 0 to 255. The global array codeintarray which is initialized by default to 0. We now set each member of this array to –1 using a loop.

Thus if any codeintarray member is –1, it only means that we have no written anything into that member. The –1 is used as we will never have this value as a valid value of any member of the codeintarray.  We also set the prev member to the first char of our string A. We then increase p by 1 to point to the second character B. We now enter a while loop where we set curr to the second byte h and increase p by 1.

Some time later we will reach the end of the string or zero and thus we will quit out of the outer while loop. At the end of the loop we set prev to either curr or the value stored in the codeintarray using the variable index as the array offset. Thus the data type of curr is a char but the value of prev can be a int. 

Thus at times prev is what the value of curr was and at times it is a value from the codeintarray.

We then calculate a value for the variable index. We first left shift curr by 4 loosing the 4 high bits of the byte. Thus the letter B or 0x41 now becomes 1056. We then xor this value with prev getting a value of index or 1121. This variable index will be used as an offset into the three arrays we have created. We now enter a infinite while loop.

The if statement that we encounter is true. This is because we have not written into the array codeintarray at all. Once again the if is true and not the else. Here we print out the values of next_code and prev and curr which have not changed. The codeintarray at the offset index or 1121 now contains 256, the value of next_code which now is increased by 1. Thus the next we come across a A and B, the value of codeintarray at offset index will be 256 and not –1.

Thus for the same combination or curr and prev which we use to give us  index, the first if in the outer while will not be true again. This is because each time the index offset for the codeintarray  contains –1, we write the value of next_code there as its value and increase it by 1. Thus next_code if its value is 300, it means that we have written 45 values to the codeintarray at different places using the index variable as offset.

Thus imagine the codeintarray as comprised of  all –1 except the offset at location 1121 which contains the value 256. Thus the codeintarray contains a series of values from 256 upwards. 

We then go to the corresponding previntarray and currchararray at the offset index and place the values of prev and curr respectively which is A and B respectively. Remember variable curr is always a char but prev can be a char or a int. Also the three arrays are symmetrical in the sense that the same index variable is used as an offset into them to write there . The currchararray always contains the current character.

We then call a function output_code with the value of prev that is the first character A. Lets see what this function now does.

We start as always by printing out the values of prev or code that has a value of A and the two variables output_bit_count and output_bit_buffer which is 0. These are global variables and hence there values the next time will not be zero as we are not initializing them in the function itself. The earlier values will be remembered. We then use the output_bit_count variable to calculate the number of bits we have to left shift the value of code or the character A.

To start with the number of bits is 20. We then take the current value of output_bit_buffer which is 0 and bit wise or with the value of prev or code left shifted by 20 bits. Thus the value of code is 0x41 and after left shifting it by 20 bits we get 04100000. Thus the value of output_bit_buffer and the code shifted is the same as the output_bit_buffer variable value is 0.

We next add the output_bit_count variable by 12 and enter a while loop until the value is greater or equal to 8. In this loop we reduce the value of the variable output_bit_count  by 8  Thus if the value is 12 as it is right now, we will enter the loop only once, but the next time we enter the function output_code, the value of the variable output_bit_count at the start of function will be 4, we are increasing it by 12 and thus it becomes 16.

The while loop will happen twice the next time it is called, but at the end of the function its value will be 0 and hence the next time the function output_code will be called the value is 0.

We then print out the value of output_bit_buffer right shifted by 24 bits which is 4. The 1 gets thrown off. We then use the putc function to write this value of disk. Thus the first byte written on disk is 04, that is only the high nibble of the first character A.

Now the 1 that got thrown off must be saved and therefore we throw off the 0 and 4 by left shifting by 8. The value in output_bit_buffer is thus 10000000 which will be used when we call the output_code function later.

The value in output_bit_buffer is thus 10000000 which will be used when we call the output_code function later.

We now come back to the if statement where we set prev to curr as we change the value of curr in the while loop. Thus prev is B and curr is C which gives index a value of 1138.

As this is the first time a index of 1138 is generated, the first if is once again true as codeintarray[1138] is –1.  The value of next_code is 257 and we place this value ay codeintarray[1138]. We also store B and C in the arrays previntarray and currchararray.

We now call the function output_code with the value of prev as B. The output_bit_count variable is 4 from the previous call along with output_bit_buffer which is 10000000. The bits to be left shifted now reduce from 20 the last time to 16. Thus the code variable left shifted by 16 gives us 00420000 but as we bit wise or with the previous value of output_bit_buffer which is 10000000 the value finally is 10420000.

The variable output_bit_count is 16 as we add 4 the previous value by 12. The while loop will now go on twice. The first time we right shift by 24 so we are throwing away all but the top eight bits which moves them to the start of the int. We are thus writing 10 to disk. We then reduce output_bit_count by 8 and more importantly we left shift by 8 bits dropping the top 8 bits 10 and moving the 42 to as the top bits.

Thus at the start of the while at the second iteration we get output_bit_count=8 and output_bit_buffer=42000000. This allows us to write 42 to disk but at the end of the loop the two variables are 0.

Thus we write two bytes on disk 10 and 42 on disk and we have the following bytes 04 10 42. The 4 from the first byte and 1 from the second will give us an A. The 0 from the second bytes is ignored and the 42 from the third byte is used in full. Thus the output_code when used the first time writes only four useful bits out of 8 on disk, the next it writes 12 useful bits out of 16. From now on this pattern repeats itself.

Now we move prev to C and curr is now A. The index is a new value 1107. once again the first if of the inner while will be true. We will also be in the if after the inner while and the value of next_code will be 258 and we set the codeintarray[1107] to 258.

In the function output_code code is C, both variables are 0 and the left shift bits is once again 0. The code to be shifted is 04300000 and thus we write 4 on disk but the buffer gets the value left out 3 and thus becomes 30000000. In the function output_code if we do not bit wise or the buffer variable we will loose out the lower nibble of the byte for the odd call of the output_code function.

Now comes the real meat of the LZW algorithm. So far we have used 3 bytes to store AB which is two bytes, not a very good example of compression. We now set prev to A and curr to B which is a pattern that has occurred before. This gives us a familiar pattern of AB and thus the good old index value of 1121. Thus in the inner while, the first if statement that was always true, is not so.

The second if is now true as the previntarray has a value of A or prev and currchararray has a value of curr or B. We quit out of the inner while and now the if statement is false but the else is true. Here we have no output code function at all. Remember it is this function that writes to the file.

The fewer times we call this function the better. We simply set the value of prev to 256 which is what the codeintarray contains at index value 1121. This is the value we wrote when we first came across a index of 1121. This is why prev is a int as at may contain numbers from 256 onwards.  Thus prev either contains the value of curr or next_code which is what we store in the codeintarray.

We come back to start of the outer while loop with prev as 256 and curr as C. The 256 once again represents AB. The index value will be unique at 1328. The first if statement in the while loop will be true and the first if statement outside the loop will also be true.

The codeintarray[1328] will contain 259 and the previntarray[1328] will have 256 and the currchararray[1328]  will also have C. We now call the output_code function with a value of prev or 256. Lets us see how many bytes it will output for the abc combo.

The leftovers from the previous call are output_bit_count=4 and output_bit_buffer=30000000 for the C. After shifting by 16 the buffer now contains 31000000. We enter the while to write 31 and 00. This is what we finally call compression as the ABC was written as two bytes only.

Lets us now take a larger string ABCD and repeat it a zillion times as.

char *p="ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCD";

Lets us now understand for a last time how a real life compression algorithm works. We have repeated the string ABCD 10 times and the file vijay.txt has a size of 27 bytes and not 40 bytes. We now add one more ABCD to the string p.

The size of the file only increases by 1 byte to 28 bytes. If we have a single ABCD as the string the size of the file vijay.txt is a whooping 9 bytes a minus 200+ compression. Adding one more ABCD increases the file size to 12 bytes a increase of 3 bytes. One more ABCD increases the size to 15 bytes again a increase of 3 bytes.

The size of the string is 12 bytes. One more ABCD brings our string to 16 bytes, and the size of vijay.txt to 18 bytes or a increase of 3 bytes again. Thus the last three increased the size by 3 bytes and the string increased by 4 bytes. A little compression.

Now we add one more ABCD to bring our string size to 20 bytes and the size of vijay.txt increases by only one byte to 19 bytes. We add one more ABCD and the size increase is by 2 bytes to 21 bytes and the string is 20 bytes. One more ABCD and the increase is 1 byte. One more ABCD and the increase is 2 bytes. Lets us now understand how this magic happens.

We start at the while where prev is 41 or A and curr is 42 or B. The index value is 1121. As this is the first time we quit out of the first if  as codeintarray[index] is –1. We fill up the three arrays and increase next_code by 1. We write 04 on disk and store the 01 in the buffer variable. For the B and C combo, we write 10 and for the A and 42 for the B. For the C D combo we write 04 and store 3 in the buffer variable. For the D A combo we write 30 for the C and 44 for the D.

Now we come back to A B combo where the index is 1121 and thus the second if is true and the last else. Here we do not try to write anything to disk but simply set prev to 256, the value at the codeintarray offset of index. Remember 256 means the first combo, 257 the second.  Now that prev is 100 and curr 43 we get a new index value of 1323. This now writes 10 to disk only. The next combo is C D and this index value has occurred before.

Lets now explain everything in a slightly different way by drawing a table.

Prev

Curr

Index

Next_Code

codeintarray

previntarray

currchararray

written

buffer

A

B

1121

256

256

A

B

O4

10

B

C

1138

257

B

C

10 42

00

 

C

D

1027

258

258

C

D

04

30

D

A

1108

259

259

D

A

30 44

00

A

B

 

 

 

 

 

 

 

256

C

1328

260

260

256

C

10

0

C

D

 

 

 

 

 

 

 

258

A

1298

261

261

258

A

01 02

 

A

B

 

 

 

 

 

 

 

256

C

 

 

 

 

 

 

 

260

D

1348

262

262

260

D

10

40

D

A

 

 

 

 

 

 

 

259

B

1315

263

263

259

B

41 03

0

B

C

 

 

 

 

 

 

 

257

D

1345

264

264

257

D

10

10

D

A

 

 

 

 

 

 

 

259

B

 

 

 

 

 

 

 

263

C

1335

265

265

263

C

11 07

0

C

D

 

 

 

 

 

 

 

258

A

 

 

 

 

 

 

 

261

B

1317

266

266

261

B

10

50

B

C

 

 

 

 

 

 

 

256

D

 

 

 

 

 

 

 

264

A

1304

267

267

264

A

51 08

0

A

B

 

 

 

 

 

 

 

256

B

 

 

 

 

 

 

 

259

D

 

 

 

 

 

 

 

262

A

1302

268

268

262

A

10

60

A

B

 

 

 

 

 

 

 

256

C

 

 

 

 

 

 

 

260

D

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

Last Bytes  61 06  00
                    Ff   f0   00

Lets now understand the big table that we have painstakingly drawn out for you. When prev and curr is A and B, we are writing 256 to the codeintarray. This can only mean that the number 256 stands for the patter AB. In the same vein 257 stands for BC, 258 for CD and 259 for DA. As the value of index cannot exceed 4095 which is 2 raised to 12, we are writing a 12 bit number each time to disk.

The first time we are writing the high eight bits of the number and the second time we are writing the low 4 bits of the previous value and the 12 bits of the current. We save the 4 bits of the previous value in the buffer variable. Thus when we write a A or 0x41, we write 04 in the first round and 1  in the second call to the function. Then we write the B as 042.

This is why a AB gets written as 3 bytes 04 10 42. When we come across a pattern AB that is already there, we do not write anything to disk. We simply in the else find out the value in codeintarray that corresponds to the index offset. In this case it is 256 and in other cases a number larger than 256 or the value of the variable next_code. Then we repeat the while with a value of prev as 256 and curr as C.

As this is a new index we write the value in codeintarray as 260 but the previntarray now contains 256. Thus as 256 stands for AB and therefore 260 C stands for the pattern ABC. We write 256 as 100 on disk. The pattern CD also exists and therefore we now get prev as 258 and curr as A. This is a new pattern given a next_code of 261. The previntarray has a value of 258 which in turn is CD and curr is A. Thus 261 is CDA.

Now comes further compression much later on. DA is already there, 259 B is also there and 263 C is new. Codeintarray is 265 and previntarray is 263 and curr is C. If you look at 263 it is 259 B, but 259 is DA. Thus 263 is DAB and now 265 is DABC. Thus as the patters move on we find that a single number represents larger and larger values.

The last value of prev is 106. The value of buffer is 61060000 and therefore we write 61 and 06. We then write a 4095 which is the largest 12 bit value we write, which is 0xfff. We write out the FF first and when writing out the last 0, we write f and then 000. This completes the compression, now lets start with the decompression.

We close the file vijay.txt using fclose and then again open it for reading. We first next_code at 256 as we assume the individual chars take up the first 255 indexes into the array. Earlier we used the output_code for writing the file, Now we use the function input_code. Where we used prev and curr as variables, now we use old and new. We set old to the first value returned by the input_code function and in the while later we make old the value of new.

The function input_code job is to read the 12 bits written to disk and convert it into a int. This function therefore uses the same technique  that we used earlier to write 12 bits to disk. Had we simply written 16 bits to disk, our two functions input_code and output_code would be much simpler.

The first time both variables input_bit_count and input_bit_buffer will be zero. The while loop quits out until the count variable is less than or equal to 24. We increase its value by 8 each time in the loop. As then value starts from 0, the first time we call input_code, it remains in the loop 4 times. When it quits out of the while loop its value is 32.

We then reduce it by 12 at the end of the loop so that the next time input_code function is called its value is 20. It now enters the loop only once and at value 28 it leaves the loop. Then we reduce it by 12 so that its value is 16. The next time function input_code is called the while loop is done twice and its value is 32. We then decrease it by 12 to get it back to 20 and the pattern continues. It gets called once and then twice and so on.

The first time we must repeat is the only special case where it gets called four times. The count variable is important to us as we use to the left shift the byte that we read from disk. The first time we shift it 24,16,8 and 0 times. In future when the loop gets done once we left shift it 4 times and when the while gets called twice we left shift it 8 and 0 times.

Remember we are subtracting the count variable from 24 so when we say that we are left shifting 8, we are subtracting 24 from count whose value now is 16. We also are like before bitwise oring with what is already there in the buffer variable. Our first four bytes on disk are 04104204. Thus the first iteration of the for loop buffer getc(fp) returns 04, this gets left shifter by 24 in buffer to give us 04000000.

We then pick up the 10 and left shift this by 16 to give us 04100000. Then the third byte is 42 which is left shifted only 8 bits to give buffer a value 04104200 and finally the last 04 gives buffer 04104204. We are only interested in the top 12 bits as of now so if we right shift by 20 we get 041 which is stored in the variable return value and is returned back.

We then drop the top 12 bits by left shifting to get buffer as 04204000. In the next call of input_code, we will return 042 as the answer and the buffer variable will contain 04300000 the top 12 bits being the next value. This is how we keep picking up the top 12 bits and returning them.

Depending upon the number of times the while gets on, when it gets called once we are reading only 8 out of the 12 bits and the next time when it gets called twice, we are reading 4 lower bits of the earlier 12 bits and the entire 12 bits of the next number.

Thus every time we call input_code, we get back the same prev that we wrote to disk. The above table will show us the values of prev that now are returned back. The first word is unique and hence we immediately write it and set curr to it which is a A.

We then entire a while loop where we set the variable new to each new value from disk using the function input_code. The last value we wrote was 4095 and the minute new becomes this value we quit out of the loop. The other important point is that at the end when we set old to new, we also increase the value of next_code by 1. Thus the number of times we increased next_code in the compression is the same number of times our while loop will loop.

We now check new with the value of next_code. Normally new will always we less than next_code and thus the if statement will be true. We call the function decode_string with the address of the array decode_stack and new which is B. In this function we basically loop as long as the value of code or new is greater than 255.

In our case the value of code is less than 255 so we never enter the loop. We then add the code parameter to the array and also null terminate it and return the address of this string or array. We simply print out one by one the number of chars in the string called string.

For some time at least the length of the string will be 1. We set the curr variable to be the first char of this string and in this case the value of new and curr will be the same. We enter a while loop and display the entire string till we reach the end of the string the null character. We use next_code as the index to the previntarray and currchararray to hold the values of  old and curr.

Thus at the start old is 65 and curr is 66. These are the same values it had earlier when we build the dictionary. We are building a similar dictionary now. We will only understand the decompression when we start with a value of new as 256 and the value of next_code is 259 so once again the if is called. Here we call the function decode_string with the big array and the value 256.

The while loop now is true as the value of code is greater than 255. The currchararray [code] or  currchararray[256] contains B and previntarray[256] contains a A. We first put B in the buffer array in the loop and then the char A at the end of the loop. When we leave the loop we are at the A or the end of the loop. We display the A and then the B and then quit out as we have moved to byte 3999 of the array which will always contain –1.

Remember we are placing stuff backwards in the array and when we move out of the function decode_string, we have to read the stuff backwards. Also each time we are making sure that if the value at previntarray is larger than 256, we stay in the loop and use the currchararray to tell us what character it stands for. Remember old is like prev used earlier it can either be 0 to 255 or a number larger than 256 that stands for a combo of characters.

Like the last example lets build a table to further explain what is going on.

New

next_code

curr

string

old

previntarray

currchararray

 

 

 

A

 

 

 

B

256

B

B

65

65

66

C

257

C

C

66

66

67

D

258

D

D

67

67

68

256

259

A

AB

68

68

65

258

260

C

CD

256

256

67

260

261

A

ABC

258

258

65

259

262

D

DA

260

260

68

257

263

B

BC

259

259

66

263

264

D

DAB

257

257

68

261

265

C

CDA

263

263

67

264

266

B

BCD

261

261

66

262

267

A

ABCD

264

264

65

262

268

A

ABCD

262

262

65

 

 

 

 

 

 

 

 
 
LZWBasics
 
#include <stdio.h>
unsigned int ret,cnt,buff,times;
FILE *fp;
inp()
{
if ( times%2 == 0)
{
char i,j,k;
i = fgetc(fp);
j = fgetc(fp);
ret = i << 4;
k = j & 0xF0;
k = k >> 4;
ret = ret | k;
buff = j & 0x0F;
printf("\nif %02x %02x ret=%x buff=%08x\n",i,j,ret,buff);
}
else
{
char i;
i = fgetc(fp);
ret = buff & 0x0f;
ret = ret << 8;
ret = ret | i;
printf("\nelse %02x ret=%x\n",i,ret);
}
times++;
return ret;
}
out(unsigned int value)
{
if ( times % 2 == 0)
{
fputc(value >> 4 ,fp);
buff = value & 0x0f;
printf("In if value=%04x written %02x buffer=%08x\n",value,value >> 4,buff);
}
else
{
char dummy;
dummy = buff & 0x0f;
dummy = dummy << 4;
dummy = dummy | (value >> 8);
printf("In else value=%04x value>>8=%04x Writing %02x %02x\n",value,value>>8,dummy,value & 0xff);
fputc(dummy,fp);
fputc(value & 0xff , fp);
}
times++;
}
main()
{
fp=fopen("vijay.txt","wb");
out(67);
out(260);
out(258);
out(66);
out(0);
out(0);
fclose(fp);
fp=fopen("vijay.txt","rb");
printf("\n");
times = 0;
printf("%c ",inp());
printf("%d ",inp());
printf("%d ",inp());
printf("%c ",inp());
printf("%d\n",inp());
}
 
In if value=0043 written 04 buffer=00000003
In else value=0104 value>>8=0001 Writing 31 04
In if value=0102 written 10 buffer=00000002
In else value=0042 value>>8=0000 Writing 20 42
In if value=0000 written 00 buffer=00000000
In else value=0000 value>>8=0000 Writing 00 00
 
 
if 04 31 ret=43 buff=00000001
C 
else 04 ret=104
260 
if 10 20 ret=102 buff=00000000
258 
else 42 ret=42
B 
if 00 00 ret=0 buff=00000000
0
 

In the above program we are simply writing a 12 bit value to disk and then reading 12 bits a time. We use a function out to write a 12 bit value to disk passed as a parameter and then the inp function to return a 12 bit value. The fgetc and fputc function are great for their job but they write 8 bits at a time.

We would like to write 12 bits at one time as our range of numbers are not 0 to 255 but 0 to 4095. If our range was 0 to 65535, then we could use fputc to write 2 bytes or 16 bits at a time. We could have written a 12 bit number as 16 bits but that would mean wasting 4 bits on disk each time. The problem with writing 12 bits at a time is that we cannot write 12 bits at a time, we have no choice but to write 8 bits at a time.

Lets take a specific example to explain. We want to write A and B to disk as 12 bits values. If we were to write them as chars we would write 41 42, as shorts 00 41 00 42 as big endian. If we were to write them as little endian, we would reverse the bytes as 41 00 and 42 00. If we were to use 12 bit values we would write A and B as 041 042. A 256 would be written as 100 and 258 as 102. 

The rule we follow will be very simple. The out function will first be called once and then the next time we call it will be called twice. This pattern will be constantly repeated. The first time we will write the top bits of the 12, the second time out of the 16 bits written, we will write the high 4 bits first and then the remaining complete 12 bits of the next int. Thus we need a way of knowing that we are writing 8 or 16 bits.

We use a variable times to tell us whether it is the odd or even time that we are calling the out function. At the end of the function we simply increase times by 1. To start with, its value is 0 and the mod of times with 2 is 0. When we call times the second time, its value is 1 and times mod 2 is 1.

Thus whenever the remainder is 0, we are calling the odd number of times and the if statement is true. When the mod is odd, we are calling it the even number of times. As explained before the first time we call it we must write the top eight bits to disk and store the remaining 4 bits somewhere.

Lets look at the actual code. The if statement is true the first time. We have to write a single byte to disk.  The only problem is that we have to write the top eight bytes so we right shift the bytes by 4 and then write. The lower four bits or nibble must be saved somewhere and we use the buffer variable for this purpose. But as we want to save only the first four bits we bitwise and all the other bits by 0 and the first four by 1.

Thus for the 0x43 or C we write 04 but store the 3 in buffer. In the same vein for a 258 or 102 we write 10  and put 2 for writing later. The next time we call the out function, we write two bytes. We first need to write the four bits in buffer which is a 1 and then as we are passing 100 we need to write the 1 or the high four bits. We create a variable dummy and take the first four bits off from the buff variable.

We then left shift it by 4 so that it becomes the high nibble. Remember these bits belong to the char whose high 8 bits we have just written. We then have to write in the first four bits of dummy the high 4 bits of the current byte to write. We therefore simply right shift value by 8 bringing the bytes 9 to 12  at position 1 to 4. This is how we create the first byte to write which is combo of the low 4 bytes of the previous word and the high 4 bits of the current word. 

Writing the second byte is easy as we have to simply take the top 8 bits by anding.  As an example the byte to be written now is 104. We need to write the 3 left over from buffer and the top four bits as 1. This gives us 31 and the low 8 bits to be written are 04.  The bytes written are 43 and 04.

Now lets do the reverse and read these bytes from disk. We use the variables times for the same reason as before, to differentiate the even and odd times the inp function gets called. We do a small reversal of roles. The first time we get called we read two bytes from disk i.e. 16 bits. We use 12 of these and return their values. The top 4 bits we keep in buffer to be used the second time function inp gets called.

Then we read only 8 bits an use the 4 leftover bits as the high bits for these 8 bits now read. Lets start with actual code. When times%2 is equal to zero, it is the odd or the first call. We have set times to 0 in main. We read the first two bytes in variables I and j respectively with values 04 and 31. We need to get at the values 043.

To do this we need to shift I or 04 4 bits to the left in the ret variable. We now need to extract the high 4 bits or 3 from the byte j. To do this we first bit wise and with f0 making the bottom nibble 0. This gives us the 3 but the problem is that it is in the wrong place. We therefore right shift it by 4 so that it becomes the low nibble from the high nibble. We then bit wise or this with ret giving us 12 bits.

As before we need to store the low nibble of j in buff we do by anding with 0f. The second time we get called we read the byte from disk in a variable i. We bit wise and with 0f the buff variable to get at the top nibble. This step is not really needed as we know that all of buf but the first nibble is already zero.

These 4 bits are actually the high 4 bits of the 12 bits and thus we move them left by 8 so that they occupy the last 4 of the 12 bits. We simply bit wise or with I to give us the low 8 bits.

 
#include <stdio.h>
unsigned int ret,cnt,buff;
FILE *fp;
inp()
{
printf("\n\nStart of while cnt=%d buff=%08x\n",cnt,buff);
while (cnt <= 24)
{
unsigned char c = fgetc(fp);
printf("%02x %d %08x ||| ",c,cnt,buff);
buff |= c << (24-cnt);
cnt += 8;
}
printf("\nEnd of while cnt=%d buff=%08x\n",cnt,buff);
ret=buff >> 20;
buff <<= 12;
cnt -= 12;
printf("End of function return=%d cnt=%d buff=%08x\n",ret,cnt,buff);
return ret;
}
out(unsigned int value)
{
buff = buff | value << (20-cnt);
cnt = cnt + 12;
printf("\nBefore while loop cnt=%d buff=%08x\n",cnt,buff);
while (cnt >= 8)
{
fputc(buff >> 24,fp);
printf("%02x ",buff >> 24);
buff = buff << 8;
cnt -= 8;
printf(" %d %08x..",cnt,buff);
}
printf("\nAfter while loop cnt=%d buff=%08x\n",cnt,buff);
}
main()
{
fp=fopen("vijay.txt","wb");
out(65);
out(256);
out(257);
out(66);
out(0);
out(0);
fclose(fp);
fp=fopen("vijay.txt","rb");
printf("\n");
printf("%c ",inp());
printf("%d ",inp());
printf("%d ",inp());
printf("%c ",inp());
printf("%d\n",inp());
}
 
Before while loop cnt=0 buff=04300000
04  4 30000000..
After while loop cnt=4 buff=30000000
 
Before while loop cnt=4 buff=31050000
31  8 05000000..05  0 00000000..
After while loop cnt=0 buff=00000000
 
Before while loop cnt=0 buff=10200000
10  4 20000000..
After while loop cnt=4 buff=20000000
 
Before while loop cnt=4 buff=20420000
20  8 42000000..42  0 00000000..
After while loop cnt=0 buff=00000000
 
Before while loop cnt=0 buff=00000000
00  4 00000000..
After while loop cnt=4 buff=00000000
 
Before while loop cnt=4 buff=00000000
00  8 00000000..00  0 00000000..
After while loop cnt=0 buff=00000000
 
Start of while cnt=0 buff=00000000
04 0 00000000  04000000 ||||31 8 04000000  04310000 ||||05 16 04310000  04310500 ||||10 24 04310500  04310510 ||||
End of while cnt=32 buff=04310510
End of function return=67 cnt=20 buff=10510000
C 
 
Start of while cnt=20 buff=10510000
20 20 10510000  10510200 ||||
End of while cnt=28 buff=10510200
End of function return=261 cnt=16 buff=10200000
261 
 
Start of while cnt=16 buff=10200000
42 16 10200000  10204200 ||||00 24 10204200  10204200 ||||
End of while cnt=32 buff=10204200
End of function return=258 cnt=20 buff=04200000
258 
 
Start of while cnt=20 buff=04200000
00 20 04200000  04200000 ||||
End of while cnt=28 buff=04200000
End of function return=66 cnt=16 buff=00000000
B 
 
Start of while cnt=16 buff=00000000
00 16 00000000  00000000 ||||ff 24 00000000  000000ff ||||
End of while cnt=32 buff=000000ff
End of function return=0 cnt=20 buff=000ff000
0
 

Lets now understand the same example using a more generic method. The basic rule does not change at all. The function out gets called as before and we now have a while loop to write either 8 bits or 16 bits to disk. The cnt variable is used to control the number of times the while loop gets called. To start with its value is 0.

We then increase it by 12 before the while loop. The while loop checks whether it is greater than 8, which it is. We thus enter the loop once with value 12. In the loop we reduce cnt by 8 to makes its value 4. As it is less than 8, we quit out of the loop. Thus when its value is 12 at the start of the loop the loop gets called 1.

Now the next time we call the out function, the value of cnt at the start is 4. We add 12 to it to make its value 16. Now the while loop goes on twice. When we leave the while loop cnt is 0, so that the next time we call function out, the loop happens only once. Thus cnt inside the loop is 12 or 16 and 8. We do not use cnt for anything else other than making the loop go on once or twice.

We only use the cnt variable at the start of the function where its value is 0 or 4. Thus makes the shift of the bits 0 or 16 as we subtract from 20. The first time the value is 0 and we are shifting the contents of buf by 20, making the top 12 bits of buff as 04300000.

We then right shift buf by 24 making the last 8 bits 04 as the first 8 bits and writing this out to disk. In the same loop we left shift buf by 8 dropping the last 8 bits which we just wrote to disk. Buf now contains 3000000 which is the 3 that needs to be written to disk in the second round.

The second time out gets called, the top bits of buf are already used. We now left shift value into buf 16 times and not 20 times This is so that the 105 occupies the last 16 bits minus 4 which are already occupied. Thus buf now contains 31050000. As we are bit shifting value and not buf and then bit wise oring buff , the 3 does not fall out.

Now in the while loop we first write 31 the top high bits, then we remove the 31 and then again right 05 and then remove them. Thus the second time out gets called buf has nothing to carry. Thus as before we write 8 bits first and then the next 4 bits and the full 12 bits to make up 16 bits.

The inp function is a little more complex than it should be. The cnt variable is once again used to keep track of the while loop. The only problem is that the first time its gets called 4 times and then once and then twice for the same reason mentioned earlier. Lets look at the first time it gets called. We are reading one byte at a time and we pushing these bytes into buf. Thus the buf variable will now contain the first four bytes as byte 04310510.

We simply return the top 12 bits by right shifting by 20 the value of buf into ret. We also then left shift buf by 12 to drop these 12 bits. When we come back to this function the second time, buf now contains 10510 000. Of these bytes only the 10510 are of use to use.

We now read the next byte from disk which is 20 and we need to add this to buf to make buf 1051020 0. The value of cnt is 20 so that we left shift 24-20 only 4 bits. We return 105 and make buf 1020 0000. Then next time buf is called we read two bytes from disk.

The first time buf becomes 10204200 as the first byte is 42. The second byte is 00 and thus buff remains what it is. Thus we use cnt which is 16 and 24 in such a way so that the first byte from disk is left shifted by 8 and the second by 0. Thus the even times we call inp, we add the two bytes in the first 16 bits. This is how we add the bytes to the buff variable and take them off 12 bits at a time.

It does the same thing like the earlier program but is a lot more complex.

Back to the main page