LZW Compression
|
#include <stdio.h>
char *currchararray,decode_stack[4000],*string,curr;
int next_code,prev,index,i,*previntarray,*codeintarray,new,old;
unsigned int output_bit_count,output_bit_buffer,return_value,input_bit_count,input_bit_buffer;
FILE *fp;
char *decode_string(unsigned char *buffer,unsigned int code)
{
while (code > 255)
{
*buffer++ = currchararray[code];
code=previntarray[code];
}
*buffer=code;
return(buffer);
}
input_code()
{
while (input_bit_count <= 24)
{
input_bit_buffer |= getc(fp) << (24-input_bit_count);
input_bit_count += 8;
}
return_value=input_bit_buffer >> (32-12);
input_bit_buffer <<= 12;
input_bit_count -= 12;
return(return_value);
}
output_code(unsigned int code)
{
printf("function output_code code=%x %c output_bit_count=%d output_bit_buffer=%x left shift bits=%d code shifted=%08x ",code,code,output_bit_count,output_bit_buffer,32-12-output_bit_count,code << (32-12-output_bit_count));
output_bit_buffer = output_bit_buffer | code << (32-12-output_bit_count);
output_bit_count = output_bit_count + 12;
printf("After output_bit_count=%d output_bit_buffer=%x\n",output_bit_count,output_bit_buffer);
while (output_bit_count >= 8)
{
printf("In while output_bit_count=%d output_bit_buffer=%x output_bit_buffer >> 24=%x ",output_bit_count,output_bit_buffer,output_bit_buffer >> 24);
putc(output_bit_buffer >> 24,fp);
output_bit_buffer = output_bit_buffer << 8;
printf("at end output_bit_buffer=%x\n",output_bit_buffer);
output_bit_count -= 8;
}
}
main()
{
char *p="ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDABCD vijay mukhi is shit and so is his wife";
codeintarray=(int *)malloc(5021*4);
previntarray=(int *)malloc(5021*4);
currchararray=(char *)malloc(5021);
fp=fopen("vijay.txt","wb");
next_code=256;
for (i=0;i<5021;i++)
codeintarray[i]= -1;
prev=*p++;
while ((curr=*p++) != 0)
{
printf("Start of while prev=%c %x curr=%c %x curr << 4=%d ",prev,prev,curr,curr,curr << 4);
index = (curr << 4) ^ prev;
printf("index=%d \n",index);
while (1)
{
if (codeintarray[index] == -1)
{
printf("1st break index=%d\n",index);
break;
}
if (previntarray[index]==prev && currchararray[index]==curr)
{
printf("2st break index=%d previntarray[index]=%x currchararray[index]=%x\n",index,previntarray[index],currchararray[index]);
break;
}
printf("In while old index=%d ",index);
index = (5021 - index) - index;
printf("New index=%d\n",index);
if (index < 0)
{
printf("3rd if index=%d\n",index);
index += 5021;
}
}
if (codeintarray[index] == -1)
{
printf("Last if index=%d next_code=%d prev=%c %x curr=%c %x\n",index,next_code,prev,prev,curr,curr);
codeintarray[index]=next_code++;
previntarray[index]=prev;
currchararray[index]=curr;
output_code(prev);
prev=curr;
}
else
{
prev=codeintarray[index];
printf("Last Else index=%d prev=%d\n",index,prev);
}
printf("\n");
}
output_code(prev);
output_code(4095);
output_code(0);
fclose(fp);
fp=fopen("vijay.txt","rb");
next_code=256;
old=input_code();
curr=old;
printf("%c",old);
while ((new=input_code()) != (4095))
{
if (new>=next_code)
{
*decode_stack=curr;
string=decode_string(decode_stack+1,old);
}
else
string=decode_string(decode_stack,new);
curr=*string;
while (string >= decode_stack)
printf("%c",*string--);
previntarray[next_code]=old;
currchararray[next_code]=curr;
next_code++;
old=new;
}
}
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121
1st break index=1121
Last if index=1121 next_code=256 prev=65 41 curr=B 42
function output_code code=41 A output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=04100000 After output_bit_count=12 output_bit_buffer=4100000
In while output_bit_count=12 output_bit_buffer=4100000 output_bit_buffer >> 24=4 at end output_bit_buffer=10000000
Start of while prev=B 66 curr=C 43 curr << 4=1072 index=1138
1st break index=1138
Last if index=1138 next_code=257 prev=66 42 curr=C 43
function output_code code=42 B output_bit_count=4 output_bit_buffer=10000000 left shift bits=16 code shifted=00420000 After output_bit_count=16 output_bit_buffer=10420000
In while output_bit_count=16 output_bit_buffer=10420000 output_bit_buffer >> 24=10 at end output_bit_buffer=42000000
In while output_bit_count=8 output_bit_buffer=42000000 output_bit_buffer >> 24=42 at end output_bit_buffer=0
Start of while prev=C 67 curr=D 44 curr << 4=1088 index=1027
1st break index=1027
Last if index=1027 next_code=258 prev=67 43 curr=D 44
function output_code code=43 C output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=04300000 After output_bit_count=12 output_bit_buffer=4300000
In while output_bit_count=12 output_bit_buffer=4300000 output_bit_buffer >> 24=4 at end output_bit_buffer=30000000
Start of while prev=D 68 curr=A 41 curr << 4=1040 index=1108
1st break index=1108
Last if index=1108 next_code=259 prev=68 44 curr=A 41
function output_code code=44 D output_bit_count=4 output_bit_buffer=30000000 left shift bits=16 code shifted=00440000 After output_bit_count=16 output_bit_buffer=30440000
In while output_bit_count=16 output_bit_buffer=30440000 output_bit_buffer >> 24=30 at end output_bit_buffer=44000000
In while output_bit_count=8 output_bit_buffer=44000000 output_bit_buffer >> 24=44 at end output_bit_buffer=0
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121
2st break index=1121 previntarray[index]=41 currchararray[index]=42
Last Else index=1121 prev=256
Start of while prev= 256 curr=C 43 curr << 4=1072 index=1328
1st break index=1328
Last if index=1328 next_code=260 prev=256 100 curr=C 43
function output_code code=100 output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10000000 After output_bit_count=12 output_bit_buffer=10000000
In while output_bit_count=12 output_bit_buffer=10000000 output_bit_buffer >> 24=10 at end output_bit_buffer=0
Start of while prev=C 67 curr=D 44 curr << 4=1088 index=1027
2st break index=1027 previntarray[index]=43 currchararray[index]=44
Last Else index=1027 prev=258
Start of while prev=[1] 258 curr=A 41 curr << 4=1040 index=1298
1st break index=1298
Last if index=1298 next_code=261 prev=258 102 curr=A 41
function output_code code=102 [1] output_bit_count=4 output_bit_buffer=0 left shift bits=16 code shifted=01020000 After output_bit_count=16 output_bit_buffer=1020000
In while output_bit_count=16 output_bit_buffer=1020000 output_bit_buffer >> 24=1 at end output_bit_buffer=2000000
In while output_bit_count=8 output_bit_buffer=2000000 output_bit_buffer >> 24=2 at end output_bit_buffer=0
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121
2st break index=1121 previntarray[index]=41 currchararray[index]=42
Last Else index=1121 prev=256
Start of while prev= 256 curr=C 43 curr << 4=1072 index=1328
2st break index=1328 previntarray[index]=100 currchararray[index]=43
Last Else index=1328 prev=260
Start of while prev=
260 curr=D 44 curr << 4=1088 index=1348
1st break index=1348
Last if index=1348 next_code=262 prev=260 104 curr=D 44
function output_code code=104
output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10400000 After output_bit_count=12 output_bit_buffer=10400000
In while output_bit_count=12 output_bit_buffer=10400000 output_bit_buffer >> 24=10 at end output_bit_buffer=40000000
Start of while prev=D 68 curr=A 41 curr << 4=1040 index=1108
2st break index=1108 previntarray[index]=44 currchararray[index]=41
Last Else index=1108 prev=259
Start of while prev=
259 curr=B 42 curr << 4=1056 index=1315
1st break index=1315
Last if index=1315 next_code=263 prev=259 103 curr=B 42
function output_code code=103
output_bit_count=4 output_bit_buffer=40000000 left shift bits=16 code shifted=01030000 After output_bit_count=16 output_bit_buffer=41030000
In while output_bit_count=16 output_bit_buffer=41030000 output_bit_buffer >> 24=41 at end output_bit_buffer=3000000
In while output_bit_count=8 output_bit_buffer=3000000 output_bit_buffer >> 24=3 at end output_bit_buffer=0
Start of while prev=B 66 curr=C 43 curr << 4=1072 index=1138
2st break index=1138 previntarray[index]=42 currchararray[index]=43
Last Else index=1138 prev=257
Start of while prev= 257 curr=D 44 curr << 4=1088 index=1345
1st break index=1345
Last if index=1345 next_code=264 prev=257 101 curr=D 44
function output_code code=101 output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10100000 After output_bit_count=12 output_bit_buffer=10100000
In while output_bit_count=12 output_bit_buffer=10100000 output_bit_buffer >> 24=10 at end output_bit_buffer=10000000
Start of while prev=D 68 curr=A 41 curr << 4=1040 index=1108
2st break index=1108 previntarray[index]=44 currchararray[index]=41
Last Else index=1108 prev=259
Start of while prev=
259 curr=B 42 curr << 4=1056 index=1315
2st break index=1315 previntarray[index]=103 currchararray[index]=42
Last Else index=1315 prev=263
Start of while prev= 263 curr=C 43 curr << 4=1072 index=1335
1st break index=1335
Last if index=1335 next_code=265 prev=263 107 curr=C 43
function output_code code=107 output_bit_count=4 output_bit_buffer=10000000 left shift bits=16 code shifted=01070000 After output_bit_count=16 output_bit_buffer=11070000
In while output_bit_count=16 output_bit_buffer=11070000 output_bit_buffer >> 24=11 at end output_bit_buffer=7000000
In while output_bit_count=8 output_bit_buffer=7000000 output_bit_buffer >> 24=7 at end output_bit_buffer=0
Start of while prev=C 67 curr=D 44 curr << 4=1088 index=1027
2st break index=1027 previntarray[index]=43 currchararray[index]=44
Last Else index=1027 prev=258
Start of while prev=[1] 258 curr=A 41 curr << 4=1040 index=1298
2st break index=1298 previntarray[index]=102 currchararray[index]=41
Last Else index=1298 prev=261
Start of while prev= 261 curr=B 42 curr << 4=1056 index=1317
1st break index=1317
Last if index=1317 next_code=266 prev=261 105 curr=B 42
function output_code code=105 output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10500000 After output_bit_count=12 output_bit_buffer=10500000
In while output_bit_count=12 output_bit_buffer=10500000 output_bit_buffer >> 24=10 at end output_bit_buffer=50000000
Start of while prev=B 66 curr=C 43 curr << 4=1072 index=1138
2st break index=1138 previntarray[index]=42 currchararray[index]=43
Last Else index=1138 prev=257
Start of while prev= 257 curr=D 44 curr << 4=1088 index=1345
2st break index=1345 previntarray[index]=101 currchararray[index]=44
Last Else index=1345 prev=264
Start of while prev= 264 curr=A 41 curr << 4=1040 index=1304
1st break index=1304
Last if index=1304 next_code=267 prev=264 108 curr=A 41
function output_code code=108 output_bit_count=4 output_bit_buffer=50000000 left shift bits=16 code shifted=01080000 After output_bit_count=16 output_bit_buffer=51080000
In while output_bit_count=16 output_bit_buffer=51080000 output_bit_buffer >> 24=51 at end output_bit_buffer=8000000
In while output_bit_count=8 output_bit_buffer=8000000 output_bit_buffer >> 24=8 at end output_bit_buffer=0
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121
2st break index=1121 previntarray[index]=41 currchararray[index]=42
Last Else index=1121 prev=256
Start of while prev= 256 curr=C 43 curr << 4=1072 index=1328
2st break index=1328 previntarray[index]=100 currchararray[index]=43
Last Else index=1328 prev=260
Start of while prev=
260 curr=D 44 curr << 4=1088 index=1348
2st break index=1348 previntarray[index]=104 currchararray[index]=44
Last Else index=1348 prev=262
Start of while prev= 262 curr=A 41 curr << 4=1040 index=1302
1st break index=1302
Last if index=1302 next_code=268 prev=262 106 curr=A 41
function output_code code=106 output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=10600000 After output_bit_count=12 output_bit_buffer=10600000
In while output_bit_count=12 output_bit_buffer=10600000 output_bit_buffer >> 24=10 at end output_bit_buffer=60000000
Start of while prev=A 65 curr=B 42 curr << 4=1056 index=1121
2st break index=1121 previntarray[index]=41 currchararray[index]=42
Last Else index=1121 prev=256
Start of while prev= 256 curr=C 43 curr << 4=1072 index=1328
2st break index=1328 previntarray[index]=100 currchararray[index]=43
Last Else index=1328 prev=260
Start of while prev=
260 curr=D 44 curr << 4=1088 index=1348
2st break index=1348 previntarray[index]=104 currchararray[index]=44
Last Else index=1348 prev=262
function output_code code=106 output_bit_count=4 output_bit_buffer=60000000 left shift bits=16 code shifted=01060000 After output_bit_count=16 output_bit_buffer=61060000
In while output_bit_count=16 output_bit_buffer=61060000 output_bit_buffer >> 24=61 at end output_bit_buffer=6000000
In while output_bit_count=8 output_bit_buffer=6000000 output_bit_buffer >> 24=6 at end output_bit_buffer=0
function output_code code=fff ÿ output_bit_count=0 output_bit_buffer=0 left shift bits=20 code shifted=fff00000 After output_bit_count=12 output_bit_buffer=fff00000
In while output_bit_count=12 output_bit_buffer=fff00000 output_bit_buffer >> 24=ff at end output_bit_buffer=f0000000
function output_code code=0 output_bit_count=4 output_bit_buffer=f0000000 left shift bits=16 code shifted=00000000 After output_bit_count=16 output_bit_buffer=f0000000
In while output_bit_count=16 output_bit_buffer=f0000000 output_bit_buffer >> 24=f0 at end output_bit_buffer=0
In while output_bit_count=8 output_bit_buffer=0 output_bit_buffer >> 24=0 at end output_bit_buffer=0
Function input_code start input_bit_count=0 input_bit_buffer=00000000
Function input_code While loop char c=04 input_bit_count=8 input_bit_buffer=04000000 24-input_bit_count=16
Function input_code While loop char c=10 input_bit_count=16 input_bit_buffer=04100000 24-input_bit_count=8
Function input_code While loop char c=42 input_bit_count=24 input_bit_buffer=04104200 24-input_bit_count=0
Function input_code While loop char c=04 input_bit_count=32 input_bit_buffer=04104204 24-input_bit_count=-8
Function input_code input_bit_buffer=04104204 input_bit_buffer >> 20=00000041 Function input_code end input_bit_buffer <<= 12=04204000 input_bit_count=20 return_value=65 A
Function input_code start input_bit_count=20 input_bit_buffer=04204000
Function input_code While loop char c=30 input_bit_count=28 input_bit_buffer=04204300 24-input_bit_count=-4
Function input_code input_bit_buffer=04204300 input_bit_buffer >> 20=00000042 Function input_code end input_bit_buffer <<= 12=04300000 input_bit_count=16 return_value=66 B
In main Start of while new=66 B next_code=256 if new=66 B
Function Decode_String start code=66 B
Function Decode_String end Code=66 B
if string=B
In main inner while *string=66 B
In main end of while old=65 A curr=66 B next_code=256 previntarray[next_code]=65 currchararray[next_code]=66
Function input_code start input_bit_count=16 input_bit_buffer=04300000
Function input_code While loop char c=44 input_bit_count=24 input_bit_buffer=04304400 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=04304410 24-input_bit_count=-8
Function input_code input_bit_buffer=04304410 input_bit_buffer >> 20=00000043 Function input_code end input_bit_buffer <<= 12=04410000 input_bit_count=20 return_value=67 C
In main Start of while new=67 C next_code=257 if new=67 C
Function Decode_String start code=67 C
Function Decode_String end Code=67 C
if string=C
In main inner while *string=67 C
In main end of while old=66 B curr=67 C next_code=257 previntarray[next_code]=66 currchararray[next_code]=67
Function input_code start input_bit_count=20 input_bit_buffer=04410000
Function input_code While loop char c=01 input_bit_count=28 input_bit_buffer=04410010 24-input_bit_count=-4
Function input_code input_bit_buffer=04410010 input_bit_buffer >> 20=00000044 Function input_code end input_bit_buffer <<= 12=10010000 input_bit_count=16 return_value=68 D
In main Start of while new=68 D next_code=258 if new=68 D
Function Decode_String start code=68 D
Function Decode_String end Code=68 D
if string=D
In main inner while *string=68 D
In main end of while old=67 C curr=68 D next_code=258 previntarray[next_code]=67 currchararray[next_code]=68
Function input_code start input_bit_count=16 input_bit_buffer=10010000
Function input_code While loop char c=02 input_bit_count=24 input_bit_buffer=10010200 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=10010210 24-input_bit_count=-8
Function input_code input_bit_buffer=10010210 input_bit_buffer >> 20=00000100 Function input_code end input_bit_buffer <<= 12=10210000 input_bit_count=20 return_value=256
In main Start of while new=256 next_code=259 if new=256
Function Decode_String start code=256
Function Decode_String while currchararray[code]=66 B previntarray[code]=65 A code=256
Function Decode_String end Code=65 A
if string=A
In main inner while *string=65 A
In main inner while *string=66 B
In main end of while old=68 D curr=65 A next_code=259 previntarray[next_code]=68 currchararray[next_code]=65
Function input_code start input_bit_count=20 input_bit_buffer=10210000
Function input_code While loop char c=41 input_bit_count=28 input_bit_buffer=10210410 24-input_bit_count=-4
Function input_code input_bit_buffer=10210410 input_bit_buffer >> 20=00000102 Function input_code end input_bit_buffer <<= 12=10410000 input_bit_count=16 return_value=258 [1]
In main Start of while new=258 [1] next_code=260 if new=258 [1]
Function Decode_String start code=258 [1]
Function Decode_String while currchararray[code]=68 D previntarray[code]=67 C code=258
Function Decode_String end Code=67 C
if string=C
In main inner while *string=67 C
In main inner while *string=68 D
In main end of while old=256 curr=67 C next_code=260 previntarray[next_code]=256 currchararray[next_code]=67
Function input_code start input_bit_count=16 input_bit_buffer=10410000
Function input_code While loop char c=03 input_bit_count=24 input_bit_buffer=10410300 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=10410310 24-input_bit_count=-8
Function input_code input_bit_buffer=10410310 input_bit_buffer >> 20=00000104 Function input_code end input_bit_buffer <<= 12=10310000 input_bit_count=20 return_value=260
In main Start of while new=260
next_code=261 if new=260
Function Decode_String start code=260
Function Decode_String while currchararray[code]=67 C previntarray[code]=256 code=260
Function Decode_String while currchararray[code]=66 B previntarray[code]=65 A code=256
Function Decode_String end Code=65 A
if string=A
In main inner while *string=65 A
In main inner while *string=66 B
In main inner while *string=67 C
In main end of while old=258 [1] curr=65 A next_code=261 previntarray[next_code]=258 currchararray[next_code]=65
Function input_code start input_bit_count=20 input_bit_buffer=10310000
Function input_code While loop char c=11 input_bit_count=28 input_bit_buffer=10310110 24-input_bit_count=-4
Function input_code input_bit_buffer=10310110 input_bit_buffer >> 20=00000103 Function input_code end input_bit_buffer <<= 12=10110000 input_bit_count=16 return_value=259
In main Start of while new=259
next_code=262 if new=259
Function Decode_String start code=259
Function Decode_String while currchararray[code]=65 A previntarray[code]=68 D code=259
Function Decode_String end Code=68 D
if string=DA
In main inner while *string=68 D
In main inner while *string=65 A
In main end of while old=260
curr=68 D next_code=262 previntarray[next_code]=260 currchararray[next_code]=68
Function input_code start input_bit_count=16 input_bit_buffer=10110000
Function input_code While loop char c=07 input_bit_count=24 input_bit_buffer=10110700 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=10110710 24-input_bit_count=-8
Function input_code input_bit_buffer=10110710 input_bit_buffer >> 20=00000101 Function input_code end input_bit_buffer <<= 12=10710000 input_bit_count=20 return_value=257
In main Start of while new=257 next_code=263 if new=257
Function Decode_String start code=257
Function Decode_String while currchararray[code]=67 C previntarray[code]=66 B code=257
Function Decode_String end Code=66 B
if string=BA
In main inner while *string=66 B
In main inner while *string=67 C
In main end of while old=259
curr=66 B next_code=263 previntarray[next_code]=259 currchararray[next_code]=66
Function input_code start input_bit_count=20 input_bit_buffer=10710000
Function input_code While loop char c=51 input_bit_count=28 input_bit_buffer=10710510 24-input_bit_count=-4
Function input_code input_bit_buffer=10710510 input_bit_buffer >> 20=00000107 Function input_code end input_bit_buffer <<= 12=10510000 input_bit_count=16 return_value=263
In main Start of while new=263 next_code=264 if new=263
Function Decode_String start code=263
Function Decode_String while currchararray[code]=66 B previntarray[code]=259
code=263
Function Decode_String while currchararray[code]=65 A previntarray[code]=68 D code=259
Function Decode_String end Code=68 D
if string=D
In main inner while *string=68 D
In main inner while *string=65 A
In main inner while *string=66 B
In main end of while old=257 curr=68 D next_code=264 previntarray[next_code]=257 currchararray[next_code]=68
Function input_code start input_bit_count=16 input_bit_buffer=10510000
Function input_code While loop char c=08 input_bit_count=24 input_bit_buffer=10510800 24-input_bit_count=0
Function input_code While loop char c=10 input_bit_count=32 input_bit_buffer=10510810 24-input_bit_count=-8
Function input_code input_bit_buffer=10510810 input_bit_buffer >> 20=00000105 Function input_code end input_bit_buffer <<= 12=10810000 input_bit_count=20 return_value=261
In main Start of while new=261 next_code=265 if new=261
Function Decode_String start code=261
Function Decode_String while currchararray[code]=65 A previntarray[code]=258 [1] code=261
Function Decode_String while currchararray[code]=68 D previntarray[code]=67 C code=258
Function Decode_String end Code=67 C
if string=C
In main inner while *string=67 C
In main inner while *string=68 D
In main inner while *string=65 A
In main end of while old=263 curr=67 C next_code=265 previntarray[next_code]=263 currchararray[next_code]=67
Function input_code start input_bit_count=20 input_bit_buffer=10810000
Function input_code While loop char c=61 input_bit_count=28 input_bit_buffer=10810610 24-input_bit_count=-4
Function input_code input_bit_buffer=10810610 input_bit_buffer >> 20=00000108 Function input_code end input_bit_buffer <<= 12=10610000 input_bit_count=16 return_value=264
In main Start of while new=264 next_code=266 if new=264
Function Decode_String start code=264
Function Decode_String while currchararray[code]=68 D previntarray[code]=257 code=264
Function Decode_String while currchararray[code]=67 C previntarray[code]=66 B code=257
Function Decode_String end Code=66 B
if string=B
In main inner while *string=66 B
In main inner while *string=67 C
In main inner while *string=68 D
In main end of while old=261 curr=66 B next_code=266 previntarray[next_code]=261 currchararray[next_code]=66
Function input_code start input_bit_count=16 input_bit_buffer=10610000
Function input_code While loop char c=06 input_bit_count=24 input_bit_buffer=10610600 24-input_bit_count=0
Function input_code While loop char c=ff input_bit_count=32 input_bit_buffer=106106ff 24-input_bit_count=-8
Function input_code input_bit_buffer=106106ff input_bit_buffer >> 20=00000106 Function input_code end input_bit_buffer <<= 12=106ff000 input_bit_count=20 return_value=262
In main Start of while new=262 next_code=267 if new=262
Function Decode_String start code=262
Function Decode_String while currchararray[code]=68 D previntarray[code]=260
code=262
Function Decode_String while currchararray[code]=67 C previntarray[code]=256 code=260
Function Decode_String while currchararray[code]=66 B previntarray[code]=65 A code=256
Function Decode_String end Code=65 A
if string=A
In main inner while *string=65 A
In main inner while *string=66 B
In main inner while *string=67 C
In main inner while *string=68 D
In main end of while old=264 curr=65 A next_code=267 previntarray[next_code]=264 currchararray[next_code]=65
Function input_code start input_bit_count=20 input_bit_buffer=106ff000
Function input_code While loop char c=f0 input_bit_count=28 input_bit_buffer=106fff00 24-input_bit_count=-4
Function input_code input_bit_buffer=106fff00 input_bit_buffer >> 20=00000106 Function input_code end input_bit_buffer <<= 12=fff00000 input_bit_count=16 return_value=262
In main Start of while new=262 next_code=268 if new=262
Function Decode_String start code=262
Function Decode_String while currchararray[code]=68 D previntarray[code]=260
code=262
Function Decode_String while currchararray[code]=67 C previntarray[code]=256 code=260
Function Decode_String while currchararray[code]=66 B previntarray[code]=65 A code=256
Function Decode_String end Code=65 A
if string=A
In main inner while *string=65 A
In main inner while *string=66 B
In main inner while *string=67 C
In main inner while *string=68 D
In main end of while old=262 curr=65 A next_code=268 previntarray[next_code]=262 currchararray[next_code]=65
Function input_code start input_bit_count=16 input_bit_buffer=fff00000
Function input_code While loop char c=00 input_bit_count=24 input_bit_buffer=fff00000 24-input_bit_count=0
Function input_code While loop char c=ff input_bit_count=32 input_bit_buffer=fff000ff 24-input_bit_count=-8
Function input_code input_bit_buffer=fff000ff input_bit_buffer >> 20=00000fff Function input_code end input_bit_buffer <<= 12=000ff000 input_bit_count=20 return_value=4095 ÿ
The above program uses the LZW compression to compress
data for us. The LZW algorithm is the compression method used for compressing
gif and tiff files. Along with the Huffman algorithm it is the most widely used
compression method used today. Lets start
with the function main.
We store the string to be compressed in the character
pointer p. On purpose we have started our string with five ABC’s. The reason
being that the LZW compression is very efficient when it finds the same data
repeated over and over again. We create two arrays of int’s codeintarray and
previntarray of size 20084 bytes or space for 5021 ints. Why a size of 5021
bytes is used will be explained later.
Then we also create a char array currchararray of the
same size 5021. The string in the pointer p will be compressed and stored in
the file vijay.txt and by using the w mode, so that this file is created from
scratch each time. The variable next_code is set to 256 as a char can hold
values from 0 to 255. The global array codeintarray which is initialized by
default to 0. We now set each member of this array to –1 using a loop.
Thus if any codeintarray member is –1, it only means that
we have no written anything into that member. The –1 is used as we will never
have this value as a valid value of any member of the codeintarray. We also set the prev member to the first
char of our string A. We then increase p by 1 to point to the second character
B. We now enter a while loop where we set curr to the second byte h and
increase p by 1.
Some time later we will reach the end of the string or
zero and thus we will quit out of the outer while loop. At the end of the loop
we set prev to either curr or the value stored in the codeintarray using the
variable index as the array offset. Thus the data type of curr is a char but
the value of prev can be a int.
Thus at times prev is what the value of curr was and at
times it is a value from the codeintarray.
We then calculate a value for the variable index. We first
left shift curr by 4 loosing the 4 high bits of the byte. Thus the letter B or
0x41 now becomes 1056. We then xor this value with prev getting a value of
index or 1121. This variable index will be used as an offset into the three
arrays we have created. We now enter a infinite while loop.
The if statement that we encounter is true. This is
because we have not written into the array codeintarray at all. Once again the
if is true and not the else. Here we print out the values of next_code and prev
and curr which have not changed. The codeintarray at the offset index or 1121
now contains 256, the value of next_code which now is increased by 1. Thus the
next we come across a A and B, the value of codeintarray at offset index will
be 256 and not –1.
Thus for the same combination or curr and prev which we
use to give us index, the first if in
the outer while will not be true again. This is because each time the index
offset for the codeintarray contains
–1, we write the value of next_code there as its value and increase it by 1.
Thus next_code if its value is 300, it means that we have written 45 values to
the codeintarray at different places using the index variable as offset.
Thus imagine the codeintarray as comprised of all –1 except the offset at location 1121
which contains the value 256. Thus the codeintarray contains a series of values
from 256 upwards.
We then go to the corresponding previntarray and
currchararray at the offset index and place the values of prev and curr
respectively which is A and B respectively. Remember variable curr is always a
char but prev can be a char or a int. Also the three arrays are symmetrical in
the sense that the same index variable is used as an offset into them to write
there . The currchararray always contains the current character.
We then call a function output_code with the value of
prev that is the first character A. Lets see what this function now does.
We start as always by printing out the values of prev or
code that has a value of A and the two variables output_bit_count and
output_bit_buffer which is 0. These are global variables and hence there values
the next time will not be zero as we are not initializing them in the function
itself. The earlier values will be remembered. We then use the output_bit_count
variable to calculate the number of bits we have to left shift the value of
code or the character A.
To start with the number of bits is 20. We then take the
current value of output_bit_buffer which is 0 and bit wise or with the value of
prev or code left shifted by 20 bits. Thus the value of code is 0x41 and after
left shifting it by 20 bits we get 04100000. Thus the value of
output_bit_buffer and the code shifted is the same as the output_bit_buffer
variable value is 0.
We next add the output_bit_count variable by 12 and enter
a while loop until the value is greater or equal to 8. In this loop we reduce
the value of the variable output_bit_count
by 8 Thus if the value is 12 as
it is right now, we will enter the loop only once, but the next time we enter
the function output_code, the value of the variable output_bit_count at the
start of function will be 4, we are increasing it by 12 and thus it becomes 16.
The while loop will happen twice the next time it is
called, but at the end of the function its value will be 0 and hence the next
time the function output_code will be called the value is 0.
We then print out the value of output_bit_buffer right
shifted by 24 bits which is 4. The 1 gets thrown off. We then use the putc
function to write this value of disk. Thus the first byte written on disk is
04, that is only the high nibble of the first character A.
Now the 1 that got thrown off must be saved and therefore
we throw off the 0 and 4 by left shifting by 8. The value in output_bit_buffer
is thus 10000000 which will be used when we call the output_code function
later.
The value in output_bit_buffer is thus 10000000 which
will be used when we call the output_code function later.
We now come back to the if statement where we set prev to
curr as we change the value of curr in the while loop. Thus prev is B and curr
is C which gives index a value of 1138.
As this is the first time a index of 1138 is generated,
the first if is once again true as codeintarray[1138] is –1. The value of next_code is 257 and we place
this value ay codeintarray[1138]. We also store B and C in the arrays
previntarray and currchararray.
We now call the function output_code with the value of
prev as B. The output_bit_count variable is 4 from the previous call along with
output_bit_buffer which is 10000000. The bits to be left shifted now reduce
from 20 the last time to 16. Thus the code variable left shifted by 16 gives us
00420000 but as we bit wise or with the previous value of output_bit_buffer
which is 10000000 the value finally is 10420000.
The variable output_bit_count is 16 as we add 4 the
previous value by 12. The while loop will now go on twice. The first time we
right shift by 24 so we are throwing away all but the top eight bits which
moves them to the start of the int. We are thus writing 10 to disk. We then
reduce output_bit_count by 8 and more importantly we left shift by 8 bits
dropping the top 8 bits 10 and moving the 42 to as the top bits.
Thus at the start of the while at the second iteration we
get output_bit_count=8 and output_bit_buffer=42000000. This allows us to write
42 to disk but at the end of the loop the two variables are 0.
Thus we write two bytes on disk 10 and 42 on disk and we
have the following bytes 04 10 42. The 4 from the first byte and 1 from the
second will give us an A. The 0 from the second bytes is ignored and the 42
from the third byte is used in full. Thus the output_code when used the first
time writes only four useful bits out of 8 on disk, the next it writes 12
useful bits out of 16. From now on this pattern repeats itself.
Now we move prev to C and curr is now A. The index is a
new value 1107. once again the first if of the inner while will be true. We
will also be in the if after the inner while and the value of next_code will be
258 and we set the codeintarray[1107] to 258.
In the function output_code code is C, both variables are
0 and the left shift bits is once again 0. The code to be shifted is 04300000
and thus we write 4 on disk but the buffer gets the value left out 3 and thus
becomes 30000000. In the function output_code if we do not bit wise or the
buffer variable we will loose out the lower nibble of the byte for the odd call
of the output_code function.
Now comes the real meat of the LZW algorithm. So far we
have used 3 bytes to store AB which is two bytes, not a very good example of
compression. We now set prev to A and curr to B which is a pattern that has
occurred before. This gives us a familiar pattern of AB and thus the good old
index value of 1121. Thus in the inner while, the first if statement that was
always true, is not so.
The second if is now true as the previntarray has a value
of A or prev and currchararray has a value of curr or B. We quit out of the
inner while and now the if statement is false but the else is true. Here we
have no output code function at all. Remember it is this function that writes
to the file.
The fewer times we call this function the better. We
simply set the value of prev to 256 which is what the codeintarray contains at
index value 1121. This is the value we wrote when we first came across a index
of 1121. This is why prev is a int as at may contain numbers from 256
onwards. Thus prev either contains the
value of curr or next_code which is what we store in the codeintarray.
We come back to start of the outer while loop with prev
as 256 and curr as C. The 256 once again represents AB. The index value will be
unique at 1328. The first if statement in the while loop will be true and the
first if statement outside the loop will also be true.
The codeintarray[1328] will contain 259 and the
previntarray[1328] will have 256 and the currchararray[1328] will also have C. We now call the
output_code function with a value of prev or 256. Lets us see how many bytes it
will output for the abc combo.
The leftovers from the previous call are
output_bit_count=4 and output_bit_buffer=30000000 for the C. After shifting by
16 the buffer now contains 31000000. We enter the while to write 31 and 00.
This is what we finally call compression as the ABC was written as two bytes
only.
Lets us now take a larger string ABCD and repeat it a
zillion times as.
char
*p="ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCD";
Lets us now understand for a last time how a real life
compression algorithm works. We have repeated the string ABCD 10 times and the
file vijay.txt has a size of 27 bytes and not 40 bytes. We now add one more
ABCD to the string p.
The size of the file only increases by 1 byte to 28
bytes. If we have a single ABCD as the string the size of the file vijay.txt is
a whooping 9 bytes a minus 200+ compression. Adding one more ABCD increases the
file size to 12 bytes a increase of 3 bytes. One more ABCD increases the size
to 15 bytes again a increase of 3 bytes.
The size of the string is 12 bytes. One more ABCD brings
our string to 16 bytes, and the size of vijay.txt to 18 bytes or a increase of
3 bytes again. Thus the last three increased the size by 3 bytes and the string
increased by 4 bytes. A little compression.
Now we add one more ABCD to bring our string size to 20
bytes and the size of vijay.txt increases by only one byte to 19 bytes. We add
one more ABCD and the size increase is by 2 bytes to 21 bytes and the string is
20 bytes. One more ABCD and the increase is 1 byte. One more ABCD and the
increase is 2 bytes. Lets us now understand how this magic happens.
We start at the while where prev is 41 or A and curr is
42 or B. The index value is 1121. As this is the first time we quit out of the
first if as codeintarray[index] is –1.
We fill up the three arrays and increase next_code by 1. We write 04 on disk
and store the 01 in the buffer variable. For the B and C combo, we write 10 and
for the A and 42 for the B. For the C D combo we write 04 and store 3 in the
buffer variable. For the D A combo we write 30 for the C and 44 for the D.
Now we come back to A B combo where the index is 1121 and
thus the second if is true and the last else. Here we do not try to write
anything to disk but simply set prev to 256, the value at the codeintarray
offset of index. Remember 256 means the first combo, 257 the second. Now that prev is 100 and curr 43 we get a
new index value of 1323. This now writes 10 to disk only. The next combo is C D
and this index value has occurred before.
Lets now explain everything in a slightly different way
by drawing a table.
Prev |
Curr |
Index |
Next_Code |
codeintarray |
previntarray |
currchararray |
written |
buffer |
A |
B |
1121 |
256 |
256 |
A |
B |
O4 |
10 |
B |
C |
1138 |
257 |
B |
C |
10 42 |
00 |
|
C |
D |
1027 |
258 |
258 |
C |
D |
04 |
30 |
D |
A |
1108 |
259 |
259 |
D |
A |
30 44 |
00 |
A |
B |
|
|
|
|
|
|
|
256 |
C |
1328 |
260 |
260 |
256 |
C |
10 |
0 |
C |
D |
|
|
|
|
|
|
|
258 |
A |
1298 |
261 |
261 |
258 |
A |
01 02 |
|
A |
B |
|
|
|
|
|
|
|
256 |
C |
|
|
|
|
|
|
|
260 |
D |
1348 |
262 |
262 |
260 |
D |
10 |
40 |
D |
A |
|
|
|
|
|
|
|
259 |
B |
1315 |
263 |
263 |
259 |
B |
41 03 |
0 |
B |
C |
|
|
|
|
|
|
|
257 |
D |
1345 |
264 |
264 |
257 |
D |
10 |
10 |
D |
A |
|
|
|
|
|
|
|
259 |
B |
|
|
|
|
|
|
|
263 |
C |
1335 |
265 |
265 |
263 |
C |
11 07 |
0 |
C |
D |
|
|
|
|
|
|
|
258 |
A |
|
|
|
|
|
|
|
261 |
B |
1317 |
266 |
266 |
261 |
B |
10 |
50 |
B |
C |
|
|
|
|
|
|
|
256 |
D |
|
|
|
|
|
|
|
264 |
A |
1304 |
267 |
267 |
264 |
A |
51 08 |
0 |
A |
B |
|
|
|
|
|
|
|
256 |
B |
|
|
|
|
|
|
|
259 |
D |
|
|
|
|
|
|
|
262 |
A |
1302 |
268 |
268 |
262 |
A |
10 |
60 |
A |
B |
|
|
|
|
|
|
|
256 |
C |
|
|
|
|
|
|
|
260 |
D |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Last Bytes 61 06 00
Ff f0 00
Lets now understand the big table that we have painstakingly
drawn out for you. When prev and curr is A and B, we are writing 256 to the
codeintarray. This can only mean that the number 256 stands for the patter AB.
In the same vein 257 stands for BC, 258 for CD and 259 for DA. As the value of
index cannot exceed 4095 which is 2 raised to 12, we are writing a 12 bit
number each time to disk.
The first time we are writing the high eight bits of the
number and the second time we are writing the low 4 bits of the previous value
and the 12 bits of the current. We save the 4 bits of the previous value in the
buffer variable. Thus when we write a A or 0x41, we write 04 in the first round
and 1 in the second call to the
function. Then we write the B as 042.
This is why a AB gets written as 3 bytes 04 10 42. When
we come across a pattern AB that is already there, we do not write anything to
disk. We simply in the else find out the value in codeintarray that corresponds
to the index offset. In this case it is 256 and in other cases a number larger
than 256 or the value of the variable next_code. Then we repeat the while with
a value of prev as 256 and curr as C.
As this is a new index we write the value in codeintarray
as 260 but the previntarray now contains 256. Thus as 256 stands for AB and
therefore 260 C stands for the pattern ABC. We write 256 as 100 on disk. The
pattern CD also exists and therefore we now get prev as 258 and curr as A. This
is a new pattern given a next_code of 261. The previntarray has a value of 258
which in turn is CD and curr is A. Thus 261 is CDA.
Now comes further compression much later on. DA is
already there, 259 B is also there and 263 C is new. Codeintarray is 265 and
previntarray is 263 and curr is C. If you look at 263 it is 259 B, but 259 is
DA. Thus 263 is DAB and now 265 is DABC. Thus as the patters move on we find
that a single number represents larger and larger values.
The last value of prev is 106. The value of buffer is
61060000 and therefore we write 61 and 06. We then write a 4095 which is the
largest 12 bit value we write, which is 0xfff. We write out the FF first and
when writing out the last 0, we write f and then 000. This completes the
compression, now lets start with the decompression.
We close the file vijay.txt using fclose and then again
open it for reading. We first next_code at 256 as we assume the individual
chars take up the first 255 indexes into the array. Earlier we used the
output_code for writing the file, Now we use the function input_code. Where we
used prev and curr as variables, now we use old and new. We set old to the
first value returned by the input_code function and in the while later we make
old the value of new.
The function input_code job is to read the 12 bits
written to disk and convert it into a int. This function therefore uses the
same technique that we used earlier to
write 12 bits to disk. Had we simply written 16 bits to disk, our two functions
input_code and output_code would be much simpler.
The first time both variables input_bit_count and
input_bit_buffer will be zero. The while loop quits out until the count
variable is less than or equal to 24. We increase its value by 8 each time in
the loop. As then value starts from 0, the first time we call input_code, it
remains in the loop 4 times. When it quits out of the while loop its value is
32.
We then reduce it by 12 at the end of the loop so that
the next time input_code function is called its value is 20. It now enters the
loop only once and at value 28 it leaves the loop. Then we reduce it by 12 so
that its value is 16. The next time function input_code is called the while
loop is done twice and its value is 32. We then decrease it by 12 to get it
back to 20 and the pattern continues. It gets called once and then twice and so
on.
The first time we must repeat is the only special case
where it gets called four times. The count variable is important to us as we
use to the left shift the byte that we read from disk. The first time we shift
it 24,16,8 and 0 times. In future when the loop gets done once we left shift it
4 times and when the while gets called twice we left shift it 8 and 0 times.
Remember we are subtracting the count variable from 24 so
when we say that we are left shifting 8, we are subtracting 24 from count whose
value now is 16. We also are like before bitwise oring with what is already
there in the buffer variable. Our first four bytes on disk are 04104204. Thus
the first iteration of the for loop buffer getc(fp) returns 04, this gets left
shifter by 24 in buffer to give us 04000000.
We then pick up the 10 and left shift this by 16 to give
us 04100000. Then the third byte is 42 which is left shifted only 8 bits to
give buffer a value 04104200 and finally the last 04 gives buffer 04104204. We
are only interested in the top 12 bits as of now so if we right shift by 20 we
get 041 which is stored in the variable return value and is returned back.
We then drop the top 12 bits by left shifting to get
buffer as 04204000. In the next call of input_code, we will return 042 as the
answer and the buffer variable will contain 04300000 the top 12 bits being the
next value. This is how we keep picking up the top 12 bits and returning them.
Depending upon the number of times the while gets on,
when it gets called once we are reading only 8 out of the 12 bits and the next
time when it gets called twice, we are reading 4 lower bits of the earlier 12
bits and the entire 12 bits of the next number.
Thus every time we call input_code, we get back the same
prev that we wrote to disk. The above table will show us the values of prev
that now are returned back. The first word is unique and hence we immediately
write it and set curr to it which is a A.
We then entire a while loop where we set the variable new
to each new value from disk using the function input_code. The last value we
wrote was 4095 and the minute new becomes this value we quit out of the loop.
The other important point is that at the end when we set old to new, we also
increase the value of next_code by 1. Thus the number of times we increased
next_code in the compression is the same number of times our while loop will
loop.
We now check new with the value of next_code. Normally
new will always we less than next_code and thus the if statement will be true.
We call the function decode_string with the address of the array decode_stack
and new which is B. In this function we basically loop as long as the value of
code or new is greater than 255.
In our case the value of code is less than 255 so we
never enter the loop. We then add the code parameter to the array and also null
terminate it and return the address of this string or array. We simply print
out one by one the number of chars in the string called string.
For some time at least the length of the string will be 1.
We set the curr variable to be the first char of this string and in this case
the value of new and curr will be the same. We enter a while loop and display
the entire string till we reach the end of the string the null character. We
use next_code as the index to the previntarray and currchararray to hold the
values of old and curr.
Thus at the start old is 65 and curr is 66. These are the
same values it had earlier when we build the dictionary. We are building a
similar dictionary now. We will only understand the decompression when we start
with a value of new as 256 and the value of next_code is 259 so once again the
if is called. Here we call the function decode_string with the big array and
the value 256.
The while loop now is true as the value of code is
greater than 255. The currchararray [code] or
currchararray[256] contains B and previntarray[256] contains a A. We
first put B in the buffer array in the loop and then the char A at the end of
the loop. When we leave the loop we are at the A or the end of the loop. We
display the A and then the B and then quit out as we have moved to byte 3999 of
the array which will always contain –1.
Remember we are placing stuff backwards in the array and
when we move out of the function decode_string, we have to read the stuff
backwards. Also each time we are making sure that if the value at previntarray
is larger than 256, we stay in the loop and use the currchararray to tell us
what character it stands for. Remember old is like prev used earlier it can
either be 0 to 255 or a number larger than 256 that stands for a combo of
characters.
Like the last example lets build a table to further
explain what is going on.
New |
next_code |
curr |
string |
old |
previntarray |
currchararray |
|
|
|
A |
|
|
|
B |
256 |
B |
B |
65 |
65 |
66 |
C |
257 |
C |
C |
66 |
66 |
67 |
D |
258 |
D |
D |
67 |
67 |
68 |
256 |
259 |
A |
AB |
68 |
68 |
65 |
258 |
260 |
C |
CD |
256 |
256 |
67 |
260 |
261 |
A |
ABC |
258 |
258 |
65 |
259 |
262 |
D |
DA |
260 |
260 |
68 |
257 |
263 |
B |
BC |
259 |
259 |
66 |
263 |
264 |
D |
DAB |
257 |
257 |
68 |
261 |
265 |
C |
CDA |
263 |
263 |
67 |
264 |
266 |
B |
BCD |
261 |
261 |
66 |
262 |
267 |
A |
ABCD |
264 |
264 |
65 |
262 |
268 |
A |
ABCD |
262 |
262 |
65 |
|
|
|
|
|
|
|
LZWBasics
#include <stdio.h>
unsigned int ret,cnt,buff,times;
FILE *fp;
inp()
{
if ( times%2 == 0)
{
char i,j,k;
i = fgetc(fp);
j = fgetc(fp);
ret = i << 4;
k = j & 0xF0;
k = k >> 4;
ret = ret | k;
buff = j & 0x0F;
printf("\nif %02x %02x ret=%x buff=%08x\n",i,j,ret,buff);
}
else
{
char i;
i = fgetc(fp);
ret = buff & 0x0f;
ret = ret << 8;
ret = ret | i;
printf("\nelse %02x ret=%x\n",i,ret);
}
times++;
return ret;
}
out(unsigned int value)
{
if ( times % 2 == 0)
{
fputc(value >> 4 ,fp);
buff = value & 0x0f;
printf("In if value=%04x written %02x buffer=%08x\n",value,value >> 4,buff);
}
else
{
char dummy;
dummy = buff & 0x0f;
dummy = dummy << 4;
dummy = dummy | (value >> 8);
printf("In else value=%04x value>>8=%04x Writing %02x %02x\n",value,value>>8,dummy,value & 0xff);
fputc(dummy,fp);
fputc(value & 0xff , fp);
}
times++;
}
main()
{
fp=fopen("vijay.txt","wb");
out(67);
out(260);
out(258);
out(66);
out(0);
out(0);
fclose(fp);
fp=fopen("vijay.txt","rb");
printf("\n");
times = 0;
printf("%c ",inp());
printf("%d ",inp());
printf("%d ",inp());
printf("%c ",inp());
printf("%d\n",inp());
}
In if value=0043 written 04 buffer=00000003
In else value=0104 value>>8=0001 Writing 31 04
In if value=0102 written 10 buffer=00000002
In else value=0042 value>>8=0000 Writing 20 42
In if value=0000 written 00 buffer=00000000
In else value=0000 value>>8=0000 Writing 00 00
if 04 31 ret=43 buff=00000001
C
else 04 ret=104
260
if 10 20 ret=102 buff=00000000
258
else 42 ret=42
B
if 00 00 ret=0 buff=00000000
0
In the above program we are simply writing a 12 bit value
to disk and then reading 12 bits a time. We use a function out to write a 12
bit value to disk passed as a parameter and then the inp function to return a
12 bit value. The fgetc and fputc function are great for their job but they
write 8 bits at a time.
We would like to write 12 bits at one time as our range
of numbers are not 0 to 255 but 0 to 4095. If our range was 0 to 65535, then we
could use fputc to write 2 bytes or 16 bits at a time. We could have written a
12 bit number as 16 bits but that would mean wasting 4 bits on disk each time.
The problem with writing 12 bits at a time is that we cannot write 12 bits at a
time, we have no choice but to write 8 bits at a time.
Lets take a specific example to explain. We want to write
A and B to disk as 12 bits values. If we were to write them as chars we would
write 41 42, as shorts 00 41 00 42 as big endian. If we were to write them as
little endian, we would reverse the bytes as 41 00 and 42 00. If we were to use
12 bit values we would write A and B as 041 042. A 256 would be written as 100
and 258 as 102.
The rule we follow will be very simple. The out function
will first be called once and then the next time we call it will be called
twice. This pattern will be constantly repeated. The first time we will write
the top bits of the 12, the second time out of the 16 bits written, we will
write the high 4 bits first and then the remaining complete 12 bits of the next
int. Thus we need a way of knowing that we are writing 8 or 16 bits.
We use a variable times to tell us whether it is the odd
or even time that we are calling the out function. At the end of the function
we simply increase times by 1. To start with, its value is 0 and the mod of
times with 2 is 0. When we call times the second time, its value is 1 and times
mod 2 is 1.
Thus whenever the remainder is 0, we are calling the odd
number of times and the if statement is true. When the mod is odd, we are
calling it the even number of times. As explained before the first time we call
it we must write the top eight bits to disk and store the remaining 4 bits
somewhere.
Lets look at the actual code. The if statement is true
the first time. We have to write a single byte to disk. The only problem is that we have to write
the top eight bytes so we right shift the bytes by 4 and then write. The lower
four bits or nibble must be saved somewhere and we use the buffer variable for
this purpose. But as we want to save only the first four bits we bitwise and
all the other bits by 0 and the first four by 1.
Thus for the 0x43 or C we write 04 but store the 3 in
buffer. In the same vein for a 258 or 102 we write 10 and put 2 for writing later. The next time we call the out function,
we write two bytes. We first need to write the four bits in buffer which is a 1
and then as we are passing 100 we need to write the 1 or the high four bits. We
create a variable dummy and take the first four bits off from the buff
variable.
We then left shift it by 4 so that it becomes the high
nibble. Remember these bits belong to the char whose high 8 bits we have just
written. We then have to write in the first four bits of dummy the high 4 bits
of the current byte to write. We therefore simply right shift value by 8
bringing the bytes 9 to 12 at position
1 to 4. This is how we create the first byte to write which is combo of the low
4 bytes of the previous word and the high 4 bits of the current word.
Writing the second byte is easy as we have to simply take
the top 8 bits by anding. As an example
the byte to be written now is 104. We need to write the 3 left over from buffer
and the top four bits as 1. This gives us 31 and the low 8 bits to be written
are 04. The bytes written are 43 and
04.
Now lets do the reverse and read these bytes from disk.
We use the variables times for the same reason as before, to differentiate the
even and odd times the inp function gets called. We do a small reversal of
roles. The first time we get called we read two bytes from disk i.e. 16 bits.
We use 12 of these and return their values. The top 4 bits we keep in buffer to
be used the second time function inp gets called.
Then we read only 8 bits an use the 4 leftover bits as
the high bits for these 8 bits now read. Lets start with actual code. When
times%2 is equal to zero, it is the odd or the first call. We have set times to
0 in main. We read the first two bytes in variables I and j respectively with
values 04 and 31. We need to get at the values 043.
To do this we need to shift I or 04 4 bits to the left in
the ret variable. We now need to extract the high 4 bits or 3 from the byte j.
To do this we first bit wise and with f0 making the bottom nibble 0. This gives
us the 3 but the problem is that it is in the wrong place. We therefore right
shift it by 4 so that it becomes the low nibble from the high nibble. We then
bit wise or this with ret giving us 12 bits.
As before we need to store the low nibble of j in buff we
do by anding with 0f. The second time we get called we read the byte from disk
in a variable i. We bit wise and with 0f the buff variable to get at the top
nibble. This step is not really needed as we know that all of buf but the first
nibble is already zero.
These 4 bits are actually the high 4 bits of the 12 bits
and thus we move them left by 8 so that they occupy the last 4 of the 12 bits.
We simply bit wise or with I to give us the low 8 bits.
#include <stdio.h>
unsigned int ret,cnt,buff;
FILE *fp;
inp()
{
printf("\n\nStart of while cnt=%d buff=%08x\n",cnt,buff);
while (cnt <= 24)
{
unsigned char c = fgetc(fp);
printf("%02x %d %08x ||| ",c,cnt,buff);
buff |= c << (24-cnt);
cnt += 8;
}
printf("\nEnd of while cnt=%d buff=%08x\n",cnt,buff);
ret=buff >> 20;
buff <<= 12;
cnt -= 12;
printf("End of function return=%d cnt=%d buff=%08x\n",ret,cnt,buff);
return ret;
}
out(unsigned int value)
{
buff = buff | value << (20-cnt);
cnt = cnt + 12;
printf("\nBefore while loop cnt=%d buff=%08x\n",cnt,buff);
while (cnt >= 8)
{
fputc(buff >> 24,fp);
printf("%02x ",buff >> 24);
buff = buff << 8;
cnt -= 8;
printf(" %d %08x..",cnt,buff);
}
printf("\nAfter while loop cnt=%d buff=%08x\n",cnt,buff);
}
main()
{
fp=fopen("vijay.txt","wb");
out(65);
out(256);
out(257);
out(66);
out(0);
out(0);
fclose(fp);
fp=fopen("vijay.txt","rb");
printf("\n");
printf("%c ",inp());
printf("%d ",inp());
printf("%d ",inp());
printf("%c ",inp());
printf("%d\n",inp());
}
Before while loop cnt=0 buff=04300000
04 4 30000000..
After while loop cnt=4 buff=30000000
Before while loop cnt=4 buff=31050000
31 8 05000000..05 0 00000000..
After while loop cnt=0 buff=00000000
Before while loop cnt=0 buff=10200000
10 4 20000000..
After while loop cnt=4 buff=20000000
Before while loop cnt=4 buff=20420000
20 8 42000000..42 0 00000000..
After while loop cnt=0 buff=00000000
Before while loop cnt=0 buff=00000000
00 4 00000000..
After while loop cnt=4 buff=00000000
Before while loop cnt=4 buff=00000000
00 8 00000000..00 0 00000000..
After while loop cnt=0 buff=00000000
Start of while cnt=0 buff=00000000
04 0 00000000 04000000 ||||31 8 04000000 04310000 ||||05 16 04310000 04310500 ||||10 24 04310500 04310510 ||||
End of while cnt=32 buff=04310510
End of function return=67 cnt=20 buff=10510000
C
Start of while cnt=20 buff=10510000
20 20 10510000 10510200 ||||
End of while cnt=28 buff=10510200
End of function return=261 cnt=16 buff=10200000
261
Start of while cnt=16 buff=10200000
42 16 10200000 10204200 ||||00 24 10204200 10204200 ||||
End of while cnt=32 buff=10204200
End of function return=258 cnt=20 buff=04200000
258
Start of while cnt=20 buff=04200000
00 20 04200000 04200000 ||||
End of while cnt=28 buff=04200000
End of function return=66 cnt=16 buff=00000000
B
Start of while cnt=16 buff=00000000
00 16 00000000 00000000 ||||ff 24 00000000 000000ff ||||
End of while cnt=32 buff=000000ff
End of function return=0 cnt=20 buff=000ff000
0
Lets now understand the same example using a more generic
method. The basic rule does not change at all. The function out gets called as
before and we now have a while loop to write either 8 bits or 16 bits to disk.
The cnt variable is used to control the number of times the while loop gets
called. To start with its value is 0.
We then increase it by 12 before the while loop. The
while loop checks whether it is greater than 8, which it is. We thus enter the
loop once with value 12. In the loop we reduce cnt by 8 to makes its value 4.
As it is less than 8, we quit out of the loop. Thus when its value is 12 at the
start of the loop the loop gets called 1.
Now the next time we call the out function, the value of
cnt at the start is 4. We add 12 to it to make its value 16. Now the while loop
goes on twice. When we leave the while loop cnt is 0, so that the next time we
call function out, the loop happens only once. Thus cnt inside the loop is 12
or 16 and 8. We do not use cnt for anything else other than making the loop go
on once or twice.
We only use the cnt variable at the start of the function
where its value is 0 or 4. Thus makes the shift of the bits 0 or 16 as we
subtract from 20. The first time the value is 0 and we are shifting the
contents of buf by 20, making the top 12 bits of buff as 04300000.
We then right shift buf by 24 making the last 8 bits 04
as the first 8 bits and writing this out to disk. In the same loop we left
shift buf by 8 dropping the last 8 bits which we just wrote to disk. Buf now
contains 3000000 which is the 3 that needs to be written to disk in the second
round.
The second time out gets called, the top bits of buf are
already used. We now left shift value into buf 16 times and not 20 times This
is so that the 105 occupies the last 16 bits minus 4 which are already
occupied. Thus buf now contains 31050000. As we are bit shifting value and not
buf and then bit wise oring buff , the 3 does not fall out.
Now in the while loop we first write 31 the top high
bits, then we remove the 31 and then again right 05 and then remove them. Thus
the second time out gets called buf has nothing to carry. Thus as before we
write 8 bits first and then the next 4 bits and the full 12 bits to make up 16
bits.
The inp function is a little more complex than it should
be. The cnt variable is once again used to keep track of the while loop. The
only problem is that the first time its gets called 4 times and then once and
then twice for the same reason mentioned earlier. Lets look at the first time
it gets called. We are reading one byte at a time and we pushing these bytes
into buf. Thus the buf variable will now contain the first four bytes as byte
04310510.
We simply return the top 12 bits by right shifting by 20
the value of buf into ret. We also then left shift buf by 12 to drop these 12
bits. When we come back to this function the second time, buf now contains
10510 000. Of these bytes only the 10510 are of use to use.
We now read the next byte from disk which is 20 and we
need to add this to buf to make buf 1051020 0. The value of cnt is 20 so that
we left shift 24-20 only 4 bits. We return 105 and make buf 1020 0000. Then
next time buf is called we read two bytes from disk.
The first time buf becomes 10204200 as the first byte is
42. The second byte is 00 and thus buff remains what it is. Thus we use cnt
which is 16 and 24 in such a way so that the first byte from disk is left
shifted by 8 and the second by 0. Thus the even times we call inp, we add the
two bytes in the first 16 bits. This is how we add the bytes to the buff
variable and take them off 12 bits at a time.
It does the same thing like the earlier program but is a
lot more complex.
Back to the main page