At 6:54 PM -0500 on 1/8/01, Lake Group wrote: > >> Do any of you math wizards or word-bytists know of a simple compression >>> scheme for abbreviating or otherwise representing in less data the values 0 >>> to 255? >>> >>> Thank you for any suggestions. >>> >>> Robert >>> > >Hola, > >I approached a few compression ideas awhile back, but basically couldn't get >anything better than StuffIt, so I moved on. For what it's worth, here are >some approaches that I tried. I don't remember where I got it and I'm not sure if it will work for the values 0 to 255 but I have some C source for something called Arithmetic Encoding I got this as a clipping from a webpage but I can't rememeber the URL. I've converted most of it to FB^3 but it's a back-burner project. This is the original C source. Maybe someone can make use of it. It is a little long though. -------------------------------- /* Arithmetic Encoding Sample */ /* Malakhov K.A. 1994 */ /* This source was made available to us by Wrox Press from the book Master Class Assembly Language */ /* The full source for all the sample programs are available on Wrox Press's web page, happy reading ;) */ #include <stdio.h> #include <process.h> // The number of bits in "register" #define BITS_IN_REGISTER 16 // The top value in register #define TOP_VALUE (((long) 1<<BITS_IN_REGISTER)-1) // The ranges #define FIRST_QTR (TOP_VALUE/4+1) #define HALF (2*FIRST_QTR) #define THIRD_QTR (3*FIRST_QTR) // The number of symbols in alfabet #define NO_OF_CHARS 256 // End Of File symbol #define EOF_SYMBOL (NO_OF_CHARS+1) // Total number of symbols #define NO_OF_SYMBOLS (NO_OF_CHARS+1) // The limit of frequency for scale #define MAX_FREQUENCY 16383 // The tables for recoding unsigned char index_to_char[NO_OF_SYMBOLS]; int char_to_index[NO_OF_CHARS]; // The table of frequency int cum_freq[NO_OF_SYMBOLS+1]; int freq[_NOOFSYMBOLS+1]; // The registers of limit long low,high; // The register of code long value; // Registers for bit i/o long bits_to_follow; int buffer; int bits_to_go; int garbage_bits; int counteri,countero; void start_model(void) { int i; for (i=0;i<NO_OF_CHARS;i++) { char_to_index[i]=i+1; index_to_char[i+1]=i; } for (i=0;i<=NO_OF_SYMBOLS;i++) { freq[i]=1; cum_freq[i]=NO_OF_SYMBOLS-i; } freq[0]=0; } // Reinitialization of model by next char void update_model (int symbol) { int i; int ch_i,ch_symbol; int cum; // Check the counter of frequency for overflow if (cum_freq[0]==MAX_FREQUENCY) { cum=0; // Update the frequences duaring the overflow for (i=NO_OF_SYMBOLS;i>=0;i--) { freq[i]=(freq[i]+1)/2; cum_freq[i]=cum; cum+=freq[i]; } } for(i=symbol;freq[i]==freq[i-1];i--); if (i<symbol) { ch_i=index_to_char[i]; ch_symbol=index_to_char[symbol]; index_to_char[i]=ch_symbol; index_to_char[symbol]=ch_i; char_to_index[ch_i]=symbol; char_to_index[ch_symbol]=i; } // Update the values in the tables of frequencies freq[i]+=1; while(i>0) { i-=1; cum_freq[i]+=1; } } // Initialization of bits output void start_inputing_bits(void) { bits_to_go=0; garbage_bits=0; } // Input the next bit of compressed information int input_bit(char *buffi,int lengi) { int t; if(bits_to_go==0) { buffer=(unsigned char) buffi[counteri++]; if(counteri==lengi) { garbage_bits+=1; if(garbage_bits>BITS_IN_REGISTER-2) { printf("Error in compressed file\n"); exit(-1); } } bits_to_go=8; } t=buffer & 1; buffer>>=1; bits_to_go-=1; return t; } // Initialization of bits output void start_outputing_bits (void) { buffer=0; bits_to_go=8; } // Input the next bit of compressed information void output_bit(int bit, char *buffo, int lengo) { buffer>>=1; if(bit) buffer |=0x80; bits_to_go-=1; if(bits_to_go==0) { buffo[countero++]=(unsigned char) buffer; if (countero==lengo+1) { printf("Overflow the output buffer.\n"); return; } bits_to_go=8; } } // Clear the bits output void done_outputing_bits(char *buffo, int lengo) { buffo[countero++]=(unsigned char) (buffer>>bits_to_go); if (countero==lengo+1) { printf("Overflow the output buffer.\n"); return; } } // Output the current bit and other bits void output_bit_plus_follow(int bit, char *buffo, int lengo) { output_bit(bit,buffo,lengo); while(bits_to_follow>0) { output_bit(!bit,buffo,lengo); bits_to_follow--; } } // Initialization of registers of limits and code before compression void start_encoding(void) { low=0l; high=TOP_VALUE; bits_to_follow=0l; } // Clear the bits output void done_encoding(char *buffo,int lengo) { bits_to_follow++; if(low<FIRST_QTR) output_bit_plus_follow(0,buffo,lengo); else output_bit_plus_follow(1,buffo,lengo); } // Initialization of registers before decompression // Load the begin of compressed information void start_decoding(char *buffi,int lengi) { int i; value=0l; for (i=1;i<=BITS_IN_REGISTER;i++) value=2*value+input_bit(buffi,lengi); low=0l; high=TOP_VALUE; } // Coding the next char void encode_symbol(int symbol,char *buffo,int lengo) { long range; // Update the ranges and limits range=(long)(high-low)+1; high=low+(range*cum_freq[symbol-1])/cum_freq[0]-1; low=low+(range*cum_freq[symbol])/cum_freq[0]; while(1) { if(high<HALF) output_bit_plus_follow(0,buffo,lengo); else if (low>=HALF) { output_bit_plus_follow(1,buffo,lengo); low-=HALF; high-=HALF; } else if(low>=FIRST_QTR && high<THIRD_QTR) { bits_to_follow+=1; low-=FIRST_QTR; high-=FIRST_QTR; } else break; // Shift to left with shift the next bit low=2*low; high=2*high+1; } } // Decoding the next symbol int decode_symbol(char *buffi,int lengi) { long range; int cum,symbol; // Calculating the current scale of frequencies range=(long)(high-low)+1; // Scaling the values in register of code cum=(int) ((((long)(value-low)+1)*cum_freq[0]-1)/range); // Search the symbol in the table of symbols for(symbol=1;cum_freq[symbol]>cum;symbol++); // Recalculating the limits high=low+(range*cum_freq[symbol-1])/cum_freq[0]-1; low=low+(range*cum_freq[symbol])/cum_freq[0]; // Delete the current symbol from input flow while(1) { if(high<HALF) { } else if (low>=HALF) { value-=HALF; low-=HALF; high-=HALF; } else if (low>=FIRST_QTR && high<THIRD_QTR) { value-=FIRST_QTR; low-=FIRST_QTR; high-=FIRST_QTR; } else break; // Shift to left with shift the next bit low=2*low; high=2*high+1; value=2*value+input_bit(buffi,lengi); } return symbol; } // Adaptive arifmetic compression int arif_encode (char *buffi,int lengi,char *buffo,int lengo) { int ch,symbol; counteri=0; countero=0; start_model(); start_outputing_bits(); start_encoding(); while(1) { ch=((unsigned char) buffi[counteri++]); symbol=char_to_index[ch]; encode_symbol(symbol,buffo,lengo); update_model(symbol); if (counteri==lengi) break; } encode_symbol(EOF_SYMBOL,buffo,lengo); done_encoding(buffo,lengo); done_outputing_bits(buffo,lengo); return countero; } // Adaptive arifmetic decompression int arif_decode (char *buffi, int lengi, char *buffo, int lengo) { int ch,symbol; counteri=0; countero=0; start_model(); start_inputing_bits(); start_decoding(buffi,lengi); while(1) { symbol=decode_symbol(buffi,lengi); if(symbol==EOF_SYMBOL) break; ch=index_to_char[symbol]; buffo[countero++]=(unsigned char) ch; if (countero==lengo+1) { printf("Overflow the output buffer.\n"); break; } update_model(symbol); } return countero; }