[futurebasic] Re: [FB] Compressionism

Message: < previous - next > : Reply : Subscribe : Cleanse
Home   : January 2001 : Group Archive : Group : All Groups

From: Heather Donahue <heatherd@...>
Date: Mon, 8 Jan 2001 23:26:06 -0800
At 6:54 PM -0500 on 1/8/01, Lake Group wrote:


>  >> Do any of you math wizards or word-bytists know of a simple compression
>>>  scheme for abbreviating or otherwise representing in less data the values 0
>>>  to 255?
>>>
>>>  Thank you for any suggestions.
>>>
>>>  Robert
>>>
>
>Hola,
>
>I approached a few compression ideas awhile back, but basically couldn't get
>anything better than StuffIt, so I moved on. For what it's worth, here are
>some approaches that I tried.

I don't remember where I got it and I'm not sure if it will work for 
the values 0 to 255 but I have some C source for something called 
Arithmetic Encoding

I got this as a clipping from a webpage but I can't rememeber the 
URL.  I've converted most of it to FB^3 but it's a back-burner 
project.  This is the original C source.  Maybe someone can make use 
of it.  It is a little long though.

--------------------------------
/* Arithmetic Encoding Sample */
/* Malakhov K.A. 1994 */
/* This source was made available to us by Wrox Press from the
book Master Class Assembly Language */
/* The full source for all the sample programs are available
on Wrox Press's web page, happy reading ;) */



#include <stdio.h>
#include <process.h>
// The number of bits in "register"
#define BITS_IN_REGISTER 16
// The top value in register
#define TOP_VALUE (((long) 1<<BITS_IN_REGISTER)-1)
// The ranges
#define FIRST_QTR (TOP_VALUE/4+1)

#define HALF (2*FIRST_QTR)

#define THIRD_QTR (3*FIRST_QTR)
// The number of symbols in alfabet
#define NO_OF_CHARS 256
// End Of File symbol
#define EOF_SYMBOL (NO_OF_CHARS+1)
// Total number of symbols
#define NO_OF_SYMBOLS (NO_OF_CHARS+1)
// The limit of frequency for scale
#define MAX_FREQUENCY 16383
// The tables for recoding
unsigned char index_to_char[NO_OF_SYMBOLS];
int char_to_index[NO_OF_CHARS];
// The table of frequency
int cum_freq[NO_OF_SYMBOLS+1];
int freq[_NOOFSYMBOLS+1];
// The registers of limit
long low,high;
// The register of code
long value;
// Registers for bit i/o
long bits_to_follow;
int buffer;
int bits_to_go;
int garbage_bits;

int counteri,countero;

void start_model(void)
{
	int	i;
	for (i=0;i<NO_OF_CHARS;i++)
	{
		char_to_index[i]=i+1;
		index_to_char[i+1]=i;
	}
	for (i=0;i<=NO_OF_SYMBOLS;i++)
	{
		freq[i]=1;
		cum_freq[i]=NO_OF_SYMBOLS-i;
	}
	freq[0]=0;
}
// Reinitialization of model by next char
void update_model (int symbol)
{
	int	i;
	int	ch_i,ch_symbol;
	int	cum;
// Check the counter of frequency for overflow
	if (cum_freq[0]==MAX_FREQUENCY)
	{
		cum=0;
// Update the frequences duaring the overflow
		for (i=NO_OF_SYMBOLS;i>=0;i--)
		{
			freq[i]=(freq[i]+1)/2;
			cum_freq[i]=cum;
			cum+=freq[i];
		}
	}
	for(i=symbol;freq[i]==freq[i-1];i--);
	if (i<symbol)
	{
		ch_i=index_to_char[i];
		ch_symbol=index_to_char[symbol];
		index_to_char[i]=ch_symbol;
		index_to_char[symbol]=ch_i;
		char_to_index[ch_i]=symbol;
		char_to_index[ch_symbol]=i;
	}
// Update the values in the tables of frequencies
	freq[i]+=1;
	while(i>0)
	{
		i-=1;
		cum_freq[i]+=1;
	}
}
// Initialization of bits output
void start_inputing_bits(void)
{
	bits_to_go=0;
	garbage_bits=0;
}
// Input the next bit of compressed information
int input_bit(char *buffi,int lengi)
{
	int t;
	if(bits_to_go==0)
	{
		buffer=(unsigned char) buffi[counteri++];
		if(counteri==lengi)
		{
			garbage_bits+=1;
			if(garbage_bits>BITS_IN_REGISTER-2)
			{
				printf("Error in compressed file\n");
				exit(-1);
			}
		}
		bits_to_go=8;
	}
	t=buffer & 1;
	buffer>>=1;
	bits_to_go-=1;
	return t;
}
// Initialization of bits output
void start_outputing_bits (void)
{
	buffer=0;
	bits_to_go=8;
}
// Input the next bit of compressed information
void output_bit(int bit, char *buffo, int lengo)
{
	buffer>>=1;
	if(bit)
		buffer |=0x80;
	bits_to_go-=1;
	if(bits_to_go==0)
	{
		buffo[countero++]=(unsigned char) buffer;
		if (countero==lengo+1) {
			printf("Overflow the output buffer.\n");
			return;
		}
		bits_to_go=8;
	}
}
// Clear the bits output
void done_outputing_bits(char *buffo, int lengo)
{
	buffo[countero++]=(unsigned char) (buffer>>bits_to_go);
	if (countero==lengo+1) {
		printf("Overflow the output buffer.\n");
		return;
	}
}
// Output the current bit and other bits
void output_bit_plus_follow(int bit, char *buffo, int lengo)
{
	output_bit(bit,buffo,lengo);
	while(bits_to_follow>0)
	{
		output_bit(!bit,buffo,lengo);
		bits_to_follow--;
	}
}
// Initialization of registers of limits and code before compression
void start_encoding(void)
{
	low=0l;
	high=TOP_VALUE;
	bits_to_follow=0l;
}
// Clear the bits output
void done_encoding(char *buffo,int lengo)
{
	bits_to_follow++;
	if(low<FIRST_QTR)
		output_bit_plus_follow(0,buffo,lengo);
	else
		output_bit_plus_follow(1,buffo,lengo);
}
// Initialization of registers before decompression
// Load the begin of compressed information
void start_decoding(char *buffi,int lengi)
{
	int	i;
	value=0l;
	for (i=1;i<=BITS_IN_REGISTER;i++)
		value=2*value+input_bit(buffi,lengi);
	low=0l;
	high=TOP_VALUE;
}
// Coding the next char
void encode_symbol(int symbol,char *buffo,int lengo)
{
	long range;
	// Update the ranges and limits
	range=(long)(high-low)+1;
	high=low+(range*cum_freq[symbol-1])/cum_freq[0]-1;
	low=low+(range*cum_freq[symbol])/cum_freq[0];
	while(1)
	{
		if(high<HALF)
			output_bit_plus_follow(0,buffo,lengo);
		else if (low>=HALF)
		{
			output_bit_plus_follow(1,buffo,lengo);
			low-=HALF;
			high-=HALF;
		}
		else if(low>=FIRST_QTR && high<THIRD_QTR)
		{
			bits_to_follow+=1;
			low-=FIRST_QTR;
			high-=FIRST_QTR;
		}
		else
			break;
		// Shift to left with shift the next bit
		low=2*low;
		high=2*high+1;
	}
}
// Decoding the next symbol
int decode_symbol(char *buffi,int lengi)
{
	long range;
	int cum,symbol;
	// Calculating the current scale of frequencies
	range=(long)(high-low)+1;
	// Scaling the values in register of code
	cum=(int) ((((long)(value-low)+1)*cum_freq[0]-1)/range);
	// Search the symbol in  the table of symbols
	for(symbol=1;cum_freq[symbol]>cum;symbol++);
	// Recalculating the limits
	high=low+(range*cum_freq[symbol-1])/cum_freq[0]-1;
	low=low+(range*cum_freq[symbol])/cum_freq[0];
	// Delete the current symbol from input flow
	while(1)
	{
		if(high<HALF)
		{
		}
		else if (low>=HALF)
		{
			value-=HALF;
			low-=HALF;
			high-=HALF;
		}
		else if (low>=FIRST_QTR && high<THIRD_QTR)
		{
			value-=FIRST_QTR;
			low-=FIRST_QTR;
			high-=FIRST_QTR;
		}
		else
			break;
		// Shift to left with shift the next bit
		low=2*low;
		high=2*high+1;
		value=2*value+input_bit(buffi,lengi);
	}
	return symbol;
}
// Adaptive arifmetic compression
int arif_encode (char *buffi,int lengi,char *buffo,int lengo)
{
	int ch,symbol;
	counteri=0;
	countero=0;
	start_model();
	start_outputing_bits();
	start_encoding();
	while(1)
	{
		ch=((unsigned char) buffi[counteri++]);
		symbol=char_to_index[ch];
		encode_symbol(symbol,buffo,lengo);
		update_model(symbol);
		if (counteri==lengi)
			break;
	}
	encode_symbol(EOF_SYMBOL,buffo,lengo);
	done_encoding(buffo,lengo);
	done_outputing_bits(buffo,lengo);
	return countero;
}
// Adaptive arifmetic decompression
int arif_decode (char *buffi, int lengi, char *buffo, int lengo)
{
	int ch,symbol;
	counteri=0;
	countero=0;
	start_model();
	start_inputing_bits();
	start_decoding(buffi,lengi);
	while(1)
	{
		symbol=decode_symbol(buffi,lengi);
		if(symbol==EOF_SYMBOL)
			break;
		ch=index_to_char[symbol];
		buffo[countero++]=(unsigned char) ch;
		if (countero==lengo+1) {
			printf("Overflow the output buffer.\n");
			break;
		}
		update_model(symbol);
	}
	return countero;
}