// Huffman coding
// 
// Simplified example implementation of Huffman coding.
//
// by feyd//godX.de

#include <math.h>
#include <stdio.h>
#include <string.h>

#include <vector>

// Huffman code builder for symbols in the range [0, nSymbols).
//
// Usage: construct with the alphabet size, call BuildTree() with the symbol
// stream, then PrintHistogram() / Encode() to print the resulting code table
// and the encoded bit string to stdout.
class CHuffman
{
	// One node of the Huffman tree. Entries [0, m_nSymbols) are the leaves
	// (one per symbol); entries from m_nSymbols upwards are the internal
	// nodes created while merging.
	struct Histogram
	{
		int nCount;   // Number of occurrences (leaf) or sum of both children (internal node)
		size_t nLink; // Index of the parent node; 0 means "no parent" (parents are always >= m_nSymbols)
		int nBits;    // Code length in bits (only meaningful for leaves that occur)
		int nCode;    // Code value, stored in the low nBits bits (only meaningful for leaves that occur)
	};
	// Per-code-length bookkeeping used while assigning the binary codes.
	struct BitHistogram
	{
		int nBase;  // Next code value to hand out for this code length
		int nCount; // Number of symbols that have this code length
	};

public:
	// nSymbols is the alphabet size. The node table holds nSymbols*2 entries,
	// which covers the nSymbols leaves plus up to nSymbols-1 internal nodes.
	CHuffman(size_t nSymbols) : m_nSymbols(nSymbols), m_histogram(nSymbols*2) {}
	virtual ~CHuffman() {}

	// Counts the symbols in `codes`, builds the Huffman tree and assigns a
	// binary code to every symbol that occurs at least once.
	//
	// If any entry of `codes` is outside [0, nSymbols) an error is printed and
	// the table is left empty (all counts zero, no codes assigned).
	void BuildTree(const std::vector<int>& codes)
	{
		// Clear histogram
		for(size_t n=0; n<m_histogram.size(); n++)
		{
			m_histogram[n].nLink = 0;
			m_histogram[n].nCount = 0;
			m_histogram[n].nBits = 0;
			m_histogram[n].nCode = 0;
		}

		// Validate the complete input before counting anything, so that a bad
		// symbol never leaves a half-built histogram behind.
		for(size_t n=0; n<codes.size(); n++)
		{
			if(!IsValidSymbol(codes[n]))
			{
				printf("Error not enough symbols\r\n");
				return;
			}
		}

		// Build histogram
		for(size_t n=0; n<codes.size(); n++)
			m_histogram[codes[n]].nCount++;

		// Build tree (slow, good point to optimize ;) )
		// Each pass scans all nodes created so far for the two parentless
		// nodes with the smallest counts and joins them under a new node.
		size_t nMaxSymbol = m_nSymbols;
		while(true)
		{
			// nMaxSymbol doubles as the "nothing selected yet" marker.
			size_t nSmallest1 = nMaxSymbol;
			size_t nSmallest2 = nMaxSymbol;
			for(size_t n=0; n<nMaxSymbol; n++)
			{
				// Skip nodes that already have a parent and symbols that never occur.
				if(m_histogram[n].nLink!=0 || m_histogram[n].nCount==0)
					continue;
				if(nSmallest1<nMaxSymbol && nSmallest2<nMaxSymbol)
				{
					// Two candidates are held: replace the larger of the two
					// if the current node is smaller than it.
					if(m_histogram[nSmallest1].nCount>m_histogram[nSmallest2].nCount)
					{
						if(m_histogram[n].nCount<m_histogram[nSmallest1].nCount)
							nSmallest1 = n;
					} else {
						if(m_histogram[n].nCount<m_histogram[nSmallest2].nCount)
							nSmallest2 = n;
					}
				} else {
					// Fewer than two candidates so far: shift and take this one.
					nSmallest1 = nSmallest2;
					nSmallest2 = n;
				}
			}
			// Fewer than two parentless nodes left: the tree is complete.
			if(nSmallest1==nMaxSymbol)
				break;
			m_histogram[nSmallest1].nLink = nMaxSymbol;
			m_histogram[nSmallest2].nLink = nMaxSymbol;
			m_histogram[nMaxSymbol].nCount = m_histogram[nSmallest1].nCount+m_histogram[nSmallest2].nCount;
			nMaxSymbol++;
		}

		BuildBinaryCodes();
	}

	// Prints the code of symbol nCode as a string of '0'/'1' digits, most
	// significant bit first. Prints nothing for symbols outside [0, nSymbols).
	void PrintCode(int nCode) const
	{
		if(!IsValidSymbol(nCode))
			return;
		const Histogram& entry = m_histogram[nCode];
		for(int n=entry.nBits-1; n>=0; n--)
			printf("%d", (entry.nCode>>n)&1);
	}

	// Prints one line per occurring symbol: index, printable character
	// ('.' for values below 32 or from 128 upwards), count, code length and code.
	void PrintHistogram() const
	{
		printf(" Tables:\r\n");
		for(size_t n=0; n<m_nSymbols; n++)
		{
			if(m_histogram[n].nCount==0)
				continue;
			printf(" %5d (%c). %5d codes / %2d bits ", (int)n, (n<32 || n>=128)?'.':(char)n, m_histogram[n].nCount, m_histogram[n].nBits);
			PrintCode(static_cast<int>(n));
			printf("\r\n");
		}
		printf("\r\n");
	}

	// Prints the code of every entry of `codes`, followed by size statistics
	// and the entropy-based lower bound computed from the current histogram.
	// Entries outside [0, nSymbols) print no bits and add nothing to the total.
	void Encode(const std::vector<int>& codes) const
	{
		printf(" Encoded:\r\n");
		printf(" ");
		// Bits per input symbol for a fixed-length encoding of the alphabet.
		float fSymbolSizeIn = log((float)m_nSymbols)/log(2.0f);
		int nBitsOut = 0;
		for(size_t n=0; n<codes.size(); n++)
		{
			printf(" ");
			PrintCode(codes[n]);
			if(IsValidSymbol(codes[n]))
				nBitsOut+= m_histogram[codes[n]].nBits;
		}
		printf("\r\n\r\n");

		// Avoid 0/0 (NaN) in the per-code averages when the input is empty.
		const float fCodeCount = (float)codes.size();
		const float fBitsPerCode = codes.empty() ? 0.0f : (float)nBitsOut/fCodeCount;
		printf(" Bits in: %.0fx%d=%.0f Bits out: %d = %3.3f bits/code entropy\r\n", fSymbolSizeIn, (int)codes.size(), fSymbolSizeIn*fCodeCount, nBitsOut, fBitsPerCode);

		// Theoretical entropy
		int nMaxCount = 0;
		for(size_t n=0; n<m_nSymbols; n++)
			nMaxCount+=m_histogram[n].nCount;

		float fBits = 0;
		for(size_t n=0; n<m_nSymbols; n++)
		{
			if(m_histogram[n].nCount==0)
				continue;

			// count * -log2(count / total)
			fBits += (float)m_histogram[n].nCount*(log((float)m_histogram[n].nCount/(float)nMaxCount)/-log(2.0f));
		}
		printf(" Theoretical lower bound for entropy is: %3.3f bits = %3.3f bits/code\r\n", fBits, codes.empty() ? 0.0f : fBits/fCodeCount);
	}
protected:
	// True if nCode is a usable leaf index, i.e. lies in [0, m_nSymbols).
	bool IsValidSymbol(int nCode) const
	{
		return nCode>=0 && static_cast<size_t>(nCode)<m_nSymbols;
	}

	// Derives the code length of every occurring symbol from the tree links
	// and then assigns the code values: symbols of equal length receive
	// consecutive values in ascending symbol order, and the first value of
	// each length is (first value + symbol count of the previous length) << 1.
	//
	// nCode is an int, so code values longer than 31 bits cannot be represented.
	void BuildBinaryCodes()
	{
		// Build bit lengths by walking from each leaf up to the root
		int nMaxBits = 0;
		for(size_t n=0; n<m_nSymbols; n++)
		{
			if(m_histogram[n].nCount==0)
				continue;
			int nBits = 0;
			for(size_t nPos=m_histogram[n].nLink; nPos!=0; nPos=m_histogram[nPos].nLink)
				nBits++;
			// A leaf without a parent is the only symbol in the input. Give it
			// one bit, since a zero-length code cannot be decoded.
			if(nBits==0)
				nBits = 1;
			m_histogram[n].nBits = nBits;
			if(nBits>nMaxBits)
				nMaxBits = nBits;
		}

		// Build bit histogram, sized by the longest code actually present
		// (entries are value-initialized to zero).
		std::vector<BitHistogram> bitCounts(static_cast<size_t>(nMaxBits)+1);
		for(size_t n=0; n<m_nSymbols; n++)
		{
			if(m_histogram[n].nCount==0)
				continue;
			bitCounts[m_histogram[n].nBits].nCount++;
		}

		// Build code bases. Unsigned arithmetic, and the loop stops at the
		// longest code length, so the running base never overflows a signed int
		// by shifting past the last length in use.
		unsigned int nCurrentBase = 0;
		for(int nBits=1; nBits<=nMaxBits; nBits++)
		{
			nCurrentBase = (nCurrentBase+static_cast<unsigned int>(bitCounts[nBits-1].nCount))<<1;
			bitCounts[nBits].nBase = static_cast<int>(nCurrentBase);
		}

		// Assign binary codes to every symbol
		for(size_t n=0; n<m_nSymbols; n++)
		{
			if(m_histogram[n].nCount==0)
				continue;
			BitHistogram& slot = bitCounts[m_histogram[n].nBits];
			m_histogram[n].nCode = slot.nBase;
			slot.nBase++;
		}
	}

	size_t m_nSymbols;                  // Alphabet size
	std::vector<Histogram> m_histogram; // Leaves [0, m_nSymbols) followed by internal nodes
};

// Application entry point (from libc)
// Reads the text to encode from the first command line argument, builds a
// Huffman code over its bytes and prints the code table and the encoded bits.
// Returns -1 if no argument was given, 0 otherwise.
int main(int argc, const char* argv[])
{
	printf("Huffman test...\r\n\r\n");

	if(argc<2)
	{
		printf("Not enough parameters.\r\n");
		return -1;
	}

	// Measure the input once instead of on every loop iteration.
	const char* szInput = argv[1];
	const size_t nLength = strlen(szInput);

	std::vector<int> codes;
	codes.reserve(nLength);
	// Go through unsigned char so bytes >= 0x80 become 128..255 rather than
	// negative values (plain char may be signed), keeping every symbol inside
	// the 256-entry alphabet used below.
	for(size_t n=0; n<nLength; n++)
		codes.push_back(static_cast<unsigned char>(szInput[n]));

	CHuffman huffman(256);
	huffman.BuildTree(codes);
	huffman.PrintHistogram();
	huffman.Encode(codes);
	return 0;
}
