/src/freeimage-svn/FreeImage/trunk/Source/OpenEXR/IlmImf/ImfFastHuf.cpp

Source (jump to first uncovered line)
///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2009-2014 DreamWorks Animation LLC. 
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of DreamWorks Animation nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////

#include "ImfFastHuf.h"
#include <Iex.h>

#include <string.h>
#include <assert.h>
#include <math.h>
#include <vector>

OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER

//
// Adapted from hufUnpackEncTable - 
// We don't need to reconstruct the code book, just the encoded
// lengths for each symbol. From the lengths, we can build the
// base + offset tables. This should be a bit more efficient
// for sparse code books.
// 
//   table     - ptr to the start of the code length data. Will be
//               updated as we decode data
//
//   numBytes  - size of the encoded table (I think)?
//
//   minSymbol - smallest symbol in the code book
//
//   maxSymbol - largest symbol in the code book. 
//
//   rleSymbol - the symbol to trigger RLE in the encoded bitstream
//

FastHufDecoder::FastHufDecoder
    (const char *&table,
     int numBytes,
     int minSymbol,
     int maxSymbol,
     int rleSymbol)
:
    _rleSymbol (rleSymbol),
    _numSymbols (0),
    _minCodeLength (255),
    _maxCodeLength (0),
    _idToSymbol (0)
{
    //
    // List of symbols that we find with non-zero code lengths
    // (listed in the order we find them). Store these in the
    // same format as the code book stores codes + lengths - 
    // low 6 bits are the length, everything above that is
    // the symbol.
    //

    std::vector<Int64> symbols;

    //
    // The 'base' table is the minimum code at each code length. base[i]
    // is the smallest code (numerically) of length i.
    //

    Int64 base[MAX_CODE_LEN + 1];     

    //
    // The 'offset' table is the position (in sorted order) of the first id
    // of a given code lenght. Array is indexed by code length, like base.  
    //

    Int64 offset[MAX_CODE_LEN + 1];   

    //
    // Count of how many codes at each length there are. Array is 
    // indexed by code length, like base and offset.
    //

    size_t codeCount[MAX_CODE_LEN + 1];    

    for (int i = 0; i <= MAX_CODE_LEN; ++i)
    {
        codeCount[i] = 0;
        base[i]      = 0xffffffffffffffffL;
        offset[i]    = 0;
    }

    //
    // Count the number of codes, the min/max code lengths, the number of
    // codes with each length, and record symbols with non-zero code
    // length as we find them.
    //

    const char *currByte     = table;
    Int64       currBits     = 0;
    int         currBitCount = 0;

    const int SHORT_ZEROCODE_RUN = 59;
    const int LONG_ZEROCODE_RUN  = 63;
    const int SHORTEST_LONG_RUN  = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;

    for (Int64 symbol = minSymbol; symbol <= maxSymbol; symbol++)
    {
        if (currByte - table > numBytes)
        {
            throw Iex::InputExc ("Error decoding Huffman table "
                                 "(Truncated table data).");
        }

        //
        // Next code length - either:
        //       0-58  (literal code length)
        //       59-62 (various lengths runs of 0)
        //       63    (run of n 0's, with n is the next 8 bits)
        //

        Int64 codeLen = readBits (6, currBits, currBitCount, currByte);

        if (codeLen == (Int64) LONG_ZEROCODE_RUN)
        {
            if (currByte - table > numBytes)
            {
                throw Iex::InputExc ("Error decoding Huffman table "
                                     "(Truncated table data).");
            }

            int runLen = readBits (8, currBits, currBitCount, currByte) +
                         SHORTEST_LONG_RUN;

            if (symbol + runLen > maxSymbol + 1)
            {
                throw Iex::InputExc ("Error decoding Huffman table "
                                     "(Run beyond end of table).");
            }
            
            symbol += runLen - 1;

        }
        else if (codeLen >= (Int64) SHORT_ZEROCODE_RUN)
        {
            int runLen = codeLen - SHORT_ZEROCODE_RUN + 2;

            if (symbol + runLen > maxSymbol + 1)
            {
                throw Iex::InputExc ("Error decoding Huffman table "
                                     "(Run beyond end of table).");
            }

            symbol += runLen - 1;

        }
        else if (codeLen != 0)
        {
            symbols.push_back ((symbol << 6) | (codeLen & 63));

            if (codeLen < _minCodeLength)
                _minCodeLength = codeLen;

            if (codeLen > _maxCodeLength)
                _maxCodeLength = codeLen;

            codeCount[codeLen]++;
        }
    }

    for (int i = 0; i < MAX_CODE_LEN; ++i)
        _numSymbols += codeCount[i];

    table = currByte;

    //
    // Compute base - once we have the code length counts, there
    //                is a closed form solution for this
    //

    {
        double* countTmp = new double[_maxCodeLength+1];

        for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
        {
            countTmp[l] = (double)codeCount[l] * 
                          (double)(2 << (_maxCodeLength-l));
        }
    
        for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
        {
            double tmp = 0;

            for (int k =l + 1; k <= _maxCodeLength; ++k)
                tmp += countTmp[k];
            
            tmp /= (double)(2 << (_maxCodeLength - l));

            base[l] = (Int64)ceil (tmp);
        }

        delete [] countTmp;
    }
   
    //
    // Compute offset - these are the positions of the first
    //                  id (not symbol) that has length [i]
    //

    offset[_maxCodeLength] = 0;

    for (int i= _maxCodeLength - 1; i >= _minCodeLength; i--)
        offset[i] = offset[i + 1] + codeCount[i + 1];

    //
    // Allocate and fill the symbol-to-id mapping. Smaller Ids should be
    // mapped to less-frequent symbols (which have longer codes). Use
    // the offset table to tell us where the id's for a given code 
    // length start off.
    //

    _idToSymbol = new int[_numSymbols];

    Int64 mapping[MAX_CODE_LEN + 1];
    for (int i = 0; i < MAX_CODE_LEN + 1; ++i) 
        mapping[i] = -1;
    for (int i = _minCodeLength; i <= _maxCodeLength; ++i)
        mapping[i] = offset[i];

    for (std::vector<Int64>::const_iterator i = symbols.begin(); 
         i != symbols.end();
         ++i)
    {
        int codeLen = *i & 63;
        int symbol  = *i >> 6;

        if (mapping[codeLen] >= _numSymbols)
            throw Iex::InputExc ("Huffman decode error "
                                  "(Invalid symbol in header).");
        
        _idToSymbol[mapping[codeLen]] = symbol;
        mapping[codeLen]++;
    }

    buildTables(base, offset);
}


FastHufDecoder::~FastHufDecoder()
{
    delete[] _idToSymbol;
}


//
// Static check if the decoder is enabled.
//
// ATM, I only have access to little endian hardware for testing,
// so I'm not entirely sure that we are reading fom the bit stream
// properly on BE. 
//
// If you happen to have more obscure hardware, check that the 
// byte swapping in refill() is happening sensable, add an endian 
// check if needed, and fix the preprocessor magic here.
//

#define READ64(c) \
    ((Int64)(c)[0] << 56) | ((Int64)(c)[1] << 48) | ((Int64)(c)[2] << 40) | \
    ((Int64)(c)[3] << 32) | ((Int64)(c)[4] << 24) | ((Int64)(c)[5] << 16) | \
    ((Int64)(c)[6] <<  8) | ((Int64)(c)[7] ) 

#ifdef __INTEL_COMPILER // ICC built-in swap for LE hosts
    #if defined (__i386__) || defined(__x86_64__)
        #undef  READ64
        #define READ64(c) _bswap64 (*(const Int64*)(c))
    #endif
#endif


bool
FastHufDecoder::enabled()
{
    #if defined(__INTEL_COMPILER) || defined(__GNUC__)  

        //
        // Enabled for ICC, GCC:
        //       __i386__   -> x86
        //       __x86_64__ -> 64-bit x86
        //

        #if defined (__i386__) || defined(__x86_64__)
            return true;
        #else
            return false;
        #endif

    #elif defined (_MSC_VER)

        //
        // Enabled for Visual Studio:
        //        _M_IX86 -> x86
        //        _M_X64  -> 64bit x86

        #if defined (_M_IX86) || defined(_M_X64)
            return true;
        #else
            return false;
        #endif

    #else

        //
        // Unknown compiler - Be safe and disable.
        //
        return false;
    #endif
}

//
//
// Built the acceleration tables for lookups on the upper bits
// as well as the 'LJ' tables.
//

void
FastHufDecoder::buildTables (Int64 *base, Int64 *offset)
{
    //
    // Build the 'left justified' base table, by shifting base left..
    //

    for (int i = 0; i <= MAX_CODE_LEN; ++i)
    {
        if (base[i] != 0xffffffffffffffffL)
        {
            _ljBase[i] = base[i] << (64 - i);
        }
        else
        {
            //
            // Unused code length - insert dummy values
            //

            _ljBase[i] = 0xffffffffffffffffL;
        }
    }

    //
    // Combine some terms into a big fat constant, which for
    // lack of a better term we'll call the 'left justified' 
    // offset table (because it serves the same function
    // as 'offset', when using the left justified base table.
    //

    for (int i = 0; i <= MAX_CODE_LEN; ++i)
        _ljOffset[i] = offset[i] - (_ljBase[i] >> (64 - i));

    //
    // Build the acceleration tables for the lookups of
    // short codes ( <= TABLE_LOOKUP_BITS long)
    //

    for (Int64 i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i)
    {
        Int64 value = i << (64 - TABLE_LOOKUP_BITS);

        _tableSymbol[i]  = 0xffff;
        _tableCodeLen[i] = 0; 

        for (int codeLen = _minCodeLength; codeLen <= _maxCodeLength; ++codeLen)
        {
            if (_ljBase[codeLen] <= value)
            {
                _tableCodeLen[i] = codeLen;

                Int64 id = _ljOffset[codeLen] + (value >> (64 - codeLen));
                if (id < _numSymbols)
                {
                    _tableSymbol[i] = _idToSymbol[id];
                }
                else
                {
                    throw Iex::InputExc ("Huffman decode error "
                                          "(Overrun).");
                }
                break;
            }
        }
    }

    //
    // Store the smallest value in the table that points to real data.
    // This should be the entry for the largest length that has 
    // valid data (in our case, non-dummy _ljBase)
    //

    int minIdx = TABLE_LOOKUP_BITS;

    while (minIdx > 0 && _ljBase[minIdx] == 0xffffffffffffffffL)
        minIdx--;

    if (minIdx < 0)
    {
        //
        // Error, no codes with lengths 0-TABLE_LOOKUP_BITS used.
        // Set the min value such that the table is never tested.
        //

        _tableMin = 0xffffffffffffffffL;
    }
    else
    {
        _tableMin = _ljBase[minIdx];
    }
}


// 
// For decoding, we're holding onto 2 Int64's. 
//
// The first (buffer), holds the next bits from the bitstream to be 
// decoded. For certain paths in the decoder, we only need TABLE_LOOKUP_BITS
// valid bits to decode the next symbol. For other paths, we need a full
// 64-bits to decode a symbol. 
//
// When we need to refill 'buffer', we could pull bits straight from 
// the bitstream. But this is very slow and requires lots of book keeping
// (what's the next bit in the next byte?). Instead, we keep another Int64
// around that we use to refill from. While this doesn't cut down on the
// book keeping (still need to know how many valid bits), it does cut
// down on some of the bit shifting crazy and byte access. 
//
// The refill Int64 (bufferBack) gets left-shifted after we've pulled
// off bits. If we run out of bits in the input bit stream, we just
// shift in 0's to bufferBack. 
//
// The refill act takes numBits from the top of bufferBack and sticks
// them in the bottom of buffer. If there arn't enough bits in bufferBack,
// it gets refilled (to 64-bits) from the input bitstream.
//

inline void
FastHufDecoder::refill
    (Int64 &buffer,
     int numBits,                       // number of bits to refill
     Int64 &bufferBack,                 // the next 64-bits, to refill from
     int &bufferBackNumBits,            // number of bits left in bufferBack
     const unsigned char *&currByte,    // current byte in the bitstream
     int &currBitsLeft)                 // number of bits left in the bitsream
{
    // 
    // Refill bits into the bottom of buffer, from the top of bufferBack.
    // Always top up buffer to be completely full.
    //

    buffer |= bufferBack >> (64 - numBits);

    if (bufferBackNumBits < numBits)
    {
        numBits -= bufferBackNumBits;

        // 
        // Refill all of bufferBack from the bitstream. Either grab
        // a full 64-bit chunk, or whatever bytes are left. If we
        // don't have 64-bits left, pad with 0's.
        //

        if (currBitsLeft >= 64)
        {
            bufferBack        = READ64 (currByte); 
            bufferBackNumBits = 64;
            currByte         += sizeof (Int64);
            currBitsLeft     -= 8 * sizeof (Int64);

        }
        else
        {
            bufferBack        = 0;
            bufferBackNumBits = 64; 

            Int64 shift = 56;
            
            while (currBitsLeft > 0)
            {
                bufferBack |= ((Int64)(*currByte)) << shift;

                currByte++;
                shift        -= 8;
                currBitsLeft -= 8;
            }

            //
            // At this point, currBitsLeft might be negative, just because
            // we're subtracting whole bytes. To keep anyone from freaking
            // out, zero the counter.
            //

            if (currBitsLeft < 0)
                currBitsLeft = 0;
        }

        buffer |= bufferBack >> (64 - numBits);
    }
    
    bufferBack         = bufferBack << numBits;
    bufferBackNumBits -= numBits;

    // 
    // We can have cases where the previous shift of bufferBack is << 64 - 
    // in which case no shift occurs. The bit count math still works though,
    // so if we don't have any bits left, zero out bufferBack.
    //

    if (bufferBackNumBits == 0)
        bufferBack = 0;
}

//
// Read the next few bits out of a bitstream. Will be given a backing buffer
// (buffer) that may still have data left over from previous reads
// (bufferNumBits).  Bitstream pointer (currByte) will be advanced when needed.
//

inline Int64 
FastHufDecoder::readBits
    (int numBits,
     Int64 &buffer,             // c
     int &bufferNumBits,        // lc
     const char *&currByte)     // in
{
    while (bufferNumBits < numBits)
    {
        buffer = (buffer << 8) | *(unsigned char*)(currByte++);
        bufferNumBits += 8;
    }

    bufferNumBits -= numBits;
    return (buffer >> bufferNumBits) & ((1 << numBits) - 1);
}


//
// Decode using a the 'One-Shift' strategy for decoding, with a 
// small-ish table to accelerate decoding of short codes.
//
// If possible, try looking up codes into the acceleration table.
// This has a few benifits - there's no search involved; We don't
// need an additional lookup to map id to symbol; we don't need
// a full 64-bits (so less refilling). 
//

void
FastHufDecoder::decode
    (const unsigned char *src,
     int numSrcBits,
     unsigned short *dst, 
     int numDstElems)
{
    if (numSrcBits < 128)
        throw Iex::InputExc ("Error choosing Huffman decoder implementation "
                             "(insufficient number of bits).");

    //
    // Current position (byte/bit) in the src data stream
    // (after the first buffer fill)
    //

    const unsigned char *currByte = src + 2 * sizeof (Int64);

    numSrcBits -= 8 * 2 * sizeof (Int64);

    //
    // 64-bit buffer holding the current bits in the stream
    //

    Int64 buffer            = READ64 (src); 
    int   bufferNumBits     = 64;

    //
    // 64-bit buffer holding the next bits in the stream
    //

    Int64 bufferBack        = READ64 ((src + sizeof (Int64))); 
    int   bufferBackNumBits = 64;

    int dstIdx = 0;

    while (dstIdx < numDstElems)
    {
        int  codeLen;
        int  symbol;

        //
        // Test if we can be table accelerated. If so, directly
        // lookup the output symbol. Otherwise, we need to fall
        // back to searching for the code.
        //
        // If we're doing table lookups, we don't really need
        // a re-filled buffer, so long as we have TABLE_LOOKUP_BITS
        // left. But for a search, we do need a refilled table.
        //

        if (_tableMin <= buffer)
        {
            int tableIdx = buffer >> (64 - TABLE_LOOKUP_BITS);

            // 
            // For invalid codes, _tableCodeLen[] should return 0. This
            // will cause the decoder to get stuck in the current spot
            // until we run out of elements, then barf that the codestream
            // is bad.  So we don't need to stick a condition like
            //     if (codeLen > _maxCodeLength) in this inner.
            //

            codeLen = _tableCodeLen[tableIdx];
            symbol  = _tableSymbol[tableIdx];
        }
        else
        {
            if (bufferNumBits < 64)
            {
                refill (buffer,
                        64 - bufferNumBits,
                        bufferBack,
                        bufferBackNumBits,
                        currByte,
                        numSrcBits);

                bufferNumBits = 64;
            }

            // 
            // Brute force search: 
            // Find the smallest length where _ljBase[length] <= buffer
            //

            codeLen = TABLE_LOOKUP_BITS + 1;

            while (_ljBase[codeLen] > buffer && codeLen <= _maxCodeLength)
                codeLen++;

            if (codeLen > _maxCodeLength)
            {
                throw Iex::InputExc ("Huffman decode error "
                                     "(Decoded an invalid symbol).");
            }

            Int64 id = _ljOffset[codeLen] + (buffer >> (64 - codeLen));
            if (id < _numSymbols)
            {
                symbol = _idToSymbol[id];
            }
            else
            {
                throw Iex::InputExc ("Huffman decode error "
                                     "(Decoded an invalid symbol).");
            }
        }

        //
        // Shift over bit stream, and update the bit count in the buffer
        //

        buffer = buffer << codeLen;
        bufferNumBits -= codeLen;

        //
        // If we recieved a RLE symbol (_rleSymbol), then we need
        // to read ahead 8 bits to know how many times to repeat
        // the previous symbol. Need to ensure we at least have
        // 8 bits of data in the buffer
        //

        if (symbol == _rleSymbol)
        {
            if (bufferNumBits < 8)
            {
                refill (buffer,
                        64 - bufferNumBits,
                        bufferBack,
                        bufferBackNumBits,
                        currByte,
                        numSrcBits);

                bufferNumBits = 64;
            }

            int rleCount = buffer >> 56;

            if (dstIdx < 1)
            {
                throw Iex::InputExc ("Huffman decode error (RLE code "
                                     "with no previous symbol).");
            }

            if (dstIdx + rleCount > numDstElems)
            {
                throw Iex::InputExc ("Huffman decode error (Symbol run "
                                     "beyond expected output buffer length).");
            }

            if (rleCount <= 0) 
            {
                throw Iex::InputExc("Huffman decode error"
                                    " (Invalid RLE length)");
            }

            for (int i = 0; i < rleCount; ++i)
                dst[dstIdx + i] = dst[dstIdx - 1];

            dstIdx += rleCount;

            buffer = buffer << 8;
            bufferNumBits -= 8;
        }
        else
        {
            dst[dstIdx] = symbol;
            dstIdx++;
        }

        //
        // refill bit stream buffer if we're below the number of 
        // bits needed for a table lookup
        //

        if (bufferNumBits < TABLE_LOOKUP_BITS)
        {
            refill (buffer,
                    64 - bufferNumBits,
                    bufferBack,
                    bufferBackNumBits,
                    currByte,
                    numSrcBits);

            bufferNumBits = 64;
        }
    }

    if (numSrcBits != 0)
    {
        throw Iex::InputExc ("Huffman decode error (Compressed data remains "
                             "after filling expected output buffer).");
    }
}

OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT

Coverage Report

Created: 2023-12-08 06:53

Line	Count	Source (jump to first uncovered line)
1		///////////////////////////////////////////////////////////////////////////
2		//
3		// Copyright (c) 2009-2014 DreamWorks Animation LLC.
4		//
5		// All rights reserved.
6		//
7		// Redistribution and use in source and binary forms, with or without
8		// modification, are permitted provided that the following conditions are
9		// met:
10		// * Redistributions of source code must retain the above copyright
11		// notice, this list of conditions and the following disclaimer.
12		// * Redistributions in binary form must reproduce the above
13		// copyright notice, this list of conditions and the following disclaimer
14		// in the documentation and/or other materials provided with the
15		// distribution.
16		// * Neither the name of DreamWorks Animation nor the names of
17		// its contributors may be used to endorse or promote products derived
18		// from this software without specific prior written permission.
19		//
20		// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21		// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22		// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23		// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24		// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25		// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26		// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27		// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28		// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29		// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30		// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31		//
32		///////////////////////////////////////////////////////////////////////////
33
34		#include "ImfFastHuf.h"
35		#include <Iex.h>
36
37		#include <string.h>
38		#include <assert.h>
39		#include <math.h>
40		#include <vector>
41
42		OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
43
44		//
45		// Adapted from hufUnpackEncTable -
46		// We don't need to reconstruct the code book, just the encoded
47		// lengths for each symbol. From the lengths, we can build the
48		// base + offset tables. This should be a bit more efficient
49		// for sparse code books.
50		//
51		// table - ptr to the start of the code length data. Will be
52		// updated as we decode data
53		//
54		// numBytes - size of the encoded table (I think)?
55		//
56		// minSymbol - smallest symbol in the code book
57		//
58		// maxSymbol - largest symbol in the code book.
59		//
60		// rleSymbol - the symbol to trigger RLE in the encoded bitstream
61		//
62
63		FastHufDecoder::FastHufDecoder
64		(const char *&table,
65		int numBytes,
66		int minSymbol,
67		int maxSymbol,
68		int rleSymbol)
69		:
70		_rleSymbol (rleSymbol),
71		_numSymbols (0),
72		_minCodeLength (255),
73		_maxCodeLength (0),
74		_idToSymbol (0)
75	0	{
76		//
77		// List of symbols that we find with non-zero code lengths
78		// (listed in the order we find them). Store these in the
79		// same format as the code book stores codes + lengths -
80		// low 6 bits are the length, everything above that is
81		// the symbol.
82		//
83
84	0	std::vector<Int64> symbols;
85
86		//
87		// The 'base' table is the minimum code at each code length. base[i]
88		// is the smallest code (numerically) of length i.
89		//
90
91	0	Int64 base[MAX_CODE_LEN + 1];
92
93		//
94		// The 'offset' table is the position (in sorted order) of the first id
95		// of a given code lenght. Array is indexed by code length, like base.
96		//
97
98	0	Int64 offset[MAX_CODE_LEN + 1];
99
100		//
101		// Count of how many codes at each length there are. Array is
102		// indexed by code length, like base and offset.
103		//
104
105	0	size_t codeCount[MAX_CODE_LEN + 1];
106
107	0	for (int i = 0; i <= MAX_CODE_LEN; ++i)
108	0	{
109	0	codeCount[i] = 0;
110	0	base[i] = 0xffffffffffffffffL;
111	0	offset[i] = 0;
112	0	}
113
114		//
115		// Count the number of codes, the min/max code lengths, the number of
116		// codes with each length, and record symbols with non-zero code
117		// length as we find them.
118		//
119
120	0	const char *currByte = table;
121	0	Int64 currBits = 0;
122	0	int currBitCount = 0;
123
124	0	const int SHORT_ZEROCODE_RUN = 59;
125	0	const int LONG_ZEROCODE_RUN = 63;
126	0	const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
127
128	0	for (Int64 symbol = minSymbol; symbol <= maxSymbol; symbol++)
129	0	{
130	0	if (currByte - table > numBytes)
131	0	{
132	0	throw Iex::InputExc ("Error decoding Huffman table "
133	0	"(Truncated table data).");
134	0	}
135
136		//
137		// Next code length - either:
138		// 0-58 (literal code length)
139		// 59-62 (various lengths runs of 0)
140		// 63 (run of n 0's, with n is the next 8 bits)
141		//
142
143	0	Int64 codeLen = readBits (6, currBits, currBitCount, currByte);
144
145	0	if (codeLen == (Int64) LONG_ZEROCODE_RUN)
146	0	{
147	0	if (currByte - table > numBytes)
148	0	{
149	0	throw Iex::InputExc ("Error decoding Huffman table "
150	0	"(Truncated table data).");
151	0	}
152
153	0	int runLen = readBits (8, currBits, currBitCount, currByte) +
154	0	SHORTEST_LONG_RUN;
155
156	0	if (symbol + runLen > maxSymbol + 1)
157	0	{
158	0	throw Iex::InputExc ("Error decoding Huffman table "
159	0	"(Run beyond end of table).");
160	0	}
161
162	0	symbol += runLen - 1;
163
164	0	}
165	0	else if (codeLen >= (Int64) SHORT_ZEROCODE_RUN)
166	0	{
167	0	int runLen = codeLen - SHORT_ZEROCODE_RUN + 2;
168
169	0	if (symbol + runLen > maxSymbol + 1)
170	0	{
171	0	throw Iex::InputExc ("Error decoding Huffman table "
172	0	"(Run beyond end of table).");
173	0	}
174
175	0	symbol += runLen - 1;
176
177	0	}
178	0	else if (codeLen != 0)
179	0	{
180	0	symbols.push_back ((symbol << 6) \| (codeLen & 63));
181
182	0	if (codeLen < _minCodeLength)
183	0	_minCodeLength = codeLen;
184
185	0	if (codeLen > _maxCodeLength)
186	0	_maxCodeLength = codeLen;
187
188	0	codeCount[codeLen]++;
189	0	}
190	0	}
191
192	0	for (int i = 0; i < MAX_CODE_LEN; ++i)
193	0	_numSymbols += codeCount[i];
194
195	0	table = currByte;
196
197		//
198		// Compute base - once we have the code length counts, there
199		// is a closed form solution for this
200		//
201
202	0	{
203	0	double* countTmp = new double[_maxCodeLength+1];
204
205	0	for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
206	0	{
207	0	countTmp[l] = (double)codeCount[l] *
208	0	(double)(2 << (_maxCodeLength-l));
209	0	}
210
211	0	for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
212	0	{
213	0	double tmp = 0;
214
215	0	for (int k =l + 1; k <= _maxCodeLength; ++k)
216	0	tmp += countTmp[k];
217
218	0	tmp /= (double)(2 << (_maxCodeLength - l));
219
220	0	base[l] = (Int64)ceil (tmp);
221	0	}
222
223	0	delete [] countTmp;
224	0	}
225
226		//
227		// Compute offset - these are the positions of the first
228		// id (not symbol) that has length [i]
229		//
230
231	0	offset[_maxCodeLength] = 0;
232
233	0	for (int i= _maxCodeLength - 1; i >= _minCodeLength; i--)
234	0	offset[i] = offset[i + 1] + codeCount[i + 1];
235
236		//
237		// Allocate and fill the symbol-to-id mapping. Smaller Ids should be
238		// mapped to less-frequent symbols (which have longer codes). Use
239		// the offset table to tell us where the id's for a given code
240		// length start off.
241		//
242
243	0	_idToSymbol = new int[_numSymbols];
244
245	0	Int64 mapping[MAX_CODE_LEN + 1];
246	0	for (int i = 0; i < MAX_CODE_LEN + 1; ++i)
247	0	mapping[i] = -1;
248	0	for (int i = _minCodeLength; i <= _maxCodeLength; ++i)
249	0	mapping[i] = offset[i];
250
251	0	for (std::vector<Int64>::const_iterator i = symbols.begin();
252	0	i != symbols.end();
253	0	++i)
254	0	{
255	0	int codeLen = *i & 63;
256	0	int symbol = *i >> 6;
257
258	0	if (mapping[codeLen] >= _numSymbols)
259	0	throw Iex::InputExc ("Huffman decode error "
260	0	"(Invalid symbol in header).");
261
262	0	_idToSymbol[mapping[codeLen]] = symbol;
263	0	mapping[codeLen]++;
264	0	}
265
266	0	buildTables(base, offset);
267	0	}
268
269
270		FastHufDecoder::~FastHufDecoder()
271	0	{
272	0	delete[] _idToSymbol;
273	0	}
274
275
276		//
277		// Static check if the decoder is enabled.
278		//
279		// ATM, I only have access to little endian hardware for testing,
280		// so I'm not entirely sure that we are reading fom the bit stream
281		// properly on BE.
282		//
283		// If you happen to have more obscure hardware, check that the
284		// byte swapping in refill() is happening sensable, add an endian
285		// check if needed, and fix the preprocessor magic here.
286		//
287
288		#define READ64(c) \
289	0	((Int64)(c)[0] << 56) \| ((Int64)(c)[1] << 48) \| ((Int64)(c)[2] << 40) \| \
290	0	((Int64)(c)[3] << 32) \| ((Int64)(c)[4] << 24) \| ((Int64)(c)[5] << 16) \| \
291	0	((Int64)(c)[6] << 8) \| ((Int64)(c)[7] )
292
293		#ifdef __INTEL_COMPILER // ICC built-in swap for LE hosts
294		#if defined (__i386__) \|\| defined(__x86_64__)
295		#undef READ64
296		#define READ64(c) _bswap64 ((const Int64)(c))
297		#endif
298		#endif
299
300
301		bool
302		FastHufDecoder::enabled()
303	0	{
304	0	#if defined(__INTEL_COMPILER) \|\| defined(__GNUC__)
305
306		//
307		// Enabled for ICC, GCC:
308		// __i386__ -> x86
309		// __x86_64__ -> 64-bit x86
310		//
311
312	0	#if defined (__i386__) \|\| defined(__x86_64__)
313	0	return true;
314		#else
315		return false;
316		#endif
317
318		#elif defined (_MSC_VER)
319
320		//
321		// Enabled for Visual Studio:
322		// _M_IX86 -> x86
323		// _M_X64 -> 64bit x86
324
325		#if defined (_M_IX86) \|\| defined(_M_X64)
326		return true;
327		#else
328		return false;
329		#endif
330
331		#else
332
333		//
334		// Unknown compiler - Be safe and disable.
335		//
336		return false;
337		#endif
338	0	}
339
340		//
341		//
342		// Built the acceleration tables for lookups on the upper bits
343		// as well as the 'LJ' tables.
344		//
345
346		void
347		FastHufDecoder::buildTables (Int64 base, Int64 offset)
348	0	{
349		//
350		// Build the 'left justified' base table, by shifting base left..
351		//
352
353	0	for (int i = 0; i <= MAX_CODE_LEN; ++i)
354	0	{
355	0	if (base[i] != 0xffffffffffffffffL)
356	0	{
357	0	_ljBase[i] = base[i] << (64 - i);
358	0	}
359	0	else
360	0	{
361		//
362		// Unused code length - insert dummy values
363		//
364
365	0	_ljBase[i] = 0xffffffffffffffffL;
366	0	}
367	0	}
368
369		//
370		// Combine some terms into a big fat constant, which for
371		// lack of a better term we'll call the 'left justified'
372		// offset table (because it serves the same function
373		// as 'offset', when using the left justified base table.
374		//
375
376	0	for (int i = 0; i <= MAX_CODE_LEN; ++i)
377	0	_ljOffset[i] = offset[i] - (_ljBase[i] >> (64 - i));
378
379		//
380		// Build the acceleration tables for the lookups of
381		// short codes ( <= TABLE_LOOKUP_BITS long)
382		//
383
384	0	for (Int64 i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i)
385	0	{
386	0	Int64 value = i << (64 - TABLE_LOOKUP_BITS);
387
388	0	_tableSymbol[i] = 0xffff;
389	0	_tableCodeLen[i] = 0;
390
391	0	for (int codeLen = _minCodeLength; codeLen <= _maxCodeLength; ++codeLen)
392	0	{
393	0	if (_ljBase[codeLen] <= value)
394	0	{
395	0	_tableCodeLen[i] = codeLen;
396
397	0	Int64 id = _ljOffset[codeLen] + (value >> (64 - codeLen));
398	0	if (id < _numSymbols)
399	0	{
400	0	_tableSymbol[i] = _idToSymbol[id];
401	0	}
402	0	else
403	0	{
404	0	throw Iex::InputExc ("Huffman decode error "
405	0	"(Overrun).");
406	0	}
407	0	break;
408	0	}
409	0	}
410	0	}
411
412		//
413		// Store the smallest value in the table that points to real data.
414		// This should be the entry for the largest length that has
415		// valid data (in our case, non-dummy _ljBase)
416		//
417
418	0	int minIdx = TABLE_LOOKUP_BITS;
419
420	0	while (minIdx > 0 && _ljBase[minIdx] == 0xffffffffffffffffL)
421	0	minIdx--;
422
423	0	if (minIdx < 0)
424	0	{
425		//
426		// Error, no codes with lengths 0-TABLE_LOOKUP_BITS used.
427		// Set the min value such that the table is never tested.
428		//
429
430	0	_tableMin = 0xffffffffffffffffL;
431	0	}
432	0	else
433	0	{
434	0	_tableMin = _ljBase[minIdx];
435	0	}
436	0	}
437
438
439		//
440		// For decoding, we're holding onto 2 Int64's.
441		//
442		// The first (buffer), holds the next bits from the bitstream to be
443		// decoded. For certain paths in the decoder, we only need TABLE_LOOKUP_BITS
444		// valid bits to decode the next symbol. For other paths, we need a full
445		// 64-bits to decode a symbol.
446		//
447		// When we need to refill 'buffer', we could pull bits straight from
448		// the bitstream. But this is very slow and requires lots of book keeping
449		// (what's the next bit in the next byte?). Instead, we keep another Int64
450		// around that we use to refill from. While this doesn't cut down on the
451		// book keeping (still need to know how many valid bits), it does cut
452		// down on some of the bit shifting crazy and byte access.
453		//
454		// The refill Int64 (bufferBack) gets left-shifted after we've pulled
455		// off bits. If we run out of bits in the input bit stream, we just
456		// shift in 0's to bufferBack.
457		//
458		// The refill act takes numBits from the top of bufferBack and sticks
459		// them in the bottom of buffer. If there arn't enough bits in bufferBack,
460		// it gets refilled (to 64-bits) from the input bitstream.
461		//
462
463		inline void
464		FastHufDecoder::refill
465		(Int64 &buffer,
466		int numBits, // number of bits to refill
467		Int64 &bufferBack, // the next 64-bits, to refill from
468		int &bufferBackNumBits, // number of bits left in bufferBack
469		const unsigned char *&currByte, // current byte in the bitstream
470		int &currBitsLeft) // number of bits left in the bitsream
471	0	{
472		//
473		// Refill bits into the bottom of buffer, from the top of bufferBack.
474		// Always top up buffer to be completely full.
475		//
476
477	0	buffer \|= bufferBack >> (64 - numBits);
478
479	0	if (bufferBackNumBits < numBits)
480	0	{
481	0	numBits -= bufferBackNumBits;
482
483		//
484		// Refill all of bufferBack from the bitstream. Either grab
485		// a full 64-bit chunk, or whatever bytes are left. If we
486		// don't have 64-bits left, pad with 0's.
487		//
488
489	0	if (currBitsLeft >= 64)
490	0	{
491	0	bufferBack = READ64 (currByte);
492	0	bufferBackNumBits = 64;
493	0	currByte += sizeof (Int64);
494	0	currBitsLeft -= 8 * sizeof (Int64);
495
496	0	}
497	0	else
498	0	{
499	0	bufferBack = 0;
500	0	bufferBackNumBits = 64;
501
502	0	Int64 shift = 56;
503
504	0	while (currBitsLeft > 0)
505	0	{
506	0	bufferBack \|= ((Int64)(*currByte)) << shift;
507
508	0	currByte++;
509	0	shift -= 8;
510	0	currBitsLeft -= 8;
511	0	}
512
513		//
514		// At this point, currBitsLeft might be negative, just because
515		// we're subtracting whole bytes. To keep anyone from freaking
516		// out, zero the counter.
517		//
518
519	0	if (currBitsLeft < 0)
520	0	currBitsLeft = 0;
521	0	}
522
523	0	buffer \|= bufferBack >> (64 - numBits);
524	0	}
525
526	0	bufferBack = bufferBack << numBits;
527	0	bufferBackNumBits -= numBits;
528
529		//
530		// We can have cases where the previous shift of bufferBack is << 64 -
531		// in which case no shift occurs. The bit count math still works though,
532		// so if we don't have any bits left, zero out bufferBack.
533		//
534
535	0	if (bufferBackNumBits == 0)
536	0	bufferBack = 0;
537	0	}
538
539		//
540		// Read the next few bits out of a bitstream. Will be given a backing buffer
541		// (buffer) that may still have data left over from previous reads
542		// (bufferNumBits). Bitstream pointer (currByte) will be advanced when needed.
543		//
544
545		inline Int64
546		FastHufDecoder::readBits
547		(int numBits,
548		Int64 &buffer, // c
549		int &bufferNumBits, // lc
550		const char *&currByte) // in
551	0	{
552	0	while (bufferNumBits < numBits)
553	0	{
554	0	buffer = (buffer << 8) \| (unsigned char)(currByte++);
555	0	bufferNumBits += 8;
556	0	}
557
558	0	bufferNumBits -= numBits;
559	0	return (buffer >> bufferNumBits) & ((1 << numBits) - 1);
560	0	}
561
562
563		//
564		// Decode using a the 'One-Shift' strategy for decoding, with a
565		// small-ish table to accelerate decoding of short codes.
566		//
567		// If possible, try looking up codes into the acceleration table.
568		// This has a few benifits - there's no search involved; We don't
569		// need an additional lookup to map id to symbol; we don't need
570		// a full 64-bits (so less refilling).
571		//
572
573		void
574		FastHufDecoder::decode
575		(const unsigned char *src,
576		int numSrcBits,
577		unsigned short *dst,
578		int numDstElems)
579	0	{
580	0	if (numSrcBits < 128)
581	0	throw Iex::InputExc ("Error choosing Huffman decoder implementation "
582	0	"(insufficient number of bits).");
583
584		//
585		// Current position (byte/bit) in the src data stream
586		// (after the first buffer fill)
587		//
588
589	0	const unsigned char currByte = src + 2 sizeof (Int64);
590
591	0	numSrcBits -= 8 * 2 * sizeof (Int64);
592
593		//
594		// 64-bit buffer holding the current bits in the stream
595		//
596
597	0	Int64 buffer = READ64 (src);
598	0	int bufferNumBits = 64;
599
600		//
601		// 64-bit buffer holding the next bits in the stream
602		//
603
604	0	Int64 bufferBack = READ64 ((src + sizeof (Int64)));
605	0	int bufferBackNumBits = 64;
606
607	0	int dstIdx = 0;
608
609	0	while (dstIdx < numDstElems)
610	0	{
611	0	int codeLen;
612	0	int symbol;
613
614		//
615		// Test if we can be table accelerated. If so, directly
616		// lookup the output symbol. Otherwise, we need to fall
617		// back to searching for the code.
618		//
619		// If we're doing table lookups, we don't really need
620		// a re-filled buffer, so long as we have TABLE_LOOKUP_BITS
621		// left. But for a search, we do need a refilled table.
622		//
623
624	0	if (_tableMin <= buffer)
625	0	{
626	0	int tableIdx = buffer >> (64 - TABLE_LOOKUP_BITS);
627
628		//
629		// For invalid codes, _tableCodeLen[] should return 0. This
630		// will cause the decoder to get stuck in the current spot
631		// until we run out of elements, then barf that the codestream
632		// is bad. So we don't need to stick a condition like
633		// if (codeLen > _maxCodeLength) in this inner.
634		//
635
636	0	codeLen = _tableCodeLen[tableIdx];
637	0	symbol = _tableSymbol[tableIdx];
638	0	}
639	0	else
640	0	{
641	0	if (bufferNumBits < 64)
642	0	{
643	0	refill (buffer,
644	0	64 - bufferNumBits,
645	0	bufferBack,
646	0	bufferBackNumBits,
647	0	currByte,
648	0	numSrcBits);
649
650	0	bufferNumBits = 64;
651	0	}
652
653		//
654		// Brute force search:
655		// Find the smallest length where _ljBase[length] <= buffer
656		//
657
658	0	codeLen = TABLE_LOOKUP_BITS + 1;
659
660	0	while (_ljBase[codeLen] > buffer && codeLen <= _maxCodeLength)
661	0	codeLen++;
662
663	0	if (codeLen > _maxCodeLength)
664	0	{
665	0	throw Iex::InputExc ("Huffman decode error "
666	0	"(Decoded an invalid symbol).");
667	0	}
668
669	0	Int64 id = _ljOffset[codeLen] + (buffer >> (64 - codeLen));
670	0	if (id < _numSymbols)
671	0	{
672	0	symbol = _idToSymbol[id];
673	0	}
674	0	else
675	0	{
676	0	throw Iex::InputExc ("Huffman decode error "
677	0	"(Decoded an invalid symbol).");
678	0	}
679	0	}
680
681		//
682		// Shift over bit stream, and update the bit count in the buffer
683		//
684
685	0	buffer = buffer << codeLen;
686	0	bufferNumBits -= codeLen;
687
688		//
689		// If we recieved a RLE symbol (_rleSymbol), then we need
690		// to read ahead 8 bits to know how many times to repeat
691		// the previous symbol. Need to ensure we at least have
692		// 8 bits of data in the buffer
693		//
694
695	0	if (symbol == _rleSymbol)
696	0	{
697	0	if (bufferNumBits < 8)
698	0	{
699	0	refill (buffer,
700	0	64 - bufferNumBits,
701	0	bufferBack,
702	0	bufferBackNumBits,
703	0	currByte,
704	0	numSrcBits);
705
706	0	bufferNumBits = 64;
707	0	}
708
709	0	int rleCount = buffer >> 56;
710
711	0	if (dstIdx < 1)
712	0	{
713	0	throw Iex::InputExc ("Huffman decode error (RLE code "
714	0	"with no previous symbol).");
715	0	}
716
717	0	if (dstIdx + rleCount > numDstElems)
718	0	{
719	0	throw Iex::InputExc ("Huffman decode error (Symbol run "
720	0	"beyond expected output buffer length).");
721	0	}
722
723	0	if (rleCount <= 0)
724	0	{
725	0	throw Iex::InputExc("Huffman decode error"
726	0	" (Invalid RLE length)");
727	0	}
728
729	0	for (int i = 0; i < rleCount; ++i)
730	0	dst[dstIdx + i] = dst[dstIdx - 1];
731
732	0	dstIdx += rleCount;
733
734	0	buffer = buffer << 8;
735	0	bufferNumBits -= 8;
736	0	}
737	0	else
738	0	{
739	0	dst[dstIdx] = symbol;
740	0	dstIdx++;
741	0	}
742
743		//
744		// refill bit stream buffer if we're below the number of
745		// bits needed for a table lookup
746		//
747
748	0	if (bufferNumBits < TABLE_LOOKUP_BITS)
749	0	{
750	0	refill (buffer,
751	0	64 - bufferNumBits,
752	0	bufferBack,
753	0	bufferBackNumBits,
754	0	currByte,
755	0	numSrcBits);
756
757	0	bufferNumBits = 64;
758	0	}
759	0	}
760
761	0	if (numSrcBits != 0)
762	0	{
763	0	throw Iex::InputExc ("Huffman decode error (Compressed data remains "
764	0	"after filling expected output buffer).");
765	0	}
766	0	}
767
768		OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT