/src/gdal/third_party/LercLib/Huffman.cpp

Source (jump to first uncovered line)
/*
Copyright 2015 Esri

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

A local copy of the license and additional notices are located with the
source distribution at:

http://github.com/Esri/lerc/

Contributors:  Thomas Maurer
*/

#include <algorithm>
#include <queue>
#include "Defines.h"
#include "Huffman.h"
#include "BitStuffer2.h"

using namespace std;
USING_NAMESPACE_LERC

// -------------------------------------------------------------------------- ;

bool Huffman::ComputeCodes(const vector<int>& histo)
{
  if (histo.empty() || histo.size() >= m_maxHistoSize)
    return false;

  priority_queue<Node, vector<Node>, less<Node> > pq;

  int numNodes = 0;

  int size = (int)histo.size();
  for (int i = 0; i < size; i++)    // add all leaf nodes
    if (histo[i] > 0)
      pq.push(Node((short)i, histo[i]));

  if (pq.size() < 2)    // histo has only 0 or 1 bin that is not empty; quit Huffman and give it to Lerc
    return false;

  while (pq.size() > 1)    // build the Huffman tree
  {
    Node* child0 = new Node(pq.top());
    numNodes++;
    pq.pop();
    Node* child1 = new Node(pq.top());
    numNodes++;
    pq.pop();
    pq.push(Node(child0, child1));
  }

  m_codeTable.resize(size);
  std::fill(m_codeTable.begin(), m_codeTable.end(),
    std::pair<unsigned short, unsigned int>((short)0, 0));

  if (!pq.top().TreeToLUT(0, 0, m_codeTable))    // fill the LUT
    return false;

  //pq.top().FreeTree(numNodes);    // Linux compiler complains
  Node nodeNonConst = pq.top();
  nodeNonConst.FreeTree(numNodes);    // free all the nodes

  if (numNodes != 0)    // check the ref count
    return false;

  if (!ConvertCodesToCanonical())
    return false;

  return true;
}

// -------------------------------------------------------------------------- ;

bool Huffman::ComputeCompressedSize(const std::vector<int>& histo, int& numBytes, double& avgBpp) const
{
  if (histo.empty() || histo.size() >= m_maxHistoSize)
    return false;

  numBytes = 0;
  if (!ComputeNumBytesCodeTable(numBytes))    // header and code table
    return false;

  int numBits = 0, numElem = 0;
  int size = (int)histo.size();
  for (int i = 0; i < size; i++)
    if (histo[i] > 0)
    {
      numBits += histo[i] * m_codeTable[i].first;
      numElem += histo[i];
    }

  if (numElem == 0)
    return false;

  int numUInts = ((((numBits + 7) >> 3) + 3) >> 2) + 1;    // add one more as the decode LUT can read ahead
  numBytes += 4 * numUInts;    // data huffman coded
  avgBpp = 8 * numBytes / (double)numElem;

  return true;
}

// -------------------------------------------------------------------------- ;

bool Huffman::SetCodes(const vector<pair<unsigned short, unsigned int> >& codeTable)
{
  if (codeTable.empty() || codeTable.size() >= m_maxHistoSize)
    return false;

  m_codeTable = codeTable;
  return true;
}

// -------------------------------------------------------------------------- ;

bool Huffman::WriteCodeTable(Byte** ppByte, int lerc2Version) const
{
  if (!ppByte)
    return false;

  int i0, i1, maxLen;
  if (!GetRange(i0, i1, maxLen))
    return false;

  int size = (int)m_codeTable.size();
  vector<unsigned int> dataVec(i1 - i0, 0);

  for (int i = i0; i < i1; i++)
  {
    int k = GetIndexWrapAround(i, size);
    dataVec[i - i0] = m_codeTable[k].first;
  }

  // header
  vector<int> intVec;
  intVec.push_back(4);    // huffman version; 4 guarantees canonical codes
  intVec.push_back(size);
  intVec.push_back(i0);   // code range
  intVec.push_back(i1);

  Byte* ptr = *ppByte;

  size_t len = intVec.size() * sizeof(int);
  memcpy(ptr, &intVec[0], len);
  ptr += len;

  BitStuffer2 bitStuffer2;
  if (!bitStuffer2.EncodeSimple(&ptr, dataVec, lerc2Version))    // code lengths, bit stuffed
    return false;

  if (!BitStuffCodes(&ptr, i0, i1))    // variable length codes, bit stuffed
    return false;

  *ppByte = ptr;
  return true;
}

// -------------------------------------------------------------------------- ;

bool Huffman::ReadCodeTable(const Byte** ppByte, size_t& nBytesRemainingInOut, int lerc2Version)
{
  if (!ppByte || !(*ppByte))
    return false;

  const Byte* ptr = *ppByte;
  size_t nBytesRemaining = nBytesRemainingInOut;

  vector<int> intVec(4, 0);
  size_t len = intVec.size() * sizeof(int);

  if (nBytesRemaining < len)
    return false;

  memcpy(&intVec[0], ptr, len);
  ptr += len;
  nBytesRemaining -= len;

  int version = intVec[0];

  if (version < 2)    // allow forward compatibility; for updates that break old decoders increase Lerc2 version number;
    return false;

  const int size = intVec[1];
  const int i0 = intVec[2];
  const int i1 = intVec[3];

  if (i0 >= i1 || i0 < 0 || size < 0 || size > (int)m_maxHistoSize)
    return false;

  if (GetIndexWrapAround(i0, size) >= size || GetIndexWrapAround(i1 - 1, size) >= size)
    return false;

  try
  {
    vector<unsigned int> dataVec(i1 - i0, 0);
    BitStuffer2 bitStuffer2;
    if (!bitStuffer2.Decode(&ptr, nBytesRemaining, dataVec, dataVec.size(), lerc2Version))    // unstuff the code lengths
      return false;

    if (dataVec.size() != static_cast<size_t>(i1 - i0))
      return false;

    m_codeTable.resize(size);
    std::fill(m_codeTable.begin(), m_codeTable.end(),
      std::pair<unsigned short, unsigned int>((short)0, 0));

    for (int i = i0; i < i1; i++)
    {
      int k = GetIndexWrapAround(i, size);
      m_codeTable[k].first = (unsigned short)dataVec[i - i0];
    }

    if (!BitUnStuffCodes(&ptr, nBytesRemaining, i0, i1))    // unstuff the codes
      return false;

    *ppByte = ptr;
    nBytesRemainingInOut = nBytesRemaining;
    return true;
  }
  catch (std::exception&)
  {
    return false;
  }
}

// -------------------------------------------------------------------------- ;

bool Huffman::BuildTreeFromCodes(int& numBitsLUT)
{
  int i0 = 0, i1 = 0, maxLen = 0;
  if (!GetRange(i0, i1, maxLen))
    return false;

  // build decode LUT using max of 12 bits
  int size = (int)m_codeTable.size();
  int minNumZeroBits = 32;

  bool bNeedTree = maxLen > m_maxNumBitsLUT;
  numBitsLUT = min(maxLen, m_maxNumBitsLUT);

  int sizeLUT = 1 << numBitsLUT;

  m_decodeLUT.clear();
  m_decodeLUT.assign((size_t)sizeLUT, pair<short, short>((short)-1, (short)-1));

  for (int i = i0; i < i1; i++)
  {
    int k = GetIndexWrapAround(i, size);
    int len = m_codeTable[k].first;

    if (len == 0)
      continue;

    unsigned int code = m_codeTable[k].second;

    if (len <= numBitsLUT)
    {
      code <<= (numBitsLUT - len);
      unsigned int numEntries = 1 << (numBitsLUT - len);

      for (unsigned int j = 0; j < numEntries; j++)
      {
        auto& entry = m_decodeLUT[code | j];
        entry.first = (short)len;    // add the duplicates
        entry.second = (short)k;    // add the duplicates
      }
    }
    else    // for the codes too long for the LUT, count how many leading bits are 0
    {
      int shift = 1;
      while (code >>= 1) shift++;    // large canonical codes start with zero's
      minNumZeroBits = min(minNumZeroBits, len - shift);
    }
  }

  m_numBitsToSkipInTree = bNeedTree? minNumZeroBits : 0;

  if (!bNeedTree)    // decode LUT covers it all, no tree needed
    return true;

  //m_numBitsToSkipInTree = 0;    // to disable skipping the 0 bits

  ClearTree();  // if there

  Node emptyNode((short)-1, 0);
  m_root = new Node(emptyNode);

  for (int i = i0; i < i1; i++)
  {
    int k = GetIndexWrapAround(i, size);
    int len = m_codeTable[k].first;

    if (len > 0 && len > numBitsLUT)    // add only codes not in the decode LUT
    {
      unsigned int code = m_codeTable[k].second;
      Node* node = m_root;
      int j = len - m_numBitsToSkipInTree;    // reduce len by number of leading 0 bits from above

      while (--j >= 0)    // go over the bits
      {
        if (code & (1 << j))
        {
          if (!node->child1)
            node->child1 = new Node(emptyNode);

          node = node->child1;
        }
        else
        {
          if (!node->child0)
            node->child0 = new Node(emptyNode);

          node = node->child0;
        }

        if (j == 0)    // last bit, leaf node
          node->value = (short)k;    // set the value
      }
    }
  }

  return true;
}

// -------------------------------------------------------------------------- ;

void Huffman::Clear()
{
  m_codeTable.clear();
  m_decodeLUT.clear();
  ClearTree();
}

// -------------------------------------------------------------------------- ;

void Huffman::ClearTree()
{
  if (m_root)
  {
    int n = 0;
    m_root->FreeTree(n);
    delete m_root;
    m_root = nullptr;
  }
}

// -------------------------------------------------------------------------- ;
// -------------------------------------------------------------------------- ;

bool Huffman::ComputeNumBytesCodeTable(int& numBytes) const
{
  int i0, i1, maxLen;
  if (!GetRange(i0, i1, maxLen))
    return false;

  int size = (int)m_codeTable.size();
  int sum = 0;
  for (int i = i0; i < i1; i++)
  {
    int k = GetIndexWrapAround(i, size);
    sum += m_codeTable[k].first;
  }

  numBytes = 4 * sizeof(int);    // version, size, first bin, (last + 1) bin

  BitStuffer2 bitStuffer2;
  numBytes += bitStuffer2.ComputeNumBytesNeededSimple((unsigned int)(i1 - i0), (unsigned int)maxLen);    // code lengths
  int numUInts = (((sum + 7) >> 3) + 3) >> 2;
  numBytes += 4 * numUInts;    // byte array with the codes bit stuffed

  return true;
}

// -------------------------------------------------------------------------- ;

bool Huffman::GetRange(int& i0, int& i1, int& maxCodeLength) const
{
  if (m_codeTable.empty() || m_codeTable.size() >= m_maxHistoSize)
    return false;

  // first, check for peak somewhere in the middle with 0 stretches left and right
  int size = (int)m_codeTable.size();
  {
    int i = 0;
    while (i < size && m_codeTable[i].first == 0) i++;
    i0 = i;
    i = size - 1;
    while (i >= 0 && m_codeTable[i].first == 0) i--;
    i1 = i + 1;    // exclusive
  }

  if (i1 <= i0)
    return false;

  // second, cover the common case that the peak is close to 0
  pair<int, int> segm(0, 0);
  int j = 0;
  while (j < size)    // find the largest stretch of 0's, if any
  {
    while (j < size && m_codeTable[j].first > 0) j++;
    int k0 = j;
    while (j < size && m_codeTable[j].first == 0) j++;
    int k1 = j;

    if (k1 - k0 > segm.second)
      segm = pair<int, int>(k0, k1 - k0);
  }

  if (size - segm.second < i1 - i0)
  {
    i0 = segm.first + segm.second;
    i1 = segm.first + size;    // do wrap around
  }

  if (i1 <= i0)
    return false;

  int maxLen = 0;
  for (int i = i0; i < i1; i++)
  {
    int k = GetIndexWrapAround(i, size);
    int len = m_codeTable[k].first;
    maxLen = max(maxLen, len);
  }

  if (maxLen <= 0 || maxLen > 32)
    return false;

  maxCodeLength = maxLen;
  return true;
}

// -------------------------------------------------------------------------- ;

bool Huffman::BitStuffCodes(Byte** ppByte, int i0, int i1) const
{
  if (!ppByte)
    return false;

  unsigned int* arr = (unsigned int*)(*ppByte);
  unsigned int* dstPtr = arr;
  int size = (int)m_codeTable.size();
  int bitPos = 0;

  for (int i = i0; i < i1; i++)
  {
    int k = GetIndexWrapAround(i, size);
    int len = m_codeTable[k].first;
    if (len > 0)
    {
      unsigned int val = m_codeTable[k].second;
      if (32 - bitPos >= len)
      {
        if (bitPos == 0)
          *dstPtr = 0;

        *dstPtr |= val << (32 - bitPos - len);
        bitPos += len;
        if (bitPos == 32)
        {
          bitPos = 0;
          dstPtr++;
        }
      }
      else
      {
        bitPos += len - 32;
        *dstPtr++ |= val >> bitPos;    // bitPos > 0
        *dstPtr = val << (32 - bitPos);
      }
    }
  }

  size_t numUInts = dstPtr - arr + (bitPos > 0 ? 1 : 0);
  *ppByte += numUInts * sizeof(unsigned int);
  return true;
}

// -------------------------------------------------------------------------- ;

bool Huffman::BitUnStuffCodes(const Byte** ppByte, size_t& nBytesRemainingInOut, int i0, int i1)
{
  if (!ppByte || !(*ppByte))
    return false;

  size_t nBytesRemaining = nBytesRemainingInOut;

  const unsigned int* arr = (const unsigned int*)(*ppByte);
  const unsigned int* srcPtr = arr;
  const size_t sizeUInt = sizeof(*srcPtr);

  int size = (int)m_codeTable.size();
  int bitPos = 0;

  for (int i = i0; i < i1; i++)
  {
    int k = GetIndexWrapAround(i, size);
    int len = m_codeTable[k].first;
    if (len > 0)
    {
      if (nBytesRemaining < sizeUInt || len > 32)
        return false;

      m_codeTable[k].second = ((*srcPtr) << bitPos) >> (32 - len);

      if (32 - bitPos >= len)
      {
        bitPos += len;
        if (bitPos == 32)
        {
          bitPos = 0;
          srcPtr++;
          nBytesRemaining -= sizeUInt;
        }
      }
      else
      {
        bitPos += len - 32;
        srcPtr++;
        nBytesRemaining -= sizeUInt;

        if (nBytesRemaining < sizeUInt)
          return false;

        m_codeTable[k].second |= (*srcPtr) >> (32 - bitPos);    // bitPos > 0
      }
    }
  }

  size_t numUInts = srcPtr - arr + (bitPos > 0 ? 1 : 0);
  size_t len = numUInts * sizeUInt;

  if (nBytesRemainingInOut < len)
    return false;

  *ppByte += len;
  nBytesRemainingInOut -= len;

  if (nBytesRemaining != nBytesRemainingInOut && nBytesRemaining != nBytesRemainingInOut + sizeUInt)    // the real check
    return false;

  return true;
}

// -------------------------------------------------------------------------- ;

//struct MyLargerThanOp
//{
//  inline bool operator() (const pair<int, unsigned int>& p0,
//                          const pair<int, unsigned int>& p1)  { return p0.first > p1.first; }
//};

// -------------------------------------------------------------------------- ;

bool Huffman::ConvertCodesToCanonical()
{
  // from the non canonical code book, create an array to be sorted in descending order:
  //   codeLength * tableSize - index

  unsigned int tableSize = (unsigned int)m_codeTable.size();
  if (tableSize == 0)
    return true;
  vector<pair<int, unsigned int> > sortVec(tableSize, pair<int, unsigned int>(0, 0));
  //memset(&sortVec[0], 0, tableSize * sizeof(pair<int, unsigned int>));

  for (unsigned int i = 0; i < tableSize; i++)
    if (m_codeTable[i].first > 0)
      sortVec[i] = pair<int, unsigned int>(m_codeTable[i].first * tableSize - i, i);

  // sort descending
  //std::sort(sortVec.begin(), sortVec.end(), MyLargerThanOp());

  std::sort(sortVec.begin(), sortVec.end(),
    [](const pair<int, unsigned int>& p0,
       const pair<int, unsigned int>& p1) { return p0.first > p1.first; });

  // create canonical codes and assign to orig code table
  unsigned int index = sortVec[0].second;
  unsigned short codeLen = m_codeTable[index].first;    // max code length for this table
  unsigned int i = 0, codeCanonical = 0;

  while (i < tableSize && sortVec[i].first > 0)
  {
    index = sortVec[i++].second;
    short delta = codeLen - m_codeTable[index].first;  // difference of 2 consecutive code lengths, >= 0 as sorted
    codeCanonical >>= delta;
    codeLen -= delta;
    m_codeTable[index].second = codeCanonical++;
  }

  return true;
}

// -------------------------------------------------------------------------- ;

Coverage Report

Created: 2025-06-13 06:29

Line	Count	Source (jump to first uncovered line)
1		/*
2		Copyright 2015 Esri
3
4		Licensed under the Apache License, Version 2.0 (the "License");
5		you may not use this file except in compliance with the License.
6		You may obtain a copy of the License at
7
8		http://www.apache.org/licenses/LICENSE-2.0
9
10		Unless required by applicable law or agreed to in writing, software
11		distributed under the License is distributed on an "AS IS" BASIS,
12		WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13		See the License for the specific language governing permissions and
14		limitations under the License.
15
16		A local copy of the license and additional notices are located with the
17		source distribution at:
18
19		http://github.com/Esri/lerc/
20
21		Contributors: Thomas Maurer
22		*/
23
24		#include <algorithm>
25		#include <queue>
26		#include "Defines.h"
27		#include "Huffman.h"
28		#include "BitStuffer2.h"
29
30		using namespace std;
31		USING_NAMESPACE_LERC
32
33		// -------------------------------------------------------------------------- ;
34
35		bool Huffman::ComputeCodes(const vector<int>& histo)
36	0	{
37	0	if (histo.empty() \|\| histo.size() >= m_maxHistoSize)
38	0	return false;
39
40	0	priority_queue<Node, vector<Node>, less<Node> > pq;
41
42	0	int numNodes = 0;
43
44	0	int size = (int)histo.size();
45	0	for (int i = 0; i < size; i++) // add all leaf nodes
46	0	if (histo[i] > 0)
47	0	pq.push(Node((short)i, histo[i]));
48
49	0	if (pq.size() < 2) // histo has only 0 or 1 bin that is not empty; quit Huffman and give it to Lerc
50	0	return false;
51
52	0	while (pq.size() > 1) // build the Huffman tree
53	0	{
54	0	Node* child0 = new Node(pq.top());
55	0	numNodes++;
56	0	pq.pop();
57	0	Node* child1 = new Node(pq.top());
58	0	numNodes++;
59	0	pq.pop();
60	0	pq.push(Node(child0, child1));
61	0	}
62
63	0	m_codeTable.resize(size);
64	0	std::fill(m_codeTable.begin(), m_codeTable.end(),
65	0	std::pair<unsigned short, unsigned int>((short)0, 0));
66
67	0	if (!pq.top().TreeToLUT(0, 0, m_codeTable)) // fill the LUT
68	0	return false;
69
70		//pq.top().FreeTree(numNodes); // Linux compiler complains
71	0	Node nodeNonConst = pq.top();
72	0	nodeNonConst.FreeTree(numNodes); // free all the nodes
73
74	0	if (numNodes != 0) // check the ref count
75	0	return false;
76
77	0	if (!ConvertCodesToCanonical())
78	0	return false;
79
80	0	return true;
81	0	}
82
83		// -------------------------------------------------------------------------- ;
84
85		bool Huffman::ComputeCompressedSize(const std::vector<int>& histo, int& numBytes, double& avgBpp) const
86	0	{
87	0	if (histo.empty() \|\| histo.size() >= m_maxHistoSize)
88	0	return false;
89
90	0	numBytes = 0;
91	0	if (!ComputeNumBytesCodeTable(numBytes)) // header and code table
92	0	return false;
93
94	0	int numBits = 0, numElem = 0;
95	0	int size = (int)histo.size();
96	0	for (int i = 0; i < size; i++)
97	0	if (histo[i] > 0)
98	0	{
99	0	numBits += histo[i] * m_codeTable[i].first;
100	0	numElem += histo[i];
101	0	}
102
103	0	if (numElem == 0)
104	0	return false;
105
106	0	int numUInts = ((((numBits + 7) >> 3) + 3) >> 2) + 1; // add one more as the decode LUT can read ahead
107	0	numBytes += 4 * numUInts; // data huffman coded
108	0	avgBpp = 8 * numBytes / (double)numElem;
109
110	0	return true;
111	0	}
112
113		// -------------------------------------------------------------------------- ;
114
115		bool Huffman::SetCodes(const vector<pair<unsigned short, unsigned int> >& codeTable)
116	0	{
117	0	if (codeTable.empty() \|\| codeTable.size() >= m_maxHistoSize)
118	0	return false;
119
120	0	m_codeTable = codeTable;
121	0	return true;
122	0	}
123
124		// -------------------------------------------------------------------------- ;
125
126		bool Huffman::WriteCodeTable(Byte** ppByte, int lerc2Version) const
127	0	{
128	0	if (!ppByte)
129	0	return false;
130
131	0	int i0, i1, maxLen;
132	0	if (!GetRange(i0, i1, maxLen))
133	0	return false;
134
135	0	int size = (int)m_codeTable.size();
136	0	vector<unsigned int> dataVec(i1 - i0, 0);
137
138	0	for (int i = i0; i < i1; i++)
139	0	{
140	0	int k = GetIndexWrapAround(i, size);
141	0	dataVec[i - i0] = m_codeTable[k].first;
142	0	}
143
144		// header
145	0	vector<int> intVec;
146	0	intVec.push_back(4); // huffman version; 4 guarantees canonical codes
147	0	intVec.push_back(size);
148	0	intVec.push_back(i0); // code range
149	0	intVec.push_back(i1);
150
151	0	Byte* ptr = *ppByte;
152
153	0	size_t len = intVec.size() * sizeof(int);
154	0	memcpy(ptr, &intVec[0], len);
155	0	ptr += len;
156
157	0	BitStuffer2 bitStuffer2;
158	0	if (!bitStuffer2.EncodeSimple(&ptr, dataVec, lerc2Version)) // code lengths, bit stuffed
159	0	return false;
160
161	0	if (!BitStuffCodes(&ptr, i0, i1)) // variable length codes, bit stuffed
162	0	return false;
163
164	0	*ppByte = ptr;
165	0	return true;
166	0	}
167
168		// -------------------------------------------------------------------------- ;
169
170		bool Huffman::ReadCodeTable(const Byte** ppByte, size_t& nBytesRemainingInOut, int lerc2Version)
171	0	{
172	0	if (!ppByte \|\| !(*ppByte))
173	0	return false;
174
175	0	const Byte* ptr = *ppByte;
176	0	size_t nBytesRemaining = nBytesRemainingInOut;
177
178	0	vector<int> intVec(4, 0);
179	0	size_t len = intVec.size() * sizeof(int);
180
181	0	if (nBytesRemaining < len)
182	0	return false;
183
184	0	memcpy(&intVec[0], ptr, len);
185	0	ptr += len;
186	0	nBytesRemaining -= len;
187
188	0	int version = intVec[0];
189
190	0	if (version < 2) // allow forward compatibility; for updates that break old decoders increase Lerc2 version number;
191	0	return false;
192
193	0	const int size = intVec[1];
194	0	const int i0 = intVec[2];
195	0	const int i1 = intVec[3];
196
197	0	if (i0 >= i1 \|\| i0 < 0 \|\| size < 0 \|\| size > (int)m_maxHistoSize)
198	0	return false;
199
200	0	if (GetIndexWrapAround(i0, size) >= size \|\| GetIndexWrapAround(i1 - 1, size) >= size)
201	0	return false;
202
203	0	try
204	0	{
205	0	vector<unsigned int> dataVec(i1 - i0, 0);
206	0	BitStuffer2 bitStuffer2;
207	0	if (!bitStuffer2.Decode(&ptr, nBytesRemaining, dataVec, dataVec.size(), lerc2Version)) // unstuff the code lengths
208	0	return false;
209
210	0	if (dataVec.size() != static_cast<size_t>(i1 - i0))
211	0	return false;
212
213	0	m_codeTable.resize(size);
214	0	std::fill(m_codeTable.begin(), m_codeTable.end(),
215	0	std::pair<unsigned short, unsigned int>((short)0, 0));
216
217	0	for (int i = i0; i < i1; i++)
218	0	{
219	0	int k = GetIndexWrapAround(i, size);
220	0	m_codeTable[k].first = (unsigned short)dataVec[i - i0];
221	0	}
222
223	0	if (!BitUnStuffCodes(&ptr, nBytesRemaining, i0, i1)) // unstuff the codes
224	0	return false;
225
226	0	*ppByte = ptr;
227	0	nBytesRemainingInOut = nBytesRemaining;
228	0	return true;
229	0	}
230	0	catch (std::exception&)
231	0	{
232	0	return false;
233	0	}
234	0	}
235
236		// -------------------------------------------------------------------------- ;
237
238		bool Huffman::BuildTreeFromCodes(int& numBitsLUT)
239	0	{
240	0	int i0 = 0, i1 = 0, maxLen = 0;
241	0	if (!GetRange(i0, i1, maxLen))
242	0	return false;
243
244		// build decode LUT using max of 12 bits
245	0	int size = (int)m_codeTable.size();
246	0	int minNumZeroBits = 32;
247
248	0	bool bNeedTree = maxLen > m_maxNumBitsLUT;
249	0	numBitsLUT = min(maxLen, m_maxNumBitsLUT);
250
251	0	int sizeLUT = 1 << numBitsLUT;
252
253	0	m_decodeLUT.clear();
254	0	m_decodeLUT.assign((size_t)sizeLUT, pair<short, short>((short)-1, (short)-1));
255
256	0	for (int i = i0; i < i1; i++)
257	0	{
258	0	int k = GetIndexWrapAround(i, size);
259	0	int len = m_codeTable[k].first;
260
261	0	if (len == 0)
262	0	continue;
263
264	0	unsigned int code = m_codeTable[k].second;
265
266	0	if (len <= numBitsLUT)
267	0	{
268	0	code <<= (numBitsLUT - len);
269	0	unsigned int numEntries = 1 << (numBitsLUT - len);
270
271	0	for (unsigned int j = 0; j < numEntries; j++)
272	0	{
273	0	auto& entry = m_decodeLUT[code \| j];
274	0	entry.first = (short)len; // add the duplicates
275	0	entry.second = (short)k; // add the duplicates
276	0	}
277	0	}
278	0	else // for the codes too long for the LUT, count how many leading bits are 0
279	0	{
280	0	int shift = 1;
281	0	while (code >>= 1) shift++; // large canonical codes start with zero's
282	0	minNumZeroBits = min(minNumZeroBits, len - shift);
283	0	}
284	0	}
285
286	0	m_numBitsToSkipInTree = bNeedTree? minNumZeroBits : 0;
287
288	0	if (!bNeedTree) // decode LUT covers it all, no tree needed
289	0	return true;
290
291		//m_numBitsToSkipInTree = 0; // to disable skipping the 0 bits
292
293	0	ClearTree(); // if there
294
295	0	Node emptyNode((short)-1, 0);
296	0	m_root = new Node(emptyNode);
297
298	0	for (int i = i0; i < i1; i++)
299	0	{
300	0	int k = GetIndexWrapAround(i, size);
301	0	int len = m_codeTable[k].first;
302
303	0	if (len > 0 && len > numBitsLUT) // add only codes not in the decode LUT
304	0	{
305	0	unsigned int code = m_codeTable[k].second;
306	0	Node* node = m_root;
307	0	int j = len - m_numBitsToSkipInTree; // reduce len by number of leading 0 bits from above
308
309	0	while (--j >= 0) // go over the bits
310	0	{
311	0	if (code & (1 << j))
312	0	{
313	0	if (!node->child1)
314	0	node->child1 = new Node(emptyNode);
315
316	0	node = node->child1;
317	0	}
318	0	else
319	0	{
320	0	if (!node->child0)
321	0	node->child0 = new Node(emptyNode);
322
323	0	node = node->child0;
324	0	}
325
326	0	if (j == 0) // last bit, leaf node
327	0	node->value = (short)k; // set the value
328	0	}
329	0	}
330	0	}
331
332	0	return true;
333	0	}
334
335		// -------------------------------------------------------------------------- ;
336
337		void Huffman::Clear()
338	0	{
339	0	m_codeTable.clear();
340	0	m_decodeLUT.clear();
341	0	ClearTree();
342	0	}
343
344		// -------------------------------------------------------------------------- ;
345
346		void Huffman::ClearTree()
347	0	{
348	0	if (m_root)
349	0	{
350	0	int n = 0;
351	0	m_root->FreeTree(n);
352	0	delete m_root;
353	0	m_root = nullptr;
354	0	}
355	0	}
356
357		// -------------------------------------------------------------------------- ;
358		// -------------------------------------------------------------------------- ;
359
360		bool Huffman::ComputeNumBytesCodeTable(int& numBytes) const
361	0	{
362	0	int i0, i1, maxLen;
363	0	if (!GetRange(i0, i1, maxLen))
364	0	return false;
365
366	0	int size = (int)m_codeTable.size();
367	0	int sum = 0;
368	0	for (int i = i0; i < i1; i++)
369	0	{
370	0	int k = GetIndexWrapAround(i, size);
371	0	sum += m_codeTable[k].first;
372	0	}
373
374	0	numBytes = 4 * sizeof(int); // version, size, first bin, (last + 1) bin
375
376	0	BitStuffer2 bitStuffer2;
377	0	numBytes += bitStuffer2.ComputeNumBytesNeededSimple((unsigned int)(i1 - i0), (unsigned int)maxLen); // code lengths
378	0	int numUInts = (((sum + 7) >> 3) + 3) >> 2;
379	0	numBytes += 4 * numUInts; // byte array with the codes bit stuffed
380
381	0	return true;
382	0	}
383
384		// -------------------------------------------------------------------------- ;
385
386		bool Huffman::GetRange(int& i0, int& i1, int& maxCodeLength) const
387	0	{
388	0	if (m_codeTable.empty() \|\| m_codeTable.size() >= m_maxHistoSize)
389	0	return false;
390
391		// first, check for peak somewhere in the middle with 0 stretches left and right
392	0	int size = (int)m_codeTable.size();
393	0	{
394	0	int i = 0;
395	0	while (i < size && m_codeTable[i].first == 0) i++;
396	0	i0 = i;
397	0	i = size - 1;
398	0	while (i >= 0 && m_codeTable[i].first == 0) i--;
399	0	i1 = i + 1; // exclusive
400	0	}
401
402	0	if (i1 <= i0)
403	0	return false;
404
405		// second, cover the common case that the peak is close to 0
406	0	pair<int, int> segm(0, 0);
407	0	int j = 0;
408	0	while (j < size) // find the largest stretch of 0's, if any
409	0	{
410	0	while (j < size && m_codeTable[j].first > 0) j++;
411	0	int k0 = j;
412	0	while (j < size && m_codeTable[j].first == 0) j++;
413	0	int k1 = j;
414
415	0	if (k1 - k0 > segm.second)
416	0	segm = pair<int, int>(k0, k1 - k0);
417	0	}
418
419	0	if (size - segm.second < i1 - i0)
420	0	{
421	0	i0 = segm.first + segm.second;
422	0	i1 = segm.first + size; // do wrap around
423	0	}
424
425	0	if (i1 <= i0)
426	0	return false;
427
428	0	int maxLen = 0;
429	0	for (int i = i0; i < i1; i++)
430	0	{
431	0	int k = GetIndexWrapAround(i, size);
432	0	int len = m_codeTable[k].first;
433	0	maxLen = max(maxLen, len);
434	0	}
435
436	0	if (maxLen <= 0 \|\| maxLen > 32)
437	0	return false;
438
439	0	maxCodeLength = maxLen;
440	0	return true;
441	0	}
442
443		// -------------------------------------------------------------------------- ;
444
445		bool Huffman::BitStuffCodes(Byte** ppByte, int i0, int i1) const
446	0	{
447	0	if (!ppByte)
448	0	return false;
449
450	0	unsigned int* arr = (unsigned int)(ppByte);
451	0	unsigned int* dstPtr = arr;
452	0	int size = (int)m_codeTable.size();
453	0	int bitPos = 0;
454
455	0	for (int i = i0; i < i1; i++)
456	0	{
457	0	int k = GetIndexWrapAround(i, size);
458	0	int len = m_codeTable[k].first;
459	0	if (len > 0)
460	0	{
461	0	unsigned int val = m_codeTable[k].second;
462	0	if (32 - bitPos >= len)
463	0	{
464	0	if (bitPos == 0)
465	0	*dstPtr = 0;
466
467	0	*dstPtr \|= val << (32 - bitPos - len);
468	0	bitPos += len;
469	0	if (bitPos == 32)
470	0	{
471	0	bitPos = 0;
472	0	dstPtr++;
473	0	}
474	0	}
475	0	else
476	0	{
477	0	bitPos += len - 32;
478	0	*dstPtr++ \|= val >> bitPos; // bitPos > 0
479	0	*dstPtr = val << (32 - bitPos);
480	0	}
481	0	}
482	0	}
483
484	0	size_t numUInts = dstPtr - arr + (bitPos > 0 ? 1 : 0);
485	0	ppByte += numUInts sizeof(unsigned int);
486	0	return true;
487	0	}
488
489		// -------------------------------------------------------------------------- ;
490
491		bool Huffman::BitUnStuffCodes(const Byte** ppByte, size_t& nBytesRemainingInOut, int i0, int i1)
492	0	{
493	0	if (!ppByte \|\| !(*ppByte))
494	0	return false;
495
496	0	size_t nBytesRemaining = nBytesRemainingInOut;
497
498	0	const unsigned int* arr = (const unsigned int)(ppByte);
499	0	const unsigned int* srcPtr = arr;
500	0	const size_t sizeUInt = sizeof(*srcPtr);
501
502	0	int size = (int)m_codeTable.size();
503	0	int bitPos = 0;
504
505	0	for (int i = i0; i < i1; i++)
506	0	{
507	0	int k = GetIndexWrapAround(i, size);
508	0	int len = m_codeTable[k].first;
509	0	if (len > 0)
510	0	{
511	0	if (nBytesRemaining < sizeUInt \|\| len > 32)
512	0	return false;
513
514	0	m_codeTable[k].second = ((*srcPtr) << bitPos) >> (32 - len);
515
516	0	if (32 - bitPos >= len)
517	0	{
518	0	bitPos += len;
519	0	if (bitPos == 32)
520	0	{
521	0	bitPos = 0;
522	0	srcPtr++;
523	0	nBytesRemaining -= sizeUInt;
524	0	}
525	0	}
526	0	else
527	0	{
528	0	bitPos += len - 32;
529	0	srcPtr++;
530	0	nBytesRemaining -= sizeUInt;
531
532	0	if (nBytesRemaining < sizeUInt)
533	0	return false;
534
535	0	m_codeTable[k].second \|= (*srcPtr) >> (32 - bitPos); // bitPos > 0
536	0	}
537	0	}
538	0	}
539
540	0	size_t numUInts = srcPtr - arr + (bitPos > 0 ? 1 : 0);
541	0	size_t len = numUInts * sizeUInt;
542
543	0	if (nBytesRemainingInOut < len)
544	0	return false;
545
546	0	*ppByte += len;
547	0	nBytesRemainingInOut -= len;
548
549	0	if (nBytesRemaining != nBytesRemainingInOut && nBytesRemaining != nBytesRemainingInOut + sizeUInt) // the real check
550	0	return false;
551
552	0	return true;
553	0	}
554
555		// -------------------------------------------------------------------------- ;
556
557		//struct MyLargerThanOp
558		//{
559		// inline bool operator() (const pair<int, unsigned int>& p0,
560		// const pair<int, unsigned int>& p1) { return p0.first > p1.first; }
561		//};
562
563		// -------------------------------------------------------------------------- ;
564
565		bool Huffman::ConvertCodesToCanonical()
566	0	{
567		// from the non canonical code book, create an array to be sorted in descending order:
568		// codeLength * tableSize - index
569
570	0	unsigned int tableSize = (unsigned int)m_codeTable.size();
571	0	if (tableSize == 0)
572	0	return true;
573	0	vector<pair<int, unsigned int> > sortVec(tableSize, pair<int, unsigned int>(0, 0));
574		//memset(&sortVec[0], 0, tableSize * sizeof(pair<int, unsigned int>));
575
576	0	for (unsigned int i = 0; i < tableSize; i++)
577	0	if (m_codeTable[i].first > 0)
578	0	sortVec[i] = pair<int, unsigned int>(m_codeTable[i].first * tableSize - i, i);
579
580		// sort descending
581		//std::sort(sortVec.begin(), sortVec.end(), MyLargerThanOp());
582
583	0	std::sort(sortVec.begin(), sortVec.end(),
584	0	[](const pair<int, unsigned int>& p0,
585	0	const pair<int, unsigned int>& p1) { return p0.first > p1.first; });
586
587		// create canonical codes and assign to orig code table
588	0	unsigned int index = sortVec[0].second;
589	0	unsigned short codeLen = m_codeTable[index].first; // max code length for this table
590	0	unsigned int i = 0, codeCanonical = 0;
591
592	0	while (i < tableSize && sortVec[i].first > 0)
593	0	{
594	0	index = sortVec[i++].second;
595	0	short delta = codeLen - m_codeTable[index].first; // difference of 2 consecutive code lengths, >= 0 as sorted
596	0	codeCanonical >>= delta;
597	0	codeLen -= delta;
598	0	m_codeTable[index].second = codeCanonical++;
599	0	}
600
601	0	return true;
602	0	}
603
604		// -------------------------------------------------------------------------- ;