Coverage Report

Created: 2025-08-11 09:23

/src/gdal/third_party/LercLib/Huffman.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright 2015 Esri
3
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7
8
http://www.apache.org/licenses/LICENSE-2.0
9
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
16
A local copy of the license and additional notices are located with the
17
source distribution at:
18
19
http://github.com/Esri/lerc/
20
21
Contributors:  Thomas Maurer
22
*/
23
24
#include <algorithm>
25
#include <queue>
26
#include "Defines.h"
27
#include "Huffman.h"
28
#include "BitStuffer2.h"
29
30
using namespace std;
31
USING_NAMESPACE_LERC
32
33
// -------------------------------------------------------------------------- ;
34
35
bool Huffman::ComputeCodes(const vector<int>& histo)
36
0
{
37
0
  if (histo.empty() || histo.size() >= m_maxHistoSize)
38
0
    return false;
39
40
0
  priority_queue<Node, vector<Node>, less<Node> > pq;
41
42
0
  int numNodes = 0;
43
44
0
  int size = (int)histo.size();
45
0
  for (int i = 0; i < size; i++)    // add all leaf nodes
46
0
    if (histo[i] > 0)
47
0
      pq.push(Node((short)i, histo[i]));
48
49
0
  if (pq.size() < 2)    // histo has only 0 or 1 bin that is not empty; quit Huffman and give it to Lerc
50
0
    return false;
51
52
0
  while (pq.size() > 1)    // build the Huffman tree
53
0
  {
54
0
    Node* child0 = new Node(pq.top());
55
0
    numNodes++;
56
0
    pq.pop();
57
0
    Node* child1 = new Node(pq.top());
58
0
    numNodes++;
59
0
    pq.pop();
60
0
    pq.push(Node(child0, child1));
61
0
  }
62
63
0
  m_codeTable.resize(size);
64
0
  std::fill(m_codeTable.begin(), m_codeTable.end(),
65
0
    std::pair<unsigned short, unsigned int>((short)0, 0));
66
67
0
  if (!pq.top().TreeToLUT(0, 0, m_codeTable))    // fill the LUT
68
0
    return false;
69
70
  //pq.top().FreeTree(numNodes);    // Linux compiler complains
71
0
  Node nodeNonConst = pq.top();
72
0
  nodeNonConst.FreeTree(numNodes);    // free all the nodes
73
74
0
  if (numNodes != 0)    // check the ref count
75
0
    return false;
76
77
0
  if (!ConvertCodesToCanonical())
78
0
    return false;
79
80
0
  return true;
81
0
}
82
83
// -------------------------------------------------------------------------- ;
84
85
bool Huffman::ComputeCompressedSize(const std::vector<int>& histo, int& numBytes, double& avgBpp) const
86
0
{
87
0
  if (histo.empty() || histo.size() >= m_maxHistoSize)
88
0
    return false;
89
90
0
  numBytes = 0;
91
0
  if (!ComputeNumBytesCodeTable(numBytes))    // header and code table
92
0
    return false;
93
94
0
  int numBits = 0, numElem = 0;
95
0
  int size = (int)histo.size();
96
0
  for (int i = 0; i < size; i++)
97
0
    if (histo[i] > 0)
98
0
    {
99
0
      numBits += histo[i] * m_codeTable[i].first;
100
0
      numElem += histo[i];
101
0
    }
102
103
0
  if (numElem == 0)
104
0
    return false;
105
106
0
  int numUInts = ((((numBits + 7) >> 3) + 3) >> 2) + 1;    // add one more as the decode LUT can read ahead
107
0
  numBytes += 4 * numUInts;    // data huffman coded
108
0
  avgBpp = 8 * numBytes / (double)numElem;
109
110
0
  return true;
111
0
}
112
113
// -------------------------------------------------------------------------- ;
114
115
bool Huffman::SetCodes(const vector<pair<unsigned short, unsigned int> >& codeTable)
116
0
{
117
0
  if (codeTable.empty() || codeTable.size() >= m_maxHistoSize)
118
0
    return false;
119
120
0
  m_codeTable = codeTable;
121
0
  return true;
122
0
}
123
124
// -------------------------------------------------------------------------- ;
125
126
bool Huffman::WriteCodeTable(Byte** ppByte, int lerc2Version) const
127
0
{
128
0
  if (!ppByte)
129
0
    return false;
130
131
0
  int i0, i1, maxLen;
132
0
  if (!GetRange(i0, i1, maxLen))
133
0
    return false;
134
135
0
  int size = (int)m_codeTable.size();
136
0
  vector<unsigned int> dataVec(i1 - i0, 0);
137
138
0
  for (int i = i0; i < i1; i++)
139
0
  {
140
0
    int k = GetIndexWrapAround(i, size);
141
0
    dataVec[i - i0] = m_codeTable[k].first;
142
0
  }
143
144
  // header
145
0
  vector<int> intVec;
146
0
  intVec.push_back(4);    // huffman version; 4 guarantees canonical codes
147
0
  intVec.push_back(size);
148
0
  intVec.push_back(i0);   // code range
149
0
  intVec.push_back(i1);
150
151
0
  Byte* ptr = *ppByte;
152
153
0
  size_t len = intVec.size() * sizeof(int);
154
0
  memcpy(ptr, &intVec[0], len);
155
0
  ptr += len;
156
157
0
  BitStuffer2 bitStuffer2;
158
0
  if (!bitStuffer2.EncodeSimple(&ptr, dataVec, lerc2Version))    // code lengths, bit stuffed
159
0
    return false;
160
161
0
  if (!BitStuffCodes(&ptr, i0, i1))    // variable length codes, bit stuffed
162
0
    return false;
163
164
0
  *ppByte = ptr;
165
0
  return true;
166
0
}
167
168
// -------------------------------------------------------------------------- ;
169
170
bool Huffman::ReadCodeTable(const Byte** ppByte, size_t& nBytesRemainingInOut, int lerc2Version)
171
6.50k
{
172
6.50k
  if (!ppByte || !(*ppByte))
173
0
    return false;
174
175
6.50k
  const Byte* ptr = *ppByte;
176
6.50k
  size_t nBytesRemaining = nBytesRemainingInOut;
177
178
6.50k
  vector<int> intVec(4, 0);
179
6.50k
  size_t len = intVec.size() * sizeof(int);
180
181
6.50k
  if (nBytesRemaining < len)
182
10
    return false;
183
184
6.49k
  memcpy(&intVec[0], ptr, len);
185
6.49k
  ptr += len;
186
6.49k
  nBytesRemaining -= len;
187
188
6.49k
  int version = intVec[0];
189
190
6.49k
  if (version < 2)    // allow forward compatibility; for updates that break old decoders increase Lerc2 version number;
191
104
    return false;
192
193
6.38k
  const int size = intVec[1];
194
6.38k
  const int i0 = intVec[2];
195
6.38k
  const int i1 = intVec[3];
196
197
6.38k
  if (i0 >= i1 || i0 < 0 || size < 0 || size > (int)m_maxHistoSize)
198
335
    return false;
199
200
6.05k
  if (GetIndexWrapAround(i0, size) >= size || GetIndexWrapAround(i1 - 1, size) >= size)
201
234
    return false;
202
203
5.81k
  try
204
5.81k
  {
205
5.81k
    vector<unsigned int> dataVec(i1 - i0, 0);
206
5.81k
    BitStuffer2 bitStuffer2;
207
5.81k
    if (!bitStuffer2.Decode(&ptr, nBytesRemaining, dataVec, dataVec.size(), lerc2Version))    // unstuff the code lengths
208
261
      return false;
209
210
5.55k
    if (dataVec.size() != static_cast<size_t>(i1 - i0))
211
97
      return false;
212
213
5.45k
    m_codeTable.resize(size);
214
5.45k
    std::fill(m_codeTable.begin(), m_codeTable.end(),
215
5.45k
      std::pair<unsigned short, unsigned int>((short)0, 0));
216
217
1.12M
    for (int i = i0; i < i1; i++)
218
1.12M
    {
219
1.12M
      int k = GetIndexWrapAround(i, size);
220
1.12M
      m_codeTable[k].first = (unsigned short)dataVec[i - i0];
221
1.12M
    }
222
223
5.45k
    if (!BitUnStuffCodes(&ptr, nBytesRemaining, i0, i1))    // unstuff the codes
224
227
      return false;
225
226
5.23k
    *ppByte = ptr;
227
5.23k
    nBytesRemainingInOut = nBytesRemaining;
228
5.23k
    return true;
229
5.45k
  }
230
5.81k
  catch (std::exception&)
231
5.81k
  {
232
0
    return false;
233
0
  }
234
5.81k
}
235
236
// -------------------------------------------------------------------------- ;
237
238
bool Huffman::BuildTreeFromCodes(int& numBitsLUT)
239
5.23k
{
240
5.23k
  int i0 = 0, i1 = 0, maxLen = 0;
241
5.23k
  if (!GetRange(i0, i1, maxLen))
242
93
    return false;
243
244
  // build decode LUT using max of 12 bits
245
5.13k
  int size = (int)m_codeTable.size();
246
5.13k
  int minNumZeroBits = 32;
247
248
5.13k
  bool bNeedTree = maxLen > m_maxNumBitsLUT;
249
5.13k
  numBitsLUT = min(maxLen, m_maxNumBitsLUT);
250
251
5.13k
  int sizeLUT = 1 << numBitsLUT;
252
253
5.13k
  m_decodeLUT.clear();
254
5.13k
  m_decodeLUT.assign((size_t)sizeLUT, pair<short, short>((short)-1, (short)-1));
255
256
865k
  for (int i = i0; i < i1; i++)
257
860k
  {
258
860k
    int k = GetIndexWrapAround(i, size);
259
860k
    int len = m_codeTable[k].first;
260
261
860k
    if (len == 0)
262
538k
      continue;
263
264
322k
    unsigned int code = m_codeTable[k].second;
265
266
322k
    if (len <= numBitsLUT)
267
267k
    {
268
267k
      code <<= (numBitsLUT - len);
269
267k
      unsigned int numEntries = 1 << (numBitsLUT - len);
270
271
100M
      for (unsigned int j = 0; j < numEntries; j++)
272
100M
      {
273
100M
        auto& entry = m_decodeLUT[code | j];
274
100M
        entry.first = (short)len;    // add the duplicates
275
100M
        entry.second = (short)k;    // add the duplicates
276
100M
      }
277
267k
    }
278
54.6k
    else    // for the codes too long for the LUT, count how many leading bits are 0
279
54.6k
    {
280
54.6k
      int shift = 1;
281
465k
      while (code >>= 1) shift++;    // large canonical codes start with zero's
282
54.6k
      minNumZeroBits = min(minNumZeroBits, len - shift);
283
54.6k
    }
284
322k
  }
285
286
5.13k
  m_numBitsToSkipInTree = bNeedTree? minNumZeroBits : 0;
287
288
5.13k
  if (!bNeedTree)    // decode LUT covers it all, no tree needed
289
681
    return true;
290
291
  //m_numBitsToSkipInTree = 0;    // to disable skipping the 0 bits
292
293
4.45k
  ClearTree();  // if there
294
295
4.45k
  Node emptyNode((short)-1, 0);
296
4.45k
  m_root = new Node(emptyNode);
297
298
765k
  for (int i = i0; i < i1; i++)
299
760k
  {
300
760k
    int k = GetIndexWrapAround(i, size);
301
760k
    int len = m_codeTable[k].first;
302
303
760k
    if (len > 0 && len > numBitsLUT)    // add only codes not in the decode LUT
304
54.6k
    {
305
54.6k
      unsigned int code = m_codeTable[k].second;
306
54.6k
      Node* node = m_root;
307
54.6k
      int j = len - m_numBitsToSkipInTree;    // reduce len by number of leading 0 bits from above
308
309
809k
      while (--j >= 0)    // go over the bits
310
754k
      {
311
754k
        if (code & (1 << j))
312
215k
        {
313
215k
          if (!node->child1)
314
133k
            node->child1 = new Node(emptyNode);
315
316
215k
          node = node->child1;
317
215k
        }
318
539k
        else
319
539k
        {
320
539k
          if (!node->child0)
321
232k
            node->child0 = new Node(emptyNode);
322
323
539k
          node = node->child0;
324
539k
        }
325
326
754k
        if (j == 0)    // last bit, leaf node
327
54.6k
          node->value = (short)k;    // set the value
328
754k
      }
329
54.6k
    }
330
760k
  }
331
332
4.45k
  return true;
333
5.13k
}
334
335
// -------------------------------------------------------------------------- ;
336
337
void Huffman::Clear()
338
6.50k
{
339
6.50k
  m_codeTable.clear();
340
6.50k
  m_decodeLUT.clear();
341
6.50k
  ClearTree();
342
6.50k
}
343
344
// -------------------------------------------------------------------------- ;
345
346
void Huffman::ClearTree()
347
10.9k
{
348
10.9k
  if (m_root)
349
4.45k
  {
350
4.45k
    int n = 0;
351
4.45k
    m_root->FreeTree(n);
352
4.45k
    delete m_root;
353
4.45k
    m_root = nullptr;
354
4.45k
  }
355
10.9k
}
356
357
// -------------------------------------------------------------------------- ;
358
// -------------------------------------------------------------------------- ;
359
360
bool Huffman::ComputeNumBytesCodeTable(int& numBytes) const
361
0
{
362
0
  int i0, i1, maxLen;
363
0
  if (!GetRange(i0, i1, maxLen))
364
0
    return false;
365
366
0
  int size = (int)m_codeTable.size();
367
0
  int sum = 0;
368
0
  for (int i = i0; i < i1; i++)
369
0
  {
370
0
    int k = GetIndexWrapAround(i, size);
371
0
    sum += m_codeTable[k].first;
372
0
  }
373
374
0
  numBytes = 4 * sizeof(int);    // version, size, first bin, (last + 1) bin
375
376
0
  BitStuffer2 bitStuffer2;
377
0
  numBytes += bitStuffer2.ComputeNumBytesNeededSimple((unsigned int)(i1 - i0), (unsigned int)maxLen);    // code lengths
378
0
  int numUInts = (((sum + 7) >> 3) + 3) >> 2;
379
0
  numBytes += 4 * numUInts;    // byte array with the codes bit stuffed
380
381
0
  return true;
382
0
}
383
384
// -------------------------------------------------------------------------- ;
385
386
bool Huffman::GetRange(int& i0, int& i1, int& maxCodeLength) const
387
5.23k
{
388
5.23k
  if (m_codeTable.empty() || m_codeTable.size() >= m_maxHistoSize)
389
12
    return false;
390
391
  // first, check for peak somewhere in the middle with 0 stretches left and right
392
5.22k
  int size = (int)m_codeTable.size();
393
5.22k
  {
394
5.22k
    int i = 0;
395
1.02M
    while (i < size && m_codeTable[i].first == 0) i++;
396
5.22k
    i0 = i;
397
5.22k
    i = size - 1;
398
42.9M
    while (i >= 0 && m_codeTable[i].first == 0) i--;
399
5.22k
    i1 = i + 1;    // exclusive
400
5.22k
  }
401
402
5.22k
  if (i1 <= i0)
403
81
    return false;
404
405
  // second, cover the common case that the peak is close to 0
406
5.13k
  pair<int, int> segm(0, 0);
407
5.13k
  int j = 0;
408
114k
  while (j < size)    // find the largest stretch of 0's, if any
409
109k
  {
410
431k
    while (j < size && m_codeTable[j].first > 0) j++;
411
109k
    int k0 = j;
412
43.1M
    while (j < size && m_codeTable[j].first == 0) j++;
413
109k
    int k1 = j;
414
415
109k
    if (k1 - k0 > segm.second)
416
13.3k
      segm = pair<int, int>(k0, k1 - k0);
417
109k
  }
418
419
5.13k
  if (size - segm.second < i1 - i0)
420
806
  {
421
806
    i0 = segm.first + segm.second;
422
806
    i1 = segm.first + size;    // do wrap around
423
806
  }
424
425
5.13k
  if (i1 <= i0)
426
0
    return false;
427
428
5.13k
  int maxLen = 0;
429
865k
  for (int i = i0; i < i1; i++)
430
860k
  {
431
860k
    int k = GetIndexWrapAround(i, size);
432
860k
    int len = m_codeTable[k].first;
433
860k
    maxLen = max(maxLen, len);
434
860k
  }
435
436
5.13k
  if (maxLen <= 0 || maxLen > 32)
437
0
    return false;
438
439
5.13k
  maxCodeLength = maxLen;
440
5.13k
  return true;
441
5.13k
}
442
443
// -------------------------------------------------------------------------- ;
444
445
bool Huffman::BitStuffCodes(Byte** ppByte, int i0, int i1) const
446
0
{
447
0
  if (!ppByte)
448
0
    return false;
449
450
0
  unsigned int* arr = (unsigned int*)(*ppByte);
451
0
  unsigned int* dstPtr = arr;
452
0
  int size = (int)m_codeTable.size();
453
0
  int bitPos = 0;
454
455
0
  for (int i = i0; i < i1; i++)
456
0
  {
457
0
    int k = GetIndexWrapAround(i, size);
458
0
    int len = m_codeTable[k].first;
459
0
    if (len > 0)
460
0
    {
461
0
      unsigned int val = m_codeTable[k].second;
462
0
      if (32 - bitPos >= len)
463
0
      {
464
0
        if (bitPos == 0)
465
0
          *dstPtr = 0;
466
467
0
        *dstPtr |= val << (32 - bitPos - len);
468
0
        bitPos += len;
469
0
        if (bitPos == 32)
470
0
        {
471
0
          bitPos = 0;
472
0
          dstPtr++;
473
0
        }
474
0
      }
475
0
      else
476
0
      {
477
0
        bitPos += len - 32;
478
0
        *dstPtr++ |= val >> bitPos;    // bitPos > 0
479
0
        *dstPtr = val << (32 - bitPos);
480
0
      }
481
0
    }
482
0
  }
483
484
0
  size_t numUInts = dstPtr - arr + (bitPos > 0 ? 1 : 0);
485
0
  *ppByte += numUInts * sizeof(unsigned int);
486
0
  return true;
487
0
}
488
489
// -------------------------------------------------------------------------- ;
490
491
bool Huffman::BitUnStuffCodes(const Byte** ppByte, size_t& nBytesRemainingInOut, int i0, int i1)
492
5.45k
{
493
5.45k
  if (!ppByte || !(*ppByte))
494
0
    return false;
495
496
5.45k
  size_t nBytesRemaining = nBytesRemainingInOut;
497
498
5.45k
  const unsigned int* arr = (const unsigned int*)(*ppByte);
499
5.45k
  const unsigned int* srcPtr = arr;
500
5.45k
  const size_t sizeUInt = sizeof(*srcPtr);
501
502
5.45k
  int size = (int)m_codeTable.size();
503
5.45k
  int bitPos = 0;
504
505
1.10M
  for (int i = i0; i < i1; i++)
506
1.10M
  {
507
1.10M
    int k = GetIndexWrapAround(i, size);
508
1.10M
    int len = m_codeTable[k].first;
509
1.10M
    if (len > 0)
510
339k
    {
511
339k
      if (nBytesRemaining < sizeUInt || len > 32)
512
106
        return false;
513
514
338k
      m_codeTable[k].second = ((*srcPtr) << bitPos) >> (32 - len);
515
516
338k
      if (32 - bitPos >= len)
517
273k
      {
518
273k
        bitPos += len;
519
273k
        if (bitPos == 32)
520
11.4k
        {
521
11.4k
          bitPos = 0;
522
11.4k
          srcPtr++;
523
11.4k
          nBytesRemaining -= sizeUInt;
524
11.4k
        }
525
273k
      }
526
65.8k
      else
527
65.8k
      {
528
65.8k
        bitPos += len - 32;
529
65.8k
        srcPtr++;
530
65.8k
        nBytesRemaining -= sizeUInt;
531
532
65.8k
        if (nBytesRemaining < sizeUInt)
533
121
          return false;
534
535
65.6k
        m_codeTable[k].second |= (*srcPtr) >> (32 - bitPos);    // bitPos > 0
536
65.6k
      }
537
338k
    }
538
1.10M
  }
539
540
5.23k
  size_t numUInts = srcPtr - arr + (bitPos > 0 ? 1 : 0);
541
5.23k
  size_t len = numUInts * sizeUInt;
542
543
5.23k
  if (nBytesRemainingInOut < len)
544
0
    return false;
545
546
5.23k
  *ppByte += len;
547
5.23k
  nBytesRemainingInOut -= len;
548
549
5.23k
  if (nBytesRemaining != nBytesRemainingInOut && nBytesRemaining != nBytesRemainingInOut + sizeUInt)    // the real check
550
0
    return false;
551
552
5.23k
  return true;
553
5.23k
}
554
555
// -------------------------------------------------------------------------- ;
556
557
//struct MyLargerThanOp
558
//{
559
//  inline bool operator() (const pair<int, unsigned int>& p0,
560
//                          const pair<int, unsigned int>& p1)  { return p0.first > p1.first; }
561
//};
562
563
// -------------------------------------------------------------------------- ;
564
565
bool Huffman::ConvertCodesToCanonical()
566
0
{
567
  // from the non canonical code book, create an array to be sorted in descending order:
568
  //   codeLength * tableSize - index
569
570
0
  unsigned int tableSize = (unsigned int)m_codeTable.size();
571
0
  if (tableSize == 0)
572
0
    return true;
573
0
  vector<pair<int, unsigned int> > sortVec(tableSize, pair<int, unsigned int>(0, 0));
574
  //memset(&sortVec[0], 0, tableSize * sizeof(pair<int, unsigned int>));
575
576
0
  for (unsigned int i = 0; i < tableSize; i++)
577
0
    if (m_codeTable[i].first > 0)
578
0
      sortVec[i] = pair<int, unsigned int>(m_codeTable[i].first * tableSize - i, i);
579
580
  // sort descending
581
  //std::sort(sortVec.begin(), sortVec.end(), MyLargerThanOp());
582
583
0
  std::sort(sortVec.begin(), sortVec.end(),
584
0
    [](const pair<int, unsigned int>& p0,
585
0
       const pair<int, unsigned int>& p1) { return p0.first > p1.first; });
586
587
  // create canonical codes and assign to orig code table
588
0
  unsigned int index = sortVec[0].second;
589
0
  unsigned short codeLen = m_codeTable[index].first;    // max code length for this table
590
0
  unsigned int i = 0, codeCanonical = 0;
591
592
0
  while (i < tableSize && sortVec[i].first > 0)
593
0
  {
594
0
    index = sortVec[i++].second;
595
0
    short delta = codeLen - m_codeTable[index].first;  // difference of 2 consecutive code lengths, >= 0 as sorted
596
0
    codeCanonical >>= delta;
597
0
    codeLen -= delta;
598
0
    m_codeTable[index].second = codeCanonical++;
599
0
  }
600
601
0
  return true;
602
0
}
603
604
// -------------------------------------------------------------------------- ;