Coverage Report

Created: 2025-06-13 06:18

/src/gdal/third_party/LercLib/Huffman.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright 2015 Esri
3
4
Licensed under the Apache License, Version 2.0 (the "License");
5
you may not use this file except in compliance with the License.
6
You may obtain a copy of the License at
7
8
http://www.apache.org/licenses/LICENSE-2.0
9
10
Unless required by applicable law or agreed to in writing, software
11
distributed under the License is distributed on an "AS IS" BASIS,
12
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
See the License for the specific language governing permissions and
14
limitations under the License.
15
16
A local copy of the license and additional notices are located with the
17
source distribution at:
18
19
http://github.com/Esri/lerc/
20
21
Contributors:  Thomas Maurer
22
*/
23
24
#include <algorithm>
25
#include <queue>
26
#include "Defines.h"
27
#include "Huffman.h"
28
#include "BitStuffer2.h"
29
30
using namespace std;
31
USING_NAMESPACE_LERC
32
33
// -------------------------------------------------------------------------- ;
34
35
bool Huffman::ComputeCodes(const vector<int>& histo)
36
0
{
37
0
  if (histo.empty() || histo.size() >= m_maxHistoSize)
38
0
    return false;
39
40
0
  priority_queue<Node, vector<Node>, less<Node> > pq;
41
42
0
  int numNodes = 0;
43
44
0
  int size = (int)histo.size();
45
0
  for (int i = 0; i < size; i++)    // add all leaf nodes
46
0
    if (histo[i] > 0)
47
0
      pq.push(Node((short)i, histo[i]));
48
49
0
  if (pq.size() < 2)    // histo has only 0 or 1 bin that is not empty; quit Huffman and give it to Lerc
50
0
    return false;
51
52
0
  while (pq.size() > 1)    // build the Huffman tree
53
0
  {
54
0
    Node* child0 = new Node(pq.top());
55
0
    numNodes++;
56
0
    pq.pop();
57
0
    Node* child1 = new Node(pq.top());
58
0
    numNodes++;
59
0
    pq.pop();
60
0
    pq.push(Node(child0, child1));
61
0
  }
62
63
0
  m_codeTable.resize(size);
64
0
  std::fill(m_codeTable.begin(), m_codeTable.end(),
65
0
    std::pair<unsigned short, unsigned int>((short)0, 0));
66
67
0
  if (!pq.top().TreeToLUT(0, 0, m_codeTable))    // fill the LUT
68
0
    return false;
69
70
  //pq.top().FreeTree(numNodes);    // Linux compiler complains
71
0
  Node nodeNonConst = pq.top();
72
0
  nodeNonConst.FreeTree(numNodes);    // free all the nodes
73
74
0
  if (numNodes != 0)    // check the ref count
75
0
    return false;
76
77
0
  if (!ConvertCodesToCanonical())
78
0
    return false;
79
80
0
  return true;
81
0
}
82
83
// -------------------------------------------------------------------------- ;
84
85
bool Huffman::ComputeCompressedSize(const std::vector<int>& histo, int& numBytes, double& avgBpp) const
86
0
{
87
0
  if (histo.empty() || histo.size() >= m_maxHistoSize)
88
0
    return false;
89
90
0
  numBytes = 0;
91
0
  if (!ComputeNumBytesCodeTable(numBytes))    // header and code table
92
0
    return false;
93
94
0
  int numBits = 0, numElem = 0;
95
0
  int size = (int)histo.size();
96
0
  for (int i = 0; i < size; i++)
97
0
    if (histo[i] > 0)
98
0
    {
99
0
      numBits += histo[i] * m_codeTable[i].first;
100
0
      numElem += histo[i];
101
0
    }
102
103
0
  if (numElem == 0)
104
0
    return false;
105
106
0
  int numUInts = ((((numBits + 7) >> 3) + 3) >> 2) + 1;    // add one more as the decode LUT can read ahead
107
0
  numBytes += 4 * numUInts;    // data huffman coded
108
0
  avgBpp = 8 * numBytes / (double)numElem;
109
110
0
  return true;
111
0
}
112
113
// -------------------------------------------------------------------------- ;
114
115
bool Huffman::SetCodes(const vector<pair<unsigned short, unsigned int> >& codeTable)
116
0
{
117
0
  if (codeTable.empty() || codeTable.size() >= m_maxHistoSize)
118
0
    return false;
119
120
0
  m_codeTable = codeTable;
121
0
  return true;
122
0
}
123
124
// -------------------------------------------------------------------------- ;
125
126
bool Huffman::WriteCodeTable(Byte** ppByte, int lerc2Version) const
127
0
{
128
0
  if (!ppByte)
129
0
    return false;
130
131
0
  int i0, i1, maxLen;
132
0
  if (!GetRange(i0, i1, maxLen))
133
0
    return false;
134
135
0
  int size = (int)m_codeTable.size();
136
0
  vector<unsigned int> dataVec(i1 - i0, 0);
137
138
0
  for (int i = i0; i < i1; i++)
139
0
  {
140
0
    int k = GetIndexWrapAround(i, size);
141
0
    dataVec[i - i0] = m_codeTable[k].first;
142
0
  }
143
144
  // header
145
0
  vector<int> intVec;
146
0
  intVec.push_back(4);    // huffman version; 4 guarantees canonical codes
147
0
  intVec.push_back(size);
148
0
  intVec.push_back(i0);   // code range
149
0
  intVec.push_back(i1);
150
151
0
  Byte* ptr = *ppByte;
152
153
0
  size_t len = intVec.size() * sizeof(int);
154
0
  memcpy(ptr, &intVec[0], len);
155
0
  ptr += len;
156
157
0
  BitStuffer2 bitStuffer2;
158
0
  if (!bitStuffer2.EncodeSimple(&ptr, dataVec, lerc2Version))    // code lengths, bit stuffed
159
0
    return false;
160
161
0
  if (!BitStuffCodes(&ptr, i0, i1))    // variable length codes, bit stuffed
162
0
    return false;
163
164
0
  *ppByte = ptr;
165
0
  return true;
166
0
}
167
168
// -------------------------------------------------------------------------- ;
169
170
bool Huffman::ReadCodeTable(const Byte** ppByte, size_t& nBytesRemainingInOut, int lerc2Version)
171
0
{
172
0
  if (!ppByte || !(*ppByte))
173
0
    return false;
174
175
0
  const Byte* ptr = *ppByte;
176
0
  size_t nBytesRemaining = nBytesRemainingInOut;
177
178
0
  vector<int> intVec(4, 0);
179
0
  size_t len = intVec.size() * sizeof(int);
180
181
0
  if (nBytesRemaining < len)
182
0
    return false;
183
184
0
  memcpy(&intVec[0], ptr, len);
185
0
  ptr += len;
186
0
  nBytesRemaining -= len;
187
188
0
  int version = intVec[0];
189
190
0
  if (version < 2)    // allow forward compatibility; for updates that break old decoders increase Lerc2 version number;
191
0
    return false;
192
193
0
  const int size = intVec[1];
194
0
  const int i0 = intVec[2];
195
0
  const int i1 = intVec[3];
196
197
0
  if (i0 >= i1 || i0 < 0 || size < 0 || size > (int)m_maxHistoSize)
198
0
    return false;
199
200
0
  if (GetIndexWrapAround(i0, size) >= size || GetIndexWrapAround(i1 - 1, size) >= size)
201
0
    return false;
202
203
0
  try
204
0
  {
205
0
    vector<unsigned int> dataVec(i1 - i0, 0);
206
0
    BitStuffer2 bitStuffer2;
207
0
    if (!bitStuffer2.Decode(&ptr, nBytesRemaining, dataVec, dataVec.size(), lerc2Version))    // unstuff the code lengths
208
0
      return false;
209
210
0
    if (dataVec.size() != static_cast<size_t>(i1 - i0))
211
0
      return false;
212
213
0
    m_codeTable.resize(size);
214
0
    std::fill(m_codeTable.begin(), m_codeTable.end(),
215
0
      std::pair<unsigned short, unsigned int>((short)0, 0));
216
217
0
    for (int i = i0; i < i1; i++)
218
0
    {
219
0
      int k = GetIndexWrapAround(i, size);
220
0
      m_codeTable[k].first = (unsigned short)dataVec[i - i0];
221
0
    }
222
223
0
    if (!BitUnStuffCodes(&ptr, nBytesRemaining, i0, i1))    // unstuff the codes
224
0
      return false;
225
226
0
    *ppByte = ptr;
227
0
    nBytesRemainingInOut = nBytesRemaining;
228
0
    return true;
229
0
  }
230
0
  catch (std::exception&)
231
0
  {
232
0
    return false;
233
0
  }
234
0
}
235
236
// -------------------------------------------------------------------------- ;
237
238
bool Huffman::BuildTreeFromCodes(int& numBitsLUT)
239
0
{
240
0
  int i0 = 0, i1 = 0, maxLen = 0;
241
0
  if (!GetRange(i0, i1, maxLen))
242
0
    return false;
243
244
  // build decode LUT using max of 12 bits
245
0
  int size = (int)m_codeTable.size();
246
0
  int minNumZeroBits = 32;
247
248
0
  bool bNeedTree = maxLen > m_maxNumBitsLUT;
249
0
  numBitsLUT = min(maxLen, m_maxNumBitsLUT);
250
251
0
  int sizeLUT = 1 << numBitsLUT;
252
253
0
  m_decodeLUT.clear();
254
0
  m_decodeLUT.assign((size_t)sizeLUT, pair<short, short>((short)-1, (short)-1));
255
256
0
  for (int i = i0; i < i1; i++)
257
0
  {
258
0
    int k = GetIndexWrapAround(i, size);
259
0
    int len = m_codeTable[k].first;
260
261
0
    if (len == 0)
262
0
      continue;
263
264
0
    unsigned int code = m_codeTable[k].second;
265
266
0
    if (len <= numBitsLUT)
267
0
    {
268
0
      code <<= (numBitsLUT - len);
269
0
      unsigned int numEntries = 1 << (numBitsLUT - len);
270
271
0
      for (unsigned int j = 0; j < numEntries; j++)
272
0
      {
273
0
        auto& entry = m_decodeLUT[code | j];
274
0
        entry.first = (short)len;    // add the duplicates
275
0
        entry.second = (short)k;    // add the duplicates
276
0
      }
277
0
    }
278
0
    else    // for the codes too long for the LUT, count how many leading bits are 0
279
0
    {
280
0
      int shift = 1;
281
0
      while (code >>= 1) shift++;    // large canonical codes start with zero's
282
0
      minNumZeroBits = min(minNumZeroBits, len - shift);
283
0
    }
284
0
  }
285
286
0
  m_numBitsToSkipInTree = bNeedTree? minNumZeroBits : 0;
287
288
0
  if (!bNeedTree)    // decode LUT covers it all, no tree needed
289
0
    return true;
290
291
  //m_numBitsToSkipInTree = 0;    // to disable skipping the 0 bits
292
293
0
  ClearTree();  // if there
294
295
0
  Node emptyNode((short)-1, 0);
296
0
  m_root = new Node(emptyNode);
297
298
0
  for (int i = i0; i < i1; i++)
299
0
  {
300
0
    int k = GetIndexWrapAround(i, size);
301
0
    int len = m_codeTable[k].first;
302
303
0
    if (len > 0 && len > numBitsLUT)    // add only codes not in the decode LUT
304
0
    {
305
0
      unsigned int code = m_codeTable[k].second;
306
0
      Node* node = m_root;
307
0
      int j = len - m_numBitsToSkipInTree;    // reduce len by number of leading 0 bits from above
308
309
0
      while (--j >= 0)    // go over the bits
310
0
      {
311
0
        if (code & (1 << j))
312
0
        {
313
0
          if (!node->child1)
314
0
            node->child1 = new Node(emptyNode);
315
316
0
          node = node->child1;
317
0
        }
318
0
        else
319
0
        {
320
0
          if (!node->child0)
321
0
            node->child0 = new Node(emptyNode);
322
323
0
          node = node->child0;
324
0
        }
325
326
0
        if (j == 0)    // last bit, leaf node
327
0
          node->value = (short)k;    // set the value
328
0
      }
329
0
    }
330
0
  }
331
332
0
  return true;
333
0
}
334
335
// -------------------------------------------------------------------------- ;
336
337
void Huffman::Clear()
338
0
{
339
0
  m_codeTable.clear();
340
0
  m_decodeLUT.clear();
341
0
  ClearTree();
342
0
}
343
344
// -------------------------------------------------------------------------- ;
345
346
void Huffman::ClearTree()
347
0
{
348
0
  if (m_root)
349
0
  {
350
0
    int n = 0;
351
0
    m_root->FreeTree(n);
352
0
    delete m_root;
353
0
    m_root = nullptr;
354
0
  }
355
0
}
356
357
// -------------------------------------------------------------------------- ;
358
// -------------------------------------------------------------------------- ;
359
360
bool Huffman::ComputeNumBytesCodeTable(int& numBytes) const
361
0
{
362
0
  int i0, i1, maxLen;
363
0
  if (!GetRange(i0, i1, maxLen))
364
0
    return false;
365
366
0
  int size = (int)m_codeTable.size();
367
0
  int sum = 0;
368
0
  for (int i = i0; i < i1; i++)
369
0
  {
370
0
    int k = GetIndexWrapAround(i, size);
371
0
    sum += m_codeTable[k].first;
372
0
  }
373
374
0
  numBytes = 4 * sizeof(int);    // version, size, first bin, (last + 1) bin
375
376
0
  BitStuffer2 bitStuffer2;
377
0
  numBytes += bitStuffer2.ComputeNumBytesNeededSimple((unsigned int)(i1 - i0), (unsigned int)maxLen);    // code lengths
378
0
  int numUInts = (((sum + 7) >> 3) + 3) >> 2;
379
0
  numBytes += 4 * numUInts;    // byte array with the codes bit stuffed
380
381
0
  return true;
382
0
}
383
384
// -------------------------------------------------------------------------- ;
385
386
bool Huffman::GetRange(int& i0, int& i1, int& maxCodeLength) const
387
0
{
388
0
  if (m_codeTable.empty() || m_codeTable.size() >= m_maxHistoSize)
389
0
    return false;
390
391
  // first, check for peak somewhere in the middle with 0 stretches left and right
392
0
  int size = (int)m_codeTable.size();
393
0
  {
394
0
    int i = 0;
395
0
    while (i < size && m_codeTable[i].first == 0) i++;
396
0
    i0 = i;
397
0
    i = size - 1;
398
0
    while (i >= 0 && m_codeTable[i].first == 0) i--;
399
0
    i1 = i + 1;    // exclusive
400
0
  }
401
402
0
  if (i1 <= i0)
403
0
    return false;
404
405
  // second, cover the common case that the peak is close to 0
406
0
  pair<int, int> segm(0, 0);
407
0
  int j = 0;
408
0
  while (j < size)    // find the largest stretch of 0's, if any
409
0
  {
410
0
    while (j < size && m_codeTable[j].first > 0) j++;
411
0
    int k0 = j;
412
0
    while (j < size && m_codeTable[j].first == 0) j++;
413
0
    int k1 = j;
414
415
0
    if (k1 - k0 > segm.second)
416
0
      segm = pair<int, int>(k0, k1 - k0);
417
0
  }
418
419
0
  if (size - segm.second < i1 - i0)
420
0
  {
421
0
    i0 = segm.first + segm.second;
422
0
    i1 = segm.first + size;    // do wrap around
423
0
  }
424
425
0
  if (i1 <= i0)
426
0
    return false;
427
428
0
  int maxLen = 0;
429
0
  for (int i = i0; i < i1; i++)
430
0
  {
431
0
    int k = GetIndexWrapAround(i, size);
432
0
    int len = m_codeTable[k].first;
433
0
    maxLen = max(maxLen, len);
434
0
  }
435
436
0
  if (maxLen <= 0 || maxLen > 32)
437
0
    return false;
438
439
0
  maxCodeLength = maxLen;
440
0
  return true;
441
0
}
442
443
// -------------------------------------------------------------------------- ;
444
445
bool Huffman::BitStuffCodes(Byte** ppByte, int i0, int i1) const
446
0
{
447
0
  if (!ppByte)
448
0
    return false;
449
450
0
  unsigned int* arr = (unsigned int*)(*ppByte);
451
0
  unsigned int* dstPtr = arr;
452
0
  int size = (int)m_codeTable.size();
453
0
  int bitPos = 0;
454
455
0
  for (int i = i0; i < i1; i++)
456
0
  {
457
0
    int k = GetIndexWrapAround(i, size);
458
0
    int len = m_codeTable[k].first;
459
0
    if (len > 0)
460
0
    {
461
0
      unsigned int val = m_codeTable[k].second;
462
0
      if (32 - bitPos >= len)
463
0
      {
464
0
        if (bitPos == 0)
465
0
          *dstPtr = 0;
466
467
0
        *dstPtr |= val << (32 - bitPos - len);
468
0
        bitPos += len;
469
0
        if (bitPos == 32)
470
0
        {
471
0
          bitPos = 0;
472
0
          dstPtr++;
473
0
        }
474
0
      }
475
0
      else
476
0
      {
477
0
        bitPos += len - 32;
478
0
        *dstPtr++ |= val >> bitPos;    // bitPos > 0
479
0
        *dstPtr = val << (32 - bitPos);
480
0
      }
481
0
    }
482
0
  }
483
484
0
  size_t numUInts = dstPtr - arr + (bitPos > 0 ? 1 : 0);
485
0
  *ppByte += numUInts * sizeof(unsigned int);
486
0
  return true;
487
0
}
488
489
// -------------------------------------------------------------------------- ;
490
491
bool Huffman::BitUnStuffCodes(const Byte** ppByte, size_t& nBytesRemainingInOut, int i0, int i1)
492
0
{
493
0
  if (!ppByte || !(*ppByte))
494
0
    return false;
495
496
0
  size_t nBytesRemaining = nBytesRemainingInOut;
497
498
0
  const unsigned int* arr = (const unsigned int*)(*ppByte);
499
0
  const unsigned int* srcPtr = arr;
500
0
  const size_t sizeUInt = sizeof(*srcPtr);
501
502
0
  int size = (int)m_codeTable.size();
503
0
  int bitPos = 0;
504
505
0
  for (int i = i0; i < i1; i++)
506
0
  {
507
0
    int k = GetIndexWrapAround(i, size);
508
0
    int len = m_codeTable[k].first;
509
0
    if (len > 0)
510
0
    {
511
0
      if (nBytesRemaining < sizeUInt || len > 32)
512
0
        return false;
513
514
0
      m_codeTable[k].second = ((*srcPtr) << bitPos) >> (32 - len);
515
516
0
      if (32 - bitPos >= len)
517
0
      {
518
0
        bitPos += len;
519
0
        if (bitPos == 32)
520
0
        {
521
0
          bitPos = 0;
522
0
          srcPtr++;
523
0
          nBytesRemaining -= sizeUInt;
524
0
        }
525
0
      }
526
0
      else
527
0
      {
528
0
        bitPos += len - 32;
529
0
        srcPtr++;
530
0
        nBytesRemaining -= sizeUInt;
531
532
0
        if (nBytesRemaining < sizeUInt)
533
0
          return false;
534
535
0
        m_codeTable[k].second |= (*srcPtr) >> (32 - bitPos);    // bitPos > 0
536
0
      }
537
0
    }
538
0
  }
539
540
0
  size_t numUInts = srcPtr - arr + (bitPos > 0 ? 1 : 0);
541
0
  size_t len = numUInts * sizeUInt;
542
543
0
  if (nBytesRemainingInOut < len)
544
0
    return false;
545
546
0
  *ppByte += len;
547
0
  nBytesRemainingInOut -= len;
548
549
0
  if (nBytesRemaining != nBytesRemainingInOut && nBytesRemaining != nBytesRemainingInOut + sizeUInt)    // the real check
550
0
    return false;
551
552
0
  return true;
553
0
}
554
555
// -------------------------------------------------------------------------- ;
556
557
//struct MyLargerThanOp
558
//{
559
//  inline bool operator() (const pair<int, unsigned int>& p0,
560
//                          const pair<int, unsigned int>& p1)  { return p0.first > p1.first; }
561
//};
562
563
// -------------------------------------------------------------------------- ;
564
565
bool Huffman::ConvertCodesToCanonical()
566
0
{
567
  // from the non canonical code book, create an array to be sorted in descending order:
568
  //   codeLength * tableSize - index
569
570
0
  unsigned int tableSize = (unsigned int)m_codeTable.size();
571
0
  if (tableSize == 0)
572
0
    return true;
573
0
  vector<pair<int, unsigned int> > sortVec(tableSize, pair<int, unsigned int>(0, 0));
574
  //memset(&sortVec[0], 0, tableSize * sizeof(pair<int, unsigned int>));
575
576
0
  for (unsigned int i = 0; i < tableSize; i++)
577
0
    if (m_codeTable[i].first > 0)
578
0
      sortVec[i] = pair<int, unsigned int>(m_codeTable[i].first * tableSize - i, i);
579
580
  // sort descending
581
  //std::sort(sortVec.begin(), sortVec.end(), MyLargerThanOp());
582
583
0
  std::sort(sortVec.begin(), sortVec.end(),
584
0
    [](const pair<int, unsigned int>& p0,
585
0
       const pair<int, unsigned int>& p1) { return p0.first > p1.first; });
586
587
  // create canonical codes and assign to orig code table
588
0
  unsigned int index = sortVec[0].second;
589
0
  unsigned short codeLen = m_codeTable[index].first;    // max code length for this table
590
0
  unsigned int i = 0, codeCanonical = 0;
591
592
0
  while (i < tableSize && sortVec[i].first > 0)
593
0
  {
594
0
    index = sortVec[i++].second;
595
0
    short delta = codeLen - m_codeTable[index].first;  // difference of 2 consecutive code lengths, >= 0 as sorted
596
0
    codeCanonical >>= delta;
597
0
    codeLen -= delta;
598
0
    m_codeTable[index].second = codeCanonical++;
599
0
  }
600
601
0
  return true;
602
0
}
603
604
// -------------------------------------------------------------------------- ;