Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libwebp/src/utils/huffman_encode_utils.c
Line
Count
Source
1
// Copyright 2011 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// Author: Jyrki Alakuijala (jyrki@google.com)
11
//
12
// Entropy encoding (Huffman) for webp lossless.
13
14
#include "src/utils/huffman_encode_utils.h"
15
16
#include <assert.h>
17
#include <stdlib.h>
18
#include <string.h>
19
20
#include "src/utils/bounds_safety.h"
21
#include "src/utils/utils.h"
22
#include "src/webp/format_constants.h"
23
#include "src/webp/types.h"
24
25
WEBP_ASSUME_UNSAFE_INDEXABLE_ABI
26
27
// -----------------------------------------------------------------------------
28
// Util function to optimize the symbol map for RLE coding
29
30
// Heuristics for selecting the stride ranges to collapse.
31
2.23M
static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
32
2.23M
  return abs(a - b) < 4;
33
2.23M
}
34
35
// Change the population counts in a way that the consequent
36
// Huffman tree compression, especially its RLE-part, give smaller output.
37
static void OptimizeHuffmanForRle(int length,
38
                                  uint8_t* const WEBP_COUNTED_BY(length)
39
                                      good_for_rle,
40
                                  uint32_t* const WEBP_COUNTED_BY(length)
41
142k
                                      counts) {
42
  // 1) Let's make the Huffman code more compatible with rle encoding.
43
142k
  int i;
44
15.4M
  for (; length >= 0; --length) {
45
15.4M
    if (length == 0) {
46
8.50k
      return;  // All zeros.
47
8.50k
    }
48
15.4M
    if (counts[length - 1] != 0) {
49
      // Now counts[0..length - 1] does not have trailing zeros.
50
133k
      break;
51
133k
    }
52
15.4M
  }
53
  // 2) Let's mark all population counts that already can be encoded
54
  // with an rle code.
55
133k
  {
56
    // Let's not spoil any of the existing good rle codes.
57
    // Mark any seq of 0's that is longer as 5 as a good_for_rle.
58
    // Mark any seq of non-0's that is longer as 7 as a good_for_rle.
59
133k
    uint32_t symbol = counts[0];
60
133k
    int stride = 0;
61
10.5M
    for (i = 0; i < length + 1; ++i) {
62
10.3M
      if (i == length || counts[i] != symbol) {
63
2.20M
        if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) {
64
180k
          int k;
65
8.00M
          for (k = 0; k < stride; ++k) {
66
7.82M
            good_for_rle[i - k - 1] = 1;
67
7.82M
          }
68
180k
        }
69
2.20M
        stride = 1;
70
2.20M
        if (i != length) {
71
2.07M
          symbol = counts[i];
72
2.07M
        }
73
8.15M
      } else {
74
8.15M
        ++stride;
75
8.15M
      }
76
10.3M
    }
77
133k
  }
78
  // 3) Let's replace those population counts that lead to more rle codes.
79
133k
  {
80
133k
    uint32_t stride = 0;
81
133k
    uint32_t limit = counts[0];
82
133k
    uint32_t sum = 0;
83
10.5M
    for (i = 0; i < length + 1; ++i) {
84
10.3M
      if (i == length || good_for_rle[i] || (i != 0 && good_for_rle[i - 1]) ||
85
9.09M
          !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
86
9.09M
        if (stride >= 4 || (stride >= 3 && sum == 0)) {
87
143k
          uint32_t k;
88
          // The stride must end, collapse what we have, if we have enough (4).
89
143k
          uint32_t count = (sum + stride / 2) / stride;
90
143k
          if (count < 1) {
91
26.9k
            count = 1;
92
26.9k
          }
93
143k
          if (sum == 0) {
94
            // Don't make an all zeros stride to be upgraded to ones.
95
14.2k
            count = 0;
96
14.2k
          }
97
1.22M
          for (k = 0; k < stride; ++k) {
98
            // We don't want to change value at counts[i],
99
            // that is already belonging to the next stride. Thus - 1.
100
1.08M
            counts[i - k - 1] = count;
101
1.08M
          }
102
143k
        }
103
9.09M
        stride = 0;
104
9.09M
        sum = 0;
105
9.09M
        if (i < length - 3) {
106
          // All interesting strides have a count of at least 4,
107
          // at least when non-zeros.
108
8.79M
          limit =
109
8.79M
              (counts[i] + counts[i + 1] + counts[i + 2] + counts[i + 3] + 2) /
110
8.79M
              4;
111
8.79M
        } else if (i < length) {
112
166k
          limit = counts[i];
113
166k
        } else {
114
133k
          limit = 0;
115
133k
        }
116
9.09M
      }
117
10.3M
      ++stride;
118
10.3M
      if (i != length) {
119
10.2M
        sum += counts[i];
120
10.2M
        if (stride >= 4) {
121
652k
          limit = (sum + stride / 2) / stride;
122
652k
        }
123
10.2M
      }
124
10.3M
    }
125
133k
  }
126
133k
}
127
128
// A comparer function for two Huffman trees: sorts first by 'total count'
129
// (more comes first), and then by 'value' (more comes first).
130
10.7M
static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
131
10.7M
  const HuffmanTree* const t1 = (const HuffmanTree*)ptr1;
132
10.7M
  const HuffmanTree* const t2 = (const HuffmanTree*)ptr2;
133
10.7M
  if (t1->total_count > t2->total_count) {
134
2.82M
    return -1;
135
7.88M
  } else if (t1->total_count < t2->total_count) {
136
4.12M
    return 1;
137
4.12M
  } else {
138
3.75M
    assert(t1->value != t2->value);
139
3.75M
    return (t1->value < t2->value) ? -1 : 1;
140
3.75M
  }
141
10.7M
}
142
143
static void SetBitDepths(const HuffmanTree* const tree,
144
                         const HuffmanTree* WEBP_BIDI_INDEXABLE const pool,
145
4.72M
                         uint8_t* WEBP_INDEXABLE const bit_depths, int level) {
146
4.72M
  if (tree->pool_index_left >= 0) {
147
2.33M
    SetBitDepths(&pool[tree->pool_index_left], pool, bit_depths, level + 1);
148
2.33M
    SetBitDepths(&pool[tree->pool_index_right], pool, bit_depths, level + 1);
149
2.39M
  } else {
150
2.39M
    bit_depths[tree->value] = level;
151
2.39M
  }
152
4.72M
}
153
154
// Create an optimal Huffman tree.
155
//
156
// (data,length): population counts.
157
// tree_limit: maximum bit depth (inclusive) of the codes.
158
// bit_depths[]: how many bits are used for the symbol.
159
//
160
// Returns 0 when an error has occurred.
161
//
162
// The catch here is that the tree cannot be arbitrarily deep
163
//
164
// count_limit is the value that is to be faked as the minimum value
165
// and this minimum value is raised until the tree matches the
166
// maximum length requirement.
167
//
168
// This algorithm is not of excellent performance for very long data blocks,
169
// especially when population counts are longer than 2**tree_limit, but
170
// we are not planning to use this with extremely long blocks.
171
//
172
// See https://en.wikipedia.org/wiki/Huffman_coding
173
static void GenerateOptimalTree(
174
    const uint32_t* const WEBP_COUNTED_BY(histogram_size) histogram,
175
    int histogram_size, HuffmanTree* WEBP_BIDI_INDEXABLE tree,
176
    int tree_depth_limit,
177
142k
    uint8_t* WEBP_COUNTED_BY(histogram_size) const bit_depths) {
178
142k
  uint32_t count_min;
179
142k
  HuffmanTree* WEBP_BIDI_INDEXABLE tree_pool;
180
142k
  int tree_size_orig = 0;
181
142k
  int i;
182
183
25.7M
  for (i = 0; i < histogram_size; ++i) {
184
25.5M
    if (histogram[i] != 0) {
185
2.27M
      ++tree_size_orig;
186
2.27M
    }
187
25.5M
  }
188
189
142k
  if (tree_size_orig == 0) {  // pretty optimal already!
190
8.50k
    return;
191
8.50k
  }
192
193
133k
  tree_pool = tree + tree_size_orig;
194
195
  // For block sizes with less than 64k symbols we never need to do a
196
  // second iteration of this loop.
197
  // If we actually start running inside this loop a lot, we would perhaps
198
  // be better off with the Katajainen algorithm.
199
133k
  assert(tree_size_orig <= (1 << (tree_depth_limit - 1)));
200
135k
  for (count_min = 1;; count_min *= 2) {
201
135k
    int tree_size = tree_size_orig;
202
    // We need to pack the Huffman tree in tree_depth_limit bits.
203
    // So, we try by faking histogram entries to be at least 'count_min'.
204
135k
    int idx = 0;
205
135k
    int j;
206
25.2M
    for (j = 0; j < histogram_size; ++j) {
207
25.0M
      if (histogram[j] != 0) {
208
2.46M
        const uint32_t count =
209
2.46M
            (histogram[j] < count_min) ? count_min : histogram[j];
210
2.46M
        tree[idx].total_count = count;
211
2.46M
        tree[idx].value = j;
212
2.46M
        tree[idx].pool_index_left = -1;
213
2.46M
        tree[idx].pool_index_right = -1;
214
2.46M
        ++idx;
215
2.46M
      }
216
25.0M
    }
217
218
    // Build the Huffman tree.
219
135k
    qsort(tree, tree_size, sizeof(*tree), CompareHuffmanTrees);
220
221
135k
    if (tree_size > 1) {  // Normal case.
222
62.8k
      int tree_pool_size = 0;
223
2.39M
      while (tree_size > 1) {  // Finish when we have only one root.
224
2.33M
        uint32_t count;
225
2.33M
        tree_pool[tree_pool_size++] = tree[tree_size - 1];
226
2.33M
        tree_pool[tree_pool_size++] = tree[tree_size - 2];
227
2.33M
        count = tree_pool[tree_pool_size - 1].total_count +
228
2.33M
                tree_pool[tree_pool_size - 2].total_count;
229
2.33M
        tree_size -= 2;
230
2.33M
        {
231
          // Search for the insertion point.
232
2.33M
          int k;
233
71.1M
          for (k = 0; k < tree_size; ++k) {
234
71.1M
            if (tree[k].total_count <= count) {
235
2.25M
              break;
236
2.25M
            }
237
71.1M
          }
238
2.33M
          memmove(tree + (k + 1), tree + k, (tree_size - k) * sizeof(*tree));
239
2.33M
          tree[k].total_count = count;
240
2.33M
          tree[k].value = -1;
241
242
2.33M
          tree[k].pool_index_left = tree_pool_size - 1;
243
2.33M
          tree[k].pool_index_right = tree_pool_size - 2;
244
2.33M
          tree_size = tree_size + 1;
245
2.33M
        }
246
2.33M
      }
247
62.8k
      SetBitDepths(&tree[0], tree_pool, bit_depths, 0);
248
72.5k
    } else if (tree_size == 1) {  // Trivial case: only one element.
249
72.5k
      bit_depths[tree[0].value] = 1;
250
72.5k
    }
251
252
135k
    {
253
      // Test if this Huffman tree satisfies our 'tree_depth_limit' criteria.
254
135k
      int max_depth = bit_depths[0];
255
25.0M
      for (j = 1; j < histogram_size; ++j) {
256
24.9M
        if (max_depth < bit_depths[j]) {
257
110k
          max_depth = bit_depths[j];
258
110k
        }
259
24.9M
      }
260
135k
      if (max_depth <= tree_depth_limit) {
261
133k
        break;
262
133k
      }
263
135k
    }
264
135k
  }
265
133k
}
266
267
// -----------------------------------------------------------------------------
268
// Coding of the Huffman tree values
269
270
static HuffmanTreeToken* WEBP_INDEXABLE
271
CodeRepeatedValues(int repetitions, HuffmanTreeToken* WEBP_INDEXABLE tokens,
272
823k
                   int value, int prev_value) {
273
823k
  assert(value <= MAX_ALLOWED_CODE_LENGTH);
274
823k
  if (value != prev_value) {
275
711k
    tokens->code = value;
276
711k
    tokens->extra_bits = 0;
277
711k
    ++tokens;
278
711k
    --repetitions;
279
711k
  }
280
925k
  while (repetitions >= 1) {
281
381k
    if (repetitions < 3) {
282
192k
      int i;
283
416k
      for (i = 0; i < repetitions; ++i) {
284
223k
        tokens->code = value;
285
223k
        tokens->extra_bits = 0;
286
223k
        ++tokens;
287
223k
      }
288
192k
      break;
289
192k
    } else if (repetitions < 7) {
290
87.2k
      tokens->code = 16;
291
87.2k
      tokens->extra_bits = repetitions - 3;
292
87.2k
      ++tokens;
293
87.2k
      break;
294
101k
    } else {
295
101k
      tokens->code = 16;
296
101k
      tokens->extra_bits = 3;
297
101k
      ++tokens;
298
101k
      repetitions -= 6;
299
101k
    }
300
381k
  }
301
823k
  return tokens;
302
823k
}
303
304
static HuffmanTreeToken* WEBP_INDEXABLE
305
279k
CodeRepeatedZeros(int repetitions, HuffmanTreeToken* WEBP_INDEXABLE tokens) {
306
287k
  while (repetitions >= 1) {
307
287k
    if (repetitions < 3) {
308
109k
      int i;
309
239k
      for (i = 0; i < repetitions; ++i) {
310
130k
        tokens->code = 0;  // 0-value
311
130k
        tokens->extra_bits = 0;
312
130k
        ++tokens;
313
130k
      }
314
109k
      break;
315
178k
    } else if (repetitions < 11) {
316
92.8k
      tokens->code = 17;
317
92.8k
      tokens->extra_bits = repetitions - 3;
318
92.8k
      ++tokens;
319
92.8k
      break;
320
92.8k
    } else if (repetitions < 139) {
321
77.8k
      tokens->code = 18;
322
77.8k
      tokens->extra_bits = repetitions - 11;
323
77.8k
      ++tokens;
324
77.8k
      break;
325
77.8k
    } else {
326
7.29k
      tokens->code = 18;
327
7.29k
      tokens->extra_bits = 0x7f;  // 138 repeated 0s
328
7.29k
      ++tokens;
329
7.29k
      repetitions -= 138;
330
7.29k
    }
331
287k
  }
332
279k
  return tokens;
333
279k
}
334
335
int VP8LCreateCompressedHuffmanTree(
336
    const HuffmanTreeCode* const tree,
337
27.6k
    HuffmanTreeToken* WEBP_COUNTED_BY(max_tokens) tokens, int max_tokens) {
338
27.6k
  HuffmanTreeToken* WEBP_INDEXABLE current_token = tokens;
339
27.6k
  HuffmanTreeToken* const starting_token = tokens;
340
27.6k
  HuffmanTreeToken* const ending_token = tokens + max_tokens;
341
27.6k
  const int depth_size = tree->num_symbols;
342
27.6k
  int prev_value = 8;  // 8 is the initial value for rle.
343
27.6k
  int i = 0;
344
27.6k
  assert(tokens != NULL);
345
1.13M
  while (i < depth_size) {
346
1.10M
    const int value = tree->code_lengths[i];
347
1.10M
    int k = i + 1;
348
1.10M
    int runs;
349
6.05M
    while (k < depth_size && tree->code_lengths[k] == value) ++k;
350
1.10M
    runs = k - i;
351
1.10M
    if (value == 0) {
352
279k
      current_token = CodeRepeatedZeros(runs, current_token);
353
823k
    } else {
354
823k
      current_token =
355
823k
          CodeRepeatedValues(runs, current_token, value, prev_value);
356
823k
      prev_value = value;
357
823k
    }
358
1.10M
    i += runs;
359
1.10M
    assert(current_token <= ending_token);
360
1.10M
  }
361
27.6k
  (void)ending_token;  // suppress 'unused variable' warning
362
27.6k
  return (int)(current_token - starting_token);
363
27.6k
}
364
365
// -----------------------------------------------------------------------------
366
367
// Pre-reversed 4-bit values.
368
static const uint8_t kReversedBits[16] = {0x0, 0x8, 0x4, 0xc, 0x2, 0xa,
369
                                          0x6, 0xe, 0x1, 0x9, 0x5, 0xd,
370
                                          0x3, 0xb, 0x7, 0xf};
371
372
25.5M
static uint32_t ReverseBits(int num_bits, uint32_t bits) {
373
25.5M
  uint32_t retval = 0;
374
25.5M
  int i = 0;
375
30.5M
  while (i < num_bits) {
376
4.96M
    i += 4;
377
4.96M
    retval |= kReversedBits[bits & 0xf] << (MAX_ALLOWED_CODE_LENGTH + 1 - i);
378
4.96M
    bits >>= 4;
379
4.96M
  }
380
25.5M
  retval >>= (MAX_ALLOWED_CODE_LENGTH + 1 - num_bits);
381
25.5M
  return retval;
382
25.5M
}
383
384
// Get the actual bit values for a tree of bit depths.
385
142k
static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
386
  // 0 bit-depth means that the symbol does not exist.
387
142k
  int i;
388
142k
  int len;
389
142k
  uint32_t next_code[MAX_ALLOWED_CODE_LENGTH + 1];
390
142k
  int depth_count[MAX_ALLOWED_CODE_LENGTH + 1] = {0};
391
392
142k
  assert(tree != NULL);
393
142k
  len = tree->num_symbols;
394
25.7M
  for (i = 0; i < len; ++i) {
395
25.5M
    const int code_length = tree->code_lengths[i];
396
25.5M
    assert(code_length <= MAX_ALLOWED_CODE_LENGTH);
397
25.5M
    ++depth_count[code_length];
398
25.5M
  }
399
142k
  depth_count[0] = 0;  // ignore unused symbol
400
142k
  next_code[0] = 0;
401
142k
  {
402
142k
    uint32_t code = 0;
403
2.27M
    for (i = 1; i <= MAX_ALLOWED_CODE_LENGTH; ++i) {
404
2.13M
      code = (code + depth_count[i - 1]) << 1;
405
2.13M
      next_code[i] = code;
406
2.13M
    }
407
142k
  }
408
25.7M
  for (i = 0; i < len; ++i) {
409
25.5M
    const int code_length = tree->code_lengths[i];
410
25.5M
    tree->codes[i] = ReverseBits(code_length, next_code[code_length]++);
411
25.5M
  }
412
142k
}
413
414
// -----------------------------------------------------------------------------
415
// Main entry point
416
417
void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
418
                           uint8_t* const buf_rle, HuffmanTree* const huff_tree,
419
142k
                           HuffmanTreeCode* const huff_code) {
420
142k
  const int num_symbols = huff_code->num_symbols;
421
142k
  uint32_t* const WEBP_BIDI_INDEXABLE bounded_histogram =
422
142k
      WEBP_UNSAFE_FORGE_BIDI_INDEXABLE(
423
142k
          uint32_t*, histogram, (size_t)num_symbols * sizeof(*histogram));
424
142k
  uint8_t* const WEBP_BIDI_INDEXABLE bounded_buf_rle =
425
142k
      WEBP_UNSAFE_FORGE_BIDI_INDEXABLE(uint8_t*, buf_rle,
426
142k
                                       (size_t)num_symbols * sizeof(*buf_rle));
427
428
142k
  memset(bounded_buf_rle, 0, num_symbols * sizeof(*buf_rle));
429
142k
  OptimizeHuffmanForRle(num_symbols, bounded_buf_rle, bounded_histogram);
430
142k
  GenerateOptimalTree(
431
142k
      bounded_histogram, num_symbols,
432
142k
      WEBP_UNSAFE_FORGE_BIDI_INDEXABLE(HuffmanTree*, huff_tree,
433
142k
                                       3 * num_symbols * sizeof(*huff_tree)),
434
142k
      tree_depth_limit, huff_code->code_lengths);
435
  // Create the actual bit codes for the bit lengths.
436
142k
  ConvertBitDepthsToSymbols(huff_code);
437
142k
}