/src/libwebp/src/utils/huffman_encode_utils.c
Line | Count | Source |
1 | | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style license |
4 | | // that can be found in the COPYING file in the root of the source |
5 | | // tree. An additional intellectual property rights grant can be found |
6 | | // in the file PATENTS. All contributing project authors may |
7 | | // be found in the AUTHORS file in the root of the source tree. |
8 | | // ----------------------------------------------------------------------------- |
9 | | // |
10 | | // Author: Jyrki Alakuijala (jyrki@google.com) |
11 | | // |
12 | | // Entropy encoding (Huffman) for webp lossless. |
13 | | |
14 | | #include "src/utils/huffman_encode_utils.h" |
15 | | |
16 | | #include <assert.h> |
17 | | #include <stdlib.h> |
18 | | #include <string.h> |
19 | | |
20 | | #include "src/utils/bounds_safety.h" |
21 | | #include "src/utils/utils.h" |
22 | | #include "src/webp/format_constants.h" |
23 | | #include "src/webp/types.h" |
24 | | |
25 | | WEBP_ASSUME_UNSAFE_INDEXABLE_ABI |
26 | | |
27 | | // ----------------------------------------------------------------------------- |
28 | | // Util function to optimize the symbol map for RLE coding |
29 | | |
// Heuristic for selecting the stride ranges to collapse: two population
// counts are close enough to be merged when they differ by less than 4.
static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
  const int diff = (a > b) ? (a - b) : (b - a);
  return diff < 4;
}
34 | | |
// Change the population counts in a way that the consequent
// Huffman tree compression, especially its RLE-part, gives smaller output.
// 'counts[0..length-1]' are modified in place. 'good_for_rle' is a
// same-sized scratch array marking runs that are already RLE-friendly;
// the caller zero-initializes it (see VP8LCreateHuffmanTree).
static void OptimizeHuffmanForRle(int length,
                                  uint8_t* const WEBP_COUNTED_BY(length)
                                      good_for_rle,
                                  uint32_t* const WEBP_COUNTED_BY(length)
                                      counts) {
  // 1) Let's make the Huffman code more compatible with rle encoding.
  int i;
  // Trim trailing zeros: after this loop, counts[length - 1] != 0 (or the
  // whole histogram was zero and there is nothing to optimize).
  for (; length >= 0; --length) {
    if (length == 0) {
      return;  // All zeros.
    }
    if (counts[length - 1] != 0) {
      // Now counts[0..length - 1] does not have trailing zeros.
      break;
    }
  }
  // 2) Let's mark all population counts that already can be encoded
  // with an rle code.
  {
    // Let's not spoil any of the existing good rle codes.
    // Mark any seq of 0's that is longer as 5 as a good_for_rle.
    // Mark any seq of non-0's that is longer as 7 as a good_for_rle.
    uint32_t symbol = counts[0];
    int stride = 0;
    // One extra iteration (i == length) flushes the final run.
    for (i = 0; i < length + 1; ++i) {
      if (i == length || counts[i] != symbol) {
        if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) {
          int k;
          // Mark the whole run that just ended (counts[i - stride .. i - 1]).
          for (k = 0; k < stride; ++k) {
            good_for_rle[i - k - 1] = 1;
          }
        }
        stride = 1;
        if (i != length) {
          symbol = counts[i];
        }
      } else {
        ++stride;
      }
    }
  }
  // 3) Let's replace those population counts that lead to more rle codes.
  // Runs of similar counts (within the heuristic tolerance) are collapsed
  // to their rounded average so they become equal and thus RLE-codable.
  {
    uint32_t stride = 0;
    uint32_t limit = counts[0];
    uint32_t sum = 0;
    for (i = 0; i < length + 1; ++i) {
      // End the current stride at the end of data, at any run already marked
      // good-for-rle (or right after one), or when counts[i] drifts too far
      // from the running average 'limit'.
      if (i == length || good_for_rle[i] || (i != 0 && good_for_rle[i - 1]) ||
          !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
        if (stride >= 4 || (stride >= 3 && sum == 0)) {
          uint32_t k;
          // The stride must end, collapse what we have, if we have enough (4).
          uint32_t count = (sum + stride / 2) / stride;
          if (count < 1) {
            count = 1;
          }
          if (sum == 0) {
            // Don't make an all zeros stride to be upgraded to ones.
            count = 0;
          }
          for (k = 0; k < stride; ++k) {
            // We don't want to change value at counts[i],
            // that is already belonging to the next stride. Thus - 1.
            counts[i - k - 1] = count;
          }
        }
        stride = 0;
        sum = 0;
        if (i < length - 3) {
          // All interesting strides have a count of at least 4,
          // at least when non-zeros.
          limit =
              (counts[i] + counts[i + 1] + counts[i + 2] + counts[i + 3] + 2) /
              4;
        } else if (i < length) {
          limit = counts[i];
        } else {
          limit = 0;
        }
      }
      ++stride;
      if (i != length) {
        sum += counts[i];
        if (stride >= 4) {
          // Keep 'limit' tracking the rounded average of the current stride.
          limit = (sum + stride / 2) / stride;
        }
      }
    }
  }
}
127 | | |
128 | | // A comparer function for two Huffman trees: sorts first by 'total count' |
129 | | // (more comes first), and then by 'value' (more comes first). |
130 | 341M | static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) { |
131 | 341M | const HuffmanTree* const t1 = (const HuffmanTree*)ptr1; |
132 | 341M | const HuffmanTree* const t2 = (const HuffmanTree*)ptr2; |
133 | 341M | if (t1->total_count > t2->total_count) { |
134 | 27.8M | return -1; |
135 | 313M | } else if (t1->total_count < t2->total_count) { |
136 | 53.0M | return 1; |
137 | 260M | } else { |
138 | 260M | assert(t1->value != t2->value); |
139 | 18.4E | return (t1->value < t2->value) ? -1 : 1; |
140 | 260M | } |
141 | 341M | } |
142 | | |
143 | | static void SetBitDepths(const HuffmanTree* const tree, |
144 | | const HuffmanTree* WEBP_BIDI_INDEXABLE const pool, |
145 | 202M | uint8_t* WEBP_INDEXABLE const bit_depths, int level) { |
146 | 202M | if (tree->pool_index_left >= 0) { |
147 | 99.0M | SetBitDepths(&pool[tree->pool_index_left], pool, bit_depths, level + 1); |
148 | 99.0M | SetBitDepths(&pool[tree->pool_index_right], pool, bit_depths, level + 1); |
149 | 103M | } else { |
150 | 103M | bit_depths[tree->value] = level; |
151 | 103M | } |
152 | 202M | } |
153 | | |
// Create an optimal Huffman tree.
//
// (data,length): population counts.
// tree_limit: maximum bit depth (inclusive) of the codes.
// bit_depths[]: how many bits are used for the symbol.
//
// Returns 0 when an error has occurred.
//
// The catch here is that the tree cannot be arbitrarily deep
//
// count_limit is the value that is to be faked as the minimum value
// and this minimum value is raised until the tree matches the
// maximum length requirement.
//
// This algorithm is not of excellent performance for very long data blocks,
// especially when population counts are longer than 2**tree_limit, but
// we are not planning to use this with extremely long blocks.
//
// See https://en.wikipedia.org/wiki/Huffman_coding
//
// NOTE(review): 'tree' must have room for the working nodes as well as the
// leaves; the caller forges a buffer of 3 * histogram_size entries (see
// VP8LCreateHuffmanTree).
static void GenerateOptimalTree(
    const uint32_t* const WEBP_COUNTED_BY(histogram_size) histogram,
    int histogram_size, HuffmanTree* WEBP_BIDI_INDEXABLE tree,
    int tree_depth_limit,
    uint8_t* WEBP_COUNTED_BY(histogram_size) const bit_depths) {
  uint32_t count_min;
  HuffmanTree* WEBP_BIDI_INDEXABLE tree_pool;
  int tree_size_orig = 0;
  int i;

  // Count the symbols actually present; only those become leaves.
  for (i = 0; i < histogram_size; ++i) {
    if (histogram[i] != 0) {
      ++tree_size_orig;
    }
  }

  if (tree_size_orig == 0) {  // pretty optimal already!
    return;
  }

  // The leaf nodes occupy tree[0..tree_size_orig-1]; merged (internal)
  // nodes are appended after them in 'tree_pool'.
  tree_pool = tree + tree_size_orig;

  // For block sizes with less than 64k symbols we never need to do a
  // second iteration of this loop.
  // If we actually start running inside this loop a lot, we would perhaps
  // be better off with the Katajainen algorithm.
  assert(tree_size_orig <= (1 << (tree_depth_limit - 1)));
  for (count_min = 1;; count_min *= 2) {
    int tree_size = tree_size_orig;
    // We need to pack the Huffman tree in tree_depth_limit bits.
    // So, we try by faking histogram entries to be at least 'count_min'.
    int idx = 0;
    int j;
    for (j = 0; j < histogram_size; ++j) {
      if (histogram[j] != 0) {
        const uint32_t count =
            (histogram[j] < count_min) ? count_min : histogram[j];
        tree[idx].total_count = count;
        tree[idx].value = j;
        tree[idx].pool_index_left = -1;   // -1 marks a leaf
        tree[idx].pool_index_right = -1;
        ++idx;
      }
    }

    // Build the Huffman tree.
    // Sorted by descending count, so the two smallest are at the tail.
    qsort(tree, tree_size, sizeof(*tree), CompareHuffmanTrees);

    if (tree_size > 1) {  // Normal case.
      int tree_pool_size = 0;
      while (tree_size > 1) {  // Finish when we have only one root.
        uint32_t count;
        // Move the two least-frequent nodes into the pool and merge them.
        tree_pool[tree_pool_size++] = tree[tree_size - 1];
        tree_pool[tree_pool_size++] = tree[tree_size - 2];
        count = tree_pool[tree_pool_size - 1].total_count +
                tree_pool[tree_pool_size - 2].total_count;
        tree_size -= 2;
        {
          // Search for the insertion point.
          int k;
          for (k = 0; k < tree_size; ++k) {
            if (tree[k].total_count <= count) {
              break;
            }
          }
          // Shift the tail to keep 'tree' sorted, then insert the merged node.
          memmove(tree + (k + 1), tree + k, (tree_size - k) * sizeof(*tree));
          tree[k].total_count = count;
          tree[k].value = -1;

          tree[k].pool_index_left = tree_pool_size - 1;
          tree[k].pool_index_right = tree_pool_size - 2;
          tree_size = tree_size + 1;
        }
      }
      SetBitDepths(&tree[0], tree_pool, bit_depths, 0);
    } else if (tree_size == 1) {  // Trivial case: only one element.
      bit_depths[tree[0].value] = 1;
    }

    {
      // Test if this Huffman tree satisfies our 'tree_depth_limit' criteria.
      int max_depth = bit_depths[0];
      for (j = 1; j < histogram_size; ++j) {
        if (max_depth < bit_depths[j]) {
          max_depth = bit_depths[j];
        }
      }
      if (max_depth <= tree_depth_limit) {
        break;  // Done; otherwise retry with doubled count_min.
      }
    }
  }
}
266 | | |
267 | | // ----------------------------------------------------------------------------- |
268 | | // Coding of the Huffman tree values |
269 | | |
270 | | static HuffmanTreeToken* WEBP_INDEXABLE |
271 | | CodeRepeatedValues(int repetitions, HuffmanTreeToken* WEBP_INDEXABLE tokens, |
272 | 19.4M | int value, int prev_value) { |
273 | 19.4M | assert(value <= MAX_ALLOWED_CODE_LENGTH); |
274 | 19.4M | if (value != prev_value) { |
275 | 11.4M | tokens->code = value; |
276 | 11.4M | tokens->extra_bits = 0; |
277 | 11.4M | ++tokens; |
278 | 11.4M | --repetitions; |
279 | 11.4M | } |
280 | 27.6M | while (repetitions >= 1) { |
281 | 20.3M | if (repetitions < 3) { |
282 | 8.02M | int i; |
283 | 17.3M | for (i = 0; i < repetitions; ++i) { |
284 | 9.29M | tokens->code = value; |
285 | 9.29M | tokens->extra_bits = 0; |
286 | 9.29M | ++tokens; |
287 | 9.29M | } |
288 | 8.02M | break; |
289 | 12.3M | } else if (repetitions < 7) { |
290 | 4.18M | tokens->code = 16; |
291 | 4.18M | tokens->extra_bits = repetitions - 3; |
292 | 4.18M | ++tokens; |
293 | 4.18M | break; |
294 | 8.16M | } else { |
295 | 8.16M | tokens->code = 16; |
296 | 8.16M | tokens->extra_bits = 3; |
297 | 8.16M | ++tokens; |
298 | 8.16M | repetitions -= 6; |
299 | 8.16M | } |
300 | 20.3M | } |
301 | 19.4M | return tokens; |
302 | 19.4M | } |
303 | | |
304 | | static HuffmanTreeToken* WEBP_INDEXABLE |
305 | 12.8M | CodeRepeatedZeros(int repetitions, HuffmanTreeToken* WEBP_INDEXABLE tokens) { |
306 | 13.6M | while (repetitions >= 1) { |
307 | 13.6M | if (repetitions < 3) { |
308 | 1.32M | int i; |
309 | 2.83M | for (i = 0; i < repetitions; ++i) { |
310 | 1.50M | tokens->code = 0; // 0-value |
311 | 1.50M | tokens->extra_bits = 0; |
312 | 1.50M | ++tokens; |
313 | 1.50M | } |
314 | 1.32M | break; |
315 | 12.3M | } else if (repetitions < 11) { |
316 | 4.89M | tokens->code = 17; |
317 | 4.89M | tokens->extra_bits = repetitions - 3; |
318 | 4.89M | ++tokens; |
319 | 4.89M | break; |
320 | 7.45M | } else if (repetitions < 139) { |
321 | 6.60M | tokens->code = 18; |
322 | 6.60M | tokens->extra_bits = repetitions - 11; |
323 | 6.60M | ++tokens; |
324 | 6.60M | break; |
325 | 6.60M | } else { |
326 | 846k | tokens->code = 18; |
327 | 846k | tokens->extra_bits = 0x7f; // 138 repeated 0s |
328 | 846k | ++tokens; |
329 | 846k | repetitions -= 138; |
330 | 846k | } |
331 | 13.6M | } |
332 | 12.8M | return tokens; |
333 | 12.8M | } |
334 | | |
335 | | int VP8LCreateCompressedHuffmanTree( |
336 | | const HuffmanTreeCode* const tree, |
337 | 1.85M | HuffmanTreeToken* WEBP_COUNTED_BY(max_tokens) tokens, int max_tokens) { |
338 | 1.85M | HuffmanTreeToken* WEBP_INDEXABLE current_token = tokens; |
339 | 1.85M | HuffmanTreeToken* const starting_token = tokens; |
340 | 1.85M | HuffmanTreeToken* const ending_token = tokens + max_tokens; |
341 | 1.85M | const int depth_size = tree->num_symbols; |
342 | 1.85M | int prev_value = 8; // 8 is the initial value for rle. |
343 | 1.85M | int i = 0; |
344 | 1.85M | assert(tokens != NULL); |
345 | 34.1M | while (i < depth_size) { |
346 | 32.3M | const int value = tree->code_lengths[i]; |
347 | 32.3M | int k = i + 1; |
348 | 32.3M | int runs; |
349 | 489M | while (k < depth_size && tree->code_lengths[k] == value) ++k; |
350 | 32.3M | runs = k - i; |
351 | 32.3M | if (value == 0) { |
352 | 12.8M | current_token = CodeRepeatedZeros(runs, current_token); |
353 | 19.4M | } else { |
354 | 19.4M | current_token = |
355 | 19.4M | CodeRepeatedValues(runs, current_token, value, prev_value); |
356 | 19.4M | prev_value = value; |
357 | 19.4M | } |
358 | 32.3M | i += runs; |
359 | 32.3M | assert(current_token <= ending_token); |
360 | 32.3M | } |
361 | 1.85M | (void)ending_token; // suppress 'unused variable' warning |
362 | 1.85M | return (int)(current_token - starting_token); |
363 | 1.85M | } |
364 | | |
365 | | // ----------------------------------------------------------------------------- |
366 | | |
// Pre-reversed 4-bit values: kReversedBits[v] is the bit-reversal of the
// 4-bit value 'v' (e.g. 0x1 -> 0x8, 0x3 -> 0xc).
static const uint8_t kReversedBits[16] = {0x0, 0x8, 0x4, 0xc, 0x2, 0xa,
                                          0x6, 0xe, 0x1, 0x9, 0x5, 0xd,
                                          0x3, 0xb, 0x7, 0xf};
371 | | |
372 | 1.60G | static uint32_t ReverseBits(int num_bits, uint32_t bits) { |
373 | 1.60G | uint32_t retval = 0; |
374 | 1.60G | int i = 0; |
375 | 1.81G | while (i < num_bits) { |
376 | 204M | i += 4; |
377 | 204M | retval |= kReversedBits[bits & 0xf] << (MAX_ALLOWED_CODE_LENGTH + 1 - i); |
378 | 204M | bits >>= 4; |
379 | 204M | } |
380 | 1.60G | retval >>= (MAX_ALLOWED_CODE_LENGTH + 1 - num_bits); |
381 | 1.60G | return retval; |
382 | 1.60G | } |
383 | | |
// Get the actual bit values for a tree of bit depths.
// Assigns canonical Huffman codes: symbols of equal length get consecutive
// codes, and each code is bit-reversed before storage in tree->codes[].
static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
  // 0 bit-depth means that the symbol does not exist.
  int i;
  int len;
  // next_code[l]: the next code value to hand out at length 'l'.
  uint32_t next_code[MAX_ALLOWED_CODE_LENGTH + 1];
  // depth_count[l]: how many symbols have code length 'l'.
  int depth_count[MAX_ALLOWED_CODE_LENGTH + 1] = {0};

  assert(tree != NULL);
  len = tree->num_symbols;
  for (i = 0; i < len; ++i) {
    const int code_length = tree->code_lengths[i];
    assert(code_length <= MAX_ALLOWED_CODE_LENGTH);
    ++depth_count[code_length];
  }
  depth_count[0] = 0;  // ignore unused symbol
  next_code[0] = 0;
  {
    // Compute the first code of each length from the counts of all
    // shorter lengths (standard canonical-code construction).
    uint32_t code = 0;
    for (i = 1; i <= MAX_ALLOWED_CODE_LENGTH; ++i) {
      code = (code + depth_count[i - 1]) << 1;
      next_code[i] = code;
    }
  }
  for (i = 0; i < len; ++i) {
    const int code_length = tree->code_lengths[i];
    // Post-increment hands consecutive codes to symbols of equal length.
    tree->codes[i] = ReverseBits(code_length, next_code[code_length]++);
  }
}
413 | | |
414 | | // ----------------------------------------------------------------------------- |
415 | | // Main entry point |
416 | | |
// Builds a complete Huffman code for 'histogram':
//   1) reshapes the counts so the code-length sequence RLE-compresses well,
//   2) computes depth-limited optimal code lengths into
//      huff_code->code_lengths,
//   3) converts the lengths into the actual (bit-reversed) codes.
// 'buf_rle' is caller-provided scratch of num_symbols bytes; 'huff_tree'
// must hold 3 * num_symbols HuffmanTree entries (the size forged below).
void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
                           uint8_t* const buf_rle, HuffmanTree* const huff_tree,
                           HuffmanTreeCode* const huff_code) {
  const int num_symbols = huff_code->num_symbols;
  // Attach bounds to the raw pointers for the -fbounds-safety helpers.
  uint32_t* const WEBP_BIDI_INDEXABLE bounded_histogram =
      WEBP_UNSAFE_FORGE_BIDI_INDEXABLE(
          uint32_t*, histogram, (size_t)num_symbols * sizeof(*histogram));
  uint8_t* const WEBP_BIDI_INDEXABLE bounded_buf_rle =
      WEBP_UNSAFE_FORGE_BIDI_INDEXABLE(uint8_t*, buf_rle,
                                       (size_t)num_symbols * sizeof(*buf_rle));

  // OptimizeHuffmanForRle expects its scratch marks zeroed.
  memset(bounded_buf_rle, 0, num_symbols * sizeof(*buf_rle));
  OptimizeHuffmanForRle(num_symbols, bounded_buf_rle, bounded_histogram);
  GenerateOptimalTree(
      bounded_histogram, num_symbols,
      WEBP_UNSAFE_FORGE_BIDI_INDEXABLE(HuffmanTree*, huff_tree,
                                       3 * num_symbols * sizeof(*huff_tree)),
      tree_depth_limit, huff_code->code_lengths);
  // Create the actual bit codes for the bit lengths.
  ConvertBitDepthsToSymbols(huff_code);
}
436 | 9.03M | ConvertBitDepthsToSymbols(huff_code); |
437 | 9.03M | } |