/src/libwebp/src/utils/huffman_encode_utils.c
Line | Count | Source |
1 | | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style license |
4 | | // that can be found in the COPYING file in the root of the source |
5 | | // tree. An additional intellectual property rights grant can be found |
6 | | // in the file PATENTS. All contributing project authors may |
7 | | // be found in the AUTHORS file in the root of the source tree. |
8 | | // ----------------------------------------------------------------------------- |
9 | | // |
10 | | // Author: Jyrki Alakuijala (jyrki@google.com) |
11 | | // |
12 | | // Entropy encoding (Huffman) for webp lossless. |
13 | | |
14 | | #include "src/utils/huffman_encode_utils.h" |
15 | | |
16 | | #include <assert.h> |
17 | | #include <stdlib.h> |
18 | | #include <string.h> |
19 | | |
20 | | #include "src/utils/bounds_safety.h" |
21 | | #include "src/utils/utils.h" |
22 | | #include "src/webp/format_constants.h" |
23 | | #include "src/webp/types.h" |
24 | | |
25 | | WEBP_ASSUME_UNSAFE_INDEXABLE_ABI |
26 | | |
27 | | // ----------------------------------------------------------------------------- |
28 | | // Util function to optimize the symbol map for RLE coding |
29 | | |
30 | | // Heuristics for selecting the stride ranges to collapse. |
31 | 2.23M | static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) { |
32 | 2.23M | return abs(a - b) < 4; |
33 | 2.23M | } |
34 | | |
35 | | // Change the population counts in a way that the consequent |
36 | | // Huffman tree compression, especially its RLE-part, give smaller output. |
37 | | static void OptimizeHuffmanForRle(int length, |
38 | | uint8_t* const WEBP_COUNTED_BY(length) |
39 | | good_for_rle, |
40 | | uint32_t* const WEBP_COUNTED_BY(length) |
41 | 142k | counts) { |
42 | | // 1) Let's make the Huffman code more compatible with rle encoding. |
43 | 142k | int i; |
44 | 15.4M | for (; length >= 0; --length) { |
45 | 15.4M | if (length == 0) { |
46 | 8.50k | return; // All zeros. |
47 | 8.50k | } |
48 | 15.4M | if (counts[length - 1] != 0) { |
49 | | // Now counts[0..length - 1] does not have trailing zeros. |
50 | 133k | break; |
51 | 133k | } |
52 | 15.4M | } |
53 | | // 2) Let's mark all population counts that already can be encoded |
54 | | // with an rle code. |
55 | 133k | { |
56 | | // Let's not spoil any of the existing good rle codes. |
57 | | // Mark any seq of 0's that is longer as 5 as a good_for_rle. |
58 | | // Mark any seq of non-0's that is longer as 7 as a good_for_rle. |
59 | 133k | uint32_t symbol = counts[0]; |
60 | 133k | int stride = 0; |
61 | 10.5M | for (i = 0; i < length + 1; ++i) { |
62 | 10.3M | if (i == length || counts[i] != symbol) { |
63 | 2.20M | if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) { |
64 | 180k | int k; |
65 | 8.00M | for (k = 0; k < stride; ++k) { |
66 | 7.82M | good_for_rle[i - k - 1] = 1; |
67 | 7.82M | } |
68 | 180k | } |
69 | 2.20M | stride = 1; |
70 | 2.20M | if (i != length) { |
71 | 2.07M | symbol = counts[i]; |
72 | 2.07M | } |
73 | 8.15M | } else { |
74 | 8.15M | ++stride; |
75 | 8.15M | } |
76 | 10.3M | } |
77 | 133k | } |
78 | | // 3) Let's replace those population counts that lead to more rle codes. |
79 | 133k | { |
80 | 133k | uint32_t stride = 0; |
81 | 133k | uint32_t limit = counts[0]; |
82 | 133k | uint32_t sum = 0; |
83 | 10.5M | for (i = 0; i < length + 1; ++i) { |
84 | 10.3M | if (i == length || good_for_rle[i] || (i != 0 && good_for_rle[i - 1]) || |
85 | 9.09M | !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) { |
86 | 9.09M | if (stride >= 4 || (stride >= 3 && sum == 0)) { |
87 | 143k | uint32_t k; |
88 | | // The stride must end, collapse what we have, if we have enough (4). |
89 | 143k | uint32_t count = (sum + stride / 2) / stride; |
90 | 143k | if (count < 1) { |
91 | 26.9k | count = 1; |
92 | 26.9k | } |
93 | 143k | if (sum == 0) { |
94 | | // Don't make an all zeros stride to be upgraded to ones. |
95 | 14.2k | count = 0; |
96 | 14.2k | } |
97 | 1.22M | for (k = 0; k < stride; ++k) { |
98 | | // We don't want to change value at counts[i], |
99 | | // that is already belonging to the next stride. Thus - 1. |
100 | 1.08M | counts[i - k - 1] = count; |
101 | 1.08M | } |
102 | 143k | } |
103 | 9.09M | stride = 0; |
104 | 9.09M | sum = 0; |
105 | 9.09M | if (i < length - 3) { |
106 | | // All interesting strides have a count of at least 4, |
107 | | // at least when non-zeros. |
108 | 8.79M | limit = |
109 | 8.79M | (counts[i] + counts[i + 1] + counts[i + 2] + counts[i + 3] + 2) / |
110 | 8.79M | 4; |
111 | 8.79M | } else if (i < length) { |
112 | 166k | limit = counts[i]; |
113 | 166k | } else { |
114 | 133k | limit = 0; |
115 | 133k | } |
116 | 9.09M | } |
117 | 10.3M | ++stride; |
118 | 10.3M | if (i != length) { |
119 | 10.2M | sum += counts[i]; |
120 | 10.2M | if (stride >= 4) { |
121 | 652k | limit = (sum + stride / 2) / stride; |
122 | 652k | } |
123 | 10.2M | } |
124 | 10.3M | } |
125 | 133k | } |
126 | 133k | } |
127 | | |
128 | | // A comparer function for two Huffman trees: sorts first by 'total count' |
129 | | // (more comes first), and then by 'value' (more comes first). |
130 | 10.7M | static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) { |
131 | 10.7M | const HuffmanTree* const t1 = (const HuffmanTree*)ptr1; |
132 | 10.7M | const HuffmanTree* const t2 = (const HuffmanTree*)ptr2; |
133 | 10.7M | if (t1->total_count > t2->total_count) { |
134 | 2.82M | return -1; |
135 | 7.88M | } else if (t1->total_count < t2->total_count) { |
136 | 4.12M | return 1; |
137 | 4.12M | } else { |
138 | 3.75M | assert(t1->value != t2->value); |
139 | 3.75M | return (t1->value < t2->value) ? -1 : 1; |
140 | 3.75M | } |
141 | 10.7M | } |
142 | | |
143 | | static void SetBitDepths(const HuffmanTree* const tree, |
144 | | const HuffmanTree* WEBP_BIDI_INDEXABLE const pool, |
145 | 4.72M | uint8_t* WEBP_INDEXABLE const bit_depths, int level) { |
146 | 4.72M | if (tree->pool_index_left >= 0) { |
147 | 2.33M | SetBitDepths(&pool[tree->pool_index_left], pool, bit_depths, level + 1); |
148 | 2.33M | SetBitDepths(&pool[tree->pool_index_right], pool, bit_depths, level + 1); |
149 | 2.39M | } else { |
150 | 2.39M | bit_depths[tree->value] = level; |
151 | 2.39M | } |
152 | 4.72M | } |
153 | | |
154 | | // Create an optimal Huffman tree. |
155 | | // |
156 | | // (data,length): population counts. |
157 | | // tree_limit: maximum bit depth (inclusive) of the codes. |
158 | | // bit_depths[]: how many bits are used for the symbol. |
159 | | // |
160 | | // Returns 0 when an error has occurred. |
161 | | // |
162 | | // The catch here is that the tree cannot be arbitrarily deep |
163 | | // |
164 | | // count_limit is the value that is to be faked as the minimum value |
165 | | // and this minimum value is raised until the tree matches the |
166 | | // maximum length requirement. |
167 | | // |
168 | | // This algorithm is not of excellent performance for very long data blocks, |
169 | | // especially when population counts are longer than 2**tree_limit, but |
170 | | // we are not planning to use this with extremely long blocks. |
171 | | // |
172 | | // See https://en.wikipedia.org/wiki/Huffman_coding |
173 | | static void GenerateOptimalTree( |
174 | | const uint32_t* const WEBP_COUNTED_BY(histogram_size) histogram, |
175 | | int histogram_size, HuffmanTree* WEBP_BIDI_INDEXABLE tree, |
176 | | int tree_depth_limit, |
177 | 142k | uint8_t* WEBP_COUNTED_BY(histogram_size) const bit_depths) { |
178 | 142k | uint32_t count_min; |
179 | 142k | HuffmanTree* WEBP_BIDI_INDEXABLE tree_pool; |
180 | 142k | int tree_size_orig = 0; |
181 | 142k | int i; |
182 | | |
183 | 25.7M | for (i = 0; i < histogram_size; ++i) { |
184 | 25.5M | if (histogram[i] != 0) { |
185 | 2.27M | ++tree_size_orig; |
186 | 2.27M | } |
187 | 25.5M | } |
188 | | |
189 | 142k | if (tree_size_orig == 0) { // pretty optimal already! |
190 | 8.50k | return; |
191 | 8.50k | } |
192 | | |
193 | 133k | tree_pool = tree + tree_size_orig; |
194 | | |
195 | | // For block sizes with less than 64k symbols we never need to do a |
196 | | // second iteration of this loop. |
197 | | // If we actually start running inside this loop a lot, we would perhaps |
198 | | // be better off with the Katajainen algorithm. |
199 | 133k | assert(tree_size_orig <= (1 << (tree_depth_limit - 1))); |
200 | 135k | for (count_min = 1;; count_min *= 2) { |
201 | 135k | int tree_size = tree_size_orig; |
202 | | // We need to pack the Huffman tree in tree_depth_limit bits. |
203 | | // So, we try by faking histogram entries to be at least 'count_min'. |
204 | 135k | int idx = 0; |
205 | 135k | int j; |
206 | 25.2M | for (j = 0; j < histogram_size; ++j) { |
207 | 25.0M | if (histogram[j] != 0) { |
208 | 2.46M | const uint32_t count = |
209 | 2.46M | (histogram[j] < count_min) ? count_min : histogram[j]; |
210 | 2.46M | tree[idx].total_count = count; |
211 | 2.46M | tree[idx].value = j; |
212 | 2.46M | tree[idx].pool_index_left = -1; |
213 | 2.46M | tree[idx].pool_index_right = -1; |
214 | 2.46M | ++idx; |
215 | 2.46M | } |
216 | 25.0M | } |
217 | | |
218 | | // Build the Huffman tree. |
219 | 135k | qsort(tree, tree_size, sizeof(*tree), CompareHuffmanTrees); |
220 | | |
221 | 135k | if (tree_size > 1) { // Normal case. |
222 | 62.8k | int tree_pool_size = 0; |
223 | 2.39M | while (tree_size > 1) { // Finish when we have only one root. |
224 | 2.33M | uint32_t count; |
225 | 2.33M | tree_pool[tree_pool_size++] = tree[tree_size - 1]; |
226 | 2.33M | tree_pool[tree_pool_size++] = tree[tree_size - 2]; |
227 | 2.33M | count = tree_pool[tree_pool_size - 1].total_count + |
228 | 2.33M | tree_pool[tree_pool_size - 2].total_count; |
229 | 2.33M | tree_size -= 2; |
230 | 2.33M | { |
231 | | // Search for the insertion point. |
232 | 2.33M | int k; |
233 | 71.1M | for (k = 0; k < tree_size; ++k) { |
234 | 71.1M | if (tree[k].total_count <= count) { |
235 | 2.25M | break; |
236 | 2.25M | } |
237 | 71.1M | } |
238 | 2.33M | memmove(tree + (k + 1), tree + k, (tree_size - k) * sizeof(*tree)); |
239 | 2.33M | tree[k].total_count = count; |
240 | 2.33M | tree[k].value = -1; |
241 | | |
242 | 2.33M | tree[k].pool_index_left = tree_pool_size - 1; |
243 | 2.33M | tree[k].pool_index_right = tree_pool_size - 2; |
244 | 2.33M | tree_size = tree_size + 1; |
245 | 2.33M | } |
246 | 2.33M | } |
247 | 62.8k | SetBitDepths(&tree[0], tree_pool, bit_depths, 0); |
248 | 72.5k | } else if (tree_size == 1) { // Trivial case: only one element. |
249 | 72.5k | bit_depths[tree[0].value] = 1; |
250 | 72.5k | } |
251 | | |
252 | 135k | { |
253 | | // Test if this Huffman tree satisfies our 'tree_depth_limit' criteria. |
254 | 135k | int max_depth = bit_depths[0]; |
255 | 25.0M | for (j = 1; j < histogram_size; ++j) { |
256 | 24.9M | if (max_depth < bit_depths[j]) { |
257 | 110k | max_depth = bit_depths[j]; |
258 | 110k | } |
259 | 24.9M | } |
260 | 135k | if (max_depth <= tree_depth_limit) { |
261 | 133k | break; |
262 | 133k | } |
263 | 135k | } |
264 | 135k | } |
265 | 133k | } |
266 | | |
267 | | // ----------------------------------------------------------------------------- |
268 | | // Coding of the Huffman tree values |
269 | | |
270 | | static HuffmanTreeToken* WEBP_INDEXABLE |
271 | | CodeRepeatedValues(int repetitions, HuffmanTreeToken* WEBP_INDEXABLE tokens, |
272 | 823k | int value, int prev_value) { |
273 | 823k | assert(value <= MAX_ALLOWED_CODE_LENGTH); |
274 | 823k | if (value != prev_value) { |
275 | 711k | tokens->code = value; |
276 | 711k | tokens->extra_bits = 0; |
277 | 711k | ++tokens; |
278 | 711k | --repetitions; |
279 | 711k | } |
280 | 925k | while (repetitions >= 1) { |
281 | 381k | if (repetitions < 3) { |
282 | 192k | int i; |
283 | 416k | for (i = 0; i < repetitions; ++i) { |
284 | 223k | tokens->code = value; |
285 | 223k | tokens->extra_bits = 0; |
286 | 223k | ++tokens; |
287 | 223k | } |
288 | 192k | break; |
289 | 192k | } else if (repetitions < 7) { |
290 | 87.2k | tokens->code = 16; |
291 | 87.2k | tokens->extra_bits = repetitions - 3; |
292 | 87.2k | ++tokens; |
293 | 87.2k | break; |
294 | 101k | } else { |
295 | 101k | tokens->code = 16; |
296 | 101k | tokens->extra_bits = 3; |
297 | 101k | ++tokens; |
298 | 101k | repetitions -= 6; |
299 | 101k | } |
300 | 381k | } |
301 | 823k | return tokens; |
302 | 823k | } |
303 | | |
304 | | static HuffmanTreeToken* WEBP_INDEXABLE |
305 | 279k | CodeRepeatedZeros(int repetitions, HuffmanTreeToken* WEBP_INDEXABLE tokens) { |
306 | 287k | while (repetitions >= 1) { |
307 | 287k | if (repetitions < 3) { |
308 | 109k | int i; |
309 | 239k | for (i = 0; i < repetitions; ++i) { |
310 | 130k | tokens->code = 0; // 0-value |
311 | 130k | tokens->extra_bits = 0; |
312 | 130k | ++tokens; |
313 | 130k | } |
314 | 109k | break; |
315 | 178k | } else if (repetitions < 11) { |
316 | 92.8k | tokens->code = 17; |
317 | 92.8k | tokens->extra_bits = repetitions - 3; |
318 | 92.8k | ++tokens; |
319 | 92.8k | break; |
320 | 92.8k | } else if (repetitions < 139) { |
321 | 77.8k | tokens->code = 18; |
322 | 77.8k | tokens->extra_bits = repetitions - 11; |
323 | 77.8k | ++tokens; |
324 | 77.8k | break; |
325 | 77.8k | } else { |
326 | 7.29k | tokens->code = 18; |
327 | 7.29k | tokens->extra_bits = 0x7f; // 138 repeated 0s |
328 | 7.29k | ++tokens; |
329 | 7.29k | repetitions -= 138; |
330 | 7.29k | } |
331 | 287k | } |
332 | 279k | return tokens; |
333 | 279k | } |
334 | | |
335 | | int VP8LCreateCompressedHuffmanTree( |
336 | | const HuffmanTreeCode* const tree, |
337 | 27.6k | HuffmanTreeToken* WEBP_COUNTED_BY(max_tokens) tokens, int max_tokens) { |
338 | 27.6k | HuffmanTreeToken* WEBP_INDEXABLE current_token = tokens; |
339 | 27.6k | HuffmanTreeToken* const starting_token = tokens; |
340 | 27.6k | HuffmanTreeToken* const ending_token = tokens + max_tokens; |
341 | 27.6k | const int depth_size = tree->num_symbols; |
342 | 27.6k | int prev_value = 8; // 8 is the initial value for rle. |
343 | 27.6k | int i = 0; |
344 | 27.6k | assert(tokens != NULL); |
345 | 1.13M | while (i < depth_size) { |
346 | 1.10M | const int value = tree->code_lengths[i]; |
347 | 1.10M | int k = i + 1; |
348 | 1.10M | int runs; |
349 | 6.05M | while (k < depth_size && tree->code_lengths[k] == value) ++k; |
350 | 1.10M | runs = k - i; |
351 | 1.10M | if (value == 0) { |
352 | 279k | current_token = CodeRepeatedZeros(runs, current_token); |
353 | 823k | } else { |
354 | 823k | current_token = |
355 | 823k | CodeRepeatedValues(runs, current_token, value, prev_value); |
356 | 823k | prev_value = value; |
357 | 823k | } |
358 | 1.10M | i += runs; |
359 | 1.10M | assert(current_token <= ending_token); |
360 | 1.10M | } |
361 | 27.6k | (void)ending_token; // suppress 'unused variable' warning |
362 | 27.6k | return (int)(current_token - starting_token); |
363 | 27.6k | } |
364 | | |
365 | | // ----------------------------------------------------------------------------- |
366 | | |
367 | | // Pre-reversed 4-bit values. |
368 | | static const uint8_t kReversedBits[16] = {0x0, 0x8, 0x4, 0xc, 0x2, 0xa, |
369 | | 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, |
370 | | 0x3, 0xb, 0x7, 0xf}; |
371 | | |
372 | 25.5M | static uint32_t ReverseBits(int num_bits, uint32_t bits) { |
373 | 25.5M | uint32_t retval = 0; |
374 | 25.5M | int i = 0; |
375 | 30.5M | while (i < num_bits) { |
376 | 4.96M | i += 4; |
377 | 4.96M | retval |= kReversedBits[bits & 0xf] << (MAX_ALLOWED_CODE_LENGTH + 1 - i); |
378 | 4.96M | bits >>= 4; |
379 | 4.96M | } |
380 | 25.5M | retval >>= (MAX_ALLOWED_CODE_LENGTH + 1 - num_bits); |
381 | 25.5M | return retval; |
382 | 25.5M | } |
383 | | |
384 | | // Get the actual bit values for a tree of bit depths. |
385 | 142k | static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) { |
386 | | // 0 bit-depth means that the symbol does not exist. |
387 | 142k | int i; |
388 | 142k | int len; |
389 | 142k | uint32_t next_code[MAX_ALLOWED_CODE_LENGTH + 1]; |
390 | 142k | int depth_count[MAX_ALLOWED_CODE_LENGTH + 1] = {0}; |
391 | | |
392 | 142k | assert(tree != NULL); |
393 | 142k | len = tree->num_symbols; |
394 | 25.7M | for (i = 0; i < len; ++i) { |
395 | 25.5M | const int code_length = tree->code_lengths[i]; |
396 | 25.5M | assert(code_length <= MAX_ALLOWED_CODE_LENGTH); |
397 | 25.5M | ++depth_count[code_length]; |
398 | 25.5M | } |
399 | 142k | depth_count[0] = 0; // ignore unused symbol |
400 | 142k | next_code[0] = 0; |
401 | 142k | { |
402 | 142k | uint32_t code = 0; |
403 | 2.27M | for (i = 1; i <= MAX_ALLOWED_CODE_LENGTH; ++i) { |
404 | 2.13M | code = (code + depth_count[i - 1]) << 1; |
405 | 2.13M | next_code[i] = code; |
406 | 2.13M | } |
407 | 142k | } |
408 | 25.7M | for (i = 0; i < len; ++i) { |
409 | 25.5M | const int code_length = tree->code_lengths[i]; |
410 | 25.5M | tree->codes[i] = ReverseBits(code_length, next_code[code_length]++); |
411 | 25.5M | } |
412 | 142k | } |
413 | | |
414 | | // ----------------------------------------------------------------------------- |
415 | | // Main entry point |
416 | | |
417 | | void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, |
418 | | uint8_t* const buf_rle, HuffmanTree* const huff_tree, |
419 | 142k | HuffmanTreeCode* const huff_code) { |
420 | 142k | const int num_symbols = huff_code->num_symbols; |
421 | 142k | uint32_t* const WEBP_BIDI_INDEXABLE bounded_histogram = |
422 | 142k | WEBP_UNSAFE_FORGE_BIDI_INDEXABLE( |
423 | 142k | uint32_t*, histogram, (size_t)num_symbols * sizeof(*histogram)); |
424 | 142k | uint8_t* const WEBP_BIDI_INDEXABLE bounded_buf_rle = |
425 | 142k | WEBP_UNSAFE_FORGE_BIDI_INDEXABLE(uint8_t*, buf_rle, |
426 | 142k | (size_t)num_symbols * sizeof(*buf_rle)); |
427 | | |
428 | 142k | memset(bounded_buf_rle, 0, num_symbols * sizeof(*buf_rle)); |
429 | 142k | OptimizeHuffmanForRle(num_symbols, bounded_buf_rle, bounded_histogram); |
430 | 142k | GenerateOptimalTree( |
431 | 142k | bounded_histogram, num_symbols, |
432 | 142k | WEBP_UNSAFE_FORGE_BIDI_INDEXABLE(HuffmanTree*, huff_tree, |
433 | 142k | 3 * num_symbols * sizeof(*huff_tree)), |
434 | 142k | tree_depth_limit, huff_code->code_lengths); |
435 | | // Create the actual bit codes for the bit lengths. |
436 | 142k | ConvertBitDepthsToSymbols(huff_code); |
437 | 142k | } |