/src/libjxl/lib/jxl/enc_huffman_tree.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/enc_huffman_tree.h" |
7 | | |
8 | | #include <algorithm> |
9 | | #include <cstddef> |
10 | | #include <cstdint> |
11 | | #include <limits> |
12 | | #include <vector> |
13 | | |
14 | | #include "lib/jxl/base/compiler_specific.h" |
15 | | #include "lib/jxl/base/status.h" |
16 | | |
17 | | namespace jxl { |
18 | | |
19 | | void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, |
20 | 26.8k | uint8_t level) { |
21 | 26.8k | if (p.index_left >= 0) { |
22 | 11.7k | ++level; |
23 | 11.7k | SetDepth(pool[p.index_left], pool, depth, level); |
24 | 11.7k | SetDepth(pool[p.index_right_or_value], pool, depth, level); |
25 | 15.0k | } else { |
26 | 15.0k | depth[p.index_right_or_value] = level; |
27 | 15.0k | } |
28 | 26.8k | } |
29 | | |
30 | | // Compare the root nodes, least popular first; indices are in decreasing order |
31 | | // before sorting is applied. |
32 | 27.6k | static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) { |
33 | 27.6k | return v0.total_count != v1.total_count |
34 | 27.6k | ? v0.total_count < v1.total_count |
35 | 27.6k | : v0.index_right_or_value > v1.index_right_or_value; |
36 | 27.6k | } |
37 | | |
38 | | // This function will create a Huffman tree. |
39 | | // |
40 | | // The catch here is that the tree cannot be arbitrarily deep. |
41 | | // Brotli specifies a maximum depth of 15 bits for "code trees" |
42 | | // and 7 bits for "code length code trees." |
43 | | // |
44 | | // count_limit is the value that is to be faked as the minimum value |
45 | | // and this minimum value is raised until the tree matches the |
46 | | // maximum length requirement. |
47 | | // |
48 | | // This algorithm is not of excellent performance for very long data blocks, |
49 | | // especially when population counts are longer than 2**tree_limit, but |
50 | | // we are not planning to use this with extremely long blocks. |
51 | | // |
52 | | // See http://en.wikipedia.org/wiki/Huffman_coding |
53 | | void CreateHuffmanTree(const uint32_t* data, const size_t length, |
54 | 3.30k | const int tree_limit, uint8_t* depth) { |
55 | | // For block sizes below 64 kB, we never need to do a second iteration |
56 | | // of this loop. Probably all of our block sizes will be smaller than |
57 | | // that, so this loop is mostly of academic interest. If we actually |
58 | | // would need this, we would be better off with the Katajainen algorithm. |
59 | 3.30k | for (uint32_t count_limit = 1;; count_limit *= 2) { |
60 | 3.30k | std::vector<HuffmanTree> tree; |
61 | 3.30k | tree.reserve(2 * length + 1); |
62 | | |
63 | 36.7k | for (size_t i = length; i != 0;) { |
64 | 33.4k | --i; |
65 | 33.4k | if (data[i]) { |
66 | 15.0k | const uint32_t count = std::max(data[i], count_limit - 1); |
67 | 15.0k | tree.emplace_back(count, -1, static_cast<int16_t>(i)); |
68 | 15.0k | } |
69 | 33.4k | } |
70 | | |
71 | 3.30k | const size_t n = tree.size(); |
72 | 3.30k | if (n == 1) { |
73 | | // Fake value; will be fixed on upper level. |
74 | 13 | depth[tree[0].index_right_or_value] = 1; |
75 | 13 | break; |
76 | 13 | } |
77 | | |
78 | 3.29k | std::sort(tree.begin(), tree.end(), Compare); |
79 | | |
80 | | // The nodes are: |
81 | | // [0, n): the sorted leaf nodes that we start with. |
82 | | // [n]: we add a sentinel here. |
83 | | // [n + 1, 2n): new parent nodes are added here, starting from |
84 | | // (n+1). These are naturally in ascending order. |
85 | | // [2n]: we add a sentinel at the end as well. |
86 | | // There will be (2n+1) elements at the end. |
87 | 3.29k | const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1); |
88 | 3.29k | tree.push_back(sentinel); |
89 | 3.29k | tree.push_back(sentinel); |
90 | | |
91 | 3.29k | size_t i = 0; // Points to the next leaf node. |
92 | 3.29k | size_t j = n + 1; // Points to the next non-leaf node. |
93 | 15.0k | for (size_t k = n - 1; k != 0; --k) { |
94 | 11.7k | size_t left; |
95 | 11.7k | size_t right; |
96 | 11.7k | if (tree[i].total_count <= tree[j].total_count) { |
97 | 7.75k | left = i; |
98 | 7.75k | ++i; |
99 | 7.75k | } else { |
100 | 4.03k | left = j; |
101 | 4.03k | ++j; |
102 | 4.03k | } |
103 | 11.7k | if (tree[i].total_count <= tree[j].total_count) { |
104 | 7.32k | right = i; |
105 | 7.32k | ++i; |
106 | 7.32k | } else { |
107 | 4.46k | right = j; |
108 | 4.46k | ++j; |
109 | 4.46k | } |
110 | | |
111 | | // The sentinel node becomes the parent node. |
112 | 11.7k | size_t j_end = tree.size() - 1; |
113 | 11.7k | tree[j_end].total_count = |
114 | 11.7k | tree[left].total_count + tree[right].total_count; |
115 | 11.7k | tree[j_end].index_left = static_cast<int16_t>(left); |
116 | 11.7k | tree[j_end].index_right_or_value = static_cast<int16_t>(right); |
117 | | |
118 | | // Add back the last sentinel node. |
119 | 11.7k | tree.push_back(sentinel); |
120 | 11.7k | } |
121 | 3.29k | JXL_DASSERT(tree.size() == 2 * n + 1); |
122 | 3.29k | SetDepth(tree[2 * n - 1], tree.data(), depth, 0); |
123 | | |
124 | | // We need to pack the Huffman tree in tree_limit bits. |
125 | | // If this was not successful, add fake entities to the lowest values |
126 | | // and retry. |
127 | 3.29k | if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) { |
128 | 3.29k | break; |
129 | 3.29k | } |
130 | 3.29k | } |
131 | 3.30k | } |
132 | | |
133 | 22 | void Reverse(uint8_t* v, size_t start, size_t end) { |
134 | 22 | --end; |
135 | 44 | while (start < end) { |
136 | 22 | uint8_t tmp = v[start]; |
137 | 22 | v[start] = v[end]; |
138 | 22 | v[end] = tmp; |
139 | 22 | ++start; |
140 | 22 | --end; |
141 | 22 | } |
142 | 22 | } |
143 | | |
144 | | void WriteHuffmanTreeRepetitions(const uint8_t previous_value, |
145 | | const uint8_t value, size_t repetitions, |
146 | | size_t* tree_size, uint8_t* tree, |
147 | 8.47k | uint8_t* extra_bits_data) { |
148 | 8.47k | JXL_DASSERT(repetitions > 0); |
149 | 8.47k | if (previous_value != value) { |
150 | 4.88k | tree[*tree_size] = value; |
151 | 4.88k | extra_bits_data[*tree_size] = 0; |
152 | 4.88k | ++(*tree_size); |
153 | 4.88k | --repetitions; |
154 | 4.88k | } |
155 | 8.47k | if (repetitions == 7) { |
156 | 0 | tree[*tree_size] = value; |
157 | 0 | extra_bits_data[*tree_size] = 0; |
158 | 0 | ++(*tree_size); |
159 | 0 | --repetitions; |
160 | 0 | } |
161 | 8.47k | if (repetitions < 3) { |
162 | 12.0k | for (size_t i = 0; i < repetitions; ++i) { |
163 | 3.59k | tree[*tree_size] = value; |
164 | 3.59k | extra_bits_data[*tree_size] = 0; |
165 | 3.59k | ++(*tree_size); |
166 | 3.59k | } |
167 | 8.47k | } else { |
168 | 0 | repetitions -= 3; |
169 | 0 | size_t start = *tree_size; |
170 | 0 | while (true) { |
171 | 0 | tree[*tree_size] = 16; |
172 | 0 | extra_bits_data[*tree_size] = repetitions & 0x3; |
173 | 0 | ++(*tree_size); |
174 | 0 | repetitions >>= 2; |
175 | 0 | if (repetitions == 0) { |
176 | 0 | break; |
177 | 0 | } |
178 | 0 | --repetitions; |
179 | 0 | } |
180 | 0 | Reverse(tree, start, *tree_size); |
181 | 0 | Reverse(extra_bits_data, start, *tree_size); |
182 | 0 | } |
183 | 8.47k | } |
184 | | |
185 | | void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size, |
186 | 199 | uint8_t* tree, uint8_t* extra_bits_data) { |
187 | 199 | if (repetitions == 11) { |
188 | 0 | tree[*tree_size] = 0; |
189 | 0 | extra_bits_data[*tree_size] = 0; |
190 | 0 | ++(*tree_size); |
191 | 0 | --repetitions; |
192 | 0 | } |
193 | 199 | if (repetitions < 3) { |
194 | 378 | for (size_t i = 0; i < repetitions; ++i) { |
195 | 190 | tree[*tree_size] = 0; |
196 | 190 | extra_bits_data[*tree_size] = 0; |
197 | 190 | ++(*tree_size); |
198 | 190 | } |
199 | 188 | } else { |
200 | 11 | repetitions -= 3; |
201 | 11 | size_t start = *tree_size; |
202 | 33 | while (true) { |
203 | 33 | tree[*tree_size] = 17; |
204 | 33 | extra_bits_data[*tree_size] = repetitions & 0x7; |
205 | 33 | ++(*tree_size); |
206 | 33 | repetitions >>= 3; |
207 | 33 | if (repetitions == 0) { |
208 | 11 | break; |
209 | 11 | } |
210 | 22 | --repetitions; |
211 | 22 | } |
212 | 11 | Reverse(tree, start, *tree_size); |
213 | 11 | Reverse(extra_bits_data, start, *tree_size); |
214 | 11 | } |
215 | 199 | } |
216 | | |
217 | | static void DecideOverRleUse(const uint8_t* depth, const size_t length, |
218 | | bool* use_rle_for_non_zero, |
219 | 12 | bool* use_rle_for_zero) { |
220 | 12 | size_t total_reps_zero = 0; |
221 | 12 | size_t total_reps_non_zero = 0; |
222 | 12 | size_t count_reps_zero = 1; |
223 | 12 | size_t count_reps_non_zero = 1; |
224 | 143 | for (size_t i = 0; i < length;) { |
225 | 131 | const uint8_t value = depth[i]; |
226 | 131 | size_t reps = 1; |
227 | 2.54k | for (size_t k = i + 1; k < length && depth[k] == value; ++k) { |
228 | 2.41k | ++reps; |
229 | 2.41k | } |
230 | 131 | if (reps >= 3 && value == 0) { |
231 | 13 | total_reps_zero += reps; |
232 | 13 | ++count_reps_zero; |
233 | 13 | } |
234 | 131 | if (reps >= 4 && value != 0) { |
235 | 4 | total_reps_non_zero += reps; |
236 | 4 | ++count_reps_non_zero; |
237 | 4 | } |
238 | 131 | i += reps; |
239 | 131 | } |
240 | 12 | *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2; |
241 | 12 | *use_rle_for_zero = total_reps_zero > count_reps_zero * 2; |
242 | 12 | } |
243 | | |
244 | | void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size, |
245 | 1.05k | uint8_t* tree, uint8_t* extra_bits_data) { |
246 | 1.05k | uint8_t previous_value = 8; |
247 | | |
248 | | // Throw away trailing zeros. |
249 | 1.05k | size_t new_length = length; |
250 | 1.05k | for (size_t i = 0; i < length; ++i) { |
251 | 1.05k | if (depth[length - i - 1] == 0) { |
252 | 0 | --new_length; |
253 | 1.05k | } else { |
254 | 1.05k | break; |
255 | 1.05k | } |
256 | 1.05k | } |
257 | | |
258 | | // First gather statistics on if it is a good idea to do rle. |
259 | 1.05k | bool use_rle_for_non_zero = false; |
260 | 1.05k | bool use_rle_for_zero = false; |
261 | 1.05k | if (length > 50) { |
262 | | // Find rle coding for longer codes. |
263 | | // Shorter codes seem not to benefit from rle. |
264 | 12 | DecideOverRleUse(depth, new_length, &use_rle_for_non_zero, |
265 | 12 | &use_rle_for_zero); |
266 | 12 | } |
267 | | |
268 | | // Actual rle coding. |
269 | 9.73k | for (size_t i = 0; i < new_length;) { |
270 | 8.67k | const uint8_t value = depth[i]; |
271 | 8.67k | size_t reps = 1; |
272 | 8.67k | if ((value != 0 && use_rle_for_non_zero) || |
273 | 8.67k | (value == 0 && use_rle_for_zero)) { |
274 | 2.37k | for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) { |
275 | 2.36k | ++reps; |
276 | 2.36k | } |
277 | 15 | } |
278 | 8.67k | if (value == 0) { |
279 | 199 | WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data); |
280 | 8.47k | } else { |
281 | 8.47k | WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, |
282 | 8.47k | extra_bits_data); |
283 | 8.47k | previous_value = value; |
284 | 8.47k | } |
285 | 8.67k | i += reps; |
286 | 8.67k | } |
287 | 1.05k | } |
288 | | |
289 | | namespace { |
290 | | |
291 | 15.0k | uint16_t ReverseBits(int num_bits, uint16_t bits) { |
292 | 15.0k | static const size_t kLut[16] = {// Pre-reversed 4-bit values. |
293 | 15.0k | 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, |
294 | 15.0k | 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf}; |
295 | 15.0k | size_t retval = kLut[bits & 0xf]; |
296 | 16.3k | for (int i = 4; i < num_bits; i += 4) { |
297 | 1.25k | retval <<= 4; |
298 | 1.25k | bits = static_cast<uint16_t>(bits >> 4); |
299 | 1.25k | retval |= kLut[bits & 0xf]; |
300 | 1.25k | } |
301 | 15.0k | retval >>= (-num_bits & 0x3); |
302 | 15.0k | return static_cast<uint16_t>(retval); |
303 | 15.0k | } |
304 | | |
305 | | } // namespace |
306 | | |
307 | | void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, |
308 | 3.30k | uint16_t* bits) { |
309 | | // In Brotli, all bit depths are [1..15] |
310 | | // 0 bit depth means that the symbol does not exist. |
311 | 3.30k | const int kMaxBits = 16; // 0..15 are values for bits |
312 | 3.30k | uint16_t bl_count[kMaxBits] = {0}; |
313 | 3.30k | { |
314 | 36.7k | for (size_t i = 0; i < len; ++i) { |
315 | 33.4k | ++bl_count[depth[i]]; |
316 | 33.4k | } |
317 | 3.30k | bl_count[0] = 0; |
318 | 3.30k | } |
319 | 3.30k | uint16_t next_code[kMaxBits]; |
320 | 3.30k | next_code[0] = 0; |
321 | 3.30k | { |
322 | 3.30k | int code = 0; |
323 | 52.8k | for (size_t i = 1; i < kMaxBits; ++i) { |
324 | 49.5k | code = (code + bl_count[i - 1]) << 1; |
325 | 49.5k | next_code[i] = static_cast<uint16_t>(code); |
326 | 49.5k | } |
327 | 3.30k | } |
328 | 36.7k | for (size_t i = 0; i < len; ++i) { |
329 | 33.4k | if (depth[i]) { |
330 | 15.0k | bits[i] = ReverseBits(depth[i], next_code[depth[i]]++); |
331 | 15.0k | } |
332 | 33.4k | } |
333 | 3.30k | } |
334 | | |
335 | | } // namespace jxl |