/src/libjxl/lib/jxl/enc_huffman_tree.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) the JPEG XL Project Authors. All rights reserved. |
2 | | // |
3 | | // Use of this source code is governed by a BSD-style |
4 | | // license that can be found in the LICENSE file. |
5 | | |
6 | | #include "lib/jxl/enc_huffman_tree.h" |
7 | | |
8 | | #include <algorithm> |
9 | | #include <cstddef> |
10 | | #include <cstdint> |
11 | | #include <limits> |
12 | | #include <vector> |
13 | | |
14 | | #include "lib/jxl/base/compiler_specific.h" |
15 | | #include "lib/jxl/base/status.h" |
16 | | |
17 | | namespace jxl { |
18 | | |
19 | | void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, |
20 | 302k | uint8_t level) { |
21 | 302k | if (p.index_left >= 0) { |
22 | 134k | ++level; |
23 | 134k | SetDepth(pool[p.index_left], pool, depth, level); |
24 | 134k | SetDepth(pool[p.index_right_or_value], pool, depth, level); |
25 | 168k | } else { |
26 | 168k | depth[p.index_right_or_value] = level; |
27 | 168k | } |
28 | 302k | } |
29 | | |
30 | | // Compare the root nodes, least popular first; indices are in decreasing order |
31 | | // before sorting is applied. |
32 | 331k | static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) { |
33 | 331k | return v0.total_count != v1.total_count |
34 | 331k | ? v0.total_count < v1.total_count |
35 | 331k | : v0.index_right_or_value > v1.index_right_or_value; |
36 | 331k | } |
37 | | |
38 | | // This function will create a Huffman tree. |
39 | | // |
40 | | // The catch here is that the tree cannot be arbitrarily deep. |
41 | | // Brotli specifies a maximum depth of 15 bits for "code trees" |
42 | | // and 7 bits for "code length code trees." |
43 | | // |
44 | | // count_limit is the value that is to be faked as the minimum value |
45 | | // and this minimum value is raised until the tree matches the |
46 | | // maximum length requirement. |
47 | | // |
48 | | // This algorithm is not of excellent performance for very long data blocks, |
49 | | // especially when population counts are longer than 2**tree_limit, but |
50 | | // we are not planning to use this with extremely long blocks. |
51 | | // |
52 | | // See http://en.wikipedia.org/wiki/Huffman_coding |
53 | | void CreateHuffmanTree(const uint32_t* data, const size_t length, |
54 | 34.2k | const int tree_limit, uint8_t* depth) { |
55 | | // For block sizes below 64 kB, we never need to do a second iteration |
56 | | // of this loop. Probably all of our block sizes will be smaller than |
57 | | // that, so this loop is mostly of academic interest. If we actually |
58 | | // would need this, we would be better off with the Katajainen algorithm. |
59 | 34.2k | for (uint32_t count_limit = 1;; count_limit *= 2) { |
60 | 34.2k | std::vector<HuffmanTree> tree; |
61 | 34.2k | tree.reserve(2 * length + 1); |
62 | | |
63 | 416k | for (size_t i = length; i != 0;) { |
64 | 382k | --i; |
65 | 382k | if (data[i]) { |
66 | 168k | const uint32_t count = std::max(data[i], count_limit - 1); |
67 | 168k | tree.emplace_back(count, -1, static_cast<int16_t>(i)); |
68 | 168k | } |
69 | 382k | } |
70 | | |
71 | 34.2k | const size_t n = tree.size(); |
72 | 34.2k | if (n == 1) { |
73 | | // Fake value; will be fixed on upper level. |
74 | 94 | depth[tree[0].index_right_or_value] = 1; |
75 | 94 | break; |
76 | 94 | } |
77 | | |
78 | 34.1k | std::sort(tree.begin(), tree.end(), Compare); |
79 | | |
80 | | // The nodes are: |
81 | | // [0, n): the sorted leaf nodes that we start with. |
82 | | // [n]: we add a sentinel here. |
83 | | // [n + 1, 2n): new parent nodes are added here, starting from |
84 | | // (n+1). These are naturally in ascending order. |
85 | | // [2n]: we add a sentinel at the end as well. |
86 | | // There will be (2n+1) elements at the end. |
87 | 34.1k | const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1); |
88 | 34.1k | tree.push_back(sentinel); |
89 | 34.1k | tree.push_back(sentinel); |
90 | | |
91 | 34.1k | size_t i = 0; // Points to the next leaf node. |
92 | 34.1k | size_t j = n + 1; // Points to the next non-leaf node. |
93 | 168k | for (size_t k = n - 1; k != 0; --k) { |
94 | 134k | size_t left; |
95 | 134k | size_t right; |
96 | 134k | if (tree[i].total_count <= tree[j].total_count) { |
97 | 90.4k | left = i; |
98 | 90.4k | ++i; |
99 | 90.4k | } else { |
100 | 43.8k | left = j; |
101 | 43.8k | ++j; |
102 | 43.8k | } |
103 | 134k | if (tree[i].total_count <= tree[j].total_count) { |
104 | 78.0k | right = i; |
105 | 78.0k | ++i; |
106 | 78.0k | } else { |
107 | 56.2k | right = j; |
108 | 56.2k | ++j; |
109 | 56.2k | } |
110 | | |
111 | | // The sentinel node becomes the parent node. |
112 | 134k | size_t j_end = tree.size() - 1; |
113 | 134k | tree[j_end].total_count = |
114 | 134k | tree[left].total_count + tree[right].total_count; |
115 | 134k | tree[j_end].index_left = static_cast<int16_t>(left); |
116 | 134k | tree[j_end].index_right_or_value = static_cast<int16_t>(right); |
117 | | |
118 | | // Add back the last sentinel node. |
119 | 134k | tree.push_back(sentinel); |
120 | 134k | } |
121 | 34.1k | JXL_DASSERT(tree.size() == 2 * n + 1); |
122 | 34.1k | SetDepth(tree[2 * n - 1], tree.data(), depth, 0); |
123 | | |
124 | | // We need to pack the Huffman tree in tree_limit bits. |
125 | | // If this was not successful, add fake entities to the lowest values |
126 | | // and retry. |
127 | 34.1k | if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) { |
128 | 34.1k | break; |
129 | 34.1k | } |
130 | 34.1k | } |
131 | 34.2k | } |
132 | | |
133 | 1.73k | void Reverse(uint8_t* v, size_t start, size_t end) { |
134 | 1.73k | --end; |
135 | 2.15k | while (start < end) { |
136 | 422 | uint8_t tmp = v[start]; |
137 | 422 | v[start] = v[end]; |
138 | 422 | v[end] = tmp; |
139 | 422 | ++start; |
140 | 422 | --end; |
141 | 422 | } |
142 | 1.73k | } |
143 | | |
144 | | void WriteHuffmanTreeRepetitions(const uint8_t previous_value, |
145 | | const uint8_t value, size_t repetitions, |
146 | | size_t* tree_size, uint8_t* tree, |
147 | 96.5k | uint8_t* extra_bits_data) { |
148 | 96.5k | JXL_DASSERT(repetitions > 0); |
149 | 96.5k | if (previous_value != value) { |
150 | 58.5k | tree[*tree_size] = value; |
151 | 58.5k | extra_bits_data[*tree_size] = 0; |
152 | 58.5k | ++(*tree_size); |
153 | 58.5k | --repetitions; |
154 | 58.5k | } |
155 | 96.5k | if (repetitions == 7) { |
156 | 11 | tree[*tree_size] = value; |
157 | 11 | extra_bits_data[*tree_size] = 0; |
158 | 11 | ++(*tree_size); |
159 | 11 | --repetitions; |
160 | 11 | } |
161 | 96.5k | if (repetitions < 3) { |
162 | 134k | for (size_t i = 0; i < repetitions; ++i) { |
163 | 38.0k | tree[*tree_size] = value; |
164 | 38.0k | extra_bits_data[*tree_size] = 0; |
165 | 38.0k | ++(*tree_size); |
166 | 38.0k | } |
167 | 96.4k | } else { |
168 | 118 | repetitions -= 3; |
169 | 118 | size_t start = *tree_size; |
170 | 121 | while (true) { |
171 | 121 | tree[*tree_size] = 16; |
172 | 121 | extra_bits_data[*tree_size] = repetitions & 0x3; |
173 | 121 | ++(*tree_size); |
174 | 121 | repetitions >>= 2; |
175 | 121 | if (repetitions == 0) { |
176 | 118 | break; |
177 | 118 | } |
178 | 3 | --repetitions; |
179 | 3 | } |
180 | 118 | Reverse(tree, start, *tree_size); |
181 | 118 | Reverse(extra_bits_data, start, *tree_size); |
182 | 118 | } |
183 | 96.5k | } |
184 | | |
185 | | void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size, |
186 | 3.80k | uint8_t* tree, uint8_t* extra_bits_data) { |
187 | 3.80k | if (repetitions == 11) { |
188 | 20 | tree[*tree_size] = 0; |
189 | 20 | extra_bits_data[*tree_size] = 0; |
190 | 20 | ++(*tree_size); |
191 | 20 | --repetitions; |
192 | 20 | } |
193 | 3.80k | if (repetitions < 3) { |
194 | 6.74k | for (size_t i = 0; i < repetitions; ++i) { |
195 | 3.69k | tree[*tree_size] = 0; |
196 | 3.69k | extra_bits_data[*tree_size] = 0; |
197 | 3.69k | ++(*tree_size); |
198 | 3.69k | } |
199 | 3.05k | } else { |
200 | 750 | repetitions -= 3; |
201 | 750 | size_t start = *tree_size; |
202 | 1.10k | while (true) { |
203 | 1.10k | tree[*tree_size] = 17; |
204 | 1.10k | extra_bits_data[*tree_size] = repetitions & 0x7; |
205 | 1.10k | ++(*tree_size); |
206 | 1.10k | repetitions >>= 3; |
207 | 1.10k | if (repetitions == 0) { |
208 | 750 | break; |
209 | 750 | } |
210 | 354 | --repetitions; |
211 | 354 | } |
212 | 750 | Reverse(tree, start, *tree_size); |
213 | 750 | Reverse(extra_bits_data, start, *tree_size); |
214 | 750 | } |
215 | 3.80k | } |
216 | | |
217 | | static void DecideOverRleUse(const uint8_t* depth, const size_t length, |
218 | | bool* use_rle_for_non_zero, |
219 | 258 | bool* use_rle_for_zero) { |
220 | 258 | size_t total_reps_zero = 0; |
221 | 258 | size_t total_reps_non_zero = 0; |
222 | 258 | size_t count_reps_zero = 1; |
223 | 258 | size_t count_reps_non_zero = 1; |
224 | 4.83k | for (size_t i = 0; i < length;) { |
225 | 4.57k | const uint8_t value = depth[i]; |
226 | 4.57k | size_t reps = 1; |
227 | 40.8k | for (size_t k = i + 1; k < length && depth[k] == value; ++k) { |
228 | 36.3k | ++reps; |
229 | 36.3k | } |
230 | 4.57k | if (reps >= 3 && value == 0) { |
231 | 751 | total_reps_zero += reps; |
232 | 751 | ++count_reps_zero; |
233 | 751 | } |
234 | 4.57k | if (reps >= 4 && value != 0) { |
235 | 118 | total_reps_non_zero += reps; |
236 | 118 | ++count_reps_non_zero; |
237 | 118 | } |
238 | 4.57k | i += reps; |
239 | 4.57k | } |
240 | 258 | *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2; |
241 | 258 | *use_rle_for_zero = total_reps_zero > count_reps_zero * 2; |
242 | 258 | } |
243 | | |
244 | | void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size, |
245 | 12.0k | uint8_t* tree, uint8_t* extra_bits_data) { |
246 | 12.0k | uint8_t previous_value = 8; |
247 | | |
248 | | // Throw away trailing zeros. |
249 | 12.0k | size_t new_length = length; |
250 | 12.0k | for (size_t i = 0; i < length; ++i) { |
251 | 12.0k | if (depth[length - i - 1] == 0) { |
252 | 0 | --new_length; |
253 | 12.0k | } else { |
254 | 12.0k | break; |
255 | 12.0k | } |
256 | 12.0k | } |
257 | | |
258 | | // First gather statistics on if it is a good idea to do rle. |
259 | 12.0k | bool use_rle_for_non_zero = false; |
260 | 12.0k | bool use_rle_for_zero = false; |
261 | 12.0k | if (length > 50) { |
262 | | // Find rle coding for longer codes. |
263 | | // Shorter codes seem not to benefit from rle. |
264 | 258 | DecideOverRleUse(depth, new_length, &use_rle_for_non_zero, |
265 | 258 | &use_rle_for_zero); |
266 | 258 | } |
267 | | |
268 | | // Actual rle coding. |
269 | 112k | for (size_t i = 0; i < new_length;) { |
270 | 100k | const uint8_t value = depth[i]; |
271 | 100k | size_t reps = 1; |
272 | 100k | if ((value != 0 && use_rle_for_non_zero) || |
273 | 100k | (value == 0 && use_rle_for_zero)) { |
274 | 37.7k | for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) { |
275 | 35.5k | ++reps; |
276 | 35.5k | } |
277 | 2.16k | } |
278 | 100k | if (value == 0) { |
279 | 3.80k | WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data); |
280 | 96.5k | } else { |
281 | 96.5k | WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, |
282 | 96.5k | extra_bits_data); |
283 | 96.5k | previous_value = value; |
284 | 96.5k | } |
285 | 100k | i += reps; |
286 | 100k | } |
287 | 12.0k | } |
288 | | |
289 | | namespace { |
290 | | |
291 | 168k | uint16_t ReverseBits(int num_bits, uint16_t bits) { |
292 | 168k | static const size_t kLut[16] = {// Pre-reversed 4-bit values. |
293 | 168k | 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, |
294 | 168k | 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf}; |
295 | 168k | size_t retval = kLut[bits & 0xf]; |
296 | 193k | for (int i = 4; i < num_bits; i += 4) { |
297 | 24.6k | retval <<= 4; |
298 | 24.6k | bits = static_cast<uint16_t>(bits >> 4); |
299 | 24.6k | retval |= kLut[bits & 0xf]; |
300 | 24.6k | } |
301 | 168k | retval >>= (-num_bits & 0x3); |
302 | 168k | return static_cast<uint16_t>(retval); |
303 | 168k | } |
304 | | |
305 | | } // namespace |
306 | | |
307 | | void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, |
308 | 34.2k | uint16_t* bits) { |
309 | | // In Brotli, all bit depths are [1..15] |
310 | | // 0 bit depth means that the symbol does not exist. |
311 | 34.2k | const int kMaxBits = 16; // 0..15 are values for bits |
312 | 34.2k | uint16_t bl_count[kMaxBits] = {0}; |
313 | 34.2k | { |
314 | 416k | for (size_t i = 0; i < len; ++i) { |
315 | 382k | ++bl_count[depth[i]]; |
316 | 382k | } |
317 | 34.2k | bl_count[0] = 0; |
318 | 34.2k | } |
319 | 34.2k | uint16_t next_code[kMaxBits]; |
320 | 34.2k | next_code[0] = 0; |
321 | 34.2k | { |
322 | 34.2k | int code = 0; |
323 | 548k | for (size_t i = 1; i < kMaxBits; ++i) { |
324 | 514k | code = (code + bl_count[i - 1]) << 1; |
325 | 514k | next_code[i] = static_cast<uint16_t>(code); |
326 | 514k | } |
327 | 34.2k | } |
328 | 416k | for (size_t i = 0; i < len; ++i) { |
329 | 382k | if (depth[i]) { |
330 | 168k | bits[i] = ReverseBits(depth[i], next_code[depth[i]]++); |
331 | 168k | } |
332 | 382k | } |
333 | 34.2k | } |
334 | | |
335 | | } // namespace jxl |