/src/serenity/Userland/Libraries/LibCompress/Brotli.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/BinarySearch.h> |
8 | | #include <AK/QuickSort.h> |
9 | | #include <LibCompress/Brotli.h> |
10 | | #include <LibCompress/BrotliDictionary.h> |
11 | | |
12 | | namespace Compress { |
13 | | |
14 | | ErrorOr<size_t> Brotli::CanonicalCode::read_symbol(LittleEndianInputBitStream& input_stream) const |
15 | 580M | { |
16 | 580M | size_t code_bits = 1; |
17 | | |
18 | 659M | while (code_bits < (1 << 16)) { |
19 | | // FIXME: This is very inefficient and could greatly be improved by implementing this |
20 | | // algorithm: https://www.hanshq.net/zip.html#huffdec |
21 | 659M | size_t index; |
22 | 659M | if (binary_search(m_symbol_codes.span(), code_bits, &index)) |
23 | 580M | return m_symbol_values[index]; |
24 | | |
25 | 78.2M | code_bits = (code_bits << 1) | TRY(input_stream.read_bit()); |
26 | 78.2M | } |
27 | | |
28 | 114 | return Error::from_string_literal("no matching code found"); |
29 | 580M | } |
30 | | |
31 | | BrotliDecompressionStream::BrotliDecompressionStream(MaybeOwned<Stream> stream) |
32 | 5.59k | : m_input_stream(move(stream)) |
33 | 5.59k | { |
34 | 5.59k | } |
35 | | |
36 | | ErrorOr<size_t> BrotliDecompressionStream::read_window_length() |
37 | 5.59k | { |
38 | 5.59k | if (TRY(m_input_stream.read_bit())) { |
39 | 2.15k | switch (TRY(m_input_stream.read_bits(3))) { |
40 | 441 | case 0: { |
41 | 441 | switch (TRY(m_input_stream.read_bits(3))) { |
42 | 99 | case 0: |
43 | 99 | return 17; |
44 | 2 | case 1: |
45 | 2 | return Error::from_string_literal("invalid window length"); |
46 | 147 | case 2: |
47 | 147 | return 10; |
48 | 62 | case 3: |
49 | 62 | return 11; |
50 | 31 | case 4: |
51 | 31 | return 12; |
52 | 45 | case 5: |
53 | 45 | return 13; |
54 | 24 | case 6: |
55 | 24 | return 14; |
56 | 31 | case 7: |
57 | 31 | return 15; |
58 | 0 | default: |
59 | 0 | VERIFY_NOT_REACHED(); |
60 | 441 | } |
61 | 441 | } |
62 | 133 | case 1: |
63 | 133 | return 18; |
64 | 146 | case 2: |
65 | 146 | return 19; |
66 | 94 | case 3: |
67 | 94 | return 20; |
68 | 114 | case 4: |
69 | 114 | return 21; |
70 | 673 | case 5: |
71 | 673 | return 22; |
72 | 299 | case 6: |
73 | 299 | return 23; |
74 | 258 | case 7: |
75 | 258 | return 24; |
76 | 0 | default: |
77 | 0 | VERIFY_NOT_REACHED(); |
78 | 2.15k | } |
79 | 3.43k | } else { |
80 | 3.43k | return 16; |
81 | 3.43k | } |
82 | 5.59k | } |
83 | | |
84 | | ErrorOr<size_t> BrotliDecompressionStream::read_size_number_of_nibbles() |
85 | 75.5k | { |
86 | 75.5k | switch (TRY(m_input_stream.read_bits(2))) { |
87 | 19.8k | case 0: |
88 | 19.8k | return 4; |
89 | 1.55k | case 1: |
90 | 1.55k | return 5; |
91 | 1.48k | case 2: |
92 | 1.48k | return 6; |
93 | 52.5k | case 3: |
94 | 52.5k | return 0; |
95 | 0 | default: |
96 | 0 | VERIFY_NOT_REACHED(); |
97 | 75.5k | } |
98 | 75.5k | } |
99 | | |
100 | | ErrorOr<size_t> BrotliDecompressionStream::read_variable_length() |
101 | 84.4k | { |
102 | | // Value Bit Pattern |
103 | | // ----- ----------- |
104 | | // 1 0 |
105 | | // 2 0001 |
106 | | // 3..4 x0011 |
107 | | // 5..8 xx0101 |
108 | | // 9..16 xxx0111 |
109 | | // 17..32 xxxx1001 |
110 | | // 33..64 xxxxx1011 |
111 | | // 65..128 xxxxxx1101 |
112 | | // 129..256 xxxxxxx1111 |
113 | | |
114 | 84.4k | if (TRY(m_input_stream.read_bit())) { |
115 | 13.3k | switch (TRY(m_input_stream.read_bits(3))) { |
116 | 3.99k | case 0: |
117 | 3.99k | return 2; |
118 | 5.49k | case 1: |
119 | 5.49k | return 3 + TRY(m_input_stream.read_bits(1)); |
120 | 1.75k | case 2: |
121 | 1.75k | return 5 + TRY(m_input_stream.read_bits(2)); |
122 | 270 | case 3: |
123 | 270 | return 9 + TRY(m_input_stream.read_bits(3)); |
124 | 364 | case 4: |
125 | 364 | return 17 + TRY(m_input_stream.read_bits(4)); |
126 | 353 | case 5: |
127 | 353 | return 33 + TRY(m_input_stream.read_bits(5)); |
128 | 262 | case 6: |
129 | 262 | return 65 + TRY(m_input_stream.read_bits(6)); |
130 | 828 | case 7: |
131 | 828 | return 129 + TRY(m_input_stream.read_bits(7)); |
132 | 0 | default: |
133 | 0 | VERIFY_NOT_REACHED(); |
134 | 13.3k | } |
135 | 71.0k | } else { |
136 | 71.0k | return 1; |
137 | 71.0k | } |
138 | 84.4k | } |
139 | | |
140 | | ErrorOr<size_t> Brotli::CanonicalCode::read_complex_prefix_code_length(LittleEndianInputBitStream& stream) |
141 | 365k | { |
142 | | // Symbol Code |
143 | | // ------ ---- |
144 | | // 0 00 |
145 | | // 1 0111 |
146 | | // 2 011 |
147 | | // 3 10 |
148 | | // 4 01 |
149 | | // 5 1111 |
150 | | |
151 | 365k | switch (TRY(stream.read_bits(2))) { |
152 | 208k | case 0: |
153 | 208k | return 0; |
154 | 36.2k | case 1: |
155 | 36.2k | return 4; |
156 | 42.8k | case 2: |
157 | 42.8k | return 3; |
158 | 77.1k | case 3: { |
159 | 154k | if (TRY(stream.read_bit()) == 0) { |
160 | 22.7k | return 2; |
161 | 54.4k | } else { |
162 | 108k | if (TRY(stream.read_bit()) == 0) { |
163 | 30.4k | return 1; |
164 | 30.4k | } else { |
165 | 23.9k | return 5; |
166 | 23.9k | } |
167 | 54.4k | } |
168 | 77.1k | } |
169 | 0 | default: |
170 | 0 | VERIFY_NOT_REACHED(); |
171 | 364k | } |
172 | 364k | } |
173 | | |
174 | | ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size) |
175 | 110k | { |
176 | 110k | size_t hskip = TRY(stream.read_bits(2)); |
177 | | |
178 | 110k | if (hskip == 1) |
179 | 110k | return TRY(read_simple_prefix_code(stream, alphabet_size)); |
180 | | |
181 | 36.3k | return TRY(read_complex_prefix_code(stream, alphabet_size, hskip)); |
182 | 36.3k | } |
183 | | |
184 | | ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_simple_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size) |
185 | 74.2k | { |
186 | 74.2k | CanonicalCode code {}; |
187 | | |
188 | 74.2k | size_t number_of_symbols = 1 + TRY(stream.read_bits(2)); |
189 | | |
190 | 74.1k | size_t symbol_size = 0; |
191 | 602k | while ((1u << symbol_size) < alphabet_size) |
192 | 528k | symbol_size++; |
193 | | |
194 | 74.1k | Vector<size_t> symbols; |
195 | 207k | for (size_t i = 0; i < number_of_symbols; i++) { |
196 | 133k | size_t symbol = TRY(stream.read_bits(symbol_size)); |
197 | 132k | symbols.append(symbol); |
198 | | |
199 | 132k | if (symbol >= alphabet_size) |
200 | 32 | return Error::from_string_literal("symbol larger than alphabet"); |
201 | 132k | } |
202 | | |
203 | 73.9k | if (number_of_symbols == 1) { |
204 | 37.6k | code.m_symbol_codes.append(0b1); |
205 | 37.6k | code.m_symbol_values = move(symbols); |
206 | 37.6k | } else if (number_of_symbols == 2) { |
207 | 18.9k | code.m_symbol_codes.extend({ 0b10, 0b11 }); |
208 | 18.9k | if (symbols[0] > symbols[1]) |
209 | 6.20k | swap(symbols[0], symbols[1]); |
210 | 18.9k | code.m_symbol_values = move(symbols); |
211 | 18.9k | } else if (number_of_symbols == 3) { |
212 | 12.3k | code.m_symbol_codes.extend({ 0b10, 0b110, 0b111 }); |
213 | 12.3k | if (symbols[1] > symbols[2]) |
214 | 2.94k | swap(symbols[1], symbols[2]); |
215 | 12.3k | code.m_symbol_values = move(symbols); |
216 | 12.3k | } else if (number_of_symbols == 4) { |
217 | 5.11k | bool tree_select = TRY(stream.read_bit()); |
218 | 5.11k | if (tree_select) { |
219 | 1.62k | code.m_symbol_codes.extend({ 0b10, 0b110, 0b1110, 0b1111 }); |
220 | 1.62k | if (symbols[2] > symbols[3]) |
221 | 589 | swap(symbols[2], symbols[3]); |
222 | 1.62k | code.m_symbol_values = move(symbols); |
223 | 3.48k | } else { |
224 | 3.48k | code.m_symbol_codes.extend({ 0b100, 0b101, 0b110, 0b111 }); |
225 | 3.48k | quick_sort(symbols); |
226 | 3.48k | code.m_symbol_values = move(symbols); |
227 | 3.48k | } |
228 | 5.11k | } |
229 | | |
230 | 73.9k | return code; |
231 | 73.9k | } |
232 | | |
233 | | ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_complex_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size, size_t hskip) |
234 | 36.3k | { |
235 | | // hskip should only be 0, 2 or 3 |
236 | 36.3k | VERIFY(hskip != 1); |
237 | 36.3k | VERIFY(hskip <= 3); |
238 | | |
239 | | // Read the prefix code_value that is used to encode the actual prefix code_value |
240 | 36.3k | size_t const symbol_mapping[18] = { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; |
241 | 36.3k | size_t code_length[18] { 0 }; |
242 | 36.3k | size_t code_length_counts[6] { 0 }; |
243 | | |
244 | 36.3k | size_t sum = 0; |
245 | 36.3k | size_t number_of_non_zero_symbols = 0; |
246 | 375k | for (size_t i = hskip; i < 18; i++) { |
247 | 365k | size_t len = TRY(read_complex_prefix_code_length(stream)); |
248 | 364k | code_length[symbol_mapping[i]] = len; |
249 | | |
250 | 364k | if (len != 0) { |
251 | 156k | code_length_counts[len]++; |
252 | 156k | sum += (32 >> len); |
253 | 156k | number_of_non_zero_symbols++; |
254 | 156k | } |
255 | | |
256 | 364k | if (sum == 32) |
257 | 24.9k | break; |
258 | 339k | else if (sum > 32) |
259 | 102 | return Error::from_string_literal("invalid prefix code"); |
260 | 364k | } |
261 | | |
262 | 35.5k | CanonicalCode temp_code; |
263 | 35.5k | if (number_of_non_zero_symbols > 1) { |
264 | 25.8k | size_t code_value = 0; |
265 | 155k | for (size_t bits = 1; bits <= 5; bits++) { |
266 | 129k | code_value = (code_value + code_length_counts[bits - 1]) << 1; |
267 | 129k | size_t current_code_value = code_value; |
268 | | |
269 | 2.45M | for (size_t i = 0; i < 18; i++) { |
270 | 2.32M | size_t len = code_length[i]; |
271 | 2.32M | if (len == bits) { |
272 | 144k | temp_code.m_symbol_codes.append((1 << bits) | current_code_value); |
273 | 144k | temp_code.m_symbol_values.append(i); |
274 | 144k | current_code_value++; |
275 | 144k | } |
276 | 2.32M | } |
277 | 129k | } |
278 | 25.8k | } else { |
279 | 104k | for (size_t i = 0; i < 18; i++) { |
280 | 104k | size_t len = code_length[i]; |
281 | 104k | if (len != 0) { |
282 | 9.67k | temp_code.m_symbol_codes.append(1); |
283 | 9.67k | temp_code.m_symbol_values.append(i); |
284 | 9.67k | break; |
285 | 9.67k | } |
286 | 104k | } |
287 | 9.70k | } |
288 | | |
289 | | // Read the actual prefix code_value |
290 | 35.5k | sum = 0; |
291 | 35.5k | size_t i = 0; |
292 | | |
293 | 35.5k | size_t previous_non_zero_code_length = 8; |
294 | 35.5k | size_t last_symbol = 0; |
295 | 35.5k | size_t last_repeat = 0; |
296 | | |
297 | 35.5k | Vector<size_t> result_symbols; |
298 | 35.5k | Vector<size_t> result_lengths; |
299 | 35.5k | size_t result_lengths_count[16] { 0 }; |
300 | 2.46M | while (i < alphabet_size) { |
301 | 2.45M | auto symbol = TRY(temp_code.read_symbol(stream)); |
302 | | |
303 | 2.45M | if (symbol < 16) { |
304 | 2.25M | result_symbols.append(i); |
305 | 2.25M | result_lengths.append(symbol); |
306 | 2.25M | result_lengths_count[symbol]++; |
307 | | |
308 | 2.25M | if (symbol != 0) { |
309 | 1.58M | previous_non_zero_code_length = symbol; |
310 | 1.58M | sum += (32768 >> symbol); |
311 | 1.58M | if (sum == 32768) |
312 | 24.6k | break; |
313 | 1.56M | else if (sum > 32768) |
314 | 46 | return Error::from_string_literal("invalid prefix code"); |
315 | 1.58M | } |
316 | | |
317 | 2.23M | last_repeat = 0; |
318 | 2.23M | i++; |
319 | 2.23M | } else if (symbol == 16) { |
320 | 106k | size_t repeat_count = 0; |
321 | 106k | if (last_symbol == 16 && last_repeat != 0) { |
322 | 30.6k | repeat_count = (4 * (last_repeat - 2)); |
323 | 76.2k | } else { |
324 | 76.2k | last_repeat = 0; |
325 | 76.2k | } |
326 | 106k | repeat_count += 3 + TRY(stream.read_bits(2)); |
327 | | |
328 | 815k | for (size_t rep = 0; rep < (repeat_count - last_repeat); rep++) { |
329 | 711k | result_symbols.append(i); |
330 | 711k | result_lengths.append(previous_non_zero_code_length); |
331 | 711k | result_lengths_count[previous_non_zero_code_length]++; |
332 | | |
333 | 711k | if (previous_non_zero_code_length != 0) { |
334 | 711k | sum += (32768 >> previous_non_zero_code_length); |
335 | 711k | if (sum == 32768) |
336 | 2.55k | break; |
337 | 709k | else if (sum > 32768) |
338 | 12 | return Error::from_string_literal("invalid prefix code"); |
339 | 711k | } |
340 | | |
341 | 709k | i++; |
342 | 709k | if (i >= alphabet_size) |
343 | 607 | break; |
344 | 709k | } |
345 | 106k | if (sum == 32768) |
346 | 2.55k | break; |
347 | 104k | VERIFY(sum < 32768); |
348 | | |
349 | 104k | last_repeat = repeat_count; |
350 | 104k | } else if (symbol == 17) { |
351 | 91.4k | size_t repeat_count = 0; |
352 | 91.4k | if (last_symbol == 17 && last_repeat != 0) { |
353 | 28.8k | repeat_count = (8 * (last_repeat - 2)); |
354 | 62.5k | } else { |
355 | 62.5k | last_repeat = 0; |
356 | 62.5k | } |
357 | 91.4k | repeat_count += 3 + TRY(stream.read_bits(3)); |
358 | | |
359 | 91.4k | i += (repeat_count - last_repeat); |
360 | 91.4k | last_repeat = repeat_count; |
361 | 91.4k | } |
362 | | |
363 | 2.42M | last_symbol = symbol; |
364 | 2.42M | } |
365 | 35.1k | result_lengths_count[0] = 0; |
366 | | |
367 | 35.1k | CanonicalCode final_code; |
368 | | |
369 | 35.1k | size_t code_value = 0; |
370 | 561k | for (size_t bits = 1; bits < 16; bits++) { |
371 | 526k | code_value = (code_value + result_lengths_count[bits - 1]) << 1; |
372 | 526k | size_t current_code_value = code_value; |
373 | | |
374 | 44.8M | for (size_t n = 0; n < result_symbols.size(); n++) { |
375 | 44.2M | size_t len = result_lengths[n]; |
376 | 44.2M | if (len == bits) { |
377 | 2.28M | final_code.m_symbol_codes.append((1 << bits) | current_code_value); |
378 | 2.28M | final_code.m_symbol_values.append(result_symbols[n]); |
379 | 2.28M | current_code_value++; |
380 | 2.28M | } |
381 | 44.2M | } |
382 | 526k | } |
383 | | |
384 | 35.1k | return final_code; |
385 | 35.5k | } |
386 | | |
387 | | static void inverse_move_to_front_transform(Span<u8> v) |
388 | 2.92k | { |
389 | | // RFC 7932 section 7.3 |
390 | 2.92k | u8 mtf[256]; |
391 | 752k | for (size_t i = 0; i < 256; ++i) { |
392 | 749k | mtf[i] = (u8)i; |
393 | 749k | } |
394 | 3.52M | for (size_t i = 0; i < v.size(); ++i) { |
395 | 3.52M | u8 index = v[i]; |
396 | 3.52M | u8 value = mtf[index]; |
397 | 3.52M | v[i] = value; |
398 | 9.57M | for (; index; --index) { |
399 | 6.05M | mtf[index] = mtf[index - 1]; |
400 | 6.05M | } |
401 | 3.52M | mtf[0] = value; |
402 | 3.52M | } |
403 | 2.92k | } |
404 | | |
405 | | ErrorOr<void> BrotliDecompressionStream::read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size) |
406 | 3.97k | { |
407 | 3.97k | bool use_run_length_encoding = TRY(m_input_stream.read_bit()); |
408 | 3.95k | size_t run_length_encoding_max = 0; |
409 | 3.95k | if (use_run_length_encoding) { |
410 | 2.20k | run_length_encoding_max = 1 + TRY(m_input_stream.read_bits(4)); |
411 | 2.18k | } |
412 | | |
413 | 3.95k | auto const code = TRY(CanonicalCode::read_prefix_code(m_input_stream, number_of_codes + run_length_encoding_max)); |
414 | | |
415 | 3.82k | size_t i = 0; |
416 | 396k | while (i < context_map_size) { |
417 | 392k | size_t symbol = TRY(code.read_symbol(m_input_stream)); |
418 | | |
419 | 392k | if (symbol <= run_length_encoding_max) { |
420 | 107k | size_t repeat_base = 1 << symbol; |
421 | 107k | size_t repeat_additional = TRY(m_input_stream.read_bits(symbol)); |
422 | 107k | size_t repeat_count = repeat_base + repeat_additional; |
423 | 3.83M | while (repeat_count--) { |
424 | 3.72M | context_map.append(0); |
425 | 3.72M | i++; |
426 | 3.72M | } |
427 | 285k | } else { |
428 | 285k | size_t value = symbol - run_length_encoding_max; |
429 | 285k | context_map.append(value); |
430 | 285k | i++; |
431 | 285k | } |
432 | 392k | } |
433 | | |
434 | 3.82k | bool inverse_move_to_front = TRY(m_input_stream.read_bit()); |
435 | 3.69k | if (inverse_move_to_front) |
436 | 2.92k | inverse_move_to_front_transform(context_map.span()); |
437 | | |
438 | 3.69k | return {}; |
439 | 3.71k | } |
440 | | |
441 | | ErrorOr<void> BrotliDecompressionStream::read_block_configuration(Block& block) |
442 | 51.2k | { |
443 | 51.2k | size_t blocks_of_type = TRY(read_variable_length()); |
444 | | |
445 | 51.2k | block.type = 0; |
446 | 51.2k | block.type_previous = 1; |
447 | 51.2k | block.number_of_types = blocks_of_type; |
448 | | |
449 | 51.2k | if (blocks_of_type == 1) { |
450 | 41.8k | block.length = 16 * MiB; |
451 | 41.8k | block.type_code = {}; |
452 | 41.8k | block.length_code = {}; |
453 | 41.8k | } else { |
454 | 9.31k | block.type_code = TRY(CanonicalCode::read_prefix_code(m_input_stream, 2 + blocks_of_type)); |
455 | 9.08k | block.length_code = TRY(CanonicalCode::read_prefix_code(m_input_stream, 26)); |
456 | 8.84k | TRY(block_update_length(block)); |
457 | 8.76k | } |
458 | | |
459 | 51.2k | return {}; |
460 | 51.2k | } |
461 | | |
462 | | ErrorOr<void> BrotliDecompressionStream::block_update_length(Block& block) |
463 | 303k | { |
464 | 303k | size_t const block_length_code_base[26] { 1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625 }; |
465 | 303k | size_t const block_length_code_extra[26] { 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24 }; |
466 | | |
467 | 303k | size_t symbol = TRY(block.length_code.read_symbol(m_input_stream)); |
468 | 303k | size_t block_length = block_length_code_base[symbol] + TRY(m_input_stream.read_bits(block_length_code_extra[symbol])); |
469 | | |
470 | 303k | block.length = block_length; |
471 | 303k | return {}; |
472 | 303k | } |
473 | | |
474 | | ErrorOr<void> BrotliDecompressionStream::block_read_new_state(Block& block) |
475 | 295k | { |
476 | 295k | size_t block_type_symbol = TRY(block.type_code.read_symbol(m_input_stream)); |
477 | 295k | TRY(block_update_length(block)); |
478 | | |
479 | 294k | if (block_type_symbol == 0) { |
480 | 275k | swap(block.type, block.type_previous); |
481 | 275k | } else if (block_type_symbol == 1) { |
482 | 11.1k | block.type_previous = block.type; |
483 | 11.1k | block.type = (block.type + 1) % block.number_of_types; |
484 | 11.1k | } else { |
485 | 8.48k | block.type_previous = block.type; |
486 | 8.48k | block.type = block_type_symbol - 2; |
487 | 8.48k | } |
488 | | |
489 | 294k | return {}; |
490 | 295k | } |
491 | | |
492 | | size_t BrotliDecompressionStream::literal_code_index_from_context() |
493 | 395M | { |
494 | 395M | size_t const context_id_lut0[256] { |
495 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, |
496 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
497 | 395M | 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, |
498 | 395M | 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, |
499 | 395M | 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, |
500 | 395M | 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, |
501 | 395M | 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, |
502 | 395M | 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, |
503 | 395M | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, |
504 | 395M | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, |
505 | 395M | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, |
506 | 395M | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, |
507 | 395M | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, |
508 | 395M | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, |
509 | 395M | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, |
510 | 395M | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3 |
511 | 395M | }; |
512 | 395M | size_t const context_id_lut1[256] { |
513 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
514 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
515 | 395M | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
516 | 395M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, |
517 | 395M | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
518 | 395M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, |
519 | 395M | 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
520 | 395M | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, |
521 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
522 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
523 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
524 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
525 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
526 | 395M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
527 | 395M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
528 | 395M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 |
529 | 395M | }; |
530 | 395M | size_t const context_id_lut2[256] { |
531 | 395M | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
532 | 395M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
533 | 395M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
534 | 395M | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
535 | 395M | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
536 | 395M | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
537 | 395M | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
538 | 395M | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
539 | 395M | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
540 | 395M | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
541 | 395M | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
542 | 395M | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
543 | 395M | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
544 | 395M | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
545 | 395M | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
546 | 395M | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7 |
547 | 395M | }; |
548 | | |
549 | 395M | size_t context_mode = m_literal_context_modes[m_literal_block.type]; |
550 | 395M | size_t context_id; |
551 | 395M | switch (context_mode) { |
552 | 89.7M | case 0: |
553 | 89.7M | context_id = m_lookback_buffer.value().lookback(1, 0) & 0x3f; |
554 | 89.7M | break; |
555 | 90.0M | case 1: |
556 | 90.0M | context_id = m_lookback_buffer.value().lookback(1, 0) >> 2; |
557 | 90.0M | break; |
558 | 132M | case 2: |
559 | 132M | context_id = context_id_lut0[m_lookback_buffer.value().lookback(1, 0)] | context_id_lut1[m_lookback_buffer.value().lookback(2, 0)]; |
560 | 132M | break; |
561 | 84.0M | case 3: |
562 | 84.0M | context_id = (context_id_lut2[m_lookback_buffer.value().lookback(1, 0)] << 3) | context_id_lut2[m_lookback_buffer.value().lookback(2, 0)]; |
563 | 84.0M | break; |
564 | 0 | default: |
565 | 0 | VERIFY_NOT_REACHED(); |
566 | 395M | } |
567 | | |
568 | 395M | size_t literal_code_index = m_context_mapping_literal[64 * m_literal_block.type + context_id]; |
569 | 395M | return literal_code_index; |
570 | 395M | } |
571 | | |
572 | | ErrorOr<Bytes> BrotliDecompressionStream::read_some(Bytes output_buffer) |
573 | 335k | { |
574 | 335k | size_t bytes_read = 0; |
575 | 1.60G | while (bytes_read < output_buffer.size()) { |
576 | 1.60G | if (m_current_state == State::WindowSize) { |
577 | 5.59k | size_t window_bits = TRY(read_window_length()); |
578 | 5.58k | m_window_size = (1 << window_bits) - 16; |
579 | | |
580 | 5.58k | m_lookback_buffer = TRY(LookbackBuffer::try_create(m_window_size)); |
581 | | |
582 | 5.58k | m_current_state = State::Idle; |
583 | 1.60G | } else if (m_current_state == State::Idle) { |
584 | | // If the final block was read, we are done decompressing |
585 | 77.1k | if (m_read_final_block) |
586 | 1.01k | break; |
587 | | |
588 | | // RFC 7932 section 9.1 |
589 | | // |
590 | | // 1 bit: ISLAST, set to 1 if this is the last meta-block |
591 | 76.1k | m_read_final_block = TRY(m_input_stream.read_bit()); |
592 | 76.0k | if (m_read_final_block) { |
593 | | // 1 bit: ISLASTEMPTY, if set to 1, the meta-block is empty; this |
594 | | // field is only present if ISLAST bit is set -- if it is 1, |
595 | | // then the meta-block and the brotli stream ends at that |
596 | | // bit, with any remaining bits in the last byte of the |
597 | | // compressed stream filled with zeros (if the fill bits are |
598 | | // not zero, then the stream should be rejected as invalid) |
599 | 2.26k | bool is_last_block_empty = TRY(m_input_stream.read_bit()); |
600 | | // If the last block is empty we are done decompressing |
601 | 2.23k | if (is_last_block_empty) |
602 | 426 | break; |
603 | 2.23k | } |
604 | | |
605 | | // 2 bits: MNIBBLES, number of nibbles to represent the uncompressed |
606 | | // length |
607 | 75.5k | size_t size_number_of_nibbles = TRY(read_size_number_of_nibbles()); |
608 | | |
609 | | // If MNIBBLES is 0, the meta-block is empty, i.e., it does |
610 | | // not generate any uncompressed data. In this case, the |
611 | | // rest of the meta-block has the following format: |
612 | 75.5k | if (size_number_of_nibbles == 0) { |
613 | | |
614 | | // 1 bit: reserved, must be zero |
615 | 52.5k | bool reserved = TRY(m_input_stream.read_bit()); |
616 | 52.5k | if (reserved) |
617 | 7 | return Error::from_string_literal("invalid reserved bit"); |
618 | | |
619 | | // 2 bits: MSKIPBYTES, number of bytes to represent |
620 | | // metadata length |
621 | | // |
622 | | // MSKIPBYTES * 8 bits: MSKIPLEN - 1, where MSKIPLEN is |
623 | | // the number of metadata bytes; this field is |
624 | | // only present if MSKIPBYTES is positive; |
625 | | // otherwise, MSKIPLEN is 0 (if MSKIPBYTES is |
626 | | // greater than 1, and the last byte is all |
627 | | // zeros, then the stream should be rejected as |
628 | | // invalid) |
629 | 52.5k | size_t skip_bytes = TRY(m_input_stream.read_bits(2)); |
630 | 52.5k | if (skip_bytes == 0) { |
631 | | // 0..7 bits: fill bits until the next byte boundary, |
632 | | // must be all zeros |
633 | 43.0k | u8 remainder = m_input_stream.align_to_byte_boundary(); |
634 | 43.0k | if (remainder != 0) |
635 | 20 | return Error::from_string_literal("remainder bits are non-zero"); |
636 | 43.0k | continue; |
637 | 43.0k | } |
638 | | |
639 | | // MSKIPLEN bytes of metadata, not part of the |
640 | | // uncompressed data or the sliding window |
641 | 9.53k | size_t skip_length = 1 + TRY(m_input_stream.read_bits(8 * skip_bytes)); |
642 | | |
643 | 9.51k | u8 remainder = m_input_stream.align_to_byte_boundary(); |
644 | 9.51k | if (remainder != 0) |
645 | 14 | return Error::from_string_literal("remainder bits are non-zero"); |
646 | | |
647 | | // Discard meta-data bytes |
648 | 9.50k | u8 temp_buffer[4096]; |
649 | 9.50k | Bytes temp_bytes { temp_buffer, 4096 }; |
650 | 19.4k | while (skip_length > 0) { |
651 | 10.1k | Bytes temp_bytes_slice = temp_bytes.slice(0, min(4096, skip_length)); |
652 | 10.1k | auto metadata_bytes = TRY(m_input_stream.read_some(temp_bytes_slice)); |
653 | 10.1k | if (metadata_bytes.is_empty()) |
654 | 169 | return Error::from_string_literal("eof"); |
655 | 10.0k | if (metadata_bytes.last() == 0) |
656 | 15 | return Error::from_string_literal("invalid stream"); |
657 | 9.99k | skip_length -= metadata_bytes.size(); |
658 | 9.99k | } |
659 | | |
660 | 9.31k | continue; |
661 | 9.50k | } |
662 | | |
663 | 22.9k | size_t uncompressed_size = 1 + TRY(m_input_stream.read_bits(4 * size_number_of_nibbles)); |
664 | | |
665 | | // 1 bit: ISUNCOMPRESSED, if set to 1, any bits of compressed data |
666 | | // up to the next byte boundary are ignored, and the rest of |
667 | | // the meta-block contains MLEN bytes of literal data; this |
668 | | // field is only present if the ISLAST bit is not set (if the |
669 | | // ignored bits are not all zeros, the stream should be |
670 | | // rejected as invalid) |
671 | 22.8k | bool is_uncompressed = false; |
672 | 22.8k | if (!m_read_final_block) |
673 | 21.1k | is_uncompressed = TRY(m_input_stream.read_bit()); |
674 | | |
675 | 22.8k | m_bytes_left = uncompressed_size; |
676 | 22.8k | if (is_uncompressed) { |
677 | 5.47k | u8 remainder = m_input_stream.align_to_byte_boundary(); |
678 | 5.47k | if (remainder != 0) |
679 | 26 | return Error::from_string_literal("remainder is non-zero"); |
680 | 5.44k | m_current_state = State::UncompressedData; |
681 | 17.3k | } else { |
682 | 17.3k | TRY(read_block_configuration(m_literal_block)); |
683 | 17.0k | TRY(read_block_configuration(m_insert_and_copy_block)); |
684 | 16.8k | TRY(read_block_configuration(m_distance_block)); |
685 | | |
686 | 16.7k | m_postfix_bits = TRY(m_input_stream.read_bits(2)); |
687 | 16.7k | m_direct_distances = TRY(m_input_stream.read_bits(4)) << m_postfix_bits; |
688 | | |
689 | 16.7k | m_literal_context_modes.clear(); |
690 | 101k | for (size_t i = 0; i < m_literal_block.number_of_types; i++) { |
691 | 84.6k | size_t context_mode = TRY(m_input_stream.read_bits(2)); |
692 | 84.5k | m_literal_context_modes.append(context_mode); |
693 | 84.5k | } |
694 | | |
695 | 16.7k | m_context_mapping_literal.clear(); |
696 | 16.6k | size_t number_of_literal_codes = TRY(read_variable_length()); |
697 | 16.6k | if (number_of_literal_codes == 1) { |
698 | 4.59M | for (size_t i = 0; i < 64 * m_literal_block.number_of_types; i++) |
699 | 4.57M | m_context_mapping_literal.append(0); |
700 | 13.7k | } else { |
701 | 2.85k | TRY(read_context_map(number_of_literal_codes, m_context_mapping_literal, 64 * m_literal_block.number_of_types)); |
702 | 2.70k | } |
703 | | |
704 | 16.6k | m_context_mapping_distance.clear(); |
705 | 16.5k | size_t number_of_distance_codes = TRY(read_variable_length()); |
706 | 16.4k | if (number_of_distance_codes == 1) { |
707 | 107k | for (size_t i = 0; i < 4 * m_distance_block.number_of_types; i++) |
708 | 92.6k | m_context_mapping_distance.append(0); |
709 | 15.3k | } else { |
710 | 1.12k | TRY(read_context_map(number_of_distance_codes, m_context_mapping_distance, 4 * m_distance_block.number_of_types)); |
711 | 989 | } |
712 | | |
713 | 16.4k | m_literal_codes.clear(); |
714 | 56.4k | for (size_t i = 0; i < number_of_literal_codes; i++) { |
715 | 40.9k | m_literal_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 256))); |
716 | 40.1k | } |
717 | | |
718 | 16.2k | m_insert_and_copy_codes.clear(); |
719 | 36.2k | for (size_t i = 0; i < m_insert_and_copy_block.number_of_types; i++) { |
720 | 20.8k | m_insert_and_copy_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 704))); |
721 | 20.6k | } |
722 | | |
723 | 15.5k | m_distance_codes.clear(); |
724 | 41.8k | for (size_t i = 0; i < number_of_distance_codes; i++) { |
725 | 26.7k | m_distance_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 16 + m_direct_distances + (48 << m_postfix_bits)))); |
726 | 26.4k | } |
727 | | |
728 | 15.3k | m_current_state = State::CompressedCommand; |
729 | 15.1k | } |
730 | 1.60G | } else if (m_current_state == State::UncompressedData) { |
731 | 6.54k | size_t number_of_fitting_bytes = min(output_buffer.size() - bytes_read, m_bytes_left); |
732 | 6.54k | VERIFY(number_of_fitting_bytes > 0); |
733 | | |
734 | 6.54k | auto uncompressed_bytes = TRY(m_input_stream.read_some(output_buffer.slice(bytes_read, number_of_fitting_bytes))); |
735 | 6.54k | if (uncompressed_bytes.is_empty()) |
736 | 268 | return Error::from_string_literal("eof"); |
737 | | |
738 | | // TODO: Replace the home-grown LookbackBuffer with AK::CircularBuffer. |
739 | 6.28k | for (auto c : uncompressed_bytes) |
740 | 3.71M | m_lookback_buffer.value().write(c); |
741 | | |
742 | 6.28k | m_bytes_left -= uncompressed_bytes.size(); |
743 | 6.28k | bytes_read += uncompressed_bytes.size(); |
744 | | |
745 | | // If all bytes were read, return to the idle state |
746 | 6.28k | if (m_bytes_left == 0) |
747 | 5.18k | m_current_state = State::Idle; |
748 | 1.60G | } else if (m_current_state == State::CompressedCommand) { |
749 | 124M | if (m_insert_and_copy_block.length == 0) { |
750 | 259k | TRY(block_read_new_state(m_insert_and_copy_block)); |
751 | 259k | } |
752 | 124M | m_insert_and_copy_block.length--; |
753 | | |
754 | 124M | size_t insert_and_copy_symbol = TRY(m_insert_and_copy_codes[m_insert_and_copy_block.type].read_symbol(m_input_stream)); |
755 | | |
756 | 124M | size_t const insert_length_code_base[11] { 0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16 }; |
757 | 124M | size_t const copy_length_code_base[11] { 0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16 }; |
758 | 124M | bool const implicit_zero_distance[11] { true, true, false, false, false, false, false, false, false, false, false }; |
759 | | |
760 | 124M | size_t insert_and_copy_index = insert_and_copy_symbol >> 6; |
761 | 124M | size_t insert_length_code_offset = (insert_and_copy_symbol >> 3) & 0b111; |
762 | 124M | size_t copy_length_code_offset = insert_and_copy_symbol & 0b111; |
763 | | |
764 | 124M | size_t insert_length_code = insert_length_code_base[insert_and_copy_index] + insert_length_code_offset; |
765 | 124M | size_t copy_length_code = copy_length_code_base[insert_and_copy_index] + copy_length_code_offset; |
766 | | |
767 | 124M | m_implicit_zero_distance = implicit_zero_distance[insert_and_copy_index]; |
768 | | |
769 | 124M | size_t const insert_length_base[24] { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 }; |
770 | 124M | size_t const insert_length_extra[24] { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 }; |
771 | 124M | size_t const copy_length_base[24] { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 }; |
772 | 124M | size_t const copy_length_extra[24] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 }; |
773 | | |
774 | 124M | m_insert_length = insert_length_base[insert_length_code] + TRY(m_input_stream.read_bits(insert_length_extra[insert_length_code])); |
775 | 124M | m_copy_length = copy_length_base[copy_length_code] + TRY(m_input_stream.read_bits(copy_length_extra[copy_length_code])); |
776 | | |
777 | 124M | if (m_insert_length > 0) { |
778 | 58.9M | m_current_state = State::CompressedLiteral; |
779 | 65.7M | } else { |
780 | 65.7M | m_current_state = State::CompressedDistance; |
781 | 65.7M | } |
782 | 1.47G | } else if (m_current_state == State::CompressedLiteral) { |
783 | 395M | if (m_literal_block.length == 0) { |
784 | 24.3k | TRY(block_read_new_state(m_literal_block)); |
785 | 24.2k | } |
786 | 395M | m_literal_block.length--; |
787 | | |
788 | 395M | size_t literal_code_index = literal_code_index_from_context(); |
789 | 395M | size_t literal_value = TRY(m_literal_codes[literal_code_index].read_symbol(m_input_stream)); |
790 | | |
791 | 395M | output_buffer[bytes_read] = literal_value; |
792 | 395M | m_lookback_buffer.value().write(literal_value); |
793 | 395M | bytes_read++; |
794 | 395M | m_insert_length--; |
795 | 395M | m_bytes_left--; |
796 | | |
797 | 395M | if (m_bytes_left == 0) |
798 | 3.92k | m_current_state = State::Idle; |
799 | 395M | else if (m_insert_length == 0) |
800 | 58.9M | m_current_state = State::CompressedDistance; |
801 | 1.08G | } else if (m_current_state == State::CompressedDistance) { |
802 | 124M | size_t distance_symbol; |
803 | 124M | if (m_implicit_zero_distance) { |
804 | 67.8M | distance_symbol = 0; |
805 | 67.8M | } else { |
806 | 56.8M | if (m_distance_block.length == 0) { |
807 | 11.3k | TRY(block_read_new_state(m_distance_block)); |
808 | 11.2k | } |
809 | 56.8M | m_distance_block.length--; |
810 | | |
811 | 56.8M | size_t context_id = clamp(m_copy_length - 2, 0, 3); |
812 | 56.8M | size_t distance_code_index = m_context_mapping_distance[4 * m_distance_block.type + context_id]; |
813 | | |
814 | 56.8M | distance_symbol = TRY(m_distance_codes[distance_code_index].read_symbol(m_input_stream)); |
815 | 56.8M | } |
816 | | |
817 | 124M | size_t distance; |
818 | 124M | bool reuse_previous_distance = false; |
819 | 124M | if (distance_symbol < 16) { |
820 | 106M | switch (distance_symbol) { |
821 | 73.7M | case 0: |
822 | 73.7M | distance = m_distances[0]; |
823 | 73.7M | reuse_previous_distance = true; |
824 | 73.7M | break; |
825 | 9.35M | case 1: |
826 | 9.35M | distance = m_distances[1]; |
827 | 9.35M | break; |
828 | 611k | case 2: |
829 | 611k | distance = m_distances[2]; |
830 | 611k | break; |
831 | 1.94M | case 3: |
832 | 1.94M | distance = m_distances[3]; |
833 | 1.94M | break; |
834 | 6.47k | case 4: |
835 | 6.47k | distance = m_distances[0] - 1; |
836 | 6.47k | break; |
837 | 11.5M | case 5: |
838 | 11.5M | distance = m_distances[0] + 1; |
839 | 11.5M | break; |
840 | 22.8k | case 6: |
841 | 22.8k | distance = m_distances[0] - 2; |
842 | 22.8k | break; |
843 | 788k | case 7: |
844 | 788k | distance = m_distances[0] + 2; |
845 | 788k | break; |
846 | 10.4k | case 8: |
847 | 10.4k | distance = m_distances[0] - 3; |
848 | 10.4k | break; |
849 | 300k | case 9: |
850 | 300k | distance = m_distances[0] + 3; |
851 | 300k | break; |
852 | 23.3k | case 10: |
853 | 23.3k | distance = m_distances[1] - 1; |
854 | 23.3k | break; |
855 | 1.03M | case 11: |
856 | 1.03M | distance = m_distances[1] + 1; |
857 | 1.03M | break; |
858 | 3.03k | case 12: |
859 | 3.03k | distance = m_distances[1] - 2; |
860 | 3.03k | break; |
861 | 6.57M | case 13: |
862 | 6.57M | distance = m_distances[1] + 2; |
863 | 6.57M | break; |
864 | 6.62k | case 14: |
865 | 6.62k | distance = m_distances[1] - 3; |
866 | 6.62k | break; |
867 | 588k | case 15: |
868 | 588k | distance = m_distances[1] + 3; |
869 | 588k | break; |
870 | 106M | } |
871 | 106M | } else if (distance_symbol < 16 + m_direct_distances) { |
872 | 16.2M | distance = distance_symbol - 15; |
873 | 16.2M | } else { |
874 | 1.92M | size_t POSTFIX_MASK = (1 << m_postfix_bits) - 1; |
875 | | |
876 | 1.92M | size_t ndistbits = 1 + ((distance_symbol - m_direct_distances - 16) >> (m_postfix_bits + 1)); |
877 | 1.92M | size_t dextra = TRY(m_input_stream.read_bits(ndistbits)); |
878 | | |
879 | 1.92M | size_t hcode = (distance_symbol - m_direct_distances - 16) >> m_postfix_bits; |
880 | 1.92M | size_t lcode = (distance_symbol - m_direct_distances - 16) & POSTFIX_MASK; |
881 | 1.92M | size_t offset = ((2 + (hcode & 1)) << ndistbits) - 4; |
882 | 1.92M | distance = ((offset + dextra) << m_postfix_bits) + lcode + m_direct_distances + 1; |
883 | 1.92M | } |
884 | 124M | m_distance = distance; |
885 | | |
886 | 124M | size_t total_written = m_lookback_buffer.value().total_written(); |
887 | 124M | size_t max_lookback = min(total_written, m_window_size); |
888 | | |
889 | 124M | if (distance > max_lookback) { |
890 | 17.8M | size_t word_index = distance - (max_lookback + 1); |
891 | 17.8M | m_dictionary_data = TRY(BrotliDictionary::lookup_word(word_index, m_copy_length)); |
892 | 17.8M | m_copy_length = m_dictionary_data.size(); |
893 | | |
894 | 17.8M | if (m_copy_length == 0) |
895 | 1.91k | m_current_state = State::CompressedCommand; |
896 | 17.8M | else |
897 | 17.8M | m_current_state = State::CompressedDictionary; |
898 | 106M | } else { |
899 | 106M | if (!reuse_previous_distance) { |
900 | 33.0M | m_distances[3] = m_distances[2]; |
901 | 33.0M | m_distances[2] = m_distances[1]; |
902 | 33.0M | m_distances[1] = m_distances[0]; |
903 | 33.0M | m_distances[0] = distance; |
904 | 33.0M | } |
905 | | |
906 | 106M | m_current_state = State::CompressedCopy; |
907 | 106M | } |
908 | 955M | } else if (m_current_state == State::CompressedCopy) { |
909 | 847M | u8 copy_value = m_lookback_buffer.value().lookback(m_distance); |
910 | | |
911 | 847M | output_buffer[bytes_read] = copy_value; |
912 | 847M | m_lookback_buffer.value().write(copy_value); |
913 | 847M | bytes_read++; |
914 | 847M | m_copy_length--; |
915 | 847M | m_bytes_left--; |
916 | | |
917 | 847M | if (m_bytes_left == 0) |
918 | 8.92k | m_current_state = State::Idle; |
919 | 847M | else if (m_copy_length == 0) |
920 | 106M | m_current_state = State::CompressedCommand; |
921 | 847M | } else if (m_current_state == State::CompressedDictionary) { |
922 | 107M | size_t offset = m_dictionary_data.size() - m_copy_length; |
923 | 107M | u8 dictionary_value = m_dictionary_data[offset]; |
924 | | |
925 | 107M | output_buffer[bytes_read] = dictionary_value; |
926 | 107M | m_lookback_buffer.value().write(dictionary_value); |
927 | 107M | bytes_read++; |
928 | 107M | m_copy_length--; |
929 | 107M | m_bytes_left--; |
930 | | |
931 | 107M | if (m_bytes_left == 0) |
932 | 1.19k | m_current_state = State::Idle; |
933 | 107M | else if (m_copy_length == 0) |
934 | 17.8M | m_current_state = State::CompressedCommand; |
935 | 107M | } |
936 | 1.60G | } |
937 | | |
938 | 331k | return output_buffer.slice(0, bytes_read); |
939 | 335k | } |
940 | | |
941 | | bool BrotliDecompressionStream::is_eof() const |
942 | 337k | { |
943 | 337k | return m_read_final_block && m_current_state == State::Idle; |
944 | 337k | } |
945 | | |
946 | | } |