Coverage Report

Created: 2025-11-16 07:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibCompress/Brotli.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/BinarySearch.h>
8
#include <AK/QuickSort.h>
9
#include <LibCompress/Brotli.h>
10
#include <LibCompress/BrotliDictionary.h>
11
12
namespace Compress {
13
14
ErrorOr<size_t> Brotli::CanonicalCode::read_symbol(LittleEndianInputBitStream& input_stream) const
15
580M
{
16
580M
    size_t code_bits = 1;
17
18
659M
    while (code_bits < (1 << 16)) {
19
        // FIXME: This is very inefficient and could greatly be improved by implementing this
20
        //        algorithm: https://www.hanshq.net/zip.html#huffdec
21
659M
        size_t index;
22
659M
        if (binary_search(m_symbol_codes.span(), code_bits, &index))
23
580M
            return m_symbol_values[index];
24
25
78.2M
        code_bits = (code_bits << 1) | TRY(input_stream.read_bit());
26
78.2M
    }
27
28
114
    return Error::from_string_literal("no matching code found");
29
580M
}
30
31
BrotliDecompressionStream::BrotliDecompressionStream(MaybeOwned<Stream> stream)
32
5.59k
    : m_input_stream(move(stream))
33
5.59k
{
34
5.59k
}
35
36
ErrorOr<size_t> BrotliDecompressionStream::read_window_length()
37
5.59k
{
38
5.59k
    if (TRY(m_input_stream.read_bit())) {
39
2.15k
        switch (TRY(m_input_stream.read_bits(3))) {
40
441
        case 0: {
41
441
            switch (TRY(m_input_stream.read_bits(3))) {
42
99
            case 0:
43
99
                return 17;
44
2
            case 1:
45
2
                return Error::from_string_literal("invalid window length");
46
147
            case 2:
47
147
                return 10;
48
62
            case 3:
49
62
                return 11;
50
31
            case 4:
51
31
                return 12;
52
45
            case 5:
53
45
                return 13;
54
24
            case 6:
55
24
                return 14;
56
31
            case 7:
57
31
                return 15;
58
0
            default:
59
0
                VERIFY_NOT_REACHED();
60
441
            }
61
441
        }
62
133
        case 1:
63
133
            return 18;
64
146
        case 2:
65
146
            return 19;
66
94
        case 3:
67
94
            return 20;
68
114
        case 4:
69
114
            return 21;
70
673
        case 5:
71
673
            return 22;
72
299
        case 6:
73
299
            return 23;
74
258
        case 7:
75
258
            return 24;
76
0
        default:
77
0
            VERIFY_NOT_REACHED();
78
2.15k
        }
79
3.43k
    } else {
80
3.43k
        return 16;
81
3.43k
    }
82
5.59k
}
83
84
ErrorOr<size_t> BrotliDecompressionStream::read_size_number_of_nibbles()
85
75.5k
{
86
75.5k
    switch (TRY(m_input_stream.read_bits(2))) {
87
19.8k
    case 0:
88
19.8k
        return 4;
89
1.55k
    case 1:
90
1.55k
        return 5;
91
1.48k
    case 2:
92
1.48k
        return 6;
93
52.5k
    case 3:
94
52.5k
        return 0;
95
0
    default:
96
0
        VERIFY_NOT_REACHED();
97
75.5k
    }
98
75.5k
}
99
100
ErrorOr<size_t> BrotliDecompressionStream::read_variable_length()
101
84.4k
{
102
    //  Value    Bit Pattern
103
    //   -----    -----------
104
    //     1                0
105
    //     2             0001
106
    //   3..4           x0011
107
    //   5..8          xx0101
108
    //   9..16        xxx0111
109
    //  17..32       xxxx1001
110
    //  33..64      xxxxx1011
111
    //  65..128    xxxxxx1101
112
    // 129..256   xxxxxxx1111
113
114
84.4k
    if (TRY(m_input_stream.read_bit())) {
115
13.3k
        switch (TRY(m_input_stream.read_bits(3))) {
116
3.99k
        case 0:
117
3.99k
            return 2;
118
5.49k
        case 1:
119
5.49k
            return 3 + TRY(m_input_stream.read_bits(1));
120
1.75k
        case 2:
121
1.75k
            return 5 + TRY(m_input_stream.read_bits(2));
122
270
        case 3:
123
270
            return 9 + TRY(m_input_stream.read_bits(3));
124
364
        case 4:
125
364
            return 17 + TRY(m_input_stream.read_bits(4));
126
353
        case 5:
127
353
            return 33 + TRY(m_input_stream.read_bits(5));
128
262
        case 6:
129
262
            return 65 + TRY(m_input_stream.read_bits(6));
130
828
        case 7:
131
828
            return 129 + TRY(m_input_stream.read_bits(7));
132
0
        default:
133
0
            VERIFY_NOT_REACHED();
134
13.3k
        }
135
71.0k
    } else {
136
71.0k
        return 1;
137
71.0k
    }
138
84.4k
}
139
140
ErrorOr<size_t> Brotli::CanonicalCode::read_complex_prefix_code_length(LittleEndianInputBitStream& stream)
141
365k
{
142
    // Symbol   Code
143
    // ------   ----
144
    // 0          00
145
    // 1        0111
146
    // 2         011
147
    // 3          10
148
    // 4          01
149
    // 5        1111
150
151
365k
    switch (TRY(stream.read_bits(2))) {
152
208k
    case 0:
153
208k
        return 0;
154
36.2k
    case 1:
155
36.2k
        return 4;
156
42.8k
    case 2:
157
42.8k
        return 3;
158
77.1k
    case 3: {
159
154k
        if (TRY(stream.read_bit()) == 0) {
160
22.7k
            return 2;
161
54.4k
        } else {
162
108k
            if (TRY(stream.read_bit()) == 0) {
163
30.4k
                return 1;
164
30.4k
            } else {
165
23.9k
                return 5;
166
23.9k
            }
167
54.4k
        }
168
77.1k
    }
169
0
    default:
170
0
        VERIFY_NOT_REACHED();
171
364k
    }
172
364k
}
173
174
ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size)
175
110k
{
176
110k
    size_t hskip = TRY(stream.read_bits(2));
177
178
110k
    if (hskip == 1)
179
110k
        return TRY(read_simple_prefix_code(stream, alphabet_size));
180
181
36.3k
    return TRY(read_complex_prefix_code(stream, alphabet_size, hskip));
182
36.3k
}
183
184
ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_simple_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size)
185
74.2k
{
186
74.2k
    CanonicalCode code {};
187
188
74.2k
    size_t number_of_symbols = 1 + TRY(stream.read_bits(2));
189
190
74.1k
    size_t symbol_size = 0;
191
602k
    while ((1u << symbol_size) < alphabet_size)
192
528k
        symbol_size++;
193
194
74.1k
    Vector<size_t> symbols;
195
207k
    for (size_t i = 0; i < number_of_symbols; i++) {
196
133k
        size_t symbol = TRY(stream.read_bits(symbol_size));
197
132k
        symbols.append(symbol);
198
199
132k
        if (symbol >= alphabet_size)
200
32
            return Error::from_string_literal("symbol larger than alphabet");
201
132k
    }
202
203
73.9k
    if (number_of_symbols == 1) {
204
37.6k
        code.m_symbol_codes.append(0b1);
205
37.6k
        code.m_symbol_values = move(symbols);
206
37.6k
    } else if (number_of_symbols == 2) {
207
18.9k
        code.m_symbol_codes.extend({ 0b10, 0b11 });
208
18.9k
        if (symbols[0] > symbols[1])
209
6.20k
            swap(symbols[0], symbols[1]);
210
18.9k
        code.m_symbol_values = move(symbols);
211
18.9k
    } else if (number_of_symbols == 3) {
212
12.3k
        code.m_symbol_codes.extend({ 0b10, 0b110, 0b111 });
213
12.3k
        if (symbols[1] > symbols[2])
214
2.94k
            swap(symbols[1], symbols[2]);
215
12.3k
        code.m_symbol_values = move(symbols);
216
12.3k
    } else if (number_of_symbols == 4) {
217
5.11k
        bool tree_select = TRY(stream.read_bit());
218
5.11k
        if (tree_select) {
219
1.62k
            code.m_symbol_codes.extend({ 0b10, 0b110, 0b1110, 0b1111 });
220
1.62k
            if (symbols[2] > symbols[3])
221
589
                swap(symbols[2], symbols[3]);
222
1.62k
            code.m_symbol_values = move(symbols);
223
3.48k
        } else {
224
3.48k
            code.m_symbol_codes.extend({ 0b100, 0b101, 0b110, 0b111 });
225
3.48k
            quick_sort(symbols);
226
3.48k
            code.m_symbol_values = move(symbols);
227
3.48k
        }
228
5.11k
    }
229
230
73.9k
    return code;
231
73.9k
}
232
233
ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_complex_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size, size_t hskip)
234
36.3k
{
235
    // hskip should only be 0, 2 or 3
236
36.3k
    VERIFY(hskip != 1);
237
36.3k
    VERIFY(hskip <= 3);
238
239
    // Read the prefix code_value that is used to encode the actual prefix code_value
240
36.3k
    size_t const symbol_mapping[18] = { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
241
36.3k
    size_t code_length[18] { 0 };
242
36.3k
    size_t code_length_counts[6] { 0 };
243
244
36.3k
    size_t sum = 0;
245
36.3k
    size_t number_of_non_zero_symbols = 0;
246
375k
    for (size_t i = hskip; i < 18; i++) {
247
365k
        size_t len = TRY(read_complex_prefix_code_length(stream));
248
364k
        code_length[symbol_mapping[i]] = len;
249
250
364k
        if (len != 0) {
251
156k
            code_length_counts[len]++;
252
156k
            sum += (32 >> len);
253
156k
            number_of_non_zero_symbols++;
254
156k
        }
255
256
364k
        if (sum == 32)
257
24.9k
            break;
258
339k
        else if (sum > 32)
259
102
            return Error::from_string_literal("invalid prefix code");
260
364k
    }
261
262
35.5k
    CanonicalCode temp_code;
263
35.5k
    if (number_of_non_zero_symbols > 1) {
264
25.8k
        size_t code_value = 0;
265
155k
        for (size_t bits = 1; bits <= 5; bits++) {
266
129k
            code_value = (code_value + code_length_counts[bits - 1]) << 1;
267
129k
            size_t current_code_value = code_value;
268
269
2.45M
            for (size_t i = 0; i < 18; i++) {
270
2.32M
                size_t len = code_length[i];
271
2.32M
                if (len == bits) {
272
144k
                    temp_code.m_symbol_codes.append((1 << bits) | current_code_value);
273
144k
                    temp_code.m_symbol_values.append(i);
274
144k
                    current_code_value++;
275
144k
                }
276
2.32M
            }
277
129k
        }
278
25.8k
    } else {
279
104k
        for (size_t i = 0; i < 18; i++) {
280
104k
            size_t len = code_length[i];
281
104k
            if (len != 0) {
282
9.67k
                temp_code.m_symbol_codes.append(1);
283
9.67k
                temp_code.m_symbol_values.append(i);
284
9.67k
                break;
285
9.67k
            }
286
104k
        }
287
9.70k
    }
288
289
    // Read the actual prefix code_value
290
35.5k
    sum = 0;
291
35.5k
    size_t i = 0;
292
293
35.5k
    size_t previous_non_zero_code_length = 8;
294
35.5k
    size_t last_symbol = 0;
295
35.5k
    size_t last_repeat = 0;
296
297
35.5k
    Vector<size_t> result_symbols;
298
35.5k
    Vector<size_t> result_lengths;
299
35.5k
    size_t result_lengths_count[16] { 0 };
300
2.46M
    while (i < alphabet_size) {
301
2.45M
        auto symbol = TRY(temp_code.read_symbol(stream));
302
303
2.45M
        if (symbol < 16) {
304
2.25M
            result_symbols.append(i);
305
2.25M
            result_lengths.append(symbol);
306
2.25M
            result_lengths_count[symbol]++;
307
308
2.25M
            if (symbol != 0) {
309
1.58M
                previous_non_zero_code_length = symbol;
310
1.58M
                sum += (32768 >> symbol);
311
1.58M
                if (sum == 32768)
312
24.6k
                    break;
313
1.56M
                else if (sum > 32768)
314
46
                    return Error::from_string_literal("invalid prefix code");
315
1.58M
            }
316
317
2.23M
            last_repeat = 0;
318
2.23M
            i++;
319
2.23M
        } else if (symbol == 16) {
320
106k
            size_t repeat_count = 0;
321
106k
            if (last_symbol == 16 && last_repeat != 0) {
322
30.6k
                repeat_count = (4 * (last_repeat - 2));
323
76.2k
            } else {
324
76.2k
                last_repeat = 0;
325
76.2k
            }
326
106k
            repeat_count += 3 + TRY(stream.read_bits(2));
327
328
815k
            for (size_t rep = 0; rep < (repeat_count - last_repeat); rep++) {
329
711k
                result_symbols.append(i);
330
711k
                result_lengths.append(previous_non_zero_code_length);
331
711k
                result_lengths_count[previous_non_zero_code_length]++;
332
333
711k
                if (previous_non_zero_code_length != 0) {
334
711k
                    sum += (32768 >> previous_non_zero_code_length);
335
711k
                    if (sum == 32768)
336
2.55k
                        break;
337
709k
                    else if (sum > 32768)
338
12
                        return Error::from_string_literal("invalid prefix code");
339
711k
                }
340
341
709k
                i++;
342
709k
                if (i >= alphabet_size)
343
607
                    break;
344
709k
            }
345
106k
            if (sum == 32768)
346
2.55k
                break;
347
104k
            VERIFY(sum < 32768);
348
349
104k
            last_repeat = repeat_count;
350
104k
        } else if (symbol == 17) {
351
91.4k
            size_t repeat_count = 0;
352
91.4k
            if (last_symbol == 17 && last_repeat != 0) {
353
28.8k
                repeat_count = (8 * (last_repeat - 2));
354
62.5k
            } else {
355
62.5k
                last_repeat = 0;
356
62.5k
            }
357
91.4k
            repeat_count += 3 + TRY(stream.read_bits(3));
358
359
91.4k
            i += (repeat_count - last_repeat);
360
91.4k
            last_repeat = repeat_count;
361
91.4k
        }
362
363
2.42M
        last_symbol = symbol;
364
2.42M
    }
365
35.1k
    result_lengths_count[0] = 0;
366
367
35.1k
    CanonicalCode final_code;
368
369
35.1k
    size_t code_value = 0;
370
561k
    for (size_t bits = 1; bits < 16; bits++) {
371
526k
        code_value = (code_value + result_lengths_count[bits - 1]) << 1;
372
526k
        size_t current_code_value = code_value;
373
374
44.8M
        for (size_t n = 0; n < result_symbols.size(); n++) {
375
44.2M
            size_t len = result_lengths[n];
376
44.2M
            if (len == bits) {
377
2.28M
                final_code.m_symbol_codes.append((1 << bits) | current_code_value);
378
2.28M
                final_code.m_symbol_values.append(result_symbols[n]);
379
2.28M
                current_code_value++;
380
2.28M
            }
381
44.2M
        }
382
526k
    }
383
384
35.1k
    return final_code;
385
35.5k
}
386
387
static void inverse_move_to_front_transform(Span<u8> v)
388
2.92k
{
389
    // RFC 7932 section 7.3
390
2.92k
    u8 mtf[256];
391
752k
    for (size_t i = 0; i < 256; ++i) {
392
749k
        mtf[i] = (u8)i;
393
749k
    }
394
3.52M
    for (size_t i = 0; i < v.size(); ++i) {
395
3.52M
        u8 index = v[i];
396
3.52M
        u8 value = mtf[index];
397
3.52M
        v[i] = value;
398
9.57M
        for (; index; --index) {
399
6.05M
            mtf[index] = mtf[index - 1];
400
6.05M
        }
401
3.52M
        mtf[0] = value;
402
3.52M
    }
403
2.92k
}
404
405
ErrorOr<void> BrotliDecompressionStream::read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size)
406
3.97k
{
407
3.97k
    bool use_run_length_encoding = TRY(m_input_stream.read_bit());
408
3.95k
    size_t run_length_encoding_max = 0;
409
3.95k
    if (use_run_length_encoding) {
410
2.20k
        run_length_encoding_max = 1 + TRY(m_input_stream.read_bits(4));
411
2.18k
    }
412
413
3.95k
    auto const code = TRY(CanonicalCode::read_prefix_code(m_input_stream, number_of_codes + run_length_encoding_max));
414
415
3.82k
    size_t i = 0;
416
396k
    while (i < context_map_size) {
417
392k
        size_t symbol = TRY(code.read_symbol(m_input_stream));
418
419
392k
        if (symbol <= run_length_encoding_max) {
420
107k
            size_t repeat_base = 1 << symbol;
421
107k
            size_t repeat_additional = TRY(m_input_stream.read_bits(symbol));
422
107k
            size_t repeat_count = repeat_base + repeat_additional;
423
3.83M
            while (repeat_count--) {
424
3.72M
                context_map.append(0);
425
3.72M
                i++;
426
3.72M
            }
427
285k
        } else {
428
285k
            size_t value = symbol - run_length_encoding_max;
429
285k
            context_map.append(value);
430
285k
            i++;
431
285k
        }
432
392k
    }
433
434
3.82k
    bool inverse_move_to_front = TRY(m_input_stream.read_bit());
435
3.69k
    if (inverse_move_to_front)
436
2.92k
        inverse_move_to_front_transform(context_map.span());
437
438
3.69k
    return {};
439
3.71k
}
440
441
ErrorOr<void> BrotliDecompressionStream::read_block_configuration(Block& block)
442
51.2k
{
443
51.2k
    size_t blocks_of_type = TRY(read_variable_length());
444
445
51.2k
    block.type = 0;
446
51.2k
    block.type_previous = 1;
447
51.2k
    block.number_of_types = blocks_of_type;
448
449
51.2k
    if (blocks_of_type == 1) {
450
41.8k
        block.length = 16 * MiB;
451
41.8k
        block.type_code = {};
452
41.8k
        block.length_code = {};
453
41.8k
    } else {
454
9.31k
        block.type_code = TRY(CanonicalCode::read_prefix_code(m_input_stream, 2 + blocks_of_type));
455
9.08k
        block.length_code = TRY(CanonicalCode::read_prefix_code(m_input_stream, 26));
456
8.84k
        TRY(block_update_length(block));
457
8.76k
    }
458
459
51.2k
    return {};
460
51.2k
}
461
462
ErrorOr<void> BrotliDecompressionStream::block_update_length(Block& block)
463
303k
{
464
303k
    size_t const block_length_code_base[26] { 1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625 };
465
303k
    size_t const block_length_code_extra[26] { 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24 };
466
467
303k
    size_t symbol = TRY(block.length_code.read_symbol(m_input_stream));
468
303k
    size_t block_length = block_length_code_base[symbol] + TRY(m_input_stream.read_bits(block_length_code_extra[symbol]));
469
470
303k
    block.length = block_length;
471
303k
    return {};
472
303k
}
473
474
ErrorOr<void> BrotliDecompressionStream::block_read_new_state(Block& block)
475
295k
{
476
295k
    size_t block_type_symbol = TRY(block.type_code.read_symbol(m_input_stream));
477
295k
    TRY(block_update_length(block));
478
479
294k
    if (block_type_symbol == 0) {
480
275k
        swap(block.type, block.type_previous);
481
275k
    } else if (block_type_symbol == 1) {
482
11.1k
        block.type_previous = block.type;
483
11.1k
        block.type = (block.type + 1) % block.number_of_types;
484
11.1k
    } else {
485
8.48k
        block.type_previous = block.type;
486
8.48k
        block.type = block_type_symbol - 2;
487
8.48k
    }
488
489
294k
    return {};
490
295k
}
491
492
size_t BrotliDecompressionStream::literal_code_index_from_context()
493
395M
{
494
395M
    size_t const context_id_lut0[256] {
495
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
496
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497
395M
        8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
498
395M
        44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
499
395M
        12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
500
395M
        52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
501
395M
        12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
502
395M
        60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
503
395M
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
504
395M
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
505
395M
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
506
395M
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
507
395M
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
508
395M
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
509
395M
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
510
395M
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3
511
395M
    };
512
395M
    size_t const context_id_lut1[256] {
513
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515
395M
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
516
395M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
517
395M
        1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
518
395M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
519
395M
        1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
520
395M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
521
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526
395M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527
395M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
528
395M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
529
395M
    };
530
395M
    size_t const context_id_lut2[256] {
531
395M
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
532
395M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
533
395M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
534
395M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
535
395M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
536
395M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
537
395M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
538
395M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
539
395M
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
540
395M
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
541
395M
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
542
395M
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
543
395M
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
544
395M
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
545
395M
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
546
395M
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7
547
395M
    };
548
549
395M
    size_t context_mode = m_literal_context_modes[m_literal_block.type];
550
395M
    size_t context_id;
551
395M
    switch (context_mode) {
552
89.7M
    case 0:
553
89.7M
        context_id = m_lookback_buffer.value().lookback(1, 0) & 0x3f;
554
89.7M
        break;
555
90.0M
    case 1:
556
90.0M
        context_id = m_lookback_buffer.value().lookback(1, 0) >> 2;
557
90.0M
        break;
558
132M
    case 2:
559
132M
        context_id = context_id_lut0[m_lookback_buffer.value().lookback(1, 0)] | context_id_lut1[m_lookback_buffer.value().lookback(2, 0)];
560
132M
        break;
561
84.0M
    case 3:
562
84.0M
        context_id = (context_id_lut2[m_lookback_buffer.value().lookback(1, 0)] << 3) | context_id_lut2[m_lookback_buffer.value().lookback(2, 0)];
563
84.0M
        break;
564
0
    default:
565
0
        VERIFY_NOT_REACHED();
566
395M
    }
567
568
395M
    size_t literal_code_index = m_context_mapping_literal[64 * m_literal_block.type + context_id];
569
395M
    return literal_code_index;
570
395M
}
571
572
ErrorOr<Bytes> BrotliDecompressionStream::read_some(Bytes output_buffer)
573
335k
{
574
335k
    size_t bytes_read = 0;
575
1.60G
    while (bytes_read < output_buffer.size()) {
576
1.60G
        if (m_current_state == State::WindowSize) {
577
5.59k
            size_t window_bits = TRY(read_window_length());
578
5.58k
            m_window_size = (1 << window_bits) - 16;
579
580
5.58k
            m_lookback_buffer = TRY(LookbackBuffer::try_create(m_window_size));
581
582
5.58k
            m_current_state = State::Idle;
583
1.60G
        } else if (m_current_state == State::Idle) {
584
            // If the final block was read, we are done decompressing
585
77.1k
            if (m_read_final_block)
586
1.01k
                break;
587
588
            // RFC 7932 section 9.1
589
            //
590
            // 1 bit:  ISLAST, set to 1 if this is the last meta-block
591
76.1k
            m_read_final_block = TRY(m_input_stream.read_bit());
592
76.0k
            if (m_read_final_block) {
593
                // 1 bit:  ISLASTEMPTY, if set to 1, the meta-block is empty; this
594
                //       field is only present if ISLAST bit is set -- if it is 1,
595
                //       then the meta-block and the brotli stream ends at that
596
                //       bit, with any remaining bits in the last byte of the
597
                //       compressed stream filled with zeros (if the fill bits are
598
                //       not zero, then the stream should be rejected as invalid)
599
2.26k
                bool is_last_block_empty = TRY(m_input_stream.read_bit());
600
                // If the last block is empty we are done decompressing
601
2.23k
                if (is_last_block_empty)
602
426
                    break;
603
2.23k
            }
604
605
            // 2 bits: MNIBBLES, number of nibbles to represent the uncompressed
606
            //         length
607
75.5k
            size_t size_number_of_nibbles = TRY(read_size_number_of_nibbles());
608
609
            // If MNIBBLES is 0, the meta-block is empty, i.e., it does
610
            // not generate any uncompressed data.  In this case, the
611
            // rest of the meta-block has the following format:
612
75.5k
            if (size_number_of_nibbles == 0) {
613
614
                // 1 bit:  reserved, must be zero
615
52.5k
                bool reserved = TRY(m_input_stream.read_bit());
616
52.5k
                if (reserved)
617
7
                    return Error::from_string_literal("invalid reserved bit");
618
619
                // 2 bits: MSKIPBYTES, number of bytes to represent
620
                //         metadata length
621
                //
622
                // MSKIPBYTES * 8 bits: MSKIPLEN - 1, where MSKIPLEN is
623
                //    the number of metadata bytes; this field is
624
                //    only present if MSKIPBYTES is positive;
625
                //    otherwise, MSKIPLEN is 0 (if MSKIPBYTES is
626
                //    greater than 1, and the last byte is all
627
                //    zeros, then the stream should be rejected as
628
                //    invalid)
629
52.5k
                size_t skip_bytes = TRY(m_input_stream.read_bits(2));
630
52.5k
                if (skip_bytes == 0) {
631
                    // 0..7 bits: fill bits until the next byte boundary,
632
                    //         must be all zeros
633
43.0k
                    u8 remainder = m_input_stream.align_to_byte_boundary();
634
43.0k
                    if (remainder != 0)
635
20
                        return Error::from_string_literal("remainder bits are non-zero");
636
43.0k
                    continue;
637
43.0k
                }
638
639
                // MSKIPLEN bytes of metadata, not part of the
640
                //         uncompressed data or the sliding window
641
9.53k
                size_t skip_length = 1 + TRY(m_input_stream.read_bits(8 * skip_bytes));
642
643
9.51k
                u8 remainder = m_input_stream.align_to_byte_boundary();
644
9.51k
                if (remainder != 0)
645
14
                    return Error::from_string_literal("remainder bits are non-zero");
646
647
                // Discard meta-data bytes
648
9.50k
                u8 temp_buffer[4096];
649
9.50k
                Bytes temp_bytes { temp_buffer, 4096 };
650
19.4k
                while (skip_length > 0) {
651
10.1k
                    Bytes temp_bytes_slice = temp_bytes.slice(0, min(4096, skip_length));
652
10.1k
                    auto metadata_bytes = TRY(m_input_stream.read_some(temp_bytes_slice));
653
10.1k
                    if (metadata_bytes.is_empty())
654
169
                        return Error::from_string_literal("eof");
655
10.0k
                    if (metadata_bytes.last() == 0)
656
15
                        return Error::from_string_literal("invalid stream");
657
9.99k
                    skip_length -= metadata_bytes.size();
658
9.99k
                }
659
660
9.31k
                continue;
661
9.50k
            }
662
663
22.9k
            size_t uncompressed_size = 1 + TRY(m_input_stream.read_bits(4 * size_number_of_nibbles));
664
665
            // 1 bit:  ISUNCOMPRESSED, if set to 1, any bits of compressed data
666
            //       up to the next byte boundary are ignored, and the rest of
667
            //       the meta-block contains MLEN bytes of literal data; this
668
            //       field is only present if the ISLAST bit is not set (if the
669
            //       ignored bits are not all zeros, the stream should be
670
            //       rejected as invalid)
671
22.8k
            bool is_uncompressed = false;
672
22.8k
            if (!m_read_final_block)
673
21.1k
                is_uncompressed = TRY(m_input_stream.read_bit());
674
675
22.8k
            m_bytes_left = uncompressed_size;
676
22.8k
            if (is_uncompressed) {
677
5.47k
                u8 remainder = m_input_stream.align_to_byte_boundary();
678
5.47k
                if (remainder != 0)
679
26
                    return Error::from_string_literal("remainder is non-zero");
680
5.44k
                m_current_state = State::UncompressedData;
681
17.3k
            } else {
682
17.3k
                TRY(read_block_configuration(m_literal_block));
683
17.0k
                TRY(read_block_configuration(m_insert_and_copy_block));
684
16.8k
                TRY(read_block_configuration(m_distance_block));
685
686
16.7k
                m_postfix_bits = TRY(m_input_stream.read_bits(2));
687
16.7k
                m_direct_distances = TRY(m_input_stream.read_bits(4)) << m_postfix_bits;
688
689
16.7k
                m_literal_context_modes.clear();
690
101k
                for (size_t i = 0; i < m_literal_block.number_of_types; i++) {
691
84.6k
                    size_t context_mode = TRY(m_input_stream.read_bits(2));
692
84.5k
                    m_literal_context_modes.append(context_mode);
693
84.5k
                }
694
695
16.7k
                m_context_mapping_literal.clear();
696
16.6k
                size_t number_of_literal_codes = TRY(read_variable_length());
697
16.6k
                if (number_of_literal_codes == 1) {
698
4.59M
                    for (size_t i = 0; i < 64 * m_literal_block.number_of_types; i++)
699
4.57M
                        m_context_mapping_literal.append(0);
700
13.7k
                } else {
701
2.85k
                    TRY(read_context_map(number_of_literal_codes, m_context_mapping_literal, 64 * m_literal_block.number_of_types));
702
2.70k
                }
703
704
16.6k
                m_context_mapping_distance.clear();
705
16.5k
                size_t number_of_distance_codes = TRY(read_variable_length());
706
16.4k
                if (number_of_distance_codes == 1) {
707
107k
                    for (size_t i = 0; i < 4 * m_distance_block.number_of_types; i++)
708
92.6k
                        m_context_mapping_distance.append(0);
709
15.3k
                } else {
710
1.12k
                    TRY(read_context_map(number_of_distance_codes, m_context_mapping_distance, 4 * m_distance_block.number_of_types));
711
989
                }
712
713
16.4k
                m_literal_codes.clear();
714
56.4k
                for (size_t i = 0; i < number_of_literal_codes; i++) {
715
40.9k
                    m_literal_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 256)));
716
40.1k
                }
717
718
16.2k
                m_insert_and_copy_codes.clear();
719
36.2k
                for (size_t i = 0; i < m_insert_and_copy_block.number_of_types; i++) {
720
20.8k
                    m_insert_and_copy_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 704)));
721
20.6k
                }
722
723
15.5k
                m_distance_codes.clear();
724
41.8k
                for (size_t i = 0; i < number_of_distance_codes; i++) {
725
26.7k
                    m_distance_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 16 + m_direct_distances + (48 << m_postfix_bits))));
726
26.4k
                }
727
728
15.3k
                m_current_state = State::CompressedCommand;
729
15.1k
            }
730
1.60G
        } else if (m_current_state == State::UncompressedData) {
731
6.54k
            size_t number_of_fitting_bytes = min(output_buffer.size() - bytes_read, m_bytes_left);
732
6.54k
            VERIFY(number_of_fitting_bytes > 0);
733
734
6.54k
            auto uncompressed_bytes = TRY(m_input_stream.read_some(output_buffer.slice(bytes_read, number_of_fitting_bytes)));
735
6.54k
            if (uncompressed_bytes.is_empty())
736
268
                return Error::from_string_literal("eof");
737
738
            // TODO: Replace the home-grown LookbackBuffer with AK::CircularBuffer.
739
6.28k
            for (auto c : uncompressed_bytes)
740
3.71M
                m_lookback_buffer.value().write(c);
741
742
6.28k
            m_bytes_left -= uncompressed_bytes.size();
743
6.28k
            bytes_read += uncompressed_bytes.size();
744
745
            // If all bytes were read, return to the idle state
746
6.28k
            if (m_bytes_left == 0)
747
5.18k
                m_current_state = State::Idle;
748
1.60G
        } else if (m_current_state == State::CompressedCommand) {
749
124M
            if (m_insert_and_copy_block.length == 0) {
750
259k
                TRY(block_read_new_state(m_insert_and_copy_block));
751
259k
            }
752
124M
            m_insert_and_copy_block.length--;
753
754
124M
            size_t insert_and_copy_symbol = TRY(m_insert_and_copy_codes[m_insert_and_copy_block.type].read_symbol(m_input_stream));
755
756
124M
            size_t const insert_length_code_base[11] { 0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16 };
757
124M
            size_t const copy_length_code_base[11] { 0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16 };
758
124M
            bool const implicit_zero_distance[11] { true, true, false, false, false, false, false, false, false, false, false };
759
760
124M
            size_t insert_and_copy_index = insert_and_copy_symbol >> 6;
761
124M
            size_t insert_length_code_offset = (insert_and_copy_symbol >> 3) & 0b111;
762
124M
            size_t copy_length_code_offset = insert_and_copy_symbol & 0b111;
763
764
124M
            size_t insert_length_code = insert_length_code_base[insert_and_copy_index] + insert_length_code_offset;
765
124M
            size_t copy_length_code = copy_length_code_base[insert_and_copy_index] + copy_length_code_offset;
766
767
124M
            m_implicit_zero_distance = implicit_zero_distance[insert_and_copy_index];
768
769
124M
            size_t const insert_length_base[24] { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
770
124M
            size_t const insert_length_extra[24] { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
771
124M
            size_t const copy_length_base[24] { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
772
124M
            size_t const copy_length_extra[24] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
773
774
124M
            m_insert_length = insert_length_base[insert_length_code] + TRY(m_input_stream.read_bits(insert_length_extra[insert_length_code]));
775
124M
            m_copy_length = copy_length_base[copy_length_code] + TRY(m_input_stream.read_bits(copy_length_extra[copy_length_code]));
776
777
124M
            if (m_insert_length > 0) {
778
58.9M
                m_current_state = State::CompressedLiteral;
779
65.7M
            } else {
780
65.7M
                m_current_state = State::CompressedDistance;
781
65.7M
            }
782
1.47G
        } else if (m_current_state == State::CompressedLiteral) {
783
395M
            if (m_literal_block.length == 0) {
784
24.3k
                TRY(block_read_new_state(m_literal_block));
785
24.2k
            }
786
395M
            m_literal_block.length--;
787
788
395M
            size_t literal_code_index = literal_code_index_from_context();
789
395M
            size_t literal_value = TRY(m_literal_codes[literal_code_index].read_symbol(m_input_stream));
790
791
395M
            output_buffer[bytes_read] = literal_value;
792
395M
            m_lookback_buffer.value().write(literal_value);
793
395M
            bytes_read++;
794
395M
            m_insert_length--;
795
395M
            m_bytes_left--;
796
797
395M
            if (m_bytes_left == 0)
798
3.92k
                m_current_state = State::Idle;
799
395M
            else if (m_insert_length == 0)
800
58.9M
                m_current_state = State::CompressedDistance;
801
1.08G
        } else if (m_current_state == State::CompressedDistance) {
802
124M
            size_t distance_symbol;
803
124M
            if (m_implicit_zero_distance) {
804
67.8M
                distance_symbol = 0;
805
67.8M
            } else {
806
56.8M
                if (m_distance_block.length == 0) {
807
11.3k
                    TRY(block_read_new_state(m_distance_block));
808
11.2k
                }
809
56.8M
                m_distance_block.length--;
810
811
56.8M
                size_t context_id = clamp(m_copy_length - 2, 0, 3);
812
56.8M
                size_t distance_code_index = m_context_mapping_distance[4 * m_distance_block.type + context_id];
813
814
56.8M
                distance_symbol = TRY(m_distance_codes[distance_code_index].read_symbol(m_input_stream));
815
56.8M
            }
816
817
124M
            size_t distance;
818
124M
            bool reuse_previous_distance = false;
819
124M
            if (distance_symbol < 16) {
820
106M
                switch (distance_symbol) {
821
73.7M
                case 0:
822
73.7M
                    distance = m_distances[0];
823
73.7M
                    reuse_previous_distance = true;
824
73.7M
                    break;
825
9.35M
                case 1:
826
9.35M
                    distance = m_distances[1];
827
9.35M
                    break;
828
611k
                case 2:
829
611k
                    distance = m_distances[2];
830
611k
                    break;
831
1.94M
                case 3:
832
1.94M
                    distance = m_distances[3];
833
1.94M
                    break;
834
6.47k
                case 4:
835
6.47k
                    distance = m_distances[0] - 1;
836
6.47k
                    break;
837
11.5M
                case 5:
838
11.5M
                    distance = m_distances[0] + 1;
839
11.5M
                    break;
840
22.8k
                case 6:
841
22.8k
                    distance = m_distances[0] - 2;
842
22.8k
                    break;
843
788k
                case 7:
844
788k
                    distance = m_distances[0] + 2;
845
788k
                    break;
846
10.4k
                case 8:
847
10.4k
                    distance = m_distances[0] - 3;
848
10.4k
                    break;
849
300k
                case 9:
850
300k
                    distance = m_distances[0] + 3;
851
300k
                    break;
852
23.3k
                case 10:
853
23.3k
                    distance = m_distances[1] - 1;
854
23.3k
                    break;
855
1.03M
                case 11:
856
1.03M
                    distance = m_distances[1] + 1;
857
1.03M
                    break;
858
3.03k
                case 12:
859
3.03k
                    distance = m_distances[1] - 2;
860
3.03k
                    break;
861
6.57M
                case 13:
862
6.57M
                    distance = m_distances[1] + 2;
863
6.57M
                    break;
864
6.62k
                case 14:
865
6.62k
                    distance = m_distances[1] - 3;
866
6.62k
                    break;
867
588k
                case 15:
868
588k
                    distance = m_distances[1] + 3;
869
588k
                    break;
870
106M
                }
871
106M
            } else if (distance_symbol < 16 + m_direct_distances) {
872
16.2M
                distance = distance_symbol - 15;
873
16.2M
            } else {
874
1.92M
                size_t POSTFIX_MASK = (1 << m_postfix_bits) - 1;
875
876
1.92M
                size_t ndistbits = 1 + ((distance_symbol - m_direct_distances - 16) >> (m_postfix_bits + 1));
877
1.92M
                size_t dextra = TRY(m_input_stream.read_bits(ndistbits));
878
879
1.92M
                size_t hcode = (distance_symbol - m_direct_distances - 16) >> m_postfix_bits;
880
1.92M
                size_t lcode = (distance_symbol - m_direct_distances - 16) & POSTFIX_MASK;
881
1.92M
                size_t offset = ((2 + (hcode & 1)) << ndistbits) - 4;
882
1.92M
                distance = ((offset + dextra) << m_postfix_bits) + lcode + m_direct_distances + 1;
883
1.92M
            }
884
124M
            m_distance = distance;
885
886
124M
            size_t total_written = m_lookback_buffer.value().total_written();
887
124M
            size_t max_lookback = min(total_written, m_window_size);
888
889
124M
            if (distance > max_lookback) {
890
17.8M
                size_t word_index = distance - (max_lookback + 1);
891
17.8M
                m_dictionary_data = TRY(BrotliDictionary::lookup_word(word_index, m_copy_length));
892
17.8M
                m_copy_length = m_dictionary_data.size();
893
894
17.8M
                if (m_copy_length == 0)
895
1.91k
                    m_current_state = State::CompressedCommand;
896
17.8M
                else
897
17.8M
                    m_current_state = State::CompressedDictionary;
898
106M
            } else {
899
106M
                if (!reuse_previous_distance) {
900
33.0M
                    m_distances[3] = m_distances[2];
901
33.0M
                    m_distances[2] = m_distances[1];
902
33.0M
                    m_distances[1] = m_distances[0];
903
33.0M
                    m_distances[0] = distance;
904
33.0M
                }
905
906
106M
                m_current_state = State::CompressedCopy;
907
106M
            }
908
955M
        } else if (m_current_state == State::CompressedCopy) {
909
847M
            u8 copy_value = m_lookback_buffer.value().lookback(m_distance);
910
911
847M
            output_buffer[bytes_read] = copy_value;
912
847M
            m_lookback_buffer.value().write(copy_value);
913
847M
            bytes_read++;
914
847M
            m_copy_length--;
915
847M
            m_bytes_left--;
916
917
847M
            if (m_bytes_left == 0)
918
8.92k
                m_current_state = State::Idle;
919
847M
            else if (m_copy_length == 0)
920
106M
                m_current_state = State::CompressedCommand;
921
847M
        } else if (m_current_state == State::CompressedDictionary) {
922
107M
            size_t offset = m_dictionary_data.size() - m_copy_length;
923
107M
            u8 dictionary_value = m_dictionary_data[offset];
924
925
107M
            output_buffer[bytes_read] = dictionary_value;
926
107M
            m_lookback_buffer.value().write(dictionary_value);
927
107M
            bytes_read++;
928
107M
            m_copy_length--;
929
107M
            m_bytes_left--;
930
931
107M
            if (m_bytes_left == 0)
932
1.19k
                m_current_state = State::Idle;
933
107M
            else if (m_copy_length == 0)
934
17.8M
                m_current_state = State::CompressedCommand;
935
107M
        }
936
1.60G
    }
937
938
331k
    return output_buffer.slice(0, bytes_read);
939
335k
}
940
941
bool BrotliDecompressionStream::is_eof() const
942
337k
{
943
337k
    return m_read_final_block && m_current_state == State::Idle;
944
337k
}
945
946
}