Coverage Report

Created: 2026-05-16 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibCompress/Brotli.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/BinarySearch.h>
8
#include <AK/QuickSort.h>
9
#include <LibCompress/Brotli.h>
10
#include <LibCompress/BrotliDictionary.h>
11
12
namespace Compress {
13
14
ErrorOr<size_t> Brotli::CanonicalCode::read_symbol(LittleEndianInputBitStream& input_stream) const
15
601M
{
16
601M
    size_t code_bits = 1;
17
18
689M
    while (code_bits < (1 << 16)) {
19
        // FIXME: This is very inefficient and could greatly be improved by implementing this
20
        //        algorithm: https://www.hanshq.net/zip.html#huffdec
21
689M
        size_t index;
22
689M
        if (binary_search(m_symbol_codes.span(), code_bits, &index))
23
601M
            return m_symbol_values[index];
24
25
87.6M
        code_bits = (code_bits << 1) | TRY(input_stream.read_bit());
26
87.6M
    }
27
28
114
    return Error::from_string_literal("no matching code found");
29
601M
}
30
31
BrotliDecompressionStream::BrotliDecompressionStream(MaybeOwned<Stream> stream)
32
6.02k
    : m_input_stream(move(stream))
33
6.02k
{
34
6.02k
}
35
36
ErrorOr<size_t> BrotliDecompressionStream::read_window_length()
37
6.02k
{
38
6.02k
    if (TRY(m_input_stream.read_bit())) {
39
2.39k
        switch (TRY(m_input_stream.read_bits(3))) {
40
439
        case 0: {
41
439
            switch (TRY(m_input_stream.read_bits(3))) {
42
108
            case 0:
43
108
                return 17;
44
2
            case 1:
45
2
                return Error::from_string_literal("invalid window length");
46
136
            case 2:
47
136
                return 10;
48
69
            case 3:
49
69
                return 11;
50
25
            case 4:
51
25
                return 12;
52
41
            case 5:
53
41
                return 13;
54
26
            case 6:
55
26
                return 14;
56
32
            case 7:
57
32
                return 15;
58
0
            default:
59
0
                VERIFY_NOT_REACHED();
60
439
            }
61
439
        }
62
141
        case 1:
63
141
            return 18;
64
156
        case 2:
65
156
            return 19;
66
110
        case 3:
67
110
            return 20;
68
149
        case 4:
69
149
            return 21;
70
726
        case 5:
71
726
            return 22;
72
316
        case 6:
73
316
            return 23;
74
362
        case 7:
75
362
            return 24;
76
0
        default:
77
0
            VERIFY_NOT_REACHED();
78
2.39k
        }
79
3.62k
    } else {
80
3.62k
        return 16;
81
3.62k
    }
82
6.02k
}
83
84
ErrorOr<size_t> BrotliDecompressionStream::read_size_number_of_nibbles()
85
59.7k
{
86
59.7k
    switch (TRY(m_input_stream.read_bits(2))) {
87
17.6k
    case 0:
88
17.6k
        return 4;
89
1.66k
    case 1:
90
1.66k
        return 5;
91
1.48k
    case 2:
92
1.48k
        return 6;
93
38.8k
    case 3:
94
38.8k
        return 0;
95
0
    default:
96
0
        VERIFY_NOT_REACHED();
97
59.6k
    }
98
59.6k
}
99
100
ErrorOr<size_t> BrotliDecompressionStream::read_variable_length()
101
79.4k
{
102
    //  Value    Bit Pattern
103
    //   -----    -----------
104
    //     1                0
105
    //     2             0001
106
    //   3..4           x0011
107
    //   5..8          xx0101
108
    //   9..16        xxx0111
109
    //  17..32       xxxx1001
110
    //  33..64      xxxxx1011
111
    //  65..128    xxxxxx1101
112
    // 129..256   xxxxxxx1111
113
114
79.4k
    if (TRY(m_input_stream.read_bit())) {
115
12.6k
        switch (TRY(m_input_stream.read_bits(3))) {
116
3.86k
        case 0:
117
3.86k
            return 2;
118
4.30k
        case 1:
119
4.30k
            return 3 + TRY(m_input_stream.read_bits(1));
120
2.35k
        case 2:
121
2.35k
            return 5 + TRY(m_input_stream.read_bits(2));
122
278
        case 3:
123
278
            return 9 + TRY(m_input_stream.read_bits(3));
124
378
        case 4:
125
378
            return 17 + TRY(m_input_stream.read_bits(4));
126
355
        case 5:
127
355
            return 33 + TRY(m_input_stream.read_bits(5));
128
262
        case 6:
129
262
            return 65 + TRY(m_input_stream.read_bits(6));
130
832
        case 7:
131
832
            return 129 + TRY(m_input_stream.read_bits(7));
132
0
        default:
133
0
            VERIFY_NOT_REACHED();
134
12.6k
        }
135
66.7k
    } else {
136
66.7k
        return 1;
137
66.7k
    }
138
79.4k
}
139
140
ErrorOr<size_t> Brotli::CanonicalCode::read_complex_prefix_code_length(LittleEndianInputBitStream& stream)
141
361k
{
142
    // Symbol   Code
143
    // ------   ----
144
    // 0          00
145
    // 1        0111
146
    // 2         011
147
    // 3          10
148
    // 4          01
149
    // 5        1111
150
151
361k
    switch (TRY(stream.read_bits(2))) {
152
209k
    case 0:
153
209k
        return 0;
154
35.2k
    case 1:
155
35.2k
        return 4;
156
41.3k
    case 2:
157
41.3k
        return 3;
158
74.7k
    case 3: {
159
149k
        if (TRY(stream.read_bit()) == 0) {
160
22.2k
            return 2;
161
52.4k
        } else {
162
104k
            if (TRY(stream.read_bit()) == 0) {
163
29.8k
                return 1;
164
29.8k
            } else {
165
22.6k
                return 5;
166
22.6k
            }
167
52.4k
        }
168
74.7k
    }
169
0
    default:
170
0
        VERIFY_NOT_REACHED();
171
360k
    }
172
360k
}
173
174
ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size)
175
107k
{
176
107k
    size_t hskip = TRY(stream.read_bits(2));
177
178
107k
    if (hskip == 1)
179
107k
        return TRY(read_simple_prefix_code(stream, alphabet_size));
180
181
35.5k
    return TRY(read_complex_prefix_code(stream, alphabet_size, hskip));
182
35.5k
}
183
184
ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_simple_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size)
185
71.9k
{
186
71.9k
    CanonicalCode code {};
187
188
71.9k
    size_t number_of_symbols = 1 + TRY(stream.read_bits(2));
189
190
71.8k
    size_t symbol_size = 0;
191
589k
    while ((1u << symbol_size) < alphabet_size)
192
517k
        symbol_size++;
193
194
71.8k
    Vector<size_t> symbols;
195
197k
    for (size_t i = 0; i < number_of_symbols; i++) {
196
126k
        size_t symbol = TRY(stream.read_bits(symbol_size));
197
126k
        symbols.append(symbol);
198
199
126k
        if (symbol >= alphabet_size)
200
36
            return Error::from_string_literal("symbol larger than alphabet");
201
126k
    }
202
203
71.7k
    if (number_of_symbols == 1) {
204
37.2k
        code.m_symbol_codes.append(0b1);
205
37.2k
        code.m_symbol_values = move(symbols);
206
37.2k
    } else if (number_of_symbols == 2) {
207
19.6k
        code.m_symbol_codes.extend({ 0b10, 0b11 });
208
19.6k
        if (symbols[0] > symbols[1])
209
7.47k
            swap(symbols[0], symbols[1]);
210
19.6k
        code.m_symbol_values = move(symbols);
211
19.6k
    } else if (number_of_symbols == 3) {
212
9.75k
        code.m_symbol_codes.extend({ 0b10, 0b110, 0b111 });
213
9.75k
        if (symbols[1] > symbols[2])
214
2.39k
            swap(symbols[1], symbols[2]);
215
9.75k
        code.m_symbol_values = move(symbols);
216
9.75k
    } else if (number_of_symbols == 4) {
217
5.05k
        bool tree_select = TRY(stream.read_bit());
218
5.04k
        if (tree_select) {
219
1.69k
            code.m_symbol_codes.extend({ 0b10, 0b110, 0b1110, 0b1111 });
220
1.69k
            if (symbols[2] > symbols[3])
221
626
                swap(symbols[2], symbols[3]);
222
1.69k
            code.m_symbol_values = move(symbols);
223
3.35k
        } else {
224
3.35k
            code.m_symbol_codes.extend({ 0b100, 0b101, 0b110, 0b111 });
225
3.35k
            quick_sort(symbols);
226
3.35k
            code.m_symbol_values = move(symbols);
227
3.35k
        }
228
5.04k
    }
229
230
71.7k
    return code;
231
71.7k
}
232
233
ErrorOr<Brotli::CanonicalCode> Brotli::CanonicalCode::read_complex_prefix_code(LittleEndianInputBitStream& stream, size_t alphabet_size, size_t hskip)
234
35.5k
{
235
    // hskip should only be 0, 2 or 3
236
35.5k
    VERIFY(hskip != 1);
237
35.5k
    VERIFY(hskip <= 3);
238
239
    // Read the prefix code_value that is used to encode the actual prefix code_value
240
35.5k
    size_t const symbol_mapping[18] = { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
241
35.5k
    size_t code_length[18] { 0 };
242
35.5k
    size_t code_length_counts[6] { 0 };
243
244
35.5k
    size_t sum = 0;
245
35.5k
    size_t number_of_non_zero_symbols = 0;
246
371k
    for (size_t i = hskip; i < 18; i++) {
247
361k
        size_t len = TRY(read_complex_prefix_code_length(stream));
248
360k
        code_length[symbol_mapping[i]] = len;
249
250
360k
        if (len != 0) {
251
151k
            code_length_counts[len]++;
252
151k
            sum += (32 >> len);
253
151k
            number_of_non_zero_symbols++;
254
151k
        }
255
256
360k
        if (sum == 32)
257
24.1k
            break;
258
336k
        else if (sum > 32)
259
97
            return Error::from_string_literal("invalid prefix code");
260
360k
    }
261
262
34.8k
    CanonicalCode temp_code;
263
34.8k
    if (number_of_non_zero_symbols > 1) {
264
25.2k
        size_t code_value = 0;
265
151k
        for (size_t bits = 1; bits <= 5; bits++) {
266
126k
            code_value = (code_value + code_length_counts[bits - 1]) << 1;
267
126k
            size_t current_code_value = code_value;
268
269
2.39M
            for (size_t i = 0; i < 18; i++) {
270
2.26M
                size_t len = code_length[i];
271
2.26M
                if (len == bits) {
272
140k
                    temp_code.m_symbol_codes.append((1 << bits) | current_code_value);
273
140k
                    temp_code.m_symbol_values.append(i);
274
140k
                    current_code_value++;
275
140k
                }
276
2.26M
            }
277
126k
        }
278
25.2k
    } else {
279
103k
        for (size_t i = 0; i < 18; i++) {
280
103k
            size_t len = code_length[i];
281
103k
            if (len != 0) {
282
9.56k
                temp_code.m_symbol_codes.append(1);
283
9.56k
                temp_code.m_symbol_values.append(i);
284
9.56k
                break;
285
9.56k
            }
286
103k
        }
287
9.59k
    }
288
289
    // Read the actual prefix code_value
290
34.8k
    sum = 0;
291
34.8k
    size_t i = 0;
292
293
34.8k
    size_t previous_non_zero_code_length = 8;
294
34.8k
    size_t last_symbol = 0;
295
34.8k
    size_t last_repeat = 0;
296
297
34.8k
    Vector<size_t> result_symbols;
298
34.8k
    Vector<size_t> result_lengths;
299
34.8k
    size_t result_lengths_count[16] { 0 };
300
2.33M
    while (i < alphabet_size) {
301
2.32M
        auto symbol = TRY(temp_code.read_symbol(stream));
302
303
2.32M
        if (symbol < 16) {
304
2.12M
            result_symbols.append(i);
305
2.12M
            result_lengths.append(symbol);
306
2.12M
            result_lengths_count[symbol]++;
307
308
2.12M
            if (symbol != 0) {
309
1.55M
                previous_non_zero_code_length = symbol;
310
1.55M
                sum += (32768 >> symbol);
311
1.55M
                if (sum == 32768)
312
23.8k
                    break;
313
1.52M
                else if (sum > 32768)
314
47
                    return Error::from_string_literal("invalid prefix code");
315
1.55M
            }
316
317
2.09M
            last_repeat = 0;
318
2.09M
            i++;
319
2.09M
        } else if (symbol == 16) {
320
108k
            size_t repeat_count = 0;
321
108k
            if (last_symbol == 16 && last_repeat != 0) {
322
31.7k
                repeat_count = (4 * (last_repeat - 2));
323
77.0k
            } else {
324
77.0k
                last_repeat = 0;
325
77.0k
            }
326
108k
            repeat_count += 3 + TRY(stream.read_bits(2));
327
328
832k
            for (size_t rep = 0; rep < (repeat_count - last_repeat); rep++) {
329
726k
                result_symbols.append(i);
330
726k
                result_lengths.append(previous_non_zero_code_length);
331
726k
                result_lengths_count[previous_non_zero_code_length]++;
332
333
726k
                if (previous_non_zero_code_length != 0) {
334
726k
                    sum += (32768 >> previous_non_zero_code_length);
335
726k
                    if (sum == 32768)
336
2.59k
                        break;
337
723k
                    else if (sum > 32768)
338
11
                        return Error::from_string_literal("invalid prefix code");
339
726k
                }
340
341
723k
                i++;
342
723k
                if (i >= alphabet_size)
343
603
                    break;
344
723k
            }
345
108k
            if (sum == 32768)
346
2.59k
                break;
347
106k
            VERIFY(sum < 32768);
348
349
106k
            last_repeat = repeat_count;
350
106k
        } else if (symbol == 17) {
351
95.0k
            size_t repeat_count = 0;
352
95.0k
            if (last_symbol == 17 && last_repeat != 0) {
353
29.9k
                repeat_count = (8 * (last_repeat - 2));
354
65.0k
            } else {
355
65.0k
                last_repeat = 0;
356
65.0k
            }
357
95.0k
            repeat_count += 3 + TRY(stream.read_bits(3));
358
359
95.0k
            i += (repeat_count - last_repeat);
360
95.0k
            last_repeat = repeat_count;
361
95.0k
        }
362
363
2.29M
        last_symbol = symbol;
364
2.29M
    }
365
34.3k
    result_lengths_count[0] = 0;
366
367
34.3k
    CanonicalCode final_code;
368
369
34.3k
    size_t code_value = 0;
370
549k
    for (size_t bits = 1; bits < 16; bits++) {
371
515k
        code_value = (code_value + result_lengths_count[bits - 1]) << 1;
372
515k
        size_t current_code_value = code_value;
373
374
42.9M
        for (size_t n = 0; n < result_symbols.size(); n++) {
375
42.4M
            size_t len = result_lengths[n];
376
42.4M
            if (len == bits) {
377
2.26M
                final_code.m_symbol_codes.append((1 << bits) | current_code_value);
378
2.26M
                final_code.m_symbol_values.append(result_symbols[n]);
379
2.26M
                current_code_value++;
380
2.26M
            }
381
42.4M
        }
382
515k
    }
383
384
34.3k
    return final_code;
385
34.8k
}
386
387
static void inverse_move_to_front_transform(Span<u8> v)
388
3.26k
{
389
    // RFC 7932 section 7.3
390
3.26k
    u8 mtf[256];
391
839k
    for (size_t i = 0; i < 256; ++i) {
392
836k
        mtf[i] = (u8)i;
393
836k
    }
394
3.26M
    for (size_t i = 0; i < v.size(); ++i) {
395
3.25M
        u8 index = v[i];
396
3.25M
        u8 value = mtf[index];
397
3.25M
        v[i] = value;
398
6.59M
        for (; index; --index) {
399
3.33M
            mtf[index] = mtf[index - 1];
400
3.33M
        }
401
3.25M
        mtf[0] = value;
402
3.25M
    }
403
3.26k
}
404
405
ErrorOr<void> BrotliDecompressionStream::read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size)
406
4.35k
{
407
4.35k
    bool use_run_length_encoding = TRY(m_input_stream.read_bit());
408
4.33k
    size_t run_length_encoding_max = 0;
409
4.33k
    if (use_run_length_encoding) {
410
2.46k
        run_length_encoding_max = 1 + TRY(m_input_stream.read_bits(4));
411
2.44k
    }
412
413
4.33k
    auto const code = TRY(CanonicalCode::read_prefix_code(m_input_stream, number_of_codes + run_length_encoding_max));
414
415
4.18k
    size_t i = 0;
416
420k
    while (i < context_map_size) {
417
416k
        size_t symbol = TRY(code.read_symbol(m_input_stream));
418
419
415k
        if (symbol <= run_length_encoding_max) {
420
133k
            size_t repeat_base = 1 << symbol;
421
133k
            size_t repeat_additional = TRY(m_input_stream.read_bits(symbol));
422
133k
            size_t repeat_count = repeat_base + repeat_additional;
423
3.48M
            while (repeat_count--) {
424
3.35M
                context_map.append(0);
425
3.35M
                i++;
426
3.35M
            }
427
282k
        } else {
428
282k
            size_t value = symbol - run_length_encoding_max;
429
282k
            context_map.append(value);
430
282k
            i++;
431
282k
        }
432
415k
    }
433
434
4.18k
    bool inverse_move_to_front = TRY(m_input_stream.read_bit());
435
4.03k
    if (inverse_move_to_front)
436
3.26k
        inverse_move_to_front_transform(context_map.span());
437
438
4.03k
    return {};
439
4.05k
}
440
441
ErrorOr<void> BrotliDecompressionStream::read_block_configuration(Block& block)
442
48.2k
{
443
48.2k
    size_t blocks_of_type = TRY(read_variable_length());
444
445
48.2k
    block.type = 0;
446
48.2k
    block.type_previous = 1;
447
48.2k
    block.number_of_types = blocks_of_type;
448
449
48.2k
    if (blocks_of_type == 1) {
450
40.0k
        block.length = 16 * MiB;
451
40.0k
        block.type_code = {};
452
40.0k
        block.length_code = {};
453
40.0k
    } else {
454
8.22k
        block.type_code = TRY(CanonicalCode::read_prefix_code(m_input_stream, 2 + blocks_of_type));
455
7.96k
        block.length_code = TRY(CanonicalCode::read_prefix_code(m_input_stream, 26));
456
7.73k
        TRY(block_update_length(block));
457
7.65k
    }
458
459
48.2k
    return {};
460
48.2k
}
461
462
ErrorOr<void> BrotliDecompressionStream::block_update_length(Block& block)
463
299k
{
464
299k
    size_t const block_length_code_base[26] { 1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625 };
465
299k
    size_t const block_length_code_extra[26] { 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24 };
466
467
299k
    size_t symbol = TRY(block.length_code.read_symbol(m_input_stream));
468
299k
    size_t block_length = block_length_code_base[symbol] + TRY(m_input_stream.read_bits(block_length_code_extra[symbol]));
469
470
299k
    block.length = block_length;
471
299k
    return {};
472
299k
}
473
474
ErrorOr<void> BrotliDecompressionStream::block_read_new_state(Block& block)
475
291k
{
476
291k
    size_t block_type_symbol = TRY(block.type_code.read_symbol(m_input_stream));
477
291k
    TRY(block_update_length(block));
478
479
291k
    if (block_type_symbol == 0) {
480
269k
        swap(block.type, block.type_previous);
481
269k
    } else if (block_type_symbol == 1) {
482
13.1k
        block.type_previous = block.type;
483
13.1k
        block.type = (block.type + 1) % block.number_of_types;
484
13.1k
    } else {
485
8.50k
        block.type_previous = block.type;
486
8.50k
        block.type = block_type_symbol - 2;
487
8.50k
    }
488
489
291k
    return {};
490
291k
}
491
492
size_t BrotliDecompressionStream::literal_code_index_from_context()
493
404M
{
494
404M
    size_t const context_id_lut0[256] {
495
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
496
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497
404M
        8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
498
404M
        44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
499
404M
        12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
500
404M
        52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
501
404M
        12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
502
404M
        60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
503
404M
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
504
404M
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
505
404M
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
506
404M
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
507
404M
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
508
404M
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
509
404M
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
510
404M
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3
511
404M
    };
512
404M
    size_t const context_id_lut1[256] {
513
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515
404M
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
516
404M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
517
404M
        1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
518
404M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
519
404M
        1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
520
404M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
521
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526
404M
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527
404M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
528
404M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
529
404M
    };
530
404M
    size_t const context_id_lut2[256] {
531
404M
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
532
404M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
533
404M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
534
404M
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
535
404M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
536
404M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
537
404M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
538
404M
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
539
404M
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
540
404M
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
541
404M
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
542
404M
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
543
404M
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
544
404M
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
545
404M
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
546
404M
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7
547
404M
    };
548
549
404M
    size_t context_mode = m_literal_context_modes[m_literal_block.type];
550
404M
    size_t context_id;
551
404M
    switch (context_mode) {
552
62.3M
    case 0:
553
62.3M
        context_id = m_lookback_buffer.value().lookback(1, 0) & 0x3f;
554
62.3M
        break;
555
114M
    case 1:
556
114M
        context_id = m_lookback_buffer.value().lookback(1, 0) >> 2;
557
114M
        break;
558
148M
    case 2:
559
148M
        context_id = context_id_lut0[m_lookback_buffer.value().lookback(1, 0)] | context_id_lut1[m_lookback_buffer.value().lookback(2, 0)];
560
148M
        break;
561
79.1M
    case 3:
562
79.1M
        context_id = (context_id_lut2[m_lookback_buffer.value().lookback(1, 0)] << 3) | context_id_lut2[m_lookback_buffer.value().lookback(2, 0)];
563
79.1M
        break;
564
0
    default:
565
0
        VERIFY_NOT_REACHED();
566
404M
    }
567
568
404M
    size_t literal_code_index = m_context_mapping_literal[64 * m_literal_block.type + context_id];
569
404M
    return literal_code_index;
570
404M
}
571
572
ErrorOr<Bytes> BrotliDecompressionStream::read_some(Bytes output_buffer)
573
349k
{
574
349k
    size_t bytes_read = 0;
575
1.67G
    while (bytes_read < output_buffer.size()) {
576
1.67G
        if (m_current_state == State::WindowSize) {
577
6.02k
            size_t window_bits = TRY(read_window_length());
578
6.01k
            m_window_size = (1 << window_bits) - 16;
579
580
6.01k
            m_lookback_buffer = TRY(LookbackBuffer::try_create(m_window_size));
581
582
6.01k
            m_current_state = State::Idle;
583
1.67G
        } else if (m_current_state == State::Idle) {
584
            // If the final block was read, we are done decompressing
585
61.5k
            if (m_read_final_block)
586
1.19k
                break;
587
588
            // RFC 7932 section 9.1
589
            //
590
            // 1 bit:  ISLAST, set to 1 if this is the last meta-block
591
60.3k
            m_read_final_block = TRY(m_input_stream.read_bit());
592
60.2k
            if (m_read_final_block) {
593
                // 1 bit:  ISLASTEMPTY, if set to 1, the meta-block is empty; this
594
                //       field is only present if ISLAST bit is set -- if it is 1,
595
                //       then the meta-block and the brotli stream ends at that
596
                //       bit, with any remaining bits in the last byte of the
597
                //       compressed stream filled with zeros (if the fill bits are
598
                //       not zero, then the stream should be rejected as invalid)
599
2.62k
                bool is_last_block_empty = TRY(m_input_stream.read_bit());
600
                // If the last block is empty we are done decompressing
601
2.59k
                if (is_last_block_empty)
602
517
                    break;
603
2.59k
            }
604
605
            // 2 bits: MNIBBLES, number of nibbles to represent the uncompressed
606
            //         length
607
59.7k
            size_t size_number_of_nibbles = TRY(read_size_number_of_nibbles());
608
609
            // If MNIBBLES is 0, the meta-block is empty, i.e., it does
610
            // not generate any uncompressed data.  In this case, the
611
            // rest of the meta-block has the following format:
612
59.6k
            if (size_number_of_nibbles == 0) {
613
614
                // 1 bit:  reserved, must be zero
615
38.8k
                bool reserved = TRY(m_input_stream.read_bit());
616
38.8k
                if (reserved)
617
9
                    return Error::from_string_literal("invalid reserved bit");
618
619
                // 2 bits: MSKIPBYTES, number of bytes to represent
620
                //         metadata length
621
                //
622
                // MSKIPBYTES * 8 bits: MSKIPLEN - 1, where MSKIPLEN is
623
                //    the number of metadata bytes; this field is
624
                //    only present if MSKIPBYTES is positive;
625
                //    otherwise, MSKIPLEN is 0 (if MSKIPBYTES is
626
                //    greater than 1, and the last byte is all
627
                //    zeros, then the stream should be rejected as
628
                //    invalid)
629
38.8k
                size_t skip_bytes = TRY(m_input_stream.read_bits(2));
630
38.8k
                if (skip_bytes == 0) {
631
                    // 0..7 bits: fill bits until the next byte boundary,
632
                    //         must be all zeros
633
30.9k
                    u8 remainder = m_input_stream.align_to_byte_boundary();
634
30.9k
                    if (remainder != 0)
635
22
                        return Error::from_string_literal("remainder bits are non-zero");
636
30.9k
                    continue;
637
30.9k
                }
638
639
                // MSKIPLEN bytes of metadata, not part of the
640
                //         uncompressed data or the sliding window
641
7.93k
                size_t skip_length = 1 + TRY(m_input_stream.read_bits(8 * skip_bytes));
642
643
7.91k
                u8 remainder = m_input_stream.align_to_byte_boundary();
644
7.91k
                if (remainder != 0)
645
12
                    return Error::from_string_literal("remainder bits are non-zero");
646
647
                // Discard meta-data bytes
648
7.90k
                u8 temp_buffer[4096];
649
7.90k
                Bytes temp_bytes { temp_buffer, 4096 };
650
16.3k
                while (skip_length > 0) {
651
8.60k
                    Bytes temp_bytes_slice = temp_bytes.slice(0, min(4096, skip_length));
652
8.60k
                    auto metadata_bytes = TRY(m_input_stream.read_some(temp_bytes_slice));
653
8.60k
                    if (metadata_bytes.is_empty())
654
176
                        return Error::from_string_literal("eof");
655
8.43k
                    if (metadata_bytes.last() == 0)
656
14
                        return Error::from_string_literal("invalid stream");
657
8.41k
                    skip_length -= metadata_bytes.size();
658
8.41k
                }
659
660
7.71k
                continue;
661
7.90k
            }
662
663
20.7k
            size_t uncompressed_size = 1 + TRY(m_input_stream.read_bits(4 * size_number_of_nibbles));
664
665
            // 1 bit:  ISUNCOMPRESSED, if set to 1, any bits of compressed data
666
            //       up to the next byte boundary are ignored, and the rest of
667
            //       the meta-block contains MLEN bytes of literal data; this
668
            //       field is only present if the ISLAST bit is not set (if the
669
            //       ignored bits are not all zeros, the stream should be
670
            //       rejected as invalid)
671
20.6k
            bool is_uncompressed = false;
672
20.6k
            if (!m_read_final_block)
673
18.7k
                is_uncompressed = TRY(m_input_stream.read_bit());
674
675
20.6k
            m_bytes_left = uncompressed_size;
676
20.6k
            if (is_uncompressed) {
677
4.28k
                u8 remainder = m_input_stream.align_to_byte_boundary();
678
4.28k
                if (remainder != 0)
679
20
                    return Error::from_string_literal("remainder is non-zero");
680
4.26k
                m_current_state = State::UncompressedData;
681
16.3k
            } else {
682
16.3k
                TRY(read_block_configuration(m_literal_block));
683
16.0k
                TRY(read_block_configuration(m_insert_and_copy_block));
684
15.8k
                TRY(read_block_configuration(m_distance_block));
685
686
15.7k
                m_postfix_bits = TRY(m_input_stream.read_bits(2));
687
15.7k
                m_direct_distances = TRY(m_input_stream.read_bits(4)) << m_postfix_bits;
688
689
15.7k
                m_literal_context_modes.clear();
690
93.7k
                for (size_t i = 0; i < m_literal_block.number_of_types; i++) {
691
78.1k
                    size_t context_mode = TRY(m_input_stream.read_bits(2));
692
78.0k
                    m_literal_context_modes.append(context_mode);
693
78.0k
                }
694
695
15.7k
                m_context_mapping_literal.clear();
696
15.6k
                size_t number_of_literal_codes = TRY(read_variable_length());
697
15.6k
                if (number_of_literal_codes == 1) {
698
4.16M
                    for (size_t i = 0; i < 64 * m_literal_block.number_of_types; i++)
699
4.14M
                        m_context_mapping_literal.append(0);
700
12.4k
                } else {
701
3.15k
                    TRY(read_context_map(number_of_literal_codes, m_context_mapping_literal, 64 * m_literal_block.number_of_types));
702
2.98k
                }
703
704
15.6k
                m_context_mapping_distance.clear();
705
15.4k
                size_t number_of_distance_codes = TRY(read_variable_length());
706
15.3k
                if (number_of_distance_codes == 1) {
707
110k
                    for (size_t i = 0; i < 4 * m_distance_block.number_of_types; i++)
708
96.6k
                        m_context_mapping_distance.append(0);
709
14.1k
                } else {
710
1.20k
                    TRY(read_context_map(number_of_distance_codes, m_context_mapping_distance, 4 * m_distance_block.number_of_types));
711
1.04k
                }
712
713
15.3k
                m_literal_codes.clear();
714
55.5k
                for (size_t i = 0; i < number_of_literal_codes; i++) {
715
41.0k
                    m_literal_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 256)));
716
40.2k
                }
717
718
15.2k
                m_insert_and_copy_codes.clear();
719
34.2k
                for (size_t i = 0; i < m_insert_and_copy_block.number_of_types; i++) {
720
19.9k
                    m_insert_and_copy_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 704)));
721
19.7k
                }
722
723
14.5k
                m_distance_codes.clear();
724
40.4k
                for (size_t i = 0; i < number_of_distance_codes; i++) {
725
26.3k
                    m_distance_codes.append(TRY(CanonicalCode::read_prefix_code(m_input_stream, 16 + m_direct_distances + (48 << m_postfix_bits))));
726
26.1k
                }
727
728
14.3k
                m_current_state = State::CompressedCommand;
729
14.0k
            }
730
1.67G
        } else if (m_current_state == State::UncompressedData) {
731
5.35k
            size_t number_of_fitting_bytes = min(output_buffer.size() - bytes_read, m_bytes_left);
732
5.35k
            VERIFY(number_of_fitting_bytes > 0);
733
734
5.35k
            auto uncompressed_bytes = TRY(m_input_stream.read_some(output_buffer.slice(bytes_read, number_of_fitting_bytes)));
735
5.35k
            if (uncompressed_bytes.is_empty())
736
281
                return Error::from_string_literal("eof");
737
738
            // TODO: Replace the home-grown LookbackBuffer with AK::CircularBuffer.
739
5.07k
            for (auto c : uncompressed_bytes)
740
3.64M
                m_lookback_buffer.value().write(c);
741
742
5.07k
            m_bytes_left -= uncompressed_bytes.size();
743
5.07k
            bytes_read += uncompressed_bytes.size();
744
745
            // If all bytes were read, return to the idle state
746
5.07k
            if (m_bytes_left == 0)
747
3.98k
                m_current_state = State::Idle;
748
1.67G
        } else if (m_current_state == State::CompressedCommand) {
749
132M
            if (m_insert_and_copy_block.length == 0) {
750
254k
                TRY(block_read_new_state(m_insert_and_copy_block));
751
254k
            }
752
132M
            m_insert_and_copy_block.length--;
753
754
132M
            size_t insert_and_copy_symbol = TRY(m_insert_and_copy_codes[m_insert_and_copy_block.type].read_symbol(m_input_stream));
755
756
132M
            size_t const insert_length_code_base[11] { 0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16 };
757
132M
            size_t const copy_length_code_base[11] { 0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16 };
758
132M
            bool const implicit_zero_distance[11] { true, true, false, false, false, false, false, false, false, false, false };
759
760
132M
            size_t insert_and_copy_index = insert_and_copy_symbol >> 6;
761
132M
            size_t insert_length_code_offset = (insert_and_copy_symbol >> 3) & 0b111;
762
132M
            size_t copy_length_code_offset = insert_and_copy_symbol & 0b111;
763
764
132M
            size_t insert_length_code = insert_length_code_base[insert_and_copy_index] + insert_length_code_offset;
765
132M
            size_t copy_length_code = copy_length_code_base[insert_and_copy_index] + copy_length_code_offset;
766
767
132M
            m_implicit_zero_distance = implicit_zero_distance[insert_and_copy_index];
768
769
132M
            size_t const insert_length_base[24] { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
770
132M
            size_t const insert_length_extra[24] { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
771
132M
            size_t const copy_length_base[24] { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
772
132M
            size_t const copy_length_extra[24] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
773
774
132M
            m_insert_length = insert_length_base[insert_length_code] + TRY(m_input_stream.read_bits(insert_length_extra[insert_length_code]));
775
132M
            m_copy_length = copy_length_base[copy_length_code] + TRY(m_input_stream.read_bits(copy_length_extra[copy_length_code]));
776
777
132M
            if (m_insert_length > 0) {
778
66.3M
                m_current_state = State::CompressedLiteral;
779
66.5M
            } else {
780
66.5M
                m_current_state = State::CompressedDistance;
781
66.5M
            }
782
1.53G
        } else if (m_current_state == State::CompressedLiteral) {
783
404M
            if (m_literal_block.length == 0) {
784
24.0k
                TRY(block_read_new_state(m_literal_block));
785
24.0k
            }
786
404M
            m_literal_block.length--;
787
788
404M
            size_t literal_code_index = literal_code_index_from_context();
789
404M
            size_t literal_value = TRY(m_literal_codes[literal_code_index].read_symbol(m_input_stream));
790
791
404M
            output_buffer[bytes_read] = literal_value;
792
404M
            m_lookback_buffer.value().write(literal_value);
793
404M
            bytes_read++;
794
404M
            m_insert_length--;
795
404M
            m_bytes_left--;
796
797
404M
            if (m_bytes_left == 0)
798
4.21k
                m_current_state = State::Idle;
799
404M
            else if (m_insert_length == 0)
800
66.3M
                m_current_state = State::CompressedDistance;
801
1.13G
        } else if (m_current_state == State::CompressedDistance) {
802
132M
            size_t distance_symbol;
803
132M
            if (m_implicit_zero_distance) {
804
71.9M
                distance_symbol = 0;
805
71.9M
            } else {
806
60.9M
                if (m_distance_block.length == 0) {
807
13.1k
                    TRY(block_read_new_state(m_distance_block));
808
13.1k
                }
809
60.9M
                m_distance_block.length--;
810
811
60.9M
                size_t context_id = clamp(m_copy_length - 2, 0, 3);
812
60.9M
                size_t distance_code_index = m_context_mapping_distance[4 * m_distance_block.type + context_id];
813
814
60.9M
                distance_symbol = TRY(m_distance_codes[distance_code_index].read_symbol(m_input_stream));
815
60.9M
            }
816
817
132M
            size_t distance;
818
132M
            bool reuse_previous_distance = false;
819
132M
            if (distance_symbol < 16) {
820
113M
                switch (distance_symbol) {
821
73.6M
                case 0:
822
73.6M
                    distance = m_distances[0];
823
73.6M
                    reuse_previous_distance = true;
824
73.6M
                    break;
825
10.6M
                case 1:
826
10.6M
                    distance = m_distances[1];
827
10.6M
                    break;
828
869k
                case 2:
829
869k
                    distance = m_distances[2];
830
869k
                    break;
831
899k
                case 3:
832
899k
                    distance = m_distances[3];
833
899k
                    break;
834
6.89k
                case 4:
835
6.89k
                    distance = m_distances[0] - 1;
836
6.89k
                    break;
837
10.6M
                case 5:
838
10.6M
                    distance = m_distances[0] + 1;
839
10.6M
                    break;
840
30.7k
                case 6:
841
30.7k
                    distance = m_distances[0] - 2;
842
30.7k
                    break;
843
859k
                case 7:
844
859k
                    distance = m_distances[0] + 2;
845
859k
                    break;
846
10.5k
                case 8:
847
10.5k
                    distance = m_distances[0] - 3;
848
10.5k
                    break;
849
1.03M
                case 9:
850
1.03M
                    distance = m_distances[0] + 3;
851
1.03M
                    break;
852
32.0k
                case 10:
853
32.0k
                    distance = m_distances[1] - 1;
854
32.0k
                    break;
855
1.13M
                case 11:
856
1.13M
                    distance = m_distances[1] + 1;
857
1.13M
                    break;
858
3.13k
                case 12:
859
3.13k
                    distance = m_distances[1] - 2;
860
3.13k
                    break;
861
12.7M
                case 13:
862
12.7M
                    distance = m_distances[1] + 2;
863
12.7M
                    break;
864
6.63k
                case 14:
865
6.63k
                    distance = m_distances[1] - 3;
866
6.63k
                    break;
867
941k
                case 15:
868
941k
                    distance = m_distances[1] + 3;
869
941k
                    break;
870
113M
                }
871
113M
            } else if (distance_symbol < 16 + m_direct_distances) {
872
17.6M
                distance = distance_symbol - 15;
873
17.6M
            } else {
874
1.74M
                size_t POSTFIX_MASK = (1 << m_postfix_bits) - 1;
875
876
1.74M
                size_t ndistbits = 1 + ((distance_symbol - m_direct_distances - 16) >> (m_postfix_bits + 1));
877
1.74M
                size_t dextra = TRY(m_input_stream.read_bits(ndistbits));
878
879
1.74M
                size_t hcode = (distance_symbol - m_direct_distances - 16) >> m_postfix_bits;
880
1.74M
                size_t lcode = (distance_symbol - m_direct_distances - 16) & POSTFIX_MASK;
881
1.74M
                size_t offset = ((2 + (hcode & 1)) << ndistbits) - 4;
882
1.74M
                distance = ((offset + dextra) << m_postfix_bits) + lcode + m_direct_distances + 1;
883
1.74M
            }
884
132M
            m_distance = distance;
885
886
132M
            size_t total_written = m_lookback_buffer.value().total_written();
887
132M
            size_t max_lookback = min(total_written, m_window_size);
888
889
132M
            if (distance > max_lookback) {
890
24.0M
                size_t word_index = distance - (max_lookback + 1);
891
24.0M
                m_dictionary_data = TRY(BrotliDictionary::lookup_word(word_index, m_copy_length));
892
24.0M
                m_copy_length = m_dictionary_data.size();
893
894
24.0M
                if (m_copy_length == 0)
895
2.06k
                    m_current_state = State::CompressedCommand;
896
24.0M
                else
897
24.0M
                    m_current_state = State::CompressedDictionary;
898
108M
            } else {
899
108M
                if (!reuse_previous_distance) {
900
35.1M
                    m_distances[3] = m_distances[2];
901
35.1M
                    m_distances[2] = m_distances[1];
902
35.1M
                    m_distances[1] = m_distances[0];
903
35.1M
                    m_distances[0] = distance;
904
35.1M
                }
905
906
108M
                m_current_state = State::CompressedCopy;
907
108M
            }
908
1.00G
        } else if (m_current_state == State::CompressedCopy) {
909
866M
            u8 copy_value = m_lookback_buffer.value().lookback(m_distance);
910
911
866M
            output_buffer[bytes_read] = copy_value;
912
866M
            m_lookback_buffer.value().write(copy_value);
913
866M
            bytes_read++;
914
866M
            m_copy_length--;
915
866M
            m_bytes_left--;
916
917
866M
            if (m_bytes_left == 0)
918
7.46k
                m_current_state = State::Idle;
919
866M
            else if (m_copy_length == 0)
920
108M
                m_current_state = State::CompressedCommand;
921
866M
        } else if (m_current_state == State::CompressedDictionary) {
922
134M
            size_t offset = m_dictionary_data.size() - m_copy_length;
923
134M
            u8 dictionary_value = m_dictionary_data[offset];
924
925
134M
            output_buffer[bytes_read] = dictionary_value;
926
134M
            m_lookback_buffer.value().write(dictionary_value);
927
134M
            bytes_read++;
928
134M
            m_copy_length--;
929
134M
            m_bytes_left--;
930
931
134M
            if (m_bytes_left == 0)
932
1.26k
                m_current_state = State::Idle;
933
134M
            else if (m_copy_length == 0)
934
24.0M
                m_current_state = State::CompressedCommand;
935
134M
        }
936
1.67G
    }
937
938
345k
    return output_buffer.slice(0, bytes_read);
939
349k
}
940
941
bool BrotliDecompressionStream::is_eof() const
942
351k
{
943
351k
    return m_read_final_block && m_current_state == State::Idle;
944
351k
}
945
946
}