Coverage Report

Created: 2026-02-14 08:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibGfx/ImageFormats/JBIG2Loader.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2024-2025, Nico Weber <thakis@chromium.org>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/BitStream.h>
8
#include <AK/Debug.h>
9
#include <AK/Enumerate.h>
10
#include <AK/GenericShorthands.h>
11
#include <AK/IntegralMath.h>
12
#include <AK/Utf16View.h>
13
#include <LibGfx/ImageFormats/BilevelImage.h>
14
#include <LibGfx/ImageFormats/CCITTDecoder.h>
15
#include <LibGfx/ImageFormats/JBIG2Loader.h>
16
#include <LibGfx/ImageFormats/JBIG2Shared.h>
17
#include <LibGfx/ImageFormats/MQArithmeticCoder.h>
18
#include <LibTextCodec/Decoder.h>
19
20
// Spec: ITU-T_T_88__08_2018.pdf in the zip file here:
21
// https://www.itu.int/rec/T-REC-T.88-201808-I
22
// Annex H has a datastream example.
23
24
// That spec was published in 2018 and contains all previous amendments. Its history is:
25
// * 2002: Original spec published, describes decoding only. Has generic regions,
26
//         symbol regions, text regions, halftone regions, and pattern regions.
27
// * 2003: Amendment 1 approved. Describes encoding. Not interesting for us.
28
//   * 2004: (Amendment 1 erratum 1 approved. Not interesting for us.)
29
// * 2003: Amendment 2 approved. Added support for EXTTEMPLATE.
30
// * 2011: Amendment 3 approved. Added support for color coding
31
//         (COLEXTFLAG, CPCOMPLEN, CPDEFCOLS, CPEXCOLS, CPNCOMP, CPNVALS, GBCOLS,
32
//         GBCOMBOP, GBFGCOLID, SBCOLS, SBCOLSECTSIZE and SBFGCOLID).
33
// This history might explain why EXTTEMPLATE and colors are very rare in practice.
34
35
namespace Gfx {
36
37
namespace JBIG2 {
38
39
ArithmeticIntegerDecoder::ArithmeticIntegerDecoder()
40
40
{
41
40
    contexts.resize(1 << 9);
42
40
}
43
44
Optional<int> ArithmeticIntegerDecoder::decode(MQArithmeticDecoder& decoder)
45
4
{
46
    // A.2 Procedure for decoding values (except IAID)
47
    // "1) Set:
48
    //    PREV = 1"
49
4
    u16 PREV = 1;
50
51
    // "2) Follow the flowchart in Figure A.1. Decode each bit with CX equal to "IAx + PREV" where "IAx" represents the identifier
52
    //     of the current arithmetic integer decoding procedure, "+" represents concatenation, and the rightmost 9 bits of PREV are used."
53
118
    auto decode_bit = [&]() {
54
118
        bool D = decoder.get_next_bit(contexts[PREV & 0x1FF]);
55
        // "3) After each bit is decoded:
56
        //     If PREV < 256 set:
57
        //         PREV = (PREV << 1) OR D
58
        //     Otherwise set:
59
        //         PREV = (((PREV << 1) OR D) AND 511) OR 256
60
        //     where D represents the value of the just-decoded bit.
61
118
        if (PREV < 256)
62
28
            PREV = (PREV << 1) | (u16)D;
63
90
        else
64
90
            PREV = (((PREV << 1) | (u16)D) & 511) | 256;
65
118
        return D;
66
118
    };
67
68
4
    auto decode_bits = [&](int n) {
69
4
        u32 result = 0;
70
102
        for (int i = 0; i < n; ++i)
71
98
            result = (result << 1) | decode_bit();
72
4
        return result;
73
4
    };
74
75
    // Figure A.1 – Flowchart for the integer arithmetic decoding procedures (except IAID)
76
4
    u8 S = decode_bit();
77
4
    u32 V;
78
4
    if (!decode_bit())
79
1
        V = decode_bits(2);
80
3
    else if (!decode_bit())
81
0
        V = decode_bits(4) + 4;
82
3
    else if (!decode_bit())
83
0
        V = decode_bits(6) + 20;
84
3
    else if (!decode_bit())
85
0
        V = decode_bits(8) + 84;
86
3
    else if (!decode_bit())
87
0
        V = decode_bits(12) + 340;
88
3
    else
89
3
        V = decode_bits(32) + 4436;
90
91
    // "4) The sequence of bits decoded, interpreted according to Table A.1, gives the value that is the result of this invocation
92
    //     of the integer arithmetic decoding procedure."
93
4
    if (S == 1 && V == 0)
94
1
        return {};
95
3
    return S ? -V : V;
96
4
}
97
98
ErrorOr<i32> ArithmeticIntegerDecoder::decode_non_oob(MQArithmeticDecoder& decoder)
99
4
{
100
4
    auto result = decode(decoder);
101
4
    if (!result.has_value())
102
1
        return Error::from_string_literal("ArithmeticIntegerDecoder: Unexpected OOB");
103
3
    return result.value();
104
4
}
105
106
ArithmeticIntegerIDDecoder::ArithmeticIntegerIDDecoder(u32 code_length)
107
4
    : m_code_length(code_length)
108
4
{
109
4
    contexts.resize(1 << (code_length + 1));
110
4
}
111
112
u32 ArithmeticIntegerIDDecoder::decode(MQArithmeticDecoder& decoder)
113
0
{
114
    // A.3 The IAID decoding procedure
115
0
    u32 prev = 1;
116
0
    for (u8 i = 0; i < m_code_length; ++i) {
117
0
        bool bit = decoder.get_next_bit(contexts[prev]);
118
0
        prev = (prev << 1) | bit;
119
0
    }
120
0
    prev = prev - (1 << m_code_length);
121
0
    return prev;
122
0
}
123
124
}
125
126
struct SegmentData {
127
    SegmentData(JBIG2::SegmentHeader header, ReadonlyBytes data)
128
308k
        : header(header)
129
308k
        , data(data)
130
308k
    {
131
308k
    }
132
133
    JBIG2::SegmentHeader header;
134
    ReadonlyBytes data;
135
136
4.82k
    auto type() const { return header.type; }
137
138
    // Valid after complete_decoding_all_segment_headers().
139
    Vector<SegmentData*> referred_to_segments;
140
141
    // Set on dictionary segments after they've been decoded.
142
    Optional<Vector<BilevelSubImage>> symbols;
143
144
    struct BitmapCodingContextState {
145
        Optional<JBIG2::GenericContexts> generic_contexts;
146
        Optional<JBIG2::RefinementContexts> refinement_contexts;
147
        bool used_huffman_encoding { false };
148
        bool used_refinement_or_aggregate_coding { false };
149
        u8 symbol_template { 0 };
150
        u8 refinement_template { 0 };
151
        Array<JBIG2::AdaptiveTemplatePixel, 4> adaptive_template_pixels {};
152
        Array<JBIG2::AdaptiveTemplatePixel, 2> refinement_adaptive_template_pixels {};
153
    };
154
    Optional<BitmapCodingContextState> retained_bitmap_coding_contexts; // Only set on dictionary segments with bitmap_coding_context_retained set.
155
156
    // Set on pattern segments after they've been decoded.
157
    Optional<Vector<BilevelSubImage>> patterns;
158
159
    // Set on code table segments after they've been decoded.
160
    Optional<Vector<JBIG2::Code>> codes;
161
    Optional<JBIG2::HuffmanTable> huffman_table;
162
163
    // Set on intermediate region segments after they've been decoded.
164
    RefPtr<BilevelImage> aux_buffer;
165
    JBIG2::RegionSegmentInformationField aux_buffer_information_field;
166
};
167
168
struct Page {
169
    IntSize size;
170
171
    // This is never CombinationOperator::Replace for Pages.
172
    JBIG2::CombinationOperator default_combination_operator { JBIG2::CombinationOperator::Or };
173
174
    bool direct_region_segments_override_default_combination_operator { false };
175
176
    RefPtr<BilevelImage> bits;
177
};
178
179
struct JBIG2LoadingContext {
180
    JBIG2DecoderOptions options;
181
182
    enum class State {
183
        NotDecoded = 0,
184
        Error,
185
        Decoded,
186
    };
187
    State state { State::NotDecoded };
188
189
    JBIG2::Organization organization { JBIG2::Organization::Sequential };
190
    Page page;
191
    u32 current_page_number { 1 };
192
193
    Optional<u32> number_of_pages;
194
    Vector<u32> page_numbers;
195
196
    Vector<SegmentData> segments;
197
198
    // Files from the Power JBIG2 tests have a few quirks.
199
    // Since they're useful for coverage, detect these files and be more lenient.
200
    bool allow_power_jbig2_quirks { false };
201
};
202
203
static ErrorOr<void> decode_jbig2_header(JBIG2LoadingContext& context, ReadonlyBytes data)
204
417
{
205
417
    if (!JBIG2ImageDecoderPlugin::sniff(data))
206
4
        return Error::from_string_literal("JBIG2LoadingContext: Invalid JBIG2 header");
207
208
413
    FixedMemoryStream stream(data.slice(sizeof(JBIG2::id_string)));
209
210
    // D.4.2 File header flags
211
413
    u8 header_flags = TRY(stream.read_value<u8>());
212
413
    if (header_flags & 0b11110000)
213
1
        return Error::from_string_literal("JBIG2LoadingContext: Invalid header flags");
214
412
    context.organization = (header_flags & 1) ? JBIG2::Organization::Sequential : JBIG2::Organization::RandomAccess;
215
412
    dbgln_if(JBIG2_DEBUG, "JBIG2 Header: Organization: {} ({})", (int)context.organization, context.organization == JBIG2::Organization::Sequential ? "Sequential" : "Random-access");
216
412
    bool has_known_number_of_pages = (header_flags & 2) ? false : true;
217
412
    bool uses_templates_with_12_AT_pixels = (header_flags & 4) ? true : false;
218
412
    bool contains_colored_region_segments = (header_flags & 8) ? true : false;
219
220
412
    dbgln_if(JBIG2_DEBUG, "    has_known_number_of_pages={}", has_known_number_of_pages);
221
412
    dbgln_if(JBIG2_DEBUG, "    uses_templates_with_12_AT_pixels={}", uses_templates_with_12_AT_pixels);
222
412
    dbgln_if(JBIG2_DEBUG, "    contains_colored_region_segments={}", contains_colored_region_segments);
223
224
    // D.4.3 Number of pages
225
412
    if (has_known_number_of_pages) {
226
121
        context.number_of_pages = TRY(stream.read_value<BigEndian<u32>>());
227
121
        dbgln_if(JBIG2_DEBUG, "    number of pages: {}", context.number_of_pages.value());
228
121
    }
229
230
412
    dbgln_if(JBIG2_DEBUG, "");
231
232
412
    return {};
233
412
}
234
235
static ErrorOr<JBIG2::SegmentType> to_segment_type(u8 type_int)
236
790k
{
237
790k
    auto type = static_cast<JBIG2::SegmentType>(type_int);
238
790k
    switch (type) {
239
785k
    case JBIG2::SegmentType::SymbolDictionary:
240
786k
    case JBIG2::SegmentType::IntermediateTextRegion:
241
786k
    case JBIG2::SegmentType::ImmediateTextRegion:
242
786k
    case JBIG2::SegmentType::ImmediateLosslessTextRegion:
243
787k
    case JBIG2::SegmentType::PatternDictionary:
244
787k
    case JBIG2::SegmentType::IntermediateHalftoneRegion:
245
787k
    case JBIG2::SegmentType::ImmediateHalftoneRegion:
246
787k
    case JBIG2::SegmentType::ImmediateLosslessHalftoneRegion:
247
787k
    case JBIG2::SegmentType::IntermediateGenericRegion:
248
788k
    case JBIG2::SegmentType::ImmediateGenericRegion:
249
788k
    case JBIG2::SegmentType::ImmediateLosslessGenericRegion:
250
788k
    case JBIG2::SegmentType::IntermediateGenericRefinementRegion:
251
788k
    case JBIG2::SegmentType::ImmediateGenericRefinementRegion:
252
789k
    case JBIG2::SegmentType::ImmediateLosslessGenericRefinementRegion:
253
789k
    case JBIG2::SegmentType::PageInformation:
254
789k
    case JBIG2::SegmentType::EndOfPage:
255
790k
    case JBIG2::SegmentType::EndOfStripe:
256
790k
    case JBIG2::SegmentType::EndOfFile:
257
790k
    case JBIG2::SegmentType::Profiles:
258
790k
    case JBIG2::SegmentType::Tables:
259
790k
    case JBIG2::SegmentType::ColorPalette:
260
790k
    case JBIG2::SegmentType::Extension:
261
790k
        return type;
262
790k
    }
263
31
    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid segment type");
264
790k
}
265
266
static ErrorOr<JBIG2::SegmentHeader> decode_segment_header(SeekableStream& stream)
267
790k
{
268
    // 7.2.2 Segment number
269
790k
    u32 segment_number = TRY(stream.read_value<BigEndian<u32>>());
270
790k
    dbgln_if(JBIG2_DEBUG, "Segment number: {}", segment_number);
271
272
    // 7.2.3 Segment header flags
273
790k
    u8 flags = TRY(stream.read_value<u8>());
274
790k
    JBIG2::SegmentType type = TRY(to_segment_type(flags & 0b11'1111));
275
790k
    dbgln_if(JBIG2_DEBUG, "Segment type: {}", (int)type);
276
790k
    bool segment_page_association_size_is_32_bits = (flags & 0b100'0000) != 0;
277
790k
    bool segment_retained_only_by_itself_and_extension_segments = (flags & 0b1000'00000) != 0;
278
279
790k
    dbgln_if(JBIG2_DEBUG, "Page association size is 32 bits: {}", segment_page_association_size_is_32_bits);
280
790k
    dbgln_if(JBIG2_DEBUG, "Page retained only by itself and extension segments: {}", segment_retained_only_by_itself_and_extension_segments);
281
282
    // 7.2.4 Referred-to segment count and retention flags
283
790k
    u8 referred_to_segment_count_and_retention_flags = TRY(stream.read_value<u8>());
284
790k
    u32 count_of_referred_to_segments = referred_to_segment_count_and_retention_flags >> 5;
285
790k
    if (count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6)
286
8
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid count_of_referred_to_segments");
287
288
790k
    bool retention_flag = false;
289
790k
    Vector<bool> referred_to_segment_retention_flags;
290
790k
    if (count_of_referred_to_segments == 7) {
291
980
        TRY(stream.seek(-1, SeekMode::FromCurrentPosition));
292
980
        count_of_referred_to_segments = TRY(stream.read_value<BigEndian<u32>>()) & 0x1FFF'FFFF;
293
294
978
        auto retention_flags_offset = TRY(stream.tell());
295
296
978
        LittleEndianInputBitStream bit_stream { MaybeOwned { stream } };
297
978
        u32 bit_count = ceil_div(count_of_referred_to_segments + 1, 8) * 8;
298
978
        retention_flag = TRY(bit_stream.read_bit());
299
43.7M
        for (u32 i = 1; i < count_of_referred_to_segments + 1; ++i)
300
43.7M
            referred_to_segment_retention_flags.append(TRY(bit_stream.read_bit()));
301
5.80k
        for (u32 i = count_of_referred_to_segments + 1; i < bit_count; ++i) {
302
4.91k
            if (TRY(bit_stream.read_bit()))
303
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid referred-to segment retention flag");
304
4.91k
        }
305
306
        // LittleEndianInputBitStream peeks ahead, which means it over-reads the underlying stream.
307
        // Seek back to past the retention flag bits.
308
1.77k
        TRY(stream.seek(retention_flags_offset + bit_count / 8, SeekMode::SetPosition));
309
789k
    } else {
310
789k
        retention_flag = referred_to_segment_count_and_retention_flags & 1;
311
793k
        for (u32 i = 1; i < count_of_referred_to_segments + 1; ++i)
312
4.39k
            referred_to_segment_retention_flags.append((referred_to_segment_count_and_retention_flags >> i) & 1);
313
3.94M
        for (u32 i = count_of_referred_to_segments + 1; i < 5; ++i) {
314
3.15M
            if ((referred_to_segment_count_and_retention_flags >> i) & 1)
315
15
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid referred-to segment retention flag");
316
3.15M
        }
317
789k
    }
318
790k
    dbgln_if(JBIG2_DEBUG, "Retained: {}", retention_flag);
319
790k
    dbgln_if(JBIG2_DEBUG, "Referred-to segment count: {}", count_of_referred_to_segments);
320
321
    // 7.2.5 Referred-to segment numbers
322
790k
    Vector<u32> referred_to_segment_numbers;
323
1.31M
    for (u32 i = 0; i < count_of_referred_to_segments; ++i) {
324
521k
        u32 referred_to_segment_number;
325
521k
        if (segment_number <= 256)
326
298k
            referred_to_segment_number = TRY(stream.read_value<u8>());
327
222k
        else if (segment_number <= 65536)
328
218k
            referred_to_segment_number = TRY(stream.read_value<BigEndian<u16>>());
329
4.18k
        else
330
4.18k
            referred_to_segment_number = TRY(stream.read_value<BigEndian<u32>>());
331
332
        // "If a segment refers to other segments, it must refer to only segments with lower segment numbers."
333
521k
        if (referred_to_segment_number >= segment_number)
334
12
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Referred-to segment number too large");
335
336
521k
        referred_to_segment_numbers.append(referred_to_segment_number);
337
521k
        dbgln_if(JBIG2_DEBUG, "Referred-to segment number: {}, retained {}", referred_to_segment_number, referred_to_segment_retention_flags[i]);
338
521k
    }
339
340
    // 7.2.6 Segment page association
341
790k
    u32 segment_page_association;
342
790k
    if (segment_page_association_size_is_32_bits) {
343
914
        segment_page_association = TRY(stream.read_value<BigEndian<u32>>());
344
789k
    } else {
345
789k
        segment_page_association = TRY(stream.read_value<u8>());
346
789k
    }
347
790k
    dbgln_if(JBIG2_DEBUG, "Segment page association: {}", segment_page_association);
348
349
    // 7.2.7 Segment data length
350
790k
    u32 data_length = TRY(stream.read_value<BigEndian<u32>>());
351
790k
    dbgln_if(JBIG2_DEBUG, "Segment data length: {}", data_length);
352
353
790k
    Optional<u32> opt_data_length;
354
790k
    if (data_length != 0xffff'ffff)
355
789k
        opt_data_length = data_length;
356
512
    else if (type != JBIG2::SegmentType::ImmediateGenericRegion)
357
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Unknown data length only allowed for ImmediateGenericRegion");
358
359
790k
    dbgln_if(JBIG2_DEBUG, "");
360
361
790k
    return JBIG2::SegmentHeader { segment_number, type, retention_flag, move(referred_to_segment_numbers), move(referred_to_segment_retention_flags), segment_page_association, opt_data_length };
362
790k
}
363
364
static ErrorOr<size_t> scan_for_immediate_generic_region_size(ReadonlyBytes data)
365
420
{
366
    // 7.2.7 Segment data length
367
    // "If the segment's type is "Immediate generic region", then the length field may contain the value 0xFFFFFFFF.
368
    //  This value is intended to mean that the length of the segment's data part is unknown at the time that the segment header is written (...).
369
    //  In this case, the true length of the segment's data part shall be determined through examination of the data:
370
    //  if the segment uses template-based arithmetic coding, then the segment's data part ends with the two-byte sequence 0xFF 0xAC followed by a four-byte row count.
371
    //  If the segment uses MMR coding, then the segment's data part ends with the two-byte sequence 0x00 0x00 followed by a four-byte row count.
372
    //  The form of encoding used by the segment may be determined by examining the eighteenth byte of its segment data part,
373
    //  and the end sequences can occur anywhere after that eighteenth byte."
374
    // 7.4.6.4 Decoding a generic region segment
375
    // "NOTE – The sequence 0x00 0x00 cannot occur within MMR-encoded data; the sequence 0xFF 0xAC can occur only at the end of arithmetically-coded data.
376
    //  Thus, those sequences cannot occur by chance in the data that is decoded to generate the contents of the generic region."
377
420
    dbgln_if(JBIG2_DEBUG, "(Unknown data length, computing it)");
378
379
420
    if (data.size() < 19 + sizeof(u32))
380
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Data too short to contain segment data header and end sequence");
381
382
    // Per 7.4.6.1 Generic region segment data header, this starts with the 17 bytes described in
383
    // 7.4.1 Region segment information field, followed the byte described in 7.4.6.2 Generic region segment flags.
384
    // That byte's lowest bit stores if the segment uses MMR.
385
420
    u8 flags = data[17];
386
420
    bool uses_mmr = (flags & 1) != 0;
387
420
    auto end_sequence = uses_mmr ? to_array<u8>({ 0x00, 0x00 }) : to_array<u8>({ 0xFF, 0xAC });
388
420
    u8 const* end = static_cast<u8 const*>(memmem(data.data() + 19, data.size() - 19 - sizeof(u32), end_sequence.data(), end_sequence.size()));
389
420
    if (!end)
390
8
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Could not find end sequence in segment data");
391
392
412
    size_t size = end - data.data() + end_sequence.size() + sizeof(u32);
393
412
    dbgln_if(JBIG2_DEBUG, "(Computed size is {})", size);
394
412
    return size;
395
420
}
396
397
static void identify_power_jbig2_files(JBIG2LoadingContext& context)
398
51
{
399
51
    if (context.options.strictness == JBIG2DecoderOptions::Strictness::SpecCompliant)
400
0
        return;
401
402
444
    for (auto const& segment : context.segments) {
403
444
        auto signature_data_1 = "\x20\0\0\0"
404
444
                                "Source\0"
405
444
                                "Power JBIG-2 Encoder - The University of British Columba and Image Power Inc.\0"
406
444
                                "Version\0"
407
444
                                "1.0.0\0"
408
444
                                "\0"sv;
409
444
        auto signature_data_2 = "\x20\0\0\0"
410
444
                                "Source\0"
411
444
                                "Power JBIG-2 Encoder - The University of British Columbia and Image Power Inc.\0"
412
444
                                "Version\0"
413
444
                                "1.0.0\0"
414
444
                                "\0"sv;
415
444
        if (segment.type() == JBIG2::SegmentType::Extension && (segment.data == signature_data_1.bytes() || segment.data == signature_data_2.bytes())) {
416
0
            context.allow_power_jbig2_quirks = true;
417
0
            return;
418
0
        }
419
444
    }
420
51
}
421
422
static ErrorOr<void> validate_segment_order(JBIG2LoadingContext const& context)
423
51
{
424
    // 7.1 General description
425
    // "In the sequential and random-access organizations (see D.1 and D.2), the segments must appear in the file in increasing order
426
    //  of their segment numbers. However, in the embedded organization (see D.3), this is not the case"
427
    // "NOTE – It is possible for there to be gaps in the segment numbering"
428
51
    if (context.organization == JBIG2::Organization::Embedded)
429
0
        return {};
430
431
274
    for (size_t i = 1; i < context.segments.size(); ++i)
432
238
        if (context.segments[i - 1].header.segment_number > context.segments[i].header.segment_number)
433
15
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segments out of order");
434
435
36
    return {};
436
51
}
437
438
static ErrorOr<void> validate_segment_header_retention_flags(JBIG2LoadingContext const& context)
439
36
{
440
    // "If the retain bit for this segment value is 0, then no segment may refer to this segment.
441
    //  If the retain bit for the first referred-to segment value is 0, then no segment after this one may refer to the first segment
442
    //  that this segment refers to (i.e., this segment is the last segment that refers to that other segment)"
443
36
    HashTable<int> dead_segments;
444
445
248
    for (auto const& segment : context.segments) {
446
248
        auto const& header = segment.header;
447
448
248
        if (header.retention_flag) {
449
            // Guaranteed because decode_segment_header() guarantees referred_to_segment_numbers are larger than segment_number.
450
38
            VERIFY(!dead_segments.contains(header.segment_number));
451
210
        } else {
452
210
            if (dead_segments.set(header.segment_number) != HashSetResult::InsertedNewEntry)
453
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid segment retention flags");
454
210
        }
455
456
248
        for (auto const& [i, referred_to_segment] : enumerate(segment.referred_to_segments)) {
457
2
            bool allow_reference_to_dead_segment_quirk = false;
458
2
            if (context.allow_power_jbig2_quirks
459
0
                && referred_to_segment->type() == JBIG2::SegmentType::PatternDictionary) {
460
                // Quirk: t89-halftone/*-stripe.jb2 have one PatternDictionary and then one ImmediateHalftoneRegion per stripe,
461
                // but each ImmediateHalftoneRegion (incorrectly?) sets the retention flag for the PatternDictionary to 0.
462
0
                allow_reference_to_dead_segment_quirk = true;
463
0
            }
464
2
            if (context.options.strictness == JBIG2DecoderOptions::Strictness::Permissive
465
2
                && referred_to_segment->type() == JBIG2::SegmentType::SymbolDictionary) {
466
                // Quirk: jbig2enc (used e.g. by Google Books) sometimes generates SymbolDictionary segments that do not
467
                // have their retention flag set, https://github.com/agl/jbig2enc/issues/121.
468
2
                allow_reference_to_dead_segment_quirk = true;
469
2
            }
470
2
            if (dead_segments.contains(referred_to_segment->header.segment_number) && !allow_reference_to_dead_segment_quirk)
471
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment refers to dead segment");
472
473
2
            auto const referred_to_segment_retention_flag = header.referred_to_segment_retention_flags[i];
474
2
            if (referred_to_segment_retention_flag) {
475
0
                if (dead_segments.contains(referred_to_segment->header.segment_number))
476
0
                    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment retention flags tried to revive dead segment");
477
2
            } else {
478
2
                dead_segments.set(referred_to_segment->header.segment_number);
479
2
            }
480
2
        }
481
248
    }
482
483
    // It is not true that all segments are marked as dead at the end of the file.
484
485
36
    return {};
486
36
}
487
488
static bool is_region_segment(JBIG2::SegmentType type)
489
223
{
490
    // 7.3 Segment types
491
    // "The segments of types "intermediate text region", "immediate text region", "immediate lossless text region",
492
    //  "intermediate halftone region", "immediate halftone region", "immediate lossless halftone region", "intermediate
493
    //  generic region", "immediate generic region" , "immediate lossless generic region", "intermediate generic refinement
494
    //  region", "immediate generic refinement region", and "immediate lossless generic refinement region" are collectively
495
    //  referred to as "region segments"."
496
223
    switch (type) {
497
1
    case JBIG2::SegmentType::IntermediateTextRegion:
498
23
    case JBIG2::SegmentType::ImmediateTextRegion:
499
25
    case JBIG2::SegmentType::ImmediateLosslessTextRegion:
500
25
    case JBIG2::SegmentType::IntermediateHalftoneRegion:
501
25
    case JBIG2::SegmentType::ImmediateHalftoneRegion:
502
25
    case JBIG2::SegmentType::ImmediateLosslessHalftoneRegion:
503
25
    case JBIG2::SegmentType::IntermediateGenericRegion:
504
25
    case JBIG2::SegmentType::ImmediateGenericRegion:
505
25
    case JBIG2::SegmentType::ImmediateLosslessGenericRegion:
506
25
    case JBIG2::SegmentType::IntermediateGenericRefinementRegion:
507
25
    case JBIG2::SegmentType::ImmediateGenericRefinementRegion:
508
25
    case JBIG2::SegmentType::ImmediateLosslessGenericRefinementRegion:
509
25
        return true;
510
198
    default:
511
198
        return false;
512
223
    }
513
223
}
514
515
static bool is_intermediate_region_segment(JBIG2::SegmentType type)
516
2
{
517
2
    switch (type) {
518
0
    case JBIG2::SegmentType::IntermediateTextRegion:
519
0
    case JBIG2::SegmentType::IntermediateHalftoneRegion:
520
0
    case JBIG2::SegmentType::IntermediateGenericRegion:
521
0
    case JBIG2::SegmentType::IntermediateGenericRefinementRegion:
522
0
        return true;
523
2
    default:
524
2
        return false;
525
2
    }
526
2
}
527
528
static ErrorOr<void> validate_segment_header_references(JBIG2LoadingContext const& context)
529
36
{
530
    // 7.3.1 Rules for segment references
531
532
36
    HashMap<u32, u32> intermediate_region_segment_references;
533
248
    for (auto const& segment : context.segments) {
534
        // "• An intermediate region segment may only be referred to by one other non-extension segment; it may be
535
        //    referred to by any number of extension segments."
536
248
        for (auto const* referred_to_segment : segment.referred_to_segments) {
537
2
            if (!is_intermediate_region_segment(referred_to_segment->type()) || segment.type() == JBIG2::SegmentType::Extension)
538
2
                continue;
539
0
            if (intermediate_region_segment_references.set(referred_to_segment->header.segment_number, segment.header.segment_number) != HashSetResult::InsertedNewEntry)
540
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Intermediate region segment referred to by multiple non-extension segments");
541
0
        }
542
543
        // "• A segment of type "symbol dictionary" (type 0) may refer to any number of segments of type "symbol
544
        //    dictionary" and to up to four segments of type "tables"."
545
248
        if (segment.type() == JBIG2::SegmentType::SymbolDictionary) {
546
172
            u32 table_count = 0;
547
172
            for (auto const* referred_to_segment : segment.referred_to_segments) {
548
2
                if (!first_is_one_of(referred_to_segment->type(), JBIG2::SegmentType::SymbolDictionary, JBIG2::SegmentType::Tables))
549
0
                    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Symbol dictionary segment refers to invalid segment type");
550
2
                if (referred_to_segment->type() == JBIG2::SegmentType::Tables)
551
0
                    table_count++;
552
2
            }
553
172
            if (table_count > 4)
554
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Symbol dictionary segment refers to too many tables segments");
555
172
        }
556
557
        // "• A segment of type "intermediate text region", "immediate text region" or "immediate lossless text
558
        //    region" (type 4, 6 or 7) may refer to any number of segments of type "symbol dictionary" and to up to
559
        //    eight segments of type "tables". Additionally, it may refer to any number of segments of type "colour
560
        //    palette segment", if it has COLEXTFLAG = 1 in its region segment flags."
561
        // Note: decode_region_segment_information_field() currently rejects COLEXTFLAG = 1, so that part is not implemented.
562
248
        if (first_is_one_of(segment.type(),
563
248
                JBIG2::SegmentType::IntermediateTextRegion,
564
248
                JBIG2::SegmentType::ImmediateTextRegion,
565
248
                JBIG2::SegmentType::ImmediateLosslessTextRegion)) {
566
28
            u32 table_count = 0;
567
28
            for (auto const* referred_to_segment : segment.referred_to_segments) {
568
0
                if (!first_is_one_of(referred_to_segment->type(), JBIG2::SegmentType::SymbolDictionary,
569
0
                        JBIG2::SegmentType::Tables))
570
0
                    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Text region segment refers to invalid segment type");
571
0
                if (referred_to_segment->type() == JBIG2::SegmentType::Tables)
572
0
                    table_count++;
573
0
            }
574
28
            if (table_count > 8)
575
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Text region segment refers to too many tables segments");
576
28
        }
577
578
        // "• A segment of type "pattern dictionary" (type 16) must not refer to any other segment."
579
248
        if (segment.type() == JBIG2::SegmentType::PatternDictionary && !segment.header.referred_to_segment_numbers.is_empty())
580
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Pattern dictionary segment refers to other segments");
581
582
        // "• A segment of type "intermediate halftone region", "immediate halftone region" or "immediate lossless
583
        //    halftone region" (type 20, 22 or 23) must refer to exactly one segment, and this segment must be of type
584
        //    "pattern dictionary"."
585
248
        if (first_is_one_of(segment.type(),
586
248
                JBIG2::SegmentType::IntermediateHalftoneRegion,
587
248
                JBIG2::SegmentType::ImmediateHalftoneRegion,
588
248
                JBIG2::SegmentType::ImmediateLosslessHalftoneRegion)) {
589
0
            if (segment.referred_to_segments.size() != 1)
590
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Halftone region segment must refer to exactly one pattern dictionary segment");
591
0
            if (segment.referred_to_segments[0]->type() != JBIG2::SegmentType::PatternDictionary)
592
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Halftone region segment refers to non-pattern dictionary segment");
593
0
        }
594
595
        // "• A segment of type "intermediate generic region", "immediate generic region" or "immediate lossless
596
        //    generic region" (type 36, 38 or 39) must not refer to any other segment. If it has COLEXTFLAG = 1 in
597
        //    its region segment flags, however, it may refer to any number of segments of the type "colour palette
598
        //   segment"."
599
        // Note: decode_region_segment_information_field() currently rejects COLEXTFLAG = 1, so that part is not implemented.
600
248
        if (first_is_one_of(segment.type(),
601
248
                JBIG2::SegmentType::IntermediateGenericRegion,
602
248
                JBIG2::SegmentType::ImmediateGenericRegion,
603
248
                JBIG2::SegmentType::ImmediateLosslessGenericRegion)
604
0
            && !segment.header.referred_to_segment_numbers.is_empty()) {
605
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Generic region segment refers to other segments");
606
0
        }
607
608
        // "• A segment of type "intermediate generic refinement region" (type 40) must refer to exactly one other
609
        //    segment. This other segment must be an intermediate region segment."
610
248
        if (segment.type() == JBIG2::SegmentType::IntermediateGenericRefinementRegion) {
611
0
            if (segment.referred_to_segments.size() != 1)
612
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Intermediate generic refinement region must refer to exactly one segment");
613
0
            if (!is_intermediate_region_segment(segment.referred_to_segments[0]->type()))
614
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Intermediate generic refinement region does not refer to intermediate region segment");
615
0
        }
616
617
        // "• A segment of type "immediate generic refinement region" or "immediate lossless generic refinement
618
        //    region" (type 42 or 43) may refer to either zero other segments or exactly one other segment. If it refers
619
        //    to one other segment then that segment must be an intermediate region segment."
620
248
        if (first_is_one_of(segment.type(),
621
248
                JBIG2::SegmentType::ImmediateGenericRefinementRegion,
622
248
                JBIG2::SegmentType::ImmediateLosslessGenericRefinementRegion)) {
623
2
            if (segment.referred_to_segments.size() > 1)
624
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Immediate generic refinement region must refer to zero or one segment");
625
2
            if (segment.referred_to_segments.size() == 1 && !is_intermediate_region_segment(segment.referred_to_segments[0]->type()))
626
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Immediate generic refinement region does not refer to intermediate region segment");
627
2
        }
628
629
        // "• A segment of type "page information" (type 48) must not refer to any other segments."
630
248
        if (segment.type() == JBIG2::SegmentType::PageInformation && !segment.header.referred_to_segment_numbers.is_empty())
631
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Page information segment refers to other segments");
632
633
        // "• A segment of type "end of page" (type 49) must not refer to any other segments."
634
248
        if (segment.type() == JBIG2::SegmentType::EndOfPage && !segment.header.referred_to_segment_numbers.is_empty())
635
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of page segment refers to other segments");
636
637
        // "• A segment of type "end of stripe" (type 50) must not refer to any other segments."
638
248
        if (segment.type() == JBIG2::SegmentType::EndOfStripe && !segment.header.referred_to_segment_numbers.is_empty())
639
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of stripe segment refers to other segments");
640
641
        // "• A segment of type "end of file" (type 51) must not refer to any other segments."
642
248
        if (segment.type() == JBIG2::SegmentType::EndOfFile && !segment.header.referred_to_segment_numbers.is_empty())
643
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of file segment refers to other segments");
644
645
        // "• A segment of type "profiles" (type 52) must not refer to any other segments."
646
248
        if (segment.type() == JBIG2::SegmentType::Profiles && !segment.header.referred_to_segment_numbers.is_empty())
647
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Profiles segment refers to other segments");
648
649
        // "• A segment of type "tables" (type 53) must not refer to any other segments."
650
248
        if (segment.type() == JBIG2::SegmentType::Tables && !segment.header.referred_to_segment_numbers.is_empty())
651
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Tables segment refers to other segments");
652
653
        // "• A segment of type "extension" (type 62) may refer to any number of segments of any type, unless the
654
        //    extension segment's type imposes some restriction."
655
        // Nothing to check.
656
657
        // "• A segment of type "colour palette" (type 54) must not refer to any other segments."
658
248
        if (segment.type() == JBIG2::SegmentType::ColorPalette && !segment.header.referred_to_segment_numbers.is_empty())
659
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Colour palette segment refers to other segments");
660
248
    }
661
662
36
    return {};
663
36
}
664
665
static ErrorOr<void> validate_segment_header_page_associations(JBIG2LoadingContext const& context)
666
36
{
667
    // 7.3.2 Rules for page associations
668
223
    for (auto const& segment : context.segments) {
669
        // "Every region segment must be associated with some page (i.e., have a non-zero page association field). "Page
670
        //  information",  "end of page" and "end of stripe" segments must be associated with some page. "End of file" segments
671
        //  must not be associated with any page. Segments of other types may be associated with a page or not."
672
223
        if (is_region_segment(segment.type())
673
198
            || first_is_one_of(segment.type(), JBIG2::SegmentType::PageInformation, JBIG2::SegmentType::EndOfPage, JBIG2::SegmentType::EndOfStripe)) {
674
62
            if (segment.header.page_association == 0)
675
5
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Region, page information, end of page, or end of stripe segment with no page association");
676
62
        }
677
        // Quirk: `042_*.jb2`, `amb_*.jb2` in the Power JBIG2 test suite incorrectly (cf 7.3.2) associate EndOfFile with a page.
678
218
        if (segment.type() == JBIG2::SegmentType::EndOfFile && segment.header.page_association != 0 && !context.allow_power_jbig2_quirks)
679
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of file segment with page association");
680
681
        // "If a segment is not associated with any page, then it must not refer to any segment that is associated with any page."
682
218
        if (segment.header.page_association == 0) {
683
101
            for (auto const* referred_to_segment : segment.referred_to_segments) {
684
0
                if (referred_to_segment->header.page_association != 0)
685
0
                    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment not associated with a page refers to segment associated with a page");
686
0
            }
687
101
        }
688
689
        // "If a segment is associated with a page, then it may refer to segments that are not associated with any page, and to
690
        //  segments that are associated with the same page. It must not refer to any segment that is associated with a different
691
        //  page."
692
218
        if (segment.header.page_association != 0) {
693
117
            for (auto const* referred_to_segment : segment.referred_to_segments) {
694
0
                if (referred_to_segment->header.page_association != 0 && referred_to_segment->header.page_association != segment.header.page_association)
695
0
                    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment refers to segment associated with a different page");
696
0
            }
697
117
        }
698
218
    }
699
700
31
    return {};
701
36
}
702
703
static ErrorOr<void> decode_segment_headers(JBIG2LoadingContext& context, ReadonlyBytes data)
704
412
{
705
412
    FixedMemoryStream stream(data);
706
707
412
    Vector<ReadonlyBytes> segment_datas;
708
671k
    auto store_and_skip_segment_data = [&](JBIG2::SegmentHeader const& segment_header) -> ErrorOr<void> {
709
671k
        size_t start_offset = TRY(stream.tell());
710
711
        // 7.2.7 Segment data length
712
        // "If the segment's type is "Immediate generic region", then the length field may contain the value 0xFFFFFFFF."
713
        // It sounds like this is not even allowed for ImmediateLosslessGenericRegion.
714
        // It's used in 0000033.pdf pages 1-2, and 0000600.pdf pages 1-3 (only with ImmediateGenericRegion).
715
671k
        if (!segment_header.data_length.has_value() && segment_header.type != JBIG2::SegmentType::ImmediateGenericRegion)
716
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment data length must be known for non-ImmediateGenericRegion segments");
717
718
671k
        u32 data_length = TRY(segment_header.data_length.try_value_or_lazy_evaluated([&]() {
719
671k
            return scan_for_immediate_generic_region_size(data.slice(start_offset));
720
671k
        }));
721
722
671k
        if (start_offset + data_length > data.size()) {
723
34
            dbgln_if(JBIG2_DEBUG, "JBIG2ImageDecoderPlugin: start_offset={}, data_length={}, data.size()={}", start_offset, data_length, data.size());
724
34
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment data length exceeds file size");
725
34
        }
726
671k
        ReadonlyBytes segment_data = data.slice(start_offset, data_length);
727
671k
        segment_datas.append(segment_data);
728
729
671k
        TRY(stream.seek(data_length, SeekMode::FromCurrentPosition));
730
671k
        return {};
731
671k
    };
732
733
412
    Vector<JBIG2::SegmentHeader> segment_headers;
734
790k
    while (!stream.is_eof()) {
735
790k
        auto segment_header = TRY(decode_segment_header(stream));
736
790k
        segment_headers.append(segment_header);
737
738
790k
        if (context.organization != JBIG2::Organization::RandomAccess)
739
610k
            TRY(store_and_skip_segment_data(segment_header));
740
741
        // Required per spec for files with RandomAccess organization.
742
790k
        if (segment_header.type == JBIG2::SegmentType::EndOfFile)
743
9
            break;
744
790k
    }
745
746
134
    if (context.organization == JBIG2::Organization::RandomAccess) {
747
49
        for (auto const& segment_header : segment_headers)
748
61.5k
            TRY(store_and_skip_segment_data(segment_header));
749
49
    }
750
751
134
    if (segment_headers.size() != segment_datas.size())
752
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment headers and segment datas have different sizes");
753
754
308k
    for (size_t i = 0; i < segment_headers.size(); ++i)
755
308k
        context.segments.append({ segment_headers[i], segment_datas[i] });
756
757
121
    return {};
758
121
}
759
760
static ErrorOr<void> complete_decoding_all_segment_headers(JBIG2LoadingContext& context)
761
121
{
762
121
    HashMap<u32, u32> segments_by_number;
763
737
    for (auto const& [i, segment] : enumerate(context.segments)) {
764
737
        if (segments_by_number.set(segment.header.segment_number, i) != HashSetResult::InsertedNewEntry)
765
66
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Duplicate segment number");
766
737
    }
767
768
449
    for (auto& segment : context.segments) {
769
449
        for (auto referred_to_segment_number : segment.header.referred_to_segment_numbers) {
770
163
            auto opt_referred_to_segment = segments_by_number.get(referred_to_segment_number);
771
163
            if (!opt_referred_to_segment.has_value())
772
4
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment refers to non-existing segment");
773
159
            segment.referred_to_segments.append(&context.segments[opt_referred_to_segment.value()]);
774
159
        }
775
449
    }
776
777
51
    identify_power_jbig2_files(context);
778
779
51
    TRY(validate_segment_order(context));
780
36
    TRY(validate_segment_header_retention_flags(context));
781
36
    TRY(validate_segment_header_references(context));
782
36
    TRY(validate_segment_header_page_associations(context));
783
784
31
    return {};
785
36
}
786
787
static ErrorOr<JBIG2::RegionSegmentInformationField> decode_region_segment_information_field(ReadonlyBytes data)
788
7
{
789
    // 7.4.1 Region segment information field
790
7
    if (data.size() < sizeof(JBIG2::RegionSegmentInformationField))
791
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid region segment information field size");
792
7
    auto result = *(JBIG2::RegionSegmentInformationField const*)data.data();
793
7
    if ((result.flags & 0b1111'0000) != 0)
794
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid region segment information field flags");
795
7
    if ((result.flags & 0x7) > 4)
796
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid region segment information field operator");
797
798
    // NOTE 3 – If the colour extension flag (COLEXTFLAG) is equal to 1, the external combination operator must be REPLACE.
799
7
    if (result.is_color_bitmap() && result.external_combination_operator() != JBIG2::CombinationOperator::Replace)
800
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid colored region segment information field operator");
801
802
    // FIXME: Support colors one day.
803
    // Update validate_segment_header_references() when allowing this.
804
    // Check that is_color_bitmap is only true if contains_colored_region_segments in the JBIG2 file header is set then.
805
7
    if (result.is_color_bitmap())
806
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: COLEXTFLAG=1 not yet implemented");
807
808
7
    return result;
809
7
}
810
811
static ErrorOr<JBIG2::PageInformationSegment> decode_page_information_segment(ReadonlyBytes data)
812
35
{
813
    // 7.4.8 Page information segment syntax
814
35
    if (data.size() != sizeof(JBIG2::PageInformationSegment))
815
2
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid page information segment size");
816
33
    return *(JBIG2::PageInformationSegment const*)data.data();
817
35
}
818
819
static ErrorOr<void> validate_segment_combination_operator_consistency(JBIG2LoadingContext& context, JBIG2::RegionSegmentInformationField const& information_field)
820
7
{
821
    // 7.4.8.5 Page segment flags
822
    // "NOTE 1 – All region segments, except for refinement region segments, are direct region segments. Because of the requirements
823
    //  in 7.4.7.5 restricting the external combination operators of refinement region segments, if this bit is 0, then refinement region
824
    //  segments associated with this page that refer to no region segments must have an external combination operator of REPLACE,
825
    //  and all other region segments associated with this page must have the external combination operator specified by this page's
826
    //  "Page default combination operator"."
827
828
7
    if (context.page.direct_region_segments_override_default_combination_operator)
829
0
        return {};
830
831
7
    if (information_field.external_combination_operator() != context.page.default_combination_operator)
832
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment combination operator does not match page default combination operator, despite page information segment claiming it would");
833
834
7
    return {};
835
7
}
836
837
static ErrorOr<JBIG2::EndOfStripeSegment> decode_end_of_stripe_segment(ReadonlyBytes data)
838
0
{
839
    // 7.4.10 End of stripe segment syntax
840
0
    if (data.size() != sizeof(JBIG2::EndOfStripeSegment))
841
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of strip segment has wrong size");
842
0
    return *(JBIG2::EndOfStripeSegment const*)data.data();
843
0
}
844
845
static ErrorOr<void> scan_for_page_size(JBIG2LoadingContext& context)
846
31
{
847
    // This implements just enough of "8.2 Page image composition" to figure out the size of the current page.
848
    // The spec describes a slightly more complicated approach to make streaming work,
849
    // but we require all input data to be available anyway, so can just scan through all EndOfStripe segments.
850
851
31
    size_t page_info_count = 0;
852
31
    bool has_initially_unknown_height = false;
853
31
    bool found_end_of_page = false;
854
31
    bool page_is_striped = false;
855
31
    u16 max_stripe_height = 0;
856
31
    Optional<int> height_at_end_of_last_stripe;
857
31
    Optional<size_t> last_end_of_stripe_index;
858
31
    Optional<size_t> last_not_end_of_page_segment_index;
859
193
    for (auto const& [segment_index, segment] : enumerate(context.segments)) {
860
193
        if (segment.header.page_association != context.current_page_number)
861
141
            continue;
862
863
        // Quirk: `042_*.jb2`, `amb_*.jb2` in the Power JBIG2 test suite incorrectly (cf 7.3.2) associate EndOfFile with a page.
864
52
        if (segment.type() == JBIG2::SegmentType::EndOfFile && context.allow_power_jbig2_quirks)
865
0
            continue;
866
867
52
        if (found_end_of_page)
868
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Found segment after EndOfPage");
869
870
52
        if (segment.type() != JBIG2::SegmentType::EndOfPage)
871
35
            last_not_end_of_page_segment_index = segment_index;
872
873
52
        if (segment.type() == JBIG2::SegmentType::PageInformation) {
874
19
            if (++page_info_count > 1)
875
0
                return Error::from_string_literal("JBIG2: Multiple PageInformation segments");
876
877
19
            auto page_information = TRY(decode_page_information_segment(segment.data));
878
879
            // 7.4.8.6 Page striping information
880
            // "the maximum size of each stripe (the distance between an end of stripe segment's end row and the end row of the previous
881
            //  end of stripe segment, or 0 in the case of the first end of stripe segment) must be no more than the page's maximum
882
            //  stripe size."
883
            // This means that the first stripe can be one taller than maximum_stripe_size, but all subsequent stripes must not be.
884
            // FIXME: Be stricter about subsequent stripes.
885
17
            page_is_striped = page_information.page_is_striped();
886
17
            max_stripe_height = page_information.maximum_stripe_size() + 1;
887
888
17
            context.page.size = { page_information.bitmap_width, page_information.bitmap_height };
889
17
            has_initially_unknown_height = page_information.bitmap_height == 0xffff'ffff;
890
891
            // "If the page's bitmap height is unknown (indicated by a page bitmap height of 0xFFFFFFFF) then the "page is striped"
892
            //  bit must be 1."
893
17
            if (has_initially_unknown_height && !page_information.page_is_striped())
894
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Non-striped bitmaps of indeterminate height not allowed");
895
33
        } else if (segment.type() == JBIG2::SegmentType::EndOfStripe) {
896
0
            if (page_info_count == 0)
897
0
                return Error::from_string_literal("JBIG2: EndOfStripe before PageInformation");
898
0
            if (!page_is_striped)
899
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Found EndOfStripe for non-striped page");
900
901
            // 7.4.10 End of stripe segment syntax
902
            // "An end of stripe segment states that the encoder has finished coding a portion of the page with which the segment is
903
            //  associated, and will not revisit it. It specifies the Y coordinate of a row of the page; no segment following the end of
904
            //  stripe may modify any portion of the page bitmap that lines on or above that row; furthermore, no segment preceding
905
            //  the end of stripe may modify any portion of the page bitmap that lies below that row. This row is called the "end row"
906
            //  of the stripe."
907
0
            auto end_of_stripe = TRY(decode_end_of_stripe_segment(segment.data));
908
0
            int new_height = end_of_stripe.y_coordinate + 1;
909
910
0
            if (has_initially_unknown_height) {
911
0
                if (height_at_end_of_last_stripe.has_value() && new_height < height_at_end_of_last_stripe.value())
912
0
                    return Error::from_string_literal("JBIG2ImageDecoderPlugin: EndOfStripe Y coordinate is not increasing");
913
0
                context.page.size.set_height(new_height);
914
0
            } else if (new_height > context.page.size.height()) {
915
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: EndOfStripe Y coordinate larger than page height");
916
0
            }
917
918
            // "The end row specified by an end of stripe segment must lie below any previous end row for that page."
919
0
            int stripe_height = new_height - height_at_end_of_last_stripe.value_or(0);
920
0
            if (stripe_height <= 0)
921
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: EndOfStripe Y coordinate is not increasing");
922
923
0
            dbgln_if(JBIG2_DEBUG, "stripe_height={}, max_stripe_height={}", stripe_height, max_stripe_height);
924
0
            if (stripe_height > max_stripe_height)
925
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: EndOfStripe Y coordinate larger than maximum stripe height");
926
927
0
            height_at_end_of_last_stripe = new_height;
928
0
            last_end_of_stripe_index = segment_index;
929
33
        } else if (segment.type() == JBIG2::SegmentType::EndOfPage) {
930
17
            if (segment.data.size() != 0)
931
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of page segment has non-zero size");
932
17
            found_end_of_page = true;
933
17
        }
934
52
    }
935
936
29
    if (page_info_count == 0)
937
12
        return Error::from_string_literal("JBIG2: Missing PageInformation segment");
938
939
17
    if (page_is_striped) {
940
1
        if (has_initially_unknown_height) {
941
            // "A page whose height was originally unknown must contain at least one end of stripe segment."
942
0
            if (!height_at_end_of_last_stripe.has_value())
943
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Striped page of initially unknown height without EndOfStripe segment");
944
945
0
            if (last_end_of_stripe_index.value() != last_not_end_of_page_segment_index.value())
946
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Page not ended by end of stripe segment on striped page with initially unknown height");
947
0
            context.page.size.set_height(height_at_end_of_last_stripe.value());
948
0
        }
949
950
        // `!=` is not true, e.g. in ignition.pdf the last stripe is shorter than the page height.
951
1
        if (!has_initially_unknown_height && height_at_end_of_last_stripe.has_value() && height_at_end_of_last_stripe.value() > context.page.size.height())
952
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Stripes are higher than page height");
953
1
    }
954
955
17
    if (context.organization == JBIG2::Organization::Embedded) {
956
        // PDF 1.7 spec, 3.3.6 JBIG2Decode Filter
957
        // "The JBIG2 file header, end-of-page segments, and end-of-file segment are not
958
        //  used in PDF. These should be removed before the PDF objects described below
959
        //  are created."
960
0
        if (found_end_of_page)
961
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Unexpected EndOfPage segment in embedded stream");
962
17
    } else {
963
        // 7.4.9 End of page segment syntax
964
        // "Each page must have exactly one end of page segment associated with it."
965
17
        if (!found_end_of_page)
966
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Missing EndOfPage segment");
967
17
    }
968
969
17
    return {};
970
17
}
971
972
static ErrorOr<void> scan_for_page_numbers(JBIG2LoadingContext& context)
973
17
{
974
17
    HashTable<u32> seen_pages;
975
17
    Vector<u32> pages;
976
977
55
    for (auto const& segment : context.segments) {
978
55
        if (segment.header.page_association == 0)
979
12
            continue;
980
43
        if (seen_pages.contains(segment.header.page_association))
981
24
            continue;
982
19
        seen_pages.set(segment.header.page_association);
983
19
        pages.append(segment.header.page_association);
984
19
    }
985
986
17
    if (context.number_of_pages.has_value() && context.number_of_pages.value() != pages.size())
987
1
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Number of pages in file header does not match number of pages found in segments");
988
989
16
    context.page_numbers = move(pages);
990
16
    return {};
991
17
}
992
993
// 6.2.2 Input parameters
994
// Table 2 – Parameters for the generic region decoding procedure
995
struct GenericRegionDecodingInputParameters {
996
    bool is_modified_modified_read { false }; // "MMR" in spec.
997
    u32 region_width { 0 };                   // "GBW" in spec.
998
    u32 region_height { 0 };                  // "GBH" in spec.
999
    u8 gb_template { 0 };
1000
    bool is_typical_prediction_used { false };          // "TPGDON" in spec.
1001
    bool is_extended_reference_template_used { false }; // "EXTTEMPLATE" in spec.
1002
    Optional<BilevelImage const&> skip_pattern;         // "USESKIP", "SKIP" in spec.
1003
1004
    Array<JBIG2::AdaptiveTemplatePixel, 12> adaptive_template_pixels {}; // "GBATX" / "GBATY" in spec.
1005
    // FIXME: GBCOLS, GBCOMBOP, COLEXTFLAG
1006
1007
    enum RequireEOFBAfterMMR {
1008
        No,
1009
        Yes,
1010
    } require_eof_after_mmr { RequireEOFBAfterMMR::No };
1011
1012
    // If is_modified_modified_read is true, generic_region_decoding_procedure() reads data off this stream.
1013
    Stream* stream { nullptr };
1014
1015
    // If is_modified_modified_read is false, generic_region_decoding_procedure() reads data off this decoder.
1016
    MQArithmeticDecoder* arithmetic_decoder { nullptr };
1017
};
1018
1019
// 6.2 Generic region decoding procedure
1020
static ErrorOr<NonnullRefPtr<BilevelImage>> generic_region_decoding_procedure(GenericRegionDecodingInputParameters const& inputs, Optional<JBIG2::GenericContexts>& maybe_contexts)
1021
3
{
1022
3
    if (inputs.is_modified_modified_read) {
1023
2
        dbgln_if(JBIG2_DEBUG, "JBIG2ImageDecoderPlugin: MMR image data");
1024
1025
        // 6.2.6 Decoding using MMR coding
1026
        // "If the number of bytes contained in the encoded bitmap is known in advance, then it is permissible for the data
1027
        //  stream not to contain an EOFB (000000000001000000000001) at the end of the MMR-encoded data."
1028
2
        CCITT::Group4Options options;
1029
2
        if (inputs.require_eof_after_mmr == GenericRegionDecodingInputParameters::RequireEOFBAfterMMR::Yes)
1030
0
            options.has_end_of_block = CCITT::Group4Options::HasEndOfBlock::Yes;
1031
1032
        // "An invocation of the generic region decoding procedure with MMR equal to 1 shall consume an integral number of
1033
        //  bytes, beginning and ending on a byte boundary."
1034
        // This means we can pass in a stream to CCITT::decode_ccitt_group4() and that can use a bit stream internally.
1035
2
        auto buffer = TRY(CCITT::decode_ccitt_group4(*inputs.stream, inputs.region_width, inputs.region_height, options));
1036
1037
0
        size_t bytes_per_row = ceil_div(inputs.region_width, 8);
1038
0
        if (buffer.size() != bytes_per_row * inputs.region_height)
1039
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Decoded MMR data has wrong size");
1040
1041
0
        auto result = TRY(BilevelImage::create_from_byte_buffer(move(buffer), inputs.region_width, inputs.region_height));
1042
0
        return result;
1043
0
    }
1044
1045
1
    auto& contexts = maybe_contexts.value();
1046
1047
    // 6.2.5 Decoding using a template and arithmetic coding
1048
1
    if (inputs.is_extended_reference_template_used)
1049
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Cannot decode EXTTEMPLATE yet");
1050
1051
1
    int number_of_adaptive_template_pixels = inputs.gb_template == 0 ? 4 : 1;
1052
5
    for (int i = 0; i < number_of_adaptive_template_pixels; ++i)
1053
4
        TRY(check_valid_adaptive_template_pixel(inputs.adaptive_template_pixels[i]));
1054
1055
1
    if (inputs.skip_pattern.has_value() && (inputs.skip_pattern->width() != inputs.region_width || inputs.skip_pattern->height() != inputs.region_height))
1056
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid USESKIP dimensions");
1057
1058
12
    static constexpr auto get_pixel = [](NonnullRefPtr<BilevelImage> const& buffer, int x, int y) -> bool {
1059
        // 6.2.5.2 Coding order and edge conventions
1060
        // "• All pixels lying outside the bounds of the actual bitmap have the value 0."
1061
        // We don't have to check y >= buffer->height() because check_valid_adaptive_template_pixel() rejects y > 0.
1062
12
        if (x < 0 || x >= (int)buffer->width() || y < 0)
1063
10
            return false;
1064
2
        return buffer->get_bit(x, y);
1065
12
    };
1066
1067
9
    static constexpr auto get_pixels = [](NonnullRefPtr<BilevelImage> const& buffer, int x, int y, u8 width) -> u8 {
1068
9
        if (x + width < 0 || x >= (int)buffer->width() || y < 0)
1069
6
            return 0;
1070
3
        auto corrected_x = max(x, 0);
1071
3
        auto right_end = x + width;
1072
3
        auto corrected_right_end = min(right_end, buffer->width());
1073
3
        auto in_bounds = corrected_right_end - corrected_x;
1074
3
        auto res = buffer->get_bits(corrected_x, y, in_bounds);
1075
3
        res <<= (right_end - corrected_right_end);
1076
3
        return res;
1077
9
    };
1078
1079
    // Figure 3(a) – Template when GBTEMPLATE = 0 and EXTTEMPLATE = 0,
1080
3
    constexpr auto compute_context_0 = [](NonnullRefPtr<BilevelImage> const& buffer, ReadonlySpan<JBIG2::AdaptiveTemplatePixel> adaptive_pixels, int x, int y) -> u16 {
1081
3
        u16 result = 0;
1082
15
        for (int i = 0; i < 4; ++i)
1083
12
            result = (result << 1) | (u16)get_pixel(buffer, x + adaptive_pixels[i].x, y + adaptive_pixels[i].y);
1084
3
        result = (result << 3) | get_pixels(buffer, x - 1, y - 2, 3);
1085
3
        result = (result << 5) | get_pixels(buffer, x - 2, y - 1, 5);
1086
3
        result = (result << 4) | get_pixels(buffer, x - 4, y, 4);
1087
3
        return result;
1088
3
    };
1089
1090
    // Figure 4 – Template when GBTEMPLATE = 1
1091
1
    auto compute_context_1 = [](NonnullRefPtr<BilevelImage> const& buffer, ReadonlySpan<JBIG2::AdaptiveTemplatePixel> adaptive_pixels, int x, int y) -> u16 {
1092
0
        u16 result = 0;
1093
0
        result = (result << 1) | (u16)get_pixel(buffer, x + adaptive_pixels[0].x, y + adaptive_pixels[0].y);
1094
0
        result = (result << 4) | get_pixels(buffer, x - 1, y - 2, 4);
1095
0
        result = (result << 5) | get_pixels(buffer, x - 2, y - 1, 5);
1096
0
        result = (result << 3) | get_pixels(buffer, x - 3, y, 3);
1097
0
        return result;
1098
0
    };
1099
1100
    // Figure 5 – Template when GBTEMPLATE = 2
1101
1
    auto compute_context_2 = [](NonnullRefPtr<BilevelImage> const& buffer, ReadonlySpan<JBIG2::AdaptiveTemplatePixel> adaptive_pixels, int x, int y) -> u16 {
1102
0
        u16 result = 0;
1103
0
        result = (result << 1) | (u16)get_pixel(buffer, x + adaptive_pixels[0].x, y + adaptive_pixels[0].y);
1104
0
        result = (result << 3) | get_pixels(buffer, x - 1, y - 2, 3);
1105
0
        result = (result << 4) | get_pixels(buffer, x - 2, y - 1, 4);
1106
0
        result = (result << 2) | get_pixels(buffer, x - 2, y, 2);
1107
0
        return result;
1108
0
    };
1109
1110
    // Figure 6 – Template when GBTEMPLATE = 3
1111
1
    auto compute_context_3 = [](NonnullRefPtr<BilevelImage> const& buffer, ReadonlySpan<JBIG2::AdaptiveTemplatePixel> adaptive_pixels, int x, int y) -> u16 {
1112
0
        u16 result = 0;
1113
0
        result = (result << 1) | (u16)get_pixel(buffer, x + adaptive_pixels[0].x, y + adaptive_pixels[0].y);
1114
0
        result = (result << 5) | get_pixels(buffer, x - 3, y - 1, 5);
1115
0
        result = (result << 4) | get_pixels(buffer, x - 4, y, 4);
1116
0
        return result;
1117
0
    };
1118
1119
1
    u16 (*compute_context)(NonnullRefPtr<BilevelImage> const&, ReadonlySpan<JBIG2::AdaptiveTemplatePixel>, int, int);
1120
1
    if (inputs.gb_template == 0)
1121
1
        compute_context = compute_context_0;
1122
0
    else if (inputs.gb_template == 1)
1123
0
        compute_context = compute_context_1;
1124
0
    else if (inputs.gb_template == 2)
1125
0
        compute_context = compute_context_2;
1126
0
    else {
1127
0
        VERIFY(inputs.gb_template == 3);
1128
0
        compute_context = compute_context_3;
1129
0
    }
1130
1131
    // "The values of the pixels in this neighbourhood define a context. Each context has its own adaptive probability estimate
1132
    //  used by the arithmetic coder (see Annex E)."
1133
    // "* Decode the current pixel by invoking the arithmetic entropy decoding procedure, with CX set to the value formed by
1134
    //    concatenating the label "GB" and the 10-16 pixel values gathered in CONTEXT."
1135
    // NOTE: What this is supposed to mean is that we have a bunch of independent contexts, and we pick the
1136
    // context for the current pixel based on pixel values in the neighborhood. The "GB" part just means this context is
1137
    // independent from other contexts in the spec. They are passed in to this function.
1138
1139
    // Figure 8 – Reused context for coding the SLTP value when GBTEMPLATE is 0
1140
1
    constexpr u16 sltp_context_for_template_0 = 0b0011'001'11001'0101;
1141
1142
    // Figure 9 – Reused context for coding the SLTP value when GBTEMPLATE is 1
1143
1
    constexpr u16 sltp_context_for_template_1 = 0b0'0011'11001'101;
1144
1145
    // Figure 10 – Reused context for coding the SLTP value when GBTEMPLATE is 2
1146
1
    constexpr u16 sltp_context_for_template_2 = 0b1'001'1100'01;
1147
1148
    // Figure 11 – Reused context for coding the SLTP value when GBTEMPLATE is 3
1149
1
    constexpr u16 sltp_context_for_template_3 = 0b1'01100'0101;
1150
1151
1
    u16 sltp_context = [](u8 gb_template) {
1152
1
        if (gb_template == 0)
1153
1
            return sltp_context_for_template_0;
1154
0
        if (gb_template == 1)
1155
0
            return sltp_context_for_template_1;
1156
0
        if (gb_template == 2)
1157
0
            return sltp_context_for_template_2;
1158
0
        VERIFY(gb_template == 3);
1159
0
        return sltp_context_for_template_3;
1160
0
    }(inputs.gb_template);
1161
1162
    // 6.2.5.7 Decoding the bitmap
1163
1
    MQArithmeticDecoder& decoder = *inputs.arithmetic_decoder;
1164
1165
    // "1) Set:
1166
    //         LTP = 0"
1167
1
    bool ltp = false; // "Line (uses) Typical Prediction" maybe?
1168
1169
    // " 2) Create a bitmap GBREG of width GBW and height GBH pixels."
1170
1
    auto result = TRY(BilevelImage::create(inputs.region_width, inputs.region_height));
1171
1172
    // "3) Decode each row as follows:"
1173
2
    for (size_t y = 0; y < inputs.region_height; ++y) {
1174
        // "a) If all GBH rows have been decoded then the decoding is complete; proceed to step 4)."
1175
        // "b) If TPGDON is 1, then decode a bit using the arithmetic entropy coder..."
1176
1
        if (inputs.is_typical_prediction_used) {
1177
            // "SLTP" in spec. "Swap LTP" or "Switch LTP" maybe?
1178
0
            bool sltp = decoder.get_next_bit(contexts.contexts[sltp_context]);
1179
0
            ltp = ltp ^ sltp;
1180
1181
            // "c) If LTP = 1 then set every pixel of the current row of GBREG equal to the corresponding pixel of the row
1182
            //     immediately above."
1183
0
            if (ltp) {
1184
0
                for (size_t x = 0; x < inputs.region_width; ++x)
1185
0
                    result->set_bit(x, y, get_pixel(result, (int)x, (int)y - 1));
1186
0
                continue;
1187
0
            }
1188
0
        }
1189
1190
        // "d) If LTP = 0 then, from left to right, decode each pixel of the current row of GBREG. The procedure for each
1191
        //     pixel is as follows:"
1192
4
        for (size_t x = 0; x < inputs.region_width; ++x) {
1193
            // "i) If USESKIP is 1 and the pixel in the bitmap SKIP at the location corresponding to the current pixel is 1,
1194
            //     then set the current pixel to 0."
1195
3
            if (inputs.skip_pattern.has_value() && inputs.skip_pattern->get_bit(x, y)) {
1196
0
                result->set_bit(x, y, false);
1197
0
                continue;
1198
0
            }
1199
1200
            // "ii) Otherwise:"
1201
3
            u16 context = compute_context(result, inputs.adaptive_template_pixels, x, y);
1202
3
            bool bit = decoder.get_next_bit(contexts.contexts[context]);
1203
3
            result->set_bit(x, y, bit);
1204
3
        }
1205
1
    }
1206
1207
    // "4) After all the rows have been decoded, the current contents of the bitmap GBREG are the results that shall be
1208
    //     obtained by every decoder, whether it performs this exact sequence of steps or not."
1209
1
    return result;
1210
1
}
1211
1212
// 6.3.2 Input parameters
1213
// Table 6 – Parameters for the generic refinement region decoding procedure
1214
struct GenericRefinementRegionDecodingInputParameters {
1215
    u32 region_width { 0 };                                             // "GRW" in spec.
1216
    u32 region_height { 0 };                                            // "GRH" in spec.
1217
    u8 gr_template { 0 };                                               // "GRTEMPLATE" in spec.
1218
    BilevelSubImage const* reference_bitmap { nullptr };                // "GRREFERENCE" in spec.
1219
    i32 reference_x_offset { 0 };                                       // "GRREFERENCEDX" in spec.
1220
    i32 reference_y_offset { 0 };                                       // "GRREFERENCEDY" in spec.
1221
    bool is_typical_prediction_used { false };                          // "TPGRON" in spec.
1222
    Array<JBIG2::AdaptiveTemplatePixel, 2> adaptive_template_pixels {}; // "GRATX" / "GRATY" in spec.
1223
};
1224
1225
// 6.3 Generic Refinement Region Decoding Procedure
1226
static ErrorOr<NonnullRefPtr<BilevelImage>> generic_refinement_region_decoding_procedure(GenericRefinementRegionDecodingInputParameters& inputs, MQArithmeticDecoder& decoder, JBIG2::RefinementContexts& contexts)
1227
0
{
1228
0
    VERIFY(inputs.gr_template == 0 || inputs.gr_template == 1);
1229
1230
0
    if (inputs.gr_template == 0) {
1231
0
        TRY(check_valid_adaptive_template_pixel(inputs.adaptive_template_pixels[0]));
1232
        // inputs.adaptive_template_pixels[1] is allowed to contain any value.
1233
0
    }
1234
    // GRTEMPLATE 1 never uses adaptive pixels.
1235
1236
    // 6.3.5.3 Fixed templates and adaptive templates
1237
0
    static constexpr auto get_pixel = [](auto const& buffer, int x, int y) -> bool {
1238
0
        if (x < 0 || x >= (int)buffer.width() || y < 0 || y >= (int)buffer.height())
1239
0
            return false;
1240
0
        return buffer.get_bit(x, y);
1241
0
    };
Unexecuted instantiation: JBIG2Loader.cpp:bool Gfx::generic_refinement_region_decoding_procedure(Gfx::GenericRefinementRegionDecodingInputParameters&, Gfx::MQArithmeticDecoder&, Gfx::JBIG2::RefinementContexts&)::$_0::operator()<Gfx::BilevelSubImage>(Gfx::BilevelSubImage const&, int, int) const
Unexecuted instantiation: JBIG2Loader.cpp:bool Gfx::generic_refinement_region_decoding_procedure(Gfx::GenericRefinementRegionDecodingInputParameters&, Gfx::MQArithmeticDecoder&, Gfx::JBIG2::RefinementContexts&)::$_0::operator()<Gfx::BilevelImage>(Gfx::BilevelImage const&, int, int) const
1242
1243
    // Figure 12 – 13-pixel refinement template showing the AT pixels at their nominal locations
1244
0
    constexpr auto compute_context_0 = [](ReadonlySpan<JBIG2::AdaptiveTemplatePixel> adaptive_pixels, BilevelSubImage const& reference, int reference_x, int reference_y, BilevelImage const& buffer, int x, int y) -> u16 {
1245
0
        u16 result = 0;
1246
1247
0
        for (int dy = -1; dy <= 1; ++dy) {
1248
0
            for (int dx = -1; dx <= 1; ++dx) {
1249
0
                if (dy == -1 && dx == -1)
1250
0
                    result = (result << 1) | (u16)get_pixel(reference, reference_x + adaptive_pixels[1].x, reference_y + adaptive_pixels[1].y);
1251
0
                else
1252
0
                    result = (result << 1) | (u16)get_pixel(reference, reference_x + dx, reference_y + dy);
1253
0
            }
1254
0
        }
1255
1256
0
        result = (result << 1) | (u16)get_pixel(buffer, x + adaptive_pixels[0].x, y + adaptive_pixels[0].y);
1257
0
        for (int i = 0; i < 2; ++i)
1258
0
            result = (result << 1) | (u16)get_pixel(buffer, x + i, y - 1);
1259
0
        result = (result << 1) | (u16)get_pixel(buffer, x - 1, y);
1260
1261
0
        return result;
1262
0
    };
1263
1264
    // Figure 13 – 10-pixel refinement template
1265
0
    constexpr auto compute_context_1 = [](ReadonlySpan<JBIG2::AdaptiveTemplatePixel>, BilevelSubImage const& reference, int reference_x, int reference_y, BilevelImage const& buffer, int x, int y) -> u16 {
1266
0
        u16 result = 0;
1267
1268
0
        for (int dy = -1; dy <= 1; ++dy) {
1269
0
            for (int dx = -1; dx <= 1; ++dx) {
1270
0
                if ((dy == -1 && (dx == -1 || dx == 1)) || (dy == 1 && dx == -1))
1271
0
                    continue;
1272
0
                result = (result << 1) | (u16)get_pixel(reference, reference_x + dx, reference_y + dy);
1273
0
            }
1274
0
        }
1275
1276
0
        for (int i = 0; i < 3; ++i)
1277
0
            result = (result << 1) | (u16)get_pixel(buffer, x - 1 + i, y - 1);
1278
0
        result = (result << 1) | (u16)get_pixel(buffer, x - 1, y);
1279
1280
0
        return result;
1281
0
    };
1282
1283
0
    auto compute_context = inputs.gr_template == 0 ? compute_context_0 : compute_context_1;
1284
1285
    // Figure 14 – Reused context for coding the SLTP value when GRTEMPLATE is 0
1286
0
    constexpr u16 sltp_context_for_template_0 = 0b000'010'000'000'0;
1287
1288
    // Figure 15 – Reused context for coding the SLTP value when GRTEMPLATE is 1
1289
0
    constexpr u16 sltp_context_for_template_1 = 0b0'010'00'000'0;
1290
1291
0
    u16 const sltp_context = inputs.gr_template == 0 ? sltp_context_for_template_0 : sltp_context_for_template_1;
1292
1293
    // 6.3.5.6 Decoding the refinement bitmap
1294
1295
    // "1) Set LTP = 0."
1296
0
    bool ltp = false; // "Line (uses) Typical Prediction" maybe?
1297
1298
    // "2) Create a bitmap GRREG of width GRW and height GRH pixels."
1299
0
    auto result = TRY(BilevelImage::create(inputs.region_width, inputs.region_height));
1300
1301
    // "3) Decode each row as follows:"
1302
0
    for (size_t y = 0; y < result->height(); ++y) {
1303
        // "a) If all GRH rows have been decoded, then the decoding is complete; proceed to step 4)."
1304
        // "b) If TPGRON is 1, then decode a bit using the arithmetic entropy coder..."
1305
0
        if (inputs.is_typical_prediction_used) {
1306
            // "SLTP" in spec. "Swap LTP" or "Switch LTP" maybe?
1307
0
            bool sltp = decoder.get_next_bit(contexts.contexts[sltp_context]);
1308
0
            ltp = ltp ^ sltp;
1309
0
        }
1310
1311
0
        if (!ltp) {
1312
            // "c) If LTP = 0 then, from left to right, explicitly decode all pixels of the current row of GRREG. The
1313
            //     procedure for each pixel is as follows:"
1314
0
            for (size_t x = 0; x < result->width(); ++x) {
1315
0
                u16 context = compute_context(inputs.adaptive_template_pixels, *inputs.reference_bitmap, x - inputs.reference_x_offset, y - inputs.reference_y_offset, *result, x, y);
1316
0
                bool bit = decoder.get_next_bit(contexts.contexts[context]);
1317
0
                result->set_bit(x, y, bit);
1318
0
            }
1319
0
        } else {
1320
            // "d) If LTP = 1 then, from left to right, implicitly decode certain pixels of the current row of GRREG,
1321
            //     and explicitly decode the rest. The procedure for each pixel is as follows:"
1322
0
            for (size_t x = 0; x < result->width(); ++x) {
1323
                // "TPGRPIX", "TPGRVAL" in spec.
1324
0
                auto prediction = [&](size_t x, size_t y) -> Optional<bool> {
1325
                    // "• a 3 × 3 pixel array in the reference bitmap (Figure 16), centred at the location
1326
                    //    corresponding to the current pixel, contains pixels all of the same value."
1327
0
                    bool prediction = get_pixel(*inputs.reference_bitmap, x - inputs.reference_x_offset - 1, y - inputs.reference_y_offset - 1);
1328
0
                    for (int dy = -1; dy <= 1; ++dy)
1329
0
                        for (int dx = -1; dx <= 1; ++dx)
1330
0
                            if (get_pixel(*inputs.reference_bitmap, x - inputs.reference_x_offset + dx, y - inputs.reference_y_offset + dy) != prediction)
1331
0
                                return {};
1332
0
                    return prediction;
1333
0
                }(x, y);
1334
1335
                // TPGRON must be 1 if LTP is set. (The spec has an explicit "TPGRON is 1 AND" check here, but it is pointless.)
1336
0
                VERIFY(inputs.is_typical_prediction_used);
1337
0
                if (prediction.has_value()) {
1338
0
                    result->set_bit(x, y, prediction.value());
1339
0
                } else {
1340
0
                    u16 context = compute_context(inputs.adaptive_template_pixels, *inputs.reference_bitmap, x - inputs.reference_x_offset, y - inputs.reference_y_offset, *result, x, y);
1341
0
                    bool bit = decoder.get_next_bit(contexts.contexts[context]);
1342
0
                    result->set_bit(x, y, bit);
1343
0
                }
1344
0
            }
1345
0
        }
1346
0
    }
1347
1348
0
    return result;
1349
0
}
1350
1351
static constexpr BilevelImage::CompositionType to_composition_type(JBIG2::CombinationOperator operator_)
1352
3
{
1353
3
    switch (operator_) {
1354
3
    case JBIG2::CombinationOperator::Or:
1355
3
        return BilevelImage::CompositionType::Or;
1356
0
    case JBIG2::CombinationOperator::And:
1357
0
        return BilevelImage::CompositionType::And;
1358
0
    case JBIG2::CombinationOperator::Xor:
1359
0
        return BilevelImage::CompositionType::Xor;
1360
0
    case JBIG2::CombinationOperator::XNor:
1361
0
        return BilevelImage::CompositionType::XNor;
1362
0
    case JBIG2::CombinationOperator::Replace:
1363
0
        return BilevelImage::CompositionType::Replace;
1364
3
    }
1365
0
    VERIFY_NOT_REACHED();
1366
0
}
1367
1368
// 6.4.2 Input parameters
1369
// Table 9 – Parameters for the text region decoding procedure
1370
struct TextRegionDecodingInputParameters {
1371
    bool uses_huffman_encoding { false };     // "SBHUFF" in spec.
1372
    bool uses_refinement_coding { false };    // "SBREFINE" in spec.
1373
    u32 region_width { 0 };                   // "SBW" in spec.
1374
    u32 region_height { 0 };                  // "SBH" in spec.
1375
    u32 number_of_instances { 0 };            // "SBNUMINSTANCES" in spec.
1376
    u32 size_of_symbol_instance_strips { 0 }; // "SBSTRIPS" in spec.
1377
    // "SBNUMSYMS" is `symbols.size()` below.
1378
1379
    // Only set if uses_huffman_encoding is true.
1380
    JBIG2::HuffmanTable const* symbol_id_table { nullptr }; // "SBSYMCODES" in spec.
1381
1382
    u32 id_symbol_code_length { 0 }; // "SBSYMCODELEN" in spec.
1383
    Vector<BilevelSubImage> symbols; // "SBNUMSYMS" / "SBSYMS" in spec.
1384
    u8 default_pixel { 0 };          // "SBDEFPIXEL" in spec.
1385
1386
    JBIG2::CombinationOperator operator_ { JBIG2::CombinationOperator::Or }; // "SBCOMBOP" in spec.
1387
1388
    bool is_transposed { false }; // "TRANSPOSED" in spec.
1389
1390
    JBIG2::ReferenceCorner reference_corner { JBIG2::ReferenceCorner::TopLeft }; // "REFCORNER" in spec.
1391
1392
    i8 delta_s_offset { 0 }; // "SBDSOFFSET" in spec.
1393
1394
    // Only set if uses_huffman_encoding is true.
1395
    JBIG2::HuffmanTable const* first_s_table { nullptr };                 // "SBHUFFFS" in spec.
1396
    JBIG2::HuffmanTable const* subsequent_s_table { nullptr };            // "SBHUFFDS" in spec.
1397
    JBIG2::HuffmanTable const* delta_t_table { nullptr };                 // "SBHUFFDT" in spec.
1398
    JBIG2::HuffmanTable const* refinement_delta_width_table { nullptr };  // "SBHUFFRDW" in spec.
1399
    JBIG2::HuffmanTable const* refinement_delta_height_table { nullptr }; // "SBHUFFRDH" in spec.
1400
    JBIG2::HuffmanTable const* refinement_x_offset_table { nullptr };     // "SBHUFFRDX" in spec.
1401
    JBIG2::HuffmanTable const* refinement_y_offset_table { nullptr };     // "SBHUFFRDY" in spec.
1402
    JBIG2::HuffmanTable const* refinement_size_table { nullptr };         // "SBHUFFRSIZE" in spec.
1403
1404
    u8 refinement_template { 0 };                                                  // "SBRTEMPLATE" in spec.
1405
    Array<JBIG2::AdaptiveTemplatePixel, 2> refinement_adaptive_template_pixels {}; // "SBRATX" / "SBRATY" in spec.
1406
    // FIXME: COLEXTFLAG, SBCOLS
1407
1408
    // If uses_huffman_encoding is true, text_region_decoding_procedure() reads data off this stream.
1409
    BigEndianInputBitStream* bit_stream { nullptr };
1410
1411
    // If uses_huffman_encoding is false, text_region_decoding_procedure() reads data off this decoder.
1412
    MQArithmeticDecoder* arithmetic_decoder { nullptr };
1413
};
1414
1415
struct TextContexts {
1416
    explicit TextContexts(u32 id_symbol_code_length)
1417
4
        : id_decoder(id_symbol_code_length)
1418
4
    {
1419
4
    }
1420
1421
    JBIG2::ArithmeticIntegerDecoder delta_t_integer_decoder;         // "IADT" in spec.
1422
    JBIG2::ArithmeticIntegerDecoder first_s_integer_decoder;         // "IAFS" in spec.
1423
    JBIG2::ArithmeticIntegerDecoder subsequent_s_integer_decoder;    // "IADS" in spec.
1424
    JBIG2::ArithmeticIntegerDecoder instance_t_integer_decoder;      // "IAIT" in spec.
1425
    JBIG2::ArithmeticIntegerIDDecoder id_decoder;                    // "IAID" in spec.
1426
    JBIG2::ArithmeticIntegerDecoder refinement_delta_width_decoder;  // "IARDW" in spec.
1427
    JBIG2::ArithmeticIntegerDecoder refinement_delta_height_decoder; // "IARDH" in spec.
1428
    JBIG2::ArithmeticIntegerDecoder refinement_x_offset_decoder;     // "IARDX" in spec.
1429
    JBIG2::ArithmeticIntegerDecoder refinement_y_offset_decoder;     // "IARDY" in spec.
1430
    JBIG2::ArithmeticIntegerDecoder has_refinement_image_decoder;    // "IARI" in spec.
1431
};
1432
1433
// 6.4 Text Region Decoding Procedure
1434
static ErrorOr<NonnullRefPtr<BilevelImage>> text_region_decoding_procedure(TextRegionDecodingInputParameters const& inputs, Optional<TextContexts>& text_contexts, Optional<JBIG2::RefinementContexts>& refinement_contexts)
1435
6
{
1436
6
    BigEndianInputBitStream* bit_stream = nullptr;
1437
6
    MQArithmeticDecoder* decoder = nullptr;
1438
6
    if (inputs.uses_huffman_encoding)
1439
3
        bit_stream = inputs.bit_stream;
1440
3
    else
1441
3
        decoder = inputs.arithmetic_decoder;
1442
1443
    // 6.4.6 Strip delta T
1444
    // "If SBHUFF is 1, decode a value using the Huffman table specified by SBHUFFDT and multiply the resulting value by SBSTRIPS.
1445
    //  If SBHUFF is 0, decode a value using the IADT integer arithmetic decoding procedure (see Annex A) and multiply the resulting value by SBSTRIPS."
1446
9
    auto read_delta_t = [&]() -> ErrorOr<i32> {
1447
9
        if (inputs.uses_huffman_encoding)
1448
6
            return TRY(inputs.delta_t_table->read_symbol_non_oob(*bit_stream)) * inputs.size_of_symbol_instance_strips;
1449
3
        return TRY(text_contexts->delta_t_integer_decoder.decode_non_oob(*decoder)) * inputs.size_of_symbol_instance_strips;
1450
3
    };
1451
1452
    // 6.4.7 First symbol instance S coordinate
1453
    // "If SBHUFF is 1, decode a value using the Huffman table specified by SBHUFFFS.
1454
    //  If SBHUFF is 0, decode a value using the IAFS integer arithmetic decoding procedure (see Annex A)."
1455
6
    auto read_first_s = [&]() -> ErrorOr<i32> {
1456
3
        if (inputs.uses_huffman_encoding)
1457
3
            return inputs.first_s_table->read_symbol_non_oob(*bit_stream);
1458
0
        return text_contexts->first_s_integer_decoder.decode_non_oob(*decoder);
1459
3
    };
1460
1461
    // 6.4.8 Subsequent symbol instance S coordinate
1462
    // "If SBHUFF is 1, decode a value using the Huffman table specified by SBHUFFDS.
1463
    //  If SBHUFF is 0, decode a value using the IADS integer arithmetic decoding procedure (see Annex A).
1464
    //  In either case it is possible that the result of this decoding is the out-of-band value OOB."
1465
6
    auto read_subsequent_s = [&]() -> ErrorOr<Optional<i32>> {
1466
0
        if (inputs.uses_huffman_encoding)
1467
0
            return inputs.subsequent_s_table->read_symbol(*bit_stream);
1468
0
        return text_contexts->subsequent_s_integer_decoder.decode(*decoder);
1469
0
    };
1470
1471
    // 6.4.9 Symbol instance T coordinate
1472
    // "If SBSTRIPS == 1, then the value decoded is always zero. Otherwise:
1473
    //  • If SBHUFF is 1, decode a value by reading ceil(log2(SBSTRIPS)) bits directly from the bitstream.
1474
    //  • If SBHUFF is 0, decode a value using the IAIT integer arithmetic decoding procedure (see Annex A)."
1475
6
    auto read_instance_t = [&]() -> ErrorOr<i32> {
1476
3
        if (inputs.size_of_symbol_instance_strips == 1)
1477
1
            return 0;
1478
2
        if (inputs.uses_huffman_encoding)
1479
2
            return TRY(bit_stream->read_bits(AK::ceil_log2(inputs.size_of_symbol_instance_strips)));
1480
0
        return text_contexts->instance_t_integer_decoder.decode_non_oob(*decoder);
1481
2
    };
1482
1483
    // 6.4.10 Symbol instance symbol ID
1484
    // "If SBHUFF is 1, decode a value by reading one bit at a time until the resulting bit string is equal to one of the entries in
1485
    //  SBSYMCODES. The resulting value, which is IDI, is the index of the entry in SBSYMCODES that is read.
1486
    //  If SBHUFF is 0, decode a value using the IAID integer arithmetic decoding procedure (see Annex A). Set IDI to the
1487
    //  resulting value."
1488
6
    auto read_symbol_id = [&]() -> ErrorOr<u32> {
1489
3
        if (inputs.uses_huffman_encoding)
1490
3
            return inputs.symbol_id_table->read_symbol_non_oob(*bit_stream);
1491
0
        return text_contexts->id_decoder.decode(*decoder);
1492
3
    };
1493
1494
    // 6.4.11.1 Symbol instance refinement delta width
1495
    // "If SBHUFF is 1, decode a value using the Huffman table specified by SBHUFFRDW.
1496
    //  If SBHUFF is 0, decode a value using the IARDW integer arithmetic decoding procedure (see Annex A)."
1497
6
    auto read_refinement_delta_width = [&]() -> ErrorOr<i32> {
1498
0
        if (inputs.uses_huffman_encoding)
1499
0
            return inputs.refinement_delta_width_table->read_symbol_non_oob(*bit_stream);
1500
0
        return text_contexts->refinement_delta_width_decoder.decode_non_oob(*decoder);
1501
0
    };
1502
1503
    // 6.4.11.2 Symbol instance refinement delta height
1504
    // "If SBHUFF is 1, decode a value using the Huffman table specified by SBHUFFRDH.
1505
    //  If SBHUFF is 0, decode a value using the IARDH integer arithmetic decoding procedure (see Annex A)."
1506
6
    auto read_refinement_delta_height = [&]() -> ErrorOr<i32> {
1507
0
        if (inputs.uses_huffman_encoding)
1508
0
            return inputs.refinement_delta_height_table->read_symbol_non_oob(*bit_stream);
1509
0
        return text_contexts->refinement_delta_height_decoder.decode_non_oob(*decoder);
1510
0
    };
1511
1512
    // 6.4.11.3 Symbol instance refinement X offset
1513
    // "If SBHUFF is 1, decode a value using the Huffman table specified by SBHUFFRDX.
1514
    //  If SBHUFF is 0, decode a value using the IARDX integer arithmetic decoding procedure (see Annex A)."
1515
6
    auto read_refinement_x_offset = [&]() -> ErrorOr<i32> {
1516
0
        if (inputs.uses_huffman_encoding)
1517
0
            return inputs.refinement_x_offset_table->read_symbol_non_oob(*bit_stream);
1518
0
        return text_contexts->refinement_x_offset_decoder.decode_non_oob(*decoder);
1519
0
    };
1520
1521
    // 6.4.11.4 Symbol instance refinement Y offset
1522
    // "If SBHUFF is 1, decode a value using the Huffman table specified by SBHUFFRDY.
1523
    //  If SBHUFF is 0, decode a value using the IARDY integer arithmetic decoding procedure (see Annex A)."
1524
6
    auto read_refinement_y_offset = [&]() -> ErrorOr<i32> {
1525
0
        if (inputs.uses_huffman_encoding)
1526
0
            return inputs.refinement_y_offset_table->read_symbol_non_oob(*bit_stream);
1527
0
        return text_contexts->refinement_y_offset_decoder.decode_non_oob(*decoder);
1528
0
    };
1529
1530
    // 6.4.11 Symbol instance bitmap
1531
6
    Optional<BilevelSubImage> refinement_result;
1532
6
    auto read_bitmap = [&](u32 id) -> ErrorOr<BilevelSubImage const*> {
1533
0
        if (id >= inputs.symbols.size())
1534
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Symbol ID out of range");
1535
0
        auto const& symbol = inputs.symbols[id];
1536
1537
0
        bool has_refinement_image = false; // "R_I" in spec.
1538
0
        if (inputs.uses_refinement_coding) {
1539
            // "• If SBHUFF is 1, then read one bit and set RI to the value of that bit.
1540
            //  • If SBHUFF is 0, then decode one bit using the IARI integer arithmetic decoding procedure and set RI to the value of that bit."
1541
0
            if (inputs.uses_huffman_encoding)
1542
0
                has_refinement_image = TRY(bit_stream->read_bit());
1543
0
            else
1544
0
                has_refinement_image = TRY(text_contexts->has_refinement_image_decoder.decode_non_oob(*decoder));
1545
0
        }
1546
1547
        // "If RI is 0 then set the symbol instance bitmap IBI to SBSYMS[IDI]."
1548
0
        if (!has_refinement_image)
1549
0
            return &symbol;
1550
1551
0
        auto refinement_delta_width = TRY(read_refinement_delta_width());
1552
0
        auto refinement_delta_height = TRY(read_refinement_delta_height());
1553
0
        auto refinement_x_offset = TRY(read_refinement_x_offset());
1554
0
        auto refinement_y_offset = TRY(read_refinement_y_offset());
1555
1556
0
        MQArithmeticDecoder* refinement_decoder = nullptr;
1557
0
        Optional<MQArithmeticDecoder> huffman_refinement_decoder;
1558
0
        ByteBuffer huffman_refinement_data;
1559
0
        if (inputs.uses_huffman_encoding) {
1560
0
            auto data_size = TRY(inputs.refinement_size_table->read_symbol_non_oob(*bit_stream));
1561
0
            bit_stream->align_to_byte_boundary();
1562
0
            huffman_refinement_data = TRY(ByteBuffer::create_uninitialized(data_size));
1563
0
            TRY(bit_stream->read_until_filled(huffman_refinement_data));
1564
0
            huffman_refinement_decoder = TRY(MQArithmeticDecoder::initialize(huffman_refinement_data));
1565
0
            refinement_decoder = &huffman_refinement_decoder.value();
1566
0
        } else {
1567
0
            refinement_decoder = decoder;
1568
0
        }
1569
1570
0
        dbgln_if(JBIG2_DEBUG, "refinement delta width: {}, refinement delta height: {}, refinement x offset: {}, refinement y offset: {}", refinement_delta_width, refinement_delta_height, refinement_x_offset, refinement_y_offset);
1571
1572
        // Table 12 – Parameters used to decode a symbol instance's bitmap using refinement
1573
0
        if (symbol.width() > static_cast<u32>(INT32_MAX) || static_cast<i32>(symbol.width()) + refinement_delta_width < 0)
1574
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Refinement width out of bounds");
1575
0
        if (symbol.height() > static_cast<u32>(INT32_MAX) || static_cast<i32>(symbol.height()) + refinement_delta_height < 0)
1576
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Refinement height out of bounds");
1577
1578
0
        GenericRefinementRegionDecodingInputParameters refinement_inputs;
1579
0
        refinement_inputs.region_width = symbol.width() + refinement_delta_width;
1580
0
        refinement_inputs.region_height = symbol.height() + refinement_delta_height;
1581
0
        refinement_inputs.gr_template = inputs.refinement_template;
1582
0
        refinement_inputs.reference_bitmap = &symbol;
1583
0
        refinement_inputs.reference_x_offset = floor_div(refinement_delta_width, 2) + refinement_x_offset;
1584
0
        refinement_inputs.reference_y_offset = floor_div(refinement_delta_height, 2) + refinement_y_offset;
1585
0
        refinement_inputs.is_typical_prediction_used = false;
1586
0
        refinement_inputs.adaptive_template_pixels = inputs.refinement_adaptive_template_pixels;
1587
0
        auto result = TRY(generic_refinement_region_decoding_procedure(refinement_inputs, *refinement_decoder, refinement_contexts.value()));
1588
0
        refinement_result = result->as_subbitmap();
1589
0
        return &refinement_result.value();
1590
0
    };
1591
1592
    // 6.4.5 Decoding the text region
1593
1594
    // "1) Fill a bitmap SBREG, of the size given by SBW and SBH, with the SBDEFPIXEL value."
1595
6
    auto result = TRY(BilevelImage::create(inputs.region_width, inputs.region_height));
1596
6
    result->fill(inputs.default_pixel != 0);
1597
1598
    // "2) Decode the initial STRIPT value as described in 6.4.6. Negate the decoded value and assign this negated value to the variable STRIPT.
1599
    //     Assign the value 0 to FIRSTS. Assign the value 0 to NINSTANCES."
1600
6
    i32 strip_t = -TRY(read_delta_t());
1601
6
    i32 first_s = 0;
1602
6
    u32 n_instances = 0;
1603
1604
    // "3) If COLEXTFLAG is 1, decode the colour section as described in 6.4.12."
1605
    // FIXME: Implement support for colors one day.
1606
1607
    // "4) Decode each strip as follows:
1608
    //      a) If NINSTANCES is equal to SBNUMINSTANCES then there are no more strips to decode,
1609
    //         and the process of decoding the text region is complete; proceed to step 4)."
1610
    // NOTE: The spec means "proceed to step 5)" at the end of 4a).
1611
6
    while (n_instances < inputs.number_of_instances) {
1612
        // "b) Decode the strip's delta T value as described in 6.4.6. Let DT be the decoded value. Set:
1613
        //         STRIPT = STRIPT + DT"
1614
3
        i32 delta_t = TRY(read_delta_t());
1615
3
        strip_t += delta_t;
1616
1617
3
        i32 cur_s;
1618
3
        bool is_first_symbol = true;
1619
3
        while (true) {
1620
            // "c) Decode each symbol instance in the strip as follows:
1621
            //      i) If the current symbol instance is the first symbol instance in the strip, then decode the first
1622
            //         symbol instance's S coordinate as described in 6.4.7. Let DFS be the decoded value. Set:
1623
            //              FIRSTS = FIRSTS + DFS
1624
            //              CURS = FIRSTS
1625
            //      ii) Otherwise, if the current symbol instance is not the first symbol instance in the strip, decode
1626
            //          the symbol instance's S coordinate as described in 6.4.8. If the result of this decoding is OOB
1627
            //          then the last symbol instance of the strip has been decoded; proceed to step 3 d). Otherwise, let
1628
            //          IDS be the decoded value. Set:
1629
            //              CURS = CURS + IDS + SBDSOFFSET"
1630
            // NOTE: The spec means "proceed to step 4 d)" in 4c ii).
1631
3
            if (is_first_symbol) {
1632
3
                i32 delta_first_s = TRY(read_first_s());
1633
3
                first_s += delta_first_s;
1634
3
                cur_s = first_s;
1635
3
                is_first_symbol = false;
1636
3
            } else {
1637
0
                auto subsequent_s = TRY(read_subsequent_s());
1638
0
                if (!subsequent_s.has_value())
1639
0
                    break;
1640
0
                i32 instance_delta_s = subsequent_s.value();
1641
0
                cur_s += instance_delta_s + inputs.delta_s_offset;
1642
0
            }
1643
1644
            //     "iii) Decode the symbol instance's T coordinate as described in 6.4.9. Let CURT be the decoded value. Set:
1645
            //              TI = STRIPT + CURT"
1646
3
            i32 cur_t = TRY(read_instance_t());
1647
3
            i32 t_instance = strip_t + cur_t;
1648
1649
            //     "iv) Decode the symbol instance's symbol ID as described in 6.4.10. Let IDI be the decoded value."
1650
3
            u32 id = TRY(read_symbol_id());
1651
1652
            //     "v) Determine the symbol instance's bitmap IBI as described in 6.4.11. The width and height of this
1653
            //         bitmap shall be denoted as WI and HI respectively."
1654
0
            auto const& symbol = *TRY(read_bitmap(id));
1655
1656
            //     "vi) Update CURS as follows:
1657
            //      • If TRANSPOSED is 0, and REFCORNER is TOPRIGHT or BOTTOMRIGHT, set:
1658
            //              CURS = CURS + WI – 1
1659
            //      • If TRANSPOSED is 1, and REFCORNER is BOTTOMLEFT or BOTTOMRIGHT, set:
1660
            //              CURS = CURS + HI – 1
1661
            //      • Otherwise, do not change CURS in this step."
1662
0
            using enum JBIG2::ReferenceCorner;
1663
0
            if (!inputs.is_transposed && (inputs.reference_corner == TopRight || inputs.reference_corner == BottomRight))
1664
0
                cur_s += symbol.width() - 1;
1665
0
            if (inputs.is_transposed && (inputs.reference_corner == BottomLeft || inputs.reference_corner == BottomRight))
1666
0
                cur_s += symbol.height() - 1;
1667
1668
            //     "vii) Set:
1669
            //              SI = CURS"
1670
0
            auto s_instance = cur_s;
1671
1672
            //     "viii) Determine the location of the symbol instance bitmap with respect to SBREG as follows:
1673
            //          • If TRANSPOSED is 0, then:
1674
            //              – If REFCORNER is TOPLEFT then the top left pixel of the symbol instance bitmap
1675
            //                IBI shall be placed at SBREG[SI, TI].
1676
            //              – If REFCORNER is TOPRIGHT then the top right pixel of the symbol instance
1677
            //                bitmap IBI shall be placed at SBREG[SI, TI].
1678
            //              – If REFCORNER is BOTTOMLEFT then the bottom left pixel of the symbol
1679
            //                instance bitmap IBI shall be placed at SBREG[SI, TI].
1680
            //              – If REFCORNER is BOTTOMRIGHT then the bottom right pixel of the symbol
1681
            //                instance bitmap IBI shall be placed at SBREG[SI, TI].
1682
            //          • If TRANSPOSED is 1, then:
1683
            //              – If REFCORNER is TOPLEFT then the top left pixel of the symbol instance bitmap
1684
            //                IBI shall be placed at SBREG[TI, SI].
1685
            //              – If REFCORNER is TOPRIGHT then the top right pixel of the symbol instance
1686
            //                bitmap IBI shall be placed at SBREG[TI, SI].
1687
            //              – If REFCORNER is BOTTOMLEFT then the bottom left pixel of the symbol
1688
            //                instance bitmap IBI shall be placed at SBREG[TI, SI].
1689
            //              – If REFCORNER is BOTTOMRIGHT then the bottom right pixel of the symbol
1690
            //                instance bitmap IBI shall be placed at SBREG[TI, SI].
1691
            //          If any part of IBI, when placed at this location, lies outside the bounds of SBREG, then ignore
1692
            //          this part of IBI in step 3 c) ix)."
1693
            // NOTE: The spec means "ignore this part of IBI in step 3 c) x)" in 3c viii)'s last sentence.
1694
0
            if (inputs.is_transposed)
1695
0
                swap(s_instance, t_instance);
1696
0
            if (inputs.reference_corner == TopRight || inputs.reference_corner == BottomRight)
1697
0
                s_instance -= symbol.width() - 1;
1698
0
            if (inputs.reference_corner == BottomLeft || inputs.reference_corner == BottomRight)
1699
0
                t_instance -= symbol.height() - 1;
1700
1701
            //     "ix) If COLEXTFLAG is 1, set the colour specified by SBCOLS[SBFGCOLID[NINSTANCES]]
1702
            //          to the foreground colour of the symbol instance bitmap IBI."
1703
            // FIXME: Implement support for colors one day.
1704
1705
            //     "x) Draw IBI into SBREG. Combine each pixel of IBI with the current value of the corresponding
1706
            //         pixel in SBREG, using the combination operator specified by SBCOMBOP. Write the results
1707
            //         of each combination into that pixel in SBREG."
1708
0
            dbgln_if(JBIG2_DEBUG, "combining symbol {} ({}x{}) at ({}, {}) with operator {}", id, symbol.width(), symbol.height(), s_instance, t_instance, (int)inputs.operator_);
1709
0
            symbol.composite_onto(*result, { s_instance, t_instance }, to_composition_type(inputs.operator_));
1710
1711
            //     "xi) Update CURS as follows:
1712
            //          • If TRANSPOSED is 0, and REFCORNER is TOPLEFT or BOTTOMLEFT, set:
1713
            //              CURS = CURS + WI – 1
1714
            //          • If TRANSPOSED is 1, and REFCORNER is TOPLEFT or TOPRIGHT, set:
1715
            //              CURS = CURS + HI – 1
1716
            //          • Otherwise, do not change CURS in this step."
1717
0
            if (!inputs.is_transposed && (inputs.reference_corner == TopLeft || inputs.reference_corner == BottomLeft))
1718
0
                cur_s += symbol.width() - 1;
1719
0
            if (inputs.is_transposed && (inputs.reference_corner == TopLeft || inputs.reference_corner == TopRight))
1720
0
                cur_s += symbol.height() - 1;
1721
1722
            //      "xii) Set:
1723
            //              NINSTANCES = NINSTANCES + 1"
1724
0
            ++n_instances;
1725
0
        }
1726
        //  "d) When the strip has been completely decoded, decode the next strip."
1727
        // (Done in the next loop iteration.)
1728
3
    }
1729
1730
    //  "5) After all the strips have been decoded, the current contents of SBREG are the results that shall be
1731
    //      obtained by every decoder, whether it performs this exact sequence of steps or not."
1732
3
    return result;
1733
6
}
1734
1735
// 6.5.2 Input parameters
1736
// Table 13 – Parameters for the symbol dictionary decoding procedure
1737
struct SymbolDictionaryDecodingInputParameters {
1738
1739
    bool uses_huffman_encoding { false };               // "SDHUFF" in spec.
1740
    bool uses_refinement_or_aggregate_coding { false }; // "SDREFAGG" in spec.
1741
1742
    Vector<BilevelSubImage> input_symbols; // "SDNUMINSYMS", "SDINSYMS" in spec.
1743
1744
    u32 number_of_new_symbols { 0 };      // "SDNUMNEWSYMS" in spec.
1745
    u32 number_of_exported_symbols { 0 }; // "SDNUMEXSYMS" in spec.
1746
1747
    // Only set if uses_huffman_encoding is true.
1748
    JBIG2::HuffmanTable const* delta_height_table { nullptr };               // "SDHUFFDH" in spec.
1749
    JBIG2::HuffmanTable const* delta_width_table { nullptr };                // "SDHUFFDW" in spec.
1750
    JBIG2::HuffmanTable const* bitmap_size_table { nullptr };                // "SDHUFFBMSIZE" in spec.
1751
    JBIG2::HuffmanTable const* number_of_symbol_instances_table { nullptr }; // "SDHUFFAGGINST" in spec.
1752
1753
    u8 symbol_template { 0 };                                           // "SDTEMPLATE" in spec.
1754
    Array<JBIG2::AdaptiveTemplatePixel, 4> adaptive_template_pixels {}; // "SDATX" / "SDATY" in spec.
1755
1756
    u8 refinement_template { 0 };                                                  // "SDRTEMPLATE" in spec;
1757
    Array<JBIG2::AdaptiveTemplatePixel, 2> refinement_adaptive_template_pixels {}; // "SDRATX" / "SDRATY" in spec.
1758
};
1759
1760
struct SymbolContexts {
1761
    JBIG2::ArithmeticIntegerDecoder delta_height_integer_decoder;       // "IADH" in spec.
1762
    JBIG2::ArithmeticIntegerDecoder delta_width_integer_decoder;        // "IADW" in spec.
1763
    JBIG2::ArithmeticIntegerDecoder number_of_symbol_instances_decoder; // "IAAI" in spec.
1764
    JBIG2::ArithmeticIntegerDecoder export_integer_decoder;             // "IAEX" in spec.
1765
};
1766
1767
// 6.5 Symbol Dictionary Decoding Procedure
1768
static ErrorOr<Vector<BilevelSubImage>> symbol_dictionary_decoding_procedure(SymbolDictionaryDecodingInputParameters const& inputs, Optional<JBIG2::GenericContexts>& generic_contexts, Optional<JBIG2::RefinementContexts>& refinement_contexts, ReadonlyBytes data)
1769
3
{
1770
3
    Optional<FixedMemoryStream> stream;
1771
3
    Optional<BigEndianInputBitStream> bit_stream;
1772
3
    Optional<MQArithmeticDecoder> decoder;
1773
3
    Optional<SymbolContexts> symbol_contexts;
1774
3
    if (inputs.uses_huffman_encoding) {
1775
2
        stream = FixedMemoryStream { data };
1776
2
        bit_stream = BigEndianInputBitStream { MaybeOwned { stream.value() } };
1777
2
    } else {
1778
1
        decoder = TRY(MQArithmeticDecoder::initialize(data));
1779
1
        symbol_contexts = SymbolContexts {};
1780
1
    }
1781
1782
    // 6.5.6 Height class delta height
1783
    // "If SDHUFF is 1, decode a value using the Huffman table specified by SDHUFFDH.
1784
    //  If SDHUFF is 0, decode a value using the IADH integer arithmetic decoding procedure (see Annex A)."
1785
3
    auto read_delta_height = [&]() -> ErrorOr<i32> {
1786
2
        if (inputs.uses_huffman_encoding)
1787
2
            return inputs.delta_height_table->read_symbol_non_oob(*bit_stream);
1788
0
        return symbol_contexts->delta_height_integer_decoder.decode_non_oob(*decoder);
1789
2
    };
1790
1791
    // 6.5.7 Delta width
1792
    // "If SDHUFF is 1, decode a value using the Huffman table specified by SDHUFFDW.
1793
    //  If SDHUFF is 0, decode a value using the IADW integer arithmetic decoding procedure (see Annex A).
1794
    //  In either case it is possible that the result of this decoding is the out-of-band value OOB."
1795
71
    auto read_delta_width = [&]() -> ErrorOr<Optional<i32>> {
1796
71
        if (inputs.uses_huffman_encoding)
1797
71
            return inputs.delta_width_table->read_symbol(*bit_stream);
1798
0
        return symbol_contexts->delta_width_integer_decoder.decode(*decoder);
1799
71
    };
1800
1801
    // 6.5.8 Symbol bitmap
1802
    // "This field is only present if SDHUFF = 0 or SDREFAGG = 1. This field takes one of two forms; SDREFAGG
1803
    //  determines which form is used."
1804
1805
    // 6.5.8.2.1 Number of symbol instances in aggregation
1806
    // If SDHUFF is 1, decode a value using the Huffman table specified by SDHUFFAGGINST.
1807
    // If SDHUFF is 0, decode a value using the IAAI integer arithmetic decoding procedure (see Annex A).
1808
3
    auto read_number_of_symbol_instances = [&]() -> ErrorOr<i32> {
1809
1
        if (inputs.uses_huffman_encoding)
1810
1
            return inputs.number_of_symbol_instances_table->read_symbol_non_oob(*bit_stream);
1811
0
        return symbol_contexts->number_of_symbol_instances_decoder.decode_non_oob(*decoder);
1812
1
    };
1813
1814
    // 6.5.8.1 Direct-coded symbol bitmap
1815
3
    Optional<TextContexts> text_contexts;
1816
1817
    // This belongs in 6.5.5 1) below, but also needs to be captured by read_symbol_bitmap here.
1818
3
    Vector<BilevelSubImage> new_symbols;
1819
1820
    // Likewise, this is from 6.5.8.2.3 below.
1821
3
    Vector<JBIG2::Code> symbol_id_codes;
1822
3
    Optional<JBIG2::HuffmanTable> symbol_id_table_storage;
1823
1824
3
    auto read_symbol_bitmap = [&](u32 width, u32 height) -> ErrorOr<NonnullRefPtr<BilevelImage>> {
1825
        // 6.5.8 Symbol bitmap
1826
1827
        // 6.5.8.1 Direct-coded symbol bitmap
1828
        // "If SDREFAGG is 0, then decode the symbol's bitmap using a generic region decoding procedure as described in 6.2.
1829
        //  Set the parameters to this decoding procedure as shown in Table 16."
1830
1
        if (!inputs.uses_refinement_or_aggregate_coding) {
1831
0
            VERIFY(!inputs.uses_huffman_encoding);
1832
1833
            // Table 16 – Parameters used to decode a symbol's bitmap using generic bitmap decoding
1834
0
            GenericRegionDecodingInputParameters generic_inputs;
1835
0
            generic_inputs.is_modified_modified_read = false;
1836
0
            generic_inputs.region_width = width;
1837
0
            generic_inputs.region_height = height;
1838
0
            generic_inputs.gb_template = inputs.symbol_template;
1839
0
            generic_inputs.is_extended_reference_template_used = false; // Missing from spec in table 16.
1840
0
            for (int i = 0; i < 4; ++i)
1841
0
                generic_inputs.adaptive_template_pixels[i] = inputs.adaptive_template_pixels[i];
1842
0
            generic_inputs.arithmetic_decoder = &decoder.value();
1843
0
            return generic_region_decoding_procedure(generic_inputs, generic_contexts);
1844
0
        }
1845
1846
        // 6.5.8.2 Refinement/aggregate-coded symbol bitmap
1847
        // "1) Decode the number of symbol instances contained in the aggregation, as specified in 6.5.8.2.1. Let REFAGGNINST be the value decoded."
1848
1
        auto number_of_symbol_instances = TRY(read_number_of_symbol_instances()); // "REFAGGNINST" in spec.
1849
1
        dbgln_if(JBIG2_DEBUG, "Number of symbol instances: {}", number_of_symbol_instances);
1850
1851
        // 6.5.8.2.3 Setting SBSYMCODES and SBSYMCODELEN
1852
1
        u32 number_of_symbols = inputs.input_symbols.size() + inputs.number_of_new_symbols; // "SBNUMSYMS" in spec.
1853
1
        u32 code_length = AK::ceil_log2(number_of_symbols);                                 // "SBSYMCODELEN" in spec.
1854
1
        JBIG2::HuffmanTable const* symbol_id_table { nullptr };
1855
1
        if (inputs.uses_huffman_encoding) {
1856
1
            if (!symbol_id_table_storage.has_value()) {
1857
1
                symbol_id_codes = TRY(JBIG2::uniform_huffman_codes(number_of_symbols, max(code_length, 1u)));
1858
1
                symbol_id_table_storage = JBIG2::HuffmanTable { symbol_id_codes };
1859
1
            }
1860
1
            symbol_id_table = &symbol_id_table_storage.value();
1861
1
        }
1862
1863
1
        if (!text_contexts.has_value())
1864
1
            text_contexts = TextContexts { code_length };
1865
1866
1
        if (number_of_symbol_instances > 1) {
1867
            // "2) If REFAGGNINST is greater than one, then decode the bitmap itself using a text region decoding procedure
1868
            //     as described in 6.4. Set the parameters to this decoding procedure as shown in Table 17."
1869
1870
            // Table 17 – Parameters used to decode a symbol's bitmap using refinement/aggregate decoding
1871
1
            TextRegionDecodingInputParameters text_inputs;
1872
1
            text_inputs.uses_huffman_encoding = inputs.uses_huffman_encoding;
1873
1
            text_inputs.uses_refinement_coding = true;
1874
1
            text_inputs.region_width = width;
1875
1
            text_inputs.region_height = height;
1876
1
            text_inputs.number_of_instances = number_of_symbol_instances;
1877
1
            text_inputs.size_of_symbol_instance_strips = 1;
1878
1
            text_inputs.symbol_id_table = symbol_id_table;
1879
1
            text_inputs.id_symbol_code_length = code_length;
1880
1881
            // 6.5.8.2.4 Setting SBSYMS
1882
            // "Set SBSYMS to an array of SDNUMINSYMS + NSYMSDECODED symbols, formed by concatenating the array
1883
            //  SDINSYMS and the first NSYMSDECODED entries of the array SDNEWSYMS."
1884
1
            text_inputs.symbols.extend(inputs.input_symbols);
1885
1
            text_inputs.symbols.extend(new_symbols);
1886
1887
1
            text_inputs.default_pixel = 0;
1888
1
            text_inputs.operator_ = JBIG2::CombinationOperator::Or;
1889
1
            text_inputs.is_transposed = false;
1890
1
            text_inputs.reference_corner = JBIG2::ReferenceCorner::TopLeft;
1891
1
            text_inputs.delta_s_offset = 0;
1892
1
            text_inputs.first_s_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_6));
1893
1
            text_inputs.subsequent_s_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_8));
1894
1
            text_inputs.delta_t_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_11));
1895
1
            text_inputs.refinement_delta_width_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_15));
1896
1
            text_inputs.refinement_delta_height_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_15));
1897
1
            text_inputs.refinement_x_offset_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_15));
1898
1
            text_inputs.refinement_y_offset_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_15));
1899
1
            text_inputs.refinement_size_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_1));
1900
1
            text_inputs.refinement_template = inputs.refinement_template;
1901
1
            text_inputs.refinement_adaptive_template_pixels = inputs.refinement_adaptive_template_pixels;
1902
1903
1
            if (inputs.uses_huffman_encoding)
1904
1
                text_inputs.bit_stream = &bit_stream.value();
1905
0
            else
1906
0
                text_inputs.arithmetic_decoder = &decoder.value();
1907
1
            return text_region_decoding_procedure(text_inputs, text_contexts, refinement_contexts);
1908
1
        }
1909
1910
        // "3) If REFAGGNINST is equal to one, then decode the bitmap as described in 6.5.8.2.2."
1911
1912
        // 6.5.8.2.2 Decoding a bitmap when REFAGGNINST = 1
1913
0
        if (number_of_symbol_instances != 1)
1914
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Unexpected number of symbol instances");
1915
1916
0
        u32 symbol_id;
1917
0
        if (inputs.uses_huffman_encoding)
1918
0
            symbol_id = TRY(symbol_id_table->read_symbol_non_oob(*bit_stream));
1919
0
        else
1920
0
            symbol_id = text_contexts->id_decoder.decode(*decoder);
1921
1922
0
        i32 refinement_x_offset;
1923
0
        if (inputs.uses_huffman_encoding)
1924
0
            refinement_x_offset = TRY(TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_15))->read_symbol_non_oob(*bit_stream));
1925
0
        else
1926
0
            refinement_x_offset = TRY(text_contexts->refinement_x_offset_decoder.decode_non_oob(*decoder));
1927
1928
0
        i32 refinement_y_offset;
1929
0
        if (inputs.uses_huffman_encoding)
1930
0
            refinement_y_offset = TRY(TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_15))->read_symbol_non_oob(*bit_stream));
1931
0
        else
1932
0
            refinement_y_offset = TRY(text_contexts->refinement_y_offset_decoder.decode_non_oob(*decoder));
1933
1934
0
        if (symbol_id >= inputs.input_symbols.size() && symbol_id - inputs.input_symbols.size() >= new_symbols.size())
1935
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Refinement/aggregate symbol ID out of range");
1936
1937
0
        MQArithmeticDecoder* refinement_decoder = nullptr;
1938
0
        Optional<MQArithmeticDecoder> huffman_refinement_decoder;
1939
0
        if (inputs.uses_huffman_encoding) {
1940
0
            auto data_size = TRY(TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_1))->read_symbol_non_oob(*bit_stream));
1941
0
            bit_stream->align_to_byte_boundary();
1942
0
            auto huffman_refinement_data = data.slice(stream->offset(), data_size);
1943
0
            TRY(stream->discard(data_size));
1944
0
            huffman_refinement_decoder = TRY(MQArithmeticDecoder::initialize(huffman_refinement_data));
1945
0
            refinement_decoder = &huffman_refinement_decoder.value();
1946
0
        } else {
1947
0
            refinement_decoder = &decoder.value();
1948
0
        }
1949
1950
0
        auto const& IBO = (symbol_id < inputs.input_symbols.size()) ? inputs.input_symbols[symbol_id] : new_symbols[symbol_id - inputs.input_symbols.size()];
1951
        // Table 18 – Parameters used to decode a symbol's bitmap when REFAGGNINST = 1
1952
0
        GenericRefinementRegionDecodingInputParameters refinement_inputs;
1953
0
        refinement_inputs.region_width = width;
1954
0
        refinement_inputs.region_height = height;
1955
0
        refinement_inputs.gr_template = inputs.refinement_template;
1956
0
        refinement_inputs.reference_bitmap = &IBO;
1957
0
        refinement_inputs.reference_x_offset = refinement_x_offset;
1958
0
        refinement_inputs.reference_y_offset = refinement_y_offset;
1959
0
        refinement_inputs.is_typical_prediction_used = false;
1960
0
        refinement_inputs.adaptive_template_pixels = inputs.refinement_adaptive_template_pixels;
1961
0
        return generic_refinement_region_decoding_procedure(refinement_inputs, *refinement_decoder, refinement_contexts.value());
1962
0
    };
1963
1964
3
    auto read_height_class_collective_bitmap = [&](u32 total_width, u32 height) -> ErrorOr<NonnullRefPtr<BilevelImage>> {
1965
        // 6.5.9 Height class collective bitmap
1966
        // "1) Read the size in bytes using the SDHUFFBMSIZE Huffman table. Let BMSIZE be the value decoded."
1967
1
        auto bitmap_size = TRY(inputs.bitmap_size_table->read_symbol_non_oob(*bit_stream));
1968
1969
        // "2) Skip over any bits remaining in the last byte read."
1970
1
        bit_stream->align_to_byte_boundary();
1971
1972
1
        NonnullRefPtr<BilevelImage> result = TRY([&]() -> ErrorOr<NonnullRefPtr<BilevelImage>> {
1973
            // "3) If BMSIZE is zero, then the bitmap is stored uncompressed, and the actual size in bytes is:
1974
            //
1975
            //         HCHEIGHT * ceil_div(TOTWIDTH, 8)
1976
            //
1977
            //     Decode the bitmap by reading this many bytes and treating it as HCHEIGHT rows of TOTWIDTH pixels, each
1978
            //     row padded out to a byte boundary with 0-7 0 bits."
1979
0
            if (bitmap_size == 0) {
1980
0
                auto result = TRY(BilevelImage::create(total_width, height));
1981
1982
0
                u32 padding_bits = align_up_to(total_width, 8) - total_width;
1983
0
                for (u32 y = 0; y < height; ++y) {
1984
0
                    for (u32 x = 0; x < total_width; ++x)
1985
0
                        result->set_bit(x, y, TRY(bit_stream->read_bit()));
1986
1987
0
                    if (TRY(bit_stream->read_bits(padding_bits)) != 0)
1988
0
                        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Non-zero padding bits in uncompressed height class collective bitmap");
1989
0
                }
1990
1991
0
                return result;
1992
0
            }
1993
            // "4) Otherwise, decode the bitmap using a generic bitmap decoding procedure as described in 6.2. Set the
1994
            //     parameters to this decoding procedure as shown in Table 19."
1995
            // Table 19 – Parameters used to decode a height class collective bitmap
1996
0
            GenericRegionDecodingInputParameters generic_inputs;
1997
0
            generic_inputs.is_modified_modified_read = true;
1998
0
            generic_inputs.region_width = total_width;
1999
0
            generic_inputs.region_height = height;
2000
2001
0
            ReadonlyBytes bitmap_data = data.slice(stream->offset(), bitmap_size);
2002
0
            TRY(stream->discard(bitmap_size));
2003
0
            FixedMemoryStream bitmap_stream { bitmap_data };
2004
0
            generic_inputs.stream = &bitmap_stream;
2005
0
            return generic_region_decoding_procedure(generic_inputs, generic_contexts);
2006
0
        }());
2007
2008
        // "5) Skip over any bits remaining in the last byte read."
2009
        // Already done above. This step allowed us to slice the data in step 4.
2010
2011
0
        return result;
2012
1
    };
2013
2014
    // 6.5.5 Decoding the symbol dictionary
2015
    // "1) Create an array SDNEWSYMS of bitmaps, having SDNUMNEWSYMS entries."
2016
    // Done above read_symbol_bitmap's definition.
2017
2018
    // "2) If SDHUFF is 1 and SDREFAGG is 0, create an array SDNEWSYMWIDTHS of integers, having SDNUMNEWSYMS entries."
2019
3
    Vector<u32> new_symbol_widths;
2020
2021
    // "3) Set:
2022
    //      HCHEIGHT = 0
2023
    //      NSYMSDECODED = 0"
2024
3
    u32 height_class_height = 0;
2025
3
    u32 number_of_symbols_decoded = 0;
2026
2027
    // "4) Decode each height class as follows:
2028
    //      a) If NSYMSDECODED == SDNUMNEWSYMS then all the symbols in the dictionary have been decoded; proceed to step 5)."
2029
3
    while (number_of_symbols_decoded < inputs.number_of_new_symbols) {
2030
        // "b) Decode the height class delta height as described in 6.5.6. Let HCDH be the decoded value. Set:
2031
        //      HCHEIGHT = HCEIGHT + HCDH
2032
        //      SYMWIDTH = 0
2033
        //      TOTWIDTH = 0
2034
        //      HCFIRSTSYM = NSYMSDECODED"
2035
        // NOTE: The spec means "HCHEIGHT" with "HCEIGHT" presumably.
2036
2
        i32 delta_height = TRY(read_delta_height());
2037
2
        height_class_height += delta_height;
2038
2
        u32 symbol_width = 0;
2039
2
        u32 total_width = 0;
2040
2
        u32 height_class_first_symbol = number_of_symbols_decoded;
2041
        // "c) Decode each symbol within the height class as follows:"
2042
71
        while (true) {
2043
            // "i) Decode the delta width for the symbol as described in 6.5.7."
2044
71
            auto opt_delta_width = TRY(read_delta_width());
2045
            // "   If the result of this decoding is OOB then all the symbols in this height class have been decoded; proceed to step 4 d)."
2046
71
            if (!opt_delta_width.has_value())
2047
1
                break;
2048
2049
70
            if (number_of_symbols_decoded >= inputs.number_of_new_symbols)
2050
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid bitstream, too many symbols decoded in symbol dictionary");
2051
2052
            // "   Otherwise let DW be the decoded value and set:"
2053
            //         SYMWIDTH = SYMWIDTH + DW
2054
            //         TOTWIDTH = TOTWIDTH + SYMWIDTH"
2055
70
            i32 delta_width = opt_delta_width.value();
2056
70
            symbol_width += delta_width;
2057
70
            total_width += symbol_width;
2058
2059
            // "ii) If SDHUFF is 0 or SDREFAGG is 1, then decode the symbol's bitmap as described in 6.5.8.
2060
            //      Let BS be the decoded bitmap (this bitmap has width SYMWIDTH and height HCHEIGHT). Set:
2061
            //          SDNEWSYMS[NSYMSDECODED] = BS"
2062
            // FIXME: Doing this eagerly is pretty wasteful. Decode on demand instead?
2063
70
            if (!inputs.uses_huffman_encoding || inputs.uses_refinement_or_aggregate_coding) {
2064
1
                auto bitmap = TRY(read_symbol_bitmap(symbol_width, height_class_height));
2065
0
                TRY(new_symbols.try_append(bitmap->as_subbitmap()));
2066
0
            }
2067
2068
            // "iii) If SDHUFF is 1 and SDREFAGG is 0, then set:
2069
            //      SDNEWSYMWIDTHS[NSYMSDECODED] = SYMWIDTH"
2070
70
            if (inputs.uses_huffman_encoding && !inputs.uses_refinement_or_aggregate_coding)
2071
69
                TRY(new_symbol_widths.try_append(symbol_width));
2072
2073
            // "iv) Set:
2074
            //      NSYMSDECODED = NSYMSDECODED + 1"
2075
69
            number_of_symbols_decoded++;
2076
69
        }
2077
2078
        // "d) If SDHUFF is 1 and SDREFAGG is 0, then decode the height class collective bitmap as described
2079
        //     in 6.5.9. Let BHC be the decoded bitmap. This bitmap has width TOTWIDTH and height
2080
        //     HCHEIGHT. Break up the bitmap BHC as follows to obtain the symbols
2081
        //     SDNEWSYMS[HCFIRSTSYM] through SDNEWSYMS[NSYMSDECODED – 1].
2082
        //
2083
        //     BHC contains the NSYMSDECODED – HCFIRSTSYM symbols concatenated left-to-right, with no
2084
        //     intervening gaps. For each I between HCFIRSTSYM and NSYMSDECODED – 1:
2085
        //
2086
        //     • the width of SDNEWSYMS[I] is the value of SDNEWSYMWIDTHS[I];
2087
        //     • the height of SDNEWSYMS[I] is HCHEIGHT; and
2088
        //     • the bitmap SDNEWSYMS[I] can be obtained by extracting the columns of BHC from:
2089
        //
2090
        //           sum(J=HCFIRSTSYM to I-1, SDNEWSYMWIDTHS[J]) to sum(J=HCFIRSTSYM to I-1, SDNEWSYMWIDTHS[J])^(-1)"
2091
        // Note: I think the spec means "...to sum(J=HCFIRSTSYM to I, SDNEWSYMWIDTHS[J]) - 1" in the last sentence.
2092
1
        if (inputs.uses_huffman_encoding && !inputs.uses_refinement_or_aggregate_coding) {
2093
1
            auto collective_bitmap = TRY(read_height_class_collective_bitmap(total_width, height_class_height));
2094
0
            u32 current_column = 0;
2095
0
            for (size_t i = height_class_first_symbol; i < number_of_symbols_decoded; ++i) {
2096
0
                auto width = new_symbol_widths[i];
2097
0
                IntRect symbol_rect { static_cast<int>(current_column), 0, static_cast<int>(width), static_cast<int>(height_class_height) };
2098
0
                TRY(new_symbols.try_append(collective_bitmap->subbitmap(symbol_rect)));
2099
0
                current_column += width;
2100
0
            }
2101
0
        }
2102
1
    }
2103
2104
    // "5) Determine which symbol bitmaps are exported from this symbol dictionary, as described in 6.5.10. These
2105
    //     bitmaps can be drawn from the symbols that are used as input to the symbol dictionary decoding
2106
    //     procedure as well as the new symbols produced by the decoding procedure."
2107
1
    Optional<JBIG2::HuffmanTable*> export_table;
2108
1
    if (inputs.uses_huffman_encoding)
2109
0
        export_table = TRY(JBIG2::HuffmanTable::standard_huffman_table(JBIG2::HuffmanTable::StandardTable::B_1));
2110
2111
    // 6.5.10 Exported symbols
2112
1
    Vector<bool> export_flags;
2113
1
    export_flags.resize(inputs.input_symbols.size() + inputs.number_of_new_symbols);
2114
2115
    // "1) Set:
2116
    //      EXINDEX = 0
2117
    //      CUREXFLAG = 0"
2118
1
    u32 exported_index = 0;
2119
1
    bool current_export_flag = false;
2120
2121
1
    do {
2122
        // "2) Decode a value using Table B.1 if SDHUFF is 1, or the IAEX integer arithmetic decoding procedure if
2123
        //  SDHUFF is 0. Let EXRUNLENGTH be the decoded value."
2124
1
        i32 export_run_length;
2125
1
        if (inputs.uses_huffman_encoding)
2126
0
            export_run_length = TRY(export_table.value()->read_symbol_non_oob(*bit_stream));
2127
1
        else
2128
1
            export_run_length = TRY(symbol_contexts->export_integer_decoder.decode_non_oob(*decoder));
2129
2130
        // "3) Set EXFLAGS[EXINDEX] through EXFLAGS[EXINDEX + EXRUNLENGTH – 1] to CUREXFLAG.
2131
        //  If EXRUNLENGTH = 0, then this step does not change any values."
2132
1
        for (int i = 0; i < export_run_length; ++i)
2133
0
            export_flags[exported_index + i] = current_export_flag;
2134
2135
        // "4) Set:
2136
        //      EXINDEX = EXINDEX + EXRUNLENGTH
2137
        //      CUREXFLAG = NOT(CUREXFLAG)"
2138
0
        exported_index += export_run_length;
2139
0
        current_export_flag = !current_export_flag;
2140
2141
        //  5) Repeat steps 2) through 4) until EXINDEX == SDNUMINSYMS + SDNUMNEWSYMS.
2142
0
    } while (exported_index < inputs.input_symbols.size() + inputs.number_of_new_symbols);
2143
2144
    // "6) The array EXFLAGS now contains 1 for each symbol that is exported from the dictionary, and 0 for each
2145
    //  symbol that is not exported."
2146
1
    Vector<BilevelSubImage> exported_symbols;
2147
2148
    // "7) Set:
2149
    //      I = 0
2150
    //      J = 0
2151
    //  8) For each value of I from 0 to SDNUMINSYMS + SDNUMNEWSYMS – 1,"
2152
0
    for (size_t i = 0; i < inputs.input_symbols.size() + inputs.number_of_new_symbols; ++i) {
2153
        // "if EXFLAGS[I] == 1 then perform the following steps:"
2154
0
        if (!export_flags[i])
2155
0
            continue;
2156
        //  "a) If I < SDNUMINSYMS then set:
2157
        //       SDEXSYMS[J] = SDINSYMS[I]
2158
        //       J = J + 1"
2159
0
        if (i < inputs.input_symbols.size())
2160
0
            TRY(exported_symbols.try_append(inputs.input_symbols[i]));
2161
2162
        //  "b) If I >= SDNUMINSYMS then set:
2163
        //       SDEXSYMS[J] = SDNEWSYMS[I – SDNUMINSYMS]
2164
        //       J = J + 1"
2165
0
        if (i >= inputs.input_symbols.size())
2166
0
            TRY(exported_symbols.try_append(move(new_symbols[i - inputs.input_symbols.size()])));
2167
0
    }
2168
2169
0
    if (exported_symbols.size() != inputs.number_of_exported_symbols)
2170
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Unexpected number of exported symbols");
2171
2172
0
    return exported_symbols;
2173
0
}
2174
2175
// Annex C Gray-scale image decoding procedure
2176
2177
// C.2 Input parameters
2178
// Table C.1 – Parameters for the gray-scale image decoding procedure
2179
struct GrayscaleInputParameters {
2180
    bool uses_mmr { false }; // "GSMMR" in spec.
2181
2182
    Optional<BilevelImage const&> skip_pattern; // "GSUSESKIP" / "GSKIP" in spec.
2183
2184
    u8 bpp { 0 };         // "GSBPP" in spec.
2185
    u32 width { 0 };      // "GSW" in spec.
2186
    u32 height { 0 };     // "GSH" in spec.
2187
    u8 template_id { 0 }; // "GSTEMPLATE" in spec.
2188
2189
    // If uses_mmr is false, grayscale_image_decoding_procedure() reads data off this decoder.
2190
    MQArithmeticDecoder* arithmetic_decoder { nullptr };
2191
};
2192
2193
// C.5 Decoding the gray-scale image
2194
static ErrorOr<Vector<u64>> grayscale_image_decoding_procedure(GrayscaleInputParameters const& inputs, ReadonlyBytes data, Optional<JBIG2::GenericContexts>& contexts)
2195
0
{
2196
0
    VERIFY(inputs.bpp < 64);
2197
2198
    // Table C.4 – Parameters used to decode a bitplane of the gray-scale image
2199
0
    GenericRegionDecodingInputParameters generic_inputs;
2200
0
    generic_inputs.is_modified_modified_read = inputs.uses_mmr;
2201
0
    generic_inputs.region_width = inputs.width;
2202
0
    generic_inputs.region_height = inputs.height;
2203
0
    generic_inputs.gb_template = inputs.template_id;
2204
0
    generic_inputs.is_typical_prediction_used = false;
2205
0
    generic_inputs.is_extended_reference_template_used = false; // Missing from spec.
2206
0
    generic_inputs.skip_pattern = inputs.skip_pattern;
2207
0
    generic_inputs.adaptive_template_pixels[0].x = inputs.template_id <= 1 ? 3 : 2;
2208
0
    generic_inputs.adaptive_template_pixels[0].y = -1;
2209
0
    generic_inputs.adaptive_template_pixels[1].x = -3;
2210
0
    generic_inputs.adaptive_template_pixels[1].y = -1;
2211
0
    generic_inputs.adaptive_template_pixels[2].x = 2;
2212
0
    generic_inputs.adaptive_template_pixels[2].y = -2;
2213
0
    generic_inputs.adaptive_template_pixels[3].x = -2;
2214
0
    generic_inputs.adaptive_template_pixels[3].y = -2;
2215
0
    generic_inputs.arithmetic_decoder = inputs.arithmetic_decoder;
2216
2217
    // An MMR graymap is the only case where the size of the a generic region is not known in advance,
2218
    // and where the data is immediately followed by more MMR data. We need to have the MMR decoder
2219
    // skip the EOFB marker at the end, so that the following bitplanes can be decoded.
2220
    // See 6.2.6 Decoding using MMR coding.
2221
0
    generic_inputs.require_eof_after_mmr = GenericRegionDecodingInputParameters::RequireEOFBAfterMMR::Yes;
2222
2223
0
    FixedMemoryStream stream { data };
2224
0
    generic_inputs.stream = &stream;
2225
2226
    // "The gray-scale image is obtained by decoding GSBPP bitplanes. These bitplanes are denoted (from least significant to
2227
    //  most significant) GSPLANES[0], GSPLANES[1], . . . , GSPLANES[GSBPP – 1]. The bitplanes are Gray-coded, so
2228
    //  that each bitplane's true value is equal to its coded value XORed with the next-more-significant bitplane."
2229
0
    Vector<RefPtr<BilevelImage>> bitplanes;
2230
0
    bitplanes.resize(inputs.bpp);
2231
2232
    // "1) Decode GSPLANES[GSBPP – 1] using the generic region decoding procedure. The parameters to the
2233
    //     generic region decoding procedure are as shown in Table C.4."
2234
0
    bitplanes[inputs.bpp - 1] = TRY(generic_region_decoding_procedure(generic_inputs, contexts));
2235
2236
    // "2) Set J = GSBPP – 2."
2237
0
    int j = inputs.bpp - 2;
2238
2239
    // "3) While J >= 0, perform the following steps:"
2240
0
    while (j >= 0) {
2241
        // "a) Decode GSPLANES[J] using the generic region decoding procedure. The parameters to the generic
2242
        //     region decoding procedure are as shown in Table C.4."
2243
0
        bitplanes[j] = TRY(generic_region_decoding_procedure(generic_inputs, contexts));
2244
2245
        // "b) For each pixel (x, y) in GSPLANES[J], set:
2246
        //     GSPLANES[J][x, y] = GSPLANES[J + 1][x, y] XOR GSPLANES[J][x, y]"
2247
0
        bitplanes[j + 1]->composite_onto(*bitplanes[j], { 0, 0 }, BilevelImage::CompositionType::Xor);
2248
2249
        // "c) Set J = J – 1."
2250
0
        j = j - 1;
2251
0
    }
2252
2253
    // "4) For each (x, y), set:
2254
    //     GSVALS [x, y] = sum_{J = 0}^{GSBPP - 1} GSPLANES[J][x,y] × 2**J)"
2255
0
    Vector<u64> result;
2256
0
    result.resize(inputs.width * inputs.height);
2257
0
    for (u32 y = 0; y < inputs.height; ++y) {
2258
0
        for (u32 x = 0; x < inputs.width; ++x) {
2259
0
            u64 value = 0;
2260
0
            for (int j = 0; j < inputs.bpp; ++j) {
2261
0
                if (bitplanes[j]->get_bit(x, y))
2262
0
                    value |= 1 << j;
2263
0
            }
2264
0
            result[y * inputs.width + x] = value;
2265
0
        }
2266
0
    }
2267
0
    return result;
2268
0
}
2269
2270
// 6.6.2 Input parameters
2271
// Table 20 – Parameters for the halftone region decoding procedure
2272
struct HalftoneRegionDecodingInputParameters {
2273
    u32 region_width { 0 };                                                             // "HBW" in spec.
2274
    u32 region_height { 0 };                                                            // "HBH" in spec.
2275
    bool uses_mmr { false };                                                            // "HMMR" in spec.
2276
    u8 halftone_template { 0 };                                                         // "HTEMPLATE" in spec.
2277
    Vector<BilevelSubImage> patterns;                                                   // "HNUMPATS" / "HPATS" in spec.
2278
    bool default_pixel_value { false };                                                 // "HDEFPIXEL" in spec.
2279
    JBIG2::CombinationOperator combination_operator { JBIG2::CombinationOperator::Or }; // "HCOMBOP" in spec.
2280
    bool enable_skip { false };                                                         // "HENABLESKIP" in spec.
2281
    u32 grayscale_width { 0 };                                                          // "HGW" in spec.
2282
    u32 grayscale_height { 0 };                                                         // "HGH" in spec.
2283
    i32 grid_origin_x_offset { 0 };                                                     // "HGX" in spec.
2284
    i32 grid_origin_y_offset { 0 };                                                     // "HGY" in spec.
2285
    u16 grid_vector_x { 0 };                                                            // "HRY" in spec.
2286
    u16 grid_vector_y { 0 };                                                            // "HRX" in spec.
2287
    u8 pattern_width { 0 };                                                             // "HPW" in spec.
2288
    u8 pattern_height { 0 };                                                            // "HPH" in spec.
2289
};
2290
2291
// 6.6 Halftone Region Decoding Procedure
2292
static ErrorOr<NonnullRefPtr<BilevelImage>> halftone_region_decoding_procedure(HalftoneRegionDecodingInputParameters const& inputs, ReadonlyBytes data, Optional<JBIG2::GenericContexts>& contexts)
2293
0
{
2294
    // 6.6.5 Decoding the halftone region
2295
    // "1) Fill a bitmap HTREG, of the size given by HBW and HBH, with the HDEFPIXEL value."
2296
0
    auto result = TRY(BilevelImage::create(inputs.region_width, inputs.region_height));
2297
0
    result->fill(inputs.default_pixel_value);
2298
2299
    // "2) If HENABLESKIP equals 1, compute a bitmap HSKIP as shown in 6.6.5.1."
2300
0
    Optional<BilevelImage const&> skip_pattern;
2301
0
    RefPtr<BilevelImage> skip_pattern_storage;
2302
0
    if (inputs.enable_skip) {
2303
0
        skip_pattern_storage = TRY(JBIG2::halftone_skip_pattern({
2304
0
            inputs.region_width,
2305
0
            inputs.region_height,
2306
0
            inputs.grayscale_width,
2307
0
            inputs.grayscale_height,
2308
0
            inputs.grid_origin_x_offset,
2309
0
            inputs.grid_origin_y_offset,
2310
0
            inputs.grid_vector_x,
2311
0
            inputs.grid_vector_y,
2312
0
            inputs.pattern_width,
2313
0
            inputs.pattern_height,
2314
0
        }));
2315
0
        skip_pattern = *skip_pattern_storage;
2316
0
    }
2317
2318
    // "3) Set HBPP to ⌈log2 (HNUMPATS)⌉."
2319
0
    u32 bits_per_pattern = AK::ceil_log2(inputs.patterns.size());
2320
2321
    // "4) Decode an image GI of size HGW by HGH with HBPP bits per pixel using the gray-scale image decoding
2322
    //     procedure as described in Annex C. Set the parameters to this decoding procedure as shown in Table 23.
2323
    //     Let GI be the results of invoking this decoding procedure."
2324
0
    GrayscaleInputParameters grayscale_inputs;
2325
0
    grayscale_inputs.uses_mmr = inputs.uses_mmr;
2326
0
    grayscale_inputs.width = inputs.grayscale_width;
2327
0
    grayscale_inputs.height = inputs.grayscale_height;
2328
    // HBPP is a 32-bit word in Table 22, Table 23 says to copy it to GSBPP, and according to Table C.1 GSBPP is 6 bits.
2329
0
    if (bits_per_pattern >= 64)
2330
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Too many patterns for grayscale image decoding");
2331
0
    grayscale_inputs.bpp = bits_per_pattern;
2332
0
    grayscale_inputs.skip_pattern = skip_pattern;
2333
0
    grayscale_inputs.template_id = inputs.halftone_template;
2334
2335
0
    Optional<MQArithmeticDecoder> decoder;
2336
0
    if (!inputs.uses_mmr) {
2337
0
        decoder = TRY(MQArithmeticDecoder::initialize(data));
2338
0
        grayscale_inputs.arithmetic_decoder = &decoder.value();
2339
0
    }
2340
2341
0
    auto grayscale_image = TRY(grayscale_image_decoding_procedure(grayscale_inputs, data, contexts));
2342
2343
    // "5) Place sequentially the patterns corresponding to the values in GI into HTREG by the procedure described in 6.6.5.2.
2344
    //     The rendering procedure is illustrated in Figure 26. The outline of two patterns are marked by dotted boxes."
2345
0
    {
2346
        // 6.6.5.2 Rendering the patterns
2347
        // "Draw the patterns into HTREG using the following procedure:
2348
        //  1) For each value of m_g between 0 and HGH – 1, beginning from 0, perform the following steps."
2349
0
        for (int m_g = 0; m_g < (int)inputs.grayscale_height; ++m_g) {
2350
            // "a) For each value of n_g between 0 and HGW – 1, beginning from 0, perform the following steps."
2351
0
            for (int n_g = 0; n_g < (int)inputs.grayscale_width; ++n_g) {
2352
                // "i) Set:
2353
                //      x = (HGX + m_g × HRY + n_g × HRX) >> 8
2354
                //      y = (HGY + m_g × HRX – n_g × HRY) >> 8"
2355
0
                auto x = (inputs.grid_origin_x_offset + m_g * inputs.grid_vector_y + n_g * inputs.grid_vector_x) >> 8;
2356
0
                auto y = (inputs.grid_origin_y_offset + m_g * inputs.grid_vector_x - n_g * inputs.grid_vector_y) >> 8;
2357
2358
                // "ii) Draw the pattern HPATS[GI[n_g, m_g]] into HTREG such that its upper left pixel is at location (x, y) in HTREG.
2359
                //
2360
                //      A pattern is drawn into HTREG as follows. Each pixel of the pattern shall be combined with
2361
                //      the current value of the corresponding pixel in the halftone-coded bitmap, using the
2362
                //      combination operator specified by HCOMBOP. The results of each combination shall be
2363
                //      written into that pixel in the halftone-coded bitmap.
2364
                //
2365
                //      If any part of a decoded pattern, when placed at location (x, y) lies outside the actual halftone-
2366
                //      coded bitmap, then this part of the pattern shall be ignored in the process of combining the
2367
                //      pattern with the bitmap."
2368
0
                auto grayscale_value = grayscale_image[n_g + m_g * inputs.grayscale_width];
2369
0
                if (grayscale_value >= inputs.patterns.size())
2370
0
                    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Grayscale value out of range");
2371
0
                auto const& pattern = inputs.patterns[grayscale_value];
2372
0
                pattern.composite_onto(*result, { x, y }, to_composition_type(inputs.combination_operator));
2373
0
            }
2374
0
        }
2375
0
    }
2376
2377
    // "6) After all the patterns have been placed on the bitmap, the current contents of the halftone-coded bitmap are
2378
    //     the results that shall be obtained by every decoder, whether it performs this exact sequence of steps or not."
2379
0
    return result;
2380
0
}
2381
2382
// 6.7.2 Input parameters
2383
// Table 24 – Parameters for the pattern dictionary decoding procedure
2384
struct PatternDictionaryDecodingInputParameters {
2385
    bool uses_mmr { false }; // "HDMMR" in spec.
2386
    u32 width { 0 };         // "HDPW" in spec.
2387
    u32 height { 0 };        // "HDPH" in spec.
2388
    u32 gray_max { 0 };      // "GRAYMAX" in spec.
2389
    u8 hd_template { 0 };    // "HDTEMPLATE" in spec.
2390
};
2391
2392
// 6.7 Pattern Dictionary Decoding Procedure
2393
static ErrorOr<Vector<BilevelSubImage>> pattern_dictionary_decoding_procedure(PatternDictionaryDecodingInputParameters const& inputs, ReadonlyBytes data, Optional<JBIG2::GenericContexts>& contexts)
2394
3
{
2395
    // Table 27 – Parameters used to decode a pattern dictionary's collective bitmap
2396
3
    GenericRegionDecodingInputParameters generic_inputs;
2397
3
    generic_inputs.is_modified_modified_read = inputs.uses_mmr;
2398
3
    generic_inputs.region_width = (inputs.gray_max + 1) * inputs.width;
2399
3
    generic_inputs.region_height = inputs.height;
2400
3
    generic_inputs.gb_template = inputs.hd_template;
2401
3
    generic_inputs.is_typical_prediction_used = false;
2402
3
    generic_inputs.is_extended_reference_template_used = false; // Missing from spec in table 27.
2403
3
    generic_inputs.skip_pattern = OptionalNone {};
2404
3
    generic_inputs.adaptive_template_pixels[0].x = -inputs.width;
2405
3
    generic_inputs.adaptive_template_pixels[0].y = 0;
2406
3
    generic_inputs.adaptive_template_pixels[1].x = -3;
2407
3
    generic_inputs.adaptive_template_pixels[1].y = -1;
2408
3
    generic_inputs.adaptive_template_pixels[2].x = 2;
2409
3
    generic_inputs.adaptive_template_pixels[2].y = -2;
2410
3
    generic_inputs.adaptive_template_pixels[3].x = -2;
2411
3
    generic_inputs.adaptive_template_pixels[3].y = -2;
2412
2413
3
    Optional<FixedMemoryStream> stream;
2414
3
    Optional<MQArithmeticDecoder> decoder;
2415
3
    if (inputs.uses_mmr) {
2416
2
        stream = FixedMemoryStream { data };
2417
2
        generic_inputs.stream = &stream.value();
2418
2
    } else {
2419
1
        decoder = TRY(MQArithmeticDecoder::initialize(data));
2420
1
        generic_inputs.arithmetic_decoder = &decoder.value();
2421
1
    }
2422
2423
3
    auto bitmap = TRY(generic_region_decoding_procedure(generic_inputs, contexts));
2424
2425
1
    Vector<BilevelSubImage> patterns;
2426
4
    for (u32 gray = 0; gray <= inputs.gray_max; ++gray) {
2427
3
        int x = gray * inputs.width;
2428
3
        auto pattern = bitmap->subbitmap({ x, 0, static_cast<int>(inputs.width), static_cast<int>(inputs.height) });
2429
3
        patterns.append(move(pattern));
2430
3
    }
2431
2432
1
    dbgln_if(JBIG2_DEBUG, "Pattern dictionary: {} patterns", patterns.size());
2433
2434
1
    return patterns;
2435
3
}
2436
2437
static ErrorOr<void> decode_symbol_dictionary(JBIG2LoadingContext& context, SegmentData& segment)
2438
4
{
2439
    // 7.4.2 Symbol dictionary segment syntax
2440
2441
    // Retrieve referred-to symbols and tables. The spec does this later,
2442
    // but having the custom tables available is convenient for collecting huffman tables below.
2443
4
    Vector<BilevelSubImage> symbols;
2444
4
    Vector<JBIG2::HuffmanTable const*> custom_tables;
2445
4
    SegmentData const* last_referred_to_symbol_dictionary_segment = nullptr;
2446
4
    for (auto const* referred_to_segment : segment.referred_to_segments) {
2447
0
        dbgln_if(JBIG2_DEBUG, "Symbol segment refers to segment id {}", referred_to_segment->header.segment_number);
2448
0
        if (referred_to_segment->symbols.has_value()) {
2449
0
            symbols.extend(referred_to_segment->symbols.value());
2450
0
            last_referred_to_symbol_dictionary_segment = referred_to_segment;
2451
0
        } else if (referred_to_segment->huffman_table.has_value()) {
2452
0
            custom_tables.append(&referred_to_segment->huffman_table.value());
2453
0
        } else {
2454
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Symbol segment referred-to segment without symbols or huffman table");
2455
0
        }
2456
0
    }
2457
2458
    // 7.4.2.1 Symbol dictionary segment data header
2459
4
    FixedMemoryStream stream(segment.data);
2460
2461
    // 7.4.2.1.1 Symbol dictionary flags
2462
4
    u16 flags = TRY(stream.read_value<BigEndian<u16>>());
2463
3
    bool uses_huffman_encoding = (flags & 1) != 0;               // "SDHUFF" in spec.
2464
3
    bool uses_refinement_or_aggregate_coding = (flags & 2) != 0; // "SDREFAGG" in spec.
2465
2466
3
    auto huffman_tables = TRY(symbol_dictionary_huffman_tables_from_flags(flags, custom_tables));
2467
2468
3
    bool bitmap_coding_context_used = (flags >> 8) & 1;
2469
3
    if (uses_huffman_encoding && !uses_refinement_or_aggregate_coding && bitmap_coding_context_used)
2470
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid bitmap_coding_context_used");
2471
2472
3
    bool bitmap_coding_context_retained = (flags >> 9) & 1;
2473
3
    if (uses_huffman_encoding && !uses_refinement_or_aggregate_coding && bitmap_coding_context_retained)
2474
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid bitmap_coding_context_retained");
2475
2476
3
    u8 template_used = (flags >> 10) & 0b11; // "SDTEMPLATE" in spec.
2477
3
    if (uses_huffman_encoding && template_used != 0)
2478
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid template_used");
2479
2480
3
    u8 refinement_template_used = (flags >> 12) & 1; // "SDREFTEMPLATE" in spec.
2481
2482
    // Quirk: 042_22.jb2 does not set SDREFAGG but it does set SDREFTEMPLATE.
2483
3
    if (!uses_refinement_or_aggregate_coding && refinement_template_used != 0 && !context.allow_power_jbig2_quirks)
2484
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid refinement_template_used");
2485
2486
3
    if (flags & 0b1110'0000'0000'0000)
2487
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid symbol dictionary flags");
2488
2489
    // 7.4.2.1.2 Symbol dictionary AT flags
2490
3
    Array<JBIG2::AdaptiveTemplatePixel, 4> adaptive_template {};
2491
3
    if (!uses_huffman_encoding) {
2492
1
        int number_of_adaptive_template_pixels = template_used == 0 ? 4 : 1;
2493
2
        for (int i = 0; i < number_of_adaptive_template_pixels; ++i) {
2494
1
            adaptive_template[i].x = TRY(stream.read_value<i8>());
2495
1
            adaptive_template[i].y = TRY(stream.read_value<i8>());
2496
1
        }
2497
1
    }
2498
2499
    // 7.4.2.1.3 Symbol dictionary refinement AT flags
2500
3
    Array<JBIG2::AdaptiveTemplatePixel, 2> adaptive_refinement_template {};
2501
3
    if (uses_refinement_or_aggregate_coding && refinement_template_used == 0) {
2502
3
        for (size_t i = 0; i < adaptive_refinement_template.size(); ++i) {
2503
2
            adaptive_refinement_template[i].x = TRY(stream.read_value<i8>());
2504
2
            adaptive_refinement_template[i].y = TRY(stream.read_value<i8>());
2505
2
        }
2506
1
    }
2507
2508
    // 7.4.2.1.4 Number of exported symbols (SDNUMEXSYMS)
2509
3
    u32 number_of_exported_symbols = TRY(stream.read_value<BigEndian<u32>>());
2510
2511
    // 7.4.2.1.5 Number of new symbols (SDNUMNEWSYMS)
2512
3
    u32 number_of_new_symbols = TRY(stream.read_value<BigEndian<u32>>());
2513
2514
3
    u8 huffman_table_selection_for_height_differences = (flags >> 2) & 0b11; // "SDHUFFDH" in spec.
2515
3
    u8 huffman_table_selection_for_width_differences = (flags >> 4) & 0b11;  // "SDHUFFDW" in spec.
2516
3
    bool uses_user_supplied_size_table = (flags >> 6) & 1;                   // "SDHUFFBMSIZE" in spec.
2517
3
    bool uses_user_supplied_aggregate_table = (flags >> 7) & 1;              // "SDHUFFAGGINST" in spec.
2518
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: uses_huffman_encoding={}", uses_huffman_encoding);
2519
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: uses_refinement_or_aggregate_coding={}", uses_refinement_or_aggregate_coding);
2520
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: huffman_table_selection_for_height_differences={}", huffman_table_selection_for_height_differences);
2521
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: huffman_table_selection_for_width_differences={}", huffman_table_selection_for_width_differences);
2522
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: uses_user_supplied_size_table={}", uses_user_supplied_size_table);
2523
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: uses_user_supplied_aggregate_table={}", uses_user_supplied_aggregate_table);
2524
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: bitmap_coding_context_used={}", bitmap_coding_context_used);
2525
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: bitmap_coding_context_retained={}", bitmap_coding_context_retained);
2526
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: template_used={}", template_used);
2527
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: refinement_template_used={}", refinement_template_used);
2528
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: number_of_exported_symbols={}", number_of_exported_symbols);
2529
3
    dbgln_if(JBIG2_DEBUG, "Symbol dictionary: number_of_new_symbols={}", number_of_new_symbols);
2530
2531
    // 7.4.2.1.6 Symbol dictionary segment Huffman table selection
2532
    // We currently do this as part of handling 7.4.2.1.1 a bit further up.
2533
2534
    // 7.4.2.2 Decoding a symbol dictionary segment
2535
    // "1) Interpret its header, as described in 7.4.2.1."
2536
    // Done!
2537
2538
    // "2) Decode (or retrieve the results of decoding) any referred-to symbol dictionary and tables segments."
2539
    // Done further up already.
2540
2541
    // "3) If the "bitmap coding context used" bit in the header was 1, then, as described in E.3.8, set the arithmetic
2542
    //     coding statistics for the generic region and generic refinement region decoding procedures to the values
2543
    //     that they contained at the end of decoding the last-referred-to symbol dictionary segment. That symbol
2544
    //     dictionary segment's symbol dictionary segment data header must have had the "bitmap coding context
2545
    //     retained" bit equal to 1. The values of SDHUFF, SDREFAGG, SDTEMPLATE, SDRTEMPLATE,
2546
    //     and all of the AT locations (both direct and refinement) for this symbol dictionary must match the
2547
    //     corresponding values from the symbol dictionary whose context values are being used."
2548
3
    Optional<JBIG2::GenericContexts> generic_contexts;
2549
3
    Optional<JBIG2::RefinementContexts> refinement_contexts;
2550
3
    if (bitmap_coding_context_used) {
2551
0
        if (!last_referred_to_symbol_dictionary_segment)
2552
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: \"bitmap coding context used\" bit set, but no last-referred-to symbol dictionary segment present");
2553
0
        if (!last_referred_to_symbol_dictionary_segment->retained_bitmap_coding_contexts.has_value())
2554
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: \"bitmap coding context used\" bit set, but last-referred-to symbol dictionary segment did not set \"bitmap coding context retained\"");
2555
2556
0
        auto const& last_state = last_referred_to_symbol_dictionary_segment->retained_bitmap_coding_contexts.value();
2557
0
        if (last_state.used_huffman_encoding != uses_huffman_encoding)
2558
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: \"bitmap coding context used\" bit set, but SDHUFF values do not match");
2559
0
        if (last_state.used_refinement_or_aggregate_coding != uses_refinement_or_aggregate_coding)
2560
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: \"bitmap coding context used\" bit set, but SDREFAGG values do not match");
2561
0
        if (last_state.symbol_template != template_used)
2562
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: \"bitmap coding context used\" bit set, but SDTEMPLATE values do not match");
2563
0
        if (last_state.refinement_template != refinement_template_used)
2564
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: \"bitmap coding context used\" bit set, but SDRTEMPLATE values do not match");
2565
0
        if (last_state.adaptive_template_pixels != adaptive_template)
2566
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: \"bitmap coding context used\" bit set, but SDATX / SDATY values do not match");
2567
0
        if (last_state.refinement_adaptive_template_pixels != adaptive_refinement_template)
2568
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: \"bitmap coding context used\" bit set, but SDRATX / SDRATY values do not match");
2569
2570
0
        generic_contexts = last_state.generic_contexts;
2571
0
        refinement_contexts = last_state.refinement_contexts;
2572
0
    }
2573
    // "4) If the "bitmap coding context used" bit in the header was 0, then, as described in E.3.7,
2574
    //     reset all the arithmetic coding statistics for the generic region and generic refinement region decoding procedures to zero."
2575
3
    else {
2576
3
        if (!uses_huffman_encoding)
2577
1
            generic_contexts = JBIG2::GenericContexts { template_used };
2578
3
        if (uses_refinement_or_aggregate_coding)
2579
1
            refinement_contexts = JBIG2::RefinementContexts { refinement_template_used };
2580
3
    }
2581
2582
    // "5) Reset the arithmetic coding statistics for all the contexts of all the arithmetic integer coders to zero."
2583
    // We currently do this by keeping the statistics as locals in symbol_dictionary_decoding_procedure().
2584
2585
    // "6) Invoke the symbol dictionary decoding procedure described in 6.5, with the parameters to the symbol dictionary decoding procedure set as shown in Table 31."
2586
3
    SymbolDictionaryDecodingInputParameters inputs;
2587
3
    inputs.uses_huffman_encoding = uses_huffman_encoding;
2588
3
    inputs.uses_refinement_or_aggregate_coding = uses_refinement_or_aggregate_coding;
2589
3
    inputs.input_symbols = move(symbols);
2590
3
    inputs.number_of_new_symbols = number_of_new_symbols;
2591
3
    inputs.number_of_exported_symbols = number_of_exported_symbols;
2592
3
    inputs.delta_height_table = huffman_tables.delta_height_table;
2593
3
    inputs.delta_width_table = huffman_tables.delta_width_table;
2594
3
    inputs.bitmap_size_table = huffman_tables.bitmap_size_table;
2595
3
    inputs.number_of_symbol_instances_table = huffman_tables.number_of_symbol_instances_table;
2596
3
    inputs.symbol_template = template_used;
2597
3
    inputs.adaptive_template_pixels = adaptive_template;
2598
3
    inputs.refinement_template = refinement_template_used;
2599
3
    inputs.refinement_adaptive_template_pixels = adaptive_refinement_template;
2600
3
    auto result = TRY(symbol_dictionary_decoding_procedure(inputs, generic_contexts, refinement_contexts, segment.data.slice(TRY(stream.tell()))));
2601
2602
    // "7) If the "bitmap coding context retained" bit in the header was 1, then, as described in E.3.8, preserve the current contents
2603
    //     of the arithmetic coding statistics for the generic region and generic refinement region decoding procedures."
2604
0
    if (bitmap_coding_context_retained) {
2605
0
        segment.retained_bitmap_coding_contexts = {
2606
0
            move(generic_contexts),
2607
0
            move(refinement_contexts),
2608
0
            uses_huffman_encoding,
2609
0
            uses_refinement_or_aggregate_coding,
2610
0
            template_used,
2611
0
            refinement_template_used,
2612
0
            adaptive_template,
2613
0
            adaptive_refinement_template,
2614
0
        };
2615
0
    }
2616
2617
0
    segment.symbols = move(result);
2618
2619
0
    return {};
2620
3
}
2621
2622
struct RegionResult {
2623
    JBIG2::RegionSegmentInformationField information_field;
2624
    NonnullRefPtr<BilevelImage> bitmap;
2625
};
2626
2627
static void handle_immediate_direct_region(JBIG2LoadingContext& context, RegionResult const& result)
2628
3
{
2629
    // 8.2 Page image composition, 5a.
2630
3
    result.bitmap->composite_onto(
2631
3
        *context.page.bits,
2632
3
        { result.information_field.x_location, result.information_field.y_location },
2633
3
        to_composition_type(result.information_field.external_combination_operator()));
2634
3
}
2635
2636
static ErrorOr<void> handle_intermediate_direct_region(JBIG2LoadingContext&, SegmentData& segment, RegionResult& result)
2637
0
{
2638
    // 8.2 Page image composition, 5b.
2639
0
    VERIFY(result.bitmap->width() == result.information_field.width);
2640
0
    VERIFY(result.bitmap->height() == result.information_field.height);
2641
0
    segment.aux_buffer = move(result.bitmap);
2642
0
    segment.aux_buffer_information_field = result.information_field;
2643
0
    return {};
2644
0
}
2645
2646
static ErrorOr<RegionResult> decode_text_region(JBIG2LoadingContext& context, SegmentData const& segment)
2647
7
{
2648
    // 7.4.3 Text region segment syntax
2649
7
    auto data = segment.data;
2650
7
    auto information_field = TRY(decode_region_segment_information_field(data));
2651
7
    data = data.slice(sizeof(information_field));
2652
2653
7
    dbgln_if(JBIG2_DEBUG, "Text region: width={}, height={}, x={}, y={}, flags={:#x}", information_field.width, information_field.height, information_field.x_location, information_field.y_location, information_field.flags);
2654
7
    TRY(validate_segment_combination_operator_consistency(context, information_field));
2655
2656
7
    FixedMemoryStream stream(data);
2657
2658
    // 7.4.3.1.1 Text region segment flags
2659
7
    u16 text_region_segment_flags = TRY(stream.read_value<BigEndian<u16>>());
2660
7
    bool uses_huffman_encoding = (text_region_segment_flags & 1) != 0;  // "SBHUFF" in spec.
2661
7
    bool uses_refinement_coding = (text_region_segment_flags >> 1) & 1; // "SBREFINE" in spec.
2662
7
    u8 log_strip_size = (text_region_segment_flags >> 2) & 3;           // "LOGSBSTRIPS" in spec.
2663
7
    u8 strip_size = 1u << log_strip_size;
2664
7
    u8 reference_corner = (text_region_segment_flags >> 4) & 3;     // "REFCORNER"
2665
7
    bool is_transposed = (text_region_segment_flags >> 6) & 1;      // "TRANSPOSED" in spec.
2666
7
    u8 combination_operator = (text_region_segment_flags >> 7) & 3; // "SBCOMBOP" in spec.
2667
7
    if (combination_operator > 4)
2668
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid text region combination operator");
2669
2670
7
    u8 default_pixel_value = (text_region_segment_flags >> 9) & 1; // "SBDEFPIXEL" in spec.
2671
2672
7
    u8 delta_s_offset_value = (text_region_segment_flags >> 10) & 0x1f; // "SBDSOFFSET" in spec.
2673
7
    i8 delta_s_offset = AK::sign_extend(delta_s_offset_value, 5);
2674
2675
7
    u8 refinement_template = (text_region_segment_flags >> 15) != 0; // "SBRTEMPLATE" in spec.
2676
7
    if (!uses_refinement_coding && refinement_template != 0)
2677
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid refinement_template");
2678
2679
    // Retrieve referred-to symbols and tables. The spec does this later, but the number of symbols is needed to decode the symbol ID Huffman table,
2680
    // and having the custom tables available is convenient for handling 7.4.3.1.2 below.
2681
7
    Vector<BilevelSubImage> symbols; // `symbols.size()` is "SBNUMSYMS" in spec.
2682
7
    Vector<JBIG2::HuffmanTable const*> custom_tables;
2683
7
    for (auto const* referred_to_segment : segment.referred_to_segments) {
2684
0
        dbgln_if(JBIG2_DEBUG, "Text segment refers to segment id {}", referred_to_segment->header.segment_number);
2685
0
        if (referred_to_segment->symbols.has_value())
2686
0
            symbols.extend(referred_to_segment->symbols.value());
2687
0
        else if (referred_to_segment->huffman_table.has_value())
2688
0
            custom_tables.append(&referred_to_segment->huffman_table.value());
2689
0
        else
2690
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Text segment referred-to segment without symbols or huffman table");
2691
0
    }
2692
2693
    // 7.4.3.1.2 Text region segment Huffman flags
2694
    // "This field is only present if SBHUFF is 1."
2695
7
    JBIG2::TextRegionHuffmanTables huffman_tables;
2696
7
    if (uses_huffman_encoding) {
2697
4
        u16 huffman_flags = TRY(stream.read_value<BigEndian<u16>>());
2698
2699
        // Quirk: 042_11.jb2 has refinement huffman table bits set but the SBREFINE bit is not set.
2700
4
        if (!uses_refinement_coding && (huffman_flags & 0x7fc0) != 0 && !context.allow_power_jbig2_quirks)
2701
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Huffman flags have refinement bits set but refinement bit is not set");
2702
2703
4
        huffman_tables = TRY(text_region_huffman_tables_from_flags(huffman_flags, custom_tables));
2704
2
    }
2705
2706
    // 7.4.3.1.3 Text region refinement AT flags
2707
    // "This field is only present if SBREFINE is 1 and SBRTEMPLATE is 0."
2708
5
    Array<JBIG2::AdaptiveTemplatePixel, 2> adaptive_refinement_template {};
2709
5
    if (uses_refinement_coding && refinement_template == 0) {
2710
6
        for (size_t i = 0; i < adaptive_refinement_template.size(); ++i) {
2711
4
            adaptive_refinement_template[i].x = TRY(stream.read_value<i8>());
2712
4
            adaptive_refinement_template[i].y = TRY(stream.read_value<i8>());
2713
4
        }
2714
2
    }
2715
2716
    // 7.4.3.1.4 Number of symbol instances (SBNUMINSTANCES)
2717
5
    u32 number_of_symbol_instances = TRY(stream.read_value<BigEndian<u32>>());
2718
2719
    // 7.4.3.1.5 Text region segment symbol ID Huffman decoding table
2720
    // "It is only present if SBHUFF is 1."
2721
5
    Vector<JBIG2::Code> symbol_id_codes;
2722
5
    Optional<JBIG2::HuffmanTable> symbol_id_table_storage;
2723
5
    JBIG2::HuffmanTable const* symbol_id_table = nullptr;
2724
5
    if (uses_huffman_encoding) {
2725
        // 7.4.3.1.7 Symbol ID Huffman table decoding
2726
2
        auto bit_stream = BigEndianInputBitStream { MaybeOwned { stream } };
2727
2728
        // "1) Read the code lengths for RUNCODE0 through RUNCODE34; each is stored as a four-bit value."
2729
2
        Array<u8, 35> code_length_lengths {};
2730
72
        for (size_t i = 0; i < code_length_lengths.size(); ++i)
2731
70
            code_length_lengths[i] = TRY(bit_stream.read_bits<u8>(4));
2732
2733
        // "2) Given the lengths, assign Huffman codes for RUNCODE0 through RUNCODE34 using the algorithm
2734
        //     in B.3."
2735
2
        auto code_length_codes = TRY(JBIG2::assign_huffman_codes(code_length_lengths));
2736
2737
2
        Vector<JBIG2::Code, 35> code_lengths_entries;
2738
70
        for (auto const& [i, length] : enumerate(code_length_lengths)) {
2739
70
            if (length == 0)
2740
24
                continue;
2741
46
            JBIG2::Code code { .prefix_length = length, .range_length = 0, .first_value = i, .code = code_length_codes[i] };
2742
46
            code_lengths_entries.append(code);
2743
46
        }
2744
2
        JBIG2::HuffmanTable code_lengths_table { code_lengths_entries };
2745
2746
2
        Vector<u8> code_lengths;
2747
2
        do {
2748
            // "3) Read a Huffman code using this assignment. This decodes into one of RUNCODE0 through
2749
            //     RUNCODE34. If it is RUNCODE32, read two additional bits. If it is RUNCODE33, read three
2750
            //     additional bits. If it is RUNCODE34, read seven additional bits."
2751
2
            auto code = TRY(code_lengths_table.read_symbol_non_oob(bit_stream));
2752
2
            u8 repeats = 0;
2753
2
            if (code == 32)
2754
0
                repeats = TRY(bit_stream.read_bits<u8>(2)) + 3;
2755
2
            else if (code == 33)
2756
1
                repeats = TRY(bit_stream.read_bits<u8>(3)) + 3;
2757
1
            else if (code == 34)
2758
0
                repeats = TRY(bit_stream.read_bits<u8>(7)) + 11;
2759
2760
            // "4) Interpret the RUNCODE code and the additional bits (if any) according to Table 29. This gives the
2761
            //     symbol ID code lengths for one or more symbols."
2762
            // Note: The spec means "Table 32" here.
2763
2
            if (code < 32) {
2764
1
                code_lengths.append(code);
2765
1
            } else if (code == 32) {
2766
0
                if (code_lengths.is_empty())
2767
0
                    return Error::from_string_literal("JBIG2ImageDecoderPlugin: RUNCODE32 without previous code");
2768
0
                auto last_value = code_lengths.last();
2769
0
                for (size_t i = 0; i < repeats; ++i)
2770
0
                    code_lengths.append(last_value);
2771
1
            } else if (code == 33 || code == 34) {
2772
6
                for (size_t i = 0; i < repeats; ++i)
2773
5
                    code_lengths.append(0);
2774
1
            }
2775
2776
            // "5) Repeat steps 3) and 4) until the symbol ID code lengths for all SBNUMSYMS symbols have been
2777
            //     determined."
2778
2
        } while (code_lengths.size() < symbols.size());
2779
2780
        // "6) Skip over the remaining bits in the last byte read, so that the actual text region decoding procedure begins
2781
        //     on a byte boundary."
2782
        // Done automatically by the BigEndianInputBitStream wrapping `stream`.
2783
2784
        // "7) Assign a Huffman code to each symbol by applying the algorithm in B.3 to the symbol ID code lengths
2785
        //     just decoded. The result is the symbol ID Huffman table SBSYMCODES."
2786
2
        auto codes = TRY(JBIG2::assign_huffman_codes(code_lengths));
2787
6
        for (auto const& [i, length] : enumerate(code_lengths)) {
2788
6
            if (length == 0)
2789
5
                continue;
2790
1
            JBIG2::Code code { .prefix_length = length, .range_length = 0, .first_value = i, .code = codes[i] };
2791
1
            symbol_id_codes.append(code);
2792
1
        }
2793
2
        symbol_id_table_storage = JBIG2::HuffmanTable { symbol_id_codes };
2794
2
        symbol_id_table = &symbol_id_table_storage.value();
2795
2
    }
2796
2797
5
    dbgln_if(JBIG2_DEBUG, "Text region: uses_huffman_encoding={}, uses_refinement_coding={}, strip_size={}, reference_corner={}, is_transposed={}", uses_huffman_encoding, uses_refinement_coding, strip_size, reference_corner, is_transposed);
2798
5
    dbgln_if(JBIG2_DEBUG, "Text region: combination_operator={}, default_pixel_value={}, delta_s_offset={}, refinement_template={}", combination_operator, default_pixel_value, delta_s_offset, refinement_template);
2799
5
    dbgln_if(JBIG2_DEBUG, "Text region: number_of_symbol_instances={}", number_of_symbol_instances);
2800
2801
    // 7.4.3.2 Decoding a text region segment
2802
    // "1) Interpret its header, as described in 7.4.3.1."
2803
    // Done!
2804
2805
    // "2) Decode (or retrieve the results of decoding) any referred-to symbol dictionary and tables segments."
2806
    // Done further up, since it's needed to decode the symbol ID Huffman table already.
2807
2808
    // "3) As described in E.3.7, reset all the arithmetic coding statistics to zero."
2809
5
    u32 id_symbol_code_length = AK::ceil_log2(symbols.size());
2810
5
    Optional<TextContexts> text_contexts;
2811
5
    if (!uses_huffman_encoding)
2812
3
        text_contexts = TextContexts { id_symbol_code_length };
2813
5
    Optional<JBIG2::RefinementContexts> refinement_contexts;
2814
5
    if (uses_refinement_coding)
2815
2
        refinement_contexts = JBIG2::RefinementContexts { refinement_template };
2816
2817
    // "4) Invoke the text region decoding procedure described in 6.4, with the parameters to the text region decoding procedure set as shown in Table 34."
2818
5
    TextRegionDecodingInputParameters inputs;
2819
5
    inputs.uses_huffman_encoding = uses_huffman_encoding;
2820
5
    inputs.uses_refinement_coding = uses_refinement_coding;
2821
5
    inputs.default_pixel = default_pixel_value;
2822
5
    inputs.operator_ = static_cast<JBIG2::CombinationOperator>(combination_operator);
2823
5
    inputs.is_transposed = is_transposed;
2824
5
    inputs.reference_corner = static_cast<JBIG2::ReferenceCorner>(reference_corner);
2825
5
    inputs.delta_s_offset = delta_s_offset;
2826
5
    inputs.region_width = information_field.width;
2827
5
    inputs.region_height = information_field.height;
2828
5
    inputs.number_of_instances = number_of_symbol_instances;
2829
5
    inputs.size_of_symbol_instance_strips = strip_size;
2830
5
    inputs.symbol_id_table = symbol_id_table;
2831
5
    inputs.id_symbol_code_length = id_symbol_code_length;
2832
5
    inputs.symbols = move(symbols);
2833
5
    inputs.first_s_table = huffman_tables.first_s_table;
2834
5
    inputs.subsequent_s_table = huffman_tables.subsequent_s_table;
2835
5
    inputs.delta_t_table = huffman_tables.delta_t_table;
2836
5
    inputs.refinement_delta_width_table = huffman_tables.refinement_delta_width_table;
2837
5
    inputs.refinement_delta_height_table = huffman_tables.refinement_delta_height_table;
2838
5
    inputs.refinement_x_offset_table = huffman_tables.refinement_x_offset_table;
2839
5
    inputs.refinement_y_offset_table = huffman_tables.refinement_y_offset_table;
2840
5
    inputs.refinement_size_table = huffman_tables.refinement_size_table;
2841
5
    inputs.refinement_template = refinement_template;
2842
5
    inputs.refinement_adaptive_template_pixels = adaptive_refinement_template;
2843
2844
5
    Optional<MQArithmeticDecoder> decoder;
2845
5
    Optional<BigEndianInputBitStream> bit_stream;
2846
5
    if (uses_huffman_encoding) {
2847
2
        bit_stream = BigEndianInputBitStream { MaybeOwned { stream } };
2848
2
        inputs.bit_stream = &bit_stream.value();
2849
3
    } else {
2850
6
        decoder = TRY(MQArithmeticDecoder::initialize(data.slice(TRY(stream.tell()))));
2851
6
        inputs.arithmetic_decoder = &decoder.value();
2852
6
    }
2853
2854
5
    auto result = TRY(text_region_decoding_procedure(inputs, text_contexts, refinement_contexts));
2855
3
    return RegionResult { .information_field = information_field, .bitmap = move(result) };
2856
5
}
2857
2858
static ErrorOr<void> decode_intermediate_text_region(JBIG2LoadingContext& context, SegmentData& segment)
2859
0
{
2860
0
    auto result = TRY(decode_text_region(context, segment));
2861
0
    return handle_intermediate_direct_region(context, segment, result);
2862
0
}
2863
2864
static ErrorOr<void> decode_immediate_text_region(JBIG2LoadingContext& context, SegmentData const& segment)
2865
7
{
2866
7
    auto result = TRY(decode_text_region(context, segment));
2867
3
    handle_immediate_direct_region(context, result);
2868
3
    return {};
2869
7
}
2870
2871
static ErrorOr<void> decode_immediate_lossless_text_region(JBIG2LoadingContext& context, SegmentData const& segment)
2872
0
{
2873
    // 7.4.3 Text region segment syntax
2874
    // "The data parts of all three of the text region segment types ("intermediate text region", "immediate text region" and
2875
    //  "immediate lossless text region") are coded identically, but are acted upon differently, see 8.2."
2876
    // But 8.2 only describes a difference between intermediate and immediate regions as far as I can tell,
2877
    // and calling the immediate text region handler for immediate lossless text regions seems to do the right thing (?).
2878
0
    return decode_immediate_text_region(context, segment);
2879
0
}
2880
2881
static ErrorOr<void> decode_pattern_dictionary(JBIG2LoadingContext&, SegmentData& segment)
2882
3
{
2883
    // 7.4.4 Pattern dictionary segment syntax
2884
3
    FixedMemoryStream stream(segment.data);
2885
2886
    // 7.4.4.1.1 Pattern dictionary flags
2887
3
    u8 flags = TRY(stream.read_value<u8>());
2888
3
    bool uses_mmr = flags & 1;
2889
3
    u8 hd_template = (flags >> 1) & 3;
2890
3
    if (uses_mmr && hd_template != 0)
2891
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid hd_template");
2892
3
    if (flags & 0b1111'1000)
2893
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid flags");
2894
2895
    // 7.4.4.1.2 Width of the patterns in the pattern dictionary (HDPW)
2896
3
    u8 width = TRY(stream.read_value<u8>());
2897
3
    if (width == 0)
2898
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid width");
2899
2900
    // 7.4.4.1.3 Height of the patterns in the pattern dictionary (HDPH)
2901
3
    u8 height = TRY(stream.read_value<u8>());
2902
3
    if (height == 0)
2903
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid height");
2904
2905
    // 7.4.4.1.4 Largest gray-scale value (GRAYMAX)
2906
3
    u32 gray_max = TRY(stream.read_value<BigEndian<u32>>());
2907
2908
    // 7.4.4.2 Decoding a pattern dictionary segment
2909
3
    dbgln_if(JBIG2_DEBUG, "Pattern dictionary: uses_mmr={}, hd_template={}, width={}, height={}, gray_max={}", uses_mmr, hd_template, width, height, gray_max);
2910
3
    auto data = segment.data.slice(TRY(stream.tell()));
2911
2912
    // "1) Interpret its header, as described in 7.4.4.1."
2913
    // Done!
2914
2915
    // "2) As described in E.3.7, reset all the arithmetic coding statistics to zero."
2916
3
    Optional<JBIG2::GenericContexts> contexts;
2917
3
    if (!uses_mmr)
2918
1
        contexts = JBIG2::GenericContexts { hd_template };
2919
2920
    // "3) Invoke the pattern dictionary decoding procedure described in 6.7, with the parameters to the pattern
2921
    //     dictionary decoding procedure set as shown in Table 35."
2922
3
    PatternDictionaryDecodingInputParameters inputs;
2923
3
    inputs.uses_mmr = uses_mmr;
2924
3
    inputs.width = width;
2925
3
    inputs.height = height;
2926
3
    inputs.gray_max = gray_max;
2927
3
    inputs.hd_template = hd_template;
2928
3
    auto result = TRY(pattern_dictionary_decoding_procedure(inputs, data, contexts));
2929
2930
1
    segment.patterns = move(result);
2931
2932
1
    return {};
2933
3
}
2934
2935
static ErrorOr<RegionResult> decode_halftone_region(JBIG2LoadingContext& context, SegmentData const& segment)
2936
0
{
2937
    // 7.4.5 Halftone region segment syntax
2938
0
    auto data = segment.data;
2939
0
    auto information_field = TRY(decode_region_segment_information_field(data));
2940
0
    data = data.slice(sizeof(information_field));
2941
2942
0
    dbgln_if(JBIG2_DEBUG, "Halftone region: width={}, height={}, x={}, y={}, flags={:#x}", information_field.width, information_field.height, information_field.x_location, information_field.y_location, information_field.flags);
2943
0
    TRY(validate_segment_combination_operator_consistency(context, information_field));
2944
2945
0
    FixedMemoryStream stream(data);
2946
2947
    // 7.4.5.1.1 Halftone region segment flags
2948
0
    u8 flags = TRY(stream.read_value<u8>());
2949
0
    bool uses_mmr = flags & 1;           // "HMMR" in spec.
2950
0
    u8 template_used = (flags >> 1) & 3; // "HTTEMPLATE" in spec.
2951
0
    if (uses_mmr && template_used != 0)
2952
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid template_used");
2953
0
    bool enable_skip = (flags >> 3) & 1;        // "HENABLESKIP" in spec.
2954
0
    u8 combination_operator = (flags >> 4) & 7; // "HCOMBOP" in spec.
2955
0
    if (combination_operator > 4)
2956
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid combination_operator");
2957
0
    bool default_pixel_value = (flags >> 7) & 1; // "HDEFPIXEL" in spec.
2958
2959
0
    dbgln_if(JBIG2_DEBUG, "Halftone region: uses_mmr={}, template_used={}, enable_skip={}, combination_operator={}, default_pixel_value={}", uses_mmr, template_used, enable_skip, combination_operator, default_pixel_value);
2960
2961
    // 7.4.5.1.2 Halftone grid position and size
2962
    // 7.4.5.1.2.1 Width of the gray-scale image (HGW)
2963
0
    u32 gray_width = TRY(stream.read_value<BigEndian<u32>>());
2964
2965
    // 7.4.5.1.2.2 Height of the gray-scale image (HGH)
2966
0
    u32 gray_height = TRY(stream.read_value<BigEndian<u32>>());
2967
2968
    // 7.4.5.1.2.3 Horizontal offset of the grid (HGX)
2969
0
    i32 grid_x = TRY(stream.read_value<BigEndian<i32>>());
2970
2971
    // 7.4.5.1.2.4 Vertical offset of the grid (HGY)
2972
0
    i32 grid_y = TRY(stream.read_value<BigEndian<i32>>());
2973
2974
    // 7.4.5.1.3 Halftone grid vector
2975
    // 7.4.5.1.3.1 Horizontal coordinate of the halftone grid vector (HRX)
2976
0
    u16 grid_vector_x = TRY(stream.read_value<BigEndian<u16>>());
2977
2978
    // 7.4.5.1.3.2 Vertical coordinate of the halftone grid vector (HRY)
2979
0
    u16 grid_vector_y = TRY(stream.read_value<BigEndian<u16>>());
2980
2981
0
    dbgln_if(JBIG2_DEBUG, "Halftone region: gray_width={}, gray_height={}, grid_x={}, grid_y={}, grid_vector_x={}, grid_vector_y={}", gray_width, gray_height, grid_x, grid_y, grid_vector_x, grid_vector_y);
2982
2983
    // 7.4.5.2 Decoding a halftone region segment
2984
    // "1) Interpret its header, as described in 7.4.5.1."
2985
    // Done!
2986
2987
    // "2) Decode (or retrieve the results of decoding) the referred-to pattern dictionary segment."
2988
0
    VERIFY(segment.referred_to_segments.size() == 1);
2989
0
    dbgln_if(JBIG2_DEBUG, "Halftone segment refers to segment id {}", segment.referred_to_segments[0]->header.segment_number);
2990
0
    Vector<BilevelSubImage> patterns = segment.referred_to_segments[0]->patterns.value();
2991
0
    if (patterns.is_empty())
2992
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Halftone segment without patterns");
2993
2994
    // "3) As described in E.3.7, reset all the arithmetic coding statistics to zero."
2995
0
    Optional<JBIG2::GenericContexts> contexts;
2996
0
    if (!uses_mmr)
2997
0
        contexts = JBIG2::GenericContexts { template_used };
2998
2999
    // "4) Invoke the halftone region decoding procedure described in 6.6, with the parameters to the halftone
3000
    //     region decoding procedure set as shown in Table 36."
3001
0
    data = data.slice(TRY(stream.tell()));
3002
0
    HalftoneRegionDecodingInputParameters inputs;
3003
0
    inputs.region_width = information_field.width;
3004
0
    inputs.region_height = information_field.height;
3005
0
    inputs.uses_mmr = uses_mmr;
3006
0
    inputs.halftone_template = template_used;
3007
0
    inputs.enable_skip = enable_skip;
3008
0
    inputs.combination_operator = static_cast<JBIG2::CombinationOperator>(combination_operator);
3009
0
    inputs.default_pixel_value = default_pixel_value;
3010
0
    inputs.grayscale_width = gray_width;
3011
0
    inputs.grayscale_height = gray_height;
3012
0
    inputs.grid_origin_x_offset = grid_x;
3013
0
    inputs.grid_origin_y_offset = grid_y;
3014
0
    inputs.grid_vector_x = grid_vector_x;
3015
0
    inputs.grid_vector_y = grid_vector_y;
3016
0
    inputs.patterns = move(patterns);
3017
0
    inputs.pattern_width = inputs.patterns[0].width();
3018
0
    inputs.pattern_height = inputs.patterns[0].height();
3019
0
    auto result = TRY(halftone_region_decoding_procedure(inputs, data, contexts));
3020
3021
0
    return RegionResult { .information_field = information_field, .bitmap = move(result) };
3022
0
}
3023
3024
static ErrorOr<void> decode_intermediate_halftone_region(JBIG2LoadingContext& context, SegmentData& segment)
3025
0
{
3026
0
    auto result = TRY(decode_halftone_region(context, segment));
3027
0
    return handle_intermediate_direct_region(context, segment, result);
3028
0
}
3029
3030
static ErrorOr<void> decode_immediate_halftone_region(JBIG2LoadingContext& context, SegmentData const& segment)
3031
0
{
3032
0
    auto result = TRY(decode_halftone_region(context, segment));
3033
0
    handle_immediate_direct_region(context, result);
3034
0
    return {};
3035
0
}
3036
3037
static ErrorOr<void> decode_immediate_lossless_halftone_region(JBIG2LoadingContext& context, SegmentData const& segment)
3038
0
{
3039
    // 7.4.5 Halftone region segment syntax
3040
    // "The data parts of all three of the halftone region segment types ("intermediate halftone region", "immediate halftone
3041
    //  region" and "immediate lossless halftone region") are coded identically, but are acted upon differently, see 8.2."
3042
    // But 8.2 only describes a difference between intermediate and immediate regions as far as I can tell,
3043
    // and calling the immediate halftone region handler for immediate lossless halftone regions seems to do the right thing (?).
3044
0
    return decode_immediate_halftone_region(context, segment);
3045
0
}
3046
3047
static ErrorOr<RegionResult> decode_generic_region(JBIG2LoadingContext& context, SegmentData const& segment)
3048
0
{
3049
    // 7.4.6 Generic region segment syntax
3050
0
    auto data = segment.data;
3051
0
    auto information_field = TRY(decode_region_segment_information_field(data));
3052
3053
    // "As a special case, as noted in 7.2.7, an immediate generic region segment may have an unknown length. In this case, it
3054
    //  is also possible that the segment may contain fewer rows of bitmap data than are indicated in the segment's region
3055
    //  segment information field.
3056
    //  In order for the decoder to correctly decode the segment, it needs to read the four-byte row count field, which is stored
3057
    //  in the last four bytes of the segment's data part. [...] The row count field contains the actual number of rows contained in
3058
    //  this segment; it must be no greater than the region segment bitmap height value in the segment's region segment
3059
    //  information field."
3060
    // scan_for_immediate_generic_region_size() made `data` the right size for this case, just need to get the rows from the end.
3061
0
    if (!segment.header.data_length.has_value()) {
3062
0
        auto last_four_bytes = data.slice_from_end(4);
3063
0
        u32 row_count = (last_four_bytes[0] << 24) | (last_four_bytes[1] << 16) | (last_four_bytes[2] << 8) | last_four_bytes[3];
3064
0
        if (row_count > information_field.height)
3065
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Row count after data for immediate generic region greater than region segment height");
3066
0
        if (row_count != information_field.height)
3067
0
            dbgln_if(JBIG2_DEBUG, "JBIG2ImageDecoderPlugin: Changing row count from {} to {}", information_field.height, row_count);
3068
0
        information_field.height = row_count;
3069
0
        data = data.slice(0, data.size() - 4);
3070
0
    }
3071
3072
0
    data = data.slice(sizeof(information_field));
3073
3074
0
    dbgln_if(JBIG2_DEBUG, "Generic region: width={}, height={}, x={}, y={}, flags={:#x}", information_field.width, information_field.height, information_field.x_location, information_field.y_location, information_field.flags);
3075
0
    TRY(validate_segment_combination_operator_consistency(context, information_field));
3076
3077
    // 7.4.6.2 Generic region segment flags
3078
0
    if (data.is_empty())
3079
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: No segment data");
3080
0
    u8 flags = data[0];
3081
0
    bool uses_mmr = (flags & 1) != 0;
3082
3083
    // "GBTEMPLATE"
3084
    // "If MMR is 1 then this field must contain the value zero."
3085
0
    u8 arithmetic_coding_template = (flags >> 1) & 3;
3086
0
    if (uses_mmr && arithmetic_coding_template != 0)
3087
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid GBTEMPLATE");
3088
3089
0
    bool typical_prediction_generic_decoding_on = (flags >> 3) & 1; // "TPGDON"; "TPGD" is short for "Typical Prediction for Generic Direct coding"
3090
0
    bool uses_extended_reference_template = (flags >> 4) & 1;       // "EXTTEMPLATE"
3091
0
    if (flags & 0b1110'0000)
3092
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid flags");
3093
0
    data = data.slice(sizeof(flags));
3094
3095
    // 7.4.6.3 Generic region segment AT flags
3096
0
    Array<JBIG2::AdaptiveTemplatePixel, 12> adaptive_template_pixels {};
3097
0
    if (!uses_mmr) {
3098
0
        dbgln_if(JBIG2_DEBUG, "Non-MMR generic region, GBTEMPLATE={} TPGDON={} EXTTEMPLATE={}", arithmetic_coding_template, typical_prediction_generic_decoding_on, uses_extended_reference_template);
3099
3100
0
        if (arithmetic_coding_template == 0 && uses_extended_reference_template) {
3101
            // This was added in T.88 Amendment 2 (https://www.itu.int/rec/T-REC-T.88-200306-S!Amd2/en) mid-2003.
3102
            // I haven't seen it being used in the wild, and the spec says "32-byte field as shown below" and then shows 24 bytes,
3103
            // so it's not clear how much data to read.
3104
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: GBTEMPLATE=0 EXTTEMPLATE=1 not yet implemented");
3105
0
        }
3106
3107
0
        size_t number_of_adaptive_template_pixels = arithmetic_coding_template == 0 ? 4 : 1;
3108
0
        if (data.size() < 2 * number_of_adaptive_template_pixels)
3109
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: No adaptive template data");
3110
0
        for (size_t i = 0; i < number_of_adaptive_template_pixels; ++i) {
3111
0
            adaptive_template_pixels[i].x = static_cast<i8>(data[2 * i]);
3112
0
            adaptive_template_pixels[i].y = static_cast<i8>(data[2 * i + 1]);
3113
0
            dbgln_if(JBIG2_DEBUG, "GBAT{}: {}, {}", i, adaptive_template_pixels[i].x, adaptive_template_pixels[i].y);
3114
0
        }
3115
0
        data = data.slice(2 * number_of_adaptive_template_pixels);
3116
0
    }
3117
3118
    // 7.4.6.4 Decoding a generic region segment
3119
    // "1) Interpret its header, as described in 7.4.6.1"
3120
    // Done above.
3121
    // "2) As described in E.3.7, reset all the arithmetic coding statistics to zero."
3122
0
    Optional<JBIG2::GenericContexts> contexts;
3123
0
    if (!uses_mmr)
3124
0
        contexts = JBIG2::GenericContexts { arithmetic_coding_template };
3125
3126
    // "3) Invoke the generic region decoding procedure described in 6.2, with the parameters to the generic region decoding procedure set as shown in Table 37."
3127
0
    GenericRegionDecodingInputParameters inputs;
3128
0
    inputs.is_modified_modified_read = uses_mmr;
3129
0
    inputs.region_width = information_field.width;
3130
0
    inputs.region_height = information_field.height;
3131
0
    inputs.gb_template = arithmetic_coding_template;
3132
0
    inputs.is_typical_prediction_used = typical_prediction_generic_decoding_on;
3133
0
    inputs.is_extended_reference_template_used = uses_extended_reference_template;
3134
0
    inputs.skip_pattern = OptionalNone {};
3135
0
    inputs.adaptive_template_pixels = adaptive_template_pixels;
3136
3137
0
    Optional<FixedMemoryStream> stream;
3138
0
    Optional<MQArithmeticDecoder> decoder;
3139
0
    if (uses_mmr) {
3140
0
        stream = FixedMemoryStream { data };
3141
0
        inputs.stream = &stream.value();
3142
0
    } else {
3143
0
        decoder = TRY(MQArithmeticDecoder::initialize(data));
3144
0
        inputs.arithmetic_decoder = &decoder.value();
3145
0
    }
3146
3147
0
    auto result = TRY(generic_region_decoding_procedure(inputs, contexts));
3148
3149
    // 8.2 Page image composition step 5)
3150
0
    if (information_field.x_location + information_field.width > (u32)context.page.size.width()
3151
0
        || information_field.y_location + information_field.height > (u32)context.page.size.height()) {
3152
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Region bounds outsize of page bounds");
3153
0
    }
3154
3155
0
    return RegionResult { .information_field = information_field, .bitmap = move(result) };
3156
0
}
3157
3158
static ErrorOr<void> decode_intermediate_generic_region(JBIG2LoadingContext& context, SegmentData& segment)
3159
0
{
3160
0
    auto result = TRY(decode_generic_region(context, segment));
3161
0
    return handle_intermediate_direct_region(context, segment, result);
3162
0
}
3163
3164
static ErrorOr<void> decode_immediate_generic_region(JBIG2LoadingContext& context, SegmentData const& segment)
3165
0
{
3166
0
    auto result = TRY(decode_generic_region(context, segment));
3167
0
    handle_immediate_direct_region(context, result);
3168
0
    return {};
3169
0
}
3170
3171
static ErrorOr<void> decode_immediate_lossless_generic_region(JBIG2LoadingContext& context, SegmentData const& segment)
3172
0
{
3173
    // 7.4.6 Generic region segment syntax
3174
    // "The data parts of all three of the generic region segment types ("intermediate generic region", "immediate generic region" and
3175
    //  "immediate lossless generic region") are coded identically, but are acted upon differently, see 8.2."
3176
    // But 8.2 only describes a difference between intermediate and immediate regions as far as I can tell,
3177
    // and calling the immediate generic region handler for immediate lossless generic regions seems to do the right thing (?).
3178
0
    return decode_immediate_generic_region(context, segment);
3179
0
}
3180
3181
static ErrorOr<RegionResult> decode_generic_refinement_region(JBIG2LoadingContext& context, SegmentData const& segment)
3182
0
{
3183
    // 7.4.7 Generic refinement region syntax
3184
0
    auto data = segment.data;
3185
0
    auto information_field = TRY(decode_region_segment_information_field(data));
3186
0
    data = data.slice(sizeof(information_field));
3187
3188
0
    dbgln_if(JBIG2_DEBUG, "Generic refinement region: width={}, height={}, x={}, y={}, flags={:#x}", information_field.width, information_field.height, information_field.x_location, information_field.y_location, information_field.flags);
3189
0
    TRY(validate_segment_combination_operator_consistency(context, information_field));
3190
3191
0
    FixedMemoryStream stream(data);
3192
3193
    // 7.4.7.2 Generic refinement region segment flags
3194
0
    u8 flags = TRY(stream.read_value<u8>());
3195
0
    u8 arithmetic_coding_template = flags & 1;                        // "GRTEMPLATE"
3196
0
    bool typical_prediction_generic_refinement_on = (flags >> 1) & 1; // "TPGRON"; "TPGR" is short for "Typical Prediction for Generic Refinement coding"
3197
0
    if (flags & 0b1111'1100)
3198
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid refinement flags");
3199
3200
0
    dbgln_if(JBIG2_DEBUG, "GRTEMPLATE={} TPGRON={}", arithmetic_coding_template, typical_prediction_generic_refinement_on);
3201
3202
    // 7.4.7.3 Generic refinement region segment AT flags
3203
0
    Array<JBIG2::AdaptiveTemplatePixel, 2> adaptive_template_pixels {};
3204
0
    if (arithmetic_coding_template == 0) {
3205
0
        for (size_t i = 0; i < 2; ++i) {
3206
0
            adaptive_template_pixels[i].x = TRY(stream.read_value<i8>());
3207
0
            adaptive_template_pixels[i].y = TRY(stream.read_value<i8>());
3208
0
            dbgln_if(JBIG2_DEBUG, "GRAT{}: {}, {}", i, adaptive_template_pixels[i].x, adaptive_template_pixels[i].y);
3209
0
        }
3210
0
    }
3211
3212
    // 7.4.7.5 Decoding a generic refinement region segment
3213
    // "1) Interpret its header as described in 7.4.7.1."
3214
    // Done above.
3215
3216
0
    VERIFY(segment.referred_to_segments.size() <= 1);
3217
3218
    // "If this segment does not refer to another region segment then its external combination operator must be REPLACE."
3219
0
    if (segment.referred_to_segments.is_empty()) {
3220
0
        if (information_field.external_combination_operator() != JBIG2::CombinationOperator::Replace)
3221
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Generic refinement region without reference segment must use REPLACE operator");
3222
0
    }
3223
    // "If it does refer to another region segment, then this segment's region bitmap size, location, and external combination operator
3224
    //  must be equal to that other segment's region bitmap size, location, and external combination operator."
3225
0
    else {
3226
0
        auto const& other_information_field = segment.referred_to_segments[0]->aux_buffer_information_field;
3227
0
        if (information_field.width != other_information_field.width
3228
0
            || information_field.height != other_information_field.height
3229
0
            || information_field.x_location != other_information_field.x_location
3230
0
            || information_field.y_location != other_information_field.y_location
3231
0
            || information_field.external_combination_operator() != other_information_field.external_combination_operator()) {
3232
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Generic refinement region with reference segment must match size, location and combination operator of referenced segment");
3233
0
        }
3234
0
    }
3235
3236
    // "2) As described in E.3.7, reset all the arithmetic coding statistics to zero."
3237
0
    JBIG2::RefinementContexts contexts { arithmetic_coding_template };
3238
3239
    // "3) Determine the buffer associated with the region segment that this segment refers to."
3240
    // Details described in 7.4.7.4 Reference bitmap selection.
3241
0
    BilevelSubImage reference_bitmap = [&]() {
3242
0
        if (segment.referred_to_segments.size() == 1) {
3243
0
            auto reference_bitmap = segment.referred_to_segments[0]->aux_buffer;
3244
0
            VERIFY(reference_bitmap->width() == segment.referred_to_segments[0]->aux_buffer_information_field.width);
3245
0
            VERIFY(reference_bitmap->height() == segment.referred_to_segments[0]->aux_buffer_information_field.height);
3246
0
            return reference_bitmap->as_subbitmap();
3247
0
        }
3248
3249
        // Enforced by validate_segment_header_references() earlier.
3250
0
        VERIFY(segment.type() != JBIG2::SegmentType::IntermediateGenericRefinementRegion);
3251
3252
0
        return context.page.bits->subbitmap(information_field.rect());
3253
0
    }();
3254
3255
    // "4) Invoke the generic refinement region decoding procedure described in 6.3, with the parameters to the
3256
    //     generic refinement region decoding procedure set as shown in Table 38."
3257
0
    data = data.slice(TRY(stream.tell()));
3258
0
    GenericRefinementRegionDecodingInputParameters inputs;
3259
0
    inputs.region_width = information_field.width;
3260
0
    inputs.region_height = information_field.height;
3261
0
    inputs.gr_template = arithmetic_coding_template;
3262
0
    inputs.reference_bitmap = &reference_bitmap;
3263
0
    inputs.reference_x_offset = 0;
3264
0
    inputs.reference_y_offset = 0;
3265
0
    inputs.is_typical_prediction_used = typical_prediction_generic_refinement_on;
3266
0
    inputs.adaptive_template_pixels = adaptive_template_pixels;
3267
3268
0
    auto decoder = TRY(MQArithmeticDecoder::initialize(data));
3269
0
    auto result = TRY(generic_refinement_region_decoding_procedure(inputs, decoder, contexts));
3270
0
    return RegionResult { .information_field = information_field, .bitmap = move(result) };
3271
0
}
3272
3273
static ErrorOr<void> decode_intermediate_generic_refinement_region(JBIG2LoadingContext& context, SegmentData& segment)
3274
0
{
3275
0
    auto result = TRY(decode_generic_refinement_region(context, segment));
3276
3277
    // 8.2 Page image composition, 5e.
3278
0
    VERIFY(result.bitmap->width() == result.information_field.width);
3279
0
    VERIFY(result.bitmap->height() == result.information_field.height);
3280
0
    segment.aux_buffer = move(result.bitmap);
3281
0
    segment.aux_buffer_information_field = result.information_field;
3282
0
    return {};
3283
0
}
3284
3285
static ErrorOr<void> decode_immediate_generic_refinement_region(JBIG2LoadingContext& context, SegmentData const& segment)
3286
0
{
3287
0
    auto result = TRY(decode_generic_refinement_region(context, segment));
3288
3289
    // 8.2 Page image composition, 5d.
3290
0
    result.bitmap->composite_onto(
3291
0
        *context.page.bits,
3292
0
        { result.information_field.x_location, result.information_field.y_location },
3293
0
        to_composition_type(result.information_field.external_combination_operator()));
3294
3295
0
    return {};
3296
0
}
3297
3298
static ErrorOr<void> decode_immediate_lossless_generic_refinement_region(JBIG2LoadingContext& context, SegmentData const& segment)
3299
0
{
3300
    // 7.4.7 Generic refinement region syntax
3301
    // "The data parts of all three of the generic refinement region segment types ("intermediate generic refinement region",
3302
    //  "immediate generic refinement region" and "immediate lossless generic refinement region") are coded identically, but
3303
    //  are acted upon differently, see 8.2."
3304
    // But 8.2 only describes a difference between intermediate and immediate regions as far as I can tell,
3305
    // and calling the immediate generic refinement region handler for immediate lossless generic refinement regions seems to do the right thing (?).
3306
0
    return decode_immediate_generic_refinement_region(context, segment);
3307
0
}
3308
3309
static ErrorOr<void> decode_page_information(JBIG2LoadingContext& context, SegmentData const& segment)
3310
16
{
3311
    // 7.4.8 Page information segment syntax and 8.1 Decoder model steps 1) - 3).
3312
3313
    // "1) Decode the page information segment."
3314
16
    auto page_information = TRY(decode_page_information_segment(segment.data));
3315
3316
16
    u8 default_color = page_information.default_color();
3317
16
    context.page.default_combination_operator = page_information.default_combination_operator();
3318
16
    context.page.direct_region_segments_override_default_combination_operator = page_information.direct_region_segments_override_default_combination_operator();
3319
3320
16
    if (page_information.bitmap_height == 0xffff'ffff && !page_information.page_is_striped())
3321
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Non-striped bitmaps of indeterminate height not allowed");
3322
3323
16
    dbgln_if(JBIG2_DEBUG, "Page information: width={}, height={}, x_resolution={}, y_resolution={}, is_striped={}, max_stripe_size={}", page_information.bitmap_width, page_information.bitmap_height, page_information.page_x_resolution, page_information.page_y_resolution, page_information.page_is_striped(), page_information.maximum_stripe_size());
3324
16
    dbgln_if(JBIG2_DEBUG, "Page information flags: {:#02x}", page_information.flags);
3325
16
    dbgln_if(JBIG2_DEBUG, "    is_eventually_lossless={}", page_information.is_eventually_lossless());
3326
16
    dbgln_if(JBIG2_DEBUG, "    might_contain_refinements={}", page_information.might_contain_refinements());
3327
16
    dbgln_if(JBIG2_DEBUG, "    default_color={}", default_color);
3328
16
    dbgln_if(JBIG2_DEBUG, "    default_combination_operator={}", (int)context.page.default_combination_operator);
3329
16
    dbgln_if(JBIG2_DEBUG, "    requires_auxiliary_buffers={}", page_information.requires_auxiliary_buffers());
3330
16
    dbgln_if(JBIG2_DEBUG, "    direct_region_segments_override_default_combination_operator={}", context.page.direct_region_segments_override_default_combination_operator);
3331
16
    dbgln_if(JBIG2_DEBUG, "    might_contain_coloured_segment={}", page_information.might_contain_coloured_segments());
3332
3333
    // "2) Create the page buffer, of the size given in the page information segment.
3334
    //
3335
    //     If the page height is unknown, then this is not possible. However, in this case the page must be striped,
3336
    //     and the maximum stripe height specified, and the initial page buffer can be created with height initially
3337
    //     equal to this maximum stripe height."
3338
    // ...but we don't care about streaming input (yet?), so scan_for_page_size() already looked at all segment headers
3339
    // and filled in context.page.size from page information and end of stripe segments.
3340
16
    context.page.bits = TRY(BilevelImage::create(context.page.size.width(), context.page.size.height()));
3341
3342
    // "3) Fill the page buffer with the page's default pixel value."
3343
16
    context.page.bits->fill(default_color != 0);
3344
3345
16
    return {};
3346
16
}
3347
3348
static ErrorOr<void> decode_end_of_page(JBIG2LoadingContext&, SegmentData const& segment)
3349
6
{
3350
    // 7.4.9 End of page segment syntax
3351
6
    if (segment.data.size() != 0)
3352
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of page segment has non-zero size");
3353
3354
6
    dbgln_if(JBIG2_DEBUG, "End of page");
3355
3356
    // Actual processing of this segment is in scan_for_page_size().
3357
6
    return {};
3358
6
}
3359
3360
static ErrorOr<void> decode_end_of_stripe(JBIG2LoadingContext&, SegmentData const& segment)
3361
0
{
3362
    // 7.4.10 End of stripe segment syntax
3363
0
    auto end_of_stripe = TRY(decode_end_of_stripe_segment(segment.data));
3364
3365
    // The data in these segments is used in scan_for_page_size().
3366
0
    dbgln_if(JBIG2_DEBUG, "End of stripe: y={}", end_of_stripe.y_coordinate);
3367
3368
0
    return {};
3369
0
}
3370
3371
static ErrorOr<void> decode_end_of_file(JBIG2LoadingContext&, SegmentData const& segment)
3372
0
{
3373
    // 7.4.11 End of file segment syntax
3374
0
    if (segment.data.size() != 0)
3375
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of file segment has non-zero size");
3376
3377
0
    dbgln_if(JBIG2_DEBUG, "End of file");
3378
3379
0
    return {};
3380
0
}
3381
3382
static ErrorOr<void> decode_profiles(JBIG2LoadingContext&, SegmentData const&)
3383
0
{
3384
0
    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Cannot decode profiles yet");
3385
0
}
3386
3387
static ErrorOr<void> decode_tables(JBIG2LoadingContext&, SegmentData& segment)
3388
2
{
3389
    // 7.4.13 Code table segment syntax
3390
    // B.2 Code table structure
3391
2
    FixedMemoryStream stream { segment.data };
3392
3393
    // "1) Decode the code table flags field as described in B.2.1. This sets the values HTOOB, HTPS and HTRS."
3394
2
    u8 flags = TRY(stream.read_value<u8>());
3395
2
    if (flags & 0x80)
3396
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid code table flags");
3397
2
    bool has_out_of_band = flags & 1;             // "HTOOB" in spec.
3398
2
    u8 prefix_bit_count = ((flags >> 1) & 7) + 1; // "HTPS" (hash table prefix size) in spec.
3399
2
    u8 range_bit_count = ((flags >> 4) & 7) + 1;  // "HTRS" (hash table range size) in spec.
3400
2
    dbgln_if(JBIG2_DEBUG, "Tables: has_out_of_band={}, prefix_bit_count={}, range_bit_count={}", has_out_of_band, prefix_bit_count, range_bit_count);
3401
3402
    // "2) Decode the code table lowest value field as described in B.2.2. Let HTLOW be the value decoded."
3403
2
    i32 lowest_value = TRY(stream.read_value<BigEndian<i32>>()); // "HTLOW" in spec.
3404
2
    dbgln_if(JBIG2_DEBUG, "Tables: lower bound={}", lowest_value);
3405
3406
    // "3) Decode the code table highest value field as described in B.2.3. Let HTHIGH be the value decoded."
3407
2
    i32 highest_value = TRY(stream.read_value<BigEndian<i32>>()); // "HTHIGH" in spec.
3408
2
    dbgln_if(JBIG2_DEBUG, "Tables: One more than upper bound={}", highest_value);
3409
3410
    // "4) Set:
3411
    //         CURRANGELOW = HTLOW
3412
    //         NTEMP = 0"
3413
2
    i32 value = lowest_value; // "CURRANGELOW" in spec.
3414
2
    auto bit_stream = BigEndianInputBitStream { MaybeOwned { stream } };
3415
3416
    // "5) Decode each table line as follows:"
3417
2
    Vector<u8> prefix_lengths;
3418
2
    Vector<u8> range_lengths;
3419
2
    Vector<Optional<i32>> range_lows;
3420
223
    do {
3421
        // "a) Read HTPS bits. Set PREFLEN[NTEMP] to the value decoded."
3422
223
        u8 prefix_length = TRY(bit_stream.read_bits<u8>(prefix_bit_count));
3423
223
        TRY(prefix_lengths.try_append(prefix_length));
3424
3425
        // "b) Read HTRS bits. Let RANGELEN[NTEMP] be the value decoded."
3426
223
        u8 range_length = TRY(bit_stream.read_bits<u8>(range_bit_count));
3427
223
        TRY(range_lengths.try_append(range_length));
3428
3429
223
        dbgln_if(JBIG2_DEBUG, "Tables[{}]: prefix_length={}, range_length={}, range_low={}", prefix_lengths.size() - 1, prefix_length, range_length, value);
3430
3431
        // "c) Set:
3432
        //         RANGELOW[NTEMP] = CURRANGELOW
3433
        //         CURRANGELOW = CURRANGELOW + 2 ** RANGELEN[NTEMP]
3434
        //         NTEMP = NTEMP + 1"
3435
223
        TRY(range_lows.try_append(value));
3436
223
        value += 1 << range_length;
3437
3438
        // "d) If CURRANGELOW ≥ HTHIGH then proceed to step 6)."
3439
223
    } while (value < highest_value);
3440
3441
    // "6) Read HTPS bits. Let LOWPREFLEN be the value read."
3442
2
    u8 prefix_length = TRY(bit_stream.read_bits<u8>(prefix_bit_count)); // "LOWPREFLEN" in spec.
3443
3444
2
    dbgln_if(JBIG2_DEBUG, "lower: prefix_length={}", prefix_length);
3445
3446
    // "7) [...] This is the lower range table line for this table."
3447
2
    TRY(prefix_lengths.try_append(prefix_length));
3448
2
    TRY(range_lengths.try_append(32));
3449
2
    TRY(range_lows.try_append(lowest_value - 1));
3450
3451
    // "8) Read HTPS bits. Let HIGHPREFLEN be the value read."
3452
2
    prefix_length = TRY(bit_stream.read_bits<u8>(prefix_bit_count)); // "HIGHPREFLEN" in spec.
3453
3454
2
    dbgln_if(JBIG2_DEBUG, "upper: prefix_length={}", prefix_length);
3455
3456
    // "9) [...] This is the upper range table line for this table."
3457
2
    TRY(prefix_lengths.try_append(prefix_length));
3458
2
    TRY(range_lengths.try_append(32));
3459
2
    TRY(range_lows.try_append(highest_value));
3460
3461
    // "10) If HTOOB is 1, then:"
3462
2
    if (has_out_of_band) {
3463
        // "a) Read HTPS bits. Let OOBPREFLEN be the value read."
3464
1
        prefix_length = TRY(bit_stream.read_bits<u8>(prefix_bit_count)); // "OOBPREFLEN" in spec.
3465
3466
1
        dbgln_if(JBIG2_DEBUG, "oob: prefix_length={}", prefix_length);
3467
3468
        // "b) [...] This is the out-of-band table line for this table. Note that there is no range associated with this value."
3469
1
        TRY(prefix_lengths.try_append(prefix_length));
3470
1
        TRY(range_lengths.try_append(0));
3471
1
        TRY(range_lows.try_append(OptionalNone {}));
3472
1
    }
3473
3474
    // "11) Create the prefix codes using the algorithm described in B.3."
3475
2
    auto codes = TRY(JBIG2::assign_huffman_codes(prefix_lengths));
3476
3477
2
    Vector<JBIG2::Code> table_codes;
3478
228
    for (auto const& [i, length] : enumerate(prefix_lengths)) {
3479
228
        if (length == 0)
3480
157
            continue;
3481
3482
71
        JBIG2::Code code { .prefix_length = length, .range_length = range_lengths[i], .first_value = range_lows[i], .code = codes[i] };
3483
71
        if (i == prefix_lengths.size() - (has_out_of_band ? 3 : 2))
3484
1
            code.prefix_length |= JBIG2::Code::LowerRangeBit;
3485
71
        table_codes.append(code);
3486
71
    }
3487
3488
2
    segment.codes = move(table_codes);
3489
2
    segment.huffman_table = JBIG2::HuffmanTable { segment.codes->span(), has_out_of_band };
3490
3491
2
    return {};
3492
2
}
3493
3494
static ErrorOr<void> decode_color_palette(JBIG2LoadingContext&, SegmentData const&)
3495
0
{
3496
0
    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Cannot decode color palette yet");
3497
0
}
3498
3499
static ErrorOr<void> decode_extension(JBIG2LoadingContext& context, SegmentData const& segment)
3500
0
{
3501
    // 7.4.14 Extension segment syntax
3502
0
    FixedMemoryStream stream { segment.data };
3503
3504
0
    u32 type = TRY(stream.read_value<BigEndian<u32>>());
3505
3506
0
    dbgln_if(JBIG2_DEBUG, "Extension, type {:#x}", type);
3507
3508
0
    auto read_string = [&]<class T>() -> ErrorOr<Vector<T>> {
3509
0
        Vector<T> result;
3510
0
        do {
3511
0
            result.append(TRY(stream.read_value<BigEndian<T>>()));
3512
0
        } while (result.last());
3513
0
        result.take_last();
3514
0
        return result;
3515
0
    };
Unexecuted instantiation: JBIG2Loader.cpp:AK::ErrorOr<AK::Vector<unsigned char, 0ul>, AK::Error> Gfx::decode_extension(Gfx::JBIG2LoadingContext&, Gfx::SegmentData const&)::$_0::operator()<unsigned char>() const
Unexecuted instantiation: JBIG2Loader.cpp:AK::ErrorOr<AK::Vector<unsigned short, 0ul>, AK::Error> Gfx::decode_extension(Gfx::JBIG2LoadingContext&, Gfx::SegmentData const&)::$_0::operator()<unsigned short>() const
3516
3517
0
    switch (type) {
3518
0
    case to_underlying(JBIG2::ExtensionType::SingleByteCodedComment): {
3519
        // 7.4.15.1 Single-byte coded comment
3520
        // Pairs of zero-terminated ISO/IEC 8859-1 (latin1) pairs, terminated by another \0.
3521
0
        while (true) {
3522
0
            auto first_bytes = TRY(read_string.template operator()<u8>());
3523
0
            if (first_bytes.is_empty())
3524
0
                break;
3525
3526
0
            auto second_bytes = TRY(read_string.template operator()<u8>());
3527
3528
0
            auto first = TRY(TextCodec::decoder_for_exact_name("ISO-8859-1"sv)->to_utf8(StringView { first_bytes }));
3529
0
            auto second = TRY(TextCodec::decoder_for_exact_name("ISO-8859-1"sv)->to_utf8(StringView { second_bytes }));
3530
0
            if (context.options.log_comments == JBIG2DecoderOptions::LogComments::Yes)
3531
0
                dbgln("JBIG2ImageDecoderPlugin: key '{}', value '{}'", first, second);
3532
0
        }
3533
0
        if (!stream.is_eof())
3534
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Trailing data after SingleByteCodedComment");
3535
0
        return {};
3536
0
    }
3537
0
    case to_underlying(JBIG2::ExtensionType::MultiByteCodedComment): {
3538
        // 7.4.15.2 Multi-byte coded comment
3539
        // Pairs of (two-byte-)zero-terminated UCS-2 pairs, terminated by another \0\0.
3540
0
        while (true) {
3541
0
            auto first_ucs2 = TRY(read_string.template operator()<u16>());
3542
0
            if (first_ucs2.is_empty())
3543
0
                break;
3544
3545
0
            auto second_ucs2 = TRY(read_string.template operator()<u16>());
3546
3547
0
            auto first = TRY(Utf16View(first_ucs2).to_utf8());
3548
0
            auto second = TRY(Utf16View(second_ucs2).to_utf8());
3549
0
            if (context.options.log_comments == JBIG2DecoderOptions::LogComments::Yes)
3550
0
                dbgln("JBIG2ImageDecoderPlugin: key '{}', value '{}'", first, second);
3551
0
        }
3552
0
        if (!stream.is_eof())
3553
0
            return Error::from_string_literal("JBIG2ImageDecoderPlugin: Trailing data after MultiByteCodedComment");
3554
0
        return {};
3555
0
    }
3556
0
    }
3557
3558
    // FIXME: If bit 31 in `type` is not set, the extension isn't necessary, and we could ignore it.
3559
0
    dbgln("JBIG2ImageDecoderPlugin: Unknown extension type {:#x}", type);
3560
0
    return Error::from_string_literal("JBIG2ImageDecoderPlugin: Unknown extension type");
3561
0
}
3562
3563
static ErrorOr<void> decode_data(JBIG2LoadingContext& context)
3564
16
{
3565
46
    for (size_t i = 0; i < context.segments.size(); ++i) {
3566
40
        auto& segment = context.segments[i];
3567
3568
40
        if (segment.header.page_association != 0 && segment.header.page_association != context.current_page_number)
3569
2
            continue;
3570
3571
38
        switch (segment.type()) {
3572
4
        case JBIG2::SegmentType::SymbolDictionary:
3573
4
            TRY(decode_symbol_dictionary(context, segment));
3574
0
            break;
3575
0
        case JBIG2::SegmentType::IntermediateTextRegion:
3576
0
            TRY(decode_intermediate_text_region(context, segment));
3577
0
            break;
3578
7
        case JBIG2::SegmentType::ImmediateTextRegion:
3579
7
            TRY(decode_immediate_text_region(context, segment));
3580
3
            break;
3581
0
        case JBIG2::SegmentType::ImmediateLosslessTextRegion:
3582
0
            TRY(decode_immediate_lossless_text_region(context, segment));
3583
0
            break;
3584
3
        case JBIG2::SegmentType::PatternDictionary:
3585
3
            TRY(decode_pattern_dictionary(context, segment));
3586
1
            break;
3587
0
        case JBIG2::SegmentType::IntermediateHalftoneRegion:
3588
0
            TRY(decode_intermediate_halftone_region(context, segment));
3589
0
            break;
3590
0
        case JBIG2::SegmentType::ImmediateHalftoneRegion:
3591
0
            TRY(decode_immediate_halftone_region(context, segment));
3592
0
            break;
3593
0
        case JBIG2::SegmentType::ImmediateLosslessHalftoneRegion:
3594
0
            TRY(decode_immediate_lossless_halftone_region(context, segment));
3595
0
            break;
3596
0
        case JBIG2::SegmentType::IntermediateGenericRegion:
3597
0
            TRY(decode_intermediate_generic_region(context, segment));
3598
0
            break;
3599
0
        case JBIG2::SegmentType::ImmediateGenericRegion:
3600
0
            TRY(decode_immediate_generic_region(context, segment));
3601
0
            break;
3602
0
        case JBIG2::SegmentType::ImmediateLosslessGenericRegion:
3603
0
            TRY(decode_immediate_lossless_generic_region(context, segment));
3604
0
            break;
3605
0
        case JBIG2::SegmentType::IntermediateGenericRefinementRegion:
3606
0
            TRY(decode_intermediate_generic_refinement_region(context, segment));
3607
0
            break;
3608
0
        case JBIG2::SegmentType::ImmediateGenericRefinementRegion:
3609
0
            TRY(decode_immediate_generic_refinement_region(context, segment));
3610
0
            break;
3611
0
        case JBIG2::SegmentType::ImmediateLosslessGenericRefinementRegion:
3612
0
            TRY(decode_immediate_lossless_generic_refinement_region(context, segment));
3613
0
            break;
3614
16
        case JBIG2::SegmentType::PageInformation:
3615
16
            TRY(decode_page_information(context, segment));
3616
16
            break;
3617
6
        case JBIG2::SegmentType::EndOfPage:
3618
6
            TRY(decode_end_of_page(context, segment));
3619
6
            break;
3620
0
        case JBIG2::SegmentType::EndOfStripe:
3621
0
            TRY(decode_end_of_stripe(context, segment));
3622
0
            break;
3623
0
        case JBIG2::SegmentType::EndOfFile:
3624
0
            TRY(decode_end_of_file(context, segment));
3625
            // "If a file contains an end of file segment, it must be the last segment."
3626
0
            if (i != context.segments.size() - 1)
3627
0
                return Error::from_string_literal("JBIG2ImageDecoderPlugin: End of file segment not last segment");
3628
0
            break;
3629
0
        case JBIG2::SegmentType::Profiles:
3630
0
            TRY(decode_profiles(context, segment));
3631
0
            break;
3632
2
        case JBIG2::SegmentType::Tables:
3633
2
            TRY(decode_tables(context, segment));
3634
2
            break;
3635
0
        case JBIG2::SegmentType::ColorPalette:
3636
0
            TRY(decode_color_palette(context, segment));
3637
0
            break;
3638
0
        case JBIG2::SegmentType::Extension:
3639
0
            TRY(decode_extension(context, segment));
3640
0
            break;
3641
38
        }
3642
3643
28
        dbgln_if(JBIG2_DEBUG, "");
3644
28
    }
3645
3646
6
    return {};
3647
16
}
3648
3649
JBIG2ImageDecoderPlugin::JBIG2ImageDecoderPlugin(JBIG2DecoderOptions options)
3650
417
{
3651
417
    m_context = make<JBIG2LoadingContext>();
3652
417
    m_context->options = options;
3653
417
}
3654
3655
417
JBIG2ImageDecoderPlugin::~JBIG2ImageDecoderPlugin() = default;
3656
3657
IntSize JBIG2ImageDecoderPlugin::size()
3658
0
{
3659
0
    return m_context->page.size;
3660
0
}
3661
3662
bool JBIG2ImageDecoderPlugin::sniff(ReadonlyBytes data)
3663
417
{
3664
417
    return data.starts_with(JBIG2::id_string);
3665
417
}
3666
3667
ErrorOr<NonnullOwnPtr<ImageDecoderPlugin>> JBIG2ImageDecoderPlugin::create(ReadonlyBytes data)
3668
417
{
3669
417
    return create_with_options(data, {});
3670
417
}
3671
3672
ErrorOr<NonnullOwnPtr<ImageDecoderPlugin>> JBIG2ImageDecoderPlugin::create_with_options(ReadonlyBytes data, JBIG2DecoderOptions options)
3673
417
{
3674
417
    auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin(options)));
3675
417
    TRY(decode_jbig2_header(*plugin->m_context, data));
3676
3677
412
    data = data.slice(sizeof(JBIG2::id_string) + sizeof(u8) + (plugin->m_context->number_of_pages.has_value() ? sizeof(u32) : 0));
3678
412
    TRY(decode_segment_headers(*plugin->m_context, data));
3679
121
    TRY(complete_decoding_all_segment_headers(*plugin->m_context));
3680
3681
31
    TRY(scan_for_page_size(*plugin->m_context));
3682
17
    TRY(scan_for_page_numbers(*plugin->m_context));
3683
3684
16
    return plugin;
3685
17
}
3686
3687
size_t JBIG2ImageDecoderPlugin::frame_count()
3688
16
{
3689
16
    return m_context->page_numbers.size();
3690
16
}
3691
3692
ErrorOr<ImageFrameDescriptor> JBIG2ImageDecoderPlugin::frame(size_t index, Optional<IntSize>)
3693
16
{
3694
16
    if (index >= frame_count())
3695
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Invalid frame index");
3696
3697
16
    if (m_context->current_page_number != m_context->page_numbers[index]) {
3698
0
        m_context->current_page_number = m_context->page_numbers[index];
3699
0
        m_context->state = JBIG2LoadingContext::State::NotDecoded;
3700
0
        TRY(scan_for_page_size(*m_context));
3701
0
    }
3702
3703
16
    if (m_context->state == JBIG2LoadingContext::State::Error)
3704
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Decoding failed");
3705
3706
16
    if (m_context->state < JBIG2LoadingContext::State::Decoded) {
3707
16
        auto result = decode_data(*m_context);
3708
16
        if (result.is_error()) {
3709
10
            m_context->state = JBIG2LoadingContext::State::Error;
3710
10
            return result.release_error();
3711
10
        }
3712
6
        m_context->state = JBIG2LoadingContext::State::Decoded;
3713
6
    }
3714
3715
6
    auto bitmap = TRY(m_context->page.bits->to_gfx_bitmap());
3716
0
    return ImageFrameDescriptor { move(bitmap), 0 };
3717
6
}
3718
3719
ErrorOr<NonnullOwnPtr<JBIG2ImageDecoderPlugin>> JBIG2ImageDecoderPlugin::create_embedded_jbig2_decoder(Vector<ReadonlyBytes> data)
3720
0
{
3721
0
    auto plugin = TRY(adopt_nonnull_own_or_enomem(new (nothrow) JBIG2ImageDecoderPlugin({})));
3722
0
    plugin->m_context->organization = JBIG2::Organization::Embedded;
3723
3724
0
    for (auto const& segment_data : data)
3725
0
        TRY(decode_segment_headers(*plugin->m_context, segment_data));
3726
0
    TRY(complete_decoding_all_segment_headers(*plugin->m_context));
3727
3728
0
    TRY(scan_for_page_size(*plugin->m_context));
3729
0
    TRY(scan_for_page_numbers(*plugin->m_context));
3730
3731
0
    if (plugin->frame_count() != 1)
3732
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Embedded JBIG2 data must have exactly one page");
3733
3734
0
    TRY(decode_data(*plugin->m_context));
3735
0
    return plugin;
3736
0
}
3737
3738
ErrorOr<NonnullRefPtr<BilevelImage>> JBIG2ImageDecoderPlugin::decode_embedded(Vector<ReadonlyBytes> data)
3739
0
{
3740
0
    auto plugin = TRY(create_embedded_jbig2_decoder(data));
3741
0
    return *plugin->m_context->page.bits;
3742
0
}
3743
3744
ErrorOr<NonnullRefPtr<BilevelImage>> JBIG2ImageDecoderPlugin::decode_embedded_intermediate_region_segment(Vector<ReadonlyBytes> data, u32 segment_number)
3745
0
{
3746
0
    auto plugin = TRY(create_embedded_jbig2_decoder(data));
3747
3748
0
    auto target_segment = plugin->m_context->segments.find_if([&segment_number](auto const& segment) {
3749
0
        return segment.header.segment_number == segment_number;
3750
0
    });
3751
0
    if (target_segment == plugin->m_context->segments.end())
3752
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Segment number not found in embedded JBIG2 data");
3753
3754
0
    if (!is_intermediate_region_segment(target_segment->type()))
3755
0
        return Error::from_string_literal("JBIG2ImageDecoderPlugin: Target segment is not an intermediate region segment");
3756
3757
0
    return *target_segment->aux_buffer;
3758
0
}
3759
3760
}