Coverage Report

Created: 2026-02-02 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/jbig2dec/jbig2_arith.c
Line
Count
Source
1
/* Copyright (C) 2001-2023 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/*
17
    jbig2dec
18
*/
19
20
#ifdef HAVE_CONFIG_H
21
#include "config.h"
22
#endif
23
#include "os_types.h"
24
25
#include <stdio.h>
26
#include <stdlib.h>
27
28
#include "jbig2.h"
29
#include "jbig2_priv.h"
30
#include "jbig2_arith.h"
31
32
struct _Jbig2ArithState {
33
    uint32_t C;
34
    uint32_t A;
35
36
    int CT;
37
38
    uint32_t next_word;
39
    size_t next_word_bytes;
40
    int err;
41
42
    Jbig2WordStream *ws;
43
    size_t offset;
44
};
45
46
/*
47
  Previous versions of this code had a #define to allow
48
  us to choose between using the revised arithmetic decoding
49
  specified in the 'Software Convention' section of the spec.
50
  Back to back tests showed that the 'Software Convention'
51
  version was indeed slightly faster. We therefore enable it
52
  by default. We also strip the option out, because a) it
53
  makes the code harder to read, and b) such things are an
54
  invitation to bitrot.
55
*/
56
57
static int
58
jbig2_arith_bytein(Jbig2Ctx *ctx, Jbig2ArithState *as)
59
23.1M
{
60
23.1M
    byte B;
61
62
    /* Treat both errors and reading beyond end of stream as an error. */
63
23.1M
    if (as->err != 0) {
64
0
        jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding");
65
0
        return -1;
66
0
    }
67
23.1M
    if (as->next_word_bytes == 0) {
68
0
        jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read beyond end of underlying stream during arithmetic decoding");
69
0
        return -1;
70
0
    }
71
72
    /* At this point there is at least one byte in as->next_word. */
73
74
    /* This code confused me no end when I first read it, so a quick note
75
     * to save others (and future me's) from being similarly confused.
76
     * 'next_word' does indeed contain 'next_word_bytes' of valid data
77
     * (always starting at the most significant byte). The confusing
78
     * thing is that the first byte has always already been read.
79
     * i.e. it serves only as an indication that the last byte we read
80
     * was FF or not.
81
     *
82
     * The jbig2 bytestream uses FF bytes, followed by a byte > 0x8F as
83
     * marker bytes. These never occur in normal streams of arithmetic
84
     * encoding, so meeting one terminates the stream (with an infinite
85
     * series of 1 bits).
86
     *
87
     * If we meet an FF byte, we return it as normal. We just 'remember'
88
     * that fact for the next byte we read.
89
     */
90
91
    /* Figure F.3 */
92
23.1M
    B = (byte)((as->next_word >> 24) & 0xFF);
93
23.1M
    if (B == 0xFF) {
94
21.8M
        byte B1;
95
96
        /* next_word_bytes can only be == 1 here, but let's be defensive. */
97
21.8M
        if (as->next_word_bytes <= 1) {
98
4.76k
            int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word);
99
4.76k
            if (ret < 0) {
100
0
                as->err = 1;
101
0
                return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to check for marker code due to failure in underlying stream during arithmetic decoding");
102
0
            }
103
4.76k
            as->next_word_bytes = (size_t) ret;
104
105
4.76k
            if (as->next_word_bytes == 0) {
106
172
                jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read end of possible terminating marker code, assuming terminating marker code");
107
172
                as->next_word = 0xFF900000;
108
172
                as->next_word_bytes = 2;
109
172
                as->C += 0xFF00;
110
172
                as->CT = 8;
111
172
                return 0;
112
172
            }
113
114
4.59k
            as->offset += as->next_word_bytes;
115
116
4.59k
            B1 = (byte)((as->next_word >> 24) & 0xFF);
117
4.59k
            if (B1 > 0x8F) {
118
#ifdef JBIG2_DEBUG_ARITH
119
                fprintf(stderr, "read %02x (aa)\n", B);
120
#endif
121
3.13k
                as->CT = 8;
122
3.13k
                as->next_word = 0xFF000000 | (as->next_word >> 8);
123
3.13k
                as->next_word_bytes = 2;
124
3.13k
                as->offset--;
125
3.13k
            } else {
126
#ifdef JBIG2_DEBUG_ARITH
127
                fprintf(stderr, "read %02x (a)\n", B);
128
#endif
129
1.45k
                as->C += 0xFE00 - (B1 << 9);
130
1.45k
                as->CT = 7;
131
1.45k
            }
132
21.8M
        } else {
133
21.8M
            B1 = (byte)((as->next_word >> 16) & 0xFF);
134
21.8M
            if (B1 > 0x8F) {
135
#ifdef JBIG2_DEBUG_ARITH
136
                fprintf(stderr, "read %02x (ba)\n", B);
137
#endif
138
21.8M
                as->CT = 8;
139
21.8M
            } else {
140
6.15k
                as->next_word_bytes--;
141
6.15k
                as->next_word <<= 8;
142
#ifdef JBIG2_DEBUG_ARITH
143
                fprintf(stderr, "read %02x (b)\n", B);
144
#endif
145
146
6.15k
                as->C += 0xFE00 - (B1 << 9);
147
6.15k
                as->CT = 7;
148
6.15k
            }
149
21.8M
        }
150
21.8M
    } else {
151
#ifdef JBIG2_DEBUG_ARITH
152
        fprintf(stderr, "read %02x\n", B);
153
#endif
154
1.21M
        as->next_word <<= 8;
155
1.21M
        as->next_word_bytes--;
156
157
1.21M
        if (as->next_word_bytes == 0) {
158
300k
            int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word);
159
300k
            if (ret < 0) {
160
0
                as->err = 1;
161
0
                return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding");
162
0
            }
163
300k
            as->next_word_bytes = (size_t) ret;
164
165
300k
            if (as->next_word_bytes == 0) {
166
2.90k
                jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to find terminating marker code before end of underlying stream, assuming terminating marker code");
167
2.90k
                as->next_word = 0xFF900000;
168
2.90k
                as->next_word_bytes = 2;
169
2.90k
                as->C += 0xFF00;
170
2.90k
                as->CT = 8;
171
2.90k
                return 0;
172
2.90k
            }
173
174
297k
            as->offset += as->next_word_bytes;
175
297k
        }
176
177
1.21M
        B = (byte)((as->next_word >> 24) & 0xFF);
178
1.21M
        as->C += 0xFF00 - (B << 8);
179
1.21M
        as->CT = 8;
180
1.21M
    }
181
182
23.0M
    return 0;
183
23.1M
}
184
185
/** Allocate and initialize a new arithmetic coding state
186
 *  the returned pointer can simply be freed; this does
187
 *  not affect the associated Jbig2WordStream.
188
 */
189
Jbig2ArithState *
190
jbig2_arith_new(Jbig2Ctx *ctx, Jbig2WordStream *ws)
191
16.0k
{
192
16.0k
    Jbig2ArithState *result;
193
16.0k
    int ret;
194
195
16.0k
    result = jbig2_new(ctx, Jbig2ArithState, 1);
196
16.0k
    if (result == NULL) {
197
1
        jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to allocate arithmetic coding state");
198
1
        return NULL;
199
1
    }
200
201
16.0k
    result->err = 0;
202
16.0k
    result->ws = ws;
203
16.0k
    result->offset = 0;
204
205
16.0k
    ret = result->ws->get_next_word(ctx, result->ws, result->offset, &result->next_word);
206
16.0k
    if (ret < 0) {
207
0
        jbig2_free(ctx->allocator, result);
208
0
        jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to initialize underlying stream of arithmetic decoder");
209
0
        return NULL;
210
0
    }
211
212
16.0k
    result->next_word_bytes = (size_t) ret;
213
16.0k
    if (result->next_word_bytes == 0) {
214
13
        jbig2_free(ctx->allocator, result);
215
13
        jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read first byte from underlying stream when initializing arithmetic decoder");
216
13
        return NULL;
217
13
    }
218
219
16.0k
    result->offset += result->next_word_bytes;
220
221
    /* Figure F.1 */
222
16.0k
    result->C = (~(result->next_word >> 8)) & 0xFF0000;
223
224
    /* Figure E.20 (2) */
225
16.0k
    if (jbig2_arith_bytein(ctx, result) < 0) {
226
0
        jbig2_free(ctx->allocator, result);
227
0
        jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read second byte from underlying stream when initializing arithmetic decoder");
228
0
        return NULL;
229
0
    }
230
231
    /* Figure E.20 (3) */
232
16.0k
    result->C <<= 7;
233
16.0k
    result->CT -= 7;
234
16.0k
    result->A = 0x8000;
235
236
16.0k
    return result;
237
16.0k
}
238
239
5.90G
#define MAX_QE_ARRAY_SIZE 47
240
241
/* could put bit fields in to minimize memory usage */
242
typedef struct {
243
    uint16_t Qe;
244
    byte mps_xor;               /* mps_xor = index ^ NMPS */
245
    byte lps_xor;               /* lps_xor = index ^ NLPS ^ (SWITCH << 7) */
246
} Jbig2ArithQe;
247
248
#define MPS(index, nmps) ((index) ^ (nmps))
249
#define LPS(index, nlps, swtch) ((index) ^ (nlps) ^ ((swtch) << 7))
250
251
static const Jbig2ArithQe jbig2_arith_Qe[MAX_QE_ARRAY_SIZE] = {
252
    {0x5601, MPS(0, 1), LPS(0, 1, 1)},
253
    {0x3401, MPS(1, 2), LPS(1, 6, 0)},
254
    {0x1801, MPS(2, 3), LPS(2, 9, 0)},
255
    {0x0AC1, MPS(3, 4), LPS(3, 12, 0)},
256
    {0x0521, MPS(4, 5), LPS(4, 29, 0)},
257
    {0x0221, MPS(5, 38), LPS(5, 33, 0)},
258
    {0x5601, MPS(6, 7), LPS(6, 6, 1)},
259
    {0x5401, MPS(7, 8), LPS(7, 14, 0)},
260
    {0x4801, MPS(8, 9), LPS(8, 14, 0)},
261
    {0x3801, MPS(9, 10), LPS(9, 14, 0)},
262
    {0x3001, MPS(10, 11), LPS(10, 17, 0)},
263
    {0x2401, MPS(11, 12), LPS(11, 18, 0)},
264
    {0x1C01, MPS(12, 13), LPS(12, 20, 0)},
265
    {0x1601, MPS(13, 29), LPS(13, 21, 0)},
266
    {0x5601, MPS(14, 15), LPS(14, 14, 1)},
267
    {0x5401, MPS(15, 16), LPS(15, 14, 0)},
268
    {0x5101, MPS(16, 17), LPS(16, 15, 0)},
269
    {0x4801, MPS(17, 18), LPS(17, 16, 0)},
270
    {0x3801, MPS(18, 19), LPS(18, 17, 0)},
271
    {0x3401, MPS(19, 20), LPS(19, 18, 0)},
272
    {0x3001, MPS(20, 21), LPS(20, 19, 0)},
273
    {0x2801, MPS(21, 22), LPS(21, 19, 0)},
274
    {0x2401, MPS(22, 23), LPS(22, 20, 0)},
275
    {0x2201, MPS(23, 24), LPS(23, 21, 0)},
276
    {0x1C01, MPS(24, 25), LPS(24, 22, 0)},
277
    {0x1801, MPS(25, 26), LPS(25, 23, 0)},
278
    {0x1601, MPS(26, 27), LPS(26, 24, 0)},
279
    {0x1401, MPS(27, 28), LPS(27, 25, 0)},
280
    {0x1201, MPS(28, 29), LPS(28, 26, 0)},
281
    {0x1101, MPS(29, 30), LPS(29, 27, 0)},
282
    {0x0AC1, MPS(30, 31), LPS(30, 28, 0)},
283
    {0x09C1, MPS(31, 32), LPS(31, 29, 0)},
284
    {0x08A1, MPS(32, 33), LPS(32, 30, 0)},
285
    {0x0521, MPS(33, 34), LPS(33, 31, 0)},
286
    {0x0441, MPS(34, 35), LPS(34, 32, 0)},
287
    {0x02A1, MPS(35, 36), LPS(35, 33, 0)},
288
    {0x0221, MPS(36, 37), LPS(36, 34, 0)},
289
    {0x0141, MPS(37, 38), LPS(37, 35, 0)},
290
    {0x0111, MPS(38, 39), LPS(38, 36, 0)},
291
    {0x0085, MPS(39, 40), LPS(39, 37, 0)},
292
    {0x0049, MPS(40, 41), LPS(40, 38, 0)},
293
    {0x0025, MPS(41, 42), LPS(41, 39, 0)},
294
    {0x0015, MPS(42, 43), LPS(42, 40, 0)},
295
    {0x0009, MPS(43, 44), LPS(43, 41, 0)},
296
    {0x0005, MPS(44, 45), LPS(44, 42, 0)},
297
    {0x0001, MPS(45, 45), LPS(45, 43, 0)},
298
    {0x5601, MPS(46, 46), LPS(46, 46, 0)}
299
};
300
301
static int
302
jbig2_arith_renormd(Jbig2Ctx *ctx, Jbig2ArithState *as)
303
142M
{
304
    /* Figure E.18 */
305
184M
    do {
306
184M
        if (as->CT == 0 && jbig2_arith_bytein(ctx, as) < 0) {
307
0
            return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read byte from compressed data stream");
308
0
        }
309
184M
        as->A <<= 1;
310
184M
        as->C <<= 1;
311
184M
        as->CT--;
312
184M
    } while ((as->A & 0x8000) == 0);
313
314
142M
    return 0;
315
142M
}
316
317
int
318
jbig2_arith_decode(Jbig2Ctx *ctx, Jbig2ArithState *as, Jbig2ArithCx *pcx)
319
5.90G
{
320
5.90G
    Jbig2ArithCx cx = *pcx;
321
5.90G
    const Jbig2ArithQe *pqe;
322
5.90G
    unsigned int index = cx & 0x7f;
323
5.90G
    bool D;
324
325
5.90G
    if (index >= MAX_QE_ARRAY_SIZE) {
326
0
        return jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to determine probability estimate because index out of range");
327
0
    }
328
329
5.90G
    pqe = &jbig2_arith_Qe[index];
330
331
    /* Figure F.2 */
332
5.90G
    as->A -= pqe->Qe;
333
5.90G
    if ((as->C >> 16) < as->A) {
334
5.83G
        if ((as->A & 0x8000) == 0) {
335
            /* MPS_EXCHANGE, Figure E.16 */
336
79.1M
            if (as->A < pqe->Qe) {
337
12.0M
                D = 1 - (cx >> 7);
338
12.0M
                *pcx ^= pqe->lps_xor;
339
67.1M
            } else {
340
67.1M
                D = cx >> 7;
341
67.1M
                *pcx ^= pqe->mps_xor;
342
67.1M
            }
343
79.1M
            if (jbig2_arith_renormd(ctx, as) < 0) {
344
0
                return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder");
345
0
            }
346
347
79.1M
            return D;
348
5.75G
        } else {
349
5.75G
            return cx >> 7;
350
5.75G
        }
351
5.83G
    } else {
352
63.4M
        as->C -= (as->A) << 16;
353
        /* LPS_EXCHANGE, Figure E.17 */
354
63.4M
        if (as->A < pqe->Qe) {
355
13.8M
            as->A = pqe->Qe;
356
13.8M
            D = cx >> 7;
357
13.8M
            *pcx ^= pqe->mps_xor;
358
49.6M
        } else {
359
49.6M
            as->A = pqe->Qe;
360
49.6M
            D = 1 - (cx >> 7);
361
49.6M
            *pcx ^= pqe->lps_xor;
362
49.6M
        }
363
63.4M
        if (jbig2_arith_renormd(ctx, as) < 0) {
364
0
            return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder");
365
0
        }
366
367
63.4M
        return D;
368
63.4M
    }
369
5.90G
}
370
371
#ifdef TEST
372
373
static const byte test_stream[] = {
374
    0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00,
375
    0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47,
376
    0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC,
377
    0x00, 0x00
378
};
379
380
#if defined(JBIG2_DEBUG) || defined(JBIG2_DEBUG_ARITH)
381
static void
382
jbig2_arith_trace(Jbig2ArithState *as, Jbig2ArithCx cx)
383
{
384
    fprintf(stderr, "I = %2d, MPS = %d, A = %04x, CT = %2d, C = %08x\n", cx & 0x7f, cx >> 7, as->A, as->CT, as->C);
385
}
386
#endif
387
388
static int
389
test_get_word(Jbig2Ctx *ctx, Jbig2WordStream *self, size_t offset, uint32_t *word)
390
{
391
    uint32_t val = 0;
392
    int ret = 0;
393
394
    if (self == NULL || word == NULL)
395
        return -1;
396
    if (offset >= sizeof (test_stream))
397
        return 0;
398
399
    if (offset < sizeof(test_stream)) {
400
        val |= test_stream[offset] << 24;
401
        ret++;
402
    }
403
    if (offset + 1 < sizeof(test_stream)) {
404
        val |= test_stream[offset + 1] << 16;
405
        ret++;
406
    }
407
    if (offset + 2 < sizeof(test_stream)) {
408
        val |= test_stream[offset + 2] << 8;
409
        ret++;
410
    }
411
    if (offset + 3 < sizeof(test_stream)) {
412
        val |= test_stream[offset + 3];
413
        ret++;
414
    }
415
    *word = val;
416
    return ret;
417
}
418
419
int
420
main(int argc, char **argv)
421
{
422
    Jbig2Ctx *ctx;
423
    Jbig2WordStream ws;
424
    Jbig2ArithState *as;
425
    int i;
426
    Jbig2ArithCx cx = 0;
427
428
    ctx = jbig2_ctx_new(NULL, 0, NULL, NULL, NULL);
429
430
    ws.get_next_word = test_get_word;
431
    as = jbig2_arith_new(ctx, &ws);
432
#ifdef JBIG2_DEBUG_ARITH
433
    jbig2_arith_trace(as, cx);
434
#endif
435
436
    for (i = 0; i < 256; i++) {
437
#ifdef JBIG2_DEBUG_ARITH
438
        int D =
439
#else
440
        (void)
441
#endif
442
            jbig2_arith_decode(ctx, as, &cx);
443
444
#ifdef JBIG2_DEBUG_ARITH
445
        fprintf(stderr, "%3d: D = %d, ", i, D);
446
        jbig2_arith_trace(as, cx);
447
#endif
448
    }
449
450
    jbig2_free(ctx->allocator, as);
451
452
    jbig2_ctx_free(ctx);
453
454
    return 0;
455
}
456
#endif