/src/ghostpdl/jbig2dec/jbig2_arith.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | /* |
17 | | jbig2dec |
18 | | */ |
19 | | |
20 | | #ifdef HAVE_CONFIG_H |
21 | | #include "config.h" |
22 | | #endif |
23 | | #include "os_types.h" |
24 | | |
25 | | #include <stdio.h> |
26 | | #include <stdlib.h> |
27 | | |
28 | | #include "jbig2.h" |
29 | | #include "jbig2_priv.h" |
30 | | #include "jbig2_arith.h" |
31 | | |
32 | | struct _Jbig2ArithState { |
33 | | uint32_t C; |
34 | | uint32_t A; |
35 | | |
36 | | int CT; |
37 | | |
38 | | uint32_t next_word; |
39 | | size_t next_word_bytes; |
40 | | int err; |
41 | | |
42 | | Jbig2WordStream *ws; |
43 | | size_t offset; |
44 | | }; |
45 | | |
46 | | /* |
47 | | Previous versions of this code had a #define to allow |
48 | | us to choose between using the revised arithmetic decoding |
49 | | specified in the 'Software Convention' section of the spec. |
50 | | Back to back tests showed that the 'Software Convention' |
51 | | version was indeed slightly faster. We therefore enable it |
52 | | by default. We also strip the option out, because a) it |
53 | | makes the code harder to read, and b) such things are an |
54 | | invitation to bitrot. |
55 | | */ |
56 | | |
57 | | static int |
58 | | jbig2_arith_bytein(Jbig2Ctx *ctx, Jbig2ArithState *as) |
59 | 33.6M | { |
60 | 33.6M | byte B; |
61 | | |
62 | | /* Treat both errors and reading beyond end of stream as an error. */ |
63 | 33.6M | if (as->err != 0) { |
64 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding"); |
65 | 0 | return -1; |
66 | 0 | } |
67 | 33.6M | if (as->next_word_bytes == 0) { |
68 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read beyond end of underlying stream during arithmetic decoding"); |
69 | 0 | return -1; |
70 | 0 | } |
71 | | |
72 | | /* At this point there is at least one byte in as->next_word. */ |
73 | | |
74 | | /* This code confused me no end when I first read it, so a quick note |
75 | | * to save others (and future me's) from being similarly confused. |
76 | | * 'next_word' does indeed contain 'next_word_bytes' of valid data |
77 | | * (always starting at the most significant byte). The confusing |
78 | | * thing is that the first byte has always already been read. |
79 | | * i.e. it serves only as an indication that the last byte we read |
80 | | * was FF or not. |
81 | | * |
82 | | * The jbig2 bytestream uses FF bytes, followed by a byte > 0x8F as |
83 | | * marker bytes. These never occur in normal streams of arithmetic |
84 | | * encoding, so meeting one terminates the stream (with an infinite |
85 | | * series of 1 bits). |
86 | | * |
87 | | * If we meet an FF byte, we return it as normal. We just 'remember' |
88 | | * that fact for the next byte we read. |
89 | | */ |
90 | | |
91 | | /* Figure F.3 */ |
92 | 33.6M | B = (byte)((as->next_word >> 24) & 0xFF); |
93 | 33.6M | if (B == 0xFF) { |
94 | 23.6M | byte B1; |
95 | | |
96 | | /* next_word_bytes can only be == 1 here, but let's be defensive. */ |
97 | 23.6M | if (as->next_word_bytes <= 1) { |
98 | 14.0k | int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word); |
99 | 14.0k | if (ret < 0) { |
100 | 0 | as->err = 1; |
101 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to check for marker code due to failure in underlying stream during arithmetic decoding"); |
102 | 0 | } |
103 | 14.0k | as->next_word_bytes = (size_t) ret; |
104 | | |
105 | 14.0k | if (as->next_word_bytes == 0) { |
106 | 24 | jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read end of possible terminating marker code, assuming terminating marker code"); |
107 | 24 | as->next_word = 0xFF900000; |
108 | 24 | as->next_word_bytes = 2; |
109 | 24 | as->C += 0xFF00; |
110 | 24 | as->CT = 8; |
111 | 24 | return 0; |
112 | 24 | } |
113 | | |
114 | 14.0k | as->offset += as->next_word_bytes; |
115 | | |
116 | 14.0k | B1 = (byte)((as->next_word >> 24) & 0xFF); |
117 | 14.0k | if (B1 > 0x8F) { |
118 | | #ifdef JBIG2_DEBUG_ARITH |
119 | | fprintf(stderr, "read %02x (aa)\n", B); |
120 | | #endif |
121 | 80 | as->CT = 8; |
122 | 80 | as->next_word = 0xFF000000 | (as->next_word >> 8); |
123 | 80 | as->next_word_bytes = 2; |
124 | 80 | as->offset--; |
125 | 13.9k | } else { |
126 | | #ifdef JBIG2_DEBUG_ARITH |
127 | | fprintf(stderr, "read %02x (a)\n", B); |
128 | | #endif |
129 | 13.9k | as->C += 0xFE00 - (B1 << 9); |
130 | 13.9k | as->CT = 7; |
131 | 13.9k | } |
132 | 23.6M | } else { |
133 | 23.6M | B1 = (byte)((as->next_word >> 16) & 0xFF); |
134 | 23.6M | if (B1 > 0x8F) { |
135 | | #ifdef JBIG2_DEBUG_ARITH |
136 | | fprintf(stderr, "read %02x (ba)\n", B); |
137 | | #endif |
138 | 23.6M | as->CT = 8; |
139 | 23.6M | } else { |
140 | 39.2k | as->next_word_bytes--; |
141 | 39.2k | as->next_word <<= 8; |
142 | | #ifdef JBIG2_DEBUG_ARITH |
143 | | fprintf(stderr, "read %02x (b)\n", B); |
144 | | #endif |
145 | | |
146 | 39.2k | as->C += 0xFE00 - (B1 << 9); |
147 | 39.2k | as->CT = 7; |
148 | 39.2k | } |
149 | 23.6M | } |
150 | 23.6M | } else { |
151 | | #ifdef JBIG2_DEBUG_ARITH |
152 | | fprintf(stderr, "read %02x\n", B); |
153 | | #endif |
154 | 9.98M | as->next_word <<= 8; |
155 | 9.98M | as->next_word_bytes--; |
156 | | |
157 | 9.98M | if (as->next_word_bytes == 0) { |
158 | 2.49M | int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word); |
159 | 2.49M | if (ret < 0) { |
160 | 0 | as->err = 1; |
161 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding"); |
162 | 0 | } |
163 | 2.49M | as->next_word_bytes = (size_t) ret; |
164 | | |
165 | 2.49M | if (as->next_word_bytes == 0) { |
166 | 159 | jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to find terminating marker code before end of underlying stream, assuming terminating marker code"); |
167 | 159 | as->next_word = 0xFF900000; |
168 | 159 | as->next_word_bytes = 2; |
169 | 159 | as->C += 0xFF00; |
170 | 159 | as->CT = 8; |
171 | 159 | return 0; |
172 | 159 | } |
173 | | |
174 | 2.49M | as->offset += as->next_word_bytes; |
175 | 2.49M | } |
176 | | |
177 | 9.98M | B = (byte)((as->next_word >> 24) & 0xFF); |
178 | 9.98M | as->C += 0xFF00 - (B << 8); |
179 | 9.98M | as->CT = 8; |
180 | 9.98M | } |
181 | | |
182 | 33.6M | return 0; |
183 | 33.6M | } |
184 | | |
185 | | /** Allocate and initialize a new arithmetic coding state |
186 | | * the returned pointer can simply be freed; this does |
187 | | * not affect the associated Jbig2WordStream. |
188 | | */ |
189 | | Jbig2ArithState * |
190 | | jbig2_arith_new(Jbig2Ctx *ctx, Jbig2WordStream *ws) |
191 | 554 | { |
192 | 554 | Jbig2ArithState *result; |
193 | 554 | int ret; |
194 | | |
195 | 554 | result = jbig2_new(ctx, Jbig2ArithState, 1); |
196 | 554 | if (result == NULL) { |
197 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to allocate arithmetic coding state"); |
198 | 0 | return NULL; |
199 | 0 | } |
200 | | |
201 | 554 | result->err = 0; |
202 | 554 | result->ws = ws; |
203 | 554 | result->offset = 0; |
204 | | |
205 | 554 | ret = result->ws->get_next_word(ctx, result->ws, result->offset, &result->next_word); |
206 | 554 | if (ret < 0) { |
207 | 0 | jbig2_free(ctx->allocator, result); |
208 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to initialize underlying stream of arithmetic decoder"); |
209 | 0 | return NULL; |
210 | 0 | } |
211 | | |
212 | 554 | result->next_word_bytes = (size_t) ret; |
213 | 554 | if (result->next_word_bytes == 0) { |
214 | 0 | jbig2_free(ctx->allocator, result); |
215 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read first byte from underlying stream when initializing arithmetic decoder"); |
216 | 0 | return NULL; |
217 | 0 | } |
218 | | |
219 | 554 | result->offset += result->next_word_bytes; |
220 | | |
221 | | /* Figure F.1 */ |
222 | 554 | result->C = (~(result->next_word >> 8)) & 0xFF0000; |
223 | | |
224 | | /* Figure E.20 (2) */ |
225 | 554 | if (jbig2_arith_bytein(ctx, result) < 0) { |
226 | 0 | jbig2_free(ctx->allocator, result); |
227 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read second byte from underlying stream when initializing arithmetic decoder"); |
228 | 0 | return NULL; |
229 | 0 | } |
230 | | |
231 | | /* Figure E.20 (3) */ |
232 | 554 | result->C <<= 7; |
233 | 554 | result->CT -= 7; |
234 | 554 | result->A = 0x8000; |
235 | | |
236 | 554 | return result; |
237 | 554 | } |
238 | | |
239 | 3.82G | #define MAX_QE_ARRAY_SIZE 47 |
240 | | |
241 | | /* could put bit fields in to minimize memory usage */ |
242 | | typedef struct { |
243 | | uint16_t Qe; |
244 | | byte mps_xor; /* mps_xor = index ^ NMPS */ |
245 | | byte lps_xor; /* lps_xor = index ^ NLPS ^ (SWITCH << 7) */ |
246 | | } Jbig2ArithQe; |
247 | | |
248 | | #define MPS(index, nmps) ((index) ^ (nmps)) |
249 | | #define LPS(index, nlps, swtch) ((index) ^ (nlps) ^ ((swtch) << 7)) |
250 | | |
251 | | static const Jbig2ArithQe jbig2_arith_Qe[MAX_QE_ARRAY_SIZE] = { |
252 | | {0x5601, MPS(0, 1), LPS(0, 1, 1)}, |
253 | | {0x3401, MPS(1, 2), LPS(1, 6, 0)}, |
254 | | {0x1801, MPS(2, 3), LPS(2, 9, 0)}, |
255 | | {0x0AC1, MPS(3, 4), LPS(3, 12, 0)}, |
256 | | {0x0521, MPS(4, 5), LPS(4, 29, 0)}, |
257 | | {0x0221, MPS(5, 38), LPS(5, 33, 0)}, |
258 | | {0x5601, MPS(6, 7), LPS(6, 6, 1)}, |
259 | | {0x5401, MPS(7, 8), LPS(7, 14, 0)}, |
260 | | {0x4801, MPS(8, 9), LPS(8, 14, 0)}, |
261 | | {0x3801, MPS(9, 10), LPS(9, 14, 0)}, |
262 | | {0x3001, MPS(10, 11), LPS(10, 17, 0)}, |
263 | | {0x2401, MPS(11, 12), LPS(11, 18, 0)}, |
264 | | {0x1C01, MPS(12, 13), LPS(12, 20, 0)}, |
265 | | {0x1601, MPS(13, 29), LPS(13, 21, 0)}, |
266 | | {0x5601, MPS(14, 15), LPS(14, 14, 1)}, |
267 | | {0x5401, MPS(15, 16), LPS(15, 14, 0)}, |
268 | | {0x5101, MPS(16, 17), LPS(16, 15, 0)}, |
269 | | {0x4801, MPS(17, 18), LPS(17, 16, 0)}, |
270 | | {0x3801, MPS(18, 19), LPS(18, 17, 0)}, |
271 | | {0x3401, MPS(19, 20), LPS(19, 18, 0)}, |
272 | | {0x3001, MPS(20, 21), LPS(20, 19, 0)}, |
273 | | {0x2801, MPS(21, 22), LPS(21, 19, 0)}, |
274 | | {0x2401, MPS(22, 23), LPS(22, 20, 0)}, |
275 | | {0x2201, MPS(23, 24), LPS(23, 21, 0)}, |
276 | | {0x1C01, MPS(24, 25), LPS(24, 22, 0)}, |
277 | | {0x1801, MPS(25, 26), LPS(25, 23, 0)}, |
278 | | {0x1601, MPS(26, 27), LPS(26, 24, 0)}, |
279 | | {0x1401, MPS(27, 28), LPS(27, 25, 0)}, |
280 | | {0x1201, MPS(28, 29), LPS(28, 26, 0)}, |
281 | | {0x1101, MPS(29, 30), LPS(29, 27, 0)}, |
282 | | {0x0AC1, MPS(30, 31), LPS(30, 28, 0)}, |
283 | | {0x09C1, MPS(31, 32), LPS(31, 29, 0)}, |
284 | | {0x08A1, MPS(32, 33), LPS(32, 30, 0)}, |
285 | | {0x0521, MPS(33, 34), LPS(33, 31, 0)}, |
286 | | {0x0441, MPS(34, 35), LPS(34, 32, 0)}, |
287 | | {0x02A1, MPS(35, 36), LPS(35, 33, 0)}, |
288 | | {0x0221, MPS(36, 37), LPS(36, 34, 0)}, |
289 | | {0x0141, MPS(37, 38), LPS(37, 35, 0)}, |
290 | | {0x0111, MPS(38, 39), LPS(38, 36, 0)}, |
291 | | {0x0085, MPS(39, 40), LPS(39, 37, 0)}, |
292 | | {0x0049, MPS(40, 41), LPS(40, 38, 0)}, |
293 | | {0x0025, MPS(41, 42), LPS(41, 39, 0)}, |
294 | | {0x0015, MPS(42, 43), LPS(42, 40, 0)}, |
295 | | {0x0009, MPS(43, 44), LPS(43, 41, 0)}, |
296 | | {0x0005, MPS(44, 45), LPS(44, 42, 0)}, |
297 | | {0x0001, MPS(45, 45), LPS(45, 43, 0)}, |
298 | | {0x5601, MPS(46, 46), LPS(46, 46, 0)} |
299 | | }; |
300 | | |
301 | | static int |
302 | | jbig2_arith_renormd(Jbig2Ctx *ctx, Jbig2ArithState *as) |
303 | 218M | { |
304 | | /* Figure E.18 */ |
305 | 269M | do { |
306 | 269M | if (as->CT == 0 && jbig2_arith_bytein(ctx, as) < 0) { |
307 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read byte from compressed data stream"); |
308 | 0 | } |
309 | 269M | as->A <<= 1; |
310 | 269M | as->C <<= 1; |
311 | 269M | as->CT--; |
312 | 269M | } while ((as->A & 0x8000) == 0); |
313 | | |
314 | 218M | return 0; |
315 | 218M | } |
316 | | |
317 | | int |
318 | | jbig2_arith_decode(Jbig2Ctx *ctx, Jbig2ArithState *as, Jbig2ArithCx *pcx) |
319 | 3.82G | { |
320 | 3.82G | Jbig2ArithCx cx = *pcx; |
321 | 3.82G | const Jbig2ArithQe *pqe; |
322 | 3.82G | unsigned int index = cx & 0x7f; |
323 | 3.82G | bool D; |
324 | | |
325 | 3.82G | if (index >= MAX_QE_ARRAY_SIZE) { |
326 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to determine probability estimate because index out of range"); |
327 | 0 | } |
328 | | |
329 | 3.82G | pqe = &jbig2_arith_Qe[index]; |
330 | | |
331 | | /* Figure F.2 */ |
332 | 3.82G | as->A -= pqe->Qe; |
333 | 3.82G | if ((as->C >> 16) < as->A) { |
334 | 3.78G | if ((as->A & 0x8000) == 0) { |
335 | | /* MPS_EXCHANGE, Figure E.16 */ |
336 | 180M | if (as->A < pqe->Qe) { |
337 | 34.2M | D = 1 - (cx >> 7); |
338 | 34.2M | *pcx ^= pqe->lps_xor; |
339 | 146M | } else { |
340 | 146M | D = cx >> 7; |
341 | 146M | *pcx ^= pqe->mps_xor; |
342 | 146M | } |
343 | 180M | if (jbig2_arith_renormd(ctx, as) < 0) { |
344 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder"); |
345 | 0 | } |
346 | | |
347 | 180M | return D; |
348 | 3.60G | } else { |
349 | 3.60G | return cx >> 7; |
350 | 3.60G | } |
351 | 3.78G | } else { |
352 | 37.6M | as->C -= (as->A) << 16; |
353 | | /* LPS_EXCHANGE, Figure E.17 */ |
354 | 37.6M | if (as->A < pqe->Qe) { |
355 | 6.39M | as->A = pqe->Qe; |
356 | 6.39M | D = cx >> 7; |
357 | 6.39M | *pcx ^= pqe->mps_xor; |
358 | 31.2M | } else { |
359 | 31.2M | as->A = pqe->Qe; |
360 | 31.2M | D = 1 - (cx >> 7); |
361 | 31.2M | *pcx ^= pqe->lps_xor; |
362 | 31.2M | } |
363 | 37.6M | if (jbig2_arith_renormd(ctx, as) < 0) { |
364 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder"); |
365 | 0 | } |
366 | | |
367 | 37.6M | return D; |
368 | 37.6M | } |
369 | 3.82G | } |
370 | | |
371 | | #ifdef TEST |
372 | | |
373 | | static const byte test_stream[] = { |
374 | | 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, |
375 | | 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, |
376 | | 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC, |
377 | | 0x00, 0x00 |
378 | | }; |
379 | | |
380 | | #if defined(JBIG2_DEBUG) || defined(JBIG2_DEBUG_ARITH) |
381 | | static void |
382 | | jbig2_arith_trace(Jbig2ArithState *as, Jbig2ArithCx cx) |
383 | | { |
384 | | fprintf(stderr, "I = %2d, MPS = %d, A = %04x, CT = %2d, C = %08x\n", cx & 0x7f, cx >> 7, as->A, as->CT, as->C); |
385 | | } |
386 | | #endif |
387 | | |
388 | | static int |
389 | | test_get_word(Jbig2Ctx *ctx, Jbig2WordStream *self, size_t offset, uint32_t *word) |
390 | | { |
391 | | uint32_t val = 0; |
392 | | int ret = 0; |
393 | | |
394 | | if (self == NULL || word == NULL) |
395 | | return -1; |
396 | | if (offset >= sizeof (test_stream)) |
397 | | return 0; |
398 | | |
399 | | if (offset < sizeof(test_stream)) { |
400 | | val |= test_stream[offset] << 24; |
401 | | ret++; |
402 | | } |
403 | | if (offset + 1 < sizeof(test_stream)) { |
404 | | val |= test_stream[offset + 1] << 16; |
405 | | ret++; |
406 | | } |
407 | | if (offset + 2 < sizeof(test_stream)) { |
408 | | val |= test_stream[offset + 2] << 8; |
409 | | ret++; |
410 | | } |
411 | | if (offset + 3 < sizeof(test_stream)) { |
412 | | val |= test_stream[offset + 3]; |
413 | | ret++; |
414 | | } |
415 | | *word = val; |
416 | | return ret; |
417 | | } |
418 | | |
419 | | int |
420 | | main(int argc, char **argv) |
421 | | { |
422 | | Jbig2Ctx *ctx; |
423 | | Jbig2WordStream ws; |
424 | | Jbig2ArithState *as; |
425 | | int i; |
426 | | Jbig2ArithCx cx = 0; |
427 | | |
428 | | ctx = jbig2_ctx_new(NULL, 0, NULL, NULL, NULL); |
429 | | |
430 | | ws.get_next_word = test_get_word; |
431 | | as = jbig2_arith_new(ctx, &ws); |
432 | | #ifdef JBIG2_DEBUG_ARITH |
433 | | jbig2_arith_trace(as, cx); |
434 | | #endif |
435 | | |
436 | | for (i = 0; i < 256; i++) { |
437 | | #ifdef JBIG2_DEBUG_ARITH |
438 | | int D = |
439 | | #else |
440 | | (void) |
441 | | #endif |
442 | | jbig2_arith_decode(ctx, as, &cx); |
443 | | |
444 | | #ifdef JBIG2_DEBUG_ARITH |
445 | | fprintf(stderr, "%3d: D = %d, ", i, D); |
446 | | jbig2_arith_trace(as, cx); |
447 | | #endif |
448 | | } |
449 | | |
450 | | jbig2_free(ctx->allocator, as); |
451 | | |
452 | | jbig2_ctx_free(ctx); |
453 | | |
454 | | return 0; |
455 | | } |
456 | | #endif |