/src/ghostpdl/jbig2dec/jbig2_arith.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2021 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | /* |
17 | | jbig2dec |
18 | | */ |
19 | | |
20 | | #ifdef HAVE_CONFIG_H |
21 | | #include "config.h" |
22 | | #endif |
23 | | #include "os_types.h" |
24 | | |
25 | | #include <stdio.h> |
26 | | #include <stdlib.h> |
27 | | |
28 | | #include "jbig2.h" |
29 | | #include "jbig2_priv.h" |
30 | | #include "jbig2_arith.h" |
31 | | |
32 | | struct _Jbig2ArithState { |
33 | | uint32_t C; |
34 | | uint32_t A; |
35 | | |
36 | | int CT; |
37 | | |
38 | | uint32_t next_word; |
39 | | size_t next_word_bytes; |
40 | | int err; |
41 | | |
42 | | Jbig2WordStream *ws; |
43 | | size_t offset; |
44 | | }; |
45 | | |
46 | | /* |
47 | | Previous versions of this code had a #define to allow |
48 | | us to choose between using the revised arithmetic decoding |
49 | | specified in the 'Software Convention' section of the spec. |
50 | | Back to back tests showed that the 'Software Convention' |
51 | | version was indeed slightly faster. We therefore enable it |
52 | | by default. We also strip the option out, because a) it |
53 | | makes the code harder to read, and b) such things are an |
54 | | invitation to bitrot. |
55 | | */ |
56 | | |
57 | | static int |
58 | | jbig2_arith_bytein(Jbig2Ctx *ctx, Jbig2ArithState *as) |
59 | 1.88M | { |
60 | 1.88M | byte B; |
61 | | |
62 | | /* Treat both errors and reading beyond end of stream as an error. */ |
63 | 1.88M | if (as->err != 0) { |
64 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding"); |
65 | 0 | return -1; |
66 | 0 | } |
67 | 1.88M | if (as->next_word_bytes == 0) { |
68 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read beyond end of underlying stream during arithmetic decoding"); |
69 | 0 | return -1; |
70 | 0 | } |
71 | | |
72 | | /* At this point there is at least one byte in as->next_word. */ |
73 | | |
74 | | /* This code confused me no end when I first read it, so a quick note |
75 | | * to save others (and future me's) from being similarly confused. |
76 | | * 'next_word' does indeed contain 'next_word_bytes' of valid data |
77 | | * (always starting at the most significant byte). The confusing |
78 | | * thing is that the first byte has always already been read. |
79 | | * i.e. it serves only as an indication that the last byte we read |
80 | | * was FF or not. |
81 | | * |
82 | | * The jbig2 bytestream uses FF bytes, followed by a byte > 0x8F as |
83 | | * marker bytes. These never occur in normal streams of arithmetic |
84 | | * encoding, so meeting one terminates the stream (with an infinite |
85 | | * series of 1 bits). |
86 | | * |
87 | | * If we meet an FF byte, we return it as normal. We just 'remember' |
88 | | * that fact for the next byte we read. |
89 | | */ |
90 | | |
91 | | /* Figure F.3 */ |
92 | 1.88M | B = (byte)((as->next_word >> 24) & 0xFF); |
93 | 1.88M | if (B == 0xFF) { |
94 | 857k | byte B1; |
95 | | |
96 | | /* next_word_bytes can only be == 1 here, but let's be defensive. */ |
97 | 857k | if (as->next_word_bytes <= 1) { |
98 | 1.42k | int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word); |
99 | 1.42k | if (ret < 0) { |
100 | 0 | as->err = 1; |
101 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to check for marker code due to failure in underlying stream during arithmetic decoding"); |
102 | 0 | } |
103 | 1.42k | as->next_word_bytes = (size_t) ret; |
104 | | |
105 | 1.42k | if (as->next_word_bytes == 0) { |
106 | 1 | jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read end of possible terminating marker code, assuming terminating marker code"); |
107 | 1 | as->next_word = 0xFF900000; |
108 | 1 | as->next_word_bytes = 2; |
109 | 1 | as->C += 0xFF00; |
110 | 1 | as->CT = 8; |
111 | 1 | return 0; |
112 | 1 | } |
113 | | |
114 | 1.42k | as->offset += as->next_word_bytes; |
115 | | |
116 | 1.42k | B1 = (byte)((as->next_word >> 24) & 0xFF); |
117 | 1.42k | if (B1 > 0x8F) { |
118 | | #ifdef JBIG2_DEBUG_ARITH |
119 | | fprintf(stderr, "read %02x (aa)\n", B); |
120 | | #endif |
121 | 10 | as->CT = 8; |
122 | 10 | as->next_word = 0xFF000000 | (as->next_word >> 8); |
123 | 10 | as->next_word_bytes = 2; |
124 | 10 | as->offset--; |
125 | 1.41k | } else { |
126 | | #ifdef JBIG2_DEBUG_ARITH |
127 | | fprintf(stderr, "read %02x (a)\n", B); |
128 | | #endif |
129 | 1.41k | as->C += 0xFE00 - (B1 << 9); |
130 | 1.41k | as->CT = 7; |
131 | 1.41k | } |
132 | 856k | } else { |
133 | 856k | B1 = (byte)((as->next_word >> 16) & 0xFF); |
134 | 856k | if (B1 > 0x8F) { |
135 | | #ifdef JBIG2_DEBUG_ARITH |
136 | | fprintf(stderr, "read %02x (ba)\n", B); |
137 | | #endif |
138 | 852k | as->CT = 8; |
139 | 852k | } else { |
140 | 4.18k | as->next_word_bytes--; |
141 | 4.18k | as->next_word <<= 8; |
142 | | #ifdef JBIG2_DEBUG_ARITH |
143 | | fprintf(stderr, "read %02x (b)\n", B); |
144 | | #endif |
145 | | |
146 | 4.18k | as->C += 0xFE00 - (B1 << 9); |
147 | 4.18k | as->CT = 7; |
148 | 4.18k | } |
149 | 856k | } |
150 | 1.02M | } else { |
151 | | #ifdef JBIG2_DEBUG_ARITH |
152 | | fprintf(stderr, "read %02x\n", B); |
153 | | #endif |
154 | 1.02M | as->next_word <<= 8; |
155 | 1.02M | as->next_word_bytes--; |
156 | | |
157 | 1.02M | if (as->next_word_bytes == 0) { |
158 | 256k | int ret = as->ws->get_next_word(ctx, as->ws, as->offset, &as->next_word); |
159 | 256k | if (ret < 0) { |
160 | 0 | as->err = 1; |
161 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read from underlying stream during arithmetic decoding"); |
162 | 0 | } |
163 | 256k | as->next_word_bytes = (size_t) ret; |
164 | | |
165 | 256k | if (as->next_word_bytes == 0) { |
166 | 7 | jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to find terminating marker code before end of underlying stream, assuming terminating marker code"); |
167 | 7 | as->next_word = 0xFF900000; |
168 | 7 | as->next_word_bytes = 2; |
169 | 7 | as->C += 0xFF00; |
170 | 7 | as->CT = 8; |
171 | 7 | return 0; |
172 | 7 | } |
173 | | |
174 | 256k | as->offset += as->next_word_bytes; |
175 | 256k | } |
176 | | |
177 | 1.02M | B = (byte)((as->next_word >> 24) & 0xFF); |
178 | 1.02M | as->C += 0xFF00 - (B << 8); |
179 | 1.02M | as->CT = 8; |
180 | 1.02M | } |
181 | | |
182 | 1.88M | return 0; |
183 | 1.88M | } |
184 | | |
185 | | /** Allocate and initialize a new arithmetic coding state |
186 | | * the returned pointer can simply be freed; this does |
187 | | * not affect the associated Jbig2WordStream. |
188 | | */ |
189 | | Jbig2ArithState * |
190 | | jbig2_arith_new(Jbig2Ctx *ctx, Jbig2WordStream *ws) |
191 | 65 | { |
192 | 65 | Jbig2ArithState *result; |
193 | 65 | int ret; |
194 | | |
195 | 65 | result = jbig2_new(ctx, Jbig2ArithState, 1); |
196 | 65 | if (result == NULL) { |
197 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to allocate arithmetic coding state"); |
198 | 0 | return NULL; |
199 | 0 | } |
200 | | |
201 | 65 | result->err = 0; |
202 | 65 | result->ws = ws; |
203 | 65 | result->offset = 0; |
204 | | |
205 | 65 | ret = result->ws->get_next_word(ctx, result->ws, result->offset, &result->next_word); |
206 | 65 | if (ret < 0) { |
207 | 0 | jbig2_free(ctx->allocator, result); |
208 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to initialize underlying stream of arithmetic decoder"); |
209 | 0 | return NULL; |
210 | 0 | } |
211 | | |
212 | 65 | result->next_word_bytes = (size_t) ret; |
213 | 65 | if (result->next_word_bytes == 0) { |
214 | 0 | jbig2_free(ctx->allocator, result); |
215 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read first byte from underlying stream when initializing arithmetic decoder"); |
216 | 0 | return NULL; |
217 | 0 | } |
218 | | |
219 | 65 | result->offset += result->next_word_bytes; |
220 | | |
221 | | /* Figure F.1 */ |
222 | 65 | result->C = (~(result->next_word >> 8)) & 0xFF0000; |
223 | | |
224 | | /* Figure E.20 (2) */ |
225 | 65 | if (jbig2_arith_bytein(ctx, result) < 0) { |
226 | 0 | jbig2_free(ctx->allocator, result); |
227 | 0 | jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read second byte from underlying stream when initializing arithmetic decoder"); |
228 | 0 | return NULL; |
229 | 0 | } |
230 | | |
231 | | /* Figure E.20 (3) */ |
232 | 65 | result->C <<= 7; |
233 | 65 | result->CT -= 7; |
234 | 65 | result->A = 0x8000; |
235 | | |
236 | 65 | return result; |
237 | 65 | } |
238 | | |
239 | 391M | #define MAX_QE_ARRAY_SIZE 47 |
240 | | |
241 | | /* could put bit fields in to minimize memory usage */ |
242 | | typedef struct { |
243 | | uint16_t Qe; |
244 | | byte mps_xor; /* mps_xor = index ^ NMPS */ |
245 | | byte lps_xor; /* lps_xor = index ^ NLPS ^ (SWITCH << 7) */ |
246 | | } Jbig2ArithQe; |
247 | | |
248 | | static const Jbig2ArithQe jbig2_arith_Qe[MAX_QE_ARRAY_SIZE] = { |
249 | | {0x5601, 1 ^ 0, 1 ^ 0 ^ 0x80}, |
250 | | {0x3401, 2 ^ 1, 6 ^ 1}, |
251 | | {0x1801, 3 ^ 2, 9 ^ 2}, |
252 | | {0x0AC1, 4 ^ 3, 12 ^ 3}, |
253 | | {0x0521, 5 ^ 4, 29 ^ 4}, |
254 | | {0x0221, 38 ^ 5, 33 ^ 5}, |
255 | | {0x5601, 7 ^ 6, 6 ^ 6 ^ 0x80}, |
256 | | {0x5401, 8 ^ 7, 14 ^ 7}, |
257 | | {0x4801, 9 ^ 8, 14 ^ 8}, |
258 | | {0x3801, 10 ^ 9, 14 ^ 9}, |
259 | | {0x3001, 11 ^ 10, 17 ^ 10}, |
260 | | {0x2401, 12 ^ 11, 18 ^ 11}, |
261 | | {0x1C01, 13 ^ 12, 20 ^ 12}, |
262 | | {0x1601, 29 ^ 13, 21 ^ 13}, |
263 | | {0x5601, 15 ^ 14, 14 ^ 14 ^ 0x80}, |
264 | | {0x5401, 16 ^ 15, 14 ^ 15}, |
265 | | {0x5101, 17 ^ 16, 15 ^ 16}, |
266 | | {0x4801, 18 ^ 17, 16 ^ 17}, |
267 | | {0x3801, 19 ^ 18, 17 ^ 18}, |
268 | | {0x3401, 20 ^ 19, 18 ^ 19}, |
269 | | {0x3001, 21 ^ 20, 19 ^ 20}, |
270 | | {0x2801, 22 ^ 21, 19 ^ 21}, |
271 | | {0x2401, 23 ^ 22, 20 ^ 22}, |
272 | | {0x2201, 24 ^ 23, 21 ^ 23}, |
273 | | {0x1C01, 25 ^ 24, 22 ^ 24}, |
274 | | {0x1801, 26 ^ 25, 23 ^ 25}, |
275 | | {0x1601, 27 ^ 26, 24 ^ 26}, |
276 | | {0x1401, 28 ^ 27, 25 ^ 27}, |
277 | | {0x1201, 29 ^ 28, 26 ^ 28}, |
278 | | {0x1101, 30 ^ 29, 27 ^ 29}, |
279 | | {0x0AC1, 31 ^ 30, 28 ^ 30}, |
280 | | {0x09C1, 32 ^ 31, 29 ^ 31}, |
281 | | {0x08A1, 33 ^ 32, 30 ^ 32}, |
282 | | {0x0521, 34 ^ 33, 31 ^ 33}, |
283 | | {0x0441, 35 ^ 34, 32 ^ 34}, |
284 | | {0x02A1, 36 ^ 35, 33 ^ 35}, |
285 | | {0x0221, 37 ^ 36, 34 ^ 36}, |
286 | | {0x0141, 38 ^ 37, 35 ^ 37}, |
287 | | {0x0111, 39 ^ 38, 36 ^ 38}, |
288 | | {0x0085, 40 ^ 39, 37 ^ 39}, |
289 | | {0x0049, 41 ^ 40, 38 ^ 40}, |
290 | | {0x0025, 42 ^ 41, 39 ^ 41}, |
291 | | {0x0015, 43 ^ 42, 40 ^ 42}, |
292 | | {0x0009, 44 ^ 43, 41 ^ 43}, |
293 | | {0x0005, 45 ^ 44, 42 ^ 44}, |
294 | | {0x0001, 45 ^ 45, 43 ^ 45}, |
295 | | {0x5601, 46 ^ 46, 46 ^ 46} |
296 | | }; |
297 | | |
298 | | static int |
299 | | jbig2_arith_renormd(Jbig2Ctx *ctx, Jbig2ArithState *as) |
300 | 11.0M | { |
301 | | /* Figure E.18 */ |
302 | 15.0M | do { |
303 | 15.0M | if (as->CT == 0 && jbig2_arith_bytein(ctx, as) < 0) { |
304 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to read byte from compressed data stream"); |
305 | 0 | } |
306 | 15.0M | as->A <<= 1; |
307 | 15.0M | as->C <<= 1; |
308 | 15.0M | as->CT--; |
309 | 15.0M | } while ((as->A & 0x8000) == 0); |
310 | | |
311 | 11.0M | return 0; |
312 | 11.0M | } |
313 | | |
314 | | int |
315 | | jbig2_arith_decode(Jbig2Ctx *ctx, Jbig2ArithState *as, Jbig2ArithCx *pcx) |
316 | 391M | { |
317 | 391M | Jbig2ArithCx cx = *pcx; |
318 | 391M | const Jbig2ArithQe *pqe; |
319 | 391M | unsigned int index = cx & 0x7f; |
320 | 391M | bool D; |
321 | | |
322 | 391M | if (index >= MAX_QE_ARRAY_SIZE) { |
323 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_FATAL, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to determine probability estimate because index out of range"); |
324 | 0 | } |
325 | | |
326 | 391M | pqe = &jbig2_arith_Qe[index]; |
327 | | |
328 | | /* Figure F.2 */ |
329 | 391M | as->A -= pqe->Qe; |
330 | 391M | if ((as->C >> 16) < as->A) { |
331 | 388M | if ((as->A & 0x8000) == 0) { |
332 | | /* MPS_EXCHANGE, Figure E.16 */ |
333 | 8.35M | if (as->A < pqe->Qe) { |
334 | 1.39M | D = 1 - (cx >> 7); |
335 | 1.39M | *pcx ^= pqe->lps_xor; |
336 | 6.95M | } else { |
337 | 6.95M | D = cx >> 7; |
338 | 6.95M | *pcx ^= pqe->mps_xor; |
339 | 6.95M | } |
340 | 8.35M | if (jbig2_arith_renormd(ctx, as) < 0) { |
341 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder"); |
342 | 0 | } |
343 | | |
344 | 8.35M | return D; |
345 | 380M | } else { |
346 | 380M | return cx >> 7; |
347 | 380M | } |
348 | 388M | } else { |
349 | 2.70M | as->C -= (as->A) << 16; |
350 | | /* LPS_EXCHANGE, Figure E.17 */ |
351 | 2.70M | if (as->A < pqe->Qe) { |
352 | 396k | as->A = pqe->Qe; |
353 | 396k | D = cx >> 7; |
354 | 396k | *pcx ^= pqe->mps_xor; |
355 | 2.30M | } else { |
356 | 2.30M | as->A = pqe->Qe; |
357 | 2.30M | D = 1 - (cx >> 7); |
358 | 2.30M | *pcx ^= pqe->lps_xor; |
359 | 2.30M | } |
360 | 2.70M | if (jbig2_arith_renormd(ctx, as) < 0) { |
361 | 0 | return jbig2_error(ctx, JBIG2_SEVERITY_WARNING, JBIG2_UNKNOWN_SEGMENT_NUMBER, "failed to renormalize decoder"); |
362 | 0 | } |
363 | | |
364 | 2.70M | return D; |
365 | 2.70M | } |
366 | 391M | } |
367 | | |
368 | | #ifdef TEST |
369 | | |
370 | | static const byte test_stream[] = { |
371 | | 0x84, 0xC7, 0x3B, 0xFC, 0xE1, 0xA1, 0x43, 0x04, 0x02, 0x20, 0x00, 0x00, |
372 | | 0x41, 0x0D, 0xBB, 0x86, 0xF4, 0x31, 0x7F, 0xFF, 0x88, 0xFF, 0x37, 0x47, |
373 | | 0x1A, 0xDB, 0x6A, 0xDF, 0xFF, 0xAC, |
374 | | 0x00, 0x00 |
375 | | }; |
376 | | |
377 | | #if defined(JBIG2_DEBUG) || defined(JBIG2_DEBUG_ARITH) |
378 | | static void |
379 | | jbig2_arith_trace(Jbig2ArithState *as, Jbig2ArithCx cx) |
380 | | { |
381 | | fprintf(stderr, "I = %2d, MPS = %d, A = %04x, CT = %2d, C = %08x\n", cx & 0x7f, cx >> 7, as->A, as->CT, as->C); |
382 | | } |
383 | | #endif |
384 | | |
385 | | static int |
386 | | test_get_word(Jbig2Ctx *ctx, Jbig2WordStream *self, size_t offset, uint32_t *word) |
387 | | { |
388 | | uint32_t val = 0; |
389 | | int ret = 0; |
390 | | |
391 | | if (self == NULL || word == NULL) |
392 | | return -1; |
393 | | if (offset >= sizeof (test_stream)) |
394 | | return 0; |
395 | | |
396 | | if (offset < sizeof(test_stream)) { |
397 | | val |= test_stream[offset] << 24; |
398 | | ret++; |
399 | | } |
400 | | if (offset + 1 < sizeof(test_stream)) { |
401 | | val |= test_stream[offset + 1] << 16; |
402 | | ret++; |
403 | | } |
404 | | if (offset + 2 < sizeof(test_stream)) { |
405 | | val |= test_stream[offset + 2] << 8; |
406 | | ret++; |
407 | | } |
408 | | if (offset + 3 < sizeof(test_stream)) { |
409 | | val |= test_stream[offset + 3]; |
410 | | ret++; |
411 | | } |
412 | | *word = val; |
413 | | return ret; |
414 | | } |
415 | | |
416 | | int |
417 | | main(int argc, char **argv) |
418 | | { |
419 | | Jbig2Ctx *ctx; |
420 | | Jbig2WordStream ws; |
421 | | Jbig2ArithState *as; |
422 | | int i; |
423 | | Jbig2ArithCx cx = 0; |
424 | | |
425 | | ctx = jbig2_ctx_new(NULL, 0, NULL, NULL, NULL); |
426 | | |
427 | | ws.get_next_word = test_get_word; |
428 | | as = jbig2_arith_new(ctx, &ws); |
429 | | #ifdef JBIG2_DEBUG_ARITH |
430 | | jbig2_arith_trace(as, cx); |
431 | | #endif |
432 | | |
433 | | for (i = 0; i < 256; i++) { |
434 | | #ifdef JBIG2_DEBUG_ARITH |
435 | | int D = |
436 | | #else |
437 | | (void) |
438 | | #endif |
439 | | jbig2_arith_decode(ctx, as, &cx); |
440 | | |
441 | | #ifdef JBIG2_DEBUG_ARITH |
442 | | fprintf(stderr, "%3d: D = %d, ", i, D); |
443 | | jbig2_arith_trace(as, cx); |
444 | | #endif |
445 | | } |
446 | | |
447 | | jbig2_free(ctx->allocator, as); |
448 | | |
449 | | jbig2_ctx_free(ctx); |
450 | | |
451 | | return 0; |
452 | | } |
453 | | #endif |