/src/jbigkit/libjbig/jbig_ar.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Arithmetic encoder and decoder of the portable JBIG |
3 | | * compression library |
4 | | * |
5 | | * Markus Kuhn -- http://www.cl.cam.ac.uk/~mgk25/jbigkit/ |
6 | | * |
7 | | * This module implements a portable standard C arithmetic encoder |
8 | | * and decoder used by the JBIG lossless bi-level image compression |
9 | | * algorithm as specified in International Standard ISO 11544:1993 |
10 | | * and ITU-T Recommendation T.82. |
11 | | * |
12 | | * This program is free software; you can redistribute it and/or modify |
13 | | * it under the terms of the GNU General Public License as published by |
14 | | * the Free Software Foundation; either version 2 of the License, or |
15 | | * (at your option) any later version. |
16 | | * |
17 | | * This program is distributed in the hope that it will be useful, |
18 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20 | | * GNU General Public License for more details. |
21 | | * |
22 | | * You should have received a copy of the GNU General Public License |
23 | | * along with this program; if not, write to the Free Software |
24 | | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
25 | | * |
26 | | * If you want to use this program under different license conditions, |
27 | | * then contact the author for an arrangement. |
28 | | */ |
29 | | |
30 | | #include <assert.h> |
31 | | #include "jbig_ar.h" |
32 | | |
33 | | /* |
34 | | * Probability estimation tables for the arithmetic encoder/decoder |
35 | | * given by ITU T.82 Table 24. |
36 | | */ |
37 | | |
38 | | static short lsztab[113] = { |
39 | | 0x5a1d, 0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, |
40 | | 0x0036, 0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, |
41 | | 0x2cf2, 0x207c, 0x17b9, 0x1182, 0x0cef, 0x09a1, 0x072f, 0x055c, |
42 | | 0x0406, 0x0303, 0x0240, 0x01b1, 0x0144, 0x00f5, 0x00b7, 0x008a, |
43 | | 0x0068, 0x004e, 0x003b, 0x002c, 0x5ae1, 0x484c, 0x3a0d, 0x2ef1, |
44 | | 0x261f, 0x1f33, 0x19a8, 0x1518, 0x1177, 0x0e74, 0x0bfb, 0x09f8, |
45 | | 0x0861, 0x0706, 0x05cd, 0x04de, 0x040f, 0x0363, 0x02d4, 0x025c, |
46 | | 0x01f8, 0x01a4, 0x0160, 0x0125, 0x00f6, 0x00cb, 0x00ab, 0x008f, |
47 | | 0x5b12, 0x4d04, 0x412c, 0x37d8, 0x2fe8, 0x293c, 0x2379, 0x1edf, |
48 | | 0x1aa9, 0x174e, 0x1424, 0x119c, 0x0f6b, 0x0d51, 0x0bb6, 0x0a40, |
49 | | 0x5832, 0x4d1c, 0x438e, 0x3bdd, 0x34ee, 0x2eae, 0x299a, 0x2516, |
50 | | 0x5570, 0x4ca9, 0x44d9, 0x3e22, 0x3824, 0x32b4, 0x2e17, 0x56a8, |
51 | | 0x4f46, 0x47e5, 0x41cf, 0x3c3d, 0x375e, 0x5231, 0x4c0f, 0x4639, |
52 | | 0x415e, 0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, |
53 | | 0x59eb |
54 | | }; |
55 | | |
56 | | static unsigned char nmpstab[113] = { |
57 | | 1, 2, 3, 4, 5, 6, 7, 8, |
58 | | 9, 10, 11, 12, 13, 13, 15, 16, |
59 | | 17, 18, 19, 20, 21, 22, 23, 24, |
60 | | 25, 26, 27, 28, 29, 30, 31, 32, |
61 | | 33, 34, 35, 9, 37, 38, 39, 40, |
62 | | 41, 42, 43, 44, 45, 46, 47, 48, |
63 | | 49, 50, 51, 52, 53, 54, 55, 56, |
64 | | 57, 58, 59, 60, 61, 62, 63, 32, |
65 | | 65, 66, 67, 68, 69, 70, 71, 72, |
66 | | 73, 74, 75, 76, 77, 78, 79, 48, |
67 | | 81, 82, 83, 84, 85, 86, 87, 71, |
68 | | 89, 90, 91, 92, 93, 94, 86, 96, |
69 | | 97, 98, 99, 100, 93, 102, 103, 104, |
70 | | 99, 106, 107, 103, 109, 107, 111, 109, |
71 | | 111 |
72 | | }; |
73 | | |
74 | | /* |
75 | | * least significant 7 bits (mask 0x7f) of nlpstab[] contain NLPS value, |
76 | | * most significant bit (mask 0x80) contains SWTCH bit |
77 | | */ |
78 | | static unsigned char nlpstab[113] = { |
79 | | 129, 14, 16, 18, 20, 23, 25, 28, |
80 | | 30, 33, 35, 9, 10, 12, 143, 36, |
81 | | 38, 39, 40, 42, 43, 45, 46, 48, |
82 | | 49, 51, 52, 54, 56, 57, 59, 60, |
83 | | 62, 63, 32, 33, 165, 64, 65, 67, |
84 | | 68, 69, 70, 72, 73, 74, 75, 77, |
85 | | 78, 79, 48, 50, 50, 51, 52, 53, |
86 | | 54, 55, 56, 57, 58, 59, 61, 61, |
87 | | 193, 80, 81, 82, 83, 84, 86, 87, |
88 | | 87, 72, 72, 74, 74, 75, 77, 77, |
89 | | 208, 88, 89, 90, 91, 92, 93, 86, |
90 | | 216, 95, 96, 97, 99, 99, 93, 223, |
91 | | 101, 102, 103, 104, 99, 105, 106, 107, |
92 | | 103, 233, 108, 109, 110, 111, 238, 112, |
93 | | 240 |
94 | | }; |
95 | | |
96 | | /* |
97 | | * The next functions implement the arithmedic encoder and decoder |
98 | | * required for JBIG. The same algorithm is also used in the arithmetic |
99 | | * variant of JPEG. |
100 | | */ |
101 | | |
102 | | /* marker codes */ |
103 | 429k | #define MARKER_STUFF 0x00 |
104 | 3.14M | #define MARKER_ESC 0xff |
105 | | |
106 | | void arith_encode_init(struct jbg_arenc_state *s, int reuse_st) |
107 | 73.3k | { |
108 | 73.3k | int i; |
109 | | |
110 | 73.3k | if (!reuse_st) |
111 | 9.05M | for (i = 0; i < 4096; s->st[i++] = 0) ; |
112 | 73.3k | s->c = 0; |
113 | 73.3k | s->a = 0x10000L; |
114 | 73.3k | s->sc = 0; |
115 | 73.3k | s->ct = 11; |
116 | 73.3k | s->buffer = -1; /* empty */ |
117 | | |
118 | 73.3k | return; |
119 | 73.3k | } |
120 | | |
121 | | |
122 | | void arith_encode_flush(struct jbg_arenc_state *s) |
123 | 73.3k | { |
124 | 73.3k | unsigned long temp; |
125 | | |
126 | | /* find the s->c in the coding interval with the largest |
127 | | * number of trailing zero bits */ |
128 | 73.3k | if ((temp = (s->a - 1 + s->c) & 0xffff0000L) < s->c) |
129 | 17.9k | s->c = temp + 0x8000; |
130 | 55.3k | else |
131 | 55.3k | s->c = temp; |
132 | | /* send remaining bytes to output */ |
133 | 73.3k | s->c <<= s->ct; |
134 | 73.3k | if (s->c & 0xf8000000L) { |
135 | | /* one final overflow has to be handled */ |
136 | 3.35k | if (s->buffer >= 0) { |
137 | 3.35k | s->byte_out(s->buffer + 1, s->file); |
138 | 3.35k | if (s->buffer + 1 == MARKER_ESC) |
139 | 28 | s->byte_out(MARKER_STUFF, s->file); |
140 | 3.35k | } |
141 | | /* output 0x00 bytes only when more non-0x00 will follow */ |
142 | 3.35k | if (s->c & 0x7fff800L) |
143 | 2.17k | for (; s->sc; --s->sc) |
144 | 448 | s->byte_out(0x00, s->file); |
145 | 69.9k | } else { |
146 | 69.9k | if (s->buffer >= 0) |
147 | 61.6k | s->byte_out(s->buffer, s->file); |
148 | | /* T.82 figure 30 says buffer+1 for the above line! Typo? */ |
149 | 71.0k | for (; s->sc; --s->sc) { |
150 | 1.01k | s->byte_out(0xff, s->file); |
151 | 1.01k | s->byte_out(MARKER_STUFF, s->file); |
152 | 1.01k | } |
153 | 69.9k | } |
154 | | /* output final bytes only if they are not 0x00 */ |
155 | 73.3k | if (s->c & 0x7fff800L) { |
156 | 61.8k | s->byte_out((s->c >> 19) & 0xff, s->file); |
157 | 61.8k | if (((s->c >> 19) & 0xff) == MARKER_ESC) |
158 | 226 | s->byte_out(MARKER_STUFF, s->file); |
159 | 61.8k | if (s->c & 0x7f800L) { |
160 | 13.6k | s->byte_out((s->c >> 11) & 0xff, s->file); |
161 | 13.6k | if (((s->c >> 11) & 0xff) == MARKER_ESC) |
162 | 0 | s->byte_out(MARKER_STUFF, s->file); |
163 | 13.6k | } |
164 | 61.8k | } |
165 | | |
166 | 73.3k | return; |
167 | 73.3k | } |
168 | | |
169 | | |
170 | | void arith_encode(struct jbg_arenc_state *s, int cx, int pix) |
171 | 1.06G | { |
172 | 1.06G | register unsigned lsz, ss; |
173 | 1.06G | register unsigned char *st; |
174 | 1.06G | long temp; |
175 | | |
176 | 1.06G | assert(cx >= 0 && cx < 4096); |
177 | 1.06G | st = s->st + cx; |
178 | 1.06G | ss = *st & 0x7f; |
179 | 1.06G | assert(ss < 113); |
180 | 1.06G | lsz = lsztab[ss]; |
181 | | |
182 | | #if 0 |
183 | | fprintf(stderr, "pix = %d, cx = %d, mps = %d, st = %3d, lsz = 0x%04x, " |
184 | | "a = 0x%05lx, c = 0x%08lx, ct = %2d, buf = 0x%02x\n", |
185 | | pix, cx, !!(s->st[cx] & 0x80), ss, lsz, s->a, s->c, s->ct, |
186 | | s->buffer); |
187 | | #endif |
188 | | |
189 | 1.06G | if (((pix << 7) ^ s->st[cx]) & 0x80) { |
190 | | /* encode the less probable symbol */ |
191 | 208M | if ((s->a -= lsz) >= lsz) { |
192 | | /* If the interval size (lsz) for the less probable symbol (LPS) |
193 | | * is larger than the interval size for the MPS, then exchange |
194 | | * the two symbols for coding efficiency, otherwise code the LPS |
195 | | * as usual: */ |
196 | 174M | s->c += s->a; |
197 | 174M | s->a = lsz; |
198 | 174M | } |
199 | | /* Check whether MPS/LPS exchange is necessary |
200 | | * and chose next probability estimator status */ |
201 | 208M | *st &= 0x80; |
202 | 208M | *st ^= nlpstab[ss]; |
203 | 855M | } else { |
204 | | /* encode the more probable symbol */ |
205 | 855M | if ((s->a -= lsz) & 0xffff8000L) |
206 | 636M | return; /* A >= 0x8000 -> ready, no renormalization required */ |
207 | 218M | if (s->a < lsz) { |
208 | | /* If the interval size (lsz) for the less probable symbol (LPS) |
209 | | * is larger than the interval size for the MPS, then exchange |
210 | | * the two symbols for coding efficiency: */ |
211 | 39.1M | s->c += s->a; |
212 | 39.1M | s->a = lsz; |
213 | 39.1M | } |
214 | | /* chose next probability estimator status */ |
215 | 218M | *st &= 0x80; |
216 | 218M | *st |= nmpstab[ss]; |
217 | 218M | } |
218 | | |
219 | | /* renormalization of coding interval */ |
220 | 582M | do { |
221 | 582M | s->a <<= 1; |
222 | 582M | s->c <<= 1; |
223 | 582M | --s->ct; |
224 | 582M | if (s->ct == 0) { |
225 | | /* another byte is ready for output */ |
226 | 72.7M | temp = s->c >> 19; |
227 | 72.7M | if (temp & 0xffffff00L) { |
228 | | /* handle overflow over all buffered 0xff bytes */ |
229 | 3.06M | if (s->buffer >= 0) { |
230 | 3.06M | ++s->buffer; |
231 | 3.06M | s->byte_out(s->buffer, s->file); |
232 | 3.06M | if (s->buffer == MARKER_ESC) |
233 | 12.0k | s->byte_out(MARKER_STUFF, s->file); |
234 | 3.06M | } |
235 | 3.08M | for (; s->sc; --s->sc) |
236 | 23.0k | s->byte_out(0x00, s->file); |
237 | 3.06M | s->buffer = temp & 0xff; /* new output byte, might overflow later */ |
238 | 3.06M | assert(s->buffer != 0xff); |
239 | | /* can s->buffer really never become 0xff here? */ |
240 | 69.6M | } else if (temp == 0xff) { |
241 | | /* buffer 0xff byte (which might overflow later) */ |
242 | 324k | ++s->sc; |
243 | 69.3M | } else { |
244 | | /* output all buffered 0xff bytes, they will not overflow any more */ |
245 | 69.3M | if (s->buffer >= 0) |
246 | 69.2M | s->byte_out(s->buffer, s->file); |
247 | 69.6M | for (; s->sc; --s->sc) { |
248 | 290k | s->byte_out(0xff, s->file); |
249 | 290k | s->byte_out(MARKER_STUFF, s->file); |
250 | 290k | } |
251 | 69.3M | s->buffer = temp; /* buffer new output byte (can still overflow) */ |
252 | 69.3M | } |
253 | 72.7M | s->c &= 0x7ffffL; |
254 | 72.7M | s->ct = 8; |
255 | 72.7M | } |
256 | 582M | } while (s->a < 0x8000); |
257 | | |
258 | 427M | return; |
259 | 427M | } |
260 | | |
261 | | |
262 | | void arith_decode_init(struct jbg_ardec_state *s, int reuse_st) |
263 | 570k | { |
264 | 570k | int i; |
265 | | |
266 | 570k | if (!reuse_st) |
267 | 2.29G | for (i = 0; i < 4096; s->st[i++] = 0) ; |
268 | 570k | s->c = 0; |
269 | 570k | s->a = 1; |
270 | 570k | s->ct = 0; |
271 | 570k | s->startup = 1; |
272 | 570k | s->nopadding = 0; |
273 | 570k | return; |
274 | 570k | } |
275 | | |
276 | | /* |
277 | | * Decode and return one symbol from the provided PSCD byte stream |
278 | | * that starts in s->pscd_ptr and ends in the byte before s->pscd_end. |
279 | | * The context cx is a 12-bit integer in the range 0..4095. This |
280 | | * function will advance s->pscd_ptr each time it has consumed all |
281 | | * information from that PSCD byte. |
282 | | * |
283 | | * If a symbol has been decoded successfully, the return value will be |
284 | | * 0 or 1 (depending on the symbol). |
285 | | * |
286 | | * If the decoder was not able to decode a symbol from the provided |
287 | | * PSCD, then the return value will be -1, and two cases can be |
288 | | * distinguished: |
289 | | * |
290 | | * s->pscd_ptr == s->pscd_end: |
291 | | * |
292 | | * The decoder has used up all information in the provided PSCD |
293 | | * bytes. Further PSCD bytes have to be provided (via new values of |
294 | | * s->pscd_ptr and/or s->pscd_end) before another symbol can be |
295 | | * decoded. |
296 | | * |
297 | | * s->pscd_ptr == s->pscd_end - 1: |
298 | | * |
299 | | * The decoder has used up all provided PSCD bytes except for the |
300 | | * very last byte, because that has the value 0xff. The decoder can |
301 | | * at this point not yet tell whether this 0xff belongs to a |
302 | | * MARKER_STUFF sequence or marks the end of the PSCD. Further PSCD |
303 | | * bytes have to be provided (via new values of s->pscd_ptr and/or |
304 | | * s->pscd_end), including the not yet processed 0xff byte, before |
305 | | * another symbol can be decoded successfully. |
306 | | * |
307 | | * If s->nopadding != 0, the decoder will return -2 when it reaches |
308 | | * the first two bytes of the marker segment that follows (and |
309 | | * terminates) the PSCD, but before decoding the first symbol that |
310 | | * depends on a bit in the input data that could have been the result |
311 | | * of zero padding, and might, therefore, never have been encoded. |
312 | | * This gives the caller the opportunity to lookahead early enough |
313 | | * beyond a terminating SDNORM/SDRST for a trailing NEWLEN (as |
314 | | * required by T.85) before decoding remaining symbols. Call the |
315 | | * decoder again afterwards as often as necessary (leaving s->pscd_ptr |
316 | | * pointing to the start of the marker segment) to retrieve any |
317 | | * required remaining symbols that might depend on padding. |
318 | | * |
319 | | * [Note that each PSCD can be decoded into an infinitely long |
320 | | * sequence of symbols, because the encoder might have truncated away |
321 | | * an arbitrarily long sequence of trailing 0x00 bytes, which the |
322 | | * decoder will append automatically as needed when it reaches the end |
323 | | * of the PSCD. Therefore, the decoder cannot report any end of the |
324 | | * symbol sequence and other means (external to the PSCD and |
325 | | * arithmetic decoding process) are needed to determine that.] |
326 | | */ |
327 | | |
328 | | int arith_decode(struct jbg_ardec_state *s, int cx) |
329 | 21.4G | { |
330 | 21.4G | register unsigned lsz, ss; |
331 | 21.4G | register unsigned char *st; |
332 | 21.4G | int pix; |
333 | | |
334 | | /* renormalization */ |
335 | 21.8G | while (s->a < 0x8000 || s->startup) { |
336 | 484M | while (s->ct <= 8 && s->ct >= 0) { |
337 | | /* first we can move a new byte into s->c */ |
338 | 8.17M | if (s->pscd_ptr >= s->pscd_end) { |
339 | 28.8k | return -1; /* more bytes needed */ |
340 | 28.8k | } |
341 | 8.14M | if (*s->pscd_ptr == 0xff) |
342 | 125k | if (s->pscd_ptr + 1 >= s->pscd_end) { |
343 | 102 | return -1; /* final 0xff byte not processed */ |
344 | 125k | } else { |
345 | 125k | if (*(s->pscd_ptr + 1) == MARKER_STUFF) { |
346 | 92.1k | s->c |= 0xffL << (8 - s->ct); |
347 | 92.1k | s->ct += 8; |
348 | 92.1k | s->pscd_ptr += 2; |
349 | 92.1k | } else { |
350 | 33.5k | s->ct = -1; /* start padding with zero bytes */ |
351 | 33.5k | if (s->nopadding) { |
352 | 0 | s->nopadding = 0; |
353 | 0 | return -2; /* subsequent symbols might depend on zero padding */ |
354 | 0 | } |
355 | 33.5k | } |
356 | 125k | } |
357 | 8.02M | else { |
358 | 8.02M | s->c |= (long)*(s->pscd_ptr++) << (8 - s->ct); |
359 | 8.02M | s->ct += 8; |
360 | 8.02M | } |
361 | 8.14M | } |
362 | 476M | s->c <<= 1; |
363 | 476M | s->a <<= 1; |
364 | 476M | if (s->ct >= 0) s->ct--; |
365 | 476M | if (s->a == 0x10000L) |
366 | 35.0k | s->startup = 0; |
367 | 476M | } |
368 | | |
369 | 21.4G | st = s->st + cx; |
370 | 21.4G | ss = *st & 0x7f; |
371 | 21.4G | assert(ss < 113); |
372 | 21.4G | lsz = lsztab[ss]; |
373 | | |
374 | | #if 0 |
375 | | fprintf(stderr, "cx = %d, mps = %d, st = %3d, lsz = 0x%04x, a = 0x%05lx, " |
376 | | "c = 0x%08lx, ct = %2d\n", |
377 | | cx, !!(s->st[cx] & 0x80), ss, lsz, s->a, s->c, s->ct); |
378 | | #endif |
379 | | |
380 | 21.4G | if ((s->c >> 16) < (s->a -= lsz)) |
381 | 21.2G | if (s->a & 0xffff8000L) |
382 | 21.0G | return *st >> 7; |
383 | 251M | else { |
384 | | /* MPS_EXCHANGE */ |
385 | 251M | if (s->a < lsz) { |
386 | 32.3M | pix = 1 - (*st >> 7); |
387 | | /* Check whether MPS/LPS exchange is necessary |
388 | | * and chose next probability estimator status */ |
389 | 32.3M | *st &= 0x80; |
390 | 32.3M | *st ^= nlpstab[ss]; |
391 | 219M | } else { |
392 | 219M | pix = *st >> 7; |
393 | 219M | *st &= 0x80; |
394 | 219M | *st |= nmpstab[ss]; |
395 | 219M | } |
396 | 251M | } |
397 | 121M | else { |
398 | | /* LPS_EXCHANGE */ |
399 | 121M | if (s->a < lsz) { |
400 | 27.2M | s->c -= s->a << 16; |
401 | 27.2M | s->a = lsz; |
402 | 27.2M | pix = *st >> 7; |
403 | 27.2M | *st &= 0x80; |
404 | 27.2M | *st |= nmpstab[ss]; |
405 | 94.3M | } else { |
406 | 94.3M | s->c -= s->a << 16; |
407 | 94.3M | s->a = lsz; |
408 | 94.3M | pix = 1 - (*st >> 7); |
409 | | /* Check whether MPS/LPS exchange is necessary |
410 | | * and chose next probability estimator status */ |
411 | 94.3M | *st &= 0x80; |
412 | 94.3M | *st ^= nlpstab[ss]; |
413 | 94.3M | } |
414 | 121M | } |
415 | | |
416 | 373M | return pix; |
417 | 21.4G | } |