Coverage Report

Created: 2025-07-23 08:18

/src/jbigkit/libjbig/jbig_ar.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Arithmetic encoder and decoder of the portable JBIG
3
 *  compression library
4
 *
5
 *  Markus Kuhn -- http://www.cl.cam.ac.uk/~mgk25/jbigkit/
6
 *
7
 *  This module implements a portable standard C arithmetic encoder
8
 *  and decoder used by the JBIG lossless bi-level image compression
9
 *  algorithm as specified in International Standard ISO 11544:1993
10
 *  and ITU-T Recommendation T.82.
11
 *
12
 *  This program is free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 2 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  This program is distributed in the hope that it will be useful,
18
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 *  GNU General Public License for more details.
21
 *
22
 *  You should have received a copy of the GNU General Public License
23
 *  along with this program; if not, write to the Free Software
24
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25
 *
26
 *  If you want to use this program under different license conditions,
27
 *  then contact the author for an arrangement.
28
 */
29
30
#include <assert.h>
31
#include "jbig_ar.h"
32
33
/*
34
 *  Probability estimation tables for the arithmetic encoder/decoder
35
 *  given by ITU T.82 Table 24.
36
 */
37
38
static short lsztab[113] = {
39
  0x5a1d, 0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f,
40
  0x0036, 0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25,
41
  0x2cf2, 0x207c, 0x17b9, 0x1182, 0x0cef, 0x09a1, 0x072f, 0x055c,
42
  0x0406, 0x0303, 0x0240, 0x01b1, 0x0144, 0x00f5, 0x00b7, 0x008a,
43
  0x0068, 0x004e, 0x003b, 0x002c, 0x5ae1, 0x484c, 0x3a0d, 0x2ef1,
44
  0x261f, 0x1f33, 0x19a8, 0x1518, 0x1177, 0x0e74, 0x0bfb, 0x09f8,
45
  0x0861, 0x0706, 0x05cd, 0x04de, 0x040f, 0x0363, 0x02d4, 0x025c,
46
  0x01f8, 0x01a4, 0x0160, 0x0125, 0x00f6, 0x00cb, 0x00ab, 0x008f,
47
  0x5b12, 0x4d04, 0x412c, 0x37d8, 0x2fe8, 0x293c, 0x2379, 0x1edf,
48
  0x1aa9, 0x174e, 0x1424, 0x119c, 0x0f6b, 0x0d51, 0x0bb6, 0x0a40,
49
  0x5832, 0x4d1c, 0x438e, 0x3bdd, 0x34ee, 0x2eae, 0x299a, 0x2516,
50
  0x5570, 0x4ca9, 0x44d9, 0x3e22, 0x3824, 0x32b4, 0x2e17, 0x56a8,
51
  0x4f46, 0x47e5, 0x41cf, 0x3c3d, 0x375e, 0x5231, 0x4c0f, 0x4639,
52
  0x415e, 0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522,
53
  0x59eb
54
};
55
56
static unsigned char nmpstab[113] = {
57
    1,   2,   3,   4,   5,   6,   7,   8,
58
    9,  10,  11,  12,  13,  13,  15,  16,
59
   17,  18,  19,  20,  21,  22,  23,  24,
60
   25,  26,  27,  28,  29,  30,  31,  32,
61
   33,  34,  35,   9,  37,  38,  39,  40,
62
   41,  42,  43,  44,  45,  46,  47,  48,
63
   49,  50,  51,  52,  53,  54,  55,  56,
64
   57,  58,  59,  60,  61,  62,  63,  32,
65
   65,  66,  67,  68,  69,  70,  71,  72,
66
   73,  74,  75,  76,  77,  78,  79,  48,
67
   81,  82,  83,  84,  85,  86,  87,  71,
68
   89,  90,  91,  92,  93,  94,  86,  96,
69
   97,  98,  99, 100,  93, 102, 103, 104,
70
   99, 106, 107, 103, 109, 107, 111, 109,
71
  111
72
};
73
74
/*
75
 * least significant 7 bits (mask 0x7f) of nlpstab[] contain NLPS value,
76
 * most significant bit (mask 0x80) contains SWTCH bit
77
 */
78
static unsigned char nlpstab[113] = {
79
  129,  14,  16,  18,  20,  23,  25,  28,
80
   30,  33,  35,   9,  10,  12, 143,  36,
81
   38,  39,  40,  42,  43,  45,  46,  48,
82
   49,  51,  52,  54,  56,  57,  59,  60,
83
   62,  63,  32,  33, 165,  64,  65,  67,
84
   68,  69,  70,  72,  73,  74,  75,  77,
85
   78,  79,  48,  50,  50,  51,  52,  53,
86
   54,  55,  56,  57,  58,  59,  61,  61,
87
  193,  80,  81,  82,  83,  84,  86,  87,
88
   87,  72,  72,  74,  74,  75,  77,  77,
89
  208,  88,  89,  90,  91,  92,  93,  86,
90
  216,  95,  96,  97,  99,  99,  93, 223,
91
  101, 102, 103, 104,  99, 105, 106, 107,
92
  103, 233, 108, 109, 110, 111, 238, 112,
93
  240
94
};
95
96
/*
97
 * The next functions implement the arithmedic encoder and decoder
98
 * required for JBIG. The same algorithm is also used in the arithmetic
99
 * variant of JPEG.
100
 */
101
102
/* marker codes */
103
429k
#define MARKER_STUFF    0x00
104
3.14M
#define MARKER_ESC      0xff
105
106
void arith_encode_init(struct jbg_arenc_state *s, int reuse_st)
107
73.3k
{
108
73.3k
  int i;
109
110
73.3k
  if (!reuse_st)
111
9.05M
    for (i = 0; i < 4096; s->st[i++] = 0) ;
112
73.3k
  s->c = 0;
113
73.3k
  s->a = 0x10000L;
114
73.3k
  s->sc = 0;
115
73.3k
  s->ct = 11;
116
73.3k
  s->buffer = -1;    /* empty */
117
118
73.3k
  return;
119
73.3k
}
120
121
122
void arith_encode_flush(struct jbg_arenc_state *s)
123
73.3k
{
124
73.3k
  unsigned long temp;
125
126
  /* find the s->c in the coding interval with the largest
127
   * number of trailing zero bits */
128
73.3k
  if ((temp = (s->a - 1 + s->c) & 0xffff0000L) < s->c)
129
17.9k
    s->c = temp + 0x8000;
130
55.3k
  else
131
55.3k
    s->c = temp;
132
  /* send remaining bytes to output */
133
73.3k
  s->c <<= s->ct;
134
73.3k
  if (s->c & 0xf8000000L) {
135
    /* one final overflow has to be handled */
136
3.35k
    if (s->buffer >= 0) {
137
3.35k
      s->byte_out(s->buffer + 1, s->file);
138
3.35k
      if (s->buffer + 1 == MARKER_ESC)
139
28
  s->byte_out(MARKER_STUFF, s->file);
140
3.35k
    }
141
    /* output 0x00 bytes only when more non-0x00 will follow */
142
3.35k
    if (s->c & 0x7fff800L)
143
2.17k
      for (; s->sc; --s->sc)
144
448
  s->byte_out(0x00, s->file);
145
69.9k
  } else {
146
69.9k
    if (s->buffer >= 0)
147
61.6k
      s->byte_out(s->buffer, s->file);
148
    /* T.82 figure 30 says buffer+1 for the above line! Typo? */
149
71.0k
    for (; s->sc; --s->sc) {
150
1.01k
      s->byte_out(0xff, s->file);
151
1.01k
      s->byte_out(MARKER_STUFF, s->file);
152
1.01k
    }
153
69.9k
  }
154
  /* output final bytes only if they are not 0x00 */
155
73.3k
  if (s->c & 0x7fff800L) {
156
61.8k
    s->byte_out((s->c >> 19) & 0xff, s->file);
157
61.8k
    if (((s->c >> 19) & 0xff) == MARKER_ESC)
158
226
      s->byte_out(MARKER_STUFF, s->file);
159
61.8k
    if (s->c & 0x7f800L) {
160
13.6k
      s->byte_out((s->c >> 11) & 0xff, s->file);
161
13.6k
      if (((s->c >> 11) & 0xff) == MARKER_ESC)
162
0
  s->byte_out(MARKER_STUFF, s->file);
163
13.6k
    }
164
61.8k
  }
165
166
73.3k
  return;
167
73.3k
}
168
169
170
void arith_encode(struct jbg_arenc_state *s, int cx, int pix)
171
1.06G
{
172
1.06G
  register unsigned lsz, ss;
173
1.06G
  register unsigned char *st;
174
1.06G
  long temp;
175
176
1.06G
  assert(cx >= 0 && cx < 4096);
177
1.06G
  st = s->st + cx;
178
1.06G
  ss = *st & 0x7f;
179
1.06G
  assert(ss < 113);
180
1.06G
  lsz = lsztab[ss];
181
182
#if 0
183
  fprintf(stderr, "pix = %d, cx = %d, mps = %d, st = %3d, lsz = 0x%04x, "
184
    "a = 0x%05lx, c = 0x%08lx, ct = %2d, buf = 0x%02x\n",
185
    pix, cx, !!(s->st[cx] & 0x80), ss, lsz, s->a, s->c, s->ct,
186
    s->buffer);
187
#endif
188
189
1.06G
  if (((pix << 7) ^ s->st[cx]) & 0x80) {
190
    /* encode the less probable symbol */
191
208M
    if ((s->a -= lsz) >= lsz) {
192
      /* If the interval size (lsz) for the less probable symbol (LPS)
193
       * is larger than the interval size for the MPS, then exchange
194
       * the two symbols for coding efficiency, otherwise code the LPS
195
       * as usual: */
196
174M
      s->c += s->a;
197
174M
      s->a = lsz;
198
174M
    }
199
    /* Check whether MPS/LPS exchange is necessary
200
     * and chose next probability estimator status */
201
208M
    *st &= 0x80;
202
208M
    *st ^= nlpstab[ss];
203
855M
  } else {
204
    /* encode the more probable symbol */
205
855M
    if ((s->a -= lsz) & 0xffff8000L)
206
636M
      return;   /* A >= 0x8000 -> ready, no renormalization required */
207
218M
    if (s->a < lsz) {
208
      /* If the interval size (lsz) for the less probable symbol (LPS)
209
       * is larger than the interval size for the MPS, then exchange
210
       * the two symbols for coding efficiency: */
211
39.1M
      s->c += s->a;
212
39.1M
      s->a = lsz;
213
39.1M
    }
214
    /* chose next probability estimator status */
215
218M
    *st &= 0x80;
216
218M
    *st |= nmpstab[ss];
217
218M
  }
218
219
  /* renormalization of coding interval */
220
582M
  do {
221
582M
    s->a <<= 1;
222
582M
    s->c <<= 1;
223
582M
    --s->ct;
224
582M
    if (s->ct == 0) {
225
      /* another byte is ready for output */
226
72.7M
      temp = s->c >> 19;
227
72.7M
      if (temp & 0xffffff00L) {
228
  /* handle overflow over all buffered 0xff bytes */
229
3.06M
  if (s->buffer >= 0) {
230
3.06M
    ++s->buffer;
231
3.06M
    s->byte_out(s->buffer, s->file);
232
3.06M
    if (s->buffer == MARKER_ESC)
233
12.0k
      s->byte_out(MARKER_STUFF, s->file);
234
3.06M
  }
235
3.08M
  for (; s->sc; --s->sc)
236
23.0k
    s->byte_out(0x00, s->file);
237
3.06M
  s->buffer = temp & 0xff;  /* new output byte, might overflow later */
238
3.06M
  assert(s->buffer != 0xff);
239
  /* can s->buffer really never become 0xff here? */
240
69.6M
      } else if (temp == 0xff) {
241
  /* buffer 0xff byte (which might overflow later) */
242
324k
  ++s->sc;
243
69.3M
      } else {
244
  /* output all buffered 0xff bytes, they will not overflow any more */
245
69.3M
  if (s->buffer >= 0)
246
69.2M
    s->byte_out(s->buffer, s->file);
247
69.6M
  for (; s->sc; --s->sc) {
248
290k
    s->byte_out(0xff, s->file);
249
290k
    s->byte_out(MARKER_STUFF, s->file);
250
290k
  }
251
69.3M
  s->buffer = temp;   /* buffer new output byte (can still overflow) */
252
69.3M
      }
253
72.7M
      s->c &= 0x7ffffL;
254
72.7M
      s->ct = 8;
255
72.7M
    }
256
582M
  } while (s->a < 0x8000);
257
258
427M
  return;
259
427M
}
260
261
262
void arith_decode_init(struct jbg_ardec_state *s, int reuse_st)
263
570k
{
264
570k
  int i;
265
266
570k
  if (!reuse_st)
267
2.29G
    for (i = 0; i < 4096; s->st[i++] = 0) ;
268
570k
  s->c = 0;
269
570k
  s->a = 1;
270
570k
  s->ct = 0;
271
570k
  s->startup = 1;
272
570k
  s->nopadding = 0;
273
570k
  return;
274
570k
}
275
276
/*
277
 * Decode and return one symbol from the provided PSCD byte stream
278
 * that starts in s->pscd_ptr and ends in the byte before s->pscd_end.
279
 * The context cx is a 12-bit integer in the range 0..4095. This
280
 * function will advance s->pscd_ptr each time it has consumed all
281
 * information from that PSCD byte.
282
 *
283
 * If a symbol has been decoded successfully, the return value will be
284
 * 0 or 1 (depending on the symbol).
285
 *
286
 * If the decoder was not able to decode a symbol from the provided
287
 * PSCD, then the return value will be -1, and two cases can be
288
 * distinguished:
289
 *
290
 * s->pscd_ptr == s->pscd_end:
291
 *
292
 *   The decoder has used up all information in the provided PSCD
293
 *   bytes. Further PSCD bytes have to be provided (via new values of
294
 *   s->pscd_ptr and/or s->pscd_end) before another symbol can be
295
 *   decoded.
296
 *
297
 * s->pscd_ptr == s->pscd_end - 1:
298
 *
299
 *   The decoder has used up all provided PSCD bytes except for the
300
 *   very last byte, because that has the value 0xff. The decoder can
301
 *   at this point not yet tell whether this 0xff belongs to a
302
 *   MARKER_STUFF sequence or marks the end of the PSCD. Further PSCD
303
 *   bytes have to be provided (via new values of s->pscd_ptr and/or
304
 *   s->pscd_end), including the not yet processed 0xff byte, before
305
 *   another symbol can be decoded successfully.
306
 *
307
 * If s->nopadding != 0, the decoder will return -2 when it reaches
308
 * the first two bytes of the marker segment that follows (and
309
 * terminates) the PSCD, but before decoding the first symbol that
310
 * depends on a bit in the input data that could have been the result
311
 * of zero padding, and might, therefore, never have been encoded.
312
 * This gives the caller the opportunity to lookahead early enough
313
 * beyond a terminating SDNORM/SDRST for a trailing NEWLEN (as
314
 * required by T.85) before decoding remaining symbols. Call the
315
 * decoder again afterwards as often as necessary (leaving s->pscd_ptr
316
 * pointing to the start of the marker segment) to retrieve any
317
 * required remaining symbols that might depend on padding.
318
 *
319
 * [Note that each PSCD can be decoded into an infinitely long
320
 * sequence of symbols, because the encoder might have truncated away
321
 * an arbitrarily long sequence of trailing 0x00 bytes, which the
322
 * decoder will append automatically as needed when it reaches the end
323
 * of the PSCD. Therefore, the decoder cannot report any end of the
324
 * symbol sequence and other means (external to the PSCD and
325
 * arithmetic decoding process) are needed to determine that.]
326
 */
327
328
int arith_decode(struct jbg_ardec_state *s, int cx)
329
21.4G
{
330
21.4G
  register unsigned lsz, ss;
331
21.4G
  register unsigned char *st;
332
21.4G
  int pix;
333
334
  /* renormalization */
335
21.8G
  while (s->a < 0x8000 || s->startup) {
336
484M
    while (s->ct <= 8 && s->ct >= 0) {
337
      /* first we can move a new byte into s->c */
338
8.17M
      if (s->pscd_ptr >= s->pscd_end) {
339
28.8k
  return -1;  /* more bytes needed */
340
28.8k
      }
341
8.14M
      if (*s->pscd_ptr == 0xff)
342
125k
  if (s->pscd_ptr + 1 >= s->pscd_end) {
343
102
    return -1; /* final 0xff byte not processed */
344
125k
  } else {
345
125k
    if (*(s->pscd_ptr + 1) == MARKER_STUFF) {
346
92.1k
      s->c |= 0xffL << (8 - s->ct);
347
92.1k
      s->ct += 8;
348
92.1k
      s->pscd_ptr += 2;
349
92.1k
    } else {
350
33.5k
      s->ct = -1; /* start padding with zero bytes */
351
33.5k
      if (s->nopadding) {
352
0
        s->nopadding = 0;
353
0
        return -2; /* subsequent symbols might depend on zero padding */
354
0
      }
355
33.5k
    }
356
125k
  }
357
8.02M
      else {
358
8.02M
  s->c |= (long)*(s->pscd_ptr++) << (8 - s->ct);
359
8.02M
  s->ct += 8;
360
8.02M
      }
361
8.14M
    }
362
476M
    s->c <<= 1;
363
476M
    s->a <<= 1;
364
476M
    if (s->ct >= 0) s->ct--;
365
476M
    if (s->a == 0x10000L)
366
35.0k
      s->startup = 0;
367
476M
  }
368
369
21.4G
  st = s->st + cx;
370
21.4G
  ss = *st & 0x7f;
371
21.4G
  assert(ss < 113);
372
21.4G
  lsz = lsztab[ss];
373
374
#if 0
375
  fprintf(stderr, "cx = %d, mps = %d, st = %3d, lsz = 0x%04x, a = 0x%05lx, "
376
    "c = 0x%08lx, ct = %2d\n",
377
    cx, !!(s->st[cx] & 0x80), ss, lsz, s->a, s->c, s->ct);
378
#endif
379
380
21.4G
  if ((s->c >> 16) < (s->a -= lsz))
381
21.2G
    if (s->a & 0xffff8000L)
382
21.0G
      return *st >> 7;
383
251M
    else {
384
      /* MPS_EXCHANGE */
385
251M
      if (s->a < lsz) {
386
32.3M
  pix = 1 - (*st >> 7);
387
  /* Check whether MPS/LPS exchange is necessary
388
   * and chose next probability estimator status */
389
32.3M
  *st &= 0x80;
390
32.3M
  *st ^= nlpstab[ss];
391
219M
      } else {
392
219M
  pix = *st >> 7;
393
219M
  *st &= 0x80;
394
219M
  *st |= nmpstab[ss];
395
219M
      }
396
251M
    }
397
121M
  else {
398
    /* LPS_EXCHANGE */
399
121M
    if (s->a < lsz) {
400
27.2M
      s->c -= s->a << 16;
401
27.2M
      s->a = lsz;
402
27.2M
      pix = *st >> 7;
403
27.2M
      *st &= 0x80;
404
27.2M
      *st |= nmpstab[ss];
405
94.3M
    } else {
406
94.3M
      s->c -= s->a << 16;
407
94.3M
      s->a = lsz;
408
94.3M
      pix = 1 - (*st >> 7);
409
      /* Check whether MPS/LPS exchange is necessary
410
       * and chose next probability estimator status */
411
94.3M
      *st &= 0x80;
412
94.3M
      *st ^= nlpstab[ss];
413
94.3M
    }
414
121M
  }
415
416
373M
  return pix;
417
21.4G
}