Coverage Report

Created: 2025-12-05 06:15

/src/openexr/external/deflate/lib/decompress_template.h
Line
Count
Source
1
/*
2
 * decompress_template.h
3
 *
4
 * Copyright 2016 Eric Biggers
5
 *
6
 * Permission is hereby granted, free of charge, to any person
7
 * obtaining a copy of this software and associated documentation
8
 * files (the "Software"), to deal in the Software without
9
 * restriction, including without limitation the rights to use,
10
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following
13
 * conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be
16
 * included in all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
 * OTHER DEALINGS IN THE SOFTWARE.
26
 */
27
28
/*
29
 * This is the actual DEFLATE decompression routine, lifted out of
30
 * deflate_decompress.c so that it can be compiled multiple times with different
31
 * target instruction sets.
32
 */
33
34
#ifndef ATTRIBUTES
35
#  define ATTRIBUTES
36
#endif
37
#ifndef EXTRACT_VARBITS
38
4.22k
#  define EXTRACT_VARBITS(word, count)  ((word) & BITMASK(count))
39
#endif
40
#ifndef EXTRACT_VARBITS8
41
112k
#  define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count)))
42
#endif
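
These two macros extract a variable number of low bits from the bit buffer by masking. A minimal standalone sketch of the same idea, with a locally defined bitmask() helper standing in for the library's BITMASK macro:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t bitbuf_t;

/* Low-bit mask helper; stands in for BITMASK in this sketch. */
static bitbuf_t bitmask(unsigned count)
{
    return ((bitbuf_t)1 << count) - 1;
}

int main(void)
{
    bitbuf_t bitbuf = 0x0123456789ABCDEF;

    /* Take the low 5 bits of the buffer, then consume them by shifting. */
    unsigned field = (unsigned)(bitbuf & bitmask(5));
    bitbuf >>= 5;

    printf("field = %u, remaining buffer = 0x%016llx\n",
           field, (unsigned long long)bitbuf);
    return 0;
}

Shifting a 64-bit value by 64 is undefined in C, which is why a full-width mask needs care; the sketch simply never asks for all 64 bits.
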
43
44
static ATTRIBUTES MAYBE_UNUSED enum libdeflate_result
45
FUNCNAME(struct libdeflate_decompressor * restrict d,
46
   const void * restrict in, size_t in_nbytes,
47
   void * restrict out, size_t out_nbytes_avail,
48
   size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
49
3.36k
{
50
3.36k
  u8 *out_next = out;
51
3.36k
  u8 * const out_end = out_next + out_nbytes_avail;
52
3.36k
  u8 * const out_fastloop_end =
53
3.36k
    out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN);
54
55
  /* Input bitstream state; see deflate_decompress.c for documentation */
56
3.36k
  const u8 *in_next = in;
57
3.36k
  const u8 * const in_end = in_next + in_nbytes;
58
3.36k
  const u8 * const in_fastloop_end =
59
3.36k
    in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ);
60
3.36k
  bitbuf_t bitbuf = 0;
61
3.36k
  bitbuf_t saved_bitbuf;
62
3.36k
  u32 bitsleft = 0;
63
3.36k
  size_t overread_count = 0;
64
65
3.36k
  bool is_final_block;
66
3.36k
  unsigned block_type;
67
3.36k
  unsigned num_litlen_syms;
68
3.36k
  unsigned num_offset_syms;
69
3.36k
  bitbuf_t litlen_tablemask;
70
3.36k
  u32 entry;
71
72
7.81k
next_block:
73
  /* Starting to read the next block */
74
7.81k
  ;
75
76
7.81k
  STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3));
77
7.81k
  REFILL_BITS();
78
79
  /* BFINAL: 1 bit */
80
7.74k
  is_final_block = bitbuf & BITMASK(1);
81
82
  /* BTYPE: 2 bits */
83
7.74k
  block_type = (bitbuf >> 1) & BITMASK(2);
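
The two statements above decode the 3-bit block header defined by RFC 1951 (BFINAL in bit 0, BTYPE in bits 1-2). A self-contained sketch of that layout, with an arbitrary example value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t bitbuf = 0x5;              /* binary ...101 */
    int is_final = bitbuf & 0x1;        /* BFINAL: bit 0 */
    int btype    = (bitbuf >> 1) & 0x3; /* BTYPE: 0 = stored, 1 = static, 2 = dynamic */

    printf("BFINAL=%d BTYPE=%d\n", is_final, btype); /* 1 and 2 here */
    return 0;
}
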
84
85
7.74k
  if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
86
87
    /* Dynamic Huffman block */
88
89
    /* The order in which precode lengths are stored */
90
1.93k
    static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
91
1.93k
      16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
92
1.93k
    };
93
94
1.93k
    unsigned num_explicit_precode_lens;
95
1.93k
    unsigned i;
96
97
    /* Read the codeword length counts. */
98
99
1.93k
    STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5));
100
1.93k
    num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5));
101
102
1.93k
    STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5));
103
1.93k
    num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5));
104
105
1.93k
    STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4));
106
1.93k
    num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4));
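
These three reads correspond to the HLIT, HDIST and HCLEN fields of a dynamic-block header (RFC 1951, section 3.2.7). The same bit layout as a standalone sketch; the variable names here are illustrative, not the library's:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t bitbuf = 0x1234;  /* bits 0-2 (BFINAL/BTYPE) already examined */

    unsigned num_litlen_syms  = 257 + ((bitbuf >> 3)  & 0x1F); /* HLIT  + 257 */
    unsigned num_offset_syms  =   1 + ((bitbuf >> 8)  & 0x1F); /* HDIST + 1   */
    unsigned num_precode_lens =   4 + ((bitbuf >> 13) & 0x0F); /* HCLEN + 4   */

    printf("%u litlen, %u offset, %u explicit precode lens\n",
           num_litlen_syms, num_offset_syms, num_precode_lens);
    return 0;
}
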
107
108
1.93k
    d->static_codes_loaded = false;
109
110
    /*
111
     * Read the precode codeword lengths.
112
     *
113
     * A 64-bit bitbuffer is just one bit too small to hold the
114
     * maximum number of precode lens, so to minimize branches we
115
     * merge one len with the previous fields.
116
     */
117
1.93k
    STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
118
1.93k
    if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
119
1.93k
      d->u.precode_lens[deflate_precode_lens_permutation[0]] =
120
1.93k
        (bitbuf >> 17) & BITMASK(3);
121
1.93k
      bitbuf >>= 20;
122
1.93k
      bitsleft -= 20;
123
1.93k
      REFILL_BITS();
124
1.91k
      i = 1;
125
27.6k
      do {
126
27.6k
        d->u.precode_lens[deflate_precode_lens_permutation[i]] =
127
27.6k
          bitbuf & BITMASK(3);
128
27.6k
        bitbuf >>= 3;
129
27.6k
        bitsleft -= 3;
130
27.6k
      } while (++i < num_explicit_precode_lens);
131
1.91k
    } else {
132
0
      bitbuf >>= 17;
133
0
      bitsleft -= 17;
134
0
      i = 0;
135
0
      do {
136
0
        if ((u8)bitsleft < 3)
137
0
          REFILL_BITS();
138
0
        d->u.precode_lens[deflate_precode_lens_permutation[i]] =
139
0
          bitbuf & BITMASK(3);
140
0
        bitbuf >>= 3;
141
0
        bitsleft -= 3;
142
0
      } while (++i < num_explicit_precode_lens);
143
0
    }
144
8.75k
    for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
145
6.84k
      d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
146
147
    /* Build the decode table for the precode. */
148
1.91k
    SAFETY_CHECK(build_precode_decode_table(d));
149
150
    /* Decode the litlen and offset codeword lengths. */
151
1.83k
    i = 0;
152
140k
    do {
153
140k
      unsigned presym;
154
140k
      u8 rep_val;
155
140k
      unsigned rep_count;
156
157
140k
      if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7)
158
8.34k
        REFILL_BITS();
159
160
      /*
161
       * The code below assumes that the precode decode table
162
       * doesn't have any subtables.
163
       */
164
140k
      STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
165
166
      /* Decode the next precode symbol. */
167
140k
      entry = d->u.l.precode_decode_table[
168
140k
        bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)];
169
140k
      bitbuf >>= (u8)entry;
170
140k
      bitsleft -= entry; /* optimization: subtract full entry */
171
140k
      presym = entry >> 16;
172
173
140k
      if (presym < 16) {
174
        /* Explicit codeword length */
175
109k
        d->u.l.lens[i++] = presym;
176
109k
        continue;
177
109k
      }
178
179
      /* Run-length encoded codeword lengths */
180
181
      /*
182
       * Note: we don't need to immediately verify that the
183
       * repeat count doesn't overflow the number of elements,
184
       * since we've sized the lens array to have enough extra
185
       * space to allow for the worst-case overrun (138 zeroes
186
       * when only 1 length was remaining).
187
       *
188
       * In the case of the small repeat counts (presyms 16
189
       * and 17), it is fastest to always write the maximum
190
       * number of entries.  That gets rid of branches that
191
       * would otherwise be required.
192
       *
193
       * It is not just because of the numerical order that
194
       * our checks go in the order 'presym < 16', 'presym ==
195
       * 16', and 'presym == 17'.  For typical data this is
196
       * ordered from most frequent to least frequent case.
197
       */
198
30.6k
      STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
199
200
30.6k
      if (presym == 16) {
201
        /* Repeat the previous length 3 - 6 times. */
202
21.6k
        SAFETY_CHECK(i != 0);
203
21.6k
        rep_val = d->u.l.lens[i - 1];
204
21.6k
        STATIC_ASSERT(3 + BITMASK(2) == 6);
205
21.6k
        rep_count = 3 + (bitbuf & BITMASK(2));
206
21.6k
        bitbuf >>= 2;
207
21.6k
        bitsleft -= 2;
208
21.6k
        d->u.l.lens[i + 0] = rep_val;
209
21.6k
        d->u.l.lens[i + 1] = rep_val;
210
21.6k
        d->u.l.lens[i + 2] = rep_val;
211
21.6k
        d->u.l.lens[i + 3] = rep_val;
212
21.6k
        d->u.l.lens[i + 4] = rep_val;
213
21.6k
        d->u.l.lens[i + 5] = rep_val;
214
21.6k
        i += rep_count;
215
21.6k
      } else if (presym == 17) {
216
        /* Repeat zero 3 - 10 times. */
217
5.06k
        STATIC_ASSERT(3 + BITMASK(3) == 10);
218
5.06k
        rep_count = 3 + (bitbuf & BITMASK(3));
219
5.06k
        bitbuf >>= 3;
220
5.06k
        bitsleft -= 3;
221
5.06k
        d->u.l.lens[i + 0] = 0;
222
5.06k
        d->u.l.lens[i + 1] = 0;
223
5.06k
        d->u.l.lens[i + 2] = 0;
224
5.06k
        d->u.l.lens[i + 3] = 0;
225
5.06k
        d->u.l.lens[i + 4] = 0;
226
5.06k
        d->u.l.lens[i + 5] = 0;
227
5.06k
        d->u.l.lens[i + 6] = 0;
228
5.06k
        d->u.l.lens[i + 7] = 0;
229
5.06k
        d->u.l.lens[i + 8] = 0;
230
5.06k
        d->u.l.lens[i + 9] = 0;
231
5.06k
        i += rep_count;
232
5.06k
      } else {
233
        /* Repeat zero 11 - 138 times. */
234
3.97k
        STATIC_ASSERT(11 + BITMASK(7) == 138);
235
3.97k
        rep_count = 11 + (bitbuf & BITMASK(7));
236
3.97k
        bitbuf >>= 7;
237
3.97k
        bitsleft -= 7;
238
3.97k
        memset(&d->u.l.lens[i], 0,
239
3.97k
               rep_count * sizeof(d->u.l.lens[i]));
240
3.97k
        i += rep_count;
241
3.97k
      }
242
140k
    } while (i < num_litlen_syms + num_offset_syms);
243
244
    /* Unnecessary, but check this for consistency with zlib. */
245
1.67k
    SAFETY_CHECK(i == num_litlen_syms + num_offset_syms);
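
The loop above expands precode symbols 16, 17 and 18, which are run-length codes rather than literal lengths. A compact sketch of those three rules; apply_rle() is a hypothetical helper written for this example only:

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Apply one run-length code to the 'lens' array; 'extra' is the value of
 * the extra bits already read from the stream. */
static unsigned apply_rle(unsigned char *lens, unsigned i,
                          unsigned presym, unsigned extra)
{
    if (presym == 16) {                 /* repeat previous length 3-6 times */
        assert(i != 0);
        memset(&lens[i], lens[i - 1], 3 + extra);
        return i + 3 + extra;
    }
    if (presym == 17) {                 /* 3-10 zeroes */
        memset(&lens[i], 0, 3 + extra);
        return i + 3 + extra;
    }
    /* presym == 18: 11-138 zeroes */
    memset(&lens[i], 0, 11 + extra);
    return i + 11 + extra;
}

int main(void)
{
    unsigned char lens[320] = { 8 };    /* lens[0] = 8, rest zero */
    unsigned i = 1;

    i = apply_rle(lens, i, 16, 1);      /* copy the 8 four more times */
    i = apply_rle(lens, i, 18, 0);      /* then 11 zeroes */
    printf("i = %u, lens[4] = %d, lens[5] = %d\n", i, lens[4], lens[5]);
    return 0;
}
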
246
247
5.80k
  } else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
248
912
    u16 len, nlen;
249
250
    /*
251
     * Uncompressed block: copy 'len' bytes literally from the input
252
     * buffer to the output buffer.
253
     */
254
255
912
    bitsleft -= 3; /* for BTYPE and BFINAL */
256
257
    /*
258
     * Align the bitstream to the next byte boundary.  This means
259
     * the next byte boundary as if we were reading a byte at a
260
     * time.  Therefore, we have to rewind 'in_next' by any bytes
261
     * that have been refilled but not actually consumed yet (not
262
     * counting overread bytes, which don't increment 'in_next').
263
     */
264
912
    bitsleft = (u8)bitsleft;
265
912
    SAFETY_CHECK(overread_count <= (bitsleft >> 3));
266
840
    in_next -= (bitsleft >> 3) - overread_count;
267
840
    overread_count = 0;
268
840
    bitbuf = 0;
269
840
    bitsleft = 0;
270
271
840
    SAFETY_CHECK(in_end - in_next >= 4);
272
783
    len = get_unaligned_le16(in_next);
273
783
    nlen = get_unaligned_le16(in_next + 2);
274
783
    in_next += 4;
275
276
783
    SAFETY_CHECK(len == (u16)~nlen);
277
630
    if (unlikely(len > out_end - out_next))
278
20
      return LIBDEFLATE_INSUFFICIENT_SPACE;
279
610
    SAFETY_CHECK(len <= in_end - in_next);
280
281
580
    memcpy(out_next, in_next, len);
282
580
    in_next += len;
283
580
    out_next += len;
284
285
580
    goto block_done;
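
The stored-block path byte-aligns the stream, then reads LEN and NLEN as little-endian 16-bit fields and requires NLEN to be the one's complement of LEN (RFC 1951, section 3.2.4). A minimal sketch of that header check on a hand-built block:

#include <stdint.h>
#include <stdio.h>

static uint16_t read_le16(const uint8_t *p)
{
    return (uint16_t)(p[0] | (p[1] << 8));
}

int main(void)
{
    /* LEN = 5, NLEN = ~5 = 0xFFFA, then five literal payload bytes */
    const uint8_t block[] = { 0x05, 0x00, 0xFA, 0xFF, 'h', 'e', 'l', 'l', 'o' };
    uint16_t len  = read_le16(block);
    uint16_t nlen = read_le16(block + 2);

    if (len != (uint16_t)~nlen) {
        printf("corrupt stored block\n");
        return 1;
    }
    printf("stored block carries %u literal bytes: %.*s\n",
           (unsigned)len, (int)len, (const char *)(block + 4));
    return 0;
}
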
286
287
4.89k
  } else {
288
4.89k
    unsigned i;
289
290
4.89k
    SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
291
292
    /*
293
     * Static Huffman block: build the decode tables for the static
294
     * codes.  Skip doing so if the tables are already set up from
295
     * an earlier static block; this speeds up decompression of
296
     * degenerate input of many empty or very short static blocks.
297
     *
298
     * Afterwards, the remainder is the same as decompressing a
299
     * dynamic Huffman block.
300
     */
301
302
4.84k
    bitbuf >>= 3; /* for BTYPE and BFINAL */
303
4.84k
    bitsleft -= 3;
304
305
4.84k
    if (d->static_codes_loaded)
306
873
      goto have_decode_tables;
307
308
3.97k
    d->static_codes_loaded = true;
309
310
3.97k
    STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
311
3.97k
    STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
312
313
576k
    for (i = 0; i < 144; i++)
314
572k
      d->u.l.lens[i] = 8;
315
449k
    for (; i < 256; i++)
316
445k
      d->u.l.lens[i] = 9;
317
99.4k
    for (; i < 280; i++)
318
95.4k
      d->u.l.lens[i] = 7;
319
35.7k
    for (; i < 288; i++)
320
31.8k
      d->u.l.lens[i] = 8;
321
322
131k
    for (; i < 288 + 32; i++)
323
127k
      d->u.l.lens[i] = 5;
324
325
3.97k
    num_litlen_syms = 288;
326
3.97k
    num_offset_syms = 32;
327
3.97k
  }
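
The loops above install the code lengths of the fixed Huffman codes from RFC 1951, section 3.2.6. The same assignment as a standalone snippet that also counts the 8-bit litlen codewords:

#include <stdio.h>

int main(void)
{
    unsigned char lens[288 + 32];
    unsigned i, count8 = 0;

    for (i = 0; i < 144; i++)      lens[i] = 8;   /* literals 0-143 */
    for (; i < 256; i++)           lens[i] = 9;   /* literals 144-255 */
    for (; i < 280; i++)           lens[i] = 7;   /* 256 (EOB) and lengths 257-279 */
    for (; i < 288; i++)           lens[i] = 8;   /* lengths 280-287 */
    for (; i < 288 + 32; i++)      lens[i] = 5;   /* all 32 offset codes */

    for (i = 0; i < 288; i++)
        if (lens[i] == 8)
            count8++;
    printf("%u litlen symbols use 8-bit codewords\n", count8); /* 152 */
    return 0;
}
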
328
329
  /* Decompressing a Huffman block (either dynamic or static) */
330
331
5.61k
  SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
332
5.56k
  SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
333
6.38k
have_decode_tables:
334
6.38k
  litlen_tablemask = BITMASK(d->litlen_tablebits);
335
336
  /*
337
   * This is the "fastloop" for decoding literals and matches.  It does
338
   * bounds checks on in_next and out_next in the loop conditions so that
339
   * additional bounds checks aren't needed inside the loop body.
340
   *
341
   * To reduce latency, the bitbuffer is refilled and the next litlen
342
   * decode table entry is preloaded before each loop iteration.
343
   */
344
6.38k
  if (in_next >= in_fastloop_end || out_next >= out_fastloop_end)
345
1.89k
    goto generic_loop;
346
4.49k
  REFILL_BITS_IN_FASTLOOP();
347
4.49k
  entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
348
106k
  do {
349
106k
    u32 length, offset, lit;
350
106k
    const u8 *src;
351
106k
    u8 *dst;
352
353
    /*
354
     * Consume the bits for the litlen decode table entry.  Save the
355
     * original bitbuf for later, in case the extra match length
356
     * bits need to be extracted from it.
357
     */
358
106k
    saved_bitbuf = bitbuf;
359
106k
    bitbuf >>= (u8)entry;
360
106k
    bitsleft -= entry; /* optimization: subtract full entry */
361
362
    /*
363
     * Begin by checking for a "fast" literal, i.e. a literal that
364
     * doesn't need a subtable.
365
     */
366
106k
    if (entry & HUFFDEC_LITERAL) {
367
      /*
368
       * On 64-bit platforms, we decode up to 2 extra fast
369
       * literals in addition to the primary item, as this
370
       * increases performance and still leaves enough bits
371
       * remaining for what follows.  We could actually do 3,
372
       * assuming LITLEN_TABLEBITS=11, but that actually
373
       * decreases performance slightly (perhaps by messing
374
       * with the branch prediction of the conditional refill
375
       * that happens later while decoding the match offset).
376
       *
377
       * Note: the definitions of FASTLOOP_MAX_BYTES_WRITTEN
378
       * and FASTLOOP_MAX_BYTES_READ need to be updated if the
379
       * number of extra literals decoded here is changed.
380
       */
381
65.3k
      if (/* enough bits for 2 fast literals + length + offset preload? */
382
65.3k
          CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
383
65.3k
               LENGTH_MAXBITS,
384
65.3k
               OFFSET_TABLEBITS) &&
385
          /* enough bits for 2 fast literals + slow literal + litlen preload? */
386
0
          CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
387
65.3k
               DEFLATE_MAX_LITLEN_CODEWORD_LEN,
388
65.3k
               LITLEN_TABLEBITS)) {
389
        /* 1st extra fast literal */
390
65.3k
        lit = entry >> 16;
391
65.3k
        entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
392
65.3k
        saved_bitbuf = bitbuf;
393
65.3k
        bitbuf >>= (u8)entry;
394
65.3k
        bitsleft -= entry;
395
65.3k
        *out_next++ = lit;
396
65.3k
        if (entry & HUFFDEC_LITERAL) {
397
          /* 2nd extra fast literal */
398
58.6k
          lit = entry >> 16;
399
58.6k
          entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
400
58.6k
          saved_bitbuf = bitbuf;
401
58.6k
          bitbuf >>= (u8)entry;
402
58.6k
          bitsleft -= entry;
403
58.6k
          *out_next++ = lit;
404
58.6k
          if (entry & HUFFDEC_LITERAL) {
405
            /*
406
             * Another fast literal, but
407
             * this one is in lieu of the
408
             * primary item, so it doesn't
409
             * count as one of the extras.
410
             */
411
54.8k
            lit = entry >> 16;
412
54.8k
            entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
413
54.8k
            REFILL_BITS_IN_FASTLOOP();
414
54.8k
            *out_next++ = lit;
415
54.8k
            continue;
416
54.8k
          }
417
58.6k
        }
418
65.3k
      } else {
419
        /*
420
         * Decode a literal.  While doing so, preload
421
         * the next litlen decode table entry and refill
422
         * the bitbuffer.  To reduce latency, we've
423
         * arranged for there to be enough "preloadable"
424
         * bits remaining to do the table preload
425
         * independently of the refill.
426
         */
427
0
        STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(
428
0
            LITLEN_TABLEBITS, LITLEN_TABLEBITS));
429
0
        lit = entry >> 16;
430
0
        entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
431
0
        REFILL_BITS_IN_FASTLOOP();
432
0
        *out_next++ = lit;
433
0
        continue;
434
0
      }
435
65.3k
    }
436
437
    /*
438
     * It's not a literal entry, so it can be a length entry, a
439
     * subtable pointer entry, or an end-of-block entry.  Detect the
440
     * two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag.
441
     */
442
51.8k
    if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
443
      /* Subtable pointer or end-of-block entry */
444
445
6.48k
      if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
446
2.64k
        goto block_done;
447
448
      /*
449
       * A subtable is required.  Load and consume the
450
       * subtable entry.  The subtable entry can be of any
451
       * type: literal, length, or end-of-block.
452
       */
453
3.84k
      entry = d->u.litlen_decode_table[(entry >> 16) +
454
3.84k
        EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
455
3.84k
      saved_bitbuf = bitbuf;
456
3.84k
      bitbuf >>= (u8)entry;
457
3.84k
      bitsleft -= entry;
458
459
      /*
460
       * 32-bit platforms that use the byte-at-a-time refill
461
       * method have to do a refill here for there to always
462
       * be enough bits to decode a literal that requires a
463
       * subtable, then preload the next litlen decode table
464
       * entry; or to decode a match length that requires a
465
       * subtable, then preload the offset decode table entry.
466
       */
467
3.84k
      if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN,
468
3.84k
                LITLEN_TABLEBITS) ||
469
0
          !CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS,
470
3.84k
                OFFSET_TABLEBITS))
471
0
        REFILL_BITS_IN_FASTLOOP();
472
3.84k
      if (entry & HUFFDEC_LITERAL) {
473
        /* Decode a literal that required a subtable. */
474
3.21k
        lit = entry >> 16;
475
3.21k
        entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
476
3.21k
        REFILL_BITS_IN_FASTLOOP();
477
3.21k
        *out_next++ = lit;
478
3.21k
        continue;
479
3.21k
      }
480
628
      if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
481
405
        goto block_done;
482
      /* Else, it's a length that required a subtable. */
483
628
    }
484
485
    /*
486
     * Decode the match length: the length base value associated
487
     * with the litlen symbol (which we extract from the decode
488
     * table entry), plus the extra length bits.  We don't need to
489
     * consume the extra length bits here, as they were included in
490
     * the bits consumed by the entry earlier.  We also don't need
491
     * to check for too-long matches here, as this is inside the
492
     * fastloop where it's already been verified that the output
493
     * buffer has enough space remaining to copy a max-length match.
494
     */
495
45.5k
    length = entry >> 16;
496
45.5k
    length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
497
498
    /*
499
     * Decode the match offset.  There are enough "preloadable" bits
500
     * remaining to preload the offset decode table entry, but a
501
     * refill might be needed before consuming it.
502
     */
503
45.5k
    STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS,
504
45.5k
                 OFFSET_TABLEBITS));
505
45.5k
    entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
506
45.5k
    if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS,
507
45.5k
             LITLEN_TABLEBITS)) {
508
      /*
509
       * Decoding a match offset on a 64-bit platform.  We may
510
       * need to refill once, but then we can decode the whole
511
       * offset and preload the next litlen table entry.
512
       */
513
45.5k
      if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
514
        /* Offset codeword requires a subtable */
515
0
        if (unlikely((u8)bitsleft < OFFSET_MAXBITS +
516
0
               LITLEN_TABLEBITS - PRELOAD_SLACK))
517
0
          REFILL_BITS_IN_FASTLOOP();
518
0
        bitbuf >>= OFFSET_TABLEBITS;
519
0
        bitsleft -= OFFSET_TABLEBITS;
520
0
        entry = d->offset_decode_table[(entry >> 16) +
521
0
          EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
522
45.5k
      } else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS +
523
45.5k
              LITLEN_TABLEBITS - PRELOAD_SLACK))
524
446
        REFILL_BITS_IN_FASTLOOP();
525
45.5k
    } else {
526
      /* Decoding a match offset on a 32-bit platform */
527
0
      REFILL_BITS_IN_FASTLOOP();
528
0
      if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
529
        /* Offset codeword requires a subtable */
530
0
        bitbuf >>= OFFSET_TABLEBITS;
531
0
        bitsleft -= OFFSET_TABLEBITS;
532
0
        entry = d->offset_decode_table[(entry >> 16) +
533
0
          EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
534
0
        REFILL_BITS_IN_FASTLOOP();
535
        /* No further refill needed before extra bits */
536
0
        STATIC_ASSERT(CAN_CONSUME(
537
0
          OFFSET_MAXBITS - OFFSET_TABLEBITS));
538
0
      } else {
539
        /* No refill needed before extra bits */
540
0
        STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS));
541
0
      }
542
0
    }
543
45.5k
    saved_bitbuf = bitbuf;
544
45.5k
    bitbuf >>= (u8)entry;
545
45.5k
    bitsleft -= entry; /* optimization: subtract full entry */
546
45.5k
    offset = entry >> 16;
547
45.5k
    offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
548
549
    /* Validate the match offset; needed even in the fastloop. */
550
45.5k
    SAFETY_CHECK(offset <= out_next - (const u8 *)out);
551
45.5k
    src = out_next - offset;
552
45.5k
    dst = out_next;
553
45.5k
    out_next += length;
554
555
    /*
556
     * Before starting to issue the instructions to copy the match,
557
     * refill the bitbuffer and preload the litlen decode table
558
     * entry for the next loop iteration.  This can increase
559
     * performance by allowing the latency of the match copy to
560
     * overlap with these other operations.  To further reduce
561
     * latency, we've arranged for there to be enough bits remaining
562
     * to do the table preload independently of the refill, except
563
     * on 32-bit platforms using the byte-at-a-time refill method.
564
     */
565
45.5k
    if (!CAN_CONSUME_AND_THEN_PRELOAD(
566
45.5k
      MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS,
567
45.5k
          OFFSET_MAXFASTBITS),
568
45.5k
      LITLEN_TABLEBITS) &&
569
0
        unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK))
570
0
      REFILL_BITS_IN_FASTLOOP();
571
45.5k
    entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
572
45.5k
    REFILL_BITS_IN_FASTLOOP();
573
574
    /*
575
     * Copy the match.  On most CPUs the fastest method is a
576
     * word-at-a-time copy, unconditionally copying about 5 words
577
     * since this is enough for most matches without being too much.
578
     *
579
     * The normal word-at-a-time copy works for offset >= WORDBYTES,
580
     * which is most cases.  The case of offset == 1 is also common
581
     * and is worth optimizing for, since it is just RLE encoding of
582
     * the previous byte, which is the result of compressing long
583
     * runs of the same byte.
584
     *
585
     * Writing past the match 'length' is allowed here, since it's
586
     * been ensured there is enough output space left for a slight
587
     * overrun.  FASTLOOP_MAX_BYTES_WRITTEN needs to be updated if
588
     * the maximum possible overrun here is changed.
589
     */
590
45.5k
    if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) {
591
13.0k
      store_word_unaligned(load_word_unaligned(src), dst);
592
13.0k
      src += WORDBYTES;
593
13.0k
      dst += WORDBYTES;
594
13.0k
      store_word_unaligned(load_word_unaligned(src), dst);
595
13.0k
      src += WORDBYTES;
596
13.0k
      dst += WORDBYTES;
597
13.0k
      store_word_unaligned(load_word_unaligned(src), dst);
598
13.0k
      src += WORDBYTES;
599
13.0k
      dst += WORDBYTES;
600
13.0k
      store_word_unaligned(load_word_unaligned(src), dst);
601
13.0k
      src += WORDBYTES;
602
13.0k
      dst += WORDBYTES;
603
13.0k
      store_word_unaligned(load_word_unaligned(src), dst);
604
13.0k
      src += WORDBYTES;
605
13.0k
      dst += WORDBYTES;
606
54.6k
      while (dst < out_next) {
607
41.5k
        store_word_unaligned(load_word_unaligned(src), dst);
608
41.5k
        src += WORDBYTES;
609
41.5k
        dst += WORDBYTES;
610
41.5k
        store_word_unaligned(load_word_unaligned(src), dst);
611
41.5k
        src += WORDBYTES;
612
41.5k
        dst += WORDBYTES;
613
41.5k
        store_word_unaligned(load_word_unaligned(src), dst);
614
41.5k
        src += WORDBYTES;
615
41.5k
        dst += WORDBYTES;
616
41.5k
        store_word_unaligned(load_word_unaligned(src), dst);
617
41.5k
        src += WORDBYTES;
618
41.5k
        dst += WORDBYTES;
619
41.5k
        store_word_unaligned(load_word_unaligned(src), dst);
620
41.5k
        src += WORDBYTES;
621
41.5k
        dst += WORDBYTES;
622
41.5k
      }
623
32.5k
    } else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) {
624
27.2k
      machine_word_t v;
625
626
      /*
627
       * This part tends to get auto-vectorized, so keep it
628
       * copying a multiple of 16 bytes at a time.
629
       */
630
27.2k
      v = (machine_word_t)0x0101010101010101 * src[0];
631
27.2k
      store_word_unaligned(v, dst);
632
27.2k
      dst += WORDBYTES;
633
27.2k
      store_word_unaligned(v, dst);
634
27.2k
      dst += WORDBYTES;
635
27.2k
      store_word_unaligned(v, dst);
636
27.2k
      dst += WORDBYTES;
637
27.2k
      store_word_unaligned(v, dst);
638
27.2k
      dst += WORDBYTES;
639
119k
      while (dst < out_next) {
640
92.7k
        store_word_unaligned(v, dst);
641
92.7k
        dst += WORDBYTES;
642
92.7k
        store_word_unaligned(v, dst);
643
92.7k
        dst += WORDBYTES;
644
92.7k
        store_word_unaligned(v, dst);
645
92.7k
        dst += WORDBYTES;
646
92.7k
        store_word_unaligned(v, dst);
647
92.7k
        dst += WORDBYTES;
648
92.7k
      }
649
27.2k
    } else if (UNALIGNED_ACCESS_IS_FAST) {
650
5.29k
      store_word_unaligned(load_word_unaligned(src), dst);
651
5.29k
      src += offset;
652
5.29k
      dst += offset;
653
5.29k
      store_word_unaligned(load_word_unaligned(src), dst);
654
5.29k
      src += offset;
655
5.29k
      dst += offset;
656
91.9k
      do {
657
91.9k
        store_word_unaligned(load_word_unaligned(src), dst);
658
91.9k
        src += offset;
659
91.9k
        dst += offset;
660
91.9k
        store_word_unaligned(load_word_unaligned(src), dst);
661
91.9k
        src += offset;
662
91.9k
        dst += offset;
663
91.9k
      } while (dst < out_next);
664
5.29k
    } else {
665
0
      *dst++ = *src++;
666
0
      *dst++ = *src++;
667
0
      do {
668
0
        *dst++ = *src++;
669
0
      } while (dst < out_next);
670
0
    }
671
103k
  } while (in_next < in_fastloop_end && out_next < out_fastloop_end);
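
The branches above choose a copy strategy from the match offset: word-at-a-time for offsets of at least a word, a byte broadcast for offset == 1, and short strides otherwise. A simplified sketch of the same logic using memcpy/memset instead of the library's unaligned-access helpers; copy_match() is hypothetical, and the word path assumes the output buffer has slack past the match, as the fastloop guarantees above:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical helper: copy a 'length'-byte match that starts 'offset'
 * bytes behind position 'out_pos' in 'out'. */
static void copy_match(uint8_t *out, size_t out_pos,
                       unsigned offset, unsigned length)
{
    uint8_t *dst = out + out_pos;
    const uint8_t *src = dst - offset;

    if (offset == 1) {
        /* RLE case: every output byte repeats the previous byte. */
        memset(dst, src[0], length);
    } else if (offset >= sizeof(uint64_t)) {
        /* Word-at-a-time copy; slightly overrunning 'length' is harmless
         * only because the caller keeps slack at the end of 'out'. */
        uint8_t *end = dst + length;
        do {
            uint64_t w;
            memcpy(&w, src, sizeof(w));
            memcpy(dst, &w, sizeof(w));
            src += sizeof(w);
            dst += sizeof(w);
        } while (dst < end);
    } else {
        /* Short offsets: byte-at-a-time keeps the overlap semantics right. */
        while (length--)
            *dst++ = *src++;
    }
}

int main(void)
{
    uint8_t out[64] = "abcabc";
    copy_match(out, 6, 3, 9);      /* extend the "abc" pattern by 9 bytes */
    printf("%.15s\n", out);        /* abcabcabcabcabc */
    return 0;
}
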
672
673
  /*
674
   * This is the generic loop for decoding literals and matches.  This
675
   * handles cases where in_next and out_next are close to the end of
676
   * their respective buffers.  Usually this loop isn't performance-
677
   * critical, as most time is spent in the fastloop above instead.  We
678
   * therefore omit some optimizations here in favor of smaller code.
679
   */
680
3.32k
generic_loop:
681
58.2k
  for (;;) {
682
58.2k
    u32 length, offset;
683
58.2k
    const u8 *src;
684
58.2k
    u8 *dst;
685
686
58.2k
    REFILL_BITS();
687
57.9k
    entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
688
57.9k
    saved_bitbuf = bitbuf;
689
57.9k
    bitbuf >>= (u8)entry;
690
57.9k
    bitsleft -= entry;
691
57.9k
    if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) {
692
379
      entry = d->u.litlen_decode_table[(entry >> 16) +
693
379
          EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
694
379
      saved_bitbuf = bitbuf;
695
379
      bitbuf >>= (u8)entry;
696
379
      bitsleft -= entry;
697
379
    }
698
57.9k
    length = entry >> 16;
699
57.9k
    if (entry & HUFFDEC_LITERAL) {
700
44.5k
      if (unlikely(out_next == out_end))
701
10
        return LIBDEFLATE_INSUFFICIENT_SPACE;
702
44.5k
      *out_next++ = length;
703
44.5k
      continue;
704
44.5k
    }
705
13.3k
    if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
706
2.87k
      goto block_done;
707
10.5k
    length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
708
10.5k
    if (unlikely(length > out_end - out_next))
709
27
      return LIBDEFLATE_INSUFFICIENT_SPACE;
710
711
10.4k
    if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS))
712
0
      REFILL_BITS();
713
10.4k
    entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
714
10.4k
    if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
715
0
      bitbuf >>= OFFSET_TABLEBITS;
716
0
      bitsleft -= OFFSET_TABLEBITS;
717
0
      entry = d->offset_decode_table[(entry >> 16) +
718
0
          EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
719
0
      if (!CAN_CONSUME(OFFSET_MAXBITS))
720
0
        REFILL_BITS();
721
0
    }
722
10.4k
    offset = entry >> 16;
723
10.4k
    offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8);
724
10.4k
    bitbuf >>= (u8)entry;
725
10.4k
    bitsleft -= entry;
726
727
10.4k
    SAFETY_CHECK(offset <= out_next - (const u8 *)out);
728
10.3k
    src = out_next - offset;
729
10.3k
    dst = out_next;
730
10.3k
    out_next += length;
731
732
10.3k
    STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
733
10.3k
    *dst++ = *src++;
734
10.3k
    *dst++ = *src++;
735
1.14M
    do {
736
1.14M
      *dst++ = *src++;
737
1.14M
    } while (dst < out_next);
738
10.3k
  }
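
Both the fastloop and the generic loop decode a match length the same way: a base value from the high bits of the decode-table entry plus extra bits recovered from the saved bit buffer. The sketch below uses an illustrative entry packing chosen to mirror the expressions above; the field layout is an assumption for the example, not a statement of libdeflate's actual table format:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Illustrative entry for length code 266 (base 13, 1 extra bit) reached
     * through a 7-bit codeword: bits 16+ = base value, bits 8-15 = codeword
     * bits, bits 0-7 = total bits to consume (codeword + extra). */
    uint32_t entry = (13u << 16) | (7u << 8) | 8u;

    uint64_t bitbuf = 0xB5;                 /* bits 0-6: codeword, bit 7: extra */
    uint64_t saved_bitbuf = bitbuf;
    uint32_t bitsleft = 64;

    bitbuf >>= (uint8_t)entry;              /* consume codeword and extra bits */
    bitsleft -= (uint8_t)entry;             /* only the low 8 bits matter later */

    uint32_t consumed = saved_bitbuf & ((1u << (uint8_t)entry) - 1);
    uint32_t extra    = consumed >> (uint8_t)(entry >> 8);
    uint32_t length   = (entry >> 16) + extra;

    printf("decoded length %u, %u bits left in the buffer\n",
           length, (unsigned)(uint8_t)bitsleft);
    return 0;
}
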
739
740
6.50k
block_done:
741
  /* Finished decoding a block */
742
743
6.50k
  if (!is_final_block)
744
4.44k
    goto next_block;
745
746
  /* That was the last block. */
747
748
2.06k
  bitsleft = (u8)bitsleft;
749
750
  /*
751
   * If any of the implicit appended zero bytes were consumed (not just
752
   * refilled) before hitting end of stream, then the data is bad.
753
   */
754
2.06k
  SAFETY_CHECK(overread_count <= (bitsleft >> 3));
755
756
  /* Optionally return the actual number of bytes consumed. */
757
2.00k
  if (actual_in_nbytes_ret) {
758
    /* Don't count bytes that were refilled but not consumed. */
759
2.00k
    in_next -= (bitsleft >> 3) - overread_count;
760
761
2.00k
    *actual_in_nbytes_ret = in_next - (u8 *)in;
762
2.00k
  }
763
764
  /* Optionally return the actual number of bytes written. */
765
2.00k
  if (actual_out_nbytes_ret) {
766
2.00k
    *actual_out_nbytes_ret = out_next - (u8 *)out;
767
2.00k
  } else {
768
3
    if (out_next != out_end)
769
1
      return LIBDEFLATE_SHORT_OUTPUT;
770
3
  }
771
2.00k
  return LIBDEFLATE_SUCCESS;
772
2.00k
}
compression.c:deflate_decompress_bmi2
(line counts for this instantiation are identical to the combined listing above)
Unexecuted instantiation: compression.c:deflate_decompress_default
773
774
#undef FUNCNAME
775
#undef ATTRIBUTES
776
#undef EXTRACT_VARBITS
777
#undef EXTRACT_VARBITS8
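
For context, the routine instantiated from this template is reached through libdeflate's public API. A minimal usage sketch; it assumes libdeflate.h is on the include path and the program links against libdeflate:

#include <stdio.h>
#include <libdeflate.h>

int main(void)
{
    /* 0x03 0x00 is an empty raw DEFLATE stream: one final static-Huffman
     * block containing only the end-of-block code, so the expected result
     * is LIBDEFLATE_SUCCESS with zero output bytes. */
    const unsigned char deflate_data[] = { 0x03, 0x00 };
    unsigned char out[256];
    size_t actual_out = 0;

    struct libdeflate_decompressor *d = libdeflate_alloc_decompressor();
    if (!d)
        return 1;

    enum libdeflate_result r = libdeflate_deflate_decompress(
            d, deflate_data, sizeof(deflate_data),
            out, sizeof(out), &actual_out);

    printf("result=%d, %zu bytes decompressed\n", (int)r, actual_out);
    libdeflate_free_decompressor(d);
    return r == LIBDEFLATE_SUCCESS ? 0 : 1;
}
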