Coverage Report

Created: 2026-02-24 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/inftrees.c
Line
Count
Source
1
/* inftrees.c -- generate Huffman trees for efficient decoding
2
 * Copyright (C) 1995-2024 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 */
5
6
#include "zbuild.h"
7
#include "zutil.h"
8
#include "inftrees.h"
9
#include "inflate_p.h"
10
#include "fallback_builtins.h"
11
12
#if defined(__SSE2__)
13
#  include "arch/x86/x86_intrins.h"
14
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
15
#  include "arch/arm/neon_intrins.h"
16
#endif
17
18
const char PREFIX(inflate_copyright)[] = " inflate 1.3.1 Copyright 1995-2024 Mark Adler ";
19
/*
20
  If you use the zlib library in a product, an acknowledgment is welcome
21
  in the documentation of your product. If for some reason you cannot
22
  include such an acknowledgment, I would appreciate that you keep this
23
  copyright string in the executable of your product.
24
 */
25
26
/* Count number of codes for each code length. */
27
90.3k
static inline void count_lengths(uint16_t *lens, int codes, uint16_t *count) {
28
90.3k
    int sym;
29
90.3k
    static const ALIGNED_(16) uint8_t one[256] = {
30
90.3k
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31
90.3k
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
90.3k
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33
90.3k
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
90.3k
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
90.3k
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
90.3k
        0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
90.3k
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
38
90.3k
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
39
90.3k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
40
90.3k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
41
90.3k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
42
90.3k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
43
90.3k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
44
90.3k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
45
90.3k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
46
90.3k
    };
47
48
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
49
    uint8x16_t s1 = vdupq_n_u8(0);
50
    uint8x16_t s2 = vdupq_n_u8(0);
51
52
    if (codes & 1) {
53
        s1 = vld1q_u8(&one[16 * lens[0]]);
54
    }
55
    for (sym = codes & 1; sym < codes; sym += 2) {
56
      s1 = vaddq_u8(s1, vld1q_u8(&one[16 * lens[sym]]));
57
      s2 = vaddq_u8(s2, vld1q_u8(&one[16 * lens[sym+1]]));
58
    }
59
60
    vst1q_u16(&count[0], vaddl_u8(vget_low_u8(s1), vget_low_u8(s2)));
61
    vst1q_u16(&count[8], vaddl_u8(vget_high_u8(s1), vget_high_u8(s2)));
62
63
#elif defined(__SSE2__)
64
    __m128i s1 = _mm_setzero_si128();
65
90.3k
    __m128i s2 = _mm_setzero_si128();
66
67
90.3k
    if (codes & 1) {
68
55.7k
        s1 = _mm_load_si128((const __m128i*)&one[16 * lens[0]]);
69
55.7k
    }
70
4.66M
    for (sym = codes & 1; sym < codes; sym += 2) {
71
4.57M
        s1 = _mm_add_epi8(s1, _mm_load_si128((const __m128i*)&one[16 * lens[sym]]));  // vaddq_u8
72
4.57M
        s2 = _mm_add_epi8(s2, _mm_load_si128((const __m128i*)&one[16 * lens[sym+1]]));
73
4.57M
    }
74
75
#  if defined(__AVX2__)
76
    __m256i w1 = _mm256_cvtepu8_epi16(s1);
77
    __m256i w2 = _mm256_cvtepu8_epi16(s2);
78
    __m256i sum = _mm256_add_epi16(w1, w2);
79
80
    _mm256_storeu_si256((__m256i*)&count[0], sum);
81
#  else
82
90.3k
    __m128i zero = _mm_setzero_si128();
83
84
90.3k
    __m128i s1_lo = _mm_unpacklo_epi8(s1, zero);
85
90.3k
    __m128i s2_lo = _mm_unpacklo_epi8(s2, zero);
86
90.3k
    __m128i sum_lo = _mm_add_epi16(s1_lo, s2_lo);
87
90.3k
    _mm_storeu_si128((__m128i*)&count[0], sum_lo);
88
89
90.3k
    __m128i s1_hi = _mm_unpackhi_epi8(s1, zero);
90
90.3k
    __m128i s2_hi = _mm_unpackhi_epi8(s2, zero);
91
90.3k
    __m128i sum_hi = _mm_add_epi16(s1_hi, s2_hi);
92
90.3k
    _mm_storeu_si128((__m128i*)&count[8], sum_hi);
93
90.3k
#  endif
94
#else
95
    int len;
96
    for (len = 0; len <= MAX_BITS; len++)
97
        count[len] = 0;
98
    for (sym = 0; sym < codes; sym++)
99
        count[lens[sym]]++;
100
    Z_UNUSED(one);
101
#endif
102
90.3k
}
103
104
/*
105
   Build a set of tables to decode the provided canonical Huffman code.
106
   The code lengths are lens[0..codes-1].  The result starts at *table,
107
   whose indices are 0..2^bits-1.  work is a writable array of at least
108
   lens shorts, which is used as a work area.  type is the type of code
109
   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
110
   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
111
   on return points to the next available entry's address.  bits is the
112
   requested root table index bits, and on return it is the actual root
113
   table index bits.  It will differ if the request is greater than the
114
   longest code or if it is less than the shortest code.
115
 */
116
int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes,
117
90.3k
                                 code * *table, unsigned *bits, uint16_t *work) {
118
90.3k
    unsigned len;               /* a code's length in bits */
119
90.3k
    unsigned sym;               /* index of code symbols */
120
90.3k
    unsigned min, max;          /* minimum and maximum code lengths */
121
90.3k
    unsigned root;              /* number of index bits for root table */
122
90.3k
    unsigned curr;              /* number of index bits for current table */
123
90.3k
    unsigned drop;              /* code bits to drop for sub-table */
124
90.3k
    int left;                   /* number of prefix codes available */
125
90.3k
    unsigned used;              /* code entries in table used */
126
90.3k
    uint16_t rhuff;             /* Reversed huffman code */
127
90.3k
    unsigned huff;              /* Huffman code */
128
90.3k
    unsigned incr;              /* for incrementing code, index */
129
90.3k
    unsigned fill;              /* index for replicating entries */
130
90.3k
    unsigned low;               /* low bits for current root entry */
131
90.3k
    unsigned mask;              /* mask for low root bits */
132
90.3k
    code here;                  /* table entry for duplication */
133
90.3k
    code *next;                 /* next available space in table */
134
90.3k
    const uint16_t *base;       /* base value table to use */
135
90.3k
    const uint16_t *extra;      /* extra bits table to use */
136
90.3k
    unsigned match;             /* use base and extra for symbol >= match */
137
90.3k
    uint16_t count[MAX_BITS+1]; /* number of codes of each length */
138
90.3k
    uint16_t offs[MAX_BITS+1];  /* offsets in table for each length */
139
90.3k
    static const uint16_t lbase[31] = { /* Length codes 257..285 base */
140
90.3k
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
141
90.3k
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
142
90.3k
    static const uint16_t lext[31] = { /* Length codes 257..285 extra */
143
90.3k
        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
144
90.3k
        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77};
145
90.3k
    static const uint16_t dbase[32] = { /* Distance codes 0..29 base */
146
90.3k
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
147
90.3k
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
148
90.3k
        8193, 12289, 16385, 24577, 0, 0};
149
90.3k
    static const uint16_t dext[32] = { /* Distance codes 0..29 extra */
150
90.3k
        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
151
90.3k
        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
152
90.3k
        28, 28, 29, 29, 64, 64};
153
154
    /*
155
       Process a set of code lengths to create a canonical Huffman code.  The
156
       code lengths are lens[0..codes-1].  Each length corresponds to the
157
       symbols 0..codes-1.  The Huffman code is generated by first sorting the
158
       symbols by length from short to long, and retaining the symbol order
159
       for codes with equal lengths.  Then the code starts with all zero bits
160
       for the first code of the shortest length, and the codes are integer
161
       increments for the same length, and zeros are appended as the length
162
       increases.  For the deflate format, these bits are stored backwards
163
       from their more natural integer increment ordering, and so when the
164
       decoding tables are built in the large loop below, the integer codes
165
       are incremented backwards.
166
167
       This routine assumes, but does not check, that all of the entries in
168
       lens[] are in the range 0..MAXBITS.  The caller must assure this.
169
       1..MAXBITS is interpreted as that code length.  zero means that that
170
       symbol does not occur in this code.
171
172
       The codes are sorted by computing a count of codes for each length,
173
       creating from that a table of starting indices for each length in the
174
       sorted table, and then entering the symbols in order in the sorted
175
       table.  The sorted table is work[], with that space being provided by
176
       the caller.
177
178
       The length counts are used for other purposes as well, i.e. finding
179
       the minimum and maximum length codes, determining if there are any
180
       codes at all, checking for a valid set of lengths, and looking ahead
181
       at length counts to determine sub-table sizes when building the
182
       decoding tables.
183
     */
184
185
    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
186
90.3k
    count_lengths(lens, codes, count);
187
188
    /* bound code lengths, force root to be within code lengths */
189
90.3k
    root = *bits;
190
848k
    for (max = MAX_BITS; max >= 1; max--)
191
846k
        if (count[max] != 0) break;
192
90.3k
    root = MIN(root, max);
193
90.3k
    if (UNLIKELY(max == 0)) {           /* no symbols to code at all */
194
1.32k
        here.op = (unsigned char)64;    /* invalid code marker */
195
1.32k
        here.bits = (unsigned char)1;
196
1.32k
        here.val = (uint16_t)0;
197
1.32k
        *(*table)++ = here;             /* make a table to force an error */
198
1.32k
        *(*table)++ = here;
199
1.32k
        *bits = 1;
200
1.32k
        return 0;     /* no symbols, but wait for decoding to report error */
201
1.32k
    }
202
218k
    for (min = 1; min < max; min++)
203
197k
        if (count[min] != 0) break;
204
88.9k
    root = MAX(root, min);
205
206
    /* check for an over-subscribed or incomplete set of lengths */
207
88.9k
    left = 1;
208
1.41M
    for (len = 1; len <= MAX_BITS; len++) {
209
1.32M
        left <<= 1;
210
1.32M
        left -= count[len];
211
1.32M
        if (left < 0) return -1;        /* over-subscribed */
212
1.32M
    }
213
88.5k
    if (left > 0 && (type == CODES || max != 1))
214
394
        return -1;                      /* incomplete set */
215
216
    /* generate offsets into symbol table for each length for sorting */
217
88.1k
    offs[1] = 0;
218
1.32M
    for (len = 1; len < MAX_BITS; len++)
219
1.23M
        offs[len + 1] = offs[len] + count[len];
220
221
    /* sort symbols by length, by symbol order within each length */
222
9.21M
    for (sym = 0; sym < codes; sym++)
223
9.12M
        if (lens[sym] != 0) work[offs[lens[sym]]++] = (uint16_t)sym;
224
225
    /*
226
       Create and fill in decoding tables.  In this loop, the table being
227
       filled is at next and has curr index bits.  The code being used is huff
228
       with length len.  That code is converted to an index by dropping drop
229
       bits off of the bottom.  For codes where len is less than drop + curr,
230
       those top drop + curr - len bits are incremented through all values to
231
       fill the table with replicated entries.
232
233
       root is the number of index bits for the root table.  When len exceeds
234
       root, sub-tables are created pointed to by the root entry with an index
235
       of the low root bits of huff.  This is saved in low to check for when a
236
       new sub-table should be started.  drop is zero when the root table is
237
       being filled, and drop is root when sub-tables are being filled.
238
239
       When a new sub-table is needed, it is necessary to look ahead in the
240
       code lengths to determine what size sub-table is needed.  The length
241
       counts are used for this, and so count[] is decremented as codes are
242
       entered in the tables.
243
244
       used keeps track of how many table entries have been allocated from the
245
       provided *table space.  It is checked for LENS and DIST tables against
246
       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
247
       the initial root table size constants.  See the comments in inftrees.h
248
       for more information.
249
250
       sym increments through all symbols, and the loop terminates when
251
       all codes of length max, i.e. all codes, have been processed.  This
252
       routine permits incomplete codes, so another loop after this one fills
253
       in the rest of the decoding tables with invalid code markers.
254
     */
255
256
    /* set up for code type */
257
88.1k
    switch (type) {
258
30.1k
    case CODES:
259
30.1k
        base = extra = work;    /* dummy value--not used */
260
30.1k
        match = 20;
261
30.1k
        break;
262
29.6k
    case LENS:
263
29.6k
        base = lbase;
264
29.6k
        extra = lext;
265
29.6k
        match = 257;
266
29.6k
        break;
267
28.3k
    default:    /* DISTS */
268
28.3k
        base = dbase;
269
28.3k
        extra = dext;
270
28.3k
        match = 0;
271
88.1k
    }
272
273
    /* initialize state for loop */
274
88.1k
    rhuff = 0;                  /* starting code, reversed */
275
88.1k
    huff = 0;                   /* starting code */
276
88.1k
    sym = 0;                    /* starting code symbol */
277
88.1k
    len = min;                  /* starting code length */
278
88.1k
    next = *table;              /* current table to fill in */
279
88.1k
    curr = root;                /* current table index bits */
280
88.1k
    drop = 0;                   /* current bits to drop from code for index */
281
88.1k
    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
282
88.1k
    used = 1U << root;          /* use root table entries */
283
88.1k
    mask = used - 1;            /* mask for comparing low */
284
285
    /* check available table space */
286
88.1k
    if ((type == LENS && used > ENOUGH_LENS) ||
287
88.1k
        (type == DISTS && used > ENOUGH_DISTS))
288
0
        return 1;
289
290
    /* process all codes and make table entries */
291
5.19M
    for (;;) {
292
        /* create table entry */
293
5.19M
        here.bits = (unsigned char)(len - drop);
294
5.19M
        if (LIKELY(work[sym] >= match)) {
295
537k
            unsigned op = extra[work[sym] - match];
296
537k
            here.op = COMBINE_OP(op, here.bits);
297
537k
            here.bits = COMBINE_BITS(here.bits, op);
298
537k
            here.val = base[work[sym] - match];
299
4.65M
        } else if (work[sym] + 1U < match) {
300
4.62M
            here.op = (unsigned char)0;
301
4.62M
            here.val = work[sym];
302
4.62M
        } else {
303
29.6k
            here.op = (unsigned char)(32 + 64);         /* end of block */
304
29.6k
            here.val = 0;
305
29.6k
        }
306
307
        /* replicate for those indices with low len bits equal to huff */
308
5.19M
        incr = 1U << (len - drop);
309
5.19M
        fill = 1U << curr;
310
5.19M
        min = fill;                 /* save offset to next table */
311
26.6M
        do {
312
26.6M
            fill -= incr;
313
26.6M
            next[(huff >> drop) + fill] = here;
314
26.6M
        } while (fill != 0);
315
316
        /* backwards increment the len-bit code huff */
317
5.19M
        rhuff += (0x8000u >> (len - 1));
318
5.19M
        huff = zng_bitreverse16(rhuff);
319
320
        /* go to next symbol, update count, len */
321
5.19M
        sym++;
322
5.19M
        if (--(count[len]) == 0) {
323
416k
            if (len == max)
324
88.1k
                break;
325
328k
            len = lens[work[sym]];
326
328k
        }
327
328
        /* create new sub-table if needed */
329
5.10M
        if (len > root && (huff & mask) != low) {
330
            /* if first time, transition to sub-tables */
331
141k
            if (drop == 0)
332
19.9k
                drop = root;
333
334
            /* increment past last table */
335
141k
            next += min;            /* here min is 1 << curr */
336
337
            /* determine length of next table */
338
141k
            curr = len - drop;
339
141k
            left = (int)(1 << curr);
340
172k
            while (curr + drop < max) {
341
121k
                left -= count[curr + drop];
342
121k
                if (left <= 0)
343
91.2k
                    break;
344
30.6k
                curr++;
345
30.6k
                left <<= 1;
346
30.6k
            }
347
348
            /* check for enough space */
349
141k
            used += 1U << curr;
350
141k
            if ((type == LENS && used > ENOUGH_LENS) || (type == DISTS && used > ENOUGH_DISTS))
351
0
                return 1;
352
353
            /* point entry in root table to sub-table */
354
141k
            low = huff & mask;
355
141k
            (*table)[low].op = (unsigned char)curr;
356
141k
            (*table)[low].bits = (unsigned char)root;
357
141k
            (*table)[low].val = (uint16_t)(next - *table);
358
141k
        }
359
5.10M
    }
360
361
    /* fill in remaining table entry if code is incomplete (guaranteed to have
362
       at most one remaining entry, since if the code is incomplete, the
363
       maximum code length that was allowed to get this far is one bit) */
364
88.1k
    if (UNLIKELY(huff != 0)) {
365
7.53k
        here.op = (unsigned char)64;            /* invalid code marker */
366
7.53k
        here.bits = (unsigned char)(len - drop);
367
7.53k
        here.val = (uint16_t)0;
368
7.53k
        next[huff] = here;
369
7.53k
    }
370
371
    /* set return parameters */
372
88.1k
    *table += used;
373
88.1k
    *bits = root;
374
88.1k
    return 0;
375
88.1k
}