Coverage Report

Created: 2026-02-24 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/inftrees.c
Line
Count
Source
1
/* inftrees.c -- generate Huffman trees for efficient decoding
2
 * Copyright (C) 1995-2024 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 */
5
6
#include "zbuild.h"
7
#include "zutil.h"
8
#include "inftrees.h"
9
#include "inflate_p.h"
10
#include "fallback_builtins.h"
11
12
#if defined(__SSE2__)
13
#  include "arch/x86/x86_intrins.h"
14
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
15
#  include "arch/arm/neon_intrins.h"
16
#endif
17
18
const char PREFIX(inflate_copyright)[] = " inflate 1.3.1 Copyright 1995-2024 Mark Adler ";
19
/*
20
  If you use the zlib library in a product, an acknowledgment is welcome
21
  in the documentation of your product. If for some reason you cannot
22
  include such an acknowledgment, I would appreciate that you keep this
23
  copyright string in the executable of your product.
24
 */
25
26
/* Count number of codes for each code length. */
27
38.7k
static inline void count_lengths(uint16_t *lens, int codes, uint16_t *count) {
28
38.7k
    int sym;
29
38.7k
    static const ALIGNED_(16) uint8_t one[256] = {
30
38.7k
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31
38.7k
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
38.7k
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33
38.7k
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
38.7k
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
38.7k
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
38.7k
        0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
38.7k
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
38
38.7k
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
39
38.7k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
40
38.7k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
41
38.7k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
42
38.7k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
43
38.7k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
44
38.7k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
45
38.7k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
46
38.7k
    };
47
48
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
49
    uint8x16_t s1 = vdupq_n_u8(0);
50
    uint8x16_t s2 = vdupq_n_u8(0);
51
52
    if (codes & 1) {
53
        s1 = vld1q_u8(&one[16 * lens[0]]);
54
    }
55
    for (sym = codes & 1; sym < codes; sym += 2) {
56
      s1 = vaddq_u8(s1, vld1q_u8(&one[16 * lens[sym]]));
57
      s2 = vaddq_u8(s2, vld1q_u8(&one[16 * lens[sym+1]]));
58
    }
59
60
    vst1q_u16(&count[0], vaddl_u8(vget_low_u8(s1), vget_low_u8(s2)));
61
    vst1q_u16(&count[8], vaddl_u8(vget_high_u8(s1), vget_high_u8(s2)));
62
63
#elif defined(__SSE2__)
64
    __m128i s1 = _mm_setzero_si128();
65
38.7k
    __m128i s2 = _mm_setzero_si128();
66
67
38.7k
    if (codes & 1) {
68
21.8k
        s1 = _mm_load_si128((const __m128i*)&one[16 * lens[0]]);
69
21.8k
    }
70
1.96M
    for (sym = codes & 1; sym < codes; sym += 2) {
71
1.92M
        s1 = _mm_add_epi8(s1, _mm_load_si128((const __m128i*)&one[16 * lens[sym]]));  // vaddq_u8
72
1.92M
        s2 = _mm_add_epi8(s2, _mm_load_si128((const __m128i*)&one[16 * lens[sym+1]]));
73
1.92M
    }
74
75
#  if defined(__AVX2__)
76
    __m256i w1 = _mm256_cvtepu8_epi16(s1);
77
    __m256i w2 = _mm256_cvtepu8_epi16(s2);
78
    __m256i sum = _mm256_add_epi16(w1, w2);
79
80
    _mm256_storeu_si256((__m256i*)&count[0], sum);
81
#  else
82
38.7k
    __m128i zero = _mm_setzero_si128();
83
84
38.7k
    __m128i s1_lo = _mm_unpacklo_epi8(s1, zero);
85
38.7k
    __m128i s2_lo = _mm_unpacklo_epi8(s2, zero);
86
38.7k
    __m128i sum_lo = _mm_add_epi16(s1_lo, s2_lo);
87
38.7k
    _mm_storeu_si128((__m128i*)&count[0], sum_lo);
88
89
38.7k
    __m128i s1_hi = _mm_unpackhi_epi8(s1, zero);
90
38.7k
    __m128i s2_hi = _mm_unpackhi_epi8(s2, zero);
91
38.7k
    __m128i sum_hi = _mm_add_epi16(s1_hi, s2_hi);
92
38.7k
    _mm_storeu_si128((__m128i*)&count[8], sum_hi);
93
38.7k
#  endif
94
#else
95
    int len;
96
    for (len = 0; len <= MAX_BITS; len++)
97
        count[len] = 0;
98
    for (sym = 0; sym < codes; sym++)
99
        count[lens[sym]]++;
100
    Z_UNUSED(one);
101
#endif
102
38.7k
}
103
104
/*
105
   Build a set of tables to decode the provided canonical Huffman code.
106
   The code lengths are lens[0..codes-1].  The result starts at *table,
107
   whose indices are 0..2^bits-1.  work is a writable array of at least
108
   lens shorts, which is used as a work area.  type is the type of code
109
   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
110
   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
111
   on return points to the next available entry's address.  bits is the
112
   requested root table index bits, and on return it is the actual root
113
   table index bits.  It will differ if the request is greater than the
114
   longest code or if it is less than the shortest code.
115
 */
116
int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes,
117
38.7k
                                 code * *table, unsigned *bits, uint16_t *work) {
118
38.7k
    unsigned len;               /* a code's length in bits */
119
38.7k
    unsigned sym;               /* index of code symbols */
120
38.7k
    unsigned min, max;          /* minimum and maximum code lengths */
121
38.7k
    unsigned root;              /* number of index bits for root table */
122
38.7k
    unsigned curr;              /* number of index bits for current table */
123
38.7k
    unsigned drop;              /* code bits to drop for sub-table */
124
38.7k
    int left;                   /* number of prefix codes available */
125
38.7k
    unsigned used;              /* code entries in table used */
126
38.7k
    uint16_t rhuff;             /* Reversed huffman code */
127
38.7k
    unsigned huff;              /* Huffman code */
128
38.7k
    unsigned incr;              /* for incrementing code, index */
129
38.7k
    unsigned fill;              /* index for replicating entries */
130
38.7k
    unsigned low;               /* low bits for current root entry */
131
38.7k
    unsigned mask;              /* mask for low root bits */
132
38.7k
    code here;                  /* table entry for duplication */
133
38.7k
    code *next;                 /* next available space in table */
134
38.7k
    const uint16_t *base;       /* base value table to use */
135
38.7k
    const uint16_t *extra;      /* extra bits table to use */
136
38.7k
    unsigned match;             /* use base and extra for symbol >= match */
137
38.7k
    uint16_t count[MAX_BITS+1]; /* number of codes of each length */
138
38.7k
    uint16_t offs[MAX_BITS+1];  /* offsets in table for each length */
139
38.7k
    static const uint16_t lbase[31] = { /* Length codes 257..285 base */
140
38.7k
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
141
38.7k
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
142
38.7k
    static const uint16_t lext[31] = { /* Length codes 257..285 extra */
143
38.7k
        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
144
38.7k
        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77};
145
38.7k
    static const uint16_t dbase[32] = { /* Distance codes 0..29 base */
146
38.7k
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
147
38.7k
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
148
38.7k
        8193, 12289, 16385, 24577, 0, 0};
149
38.7k
    static const uint16_t dext[32] = { /* Distance codes 0..29 extra */
150
38.7k
        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
151
38.7k
        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
152
38.7k
        28, 28, 29, 29, 64, 64};
153
154
    /*
155
       Process a set of code lengths to create a canonical Huffman code.  The
156
       code lengths are lens[0..codes-1].  Each length corresponds to the
157
       symbols 0..codes-1.  The Huffman code is generated by first sorting the
158
       symbols by length from short to long, and retaining the symbol order
159
       for codes with equal lengths.  Then the code starts with all zero bits
160
       for the first code of the shortest length, and the codes are integer
161
       increments for the same length, and zeros are appended as the length
162
       increases.  For the deflate format, these bits are stored backwards
163
       from their more natural integer increment ordering, and so when the
164
       decoding tables are built in the large loop below, the integer codes
165
       are incremented backwards.
166
167
       This routine assumes, but does not check, that all of the entries in
168
       lens[] are in the range 0..MAXBITS.  The caller must assure this.
169
       1..MAXBITS is interpreted as that code length.  zero means that that
170
       symbol does not occur in this code.
171
172
       The codes are sorted by computing a count of codes for each length,
173
       creating from that a table of starting indices for each length in the
174
       sorted table, and then entering the symbols in order in the sorted
175
       table.  The sorted table is work[], with that space being provided by
176
       the caller.
177
178
       The length counts are used for other purposes as well, i.e. finding
179
       the minimum and maximum length codes, determining if there are any
180
       codes at all, checking for a valid set of lengths, and looking ahead
181
       at length counts to determine sub-table sizes when building the
182
       decoding tables.
183
     */
184
185
    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
186
38.7k
    count_lengths(lens, codes, count);
187
188
    /* bound code lengths, force root to be within code lengths */
189
38.7k
    root = *bits;
190
370k
    for (max = MAX_BITS; max >= 1; max--)
191
370k
        if (count[max] != 0) break;
192
38.7k
    root = MIN(root, max);
193
38.7k
    if (UNLIKELY(max == 0)) {           /* no symbols to code at all */
194
0
        here.op = (unsigned char)64;    /* invalid code marker */
195
0
        here.bits = (unsigned char)1;
196
0
        here.val = (uint16_t)0;
197
0
        *(*table)++ = here;             /* make a table to force an error */
198
0
        *(*table)++ = here;
199
0
        *bits = 1;
200
0
        return 0;     /* no symbols, but wait for decoding to report error */
201
0
    }
202
84.0k
    for (min = 1; min < max; min++)
203
74.4k
        if (count[min] != 0) break;
204
38.7k
    root = MAX(root, min);
205
206
    /* check for an over-subscribed or incomplete set of lengths */
207
38.7k
    left = 1;
208
620k
    for (len = 1; len <= MAX_BITS; len++) {
209
581k
        left <<= 1;
210
581k
        left -= count[len];
211
581k
        if (left < 0) return -1;        /* over-subscribed */
212
581k
    }
213
38.7k
    if (left > 0 && (type == CODES || max != 1))
214
0
        return -1;                      /* incomplete set */
215
216
    /* generate offsets into symbol table for each length for sorting */
217
38.7k
    offs[1] = 0;
218
581k
    for (len = 1; len < MAX_BITS; len++)
219
543k
        offs[len + 1] = offs[len] + count[len];
220
221
    /* sort symbols by length, by symbol order within each length */
222
3.90M
    for (sym = 0; sym < codes; sym++)
223
3.86M
        if (lens[sym] != 0) work[offs[lens[sym]]++] = (uint16_t)sym;
224
225
    /*
226
       Create and fill in decoding tables.  In this loop, the table being
227
       filled is at next and has curr index bits.  The code being used is huff
228
       with length len.  That code is converted to an index by dropping drop
229
       bits off of the bottom.  For codes where len is less than drop + curr,
230
       those top drop + curr - len bits are incremented through all values to
231
       fill the table with replicated entries.
232
233
       root is the number of index bits for the root table.  When len exceeds
234
       root, sub-tables are created pointed to by the root entry with an index
235
       of the low root bits of huff.  This is saved in low to check for when a
236
       new sub-table should be started.  drop is zero when the root table is
237
       being filled, and drop is root when sub-tables are being filled.
238
239
       When a new sub-table is needed, it is necessary to look ahead in the
240
       code lengths to determine what size sub-table is needed.  The length
241
       counts are used for this, and so count[] is decremented as codes are
242
       entered in the tables.
243
244
       used keeps track of how many table entries have been allocated from the
245
       provided *table space.  It is checked for LENS and DIST tables against
246
       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
247
       the initial root table size constants.  See the comments in inftrees.h
248
       for more information.
249
250
       sym increments through all symbols, and the loop terminates when
251
       all codes of length max, i.e. all codes, have been processed.  This
252
       routine permits incomplete codes, so another loop after this one fills
253
       in the rest of the decoding tables with invalid code markers.
254
     */
255
256
    /* set up for code type */
257
38.7k
    switch (type) {
258
12.9k
    case CODES:
259
12.9k
        base = extra = work;    /* dummy value--not used */
260
12.9k
        match = 20;
261
12.9k
        break;
262
12.9k
    case LENS:
263
12.9k
        base = lbase;
264
12.9k
        extra = lext;
265
12.9k
        match = 257;
266
12.9k
        break;
267
12.9k
    default:    /* DISTS */
268
12.9k
        base = dbase;
269
12.9k
        extra = dext;
270
12.9k
        match = 0;
271
38.7k
    }
272
273
    /* initialize state for loop */
274
38.7k
    rhuff = 0;                  /* starting code, reversed */
275
38.7k
    huff = 0;                   /* starting code */
276
38.7k
    sym = 0;                    /* starting code symbol */
277
38.7k
    len = min;                  /* starting code length */
278
38.7k
    next = *table;              /* current table to fill in */
279
38.7k
    curr = root;                /* current table index bits */
280
38.7k
    drop = 0;                   /* current bits to drop from code for index */
281
38.7k
    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
282
38.7k
    used = 1U << root;          /* use root table entries */
283
38.7k
    mask = used - 1;            /* mask for comparing low */
284
285
    /* check available table space */
286
38.7k
    if ((type == LENS && used > ENOUGH_LENS) ||
287
38.7k
        (type == DISTS && used > ENOUGH_DISTS))
288
0
        return 1;
289
290
    /* process all codes and make table entries */
291
2.36M
    for (;;) {
292
        /* create table entry */
293
2.36M
        here.bits = (unsigned char)(len - drop);
294
2.36M
        if (LIKELY(work[sym] >= match)) {
295
195k
            unsigned op = extra[work[sym] - match];
296
195k
            here.op = COMBINE_OP(op, here.bits);
297
195k
            here.bits = COMBINE_BITS(here.bits, op);
298
195k
            here.val = base[work[sym] - match];
299
2.17M
        } else if (work[sym] + 1U < match) {
300
2.16M
            here.op = (unsigned char)0;
301
2.16M
            here.val = work[sym];
302
2.16M
        } else {
303
12.9k
            here.op = (unsigned char)(32 + 64);         /* end of block */
304
12.9k
            here.val = 0;
305
12.9k
        }
306
307
        /* replicate for those indices with low len bits equal to huff */
308
2.36M
        incr = 1U << (len - drop);
309
2.36M
        fill = 1U << curr;
310
2.36M
        min = fill;                 /* save offset to next table */
311
12.3M
        do {
312
12.3M
            fill -= incr;
313
12.3M
            next[(huff >> drop) + fill] = here;
314
12.3M
        } while (fill != 0);
315
316
        /* backwards increment the len-bit code huff */
317
2.36M
        rhuff += (0x8000u >> (len - 1));
318
2.36M
        huff = zng_bitreverse16(rhuff);
319
320
        /* go to next symbol, update count, len */
321
2.36M
        sym++;
322
2.36M
        if (--(count[len]) == 0) {
323
178k
            if (len == max)
324
38.7k
                break;
325
139k
            len = lens[work[sym]];
326
139k
        }
327
328
        /* create new sub-table if needed */
329
2.33M
        if (len > root && (huff & mask) != low) {
330
            /* if first time, transition to sub-tables */
331
64.3k
            if (drop == 0)
332
8.87k
                drop = root;
333
334
            /* increment past last table */
335
64.3k
            next += min;            /* here min is 1 << curr */
336
337
            /* determine length of next table */
338
64.3k
            curr = len - drop;
339
64.3k
            left = (int)(1 << curr);
340
77.2k
            while (curr + drop < max) {
341
60.6k
                left -= count[curr + drop];
342
60.6k
                if (left <= 0)
343
47.7k
                    break;
344
12.8k
                curr++;
345
12.8k
                left <<= 1;
346
12.8k
            }
347
348
            /* check for enough space */
349
64.3k
            used += 1U << curr;
350
64.3k
            if ((type == LENS && used > ENOUGH_LENS) || (type == DISTS && used > ENOUGH_DISTS))
351
0
                return 1;
352
353
            /* point entry in root table to sub-table */
354
64.3k
            low = huff & mask;
355
64.3k
            (*table)[low].op = (unsigned char)curr;
356
64.3k
            (*table)[low].bits = (unsigned char)root;
357
64.3k
            (*table)[low].val = (uint16_t)(next - *table);
358
64.3k
        }
359
2.33M
    }
360
361
    /* fill in remaining table entry if code is incomplete (guaranteed to have
362
       at most one remaining entry, since if the code is incomplete, the
363
       maximum code length that was allowed to get this far is one bit) */
364
38.7k
    if (UNLIKELY(huff != 0)) {
365
0
        here.op = (unsigned char)64;            /* invalid code marker */
366
0
        here.bits = (unsigned char)(len - drop);
367
0
        here.val = (uint16_t)0;
368
0
        next[huff] = here;
369
0
    }
370
371
    /* set return parameters */
372
38.7k
    *table += used;
373
38.7k
    *bits = root;
374
38.7k
    return 0;
375
38.7k
}