Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/inftrees.c
Line
Count
Source
1
/* inftrees.c -- generate Huffman trees for efficient decoding
2
 * Copyright (C) 1995-2024 Mark Adler
3
 * For conditions of distribution and use, see copyright notice in zlib.h
4
 */
5
6
#include "zbuild.h"
7
#include "zutil.h"
8
#include "inftrees.h"
9
#include "inflate_p.h"
10
#include "fallback_builtins.h"
11
12
#if defined(__SSE2__)
13
#  include "arch/x86/x86_intrins.h"
14
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
15
#  include "arch/arm/neon_intrins.h"
16
#endif
17
18
const char PREFIX(inflate_copyright)[] = " inflate 1.3.1 Copyright 1995-2024 Mark Adler ";
19
/*
20
  If you use the zlib library in a product, an acknowledgment is welcome
21
  in the documentation of your product. If for some reason you cannot
22
  include such an acknowledgment, I would appreciate that you keep this
23
  copyright string in the executable of your product.
24
 */
25
26
/* Count number of codes for each code length. */
27
7.32k
static inline void count_lengths(uint16_t *lens, int codes, uint16_t *count) {
28
7.32k
    int sym;
29
7.32k
    static const ALIGNED_(16) uint8_t one[256] = {
30
7.32k
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31
7.32k
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
7.32k
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33
7.32k
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
7.32k
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
7.32k
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
7.32k
        0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
7.32k
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
38
7.32k
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
39
7.32k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
40
7.32k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
41
7.32k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
42
7.32k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
43
7.32k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
44
7.32k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
45
7.32k
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
46
7.32k
    };
47
48
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
49
    uint8x16_t s1 = vdupq_n_u8(0);
50
    uint8x16_t s2 = vdupq_n_u8(0);
51
52
    if (codes & 1) {
53
        s1 = vld1q_u8(&one[16 * lens[0]]);
54
    }
55
    for (sym = codes & 1; sym < codes; sym += 2) {
56
      s1 = vaddq_u8(s1, vld1q_u8(&one[16 * lens[sym]]));
57
      s2 = vaddq_u8(s2, vld1q_u8(&one[16 * lens[sym+1]]));
58
    }
59
60
    vst1q_u16(&count[0], vaddl_u8(vget_low_u8(s1), vget_low_u8(s2)));
61
    vst1q_u16(&count[8], vaddl_u8(vget_high_u8(s1), vget_high_u8(s2)));
62
63
#elif defined(__SSE2__)
64
    __m128i s1 = _mm_setzero_si128();
65
7.32k
    __m128i s2 = _mm_setzero_si128();
66
67
7.32k
    if (codes & 1) {
68
3.51k
        s1 = _mm_load_si128((const __m128i*)&one[16 * lens[0]]);
69
3.51k
    }
70
389k
    for (sym = codes & 1; sym < codes; sym += 2) {
71
382k
        s1 = _mm_add_epi8(s1, _mm_load_si128((const __m128i*)&one[16 * lens[sym]]));  // vaddq_u8
72
382k
        s2 = _mm_add_epi8(s2, _mm_load_si128((const __m128i*)&one[16 * lens[sym+1]]));
73
382k
    }
74
75
#  if defined(__AVX2__)
76
    __m256i w1 = _mm256_cvtepu8_epi16(s1);
77
    __m256i w2 = _mm256_cvtepu8_epi16(s2);
78
    __m256i sum = _mm256_add_epi16(w1, w2);
79
80
    _mm256_storeu_si256((__m256i*)&count[0], sum);
81
#  else
82
7.32k
    __m128i zero = _mm_setzero_si128();
83
84
7.32k
    __m128i s1_lo = _mm_unpacklo_epi8(s1, zero);
85
7.32k
    __m128i s2_lo = _mm_unpacklo_epi8(s2, zero);
86
7.32k
    __m128i sum_lo = _mm_add_epi16(s1_lo, s2_lo);
87
7.32k
    _mm_storeu_si128((__m128i*)&count[0], sum_lo);
88
89
7.32k
    __m128i s1_hi = _mm_unpackhi_epi8(s1, zero);
90
7.32k
    __m128i s2_hi = _mm_unpackhi_epi8(s2, zero);
91
7.32k
    __m128i sum_hi = _mm_add_epi16(s1_hi, s2_hi);
92
7.32k
    _mm_storeu_si128((__m128i*)&count[8], sum_hi);
93
7.32k
#  endif
94
#else
95
    int len;
96
    for (len = 0; len <= MAX_BITS; len++)
97
        count[len] = 0;
98
    for (sym = 0; sym < codes; sym++)
99
        count[lens[sym]]++;
100
    Z_UNUSED(one);
101
#endif
102
7.32k
}
103
104
/*
105
   Build a set of tables to decode the provided canonical Huffman code.
106
   The code lengths are lens[0..codes-1].  The result starts at *table,
107
   whose indices are 0..2^bits-1.  work is a writable array of at least
108
   lens shorts, which is used as a work area.  type is the type of code
109
   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
110
   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
111
   on return points to the next available entry's address.  bits is the
112
   requested root table index bits, and on return it is the actual root
113
   table index bits.  It will differ if the request is greater than the
114
   longest code or if it is less than the shortest code.
115
 */
116
int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes,
117
7.32k
                                 code * *table, unsigned *bits, uint16_t *work) {
118
7.32k
    unsigned len;               /* a code's length in bits */
119
7.32k
    unsigned sym;               /* index of code symbols */
120
7.32k
    unsigned min, max;          /* minimum and maximum code lengths */
121
7.32k
    unsigned root;              /* number of index bits for root table */
122
7.32k
    unsigned curr;              /* number of index bits for current table */
123
7.32k
    unsigned drop;              /* code bits to drop for sub-table */
124
7.32k
    int left;                   /* number of prefix codes available */
125
7.32k
    unsigned used;              /* code entries in table used */
126
7.32k
    uint16_t rhuff;             /* Reversed huffman code */
127
7.32k
    unsigned huff;              /* Huffman code */
128
7.32k
    unsigned incr;              /* for incrementing code, index */
129
7.32k
    unsigned fill;              /* index for replicating entries */
130
7.32k
    unsigned low;               /* low bits for current root entry */
131
7.32k
    unsigned mask;              /* mask for low root bits */
132
7.32k
    code here;                  /* table entry for duplication */
133
7.32k
    code *next;                 /* next available space in table */
134
7.32k
    const uint16_t *base;       /* base value table to use */
135
7.32k
    const uint16_t *extra;      /* extra bits table to use */
136
7.32k
    unsigned match;             /* use base and extra for symbol >= match */
137
7.32k
    uint16_t count[MAX_BITS+1]; /* number of codes of each length */
138
7.32k
    uint16_t offs[MAX_BITS+1];  /* offsets in table for each length */
139
7.32k
    static const uint16_t lbase[31] = { /* Length codes 257..285 base */
140
7.32k
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
141
7.32k
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
142
7.32k
    static const uint16_t lext[31] = { /* Length codes 257..285 extra */
143
7.32k
        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
144
7.32k
        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77};
145
7.32k
    static const uint16_t dbase[32] = { /* Distance codes 0..29 base */
146
7.32k
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
147
7.32k
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
148
7.32k
        8193, 12289, 16385, 24577, 0, 0};
149
7.32k
    static const uint16_t dext[32] = { /* Distance codes 0..29 extra */
150
7.32k
        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
151
7.32k
        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
152
7.32k
        28, 28, 29, 29, 64, 64};
153
154
    /*
155
       Process a set of code lengths to create a canonical Huffman code.  The
156
       code lengths are lens[0..codes-1].  Each length corresponds to the
157
       symbols 0..codes-1.  The Huffman code is generated by first sorting the
158
       symbols by length from short to long, and retaining the symbol order
159
       for codes with equal lengths.  Then the code starts with all zero bits
160
       for the first code of the shortest length, and the codes are integer
161
       increments for the same length, and zeros are appended as the length
162
       increases.  For the deflate format, these bits are stored backwards
163
       from their more natural integer increment ordering, and so when the
164
       decoding tables are built in the large loop below, the integer codes
165
       are incremented backwards.
166
167
       This routine assumes, but does not check, that all of the entries in
168
       lens[] are in the range 0..MAXBITS.  The caller must assure this.
169
       1..MAXBITS is interpreted as that code length.  zero means that that
170
       symbol does not occur in this code.
171
172
       The codes are sorted by computing a count of codes for each length,
173
       creating from that a table of starting indices for each length in the
174
       sorted table, and then entering the symbols in order in the sorted
175
       table.  The sorted table is work[], with that space being provided by
176
       the caller.
177
178
       The length counts are used for other purposes as well, i.e. finding
179
       the minimum and maximum length codes, determining if there are any
180
       codes at all, checking for a valid set of lengths, and looking ahead
181
       at length counts to determine sub-table sizes when building the
182
       decoding tables.
183
     */
184
185
    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
186
7.32k
    count_lengths(lens, codes, count);
187
188
    /* bound code lengths, force root to be within code lengths */
189
7.32k
    root = *bits;
190
102k
    for (max = MAX_BITS; max >= 1; max--)
191
102k
        if (count[max] != 0) break;
192
7.32k
    root = MIN(root, max);
193
7.32k
    if (UNLIKELY(max == 0)) {           /* no symbols to code at all */
194
0
        here.op = (unsigned char)64;    /* invalid code marker */
195
0
        here.bits = (unsigned char)1;
196
0
        here.val = (uint16_t)0;
197
0
        *(*table)++ = here;             /* make a table to force an error */
198
0
        *(*table)++ = here;
199
0
        *bits = 1;
200
0
        return 0;     /* no symbols, but wait for decoding to report error */
201
0
    }
202
8.39k
    for (min = 1; min < max; min++)
203
5.68k
        if (count[min] != 0) break;
204
7.32k
    root = MAX(root, min);
205
206
    /* check for an over-subscribed or incomplete set of lengths */
207
7.32k
    left = 1;
208
117k
    for (len = 1; len <= MAX_BITS; len++) {
209
109k
        left <<= 1;
210
109k
        left -= count[len];
211
109k
        if (left < 0) return -1;        /* over-subscribed */
212
109k
    }
213
7.32k
    if (left > 0 && (type == CODES || max != 1))
214
0
        return -1;                      /* incomplete set */
215
216
    /* generate offsets into symbol table for each length for sorting */
217
7.32k
    offs[1] = 0;
218
109k
    for (len = 1; len < MAX_BITS; len++)
219
102k
        offs[len + 1] = offs[len] + count[len];
220
221
    /* sort symbols by length, by symbol order within each length */
222
775k
    for (sym = 0; sym < codes; sym++)
223
767k
        if (lens[sym] != 0) work[offs[lens[sym]]++] = (uint16_t)sym;
224
225
    /*
226
       Create and fill in decoding tables.  In this loop, the table being
227
       filled is at next and has curr index bits.  The code being used is huff
228
       with length len.  That code is converted to an index by dropping drop
229
       bits off of the bottom.  For codes where len is less than drop + curr,
230
       those top drop + curr - len bits are incremented through all values to
231
       fill the table with replicated entries.
232
233
       root is the number of index bits for the root table.  When len exceeds
234
       root, sub-tables are created pointed to by the root entry with an index
235
       of the low root bits of huff.  This is saved in low to check for when a
236
       new sub-table should be started.  drop is zero when the root table is
237
       being filled, and drop is root when sub-tables are being filled.
238
239
       When a new sub-table is needed, it is necessary to look ahead in the
240
       code lengths to determine what size sub-table is needed.  The length
241
       counts are used for this, and so count[] is decremented as codes are
242
       entered in the tables.
243
244
       used keeps track of how many table entries have been allocated from the
245
       provided *table space.  It is checked for LENS and DIST tables against
246
       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
247
       the initial root table size constants.  See the comments in inftrees.h
248
       for more information.
249
250
       sym increments through all symbols, and the loop terminates when
251
       all codes of length max, i.e. all codes, have been processed.  This
252
       routine permits incomplete codes, so another loop after this one fills
253
       in the rest of the decoding tables with invalid code markers.
254
     */
255
256
    /* set up for code type */
257
7.32k
    switch (type) {
258
2.44k
    case CODES:
259
2.44k
        base = extra = work;    /* dummy value--not used */
260
2.44k
        match = 20;
261
2.44k
        break;
262
2.44k
    case LENS:
263
2.44k
        base = lbase;
264
2.44k
        extra = lext;
265
2.44k
        match = 257;
266
2.44k
        break;
267
2.44k
    default:    /* DISTS */
268
2.44k
        base = dbase;
269
2.44k
        extra = dext;
270
2.44k
        match = 0;
271
7.32k
    }
272
273
    /* initialize state for loop */
274
7.32k
    rhuff = 0;                  /* starting code, reversed */
275
7.32k
    huff = 0;                   /* starting code */
276
7.32k
    sym = 0;                    /* starting code symbol */
277
7.32k
    len = min;                  /* starting code length */
278
7.32k
    next = *table;              /* current table to fill in */
279
7.32k
    curr = root;                /* current table index bits */
280
7.32k
    drop = 0;                   /* current bits to drop from code for index */
281
7.32k
    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
282
7.32k
    used = 1U << root;          /* use root table entries */
283
7.32k
    mask = used - 1;            /* mask for comparing low */
284
285
    /* check available table space */
286
7.32k
    if ((type == LENS && used > ENOUGH_LENS) ||
287
7.32k
        (type == DISTS && used > ENOUGH_DISTS))
288
0
        return 1;
289
290
    /* process all codes and make table entries */
291
23.3k
    for (;;) {
292
        /* create table entry */
293
23.3k
        here.bits = (unsigned char)(len - drop);
294
23.3k
        if (LIKELY(work[sym] >= match)) {
295
9.49k
            unsigned op = extra[work[sym] - match];
296
9.49k
            here.op = COMBINE_OP(op, here.bits);
297
9.49k
            here.bits = COMBINE_BITS(here.bits, op);
298
9.49k
            here.val = base[work[sym] - match];
299
13.8k
        } else if (work[sym] + 1U < match) {
300
11.4k
            here.op = (unsigned char)0;
301
11.4k
            here.val = work[sym];
302
11.4k
        } else {
303
2.44k
            here.op = (unsigned char)(32 + 64);         /* end of block */
304
2.44k
            here.val = 0;
305
2.44k
        }
306
307
        /* replicate for those indices with low len bits equal to huff */
308
23.3k
        incr = 1U << (len - drop);
309
23.3k
        fill = 1U << curr;
310
23.3k
        min = fill;                 /* save offset to next table */
311
35.4k
        do {
312
35.4k
            fill -= incr;
313
35.4k
            next[(huff >> drop) + fill] = here;
314
35.4k
        } while (fill != 0);
315
316
        /* backwards increment the len-bit code huff */
317
23.3k
        rhuff += (0x8000u >> (len - 1));
318
23.3k
        huff = zng_bitreverse16(rhuff);
319
320
        /* go to next symbol, update count, len */
321
23.3k
        sym++;
322
23.3k
        if (--(count[len]) == 0) {
323
13.9k
            if (len == max)
324
7.32k
                break;
325
6.58k
            len = lens[work[sym]];
326
6.58k
        }
327
328
        /* create new sub-table if needed */
329
16.0k
        if (len > root && (huff & mask) != low) {
330
            /* if first time, transition to sub-tables */
331
0
            if (drop == 0)
332
0
                drop = root;
333
334
            /* increment past last table */
335
0
            next += min;            /* here min is 1 << curr */
336
337
            /* determine length of next table */
338
0
            curr = len - drop;
339
0
            left = (int)(1 << curr);
340
0
            while (curr + drop < max) {
341
0
                left -= count[curr + drop];
342
0
                if (left <= 0)
343
0
                    break;
344
0
                curr++;
345
0
                left <<= 1;
346
0
            }
347
348
            /* check for enough space */
349
0
            used += 1U << curr;
350
0
            if ((type == LENS && used > ENOUGH_LENS) || (type == DISTS && used > ENOUGH_DISTS))
351
0
                return 1;
352
353
            /* point entry in root table to sub-table */
354
0
            low = huff & mask;
355
0
            (*table)[low].op = (unsigned char)curr;
356
0
            (*table)[low].bits = (unsigned char)root;
357
0
            (*table)[low].val = (uint16_t)(next - *table);
358
0
        }
359
16.0k
    }
360
361
    /* fill in remaining table entry if code is incomplete (guaranteed to have
362
       at most one remaining entry, since if the code is incomplete, the
363
       maximum code length that was allowed to get this far is one bit) */
364
7.32k
    if (UNLIKELY(huff != 0)) {
365
0
        here.op = (unsigned char)64;            /* invalid code marker */
366
0
        here.bits = (unsigned char)(len - drop);
367
0
        here.val = (uint16_t)0;
368
0
        next[huff] = here;
369
0
    }
370
371
    /* set return parameters */
372
7.32k
    *table += used;
373
7.32k
    *bits = root;
374
7.32k
    return 0;
375
7.32k
}