Coverage Report

Created: 2026-05-30 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/inflate_p.h
Line
Count
Source
1
/* inflate_p.h -- Private inline functions and macros shared by more than one inflate method
2
 *
3
 */
4
5
#ifndef INFLATE_P_H
6
#define INFLATE_P_H
7
8
#include <stdlib.h>
9
10
#include "zendian.h"
11
#include "zmemory.h"
12
#include "crc32_braid_tbl.h"
13
14
/* Architecture-specific hooks. With S390_DFLTCC_INFLATE defined, the hook macros are expected to
 * come from arch/s390/dfltcc_inflate.h (not shown here); otherwise the generic fallbacks below apply. */
15
#ifdef S390_DFLTCC_INFLATE
16
#  include "arch/s390/dfltcc_inflate.h"
17
/* DFLTCC instructions require the window to be page-aligned (4096 bytes) */
18
#  define PAD_WINDOW            PAD_4096
19
#  define WINDOW_PAD_SIZE       4096
20
#  define HINT_ALIGNED_WINDOW   HINT_ALIGNED_4096
21
#else
22
#  define PAD_WINDOW            PAD_64
23
#  define WINDOW_PAD_SIZE       64
24
#  define HINT_ALIGNED_WINDOW   HINT_ALIGNED_64
25
/* Adjust the window size for the arch-specific inflate code. The generic version yields n unchanged. */
26
57.3k
#  define INFLATE_ADJUST_WINDOW_SIZE(n) (n)
27
/* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks.
 * Expands to an empty statement in the generic build. */
28
28.6k
#  define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0)
29
/* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers.
 * Expands to an empty statement in the generic build. */
30
0
#  define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0)
31
/* Invoked at the beginning of each block. Useful for plugging arch-specific inflation code.
 * Expands to an empty statement in the generic build. */
32
8.42M
#  define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0)
33
/* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that.
 * Always 1 in the generic build. */
34
53.5k
#  define INFLATE_NEED_CHECKSUM(strm) 1
35
/* Returns whether zlib-ng should update the window. Set to 0 if arch-specific inflation code already does that.
 * Always 1 in the generic build. */
36
60.0k
#  define INFLATE_NEED_UPDATEWINDOW(strm) 1
37
/* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets.
 * Expands to an empty statement in the generic build. */
38
0
#  define INFLATE_MARK_HOOK(strm) do {} while (0)
39
/* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks.
 * Expands to an empty statement in the generic build. */
40
0
#  define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0)
41
/* Invoked at the beginning of inflateSetDictionary(). Useful for checking arch-specific window data.
 * Expands to an empty statement in the generic build. */
42
0
#  define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
43
/* Invoked at the beginning of inflateGetDictionary(). Useful for adjusting arch-specific window data.
 * Expands to an empty statement in the generic build. */
44
0
#  define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
45
#endif
46
47
/*
48
 *   Macros shared by inflate() and inflateBack()
49
 */
50
51
/* Check macros for the header crc (only when GUNZIP is defined).
 * CRC_DO1_B folds the low byte of b into c through crc_table; c must be a modifiable lvalue and
 * is evaluated more than once. CRC2 and CRC4 feed the two or four low-order bytes of word,
 * least significant byte first, into check, complementing the value before and after. */
52
#ifdef GUNZIP
53
0
#  define CRC_DO1_B(c, b)    c = crc_table[(c ^ (b)) & 0xff] ^ (c >> 8)
54
55
#  define CRC2(check, word) \
56
0
    do { \
57
0
        uint32_t crc = ~(uint32_t)(check); \
58
0
        CRC_DO1_B(crc, (word)     ); \
59
0
        CRC_DO1_B(crc, (word) >> 8); \
60
0
        (check) = ~crc; \
61
0
    } while (0)
62
63
#  define CRC4(check, word) \
64
0
    do { \
65
0
        uint32_t crc = ~(uint32_t)(check); \
66
0
        CRC_DO1_B(crc, (word)      ); \
67
0
        CRC_DO1_B(crc, (word) >>  8); \
68
0
        CRC_DO1_B(crc, (word) >> 16); \
69
0
        CRC_DO1_B(crc, (word) >> 24); \
70
0
        (check) = ~crc; \
71
0
    } while (0)
72
#endif
73
74
/* Type of the local bit counter used by the macros below (LOAD, NEEDBITS and DROPBITS cast to it).
 * It is uint8_t when ARCH_X86 is defined, as a compiler optimization for the bit accumulator on
 * x86 architectures, and plain unsigned otherwise. */
75
#ifdef ARCH_X86
76
typedef uint8_t bits_t;
77
#else
78
typedef unsigned bits_t;
79
#endif
80
81
/* Load registers with state in inflate() for speed.
 * Copies strm->next_out, avail_out, next_in, avail_in and state->hold, bits into the locals
 * put, left, next, have, hold and bits, which the expansion site must declare.
 * state->bits is narrowed to bits_t. */
82
#define LOAD() \
83
8.44M
    do { \
84
8.44M
        put = strm->next_out; \
85
8.44M
        left = strm->avail_out; \
86
8.44M
        next = strm->next_in; \
87
8.44M
        have = strm->avail_in; \
88
8.44M
        hold = state->hold; \
89
8.44M
        bits = (bits_t)state->bits; \
90
8.44M
    } while (0)
91
92
/* Restore state from registers in inflate().
 * Inverse of LOAD(): writes the locals put, left, next, have, hold and bits back to
 * strm->next_out, avail_out, next_in, avail_in and state->hold, bits. */
93
#define RESTORE() \
94
8.44M
    do { \
95
8.44M
        strm->next_out = put; \
96
8.44M
        strm->avail_out = left; \
97
8.44M
        strm->next_in = (z_const unsigned char *)next; \
98
8.44M
        strm->avail_in = have; \
99
8.44M
        state->hold = hold; \
100
8.44M
        state->bits = bits; \
101
8.44M
    } while (0)
102
103
/* Refill to have at least 56 bits in the bit accumulator.
 * ORs the 64 bits returned by load_64_bits(in, bits) into hold, advances `in` by
 * (63 ^ bits) >> 3 bytes, then forces the bit count to at least 56 while keeping its low three
 * bits (bits |= 56). Uses the locals hold, in and bits of the expansion site.
 * NOTE(review): presumably bits < 64 on entry -- confirm at the call sites. */
104
138M
#define REFILL() do { \
105
138M
        hold |= load_64_bits(in, bits); \
106
138M
        in += (63 ^ bits) >> 3; \
107
138M
        bits |= 56; \
108
138M
    } while (0)
109
110
/* Clear the input bit accumulator: zeroes both the locals hold (the bits) and bits (their count). */
111
#define INITBITS() \
112
60.8k
    do { \
113
60.8k
        hold = 0; \
114
60.8k
        bits = 0; \
115
60.8k
    } while (0)
116
117
/* Ensure that there are at least n bits in the bit accumulator.  If there is
118
   not enough available input to do that, then return from inflate()/inflateBack().
   n is evaluated once; PULLBYTE() is invoked until bits reaches n and must be defined by the
   including file (it is not defined in this header). */
119
#define NEEDBITS(n) \
120
9.53M
    do { \
121
9.53M
        unsigned u = (unsigned)(n); \
122
14.3M
        while (bits < (bits_t)u) \
123
9.53M
            PULLBYTE(); \
124
9.53M
    } while (0)
125
126
/* Return the low n bits of the bit accumulator (n < 16).
 * Reads the local hold of the expansion site and does not modify it. */
127
#define BITS(n) \
128
24.3M
    (hold & ((1U << (unsigned)(n)) - 1))
129
130
/* Remove n bits from the bit accumulator: shifts hold right by n and subtracts n from bits.
 * n is evaluated once. */
131
#define DROPBITS(n) \
132
410M
    do { \
133
410M
        unsigned u = (unsigned)(n); \
134
410M
        hold >>= u; \
135
410M
        bits -= (bits_t)u; \
136
410M
    } while (0)
137
138
/* Remove zero to seven bits as needed to go to a byte boundary:
 * drops (bits & 7) bits from hold and leaves bits a multiple of 8. */
139
#define BYTEBITS() \
140
33.7k
    do { \
141
33.7k
        hold >>= bits & 7; \
142
33.7k
        bits -= bits & 7; \
143
33.7k
    } while (0)
144
145
/* Set mode=BAD and prepare error message: stores BAD in state->mode and points strm->msg at
 * errmsg (cast to char *). Uses the state and strm variables of the expansion site. */
146
#define SET_BAD(errmsg) \
147
4.36k
    do { \
148
4.36k
        state->mode = BAD; \
149
4.36k
        strm->msg = (char *)errmsg; \
150
4.36k
    } while (0)
151
152
/* Huffman code table entry format for length/distance codes (op & 16 set):
153
 *   bits = code_bits + extra_bits (combined for single-shift decode)
154
 *   op   = 16 | code_bits
155
 *   val  = base value
156
 *
157
 * For literals (op == 0): bits = code_bits, val = literal byte
158
 */
159
160
/* Extract code size from a Huffman table entry: the low nibble of op for length/distance
 * entries (op & 16 set), otherwise the bits field. `here` is evaluated more than once. */
161
#define CODE_BITS(here) \
162
609k
    ((unsigned)((here.op & 16) ? (here.op & 15) : here.bits))
163
164
/* Extract extra bits count from a length/distance code entry: bits minus the code size stored
 * in op's low nibble, or 0 when op & 16 is clear. */
165
#define CODE_EXTRA(here) \
166
52.3k
    ((unsigned)((here.op & 16) ? (here.bits - (here.op & 15)) : 0))
167
168
/* Extract extra bits value from saved bit accumulator: keeps the low here.bits bits of old and
 * shifts them right by (op & MAX_BITS). MAX_BITS is defined outside this header. */
169
#define EXTRA_BITS(old, here, op) \
170
32.7M
    ((old & (((uint64_t)1 << here.bits) - 1)) >> (op & MAX_BITS))
171
172
/* Build combined op field: preserves extra if not len/dist, else combines with code_bits
 * (result is code_bits | 16, truncated to unsigned char) */
173
#define COMBINE_OP(extra, code_bits) \
174
505k
    ((unsigned char)((extra) & 16 ? (code_bits) | 16 : (extra)))
175
176
/* Build combined bits field: code_bits + extra_bits from extra's low nibble
 * (result truncated to unsigned char) */
177
#define COMBINE_BITS(code_bits, extra) \
178
505k
    ((unsigned char)((code_bits) + ((extra) & 15)))
179
180
/* Trace macros for debugging. Each forwards a message to Tracevv() with stderr as the stream;
 * Tracevv is defined outside this header. TRACE_LITERAL prints val as a character when it is in
 * the range 0x20..0x7e and as a two-digit hex value otherwise. */
181
#define TRACE_LITERAL(val) \
182
    Tracevv((stderr, val >= 0x20 && val < 0x7f ? \
183
            "inflate:         literal '%c'\n" : \
184
            "inflate:         literal 0x%02x\n", val))
185
186
#define TRACE_LENGTH(len) \
187
    Tracevv((stderr, "inflate:         length %u\n", len))
188
189
#define TRACE_DISTANCE(dist) \
190
    Tracevv((stderr, "inflate:         distance %u\n", dist))
191
192
#define TRACE_END_OF_BLOCK() \
193
    Tracevv((stderr, "inflate:         end of block\n"))
194
195
34.0M
/* Numeric limits for the fast decoding path. Their users are not part of this header.
 * NOTE(review): the names suggest minimum input available / output space left / safety margin;
 * confirm the units against the callers. */
#define INFLATE_FAST_MIN_HAVE 15   /* max input bits per length/distance pair */
196
16.7M
#define INFLATE_FAST_MIN_LEFT 260  /* max output per token (258) + 2 */
197
8.45M
#define INFLATE_FAST_MIN_SAFE 3    /* max unchecked literal writes per iteration */
198
199
/* Load 64 bits from IN and place the bytes at offset BITS in the result.
 * The value is read with zng_memread_8(), converted from little-endian with Z_U64_FROM_LE(),
 * and shifted left by BITS. BITS must be less than 64: a larger shift count on a 64-bit operand
 * is undefined in C. */
200
138M
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
201
138M
    uint64_t chunk = zng_memread_8(in);
202
138M
    return Z_U64_FROM_LE(chunk) << bits;
203
138M
}
Unexecuted instantiation: chunkset_sse2.c:load_64_bits
Unexecuted instantiation: chunkset_ssse3.c:load_64_bits
chunkset_avx2.c:load_64_bits
Line
Count
Source
200
138M
/* Reads a 64-bit value at `in` with zng_memread_8(), converts it from little-endian with
 * Z_U64_FROM_LE(), and returns it shifted left by `bits` (which must be less than 64). */
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
201
138M
    uint64_t chunk = zng_memread_8(in);
202
138M
    return Z_U64_FROM_LE(chunk) << bits;
203
138M
}
Unexecuted instantiation: chunkset_avx512.c:load_64_bits
Unexecuted instantiation: inflate.c:load_64_bits
Unexecuted instantiation: inftrees.c:load_64_bits
204
205
/* Behave like chunkcopy, but avoid writing beyond the legal output.
 *
 * Copies up to len bytes from `from` to `out`, after clamping len to (safe - out) so that no
 * byte is written at or past `safe`. Returns the pointer just past the last byte written, i.e.
 * out plus the clamped length.
 *
 * Source and destination may overlap. Disjoint ranges are copied with a single memcpy; when
 * out == from nothing is copied; otherwise the copy runs front to back in rounds of at most
 * |from - out| bytes, so every individual memcpy call works on disjoint ranges. */
206
75.5k
static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, size_t len, uint8_t *safe) {
207
75.5k
    size_t safelen = safe - out; /* assumes safe >= out; the difference is converted to size_t */
208
75.5k
    len = MIN(len, safelen);
209
75.5k
    int32_t olap_src = from >= out && from < out + len; /* source starts inside the destination range */
210
75.5k
    int32_t olap_dst = out >= from && out < from + len; /* destination starts inside the source range */
211
75.5k
    size_t tocopy;
212
213
    /* For all cases without overlap, memcpy is ideal */
214
75.5k
    if (!(olap_src || olap_dst)) {
215
59.9k
        memcpy(out, from, len);
216
59.9k
        return out + len;
217
59.9k
    }
218
219
    /* Complete overlap: Source == destination, so there is nothing to move */
220
15.6k
    if (out == from) {
221
0
        return out + len;
222
0
    }
223
224
    /* We are emulating a self-modifying copy loop here. To do this in a way that doesn't produce undefined behavior,
225
     * we have to get a bit clever. First, if the overlap is such that src falls between dst and dst+len, we can do the
226
     * initial bulk memcpy of the nonoverlapping region. Then, we can leverage the size of this to determine the safest
227
     * atomic memcpy size we can pick such that we have non-overlapping regions. This effectively becomes a safe look
228
     * behind or lookahead distance. */
229
15.6k
    size_t non_olap_size = (size_t)ABS(from - out);
230
231
    /* So this doesn't give us a worst case scenario of function calls in a loop,
232
     * we want to instead break this down into copy blocks of fixed lengths
233
     *
234
     * TODO: The memcpy calls aren't inlined on architectures with strict memory alignment
235
     */
236
340k
    while (len) {
237
324k
        tocopy = MIN(non_olap_size, len); /* one round never exceeds the distance between the pointers */
238
324k
        len -= tocopy;
239
240
326k
        while (tocopy >= 16) {
241
1.73k
            memcpy(out, from, 16);
242
1.73k
            out += 16;
243
1.73k
            from += 16;
244
1.73k
            tocopy -= 16;
245
1.73k
        }
246
247
324k
        if (tocopy >= 8) {
248
2.43k
            memcpy(out, from, 8);
249
2.43k
            out += 8;
250
2.43k
            from += 8;
251
2.43k
            tocopy -= 8;
252
2.43k
        }
253
254
324k
        if (tocopy >= 4) {
255
7.93k
            memcpy(out, from, 4);
256
7.93k
            out += 4;
257
7.93k
            from += 4;
258
7.93k
            tocopy -= 4;
259
7.93k
        }
260
261
        /* Remaining 0..3 bytes of this round are copied one at a time */
659k
        while (tocopy--) {
262
334k
            *out++ = *from++;
263
334k
        }
264
324k
    }
265
266
15.6k
    return out;
267
15.6k
}
Unexecuted instantiation: chunkset_sse2.c:chunkcopy_safe
Unexecuted instantiation: chunkset_ssse3.c:chunkcopy_safe
chunkset_avx2.c:chunkcopy_safe
Line
Count
Source
206
75.5k
/* Copies up to len bytes from `from` to `out`, after clamping len to (safe - out) so that no
 * byte is written at or past `safe`; returns the pointer just past the last byte written.
 * Overlapping ranges are handled by copying front to back in rounds of at most |from - out|
 * bytes, so every individual memcpy call works on disjoint ranges. */
static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, size_t len, uint8_t *safe) {
207
75.5k
    size_t safelen = safe - out; /* assumes safe >= out; the difference is converted to size_t */
208
75.5k
    len = MIN(len, safelen);
209
75.5k
    int32_t olap_src = from >= out && from < out + len; /* source starts inside the destination range */
210
75.5k
    int32_t olap_dst = out >= from && out < from + len; /* destination starts inside the source range */
211
75.5k
    size_t tocopy;
212
213
    /* For all cases without overlap, memcpy is ideal */
214
75.5k
    if (!(olap_src || olap_dst)) {
215
59.9k
        memcpy(out, from, len);
216
59.9k
        return out + len;
217
59.9k
    }
218
219
    /* Complete overlap: Source == destination, so there is nothing to move */
220
15.6k
    if (out == from) {
221
0
        return out + len;
222
0
    }
223
224
    /* We are emulating a self-modifying copy loop here. To do this in a way that doesn't produce undefined behavior,
225
     * we have to get a bit clever. First, if the overlap is such that src falls between dst and dst+len, we can do the
226
     * initial bulk memcpy of the nonoverlapping region. Then, we can leverage the size of this to determine the safest
227
     * atomic memcpy size we can pick such that we have non-overlapping regions. This effectively becomes a safe look
228
     * behind or lookahead distance. */
229
15.6k
    size_t non_olap_size = (size_t)ABS(from - out);
230
231
    /* So this doesn't give us a worst case scenario of function calls in a loop,
232
     * we want to instead break this down into copy blocks of fixed lengths
233
     *
234
     * TODO: The memcpy calls aren't inlined on architectures with strict memory alignment
235
     */
236
340k
    while (len) {
237
324k
        tocopy = MIN(non_olap_size, len); /* one round never exceeds the distance between the pointers */
238
324k
        len -= tocopy;
239
240
326k
        while (tocopy >= 16) {
241
1.73k
            memcpy(out, from, 16);
242
1.73k
            out += 16;
243
1.73k
            from += 16;
244
1.73k
            tocopy -= 16;
245
1.73k
        }
246
247
324k
        if (tocopy >= 8) {
248
2.43k
            memcpy(out, from, 8);
249
2.43k
            out += 8;
250
2.43k
            from += 8;
251
2.43k
            tocopy -= 8;
252
2.43k
        }
253
254
324k
        if (tocopy >= 4) {
255
7.93k
            memcpy(out, from, 4);
256
7.93k
            out += 4;
257
7.93k
            from += 4;
258
7.93k
            tocopy -= 4;
259
7.93k
        }
260
261
        /* Remaining 0..3 bytes of this round are copied one at a time */
659k
        while (tocopy--) {
262
334k
            *out++ = *from++;
263
334k
        }
264
324k
    }
265
266
15.6k
    return out;
267
15.6k
}
Unexecuted instantiation: chunkset_avx512.c:chunkcopy_safe
Unexecuted instantiation: inflate.c:chunkcopy_safe
Unexecuted instantiation: inftrees.c:chunkcopy_safe
268
269
#endif