/* inflate_p.h -- Private inline functions and macros shared by more than one inflate method
 *
 */

#ifndef INFLATE_P_H
#define INFLATE_P_H

#include <stdlib.h>
#include "zmemory.h"

/* Architecture-specific hooks. */
#ifdef S390_DFLTCC_INFLATE
#  include "arch/s390/dfltcc_inflate.h"
/* DFLTCC instructions require window to be page-aligned */
#  define PAD_WINDOW PAD_4096
#  define WINDOW_PAD_SIZE 4096
#  define HINT_ALIGNED_WINDOW HINT_ALIGNED_4096
#else
#  define PAD_WINDOW PAD_64
#  define WINDOW_PAD_SIZE 64
#  define HINT_ALIGNED_WINDOW HINT_ALIGNED_64
/* Adjust the window size for the arch-specific inflate code. */
#  define INFLATE_ADJUST_WINDOW_SIZE(n) (n)
/* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */
#  define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0)
/* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */
#  define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0)
/* Invoked at the beginning of each block. Useful for plugging arch-specific inflation code. */
#  define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0)
/* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that. */
#  define INFLATE_NEED_CHECKSUM(strm) 1
/* Returns whether zlib-ng should update a window. Set to 0 if arch-specific inflation code already does that. */
#  define INFLATE_NEED_UPDATEWINDOW(strm) 1
/* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets. */
#  define INFLATE_MARK_HOOK(strm) do {} while (0)
/* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks. */
#  define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0)
/* Invoked at the beginning of inflateSetDictionary(). Useful for checking arch-specific window data. */
#  define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
/* Invoked at the beginning of inflateGetDictionary(). Useful for adjusting arch-specific window data. */
#  define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
#endif
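
/* Illustrative sketch (hypothetical, not the real s390 port): an arch header
 * included above is expected to supply its own hook definitions in place of
 * these no-op fallbacks, e.g.:
 *
 *     #define INFLATE_TYPEDO_HOOK(strm, flush) \
 *         do { \
 *             if (my_arch_can_inflate(strm)) \
 *                 my_arch_do_inflate(strm, flush); \
 *         } while (0)
 *
 * where my_arch_can_inflate() and my_arch_do_inflate() are made-up helpers;
 * the real DFLTCC hooks live in arch/s390/dfltcc_inflate.h. */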

/*
 * Macros shared by inflate() and inflateBack()
 */

/* check function to use adler32() for zlib or crc32() for gzip */
#ifdef GUNZIP
#  define UPDATE(check, buf, len) \
    (state->flags ? PREFIX(crc32)(check, buf, len) : FUNCTABLE_CALL(adler32)(check, buf, len))
#else
#  define UPDATE(check, buf, len) FUNCTABLE_CALL(adler32)(check, buf, len)
#endif
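
/* Usage sketch (illustrative; `buf` and `len` are placeholder names): after
 * producing fresh output bytes, the caller keeps the running check value
 * current, skipping the work when the arch code already computed it:
 *
 *     if (INFLATE_NEED_CHECKSUM(strm))
 *         strm->adler = state->check = UPDATE(state->check, buf, len);
 */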

/* check macros for header crc */
#ifdef GUNZIP
#  define CRC2(check, word) \
    do { \
        hbuf[0] = (unsigned char)(word); \
        hbuf[1] = (unsigned char)((word) >> 8); \
        check = PREFIX(crc32)(check, hbuf, 2); \
    } while (0)

#  define CRC4(check, word) \
    do { \
        hbuf[0] = (unsigned char)(word); \
        hbuf[1] = (unsigned char)((word) >> 8); \
        hbuf[2] = (unsigned char)((word) >> 16); \
        hbuf[3] = (unsigned char)((word) >> 24); \
        check = PREFIX(crc32)(check, hbuf, 4); \
    } while (0)
#endif
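
/* Usage sketch (modeled on the gzip header parsing in inflate(), which keeps
 * a local `unsigned char hbuf[4]` scratch buffer in scope for these macros):
 * a 16-bit header field sitting in the bit accumulator is folded into the
 * header CRC only when the FHCRC flag is set:
 *
 *     NEEDBITS(16);
 *     if (state->flags & 0x0200)
 *         CRC2(state->check, hold);
 *     INITBITS();
 */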

/* Load registers with state in inflate() for speed */
#define LOAD() \
    do { \
        put = strm->next_out; \
        left = strm->avail_out; \
        next = strm->next_in; \
        have = strm->avail_in; \
        hold = state->hold; \
        bits = state->bits; \
    } while (0)

/* Restore state from registers in inflate() */
#define RESTORE() \
    do { \
        strm->next_out = put; \
        strm->avail_out = left; \
        strm->next_in = (z_const unsigned char *)next; \
        strm->avail_in = have; \
        state->hold = hold; \
        state->bits = bits; \
    } while (0)
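
/* Usage sketch (illustrative): inflate() brackets its decode loop with these
 * so the hot path reads and writes locals instead of dereferencing strm on
 * every step:
 *
 *     LOAD();
 *     ... decode loop using have/left/next/put/hold/bits ...
 *     RESTORE();
 */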

/* Clear the input bit accumulator */
#define INITBITS() \
    do { \
        hold = 0; \
        bits = 0; \
    } while (0)

/* Ensure that there are at least n bits in the bit accumulator. If there is
   not enough available input to do that, then return from inflate()/inflateBack(). */
#define NEEDBITS(n) \
    do { \
        while (bits < (unsigned)(n)) \
            PULLBYTE(); \
    } while (0)

/* Return the low n bits of the bit accumulator (n < 16) */
#define BITS(n) \
    (hold & ((1U << (unsigned)(n)) - 1))

/* Remove n bits from the bit accumulator */
#define DROPBITS(n) \
    do { \
        hold >>= (n); \
        bits -= (unsigned)(n); \
    } while (0)

/* Remove zero to seven bits as needed to go to a byte boundary */
#define BYTEBITS() \
    do { \
        hold >>= bits & 7; \
        bits -= bits & 7; \
    } while (0)
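
/* Worked example (roughly how inflate() reads a deflate block header with
 * these macros): pull in three bits, peel off the final-block flag, then
 * dispatch on the two-bit block type:
 *
 *     NEEDBITS(3);
 *     state->last = BITS(1);      // bit 0: last-block flag
 *     DROPBITS(1);
 *     switch (BITS(2)) {          // bits 1-2: stored/fixed/dynamic
 *     ...
 *     }
 */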

/* Set mode=BAD and prepare error message */
#define SET_BAD(errmsg) \
    do { \
        state->mode = BAD; \
        strm->msg = (char *)errmsg; \
    } while (0)

/* Minimum bytes of input available and output space free that inflate()
   requires before entering the inflate_fast() hot loop. */
#define INFLATE_FAST_MIN_HAVE 15
#define INFLATE_FAST_MIN_LEFT 260

/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
    uint64_t chunk = zng_memread_8(in);

#if BYTE_ORDER == LITTLE_ENDIAN
    return chunk << bits;
#else
    return ZSWAP64(chunk) << bits;
#endif
}
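
/* Usage sketch (illustrative; the exact bookkeeping in the fast loop may
 * differ): a decoder can top up a 64-bit accumulator holding `bits` valid
 * bits by splicing fresh input bytes in above them:
 *
 *     hold |= load_64_bits(in, bits);   // new bytes land above the old bits
 *     in += 7 - ((bits >> 3) & 7);      // advance past whole bytes consumed
 *     bits |= 56;                       // at least 56 valid bits now held
 */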

/* Behave like chunkcopy, but avoid writing beyond the end of the legal output. */
static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len, uint8_t *safe) {
    uint64_t safelen = safe - out;
    len = MIN(len, safelen);
    int32_t olap_src = from >= out && from < out + len;
    int32_t olap_dst = out >= from && out < from + len;
    uint64_t tocopy;

    /* For all cases without overlap, memcpy is ideal */
    if (!(olap_src || olap_dst)) {
        memcpy(out, from, (size_t)len);
        return out + len;
    }

    /* Complete overlap: Source == destination */
    if (out == from) {
        return out + len;
    }

    /* We are emulating a self-modifying copy loop here. To do this in a way that doesn't produce undefined behavior,
     * we have to get a bit clever. First, if the overlap is such that src falls between dst and dst+len, we can do the
     * initial bulk memcpy of the non-overlapping region. Then, we can leverage the size of this to determine the safest
     * atomic memcpy size we can pick such that we have non-overlapping regions. This effectively becomes a safe
     * look-behind or lookahead distance. */
    uint64_t non_olap_size = llabs(from - out); // llabs instead of labs: long is only 32 bits on 64-bit Windows (LLP64)

    /* So that this doesn't give us a worst-case scenario of function calls in a loop,
     * we instead break this down into copy blocks of fixed lengths
     *
     * TODO: The memcpy calls aren't inlined on architectures with strict memory alignment
     */
    while (len) {
        tocopy = MIN(non_olap_size, len);
        len -= tocopy;

        while (tocopy >= 16) {
            memcpy(out, from, 16);
            out += 16;
            from += 16;
            tocopy -= 16;
        }

        if (tocopy >= 8) {
            memcpy(out, from, 8);
            out += 8;
            from += 8;
            tocopy -= 8;
        }

        if (tocopy >= 4) {
            memcpy(out, from, 4);
            out += 4;
            from += 4;
            tocopy -= 4;
        }

        while (tocopy--) {
            *out++ = *from++;
        }
    }

    return out;
}
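
/* Usage sketch (hypothetical names): copying an LZ77 match of `len` bytes
 * from `dist` bytes behind the output cursor, where `window_end` points one
 * past the last byte this call may write (the convention implied by
 * `safelen = safe - out` above):
 *
 *     out = chunkcopy_safe(out, out - dist, len, window_end);
 *
 * When dist < len the regions overlap, and the fixed-size copy blocks above
 * replicate bytes with the look-behind semantics a back-reference requires. */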

#endif