/src/c-blosc2/internal-complibs/zlib-ng-2.0.7/chunkset_tpl.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* chunkset_tpl.h -- inline functions to copy small data chunks. |
2 | | * For conditions of distribution and use, see copyright notice in zlib.h |
3 | | */ |
4 | | |
5 | | /* Returns the chunk size */ |
6 | 8.49k | Z_INTERNAL uint32_t CHUNKSIZE(void) { |
7 | 8.49k | return sizeof(chunk_t); |
8 | 8.49k | } |
9 | | |
10 | | /* Behave like memcpy, but assume that it's OK to overwrite at least |
11 | | sizeof(chunk_t) bytes of output even if the length is shorter than this, |
12 | | that the length is non-zero, and that `from` lags `out` by at least |
13 | | sizeof(chunk_t) bytes (or that they don't overlap at all or simply that |
14 | | the distance is less than the length of the copy). |
15 | | |
16 | | Aside from better memory bus utilisation, this means that short copies |
17 | | (sizeof(chunk_t) bytes or fewer) will fall straight through the loop |
18 | | without iteration, which will hopefully make the branch prediction more |
19 | | reliable. */ |
20 | 227k | Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { |
21 | 227k | Assert(len > 0, "chunkcopy should never have a length 0"); |
22 | 227k | chunk_t chunk; |
23 | 227k | int32_t align = (--len % sizeof(chunk_t)) + 1; |
24 | 227k | loadchunk(from, &chunk); |
25 | 227k | storechunk(out, &chunk); |
26 | 227k | out += align; |
27 | 227k | from += align; |
28 | 227k | len /= sizeof(chunk_t); |
29 | 417k | while (len > 0) { |
30 | 189k | loadchunk(from, &chunk); |
31 | 189k | storechunk(out, &chunk); |
32 | 189k | out += sizeof(chunk_t); |
33 | 189k | from += sizeof(chunk_t); |
34 | 189k | --len; |
35 | 189k | } |
36 | 227k | return out; |
37 | 227k | } |
38 | | |
39 | | /* Behave like chunkcopy, but avoid writing beyond the end of the legal output. */ |
40 | 465k | Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) { |
41 | 465k | unsigned safelen = (unsigned)((safe - out) + 1); |
42 | 465k | len = MIN(len, safelen); |
43 | | #if CHUNK_SIZE >= 32 |
44 | | while (len >= 32) { |
45 | | memcpy(out, from, 32); |
46 | | out += 32; |
47 | | from += 32; |
48 | | len -= 32; |
49 | | } |
50 | | #endif |
51 | | #if CHUNK_SIZE >= 16 |
52 | | while (len >= 16) { |
53 | | memcpy(out, from, 16); |
54 | | out += 16; |
55 | | from += 16; |
56 | | len -= 16; |
57 | | } |
58 | | #endif |
59 | 465k | #if CHUNK_SIZE >= 8 |
60 | 465k | while (len >= 8) { |
61 | 0 | memcpy(out, from, 8); |
62 | 0 | out += 8; |
63 | 0 | from += 8; |
64 | 0 | len -= 8; |
65 | 0 | } |
66 | 465k | #endif |
67 | 465k | if (len >= 4) { |
68 | 20.1k | memcpy(out, from, 4); |
69 | 20.1k | out += 4; |
70 | 20.1k | from += 4; |
71 | 20.1k | len -= 4; |
72 | 20.1k | } |
73 | 465k | if (len >= 2) { |
74 | 240k | memcpy(out, from, 2); |
75 | 240k | out += 2; |
76 | 240k | from += 2; |
77 | 240k | len -= 2; |
78 | 240k | } |
79 | 465k | if (len == 1) { |
80 | 383k | *out++ = *from++; |
81 | 383k | } |
82 | 465k | return out; |
83 | 465k | } |
84 | | |
85 | | /* Perform short copies until distance can be rewritten as being at least |
86 | | sizeof(chunk_t). |
87 | | |
88 | | This assumes that it's OK to overwrite at least the first |
89 | | 2*sizeof(chunk_t) bytes of output even if the copy is shorter than this. |
90 | | This assumption holds because inflate_fast() starts every iteration with at |
91 | | least 258 bytes of output space available (258 being the maximum length |
92 | | output from a single token; see inflate_fast()'s assumptions below). */ |
93 | 920 | Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) { |
94 | 920 | unsigned char const *from = out - *dist; |
95 | 920 | chunk_t chunk; |
96 | 920 | while (*dist < *len && *dist < sizeof(chunk_t)) { |
97 | 0 | loadchunk(from, &chunk); |
98 | 0 | storechunk(out, &chunk); |
99 | 0 | out += *dist; |
100 | 0 | *len -= *dist; |
101 | 0 | *dist += *dist; |
102 | 0 | } |
103 | 920 | return out; |
104 | 920 | } |
105 | | |
106 | | /* Copy DIST bytes from OUT - DIST into OUT + DIST * k, for 0 <= k < LEN/DIST. |
107 | | Return OUT + LEN. */ |
108 | 102k | Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) { |
109 | | /* Debug performance-related issues when len < sizeof(uint64_t): |
110 | | Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */ |
111 | 102k | Assert(dist > 0, "chunkmemset cannot have a distance 0"); |
112 | | |
113 | 102k | unsigned char *from = out - dist; |
114 | 102k | chunk_t chunk; |
115 | 102k | unsigned sz = sizeof(chunk); |
116 | 102k | if (len < sz) { |
117 | 178k | while (len != 0) { |
118 | 146k | *out++ = *from++; |
119 | 146k | --len; |
120 | 146k | } |
121 | 32.0k | return out; |
122 | 32.0k | } |
123 | | |
124 | | #ifdef HAVE_CHUNKMEMSET_1 |
125 | | if (dist == 1) { |
126 | | chunkmemset_1(from, &chunk); |
127 | | } else |
128 | | #endif |
129 | | #ifdef HAVE_CHUNKMEMSET_2 |
130 | | if (dist == 2) { |
131 | | chunkmemset_2(from, &chunk); |
132 | | } else |
133 | | #endif |
134 | 70.8k | #ifdef HAVE_CHUNKMEMSET_4 |
135 | 70.8k | if (dist == 4) { |
136 | 8.68k | chunkmemset_4(from, &chunk); |
137 | 8.68k | } else |
138 | 62.1k | #endif |
139 | 62.1k | #ifdef HAVE_CHUNKMEMSET_8 |
140 | 62.1k | if (dist == 8) { |
141 | 180 | chunkmemset_8(from, &chunk); |
142 | 180 | } else |
143 | 61.9k | #endif |
144 | 61.9k | if (dist == sz) { |
145 | 0 | loadchunk(from, &chunk); |
146 | 61.9k | } else if (dist < sz) { |
147 | 61.0k | unsigned char *end = out + len - 1; |
148 | 465k | while (len > dist) { |
149 | 404k | out = CHUNKCOPY_SAFE(out, from, dist, end); |
150 | 404k | len -= dist; |
151 | 404k | } |
152 | 61.0k | if (len > 0) { |
153 | 61.0k | out = CHUNKCOPY_SAFE(out, from, len, end); |
154 | 61.0k | } |
155 | 61.0k | return out; |
156 | 61.0k | } else { |
157 | 920 | out = CHUNKUNROLL(out, &dist, &len); |
158 | 920 | return CHUNKCOPY(out, out - dist, len); |
159 | 920 | } |
160 | | |
161 | 8.86k | unsigned rem = len % sz; |
162 | 8.86k | len -= rem; |
163 | 30.5k | while (len) { |
164 | 21.7k | storechunk(out, &chunk); |
165 | 21.7k | out += sz; |
166 | 21.7k | len -= sz; |
167 | 21.7k | } |
168 | | |
169 | | /* Last, deal with the case when LEN is not a multiple of SZ. */ |
170 | 8.86k | if (rem) { |
171 | 5.84k | memcpy(out, from, rem); |
172 | 5.84k | out += rem; |
173 | 5.84k | } |
174 | | |
175 | 8.86k | return out; |
176 | 70.8k | } |
177 | | |
178 | 20.9k | Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, unsigned left) { |
179 | 20.9k | #if !defined(UNALIGNED64_OK) |
180 | 20.9k | # if !defined(UNALIGNED_OK) |
181 | 20.9k | static const uint32_t align_mask = 7; |
182 | | # else |
183 | | static const uint32_t align_mask = 3; |
184 | | # endif |
185 | 20.9k | #endif |
186 | | |
187 | 20.9k | len = MIN(len, left); |
188 | 20.9k | uint8_t *from = out - dist; |
189 | 20.9k | #if !defined(UNALIGNED64_OK) |
190 | 82.3k | while (((uintptr_t)out & align_mask) && (len > 0)) { |
191 | 61.4k | *out++ = *from++; |
192 | 61.4k | --len; |
193 | 61.4k | --left; |
194 | 61.4k | } |
195 | 20.9k | #endif |
196 | 20.9k | if (left < (unsigned)(3 * sizeof(chunk_t))) { |
197 | 7.87k | while (len > 0) { |
198 | 5.74k | *out++ = *from++; |
199 | 5.74k | --len; |
200 | 5.74k | } |
201 | 2.12k | return out; |
202 | 2.12k | } |
203 | 18.7k | if (len) |
204 | 11.9k | return CHUNKMEMSET(out, dist, len); |
205 | | |
206 | 6.84k | return out; |
207 | 18.7k | } |