Coverage Report

Created: 2024-07-27 06:20

/src/c-blosc2/internal-complibs/zlib-ng-2.0.7/functable.c
Line  Count  Source
   1         /* functable.c -- Choose relevant optimized functions at runtime
   2          * Copyright (C) 2017 Hans Kristian Rosbach
   3          * For conditions of distribution and use, see copyright notice in zlib.h
   4          */
   5
   6         #include "zbuild.h"
   7         #include "zendian.h"
   8         #include "deflate.h"
   9         #include "deflate_p.h"
  10
  11         #include "functable.h"
  12
  13         #ifdef X86_FEATURES
  14         #  include "fallback_builtins.h"
  15         #endif
  16
  17         /* insert_string */
  18         extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
  19         #ifdef X86_SSE42_CRC_HASH
  20         extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
  21         #elif defined(ARM_ACLE_CRC_HASH)
  22         extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
  23         #endif
  24
  25         /* quick_insert_string */
  26         extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
  27         #ifdef X86_SSE42_CRC_HASH
  28         extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
  29         #elif defined(ARM_ACLE_CRC_HASH)
  30         extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
  31         #endif
  32
  33         /* slide_hash */
  34         #ifdef X86_SSE2
  35         void slide_hash_sse2(deflate_state *s);
  36         #elif defined(ARM_NEON_SLIDEHASH)
  37         void slide_hash_neon(deflate_state *s);
  38         #elif defined(POWER8_VSX_SLIDEHASH)
  39         void slide_hash_power8(deflate_state *s);
  40         #endif
  41         #ifdef X86_AVX2
  42         void slide_hash_avx2(deflate_state *s);
  43         #endif
  44
  45         /* adler32 */
  46         extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
  47         #ifdef ARM_NEON_ADLER32
  48         extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
  49         #endif
  50         #ifdef X86_SSSE3_ADLER32
  51         extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len);
  52         #endif
  53         #ifdef X86_AVX2_ADLER32
  54         extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len);
  55         #endif
  56         #ifdef POWER8_VSX_ADLER32
  57         extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len);
  58         #endif
  59
  60         /* memory chunking */
  61         extern uint32_t chunksize_c(void);
  62         extern uint8_t* chunkcopy_c(uint8_t *out, uint8_t const *from, unsigned len);
  63         extern uint8_t* chunkcopy_safe_c(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
  64         extern uint8_t* chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len);
  65         extern uint8_t* chunkmemset_c(uint8_t *out, unsigned dist, unsigned len);
  66         extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
  67         #ifdef X86_SSE2_CHUNKSET
  68         extern uint32_t chunksize_sse2(void);
  69         extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len);
  70         extern uint8_t* chunkcopy_safe_sse2(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
  71         extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
  72         extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len);
  73         extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
  74         #endif
  75         #ifdef X86_AVX_CHUNKSET
  76         extern uint32_t chunksize_avx(void);
  77         extern uint8_t* chunkcopy_avx(uint8_t *out, uint8_t const *from, unsigned len);
  78         extern uint8_t* chunkcopy_safe_avx(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
  79         extern uint8_t* chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len);
  80         extern uint8_t* chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len);
  81         extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
  82         #endif
  83         #ifdef ARM_NEON_CHUNKSET
  84         extern uint32_t chunksize_neon(void);
  85         extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len);
  86         extern uint8_t* chunkcopy_safe_neon(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
  87         extern uint8_t* chunkunroll_neon(uint8_t *out, unsigned *dist, unsigned *len);
  88         extern uint8_t* chunkmemset_neon(uint8_t *out, unsigned dist, unsigned len);
  89         extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
  90         #endif
  91
  92         /* CRC32 */
  93         Z_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t);
  94
  95         #ifdef ARM_ACLE_CRC_HASH
  96         extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t);
  97         #endif
  98
  99         #if BYTE_ORDER == LITTLE_ENDIAN
 100         extern uint32_t crc32_little(uint32_t, const unsigned char *, uint64_t);
 101         #elif BYTE_ORDER == BIG_ENDIAN
 102         extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t);
 103         #endif
 104
 105         /* compare258 */
 106         extern uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1);
 107         #ifdef UNALIGNED_OK
 108         extern uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1);
 109         extern uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1);
 110         #ifdef UNALIGNED64_OK
 111         extern uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1);
 112         #endif
 113         #ifdef X86_SSE42_CMP_STR
 114         extern uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1);
 115         #endif
 116         #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
 117         extern uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1);
 118         #endif
 119         #endif
 120
 121         /* longest_match */
 122         extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
 123         #ifdef UNALIGNED_OK
 124         extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
 125         extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
 126         #ifdef UNALIGNED64_OK
 127         extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
 128         #endif
 129         #ifdef X86_SSE42_CMP_STR
 130         extern uint32_t longest_match_unaligned_sse4(deflate_state *const s, Pos cur_match);
 131         #endif
 132         #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
 133         extern uint32_t longest_match_unaligned_avx2(deflate_state *const s, Pos cur_match);
 134         #endif
 135         #endif
 136
 137         Z_INTERNAL Z_TLS struct functable_s functable;
 138
 139         Z_INTERNAL void cpu_check_features(void)
 140      5  {
 141      5      static int features_checked = 0;
 142      5      if (features_checked)
 143      3          return;
 144         #if defined(X86_FEATURES)
 145             x86_check_features();
 146         #elif defined(ARM_FEATURES)
 147             arm_check_features();
 148         #elif defined(POWER_FEATURES)
 149             power_check_features();
 150         #endif
 151      2      features_checked = 1;
 152      2  }
 153
 154         /* stub functions */
 155      1  Z_INTERNAL void insert_string_stub(deflate_state *const s, const uint32_t str, uint32_t count) {
 156             // Initialize default
 157
 158      1      functable.insert_string = &insert_string_c;
 159      1      cpu_check_features();
 160
 161         #ifdef X86_SSE42_CRC_HASH
 162             if (x86_cpu_has_sse42)
 163                 functable.insert_string = &insert_string_sse4;
 164         #elif defined(ARM_ACLE_CRC_HASH)
 165             if (arm_cpu_has_crc32)
 166                 functable.insert_string = &insert_string_acle;
 167         #endif
 168
 169      1      functable.insert_string(s, str, count);
 170      1  }
 171
 172      1  Z_INTERNAL Pos quick_insert_string_stub(deflate_state *const s, const uint32_t str) {
 173      1      functable.quick_insert_string = &quick_insert_string_c;
 174
 175         #ifdef X86_SSE42_CRC_HASH
 176             if (x86_cpu_has_sse42)
 177                 functable.quick_insert_string = &quick_insert_string_sse4;
 178         #elif defined(ARM_ACLE_CRC_HASH)
 179             if (arm_cpu_has_crc32)
 180                 functable.quick_insert_string = &quick_insert_string_acle;
 181         #endif
 182
 183      1      return functable.quick_insert_string(s, str);
 184      1  }
 185
 186      0  Z_INTERNAL void slide_hash_stub(deflate_state *s) {
 187
 188      0      functable.slide_hash = &slide_hash_c;
 189      0      cpu_check_features();
 190
 191         #ifdef X86_SSE2
 192         #  if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
 193             if (x86_cpu_has_sse2)
 194         #  endif
 195                 functable.slide_hash = &slide_hash_sse2;
 196         #elif defined(ARM_NEON_SLIDEHASH)
 197         #  ifndef ARM_NOCHECK_NEON
 198             if (arm_cpu_has_neon)
 199         #  endif
 200                 functable.slide_hash = &slide_hash_neon;
 201         #endif
 202         #ifdef X86_AVX2
 203             if (x86_cpu_has_avx2)
 204                 functable.slide_hash = &slide_hash_avx2;
 205         #endif
 206         #ifdef POWER8_VSX_SLIDEHASH
 207             if (power_cpu_has_arch_2_07)
 208                 functable.slide_hash = &slide_hash_power8;
 209         #endif
 210
 211      0      functable.slide_hash(s);
 212      0  }
 213
 214      2  Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
 215             // Initialize default
 216      2      functable.adler32 = &adler32_c;
 217      2      cpu_check_features();
 218
 219         #ifdef ARM_NEON_ADLER32
 220         #  ifndef ARM_NOCHECK_NEON
 221             if (arm_cpu_has_neon)
 222         #  endif
 223                 functable.adler32 = &adler32_neon;
 224         #endif
 225         #ifdef X86_SSSE3_ADLER32
 226             if (x86_cpu_has_ssse3)
 227                 functable.adler32 = &adler32_ssse3;
 228         #endif
 229         #ifdef X86_AVX2_ADLER32
 230             if (x86_cpu_has_avx2)
 231                 functable.adler32 = &adler32_avx2;
 232         #endif
 233         #ifdef POWER8_VSX_ADLER32
 234             if (power_cpu_has_arch_2_07)
 235                 functable.adler32 = &adler32_power8;
 236         #endif
 237
 238      2      return functable.adler32(adler, buf, len);
 239      2  }
 240
 241      2  Z_INTERNAL uint32_t chunksize_stub(void) {
 242             // Initialize default
 243      2      functable.chunksize = &chunksize_c;
 244      2      cpu_check_features();
 245
 246         #ifdef X86_SSE2_CHUNKSET
 247         # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
 248             if (x86_cpu_has_sse2)
 249         # endif
 250                 functable.chunksize = &chunksize_sse2;
 251         #endif
 252         #ifdef X86_AVX_CHUNKSET
 253             if (x86_cpu_has_avx2)
 254                 functable.chunksize = &chunksize_avx;
 255         #endif
 256         #ifdef ARM_NEON_CHUNKSET
 257             if (arm_cpu_has_neon)
 258                 functable.chunksize = &chunksize_neon;
 259         #endif
 260
 261      2      return functable.chunksize();
 262      2  }
 263
 264      2  Z_INTERNAL uint8_t* chunkcopy_stub(uint8_t *out, uint8_t const *from, unsigned len) {
 265             // Initialize default
 266      2      functable.chunkcopy = &chunkcopy_c;
 267
 268         #ifdef X86_SSE2_CHUNKSET
 269         # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
 270             if (x86_cpu_has_sse2)
 271         # endif
 272                 functable.chunkcopy = &chunkcopy_sse2;
 273         #endif
 274         #ifdef X86_AVX_CHUNKSET
 275             if (x86_cpu_has_avx2)
 276                 functable.chunkcopy = &chunkcopy_avx;
 277         #endif
 278         #ifdef ARM_NEON_CHUNKSET
 279             if (arm_cpu_has_neon)
 280                 functable.chunkcopy = &chunkcopy_neon;
 281         #endif
 282
 283      2      return functable.chunkcopy(out, from, len);
 284      2  }
 285
 286      0  Z_INTERNAL uint8_t* chunkcopy_safe_stub(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
 287             // Initialize default
 288      0      functable.chunkcopy_safe = &chunkcopy_safe_c;
 289
 290         #ifdef X86_SSE2_CHUNKSET
 291         # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
 292             if (x86_cpu_has_sse2)
 293         # endif
 294                 functable.chunkcopy_safe = &chunkcopy_safe_sse2;
 295         #endif
 296         #ifdef X86_AVX_CHUNKSET
 297             if (x86_cpu_has_avx2)
 298                 functable.chunkcopy_safe = &chunkcopy_safe_avx;
 299         #endif
 300         #ifdef ARM_NEON_CHUNKSET
 301             if (arm_cpu_has_neon)
 302                 functable.chunkcopy_safe = &chunkcopy_safe_neon;
 303         #endif
 304
 305      0      return functable.chunkcopy_safe(out, from, len, safe);
 306      0  }
 307
 308      0  Z_INTERNAL uint8_t* chunkunroll_stub(uint8_t *out, unsigned *dist, unsigned *len) {
 309             // Initialize default
 310      0      functable.chunkunroll = &chunkunroll_c;
 311
 312         #ifdef X86_SSE2_CHUNKSET
 313         # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
 314             if (x86_cpu_has_sse2)
 315         # endif
 316                 functable.chunkunroll = &chunkunroll_sse2;
 317         #endif
 318         #ifdef X86_AVX_CHUNKSET
 319             if (x86_cpu_has_avx2)
 320                 functable.chunkunroll = &chunkunroll_avx;
 321         #endif
 322         #ifdef ARM_NEON_CHUNKSET
 323             if (arm_cpu_has_neon)
 324                 functable.chunkunroll = &chunkunroll_neon;
 325         #endif
 326
 327      0      return functable.chunkunroll(out, dist, len);
 328      0  }
 329
 330      2  Z_INTERNAL uint8_t* chunkmemset_stub(uint8_t *out, unsigned dist, unsigned len) {
 331             // Initialize default
 332      2      functable.chunkmemset = &chunkmemset_c;
 333
 334         #ifdef X86_SSE2_CHUNKSET
 335         # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
 336             if (x86_cpu_has_sse2)
 337         # endif
 338                 functable.chunkmemset = &chunkmemset_sse2;
 339         #endif
 340         #ifdef X86_AVX_CHUNKSET
 341             if (x86_cpu_has_avx2)
 342                 functable.chunkmemset = &chunkmemset_avx;
 343         #endif
 344         #ifdef ARM_NEON_CHUNKSET
 345             if (arm_cpu_has_neon)
 346                 functable.chunkmemset = &chunkmemset_neon;
 347         #endif
 348
 349      2      return functable.chunkmemset(out, dist, len);
 350      2  }
 351
 352      2  Z_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
 353             // Initialize default
 354      2      functable.chunkmemset_safe = &chunkmemset_safe_c;
 355
 356         #ifdef X86_SSE2_CHUNKSET
 357         # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
 358             if (x86_cpu_has_sse2)
 359         # endif
 360                 functable.chunkmemset_safe = &chunkmemset_safe_sse2;
 361         #endif
 362         #ifdef X86_AVX_CHUNKSET
 363             if (x86_cpu_has_avx2)
 364                 functable.chunkmemset_safe = &chunkmemset_safe_avx;
 365         #endif
 366         #ifdef ARM_NEON_CHUNKSET
 367             if (arm_cpu_has_neon)
 368                 functable.chunkmemset_safe = &chunkmemset_safe_neon;
 369         #endif
 370
 371      2      return functable.chunkmemset_safe(out, dist, len, left);
 372      2  }
 373
 374      0  Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
 375      0      int32_t use_byfour = sizeof(void *) == sizeof(ptrdiff_t);
 376
 377      0      Assert(sizeof(uint64_t) >= sizeof(size_t),
 378      0             "crc32_z takes size_t but internally we have a uint64_t len");
 379         /* return a function pointer for optimized arches here after a capability test */
 380
 381      0      functable.crc32 = &crc32_generic;
 382      0      cpu_check_features();
 383
 384      0      if (use_byfour) {
 385      0  #if BYTE_ORDER == LITTLE_ENDIAN
 386      0          functable.crc32 = crc32_little;
 387         #  if defined(ARM_ACLE_CRC_HASH)
 388                 if (arm_cpu_has_crc32)
 389                     functable.crc32 = crc32_acle;
 390         #  endif
 391         #elif BYTE_ORDER == BIG_ENDIAN
 392                 functable.crc32 = crc32_big;
 393         #else
 394         #  error No endian defined
 395         #endif
 396      0      }
 397
 398      0      return functable.crc32(crc, buf, len);
 399      0  }
 400
 401      1  Z_INTERNAL uint32_t compare258_stub(const unsigned char *src0, const unsigned char *src1) {
 402
 403      1      functable.compare258 = &compare258_c;
 404
 405         #ifdef UNALIGNED_OK
 406         #  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
 407             functable.compare258 = &compare258_unaligned_64;
 408         #  elif defined(HAVE_BUILTIN_CTZ)
 409             functable.compare258 = &compare258_unaligned_32;
 410         #  else
 411             functable.compare258 = &compare258_unaligned_16;
 412         #  endif
 413         #  ifdef X86_SSE42_CMP_STR
 414             if (x86_cpu_has_sse42)
 415                 functable.compare258 = &compare258_unaligned_sse4;
 416         #  endif
 417         #  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
 418             if (x86_cpu_has_avx2)
 419                 functable.compare258 = &compare258_unaligned_avx2;
 420         #  endif
 421         #endif
 422
 423      1      return functable.compare258(src0, src1);
 424      1  }
 425
 426      1  Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) {
 427
 428      1      functable.longest_match = &longest_match_c;
 429
 430         #ifdef UNALIGNED_OK
 431         #  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
 432             functable.longest_match = &longest_match_unaligned_64;
 433         #  elif defined(HAVE_BUILTIN_CTZ)
 434             functable.longest_match = &longest_match_unaligned_32;
 435         #  else
 436             functable.longest_match = &longest_match_unaligned_16;
 437         #  endif
 438         #  ifdef X86_SSE42_CMP_STR
 439             if (x86_cpu_has_sse42)
 440                 functable.longest_match = &longest_match_unaligned_sse4;
 441         #  endif
 442         #  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
 443             if (x86_cpu_has_avx2)
 444                 functable.longest_match = &longest_match_unaligned_avx2;
 445         #  endif
 446         #endif
 447
 448      1      return functable.longest_match(s, cur_match);
 449      1  }
 450
 451         /* functable init */
 452         Z_INTERNAL Z_TLS struct functable_s functable = {
 453             insert_string_stub,
 454             quick_insert_string_stub,
 455             adler32_stub,
 456             crc32_stub,
 457             slide_hash_stub,
 458             compare258_stub,
 459             longest_match_stub,
 460             chunksize_stub,
 461             chunkcopy_stub,
 462             chunkcopy_safe_stub,
 463             chunkunroll_stub,
 464             chunkmemset_stub,
 465             chunkmemset_safe_stub
 466         };
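
Note on the pattern being measured: functable.c implements lazy runtime dispatch. Every slot in functable starts out pointing at a *_stub function; the first call through the table runs the stub, which performs CPU feature detection, overwrites its own slot with the best available implementation, and forwards the call. Later calls jump straight to the chosen implementation. That is why the stub bodies above show hit counts of only 1 or 2, and why slide_hash_stub, chunkcopy_safe_stub, chunkunroll_stub, and crc32_stub show 0: this run simply never reached those operations. The standalone program below is a minimal sketch of the mechanism, not zlib-ng source; the single-entry table, the unoptimized Adler-32, and main are illustrative assumptions.

    /* dispatch_sketch.c -- minimal sketch of the self-rebinding stub pattern.
     * All names are illustrative; this is not zlib-ng code. */
    #include <inttypes.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct functable_s {
        uint32_t (*adler32)(uint32_t adler, const unsigned char *buf, size_t len);
    };

    /* Tentative definition so the stub can refer to the table it lives in. */
    static struct functable_s functable;

    /* Portable fallback: textbook Adler-32, no SIMD. */
    static uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
        uint32_t a = adler & 0xffff, b = (adler >> 16) & 0xffff;
        for (size_t i = 0; i < len; i++) {
            a = (a + buf[i]) % 65521;
            b = (b + a) % 65521;
        }
        return (b << 16) | a;
    }

    /* The stub: runs once, rebinds its table slot, then forwards the call.
     * The real file consults cpu_check_features() here and swaps in a
     * SIMD variant when the CPU supports one. */
    static uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
        functable.adler32 = &adler32_c;   /* default; a feature check could override */
        return functable.adler32(adler, buf, len);
    }

    /* Actual definition: every slot begins life pointing at its stub. */
    static struct functable_s functable = {adler32_stub};

    int main(void) {
        const unsigned char msg[] = "hello";
        /* First call dispatches through adler32_stub... */
        printf("%08" PRIx32 "\n", functable.adler32(1, msg, sizeof(msg) - 1));
        /* ...second call goes directly to adler32_c. */
        printf("%08" PRIx32 "\n", functable.adler32(1, msg, sizeof(msg) - 1));
        return 0;
    }

The tentative definition followed by the initialized definition of functable mirrors the real file's structure (declaration at line 137, initializer at lines 452-466). In the listing above, the table is additionally declared Z_INTERNAL Z_TLS, i.e. thread-local where supported, so each thread performs its own one-time rebinding without needing synchronization.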