/src/libdeflate/lib/x86/adler32_impl.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm |
3 | | * |
4 | | * Copyright 2016 Eric Biggers |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person |
7 | | * obtaining a copy of this software and associated documentation |
8 | | * files (the "Software"), to deal in the Software without |
9 | | * restriction, including without limitation the rights to use, |
10 | | * copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | * copies of the Software, and to permit persons to whom the |
12 | | * Software is furnished to do so, subject to the following |
13 | | * conditions: |
14 | | * |
15 | | * The above copyright notice and this permission notice shall be |
16 | | * included in all copies or substantial portions of the Software. |
17 | | * |
18 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
19 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
20 | | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
21 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
22 | | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
23 | | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
24 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
25 | | * OTHER DEALINGS IN THE SOFTWARE. |
26 | | */ |
27 | | |
28 | | #ifndef LIB_X86_ADLER32_IMPL_H |
29 | | #define LIB_X86_ADLER32_IMPL_H |
30 | | |
31 | | #include "cpu_features.h" |
32 | | |
33 | | /* SSE2 and AVX2 implementations. Used on older CPUs. |
 | | * |
 | | * Each group below defines the function name as a macro expanding to |
 | | * itself (so that arch_select_adler32_func() can test for it with #ifdef), |
 | | * sets SUFFIX, ATTRIBUTES, VL, USE_VNNI and USE_AVX512, and then includes |
 | | * adler32_template.h. The same parameter macros are defined again for the |
 | | * next group without an #undef in this file, so the template presumably |
 | | * consumes and undefines them -- confirm in adler32_template.h. |
 | | * |
 | | * VL is 16 for SSE2 and 32 for AVX2; the later groups pair VL 32 and VL 64 |
 | | * with 256-bit and 512-bit vectors, so VL appears to be the vector length |
 | | * in bytes. |
 | | */ |
34 | | #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) |
35 | 0 | # define adler32_x86_sse2 adler32_x86_sse2 |
36 | | # define SUFFIX _sse2 |
37 | | # define ATTRIBUTES _target_attribute("sse2") |
38 | 0 | # define VL 16 |
39 | | # define USE_VNNI 0 |
40 | | # define USE_AVX512 0 |
41 | | # include "adler32_template.h" |
42 | | |
43 | 6 | # define adler32_x86_avx2 adler32_x86_avx2 |
44 | | # define SUFFIX _avx2 |
45 | | # define ATTRIBUTES _target_attribute("avx2") |
46 | 870k | # define VL 32 |
47 | | # define USE_VNNI 0 |
48 | | # define USE_AVX512 0 |
49 | | # include "adler32_template.h" |
50 | | #endif |
51 | | |
52 | | /* |
53 | | * AVX-VNNI implementation. This is used on CPUs that have AVX2 and AVX-VNNI |
54 | | * but don't have AVX-512, for example Intel Alder Lake. |
55 | | * |
56 | | * Unusually for a new CPU feature, gcc added support for the AVX-VNNI |
57 | | * intrinsics (in gcc 11.1) slightly before binutils added support for |
58 | | * assembling AVX-VNNI instructions (in binutils 2.36). Distros can reasonably |
59 | | * have gcc 11 with binutils 2.35. Because of this issue, we check for gcc 12 |
60 | | * instead of gcc 11. (libdeflate supports direct compilation without a |
61 | | * configure step, so checking the binutils version is not always an option.) |
 | | * |
 | | * Independently of the compiler version check, this implementation is left |
 | | * out when LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI is defined. |
62 | | */ |
63 | | #if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \ |
64 | | !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI) |
65 | 0 | # define adler32_x86_avx2_vnni adler32_x86_avx2_vnni |
66 | | # define SUFFIX _avx2_vnni |
67 | | # define ATTRIBUTES _target_attribute("avx2,avxvnni") |
68 | 0 | # define VL 32 |
69 | | # define USE_VNNI 1 |
70 | | # define USE_AVX512 0 |
71 | | # include "adler32_template.h" |
72 | | #endif |
73 | | |
74 | | #if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \ |
75 | | !defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI) |
76 | | /* |
77 | | * AVX512VNNI implementation using 256-bit vectors. This is very similar to the |
78 | | * AVX-VNNI implementation but takes advantage of masking and more registers. |
79 | | * This is used on certain older Intel CPUs, specifically Ice Lake and Tiger |
80 | | * Lake, which support AVX512VNNI but downclock a bit too eagerly when ZMM |
81 | | * registers are used. |
 | | * |
 | | * Both AVX512VNNI implementations inside this #if block are left out when |
 | | * the compiler version check fails or when |
 | | * LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI is defined. |
 | | * |
 | | * ATTRIBUTES for this variant enables avx512vl in addition to avx512bw and |
 | | * avx512vnni. |
82 | | */ |
83 | 0 | # define adler32_x86_avx512_vl256_vnni adler32_x86_avx512_vl256_vnni |
84 | | # define SUFFIX _avx512_vl256_vnni |
85 | | # define ATTRIBUTES _target_attribute("avx512bw,avx512vl,avx512vnni") |
86 | 0 | # define VL 32 |
87 | | # define USE_VNNI 1 |
88 | | # define USE_AVX512 1 |
89 | | # include "adler32_template.h" |
90 | | |
91 | | /* |
92 | | * AVX512VNNI implementation using 512-bit vectors. This is used on CPUs that |
93 | | * have a good AVX-512 implementation including AVX512VNNI. |
 | | * |
 | | * Unlike the 256-bit variant, ATTRIBUTES here does not enable avx512vl. |
94 | | */ |
95 | 0 | # define adler32_x86_avx512_vl512_vnni adler32_x86_avx512_vl512_vnni |
96 | | # define SUFFIX _avx512_vl512_vnni |
97 | | # define ATTRIBUTES _target_attribute("avx512bw,avx512vnni") |
98 | 0 | # define VL 64 |
99 | | # define USE_VNNI 1 |
100 | | # define USE_AVX512 1 |
101 | | # include "adler32_template.h" |
102 | | #endif |
103 | | /* |
 | | * arch_select_adler32_func() - choose an x86 Adler-32 implementation for the |
 | | * CPU feature bits reported by get_x86_cpu_features(). |
 | | * |
 | | * Candidates are tested in this fixed order and the first match is returned: |
 | | * 1. AVX512VNNI, 512-bit vectors: X86_CPU_FEATURE_ZMM set, AVX512BW, AVX512VNNI |
 | | * 2. AVX512VNNI, 256-bit vectors: AVX512BW, AVX512VL, AVX512VNNI |
 | | * 3. AVX-VNNI: AVX2 and AVXVNNI |
 | | * 4. AVX2 |
 | | * 5. SSE2 |
 | | * Each candidate is wrapped in #ifdef on its own name, so it is only |
 | | * considered when the matching implementation was compiled in earlier in this |
 | | * header. |
 | | * |
 | | * NOTE(review): X86_CPU_FEATURE_ZMM presumably marks CPUs where using ZMM |
 | | * registers is worthwhile (the 256-bit variant exists for CPUs that downclock |
 | | * with ZMM) -- confirm in cpu_features.h. |
 | | * |
 | | * Return: the selected implementation, or NULL if no candidate is both |
 | | * compiled in and supported by the CPU. |
 | | */ |
104 | | static inline adler32_func_t |
105 | | arch_select_adler32_func(void) |
106 | 6 | { |
107 | 6 | const u32 features MAYBE_UNUSED = get_x86_cpu_features(); /* unused if every #ifdef branch below is compiled out */ |
108 | | |
109 | 6 | #ifdef adler32_x86_avx512_vl512_vnni |
110 | 6 | if ((features & X86_CPU_FEATURE_ZMM) && |
111 | 6 | HAVE_AVX512BW(features) && HAVE_AVX512VNNI(features)) |
112 | 0 | return adler32_x86_avx512_vl512_vnni; |
113 | 6 | #endif |
114 | 6 | #ifdef adler32_x86_avx512_vl256_vnni |
115 | 6 | if (HAVE_AVX512BW(features) && HAVE_AVX512VL(features) && |
116 | 6 | HAVE_AVX512VNNI(features)) |
117 | 0 | return adler32_x86_avx512_vl256_vnni; |
118 | 6 | #endif |
119 | 6 | #ifdef adler32_x86_avx2_vnni |
120 | 6 | if (HAVE_AVX2(features) && HAVE_AVXVNNI(features)) |
121 | 0 | return adler32_x86_avx2_vnni; |
122 | 6 | #endif |
123 | 6 | #ifdef adler32_x86_avx2 |
124 | 6 | if (HAVE_AVX2(features)) |
125 | 6 | return adler32_x86_avx2; |
126 | 0 | #endif |
127 | 0 | #ifdef adler32_x86_sse2 |
128 | 0 | if (HAVE_SSE2(features)) |
129 | 0 | return adler32_x86_sse2; |
130 | 0 | #endif |
131 | 0 | return NULL; |
132 | 0 | } |
133 | 6 | #define arch_select_adler32_func arch_select_adler32_func /* self-define so the selector's presence can be tested with #ifdef (presumably by the generic dispatch code) */ |
134 | | |
135 | | #endif /* LIB_X86_ADLER32_IMPL_H */ |