/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line | Count | Source |
1 | | /* |
2 | | * Runtime CPU detection for x86 |
3 | | * (C) 2009,2010,2013,2017,2023,2024 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/cpuid.h> |
9 | | |
10 | | #include <botan/mem_ops.h> |
11 | | #include <botan/internal/loadstor.h> |
12 | | #include <botan/internal/target_info.h> |
13 | | |
14 | | #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) |
15 | | #include <immintrin.h> |
16 | | #endif |
17 | | |
18 | | #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) |
19 | | #include <intrin.h> |
20 | | #endif |
21 | | |
22 | | namespace Botan { |
23 | | |
24 | | #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) |
25 | | |
26 | | namespace { |
27 | | |
28 | 26 | void invoke_cpuid(uint32_t type, uint32_t out[4]) { |
29 | 26 | clear_mem(out, 4); |
30 | | |
31 | 26 | #if defined(BOTAN_USE_GCC_INLINE_ASM) |
32 | 26 | asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type)); |
33 | | |
34 | | #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) |
35 | | __cpuid((int*)out, type); |
36 | | |
37 | | #else |
38 | | BOTAN_UNUSED(type); |
39 | | #warning "No way of calling x86 cpuid instruction for this compiler" |
40 | | #endif |
41 | 26 | } |
42 | | |
43 | 26 | void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) { |
44 | 26 | clear_mem(out, 4); |
45 | | |
46 | 26 | #if defined(BOTAN_USE_GCC_INLINE_ASM) |
47 | 26 | asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level)); |
48 | | |
49 | | #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) |
50 | | __cpuidex((int*)out, type, level); |
51 | | |
52 | | #else |
53 | | BOTAN_UNUSED(type, level); |
54 | | #warning "No way of calling x86 cpuid instruction for this compiler" |
55 | | #endif |
56 | 26 | } |
57 | | |
58 | 13 | BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() { |
59 | 13 | return _xgetbv(0); |
60 | 13 | } |
61 | | |
62 | | } // namespace |
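The 0x6 and 0xE0 masks tested against the xgetbv() result later in this file correspond to XCR0 state-component bits, i.e. which register state the OS has enabled via XSAVE. A minimal sketch of that mapping, using illustrative constant names that are not part of Botan:

   // XCR0 bits (Intel SDM); names here are for illustration only
   constexpr uint64_t XCR0_SSE_STATE    = 1ULL << 1;  // XMM registers
   constexpr uint64_t XCR0_AVX_STATE    = 1ULL << 2;  // upper halves of YMM
   constexpr uint64_t XCR0_OPMASK_STATE = 1ULL << 5;  // AVX-512 mask registers k0-k7
   constexpr uint64_t XCR0_ZMM_HI256    = 1ULL << 6;  // upper halves of ZMM0-ZMM15
   constexpr uint64_t XCR0_HI16_ZMM     = 1ULL << 7;  // ZMM16-ZMM31

   // 0x06 == XCR0_SSE_STATE | XCR0_AVX_STATE                      -> YMM usable
   // 0xE0 == XCR0_OPMASK_STATE | XCR0_ZMM_HI256 | XCR0_HI16_ZMM   -> ZMM usable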
63 | | |
64 | 13 | uint32_t CPUID::CPUID_Data::detect_cpu_features(uint32_t allowed) { |
65 | 13 | enum class x86_CPUID_1_bits : uint64_t { |
66 | 13 | RDTSC = (1ULL << 4), |
67 | 13 | SSE2 = (1ULL << 26), |
68 | 13 | CLMUL = (1ULL << 33), |
69 | 13 | SSSE3 = (1ULL << 41), |
70 | 13 | SSE41 = (1ULL << 51), |
71 | 13 | AESNI = (1ULL << 57), |
72 | | // AVX + OSXSAVE |
73 | 13 | OSXSAVE = (1ULL << 59) | (1ULL << 60), |
74 | 13 | RDRAND = (1ULL << 62) |
75 | 13 | }; |
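The bit positions above that exceed 31 (for example AESNI at bit 57) assume the packing performed later in detect_cpu_features(), where the leaf-1 ECX result occupies the upper 32 bits and EDX the lower 32 bits of one 64-bit word. A small sketch of that packing, using the standard CPUID.1 assignments (AES-NI is ECX bit 25, SSE2 is EDX bit 26):

   // Illustration only: mirrors the "flags0 = (uint64_t(ecx) << 32) | edx" step below
   constexpr uint64_t pack_cpuid1(uint32_t ecx, uint32_t edx) {
      return (static_cast<uint64_t>(ecx) << 32) | edx;
   }
   // ECX bit 25 (AES-NI) lands at bit 32 + 25 = 57, matching AESNI = (1ULL << 57);
   // EDX bit 26 (SSE2) stays at bit 26. The leaf-7 enum below packs ECX over EBX
   // in the same way.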
76 | | |
77 | 13 | enum class x86_CPUID_7_bits : uint64_t { |
78 | 13 | BMI1 = (1ULL << 3), |
79 | 13 | AVX2 = (1ULL << 5), |
80 | 13 | BMI2 = (1ULL << 8), |
81 | 13 | BMI_1_AND_2 = BMI1 | BMI2, |
82 | 13 | AVX512_F = (1ULL << 16), |
83 | 13 | AVX512_DQ = (1ULL << 17), |
84 | 13 | RDSEED = (1ULL << 18), |
85 | 13 | ADX = (1ULL << 19), |
86 | 13 | AVX512_IFMA = (1ULL << 21), |
87 | 13 | SHA = (1ULL << 29), |
88 | 13 | AVX512_BW = (1ULL << 30), |
89 | 13 | AVX512_VL = (1ULL << 31), |
90 | 13 | AVX512_VBMI = (1ULL << 33), |
91 | 13 | AVX512_VBMI2 = (1ULL << 38), |
92 | 13 | GFNI = (1ULL << 40), |
93 | 13 | AVX512_VAES = (1ULL << 41), |
94 | 13 | AVX512_VCLMUL = (1ULL << 42), |
95 | 13 | AVX512_VBITALG = (1ULL << 44), |
96 | | |
97 | | /* |
98 | | We only enable AVX512 support if all of the below flags are available |
99 | | |
100 | | This is more than we strictly need for most uses; however, it also has |
101 | | the effect of preventing execution of AVX512 codepaths on cores that |
102 | | have serious downclocking problems when AVX512 code executes, |
103 | | especially Intel Skylake. |
104 | | |
105 | | VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice |
106 | | Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for |
107 | | executing AVX512. |
108 | | |
109 | | There is nothing stopping some future processor from supporting the |
110 | | above flags and having AVX512 penalties, but maybe you should not have |
111 | | bought such a processor. |
112 | | */ |
113 | 13 | AVX512_PROFILE = |
114 | 13 | AVX512_F | AVX512_DQ | AVX512_IFMA | AVX512_BW | AVX512_VL | AVX512_VBMI | AVX512_VBMI2 | AVX512_VBITALG, |
115 | 13 | }; |
116 | | |
117 | | // NOLINTNEXTLINE(performance-enum-size) |
118 | 13 | enum class x86_CPUID_7_1_bits : uint64_t { |
119 | 13 | SHA512 = (1 << 0), |
120 | 13 | SM3 = (1 << 1), |
121 | 13 | SM4 = (1 << 2), |
122 | 13 | }; |
123 | | |
124 | 13 | uint32_t feat = 0; |
125 | 13 | uint32_t cpuid[4] = {0}; |
126 | 13 | bool has_os_ymm_support = false; |
127 | 13 | bool has_os_zmm_support = false; |
128 | | |
129 | | // CPUID 0: vendor identification, max sublevel |
130 | 13 | invoke_cpuid(0, cpuid); |
131 | | |
132 | 13 | const uint32_t max_supported_sublevel = cpuid[0]; |
133 | | |
134 | 13 | if(max_supported_sublevel >= 1) { |
135 | | // CPUID 1: feature bits |
136 | 13 | invoke_cpuid(1, cpuid); |
137 | 13 | const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3]; |
138 | | |
139 | 13 | feat |= if_set(flags0, x86_CPUID_1_bits::RDTSC, CPUID::CPUID_RDTSC_BIT, allowed); |
140 | | |
141 | 13 | feat |= if_set(flags0, x86_CPUID_1_bits::RDRAND, CPUID::CPUID_RDRAND_BIT, allowed); |
142 | | |
143 | 13 | feat |= if_set(flags0, x86_CPUID_1_bits::SSE2, CPUID::CPUID_SSE2_BIT, allowed); |
144 | | |
145 | 13 | if(feat & CPUID::CPUID_SSE2_BIT) { |
146 | 13 | feat |= if_set(flags0, x86_CPUID_1_bits::SSSE3, CPUID::CPUID_SSSE3_BIT, allowed); |
147 | | |
148 | 13 | if(feat & CPUID::CPUID_SSSE3_BIT) { |
149 | 13 | feat |= if_set(flags0, x86_CPUID_1_bits::CLMUL, CPUID::CPUID_CLMUL_BIT, allowed); |
150 | 13 | feat |= if_set(flags0, x86_CPUID_1_bits::AESNI, CPUID::CPUID_AESNI_BIT, allowed); |
151 | 13 | } |
152 | | |
153 | 13 | const uint64_t osxsave64 = static_cast<uint64_t>(x86_CPUID_1_bits::OSXSAVE); |
154 | 13 | if((flags0 & osxsave64) == osxsave64) { |
155 | 13 | const uint64_t xcr_flags = xgetbv(); |
156 | 13 | if((xcr_flags & 0x6) == 0x6) { |
157 | 13 | has_os_ymm_support = true; |
158 | 13 | has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0; |
159 | 13 | } |
160 | 13 | } |
161 | 13 | } |
162 | 13 | } |
163 | | |
164 | 13 | if(max_supported_sublevel >= 7) { |
165 | 13 | clear_mem(cpuid, 4); |
166 | 13 | invoke_cpuid_sublevel(7, 0, cpuid); |
167 | | |
168 | 13 | const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1]; |
169 | | |
170 | 13 | clear_mem(cpuid, 4); |
171 | 13 | invoke_cpuid_sublevel(7, 1, cpuid); |
172 | 13 | const uint32_t flags7_1 = cpuid[0]; |
173 | | |
174 | 13 | feat |= if_set(flags7, x86_CPUID_7_bits::RDSEED, CPUID::CPUID_RDSEED_BIT, allowed); |
175 | 13 | feat |= if_set(flags7, x86_CPUID_7_bits::ADX, CPUID::CPUID_ADX_BIT, allowed); |
176 | | |
177 | | /* |
178 | | We only set the BMI bit if both BMI1 and BMI2 are supported, since |
179 | | typically we want to use both extensions in the same code. |
180 | | */ |
181 | 13 | feat |= if_set(flags7, x86_CPUID_7_bits::BMI_1_AND_2, CPUID::CPUID_BMI_BIT, allowed); |
182 | | |
183 | 13 | if(feat & CPUID::CPUID_SSSE3_BIT) { |
184 | 13 | feat |= if_set(flags7, x86_CPUID_7_bits::SHA, CPUID::CPUID_SHA_BIT, allowed); |
185 | 13 | feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM3, CPUID::CPUID_SM3_BIT, allowed); |
186 | 13 | } |
187 | | |
188 | 13 | if(has_os_ymm_support) { |
189 | 13 | feat |= if_set(flags7, x86_CPUID_7_bits::AVX2, CPUID::CPUID_AVX2_BIT, allowed); |
190 | | |
191 | 13 | if(feat & CPUID::CPUID_AVX2_BIT) { |
192 | 13 | feat |= if_set(flags7, x86_CPUID_7_bits::GFNI, CPUID::CPUID_GFNI_BIT, allowed); |
193 | 13 | feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX2_AES_BIT, allowed); |
194 | 13 | feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX2_CLMUL_BIT, allowed); |
195 | 13 | feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SHA512, CPUID::CPUID_SHA512_BIT, allowed); |
196 | 13 | feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM4, CPUID::CPUID_SM4_BIT, allowed); |
197 | | |
198 | 13 | if(has_os_zmm_support) { |
199 | 0 | feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_PROFILE, CPUID::CPUID_AVX512_BIT, allowed); |
200 | |
201 | 0 | if(feat & CPUID::CPUID_AVX512_BIT) { |
202 | 0 | feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX512_AES_BIT, allowed); |
203 | 0 | feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX512_CLMUL_BIT, allowed); |
204 | 0 | } |
205 | 0 | } |
206 | 13 | } |
207 | 13 | } |
208 | 13 | } |
209 | | |
210 | | /* |
211 | | * If we don't have access to CPUID, we can still safely assume that |
212 | | * any x86-64 processor has SSE2 and RDTSC |
213 | | */ |
214 | 13 | #if defined(BOTAN_TARGET_ARCH_IS_X86_64) |
215 | 13 | if(feat == 0) { |
216 | 0 | feat |= CPUID::CPUID_SSE2_BIT & allowed; |
217 | 0 | feat |= CPUID::CPUID_RDTSC_BIT & allowed; |
218 | 0 | } |
219 | 13 | #endif |
220 | | |
221 | 13 | return feat; |
222 | 13 | } |
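The if_set() helper used throughout detect_cpu_features() is defined elsewhere in the CPUID module and is not part of this file. As a rough, hypothetical sketch of the pattern it implements (the real Botan signature may differ): it checks that every bit of a flag mask is present in the relevant CPUID word and, if so, returns the corresponding internal feature bit filtered by the allowed mask.

   // Hypothetical sketch only; not Botan's actual if_set() definition
   template <typename FlagEnum>
   uint32_t if_set_sketch(uint64_t cpuid_word, FlagEnum flag, uint32_t feature_bit, uint32_t allowed) {
      const uint64_t mask = static_cast<uint64_t>(flag);
      if((cpuid_word & mask) == mask) {
         return feature_bit & allowed;
      }
      return 0;
   }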
223 | | |
224 | | #endif |
225 | | |
226 | | } // namespace Botan |