/src/Botan-3.4.0/src/lib/utils/cpuid/cpuid_x86.cpp
Line | Count | Source |
1 | | /* |
2 | | * Runtime CPU detection for x86 |
3 | | * (C) 2009,2010,2013,2017,2023 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/cpuid.h> |
9 | | |
10 | | #include <botan/mem_ops.h> |
11 | | #include <botan/internal/loadstor.h> |
12 | | |
13 | | #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) |
14 | | |
15 | | #include <immintrin.h> |
16 | | |
17 | | #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) |
18 | | #include <intrin.h> |
19 | | #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) |
20 | | #include <ia32intrin.h> |
21 | | #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) |
22 | | #include <cpuid.h> |
23 | | #endif |
24 | | |
25 | | #endif |
26 | | |
27 | | namespace Botan { |
28 | | |
29 | | #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) |
30 | | |
31 | | namespace { |
32 | | |
33 | 0 | void invoke_cpuid(uint32_t type, uint32_t out[4]) { |
34 | | #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) || defined(BOTAN_BUILD_COMPILER_IS_INTEL) |
35 | | __cpuid((int*)out, type); |
36 | | |
37 | | #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) |
38 | | __get_cpuid(type, out, out + 1, out + 2, out + 3); |
39 | |
|
40 | | #elif defined(BOTAN_USE_GCC_INLINE_ASM) |
41 | | asm("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type)); |
42 | | |
43 | | #else |
44 | | #warning "No way of calling x86 cpuid instruction for this compiler" |
45 | | clear_mem(out, 4); |
46 | | #endif |
47 | 0 | } |
48 | | |
49 | 0 | BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() { |
50 | 0 | return _xgetbv(0); |
51 | 0 | } |
52 | | |
53 | 0 | void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) { |
54 | | #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) |
55 | | __cpuidex((int*)out, type, level); |
56 | | |
57 | | #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) |
58 | 0 | __cpuid_count(type, level, out[0], out[1], out[2], out[3]); |
59 | |
|
60 | | #elif defined(BOTAN_USE_GCC_INLINE_ASM) |
61 | | asm("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level)); |
62 | | |
63 | | #else |
64 | | #warning "No way of calling x86 cpuid instruction for this compiler" |
65 | | clear_mem(out, 4); |
66 | | #endif |
67 | 0 | } |
68 | | |
69 | | } // namespace |
70 | | |
71 | 0 | uint32_t CPUID::CPUID_Data::detect_cpu_features() { |
72 | 0 | uint32_t features_detected = 0; |
73 | 0 | uint32_t cpuid[4] = {0}; |
74 | 0 | bool has_os_ymm_support = false; |
75 | 0 | bool has_os_zmm_support = false; |
76 | | |
77 | | // CPUID 0: vendor identification, max sublevel |
78 | 0 | invoke_cpuid(0, cpuid); |
79 | |
|
80 | 0 | const uint32_t max_supported_sublevel = cpuid[0]; |
81 | |
|
82 | 0 | if(max_supported_sublevel >= 1) { |
83 | | // CPUID 1: feature bits |
84 | 0 | invoke_cpuid(1, cpuid); |
85 | 0 | const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3]; |
86 | |
|
87 | 0 | enum x86_CPUID_1_bits : uint64_t { |
88 | 0 | RDTSC = (1ULL << 4), |
89 | 0 | SSE2 = (1ULL << 26), |
90 | 0 | CLMUL = (1ULL << 33), |
91 | 0 | SSSE3 = (1ULL << 41), |
92 | 0 | AESNI = (1ULL << 57), |
93 | 0 | OSXSAVE = (1ULL << 59), |
94 | 0 | AVX = (1ULL << 60), |
95 | 0 | RDRAND = (1ULL << 62) |
96 | 0 | }; |
97 | |
|
98 | 0 | if(flags0 & x86_CPUID_1_bits::RDTSC) { |
99 | 0 | features_detected |= CPUID::CPUID_RDTSC_BIT; |
100 | 0 | } |
101 | 0 | if(flags0 & x86_CPUID_1_bits::SSE2) { |
102 | 0 | features_detected |= CPUID::CPUID_SSE2_BIT; |
103 | 0 | } |
104 | 0 | if(flags0 & x86_CPUID_1_bits::CLMUL) { |
105 | 0 | features_detected |= CPUID::CPUID_CLMUL_BIT; |
106 | 0 | } |
107 | 0 | if(flags0 & x86_CPUID_1_bits::SSSE3) { |
108 | 0 | features_detected |= CPUID::CPUID_SSSE3_BIT; |
109 | 0 | } |
110 | 0 | if(flags0 & x86_CPUID_1_bits::AESNI) { |
111 | 0 | features_detected |= CPUID::CPUID_AESNI_BIT; |
112 | 0 | } |
113 | 0 | if(flags0 & x86_CPUID_1_bits::RDRAND) { |
114 | 0 | features_detected |= CPUID::CPUID_RDRAND_BIT; |
115 | 0 | } |
116 | |
|
117 | 0 | if((flags0 & x86_CPUID_1_bits::AVX) && (flags0 & x86_CPUID_1_bits::OSXSAVE)) { |
118 | 0 | const uint64_t xcr_flags = xgetbv(); |
119 | 0 | if((xcr_flags & 0x6) == 0x6) { |
120 | 0 | has_os_ymm_support = true; |
121 | 0 | has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0; |
122 | 0 | } |
123 | 0 | } |
124 | 0 | } |
125 | |
|
126 | 0 | if(max_supported_sublevel >= 7) { |
127 | 0 | clear_mem(cpuid, 4); |
128 | 0 | invoke_cpuid_sublevel(7, 0, cpuid); |
129 | |
|
130 | 0 | enum x86_CPUID_7_bits : uint64_t { |
131 | 0 | BMI1 = (1ULL << 3), |
132 | 0 | AVX2 = (1ULL << 5), |
133 | 0 | BMI2 = (1ULL << 8), |
134 | 0 | AVX512_F = (1ULL << 16), |
135 | 0 | AVX512_DQ = (1ULL << 17), |
136 | 0 | RDSEED = (1ULL << 18), |
137 | 0 | ADX = (1ULL << 19), |
138 | 0 | AVX512_IFMA = (1ULL << 21), |
139 | 0 | SHA = (1ULL << 29), |
140 | 0 | AVX512_BW = (1ULL << 30), |
141 | 0 | AVX512_VL = (1ULL << 31), |
142 | 0 | AVX512_VBMI = (1ULL << 33), |
143 | 0 | AVX512_VBMI2 = (1ULL << 38), |
144 | 0 | AVX512_VAES = (1ULL << 41), |
145 | 0 | AVX512_VCLMUL = (1ULL << 42), |
146 | 0 | AVX512_VBITALG = (1ULL << 44), |
147 | 0 | }; |
148 | |
|
149 | 0 | const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1]; |
150 | |
|
151 | 0 | if((flags7 & x86_CPUID_7_bits::AVX2) && has_os_ymm_support) { |
152 | 0 | features_detected |= CPUID::CPUID_AVX2_BIT; |
153 | 0 | } |
154 | 0 | if(flags7 & x86_CPUID_7_bits::RDSEED) { |
155 | 0 | features_detected |= CPUID::CPUID_RDSEED_BIT; |
156 | 0 | } |
157 | 0 | if(flags7 & x86_CPUID_7_bits::ADX) { |
158 | 0 | features_detected |= CPUID::CPUID_ADX_BIT; |
159 | 0 | } |
160 | 0 | if(flags7 & x86_CPUID_7_bits::SHA) { |
161 | 0 | features_detected |= CPUID::CPUID_SHA_BIT; |
162 | 0 | } |
163 | | |
164 | | /* |
165 | | We only set the BMI bit if both BMI1 and BMI2 are supported, since |
166 | | typically we want to use both extensions in the same code. |
167 | | */ |
168 | 0 | if((flags7 & x86_CPUID_7_bits::BMI1) && (flags7 & x86_CPUID_7_bits::BMI2)) { |
169 | 0 | features_detected |= CPUID::CPUID_BMI_BIT; |
170 | 0 | } |
171 | |
|
172 | 0 | if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_os_zmm_support) { |
173 | 0 | const uint64_t AVX512_PROFILE_FLAGS = x86_CPUID_7_bits::AVX512_F | x86_CPUID_7_bits::AVX512_DQ | |
174 | 0 | x86_CPUID_7_bits::AVX512_IFMA | x86_CPUID_7_bits::AVX512_BW | |
175 | 0 | x86_CPUID_7_bits::AVX512_VL | x86_CPUID_7_bits::AVX512_VBMI | |
176 | 0 | x86_CPUID_7_bits::AVX512_VBMI2 | x86_CPUID_7_bits::AVX512_VBITALG; |
177 | | |
178 | | /* |
179 | | We only enable AVX512 support if all of the above flags are available |
180 | | |
181 | | This is more than we strictly need for most uses, however it also has |
182 | | the effect of preventing execution of AVX512 codepaths on cores that |
183 | | have serious downclocking problems when AVX512 code executes, |
184 | | especially Intel Skylake. |
185 | | |
186 | | VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice |
187 | | Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for |
188 | | executing AVX512. |
189 | | |
190 | | There is nothing stopping some future processor from supporting the |
191 | | above flags and having AVX512 penalties, but maybe you should not have |
192 | | bought such a processor. |
193 | | */ |
194 | 0 | if((flags7 & AVX512_PROFILE_FLAGS) == AVX512_PROFILE_FLAGS) { |
195 | 0 | features_detected |= CPUID::CPUID_AVX512_BIT; |
196 | |
|
197 | 0 | if(flags7 & x86_CPUID_7_bits::AVX512_VAES) { |
198 | 0 | features_detected |= CPUID::CPUID_AVX512_AES_BIT; |
199 | 0 | } |
200 | 0 | if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL) { |
201 | 0 | features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT; |
202 | 0 | } |
203 | 0 | } |
204 | 0 | } |
205 | 0 | } |
206 | | |
207 | | /* |
208 | | * If we don't have access to CPUID, we can still safely assume that |
209 | | * any x86-64 processor has SSE2 and RDTSC |
210 | | */ |
211 | 0 | #if defined(BOTAN_TARGET_ARCH_IS_X86_64) |
212 | 0 | if(features_detected == 0) { |
213 | 0 | features_detected |= CPUID::CPUID_SSE2_BIT; |
214 | 0 | features_detected |= CPUID::CPUID_RDTSC_BIT; |
215 | 0 | } |
216 | 0 | #endif |
217 | |
|
218 | 0 | return features_detected; |
219 | 0 | } |
220 | | |
221 | | #endif |
222 | | |
223 | | } // namespace Botan |