/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  | * Runtime CPU detection for x86  | 
3  |  | * (C) 2009,2010,2013,2017,2023 Jack Lloyd  | 
4  |  | *  | 
5  |  | * Botan is released under the Simplified BSD License (see license.txt)  | 
6  |  | */  | 
7  |  |  | 
8  |  | #include <botan/internal/cpuid.h>  | 
9  |  |  | 
10  |  | #include <botan/mem_ops.h>  | 
11  |  | #include <botan/internal/loadstor.h>  | 
12  |  |  | 
13  |  | #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)  | 
14  |  |  | 
15  |  |    #include <immintrin.h>  | 
16  |  |  | 
17  |  |    #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)  | 
18  |  |       #include <intrin.h>  | 
19  |  |    #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)  | 
20  |  |       #include <ia32intrin.h>  | 
21  |  |    #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)  | 
22  |  |       #include <cpuid.h>  | 
23  |  |    #endif  | 
24  |  |  | 
25  |  | #endif  | 
26  |  |  | 
27  |  | namespace Botan { | 
28  |  |  | 
29  |  | #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)  | 
30  |  |  | 
31  |  | namespace { | 
32  |  |  | 
/**
* Execute the x86 CPUID instruction for the given leaf.
*
* @param type the CPUID leaf to query (the value placed in EAX)
* @param out receives the EAX, EBX, ECX and EDX results, in that order
*
* The output array is always zeroed before the query is made. This way a
* query that does not write its outputs (GCC's __get_cpuid returns 0 and
* leaves them untouched when the leaf is not supported), or a build with no
* way of executing CPUID at all, yields all zeros rather than whatever values
* a previous call left in the array.
*/
void invoke_cpuid(uint32_t type, uint32_t out[4]) {
   out[0] = 0;
   out[1] = 0;
   out[2] = 0;
   out[3] = 0;

   #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) || defined(BOTAN_BUILD_COMPILER_IS_INTEL)
   __cpuid(reinterpret_cast<int*>(out), static_cast<int>(type));

   #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
   // The return value only signals whether the leaf was supported; since
   // out was zeroed above, an unsupported leaf simply reads back as zeros.
   static_cast<void>(__get_cpuid(type, out, out + 1, out + 2, out + 3));

   #elif defined(BOTAN_USE_GCC_INLINE_ASM)
   asm("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));

   #else
      #warning "No way of calling x86 cpuid instruction for this compiler"
   // Nothing can be queried; out already holds zeros
   static_cast<void>(type);
   #endif
}
48  |  |  | 
49  | 2  | BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() { | 
50  | 2  |    return _xgetbv(0);  | 
51  | 2  | }  | 
52  |  |  | 
53  | 2  | void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) { | 
54  |  |    #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)  | 
55  |  |    __cpuidex((int*)out, type, level);  | 
56  |  |  | 
57  |  |    #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)  | 
58  | 2  |    __cpuid_count(type, level, out[0], out[1], out[2], out[3]);  | 
59  |  |  | 
60  |  |    #elif defined(BOTAN_USE_GCC_INLINE_ASM)  | 
61  |  |    asm("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level)); | 
62  |  |  | 
63  |  |    #else  | 
64  |  |       #warning "No way of calling x86 cpuid instruction for this compiler"  | 
65  |  |    clear_mem(out, 4);  | 
66  |  |    #endif  | 
67  | 2  | }  | 
68  |  |  | 
69  |  | }  // namespace  | 
70  |  |  | 
71  | 2  | uint32_t CPUID::CPUID_Data::detect_cpu_features() { | 
72  | 2  |    uint32_t features_detected = 0;  | 
73  | 2  |    uint32_t cpuid[4] = {0}; | 
74  | 2  |    bool has_os_ymm_support = false;  | 
75  | 2  |    bool has_os_zmm_support = false;  | 
76  |  |  | 
77  |  |    // CPUID 0: vendor identification, max sublevel  | 
78  | 2  |    invoke_cpuid(0, cpuid);  | 
79  |  |  | 
80  | 2  |    const uint32_t max_supported_sublevel = cpuid[0];  | 
81  |  |  | 
82  | 2  |    if(max_supported_sublevel >= 1) { | 
83  |  |       // CPUID 1: feature bits  | 
84  | 2  |       invoke_cpuid(1, cpuid);  | 
85  | 2  |       const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];  | 
86  |  |  | 
87  | 2  |       enum x86_CPUID_1_bits : uint64_t { | 
88  | 2  |          RDTSC = (1ULL << 4),  | 
89  | 2  |          SSE2 = (1ULL << 26),  | 
90  | 2  |          CLMUL = (1ULL << 33),  | 
91  | 2  |          SSSE3 = (1ULL << 41),  | 
92  | 2  |          AESNI = (1ULL << 57),  | 
93  | 2  |          OSXSAVE = (1ULL << 59),  | 
94  | 2  |          AVX = (1ULL << 60),  | 
95  | 2  |          RDRAND = (1ULL << 62)  | 
96  | 2  |       };  | 
97  |  |  | 
98  | 2  |       if(flags0 & x86_CPUID_1_bits::RDTSC) { | 
99  | 2  |          features_detected |= CPUID::CPUID_RDTSC_BIT;  | 
100  | 2  |       }  | 
101  | 2  |       if(flags0 & x86_CPUID_1_bits::SSE2) { | 
102  | 2  |          features_detected |= CPUID::CPUID_SSE2_BIT;  | 
103  | 2  |       }  | 
104  | 2  |       if(flags0 & x86_CPUID_1_bits::CLMUL) { | 
105  | 2  |          features_detected |= CPUID::CPUID_CLMUL_BIT;  | 
106  | 2  |       }  | 
107  | 2  |       if(flags0 & x86_CPUID_1_bits::SSSE3) { | 
108  | 2  |          features_detected |= CPUID::CPUID_SSSE3_BIT;  | 
109  | 2  |       }  | 
110  | 2  |       if(flags0 & x86_CPUID_1_bits::AESNI) { | 
111  | 2  |          features_detected |= CPUID::CPUID_AESNI_BIT;  | 
112  | 2  |       }  | 
113  | 2  |       if(flags0 & x86_CPUID_1_bits::RDRAND) { | 
114  | 2  |          features_detected |= CPUID::CPUID_RDRAND_BIT;  | 
115  | 2  |       }  | 
116  |  |  | 
117  | 2  |       if((flags0 & x86_CPUID_1_bits::AVX) && (flags0 & x86_CPUID_1_bits::OSXSAVE)) { | 
118  | 2  |          const uint64_t xcr_flags = xgetbv();  | 
119  | 2  |          if((xcr_flags & 0x6) == 0x6) { | 
120  | 2  |             has_os_ymm_support = true;  | 
121  | 2  |             has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;  | 
122  | 2  |          }  | 
123  | 2  |       }  | 
124  | 2  |    }  | 
125  |  |  | 
126  | 2  |    if(max_supported_sublevel >= 7) { | 
127  | 2  |       clear_mem(cpuid, 4);  | 
128  | 2  |       invoke_cpuid_sublevel(7, 0, cpuid);  | 
129  |  |  | 
130  | 2  |       enum x86_CPUID_7_bits : uint64_t { | 
131  | 2  |          BMI1 = (1ULL << 3),  | 
132  | 2  |          AVX2 = (1ULL << 5),  | 
133  | 2  |          BMI2 = (1ULL << 8),  | 
134  | 2  |          AVX512_F = (1ULL << 16),  | 
135  | 2  |          AVX512_DQ = (1ULL << 17),  | 
136  | 2  |          RDSEED = (1ULL << 18),  | 
137  | 2  |          ADX = (1ULL << 19),  | 
138  | 2  |          AVX512_IFMA = (1ULL << 21),  | 
139  | 2  |          SHA = (1ULL << 29),  | 
140  | 2  |          AVX512_BW = (1ULL << 30),  | 
141  | 2  |          AVX512_VL = (1ULL << 31),  | 
142  | 2  |          AVX512_VBMI = (1ULL << 33),  | 
143  | 2  |          AVX512_VBMI2 = (1ULL << 38),  | 
144  | 2  |          AVX512_VAES = (1ULL << 41),  | 
145  | 2  |          AVX512_VCLMUL = (1ULL << 42),  | 
146  | 2  |          AVX512_VBITALG = (1ULL << 44),  | 
147  | 2  |       };  | 
148  |  |  | 
149  | 2  |       const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];  | 
150  |  |  | 
151  | 2  |       if((flags7 & x86_CPUID_7_bits::AVX2) && has_os_ymm_support) { | 
152  | 2  |          features_detected |= CPUID::CPUID_AVX2_BIT;  | 
153  | 2  |       }  | 
154  | 2  |       if(flags7 & x86_CPUID_7_bits::RDSEED) { | 
155  | 2  |          features_detected |= CPUID::CPUID_RDSEED_BIT;  | 
156  | 2  |       }  | 
157  | 2  |       if(flags7 & x86_CPUID_7_bits::ADX) { | 
158  | 2  |          features_detected |= CPUID::CPUID_ADX_BIT;  | 
159  | 2  |       }  | 
160  | 2  |       if(flags7 & x86_CPUID_7_bits::SHA) { | 
161  | 0  |          features_detected |= CPUID::CPUID_SHA_BIT;  | 
162  | 0  |       }  | 
163  |  |  | 
164  |  |       /*  | 
165  |  |       We only set the BMI bit if both BMI1 and BMI2 are supported, since  | 
166  |  |       typically we want to use both extensions in the same code.  | 
167  |  |       */  | 
168  | 2  |       if((flags7 & x86_CPUID_7_bits::BMI1) && (flags7 & x86_CPUID_7_bits::BMI2)) { | 
169  | 2  |          features_detected |= CPUID::CPUID_BMI_BIT;  | 
170  | 2  |       }  | 
171  |  |  | 
172  | 2  |       if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_os_zmm_support) { | 
173  | 0  |          const uint64_t AVX512_PROFILE_FLAGS = x86_CPUID_7_bits::AVX512_F | x86_CPUID_7_bits::AVX512_DQ |  | 
174  | 0  |                                                x86_CPUID_7_bits::AVX512_IFMA | x86_CPUID_7_bits::AVX512_BW |  | 
175  | 0  |                                                x86_CPUID_7_bits::AVX512_VL | x86_CPUID_7_bits::AVX512_VBMI |  | 
176  | 0  |                                                x86_CPUID_7_bits::AVX512_VBMI2 | x86_CPUID_7_bits::AVX512_VBITALG;  | 
177  |  |  | 
178  |  |          /*  | 
179  |  |          We only enable AVX512 support if all of the above flags are available  | 
180  |  |  | 
181  |  |          This is more than we strictly need for most uses, however it also has  | 
182  |  |          the effect of preventing execution of AVX512 codepaths on cores that  | 
183  |  |          have serious downclocking problems when AVX512 code executes,  | 
184  |  |          especially Intel Skylake.  | 
185  |  |  | 
186  |  |          VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice  | 
187  |  |          Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for  | 
188  |  |          executing AVX512.  | 
189  |  |  | 
190  |  |          There is nothing stopping some future processor from supporting the  | 
191  |  |          above flags and having AVX512 penalties, but maybe you should not have  | 
192  |  |          bought such a processor.  | 
193  |  |          */  | 
194  | 0  |          if((flags7 & AVX512_PROFILE_FLAGS) == AVX512_PROFILE_FLAGS) { | 
195  | 0  |             features_detected |= CPUID::CPUID_AVX512_BIT;  | 
196  |  | 
  | 
197  | 0  |             if(flags7 & x86_CPUID_7_bits::AVX512_VAES) { | 
198  | 0  |                features_detected |= CPUID::CPUID_AVX512_AES_BIT;  | 
199  | 0  |             }  | 
200  | 0  |             if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL) { | 
201  | 0  |                features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;  | 
202  | 0  |             }  | 
203  | 0  |          }  | 
204  | 0  |       }  | 
205  | 2  |    }  | 
206  |  |  | 
207  |  |    /*  | 
208  |  |    * If we don't have access to CPUID, we can still safely assume that  | 
209  |  |    * any x86-64 processor has SSE2 and RDTSC  | 
210  |  |    */  | 
211  | 2  |    #if defined(BOTAN_TARGET_ARCH_IS_X86_64)  | 
212  | 2  |    if(features_detected == 0) { | 
213  | 0  |       features_detected |= CPUID::CPUID_SSE2_BIT;  | 
214  | 0  |       features_detected |= CPUID::CPUID_RDTSC_BIT;  | 
215  | 0  |    }  | 
216  | 2  |    #endif  | 
217  |  |  | 
218  | 2  |    return features_detected;  | 
219  | 2  | }  | 
220  |  |  | 
221  |  | #endif  | 
222  |  |  | 
223  |  | }  // namespace Botan  |