/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Runtime CPU detection for x86 |
3 | | * (C) 2009,2010,2013,2017 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/cpuid.h> |
9 | | #include <botan/mem_ops.h> |
10 | | #include <botan/internal/loadstor.h> |
11 | | |
12 | | #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) |
13 | | |
14 | | #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) |
15 | | #include <intrin.h> |
16 | | #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) |
17 | | #include <ia32intrin.h> |
18 | | #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) |
19 | | #include <cpuid.h> |
20 | | #endif |
21 | | |
22 | | #endif |
23 | | |
24 | | namespace Botan { |
25 | | |
26 | | #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY) |
27 | | |
28 | | uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size) |
29 | 10 | { |
30 | | #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) |
31 | | #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0) |
32 | | #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) |
33 | | |
34 | | #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL) |
35 | | #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0) |
36 | | #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0) |
37 | | |
38 | | #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM) |
39 | 10 | #define X86_CPUID(type, out) \ |
40 | 20 | asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ |
41 | 20 | : "0" (type)) |
42 | | |
43 | 10 | #define X86_CPUID_SUBLEVEL(type, level, out) \ |
44 | 10 | asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \ |
45 | 10 | : "0" (type), "2" (level)) |
46 | | |
47 | | #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG) |
48 | | #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0) |
49 | | |
50 | | #define X86_CPUID_SUBLEVEL(type, level, out) \ |
51 | | do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0) |
52 | | #else |
53 | | #warning "No way of calling x86 cpuid instruction for this compiler" |
54 | | #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0) |
55 | | #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0) |
56 | | #endif |
57 | | |
58 | 10 | uint64_t features_detected = 0; |
59 | 10 | uint32_t cpuid[4] = { 0 }; |
60 | | |
61 | | // CPUID 0: vendor identification, max sublevel |
62 | 10 | X86_CPUID(0, cpuid); |
63 | | |
64 | 10 | const uint32_t max_supported_sublevel = cpuid[0]; |
65 | | |
66 | 10 | const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 }; |
67 | 10 | const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 }; |
68 | 10 | const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3); |
69 | 10 | const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3); |
70 | | |
71 | 10 | if(max_supported_sublevel >= 1) |
72 | 10 | { |
73 | | // CPUID 1: feature bits |
74 | 10 | X86_CPUID(1, cpuid); |
75 | 10 | const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3]; |
76 | | |
77 | 10 | enum x86_CPUID_1_bits : uint64_t { |
78 | 10 | RDTSC = (1ULL << 4), |
79 | 10 | SSE2 = (1ULL << 26), |
80 | 10 | CLMUL = (1ULL << 33), |
81 | 10 | SSSE3 = (1ULL << 41), |
82 | 10 | SSE41 = (1ULL << 51), |
83 | 10 | SSE42 = (1ULL << 52), |
84 | 10 | AESNI = (1ULL << 57), |
85 | 10 | RDRAND = (1ULL << 62) |
86 | 10 | }; |
87 | | |
88 | 10 | if(flags0 & x86_CPUID_1_bits::RDTSC) |
89 | 10 | features_detected |= CPUID::CPUID_RDTSC_BIT; |
90 | 10 | if(flags0 & x86_CPUID_1_bits::SSE2) |
91 | 10 | features_detected |= CPUID::CPUID_SSE2_BIT; |
92 | 10 | if(flags0 & x86_CPUID_1_bits::CLMUL) |
93 | 10 | features_detected |= CPUID::CPUID_CLMUL_BIT; |
94 | 10 | if(flags0 & x86_CPUID_1_bits::SSSE3) |
95 | 10 | features_detected |= CPUID::CPUID_SSSE3_BIT; |
96 | 10 | if(flags0 & x86_CPUID_1_bits::SSE41) |
97 | 10 | features_detected |= CPUID::CPUID_SSE41_BIT; |
98 | 10 | if(flags0 & x86_CPUID_1_bits::SSE42) |
99 | 10 | features_detected |= CPUID::CPUID_SSE42_BIT; |
100 | 10 | if(flags0 & x86_CPUID_1_bits::AESNI) |
101 | 10 | features_detected |= CPUID::CPUID_AESNI_BIT; |
102 | 10 | if(flags0 & x86_CPUID_1_bits::RDRAND) |
103 | 10 | features_detected |= CPUID::CPUID_RDRAND_BIT; |
104 | 10 | } |
105 | | |
106 | 10 | if(is_intel) |
107 | 10 | { |
108 | | // Intel cache line size is in cpuid(1) output |
109 | 10 | *cache_line_size = 8 * get_byte(2, cpuid[1]); |
110 | 10 | } |
111 | 0 | else if(is_amd) |
112 | 0 | { |
113 | | // AMD puts it in vendor zone |
114 | 0 | X86_CPUID(0x80000005, cpuid); |
115 | 0 | *cache_line_size = get_byte(3, cpuid[2]); |
116 | 0 | } |
117 | | |
118 | 10 | if(max_supported_sublevel >= 7) |
119 | 10 | { |
120 | 10 | clear_mem(cpuid, 4); |
121 | 10 | X86_CPUID_SUBLEVEL(7, 0, cpuid); |
122 | | |
123 | 10 | enum x86_CPUID_7_bits : uint64_t { |
124 | 10 | BMI1 = (1ULL << 3), |
125 | 10 | AVX2 = (1ULL << 5), |
126 | 10 | BMI2 = (1ULL << 8), |
127 | 10 | AVX512_F = (1ULL << 16), |
128 | 10 | AVX512_DQ = (1ULL << 17), |
129 | 10 | RDSEED = (1ULL << 18), |
130 | 10 | ADX = (1ULL << 19), |
131 | 10 | AVX512_IFMA = (1ULL << 21), |
132 | 10 | SHA = (1ULL << 29), |
133 | 10 | AVX512_BW = (1ULL << 30), |
134 | 10 | AVX512_VL = (1ULL << 31), |
135 | 10 | AVX512_VBMI = (1ULL << 33), |
136 | 10 | AVX512_VBMI2 = (1ULL << 38), |
137 | 10 | AVX512_VAES = (1ULL << 41), |
138 | 10 | AVX512_VCLMUL = (1ULL << 42), |
139 | 10 | AVX512_VBITALG = (1ULL << 44), |
140 | 10 | }; |
141 | | |
142 | 10 | const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1]; |
143 | | |
144 | 10 | if(flags7 & x86_CPUID_7_bits::AVX2) |
145 | 10 | features_detected |= CPUID::CPUID_AVX2_BIT; |
146 | 10 | if(flags7 & x86_CPUID_7_bits::BMI1) |
147 | 10 | { |
148 | 10 | features_detected |= CPUID::CPUID_BMI1_BIT; |
149 | | /* |
150 | | We only set the BMI2 bit if BMI1 is also supported, so BMI2 |
151 | | code can safely use both extensions. No known processor |
152 | | implements BMI2 but not BMI1. |
153 | | */ |
154 | 10 | if(flags7 & x86_CPUID_7_bits::BMI2) |
155 | 10 | features_detected |= CPUID::CPUID_BMI2_BIT; |
156 | 10 | } |
157 | | |
158 | 10 | if(flags7 & x86_CPUID_7_bits::AVX512_F) |
159 | 0 | { |
160 | 0 | features_detected |= CPUID::CPUID_AVX512F_BIT; |
161 | |
|
162 | 0 | if(flags7 & x86_CPUID_7_bits::AVX512_DQ) |
163 | 0 | features_detected |= CPUID::CPUID_AVX512DQ_BIT; |
164 | 0 | if(flags7 & x86_CPUID_7_bits::AVX512_BW) |
165 | 0 | features_detected |= CPUID::CPUID_AVX512BW_BIT; |
166 | |
|
167 | 0 | const uint64_t ICELAKE_FLAGS = |
168 | 0 | x86_CPUID_7_bits::AVX512_F | |
169 | 0 | x86_CPUID_7_bits::AVX512_DQ | |
170 | 0 | x86_CPUID_7_bits::AVX512_IFMA | |
171 | 0 | x86_CPUID_7_bits::AVX512_BW | |
172 | 0 | x86_CPUID_7_bits::AVX512_VL | |
173 | 0 | x86_CPUID_7_bits::AVX512_VBMI | |
174 | 0 | x86_CPUID_7_bits::AVX512_VBMI2 | |
175 | 0 | x86_CPUID_7_bits::AVX512_VBITALG; |
176 | |
|
177 | 0 | if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS) |
178 | 0 | features_detected |= CPUID::CPUID_AVX512_ICL_BIT; |
179 | |
|
180 | 0 | if(flags7 & x86_CPUID_7_bits::AVX512_VAES) |
181 | 0 | features_detected |= CPUID::CPUID_AVX512_AES_BIT; |
182 | 0 | if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL) |
183 | 0 | features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT; |
184 | 0 | } |
185 | | |
186 | 10 | if(flags7 & x86_CPUID_7_bits::RDSEED) |
187 | 0 | features_detected |= CPUID::CPUID_RDSEED_BIT; |
188 | 10 | if(flags7 & x86_CPUID_7_bits::ADX) |
189 | 0 | features_detected |= CPUID::CPUID_ADX_BIT; |
190 | 10 | if(flags7 & x86_CPUID_7_bits::SHA) |
191 | 0 | features_detected |= CPUID::CPUID_SHA_BIT; |
192 | 10 | } |
193 | | |
194 | 10 | #undef X86_CPUID |
195 | 10 | #undef X86_CPUID_SUBLEVEL |
196 | | |
197 | | /* |
198 | | * If we don't have access to CPUID, we can still safely assume that |
199 | | * any x86-64 processor has SSE2 and RDTSC |
200 | | */ |
201 | 10 | #if defined(BOTAN_TARGET_ARCH_IS_X86_64) |
202 | 10 | if(features_detected == 0) |
203 | 0 | { |
204 | 0 | features_detected |= CPUID::CPUID_SSE2_BIT; |
205 | 0 | features_detected |= CPUID::CPUID_RDTSC_BIT; |
206 | 0 | } |
207 | 10 | #endif |
208 | | |
209 | 10 | return features_detected; |
210 | 10 | } |
211 | | |
212 | | #endif |
213 | | |
214 | | } |