Coverage Report

Created: 2023-01-25 06:35

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017,2023 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
#include <botan/mem_ops.h>
10
#include <botan/internal/loadstor.h>
11
12
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13
14
#include <immintrin.h>
15
16
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
17
  #include <intrin.h>
18
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
19
  #include <ia32intrin.h>
20
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
21
  #include <cpuid.h>
22
#endif
23
24
#endif
25
26
namespace Botan {
27
28
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
29
30
namespace {
31
32
void invoke_cpuid(uint32_t type, uint32_t out[4])
33
26
   {
34
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) || defined(BOTAN_BUILD_COMPILER_IS_INTEL)
35
   __cpuid((int*)out, type);
36
37
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
38
26
   __get_cpuid(type, out, out+1, out+2, out+3);
39
40
#elif defined(BOTAN_USE_GCC_INLINE_ASM)
41
   asm("cpuid\n\t"
42
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])
43
       : "0" (type));
44
45
#else
46
   #warning "No way of calling x86 cpuid instruction for this compiler"
47
   clear_mem(out, 4);
48
#endif
49
26
   }
50
51
/*
* Read the extended control register with index 0 via the _xgetbv
* intrinsic. The caller inspects the returned bits to learn which
* vector register state the OS has enabled.
*/
BOTAN_FUNC_ISA("xsave")
uint64_t xgetbv()
   {
   const uint64_t xcr0_value = _xgetbv(0);
   return xcr0_value;
   }
56
57
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4])
58
13
   {
59
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
60
   __cpuidex((int*)out, type, level);
61
62
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
63
13
   __cpuid_count(type, level, out[0], out[1], out[2], out[3]);
64
65
#elif defined(BOTAN_USE_GCC_INLINE_ASM)
66
   asm("cpuid\n\t"
67
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])     \
68
       : "0" (type), "2" (level));
69
70
#else
71
   #warning "No way of calling x86 cpuid instruction for this compiler"
72
   clear_mem(out, 4);
73
#endif
74
13
   }
75
76
}
77
78
uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
79
13
   {
80
13
   uint64_t features_detected = 0;
81
13
   uint32_t cpuid[4] = { 0 };
82
13
   bool has_os_ymm_support = false;
83
13
   bool has_os_zmm_support = false;
84
85
   // CPUID 0: vendor identification, max sublevel
86
13
   invoke_cpuid(0, cpuid);
87
88
13
   const uint32_t max_supported_sublevel = cpuid[0];
89
90
13
   const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
91
13
   const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
92
13
   const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
93
13
   const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
94
95
13
   if(max_supported_sublevel >= 1)
96
13
      {
97
      // CPUID 1: feature bits
98
13
      invoke_cpuid(1, cpuid);
99
13
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
100
101
13
      enum x86_CPUID_1_bits : uint64_t {
102
13
         RDTSC = (1ULL << 4),
103
13
         SSE2 = (1ULL << 26),
104
13
         CLMUL = (1ULL << 33),
105
13
         SSSE3 = (1ULL << 41),
106
13
         SSE41 = (1ULL << 51),
107
13
         SSE42 = (1ULL << 52),
108
13
         AESNI = (1ULL << 57),
109
13
         OSXSAVE = (1ULL << 59),
110
13
         AVX = (1ULL << 60),
111
13
         RDRAND = (1ULL << 62)
112
13
      };
113
114
13
      if(flags0 & x86_CPUID_1_bits::RDTSC)
115
13
         features_detected |= CPUID::CPUID_RDTSC_BIT;
116
13
      if(flags0 & x86_CPUID_1_bits::SSE2)
117
13
         features_detected |= CPUID::CPUID_SSE2_BIT;
118
13
      if(flags0 & x86_CPUID_1_bits::CLMUL)
119
13
         features_detected |= CPUID::CPUID_CLMUL_BIT;
120
13
      if(flags0 & x86_CPUID_1_bits::SSSE3)
121
13
         features_detected |= CPUID::CPUID_SSSE3_BIT;
122
13
      if(flags0 & x86_CPUID_1_bits::SSE41)
123
13
         features_detected |= CPUID::CPUID_SSE41_BIT;
124
13
      if(flags0 & x86_CPUID_1_bits::SSE42)
125
13
         features_detected |= CPUID::CPUID_SSE42_BIT;
126
13
      if(flags0 & x86_CPUID_1_bits::AESNI)
127
13
         features_detected |= CPUID::CPUID_AESNI_BIT;
128
13
      if(flags0 & x86_CPUID_1_bits::RDRAND)
129
13
         features_detected |= CPUID::CPUID_RDRAND_BIT;
130
131
13
      if((flags0 & x86_CPUID_1_bits::AVX) &&
132
13
         (flags0 & x86_CPUID_1_bits::OSXSAVE))
133
13
         {
134
13
         const uint64_t xcr_flags = xgetbv();
135
13
         if((xcr_flags & 0x6) == 0x6)
136
13
            {
137
13
            has_os_ymm_support = true;
138
13
            has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
139
13
            }
140
13
         }
141
13
      }
142
143
13
   if(is_intel)
144
13
      {
145
      // Intel cache line size is in cpuid(1) output
146
13
      *cache_line_size = 8 * get_byte<2>(cpuid[1]);
147
13
      }
148
0
   else if(is_amd)
149
0
      {
150
      // AMD puts it in vendor zone
151
0
      invoke_cpuid(0x80000005, cpuid);
152
0
      *cache_line_size = get_byte<3>(cpuid[2]);
153
0
      }
154
155
13
   if(max_supported_sublevel >= 7)
156
13
      {
157
13
      clear_mem(cpuid, 4);
158
13
      invoke_cpuid_sublevel(7, 0, cpuid);
159
160
13
      enum x86_CPUID_7_bits : uint64_t {
161
13
         BMI1 = (1ULL << 3),
162
13
         AVX2 = (1ULL << 5),
163
13
         BMI2 = (1ULL << 8),
164
13
         AVX512_F = (1ULL << 16),
165
13
         AVX512_DQ = (1ULL << 17),
166
13
         RDSEED = (1ULL << 18),
167
13
         ADX = (1ULL << 19),
168
13
         AVX512_IFMA = (1ULL << 21),
169
13
         SHA = (1ULL << 29),
170
13
         AVX512_BW = (1ULL << 30),
171
13
         AVX512_VL = (1ULL << 31),
172
13
         AVX512_VBMI = (1ULL << 33),
173
13
         AVX512_VBMI2 = (1ULL << 38),
174
13
         AVX512_VAES = (1ULL << 41),
175
13
         AVX512_VCLMUL = (1ULL << 42),
176
13
         AVX512_VBITALG = (1ULL << 44),
177
13
      };
178
179
13
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
180
181
13
      if((flags7 & x86_CPUID_7_bits::AVX2) && has_os_ymm_support)
182
13
         features_detected |= CPUID::CPUID_AVX2_BIT;
183
13
      if(flags7 & x86_CPUID_7_bits::RDSEED)
184
13
         features_detected |= CPUID::CPUID_RDSEED_BIT;
185
13
      if(flags7 & x86_CPUID_7_bits::ADX)
186
13
         features_detected |= CPUID::CPUID_ADX_BIT;
187
13
      if(flags7 & x86_CPUID_7_bits::SHA)
188
0
         features_detected |= CPUID::CPUID_SHA_BIT;
189
190
13
      if(flags7 & x86_CPUID_7_bits::BMI1)
191
13
         {
192
         /*
193
         We only set the BMI bit if both BMI1 and BMI2 are supported, since
194
         typically we want to use both extensions in the same code.
195
         */
196
13
         if(flags7 & x86_CPUID_7_bits::BMI2)
197
13
            {
198
13
            features_detected |= CPUID::CPUID_BMI_BIT;
199
200
            /*
201
            Up until Zen3, AMD CPUs with BMI2 support had microcoded
202
            pdep/pext, which works but is very slow.
203
204
            TODO: check for Zen3/Zen4 here
205
            */
206
13
            if(is_intel)
207
13
               {
208
13
               features_detected |= CPUID::CPUID_FAST_PDEP_BIT;
209
13
               }
210
13
            }
211
13
         }
212
213
13
      if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_os_zmm_support)
214
0
         {
215
0
         const uint64_t AVX512_PROFILE_FLAGS =
216
0
            x86_CPUID_7_bits::AVX512_F |
217
0
            x86_CPUID_7_bits::AVX512_DQ |
218
0
            x86_CPUID_7_bits::AVX512_IFMA |
219
0
            x86_CPUID_7_bits::AVX512_BW |
220
0
            x86_CPUID_7_bits::AVX512_VL |
221
0
            x86_CPUID_7_bits::AVX512_VBMI |
222
0
            x86_CPUID_7_bits::AVX512_VBMI2 |
223
0
            x86_CPUID_7_bits::AVX512_VBITALG;
224
225
         /*
226
         We only enable AVX512 support if all of the above flags are available
227
228
         This is more than we strictly need for most uses, however it also has
229
         the effect of preventing execution of AVX512 codepaths on cores that
230
         have serious downclocking problems when AVX512 code executes,
231
         especially Intel Skylake.
232
233
         VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
234
         Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
235
         executing AVX512.
236
237
         There is nothing stopping some future processor from supporting the
238
         above flags and having AVX512 penalties, but maybe you should not have
239
         bought such a processor.
240
         */
241
0
         if((flags7 & AVX512_PROFILE_FLAGS) == AVX512_PROFILE_FLAGS)
242
0
            features_detected |= CPUID::CPUID_AVX512_BIT;
243
244
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
245
0
            features_detected |= CPUID::CPUID_AVX512_AES_BIT;
246
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
247
0
            features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
248
0
         }
249
13
      }
250
251
   /*
252
   * If we don't have access to CPUID, we can still safely assume that
253
   * any x86-64 processor has SSE2 and RDTSC
254
   */
255
13
#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
256
13
   if(features_detected == 0)
257
0
      {
258
0
      features_detected |= CPUID::CPUID_SSE2_BIT;
259
0
      features_detected |= CPUID::CPUID_RDTSC_BIT;
260
0
      }
261
13
#endif
262
263
13
   return features_detected;
264
13
   }
265
266
#endif
267
268
}