Coverage Report

Created: 2023-12-08 07:00

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017,2023 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
10
#include <botan/mem_ops.h>
11
#include <botan/internal/loadstor.h>
12
13
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
14
15
   #include <immintrin.h>
16
17
   #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
18
      #include <intrin.h>
19
   #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
20
      #include <ia32intrin.h>
21
   #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
22
      #include <cpuid.h>
23
   #endif
24
25
#endif
26
27
namespace Botan {
28
29
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
30
31
namespace {
32
33
4
void invoke_cpuid(uint32_t type, uint32_t out[4]) {
34
   #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) || defined(BOTAN_BUILD_COMPILER_IS_INTEL)
35
   __cpuid((int*)out, type);
36
37
   #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
38
4
   __get_cpuid(type, out, out + 1, out + 2, out + 3);
39
40
   #elif defined(BOTAN_USE_GCC_INLINE_ASM)
41
   asm("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
42
43
   #else
44
      #warning "No way of calling x86 cpuid instruction for this compiler"
45
   clear_mem(out, 4);
46
   #endif
47
4
}
48
49
2
/**
* Read an extended control register through the _xgetbv intrinsic.
*
* Register index 0 is always the one requested.
*
* NOTE(review): BOTAN_FUNC_ISA is defined outside this file; presumably it
* enables the "xsave" instruction set for just this function.
*
* @return the 64-bit value produced by _xgetbv(0)
*/
BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
   constexpr unsigned int XCR_INDEX = 0;
   return _xgetbv(XCR_INDEX);
}
52
53
2
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
54
   #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
55
   __cpuidex((int*)out, type, level);
56
57
   #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
58
2
   __cpuid_count(type, level, out[0], out[1], out[2], out[3]);
59
60
   #elif defined(BOTAN_USE_GCC_INLINE_ASM)
61
   asm("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
62
63
   #else
64
      #warning "No way of calling x86 cpuid instruction for this compiler"
65
   clear_mem(out, 4);
66
   #endif
67
2
}
68
69
}  // namespace
70
71
2
uint32_t CPUID::CPUID_Data::detect_cpu_features() {
72
2
   uint32_t features_detected = 0;
73
2
   uint32_t cpuid[4] = {0};
74
2
   bool has_os_ymm_support = false;
75
2
   bool has_os_zmm_support = false;
76
77
   // CPUID 0: vendor identification, max sublevel
78
2
   invoke_cpuid(0, cpuid);
79
80
2
   const uint32_t max_supported_sublevel = cpuid[0];
81
82
2
   if(max_supported_sublevel >= 1) {
83
      // CPUID 1: feature bits
84
2
      invoke_cpuid(1, cpuid);
85
2
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
86
87
2
      enum x86_CPUID_1_bits : uint64_t {
88
2
         RDTSC = (1ULL << 4),
89
2
         SSE2 = (1ULL << 26),
90
2
         CLMUL = (1ULL << 33),
91
2
         SSSE3 = (1ULL << 41),
92
2
         AESNI = (1ULL << 57),
93
2
         OSXSAVE = (1ULL << 59),
94
2
         AVX = (1ULL << 60),
95
2
         RDRAND = (1ULL << 62)
96
2
      };
97
98
2
      if(flags0 & x86_CPUID_1_bits::RDTSC) {
99
2
         features_detected |= CPUID::CPUID_RDTSC_BIT;
100
2
      }
101
2
      if(flags0 & x86_CPUID_1_bits::SSE2) {
102
2
         features_detected |= CPUID::CPUID_SSE2_BIT;
103
2
      }
104
2
      if(flags0 & x86_CPUID_1_bits::CLMUL) {
105
2
         features_detected |= CPUID::CPUID_CLMUL_BIT;
106
2
      }
107
2
      if(flags0 & x86_CPUID_1_bits::SSSE3) {
108
2
         features_detected |= CPUID::CPUID_SSSE3_BIT;
109
2
      }
110
2
      if(flags0 & x86_CPUID_1_bits::AESNI) {
111
2
         features_detected |= CPUID::CPUID_AESNI_BIT;
112
2
      }
113
2
      if(flags0 & x86_CPUID_1_bits::RDRAND) {
114
2
         features_detected |= CPUID::CPUID_RDRAND_BIT;
115
2
      }
116
117
2
      if((flags0 & x86_CPUID_1_bits::AVX) && (flags0 & x86_CPUID_1_bits::OSXSAVE)) {
118
2
         const uint64_t xcr_flags = xgetbv();
119
2
         if((xcr_flags & 0x6) == 0x6) {
120
2
            has_os_ymm_support = true;
121
2
            has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
122
2
         }
123
2
      }
124
2
   }
125
126
2
   if(max_supported_sublevel >= 7) {
127
2
      clear_mem(cpuid, 4);
128
2
      invoke_cpuid_sublevel(7, 0, cpuid);
129
130
2
      enum x86_CPUID_7_bits : uint64_t {
131
2
         BMI1 = (1ULL << 3),
132
2
         AVX2 = (1ULL << 5),
133
2
         BMI2 = (1ULL << 8),
134
2
         AVX512_F = (1ULL << 16),
135
2
         AVX512_DQ = (1ULL << 17),
136
2
         RDSEED = (1ULL << 18),
137
2
         ADX = (1ULL << 19),
138
2
         AVX512_IFMA = (1ULL << 21),
139
2
         SHA = (1ULL << 29),
140
2
         AVX512_BW = (1ULL << 30),
141
2
         AVX512_VL = (1ULL << 31),
142
2
         AVX512_VBMI = (1ULL << 33),
143
2
         AVX512_VBMI2 = (1ULL << 38),
144
2
         AVX512_VAES = (1ULL << 41),
145
2
         AVX512_VCLMUL = (1ULL << 42),
146
2
         AVX512_VBITALG = (1ULL << 44),
147
2
      };
148
149
2
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
150
151
2
      if((flags7 & x86_CPUID_7_bits::AVX2) && has_os_ymm_support) {
152
2
         features_detected |= CPUID::CPUID_AVX2_BIT;
153
2
      }
154
2
      if(flags7 & x86_CPUID_7_bits::RDSEED) {
155
2
         features_detected |= CPUID::CPUID_RDSEED_BIT;
156
2
      }
157
2
      if(flags7 & x86_CPUID_7_bits::ADX) {
158
2
         features_detected |= CPUID::CPUID_ADX_BIT;
159
2
      }
160
2
      if(flags7 & x86_CPUID_7_bits::SHA) {
161
0
         features_detected |= CPUID::CPUID_SHA_BIT;
162
0
      }
163
164
      /*
165
      We only set the BMI bit if both BMI1 and BMI2 are supported, since
166
      typically we want to use both extensions in the same code.
167
      */
168
2
      if((flags7 & x86_CPUID_7_bits::BMI1) && (flags7 & x86_CPUID_7_bits::BMI2)) {
169
2
         features_detected |= CPUID::CPUID_BMI_BIT;
170
2
      }
171
172
2
      if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_os_zmm_support) {
173
0
         const uint64_t AVX512_PROFILE_FLAGS = x86_CPUID_7_bits::AVX512_F | x86_CPUID_7_bits::AVX512_DQ |
174
0
                                               x86_CPUID_7_bits::AVX512_IFMA | x86_CPUID_7_bits::AVX512_BW |
175
0
                                               x86_CPUID_7_bits::AVX512_VL | x86_CPUID_7_bits::AVX512_VBMI |
176
0
                                               x86_CPUID_7_bits::AVX512_VBMI2 | x86_CPUID_7_bits::AVX512_VBITALG;
177
178
         /*
179
         We only enable AVX512 support if all of the above flags are available
180
181
         This is more than we strictly need for most uses, however it also has
182
         the effect of preventing execution of AVX512 codepaths on cores that
183
         have serious downclocking problems when AVX512 code executes,
184
         especially Intel Skylake.
185
186
         VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
187
         Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
188
         executing AVX512.
189
190
         There is nothing stopping some future processor from supporting the
191
         above flags and having AVX512 penalties, but maybe you should not have
192
         bought such a processor.
193
         */
194
0
         if((flags7 & AVX512_PROFILE_FLAGS) == AVX512_PROFILE_FLAGS) {
195
0
            features_detected |= CPUID::CPUID_AVX512_BIT;
196
197
0
            if(flags7 & x86_CPUID_7_bits::AVX512_VAES) {
198
0
               features_detected |= CPUID::CPUID_AVX512_AES_BIT;
199
0
            }
200
0
            if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL) {
201
0
               features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
202
0
            }
203
0
         }
204
0
      }
205
2
   }
206
207
   /*
208
   * If we don't have access to CPUID, we can still safely assume that
209
   * any x86-64 processor has SSE2 and RDTSC
210
   */
211
2
   #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
212
2
   if(features_detected == 0) {
213
0
      features_detected |= CPUID::CPUID_SSE2_BIT;
214
0
      features_detected |= CPUID::CPUID_RDTSC_BIT;
215
0
   }
216
2
   #endif
217
218
2
   return features_detected;
219
2
}
220
221
#endif
222
223
}  // namespace Botan