Coverage Report

Created: 2024-06-28 06:08

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017,2023 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
10
#include <botan/mem_ops.h>
11
#include <botan/internal/loadstor.h>
12
13
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
14
15
   #include <immintrin.h>
16
17
   #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
18
      #include <intrin.h>
19
   #endif
20
21
#endif
22
23
namespace Botan {
24
25
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
26
27
namespace {
28
29
2
void invoke_cpuid(uint32_t type, uint32_t out[4]) {
30
2
   clear_mem(out, 4);
31
32
2
   #if defined(BOTAN_USE_GCC_INLINE_ASM)
33
2
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
34
35
   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
36
   __cpuid((int*)out, type);
37
38
   #else
39
      #warning "No way of calling x86 cpuid instruction for this compiler"
40
   #endif
41
2
}
42
43
1
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
44
1
   clear_mem(out, 4);
45
46
1
   #if defined(BOTAN_USE_GCC_INLINE_ASM)
47
1
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
48
49
   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
50
   __cpuidex((int*)out, type, level);
51
52
   #else
53
      #warning "No way of calling x86 cpuid instruction for this compiler"
54
   #endif
55
1
}
56
57
1
/**
* Thin wrapper around the _xgetbv intrinsic, always invoked with index 0.
*
* @return the 64-bit value produced by _xgetbv(0)
*/
BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
   const uint64_t xcr_value = _xgetbv(0);
   return xcr_value;
}
60
61
}  // namespace
62
63
1
uint32_t CPUID::CPUID_Data::detect_cpu_features() {
64
1
   uint32_t features_detected = 0;
65
1
   uint32_t cpuid[4] = {0};
66
1
   bool has_os_ymm_support = false;
67
1
   bool has_os_zmm_support = false;
68
69
   // CPUID 0: vendor identification, max sublevel
70
1
   invoke_cpuid(0, cpuid);
71
72
1
   const uint32_t max_supported_sublevel = cpuid[0];
73
74
1
   if(max_supported_sublevel >= 1) {
75
      // CPUID 1: feature bits
76
1
      invoke_cpuid(1, cpuid);
77
1
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
78
79
1
      enum x86_CPUID_1_bits : uint64_t {
80
1
         RDTSC = (1ULL << 4),
81
1
         SSE2 = (1ULL << 26),
82
1
         CLMUL = (1ULL << 33),
83
1
         SSSE3 = (1ULL << 41),
84
1
         AESNI = (1ULL << 57),
85
1
         OSXSAVE = (1ULL << 59),
86
1
         AVX = (1ULL << 60),
87
1
         RDRAND = (1ULL << 62)
88
1
      };
89
90
1
      if(flags0 & x86_CPUID_1_bits::RDTSC) {
91
1
         features_detected |= CPUID::CPUID_RDTSC_BIT;
92
1
      }
93
1
      if(flags0 & x86_CPUID_1_bits::SSE2) {
94
1
         features_detected |= CPUID::CPUID_SSE2_BIT;
95
1
      }
96
1
      if(flags0 & x86_CPUID_1_bits::CLMUL) {
97
1
         features_detected |= CPUID::CPUID_CLMUL_BIT;
98
1
      }
99
1
      if(flags0 & x86_CPUID_1_bits::SSSE3) {
100
1
         features_detected |= CPUID::CPUID_SSSE3_BIT;
101
1
      }
102
1
      if(flags0 & x86_CPUID_1_bits::AESNI) {
103
1
         features_detected |= CPUID::CPUID_AESNI_BIT;
104
1
      }
105
1
      if(flags0 & x86_CPUID_1_bits::RDRAND) {
106
1
         features_detected |= CPUID::CPUID_RDRAND_BIT;
107
1
      }
108
109
1
      if((flags0 & x86_CPUID_1_bits::AVX) && (flags0 & x86_CPUID_1_bits::OSXSAVE)) {
110
1
         const uint64_t xcr_flags = xgetbv();
111
1
         if((xcr_flags & 0x6) == 0x6) {
112
1
            has_os_ymm_support = true;
113
1
            has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
114
1
         }
115
1
      }
116
1
   }
117
118
1
   if(max_supported_sublevel >= 7) {
119
1
      clear_mem(cpuid, 4);
120
1
      invoke_cpuid_sublevel(7, 0, cpuid);
121
122
1
      enum x86_CPUID_7_bits : uint64_t {
123
1
         BMI1 = (1ULL << 3),
124
1
         AVX2 = (1ULL << 5),
125
1
         BMI2 = (1ULL << 8),
126
1
         AVX512_F = (1ULL << 16),
127
1
         AVX512_DQ = (1ULL << 17),
128
1
         RDSEED = (1ULL << 18),
129
1
         ADX = (1ULL << 19),
130
1
         AVX512_IFMA = (1ULL << 21),
131
1
         SHA = (1ULL << 29),
132
1
         AVX512_BW = (1ULL << 30),
133
1
         AVX512_VL = (1ULL << 31),
134
1
         AVX512_VBMI = (1ULL << 33),
135
1
         AVX512_VBMI2 = (1ULL << 38),
136
1
         AVX512_VAES = (1ULL << 41),
137
1
         AVX512_VCLMUL = (1ULL << 42),
138
1
         AVX512_VBITALG = (1ULL << 44),
139
1
      };
140
141
1
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
142
143
1
      if((flags7 & x86_CPUID_7_bits::AVX2) && has_os_ymm_support) {
144
1
         features_detected |= CPUID::CPUID_AVX2_BIT;
145
1
      }
146
1
      if(flags7 & x86_CPUID_7_bits::RDSEED) {
147
1
         features_detected |= CPUID::CPUID_RDSEED_BIT;
148
1
      }
149
1
      if(flags7 & x86_CPUID_7_bits::ADX) {
150
1
         features_detected |= CPUID::CPUID_ADX_BIT;
151
1
      }
152
1
      if(flags7 & x86_CPUID_7_bits::SHA) {
153
0
         features_detected |= CPUID::CPUID_SHA_BIT;
154
0
      }
155
156
      /*
157
      We only set the BMI bit if both BMI1 and BMI2 are supported, since
158
      typically we want to use both extensions in the same code.
159
      */
160
1
      if((flags7 & x86_CPUID_7_bits::BMI1) && (flags7 & x86_CPUID_7_bits::BMI2)) {
161
1
         features_detected |= CPUID::CPUID_BMI_BIT;
162
1
      }
163
164
1
      if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_os_zmm_support) {
165
0
         const uint64_t AVX512_PROFILE_FLAGS = x86_CPUID_7_bits::AVX512_F | x86_CPUID_7_bits::AVX512_DQ |
166
0
                                               x86_CPUID_7_bits::AVX512_IFMA | x86_CPUID_7_bits::AVX512_BW |
167
0
                                               x86_CPUID_7_bits::AVX512_VL | x86_CPUID_7_bits::AVX512_VBMI |
168
0
                                               x86_CPUID_7_bits::AVX512_VBMI2 | x86_CPUID_7_bits::AVX512_VBITALG;
169
170
         /*
171
         We only enable AVX512 support if all of the above flags are available
172
173
         This is more than we strictly need for most uses, however it also has
174
         the effect of preventing execution of AVX512 codepaths on cores that
175
         have serious downclocking problems when AVX512 code executes,
176
         especially Intel Skylake.
177
178
         VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
179
         Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
180
         executing AVX512.
181
182
         There is nothing stopping some future processor from supporting the
183
         above flags and having AVX512 penalties, but maybe you should not have
184
         bought such a processor.
185
         */
186
0
         if((flags7 & AVX512_PROFILE_FLAGS) == AVX512_PROFILE_FLAGS) {
187
0
            features_detected |= CPUID::CPUID_AVX512_BIT;
188
189
0
            if(flags7 & x86_CPUID_7_bits::AVX512_VAES) {
190
0
               features_detected |= CPUID::CPUID_AVX512_AES_BIT;
191
0
            }
192
0
            if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL) {
193
0
               features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
194
0
            }
195
0
         }
196
0
      }
197
1
   }
198
199
   /*
200
   * If we don't have access to CPUID, we can still safely assume that
201
   * any x86-64 processor has SSE2 and RDTSC
202
   */
203
1
   #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
204
1
   if(features_detected == 0) {
205
0
      features_detected |= CPUID::CPUID_SSE2_BIT;
206
0
      features_detected |= CPUID::CPUID_RDTSC_BIT;
207
0
   }
208
1
   #endif
209
210
1
   return features_detected;
211
1
}
212
213
#endif
214
215
}  // namespace Botan