Coverage Report

Created: 2025-03-13 06:40

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017,2023,2024 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
10
#include <botan/mem_ops.h>
11
#include <botan/internal/loadstor.h>
12
#include <botan/internal/target_info.h>
13
14
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
15
   #include <immintrin.h>
16
#endif
17
18
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
19
   #include <intrin.h>
20
#endif
21
22
namespace Botan {
23
24
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
25
26
namespace {
27
28
26
void invoke_cpuid(uint32_t type, uint32_t out[4]) {
29
26
   clear_mem(out, 4);
30
31
26
   #if defined(BOTAN_USE_GCC_INLINE_ASM)
32
26
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
33
34
   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
35
   __cpuid((int*)out, type);
36
37
   #else
38
   BOTAN_UNUSED(type);
39
      #warning "No way of calling x86 cpuid instruction for this compiler"
40
   #endif
41
26
}
42
43
26
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
44
26
   clear_mem(out, 4);
45
46
26
   #if defined(BOTAN_USE_GCC_INLINE_ASM)
47
26
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
48
49
   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
50
   __cpuidex((int*)out, type, level);
51
52
   #else
53
   BOTAN_UNUSED(type, level);
54
      #warning "No way of calling x86 cpuid instruction for this compiler"
55
   #endif
56
26
}
57
58
13
/*
* Read extended control register 0 (XCR0) using the _xgetbv intrinsic.
*
* BOTAN_FUNC_ISA("xsave") is defined elsewhere; presumably it enables the
* xsave instruction set for just this function so the intrinsic compiles.
*
* @return the 64-bit register value
*/
BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
   constexpr uint32_t xcr_index = 0;
   const uint64_t xcr_value = _xgetbv(xcr_index);
   return xcr_value;
}
61
62
}  // namespace
63
64
13
uint32_t CPUID::CPUID_Data::detect_cpu_features(uint32_t allowed) {
65
13
   enum class x86_CPUID_1_bits : uint64_t {
66
13
      RDTSC = (1ULL << 4),
67
13
      SSE2 = (1ULL << 26),
68
13
      CLMUL = (1ULL << 33),
69
13
      SSSE3 = (1ULL << 41),
70
13
      SSE41 = (1ULL << 51),
71
13
      AESNI = (1ULL << 57),
72
      // AVX + OSXSAVE
73
13
      OSXSAVE = (1ULL << 59) | (1ULL << 60),
74
13
      RDRAND = (1ULL << 62)
75
13
   };
76
77
13
   enum class x86_CPUID_7_bits : uint64_t {
78
13
      BMI1 = (1ULL << 3),
79
13
      AVX2 = (1ULL << 5),
80
13
      BMI2 = (1ULL << 8),
81
13
      BMI_1_AND_2 = BMI1 | BMI2,
82
13
      AVX512_F = (1ULL << 16),
83
13
      AVX512_DQ = (1ULL << 17),
84
13
      RDSEED = (1ULL << 18),
85
13
      ADX = (1ULL << 19),
86
13
      AVX512_IFMA = (1ULL << 21),
87
13
      SHA = (1ULL << 29),
88
13
      AVX512_BW = (1ULL << 30),
89
13
      AVX512_VL = (1ULL << 31),
90
13
      AVX512_VBMI = (1ULL << 33),
91
13
      AVX512_VBMI2 = (1ULL << 38),
92
13
      GFNI = (1ULL << 40),
93
13
      AVX512_VAES = (1ULL << 41),
94
13
      AVX512_VCLMUL = (1ULL << 42),
95
13
      AVX512_VBITALG = (1ULL << 44),
96
97
      /*
98
      We only enable AVX512 support if all of the below flags are available
99
100
      This is more than we strictly need for most uses, however it also has
101
      the effect of preventing execution of AVX512 codepaths on cores that
102
      have serious downclocking problems when AVX512 code executes,
103
      especially Intel Skylake.
104
105
      VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
106
      Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
107
      executing AVX512.
108
109
      There is nothing stopping some future processor from supporting the
110
      above flags and having AVX512 penalties, but maybe you should not have
111
      bought such a processor.
112
      */
113
13
      AVX512_PROFILE =
114
13
         AVX512_F | AVX512_DQ | AVX512_IFMA | AVX512_BW | AVX512_VL | AVX512_VBMI | AVX512_VBMI2 | AVX512_VBITALG,
115
13
   };
116
117
   // NOLINTNEXTLINE(performance-enum-size)
118
13
   enum class x86_CPUID_7_1_bits : uint64_t {
119
13
      SHA512 = (1 << 0),
120
13
      SM3 = (1 << 1),
121
13
      SM4 = (1 << 2),
122
13
   };
123
124
13
   uint32_t feat = 0;
125
13
   uint32_t cpuid[4] = {0};
126
13
   bool has_os_ymm_support = false;
127
13
   bool has_os_zmm_support = false;
128
129
   // CPUID 0: vendor identification, max sublevel
130
13
   invoke_cpuid(0, cpuid);
131
132
13
   const uint32_t max_supported_sublevel = cpuid[0];
133
134
13
   if(max_supported_sublevel >= 1) {
135
      // CPUID 1: feature bits
136
13
      invoke_cpuid(1, cpuid);
137
13
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
138
139
13
      feat |= if_set(flags0, x86_CPUID_1_bits::RDTSC, CPUID::CPUID_RDTSC_BIT, allowed);
140
141
13
      feat |= if_set(flags0, x86_CPUID_1_bits::RDRAND, CPUID::CPUID_RDRAND_BIT, allowed);
142
143
13
      feat |= if_set(flags0, x86_CPUID_1_bits::SSE2, CPUID::CPUID_SSE2_BIT, allowed);
144
145
13
      if(feat & CPUID::CPUID_SSE2_BIT) {
146
13
         feat |= if_set(flags0, x86_CPUID_1_bits::SSSE3, CPUID::CPUID_SSSE3_BIT, allowed);
147
148
13
         if(feat & CPUID::CPUID_SSSE3_BIT) {
149
13
            feat |= if_set(flags0, x86_CPUID_1_bits::CLMUL, CPUID::CPUID_CLMUL_BIT, allowed);
150
13
            feat |= if_set(flags0, x86_CPUID_1_bits::AESNI, CPUID::CPUID_AESNI_BIT, allowed);
151
13
         }
152
153
13
         const uint64_t osxsave64 = static_cast<uint64_t>(x86_CPUID_1_bits::OSXSAVE);
154
13
         if((flags0 & osxsave64) == osxsave64) {
155
13
            const uint64_t xcr_flags = xgetbv();
156
13
            if((xcr_flags & 0x6) == 0x6) {
157
13
               has_os_ymm_support = true;
158
13
               has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
159
13
            }
160
13
         }
161
13
      }
162
13
   }
163
164
13
   if(max_supported_sublevel >= 7) {
165
13
      clear_mem(cpuid, 4);
166
13
      invoke_cpuid_sublevel(7, 0, cpuid);
167
168
13
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
169
170
13
      clear_mem(cpuid, 4);
171
13
      invoke_cpuid_sublevel(7, 1, cpuid);
172
13
      const uint32_t flags7_1 = cpuid[0];
173
174
13
      feat |= if_set(flags7, x86_CPUID_7_bits::RDSEED, CPUID::CPUID_RDSEED_BIT, allowed);
175
13
      feat |= if_set(flags7, x86_CPUID_7_bits::ADX, CPUID::CPUID_ADX_BIT, allowed);
176
177
      /*
178
      We only set the BMI bit if both BMI1 and BMI2 are supported, since
179
      typically we want to use both extensions in the same code.
180
      */
181
13
      feat |= if_set(flags7, x86_CPUID_7_bits::BMI_1_AND_2, CPUID::CPUID_BMI_BIT, allowed);
182
183
13
      if(feat & CPUID::CPUID_SSSE3_BIT) {
184
13
         feat |= if_set(flags7, x86_CPUID_7_bits::SHA, CPUID::CPUID_SHA_BIT, allowed);
185
13
         feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM3, CPUID::CPUID_SM3_BIT, allowed);
186
13
      }
187
188
13
      if(has_os_ymm_support) {
189
13
         feat |= if_set(flags7, x86_CPUID_7_bits::AVX2, CPUID::CPUID_AVX2_BIT, allowed);
190
191
13
         if(feat & CPUID::CPUID_AVX2_BIT) {
192
13
            feat |= if_set(flags7, x86_CPUID_7_bits::GFNI, CPUID::CPUID_GFNI_BIT, allowed);
193
13
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX2_AES_BIT, allowed);
194
13
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX2_CLMUL_BIT, allowed);
195
13
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SHA512, CPUID::CPUID_SHA512_BIT, allowed);
196
13
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM4, CPUID::CPUID_SM4_BIT, allowed);
197
198
13
            if(has_os_zmm_support) {
199
0
               feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_PROFILE, CPUID::CPUID_AVX512_BIT, allowed);
200
201
0
               if(feat & CPUID::CPUID_AVX512_BIT) {
202
0
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX512_AES_BIT, allowed);
203
0
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX512_CLMUL_BIT, allowed);
204
0
               }
205
0
            }
206
13
         }
207
13
      }
208
13
   }
209
210
   /*
211
   * If we don't have access to CPUID, we can still safely assume that
212
   * any x86-64 processor has SSE2 and RDTSC
213
   */
214
13
   #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
215
13
   if(feat == 0) {
216
0
      feat |= CPUID::CPUID_SSE2_BIT & allowed;
217
0
      feat |= CPUID::CPUID_RDTSC_BIT & allowed;
218
0
   }
219
13
   #endif
220
221
13
   return feat;
222
13
}
223
224
#endif
225
226
}  // namespace Botan