Coverage Report

Created: 2024-11-21 07:03

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017,2023,2024 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
10
#include <botan/mem_ops.h>
11
#include <botan/internal/loadstor.h>
12
13
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
14
   #include <immintrin.h>
15
#endif
16
17
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
18
   #include <intrin.h>
19
#endif
20
21
namespace Botan {
22
23
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
24
25
namespace {
26
27
10
void invoke_cpuid(uint32_t type, uint32_t out[4]) {
28
10
   clear_mem(out, 4);
29
30
10
   #if defined(BOTAN_USE_GCC_INLINE_ASM)
31
10
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
32
33
   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
34
   __cpuid((int*)out, type);
35
36
   #else
37
   BOTAN_UNUSED(type);
38
      #warning "No way of calling x86 cpuid instruction for this compiler"
39
   #endif
40
10
}
41
42
10
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
43
10
   clear_mem(out, 4);
44
45
10
   #if defined(BOTAN_USE_GCC_INLINE_ASM)
46
10
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
47
48
   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
49
   __cpuidex((int*)out, type, level);
50
51
   #else
52
   BOTAN_UNUSED(type, level);
53
      #warning "No way of calling x86 cpuid instruction for this compiler"
54
   #endif
55
10
}
56
57
5
/**
* Read extended control register 0 using the _xgetbv intrinsic.
*
* The only caller in this file invokes it after seeing the OSXSAVE and AVX
* bits in CPUID leaf 1.
*
* @return the value reported by _xgetbv(0)
*/
BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
   constexpr unsigned int xcr_index = 0;
   const uint64_t xcr_value = _xgetbv(xcr_index);
   return xcr_value;
}
60
61
}  // namespace
62
63
5
uint32_t CPUID::CPUID_Data::detect_cpu_features(uint32_t allowed) {
64
5
   enum class x86_CPUID_1_bits : uint64_t {
65
5
      RDTSC = (1ULL << 4),
66
5
      SSE2 = (1ULL << 26),
67
5
      CLMUL = (1ULL << 33),
68
5
      SSSE3 = (1ULL << 41),
69
5
      SSE41 = (1ULL << 51),
70
5
      AESNI = (1ULL << 57),
71
      // AVX + OSXSAVE
72
5
      OSXSAVE = (1ULL << 59) | (1ULL << 60),
73
5
      RDRAND = (1ULL << 62)
74
5
   };
75
76
5
   enum class x86_CPUID_7_bits : uint64_t {
77
5
      BMI1 = (1ULL << 3),
78
5
      AVX2 = (1ULL << 5),
79
5
      BMI2 = (1ULL << 8),
80
5
      BMI_1_AND_2 = BMI1 | BMI2,
81
5
      AVX512_F = (1ULL << 16),
82
5
      AVX512_DQ = (1ULL << 17),
83
5
      RDSEED = (1ULL << 18),
84
5
      ADX = (1ULL << 19),
85
5
      AVX512_IFMA = (1ULL << 21),
86
5
      SHA = (1ULL << 29),
87
5
      AVX512_BW = (1ULL << 30),
88
5
      AVX512_VL = (1ULL << 31),
89
5
      AVX512_VBMI = (1ULL << 33),
90
5
      AVX512_VBMI2 = (1ULL << 38),
91
5
      GFNI = (1ULL << 40),
92
5
      AVX512_VAES = (1ULL << 41),
93
5
      AVX512_VCLMUL = (1ULL << 42),
94
5
      AVX512_VBITALG = (1ULL << 44),
95
96
      /*
97
      We only enable AVX512 support if all of the below flags are available
98
99
      This is more than we strictly need for most uses, however it also has
100
      the effect of preventing execution of AVX512 codepaths on cores that
101
      have serious downclocking problems when AVX512 code executes,
102
      especially Intel Skylake.
103
104
      VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
105
      Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
106
      executing AVX512.
107
108
      There is nothing stopping some future processor from supporting the
109
      above flags and having AVX512 penalties, but maybe you should not have
110
      bought such a processor.
111
      */
112
5
      AVX512_PROFILE =
113
5
         AVX512_F | AVX512_DQ | AVX512_IFMA | AVX512_BW | AVX512_VL | AVX512_VBMI | AVX512_VBMI2 | AVX512_VBITALG,
114
5
   };
115
116
   // NOLINTNEXTLINE(performance-enum-size)
117
5
   enum class x86_CPUID_7_1_bits : uint64_t {
118
5
      SHA512 = (1 << 0),
119
5
      SM3 = (1 << 1),
120
5
      SM4 = (1 << 2),
121
5
   };
122
123
5
   uint32_t feat = 0;
124
5
   uint32_t cpuid[4] = {0};
125
5
   bool has_os_ymm_support = false;
126
5
   bool has_os_zmm_support = false;
127
128
   // CPUID 0: vendor identification, max sublevel
129
5
   invoke_cpuid(0, cpuid);
130
131
5
   const uint32_t max_supported_sublevel = cpuid[0];
132
133
5
   if(max_supported_sublevel >= 1) {
134
      // CPUID 1: feature bits
135
5
      invoke_cpuid(1, cpuid);
136
5
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
137
138
5
      feat |= if_set(flags0, x86_CPUID_1_bits::RDTSC, CPUID::CPUID_RDTSC_BIT, allowed);
139
140
5
      feat |= if_set(flags0, x86_CPUID_1_bits::RDRAND, CPUID::CPUID_RDRAND_BIT, allowed);
141
142
5
      feat |= if_set(flags0, x86_CPUID_1_bits::SSE2, CPUID::CPUID_SSE2_BIT, allowed);
143
144
5
      if(feat & CPUID::CPUID_SSE2_BIT) {
145
5
         feat |= if_set(flags0, x86_CPUID_1_bits::SSSE3, CPUID::CPUID_SSSE3_BIT, allowed);
146
147
5
         if(feat & CPUID::CPUID_SSSE3_BIT) {
148
5
            feat |= if_set(flags0, x86_CPUID_1_bits::CLMUL, CPUID::CPUID_CLMUL_BIT, allowed);
149
5
            feat |= if_set(flags0, x86_CPUID_1_bits::AESNI, CPUID::CPUID_AESNI_BIT, allowed);
150
5
         }
151
152
5
         const uint64_t osxsave64 = static_cast<uint64_t>(x86_CPUID_1_bits::OSXSAVE);
153
5
         if((flags0 & osxsave64) == osxsave64) {
154
5
            const uint64_t xcr_flags = xgetbv();
155
5
            if((xcr_flags & 0x6) == 0x6) {
156
5
               has_os_ymm_support = true;
157
5
               has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
158
5
            }
159
5
         }
160
5
      }
161
5
   }
162
163
5
   if(max_supported_sublevel >= 7) {
164
5
      clear_mem(cpuid, 4);
165
5
      invoke_cpuid_sublevel(7, 0, cpuid);
166
167
5
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
168
169
5
      clear_mem(cpuid, 4);
170
5
      invoke_cpuid_sublevel(7, 1, cpuid);
171
5
      const uint32_t flags7_1 = cpuid[0];
172
173
5
      feat |= if_set(flags7, x86_CPUID_7_bits::RDSEED, CPUID::CPUID_RDSEED_BIT, allowed);
174
5
      feat |= if_set(flags7, x86_CPUID_7_bits::ADX, CPUID::CPUID_ADX_BIT, allowed);
175
176
      /*
177
      We only set the BMI bit if both BMI1 and BMI2 are supported, since
178
      typically we want to use both extensions in the same code.
179
      */
180
5
      feat |= if_set(flags7, x86_CPUID_7_bits::BMI_1_AND_2, CPUID::CPUID_BMI_BIT, allowed);
181
182
5
      if(feat & CPUID::CPUID_SSSE3_BIT) {
183
5
         feat |= if_set(flags7, x86_CPUID_7_bits::SHA, CPUID::CPUID_SHA_BIT, allowed);
184
5
         feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM3, CPUID::CPUID_SM3_BIT, allowed);
185
5
      }
186
187
5
      if(has_os_ymm_support) {
188
5
         feat |= if_set(flags7, x86_CPUID_7_bits::AVX2, CPUID::CPUID_AVX2_BIT, allowed);
189
190
5
         if(feat & CPUID::CPUID_AVX2_BIT) {
191
5
            feat |= if_set(flags7, x86_CPUID_7_bits::GFNI, CPUID::CPUID_GFNI_BIT, allowed);
192
5
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX2_AES_BIT, allowed);
193
5
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX2_CLMUL_BIT, allowed);
194
5
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SHA512, CPUID::CPUID_SHA512_BIT, allowed);
195
5
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM4, CPUID::CPUID_SM4_BIT, allowed);
196
197
5
            if(has_os_zmm_support) {
198
0
               feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_PROFILE, CPUID::CPUID_AVX512_BIT, allowed);
199
200
0
               if(feat & CPUID::CPUID_AVX512_BIT) {
201
0
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX512_AES_BIT, allowed);
202
0
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX512_CLMUL_BIT, allowed);
203
0
               }
204
0
            }
205
5
         }
206
5
      }
207
5
   }
208
209
   /*
210
   * If we don't have access to CPUID, we can still safely assume that
211
   * any x86-64 processor has SSE2 and RDTSC
212
   */
213
5
   #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
214
5
   if(feat == 0) {
215
0
      feat |= CPUID::CPUID_SSE2_BIT & allowed;
216
0
      feat |= CPUID::CPUID_RDTSC_BIT & allowed;
217
0
   }
218
5
   #endif
219
220
5
   return feat;
221
5
}
222
223
#endif
224
225
}  // namespace Botan