Coverage Report

Created: 2022-11-24 06:56

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
#include <botan/mem_ops.h>
10
#include <botan/internal/loadstor.h>
11
12
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13
14
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15
  #include <intrin.h>
16
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17
  #include <ia32intrin.h>
18
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19
  #include <cpuid.h>
20
#endif
21
22
#endif
23
24
namespace Botan {
25
26
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27
28
namespace {
29
30
void invoke_cpuid(uint32_t type, uint32_t out[4])
31
24
   {
32
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) || defined(BOTAN_BUILD_COMPILER_IS_INTEL)
33
   __cpuid((int*)out, type);
34
35
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
36
24
   __get_cpuid(type, out, out+1, out+2, out+3);
37
38
#elif defined(BOTAN_USE_GCC_INLINE_ASM)
39
   asm("cpuid\n\t"
40
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])
41
       : "0" (type));
42
43
#else
44
   #warning "No way of calling x86 cpuid instruction for this compiler"
45
   clear_mem(out, 4);
46
#endif
47
24
   }
48
49
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4])
50
12
   {
51
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
52
   __cpuidex((int*)out, type, level);
53
54
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
55
12
   __cpuid_count(type, level, out[0], out[1], out[2], out[3]);
56
57
#elif defined(BOTAN_USE_GCC_INLINE_ASM)
58
   asm("cpuid\n\t"
59
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])     \
60
       : "0" (type), "2" (level));
61
62
#else
63
   #warning "No way of calling x86 cpuid instruction for this compiler"
64
   clear_mem(out, 4);
65
#endif
66
12
   }
67
68
}
69
70
uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
71
12
   {
72
12
   uint64_t features_detected = 0;
73
12
   uint32_t cpuid[4] = { 0 };
74
12
   bool has_avx = false;
75
76
   // CPUID 0: vendor identification, max sublevel
77
12
   invoke_cpuid(0, cpuid);
78
79
12
   const uint32_t max_supported_sublevel = cpuid[0];
80
81
12
   const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
82
12
   const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
83
12
   const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
84
12
   const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
85
86
12
   if(max_supported_sublevel >= 1)
87
12
      {
88
      // CPUID 1: feature bits
89
12
      invoke_cpuid(1, cpuid);
90
12
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
91
92
12
      enum x86_CPUID_1_bits : uint64_t {
93
12
         RDTSC = (1ULL << 4),
94
12
         SSE2 = (1ULL << 26),
95
12
         CLMUL = (1ULL << 33),
96
12
         SSSE3 = (1ULL << 41),
97
12
         SSE41 = (1ULL << 51),
98
12
         SSE42 = (1ULL << 52),
99
12
         AESNI = (1ULL << 57),
100
12
         OSXSAVE = (1ULL << 59),
101
12
         AVX = (1ULL << 60),
102
12
         RDRAND = (1ULL << 62)
103
12
      };
104
105
12
      if(flags0 & x86_CPUID_1_bits::RDTSC)
106
12
         features_detected |= CPUID::CPUID_RDTSC_BIT;
107
12
      if(flags0 & x86_CPUID_1_bits::SSE2)
108
12
         features_detected |= CPUID::CPUID_SSE2_BIT;
109
12
      if(flags0 & x86_CPUID_1_bits::CLMUL)
110
12
         features_detected |= CPUID::CPUID_CLMUL_BIT;
111
12
      if(flags0 & x86_CPUID_1_bits::SSSE3)
112
12
         features_detected |= CPUID::CPUID_SSSE3_BIT;
113
12
      if(flags0 & x86_CPUID_1_bits::SSE41)
114
12
         features_detected |= CPUID::CPUID_SSE41_BIT;
115
12
      if(flags0 & x86_CPUID_1_bits::SSE42)
116
12
         features_detected |= CPUID::CPUID_SSE42_BIT;
117
12
      if(flags0 & x86_CPUID_1_bits::AESNI)
118
12
         features_detected |= CPUID::CPUID_AESNI_BIT;
119
12
      if(flags0 & x86_CPUID_1_bits::RDRAND)
120
12
         features_detected |= CPUID::CPUID_RDRAND_BIT;
121
12
      if((flags0 & x86_CPUID_1_bits::AVX) &&
122
12
         (flags0 & x86_CPUID_1_bits::OSXSAVE))
123
12
         has_avx = true;
124
12
      }
125
126
12
   if(is_intel)
127
12
      {
128
      // Intel cache line size is in cpuid(1) output
129
12
      *cache_line_size = 8 * get_byte<2>(cpuid[1]);
130
12
      }
131
0
   else if(is_amd)
132
0
      {
133
      // AMD puts it in vendor zone
134
0
      invoke_cpuid(0x80000005, cpuid);
135
0
      *cache_line_size = get_byte<3>(cpuid[2]);
136
0
      }
137
138
12
   if(max_supported_sublevel >= 7)
139
12
      {
140
12
      clear_mem(cpuid, 4);
141
12
      invoke_cpuid_sublevel(7, 0, cpuid);
142
143
12
      enum x86_CPUID_7_bits : uint64_t {
144
12
         BMI1 = (1ULL << 3),
145
12
         AVX2 = (1ULL << 5),
146
12
         BMI2 = (1ULL << 8),
147
12
         AVX512_F = (1ULL << 16),
148
12
         AVX512_DQ = (1ULL << 17),
149
12
         RDSEED = (1ULL << 18),
150
12
         ADX = (1ULL << 19),
151
12
         AVX512_IFMA = (1ULL << 21),
152
12
         SHA = (1ULL << 29),
153
12
         AVX512_BW = (1ULL << 30),
154
12
         AVX512_VL = (1ULL << 31),
155
12
         AVX512_VBMI = (1ULL << 33),
156
12
         AVX512_VBMI2 = (1ULL << 38),
157
12
         AVX512_VAES = (1ULL << 41),
158
12
         AVX512_VCLMUL = (1ULL << 42),
159
12
         AVX512_VBITALG = (1ULL << 44),
160
12
      };
161
162
12
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
163
164
12
      if((flags7 & x86_CPUID_7_bits::AVX2) && has_avx)
165
12
         features_detected |= CPUID::CPUID_AVX2_BIT;
166
12
      if(flags7 & x86_CPUID_7_bits::BMI1)
167
12
         {
168
12
         features_detected |= CPUID::CPUID_BMI1_BIT;
169
         /*
170
         We only set the BMI2 bit if BMI1 is also supported, so BMI2
171
         code can safely use both extensions. No known processor
172
         implements BMI2 but not BMI1.
173
         */
174
12
         if(flags7 & x86_CPUID_7_bits::BMI2)
175
12
            {
176
12
            features_detected |= CPUID::CPUID_BMI2_BIT;
177
178
            /*
179
            Up until Zen3, AMD CPUs with BMI2 support had microcoded
180
            pdep/pext, which works but is very slow.
181
182
            TODO: check for Zen3 here
183
            */
184
12
            if(is_intel)
185
12
               {
186
12
               features_detected |= CPUID::CPUID_FAST_PDEP_BIT;
187
12
               }
188
12
            }
189
12
         }
190
191
12
      if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_avx)
192
0
         {
193
0
         features_detected |= CPUID::CPUID_AVX512F_BIT;
194
195
0
         if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
196
0
            features_detected |= CPUID::CPUID_AVX512DQ_BIT;
197
0
         if(flags7 & x86_CPUID_7_bits::AVX512_BW)
198
0
            features_detected |= CPUID::CPUID_AVX512BW_BIT;
199
200
0
         const uint64_t ICELAKE_FLAGS =
201
0
            x86_CPUID_7_bits::AVX512_F |
202
0
            x86_CPUID_7_bits::AVX512_DQ |
203
0
            x86_CPUID_7_bits::AVX512_IFMA |
204
0
            x86_CPUID_7_bits::AVX512_BW |
205
0
            x86_CPUID_7_bits::AVX512_VL |
206
0
            x86_CPUID_7_bits::AVX512_VBMI |
207
0
            x86_CPUID_7_bits::AVX512_VBMI2 |
208
0
            x86_CPUID_7_bits::AVX512_VBITALG;
209
210
0
         if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
211
0
            features_detected |= CPUID::CPUID_AVX512_ICL_BIT;
212
213
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
214
0
            features_detected |= CPUID::CPUID_AVX512_AES_BIT;
215
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
216
0
            features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
217
0
         }
218
219
12
      if(flags7 & x86_CPUID_7_bits::RDSEED)
220
12
         features_detected |= CPUID::CPUID_RDSEED_BIT;
221
12
      if(flags7 & x86_CPUID_7_bits::ADX)
222
12
         features_detected |= CPUID::CPUID_ADX_BIT;
223
12
      if(flags7 & x86_CPUID_7_bits::SHA)
224
0
         features_detected |= CPUID::CPUID_SHA_BIT;
225
12
      }
226
227
   /*
228
   * If we don't have access to CPUID, we can still safely assume that
229
   * any x86-64 processor has SSE2 and RDTSC
230
   */
231
12
#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
232
12
   if(features_detected == 0)
233
0
      {
234
0
      features_detected |= CPUID::CPUID_SSE2_BIT;
235
0
      features_detected |= CPUID::CPUID_RDTSC_BIT;
236
0
      }
237
12
#endif
238
239
12
   return features_detected;
240
12
   }
241
242
#endif
243
244
}