Coverage Report

Created: 2022-06-23 06:44

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
#include <botan/mem_ops.h>
10
#include <botan/internal/loadstor.h>
11
12
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13
14
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15
  #include <intrin.h>
16
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17
  #include <ia32intrin.h>
18
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19
  #include <cpuid.h>
20
#endif
21
22
#endif
23
24
namespace Botan {
25
26
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27
28
namespace {
29
30
void invoke_cpuid(uint32_t type, uint32_t out[4])
31
22
   {
32
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) || defined(BOTAN_BUILD_COMPILER_IS_INTEL)
33
   __cpuid((int*)out, type);
34
35
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
36
22
   __get_cpuid(type, out, out+1, out+2, out+3);
37
38
#elif defined(BOTAN_USE_GCC_INLINE_ASM)
39
   asm("cpuid\n\t"
40
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])
41
       : "0" (type));
42
43
#else
44
   #warning "No way of calling x86 cpuid instruction for this compiler"
45
   clear_mem(out, 4);
46
#endif
47
22
   }
48
49
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4])
50
11
   {
51
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
52
   __cpuidex((int*)out, type, level);
53
54
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
55
11
   __cpuid_count(type, level, out[0], out[1], out[2], out[3]);
56
57
#elif defined(BOTAN_USE_GCC_INLINE_ASM)
58
   asm("cpuid\n\t"
59
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])     \
60
       : "0" (type), "2" (level));
61
62
#else
63
   #warning "No way of calling x86 cpuid instruction for this compiler"
64
   clear_mem(out, 4);
65
#endif
66
11
   }
67
68
}
69
70
uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
71
11
   {
72
11
   uint64_t features_detected = 0;
73
11
   uint32_t cpuid[4] = { 0 };
74
11
   bool has_avx = false;
75
76
   // CPUID 0: vendor identification, max sublevel
77
11
   invoke_cpuid(0, cpuid);
78
79
11
   const uint32_t max_supported_sublevel = cpuid[0];
80
81
11
   const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
82
11
   const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
83
11
   const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
84
11
   const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
85
86
11
   if(max_supported_sublevel >= 1)
87
11
      {
88
      // CPUID 1: feature bits
89
11
      invoke_cpuid(1, cpuid);
90
11
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
91
92
11
      enum x86_CPUID_1_bits : uint64_t {
93
11
         RDTSC = (1ULL << 4),
94
11
         SSE2 = (1ULL << 26),
95
11
         CLMUL = (1ULL << 33),
96
11
         SSSE3 = (1ULL << 41),
97
11
         SSE41 = (1ULL << 51),
98
11
         SSE42 = (1ULL << 52),
99
11
         AESNI = (1ULL << 57),
100
11
         OSXSAVE = (1ULL << 59),
101
11
         AVX = (1ULL << 60),
102
11
         RDRAND = (1ULL << 62)
103
11
      };
104
105
11
      if(flags0 & x86_CPUID_1_bits::RDTSC)
106
11
         features_detected |= CPUID::CPUID_RDTSC_BIT;
107
11
      if(flags0 & x86_CPUID_1_bits::SSE2)
108
11
         features_detected |= CPUID::CPUID_SSE2_BIT;
109
11
      if(flags0 & x86_CPUID_1_bits::CLMUL)
110
11
         features_detected |= CPUID::CPUID_CLMUL_BIT;
111
11
      if(flags0 & x86_CPUID_1_bits::SSSE3)
112
11
         features_detected |= CPUID::CPUID_SSSE3_BIT;
113
11
      if(flags0 & x86_CPUID_1_bits::SSE41)
114
11
         features_detected |= CPUID::CPUID_SSE41_BIT;
115
11
      if(flags0 & x86_CPUID_1_bits::SSE42)
116
11
         features_detected |= CPUID::CPUID_SSE42_BIT;
117
11
      if(flags0 & x86_CPUID_1_bits::AESNI)
118
11
         features_detected |= CPUID::CPUID_AESNI_BIT;
119
11
      if(flags0 & x86_CPUID_1_bits::RDRAND)
120
11
         features_detected |= CPUID::CPUID_RDRAND_BIT;
121
11
      if((flags0 & x86_CPUID_1_bits::AVX) &&
122
11
         (flags0 & x86_CPUID_1_bits::OSXSAVE))
123
11
         has_avx = true;
124
11
      }
125
126
11
   if(is_intel)
127
11
      {
128
      // Intel cache line size is in cpuid(1) output
129
11
      *cache_line_size = 8 * get_byte<2>(cpuid[1]);
130
11
      }
131
0
   else if(is_amd)
132
0
      {
133
      // AMD puts it in vendor zone
134
0
      invoke_cpuid(0x80000005, cpuid);
135
0
      *cache_line_size = get_byte<3>(cpuid[2]);
136
0
      }
137
138
11
   if(max_supported_sublevel >= 7)
139
11
      {
140
11
      clear_mem(cpuid, 4);
141
11
      invoke_cpuid_sublevel(7, 0, cpuid);
142
143
11
      enum x86_CPUID_7_bits : uint64_t {
144
11
         BMI1 = (1ULL << 3),
145
11
         AVX2 = (1ULL << 5),
146
11
         BMI2 = (1ULL << 8),
147
11
         AVX512_F = (1ULL << 16),
148
11
         AVX512_DQ = (1ULL << 17),
149
11
         RDSEED = (1ULL << 18),
150
11
         ADX = (1ULL << 19),
151
11
         AVX512_IFMA = (1ULL << 21),
152
11
         SHA = (1ULL << 29),
153
11
         AVX512_BW = (1ULL << 30),
154
11
         AVX512_VL = (1ULL << 31),
155
11
         AVX512_VBMI = (1ULL << 33),
156
11
         AVX512_VBMI2 = (1ULL << 38),
157
11
         AVX512_VAES = (1ULL << 41),
158
11
         AVX512_VCLMUL = (1ULL << 42),
159
11
         AVX512_VBITALG = (1ULL << 44),
160
11
      };
161
162
11
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
163
164
11
      if((flags7 & x86_CPUID_7_bits::AVX2) && has_avx)
165
11
         features_detected |= CPUID::CPUID_AVX2_BIT;
166
11
      if(flags7 & x86_CPUID_7_bits::BMI1)
167
11
         {
168
11
         features_detected |= CPUID::CPUID_BMI1_BIT;
169
         /*
170
         We only set the BMI2 bit if BMI1 is also supported, so BMI2
171
         code can safely use both extensions. No known processor
172
         implements BMI2 but not BMI1.
173
         */
174
11
         if(flags7 & x86_CPUID_7_bits::BMI2)
175
11
            {
176
11
            features_detected |= CPUID::CPUID_BMI2_BIT;
177
178
            /*
179
            Up until Zen3, AMD CPUs with BMI2 support had microcoded
180
            pdep/pext, which works but is very slow.
181
182
            TODO: check for Zen3 here
183
            */
184
11
            if(is_intel)
185
11
               {
186
11
               features_detected |= CPUID::CPUID_FAST_PDEP_BIT;
187
11
               }
188
11
            }
189
11
         }
190
191
11
      if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_avx)
192
0
         {
193
0
         features_detected |= CPUID::CPUID_AVX512F_BIT;
194
195
0
         if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
196
0
            features_detected |= CPUID::CPUID_AVX512DQ_BIT;
197
0
         if(flags7 & x86_CPUID_7_bits::AVX512_BW)
198
0
            features_detected |= CPUID::CPUID_AVX512BW_BIT;
199
200
0
         const uint64_t ICELAKE_FLAGS =
201
0
            x86_CPUID_7_bits::AVX512_F |
202
0
            x86_CPUID_7_bits::AVX512_DQ |
203
0
            x86_CPUID_7_bits::AVX512_IFMA |
204
0
            x86_CPUID_7_bits::AVX512_BW |
205
0
            x86_CPUID_7_bits::AVX512_VL |
206
0
            x86_CPUID_7_bits::AVX512_VBMI |
207
0
            x86_CPUID_7_bits::AVX512_VBMI2 |
208
0
            x86_CPUID_7_bits::AVX512_VBITALG;
209
210
0
         if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
211
0
            features_detected |= CPUID::CPUID_AVX512_ICL_BIT;
212
213
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
214
0
            features_detected |= CPUID::CPUID_AVX512_AES_BIT;
215
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
216
0
            features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
217
0
         }
218
219
11
      if(flags7 & x86_CPUID_7_bits::RDSEED)
220
11
         features_detected |= CPUID::CPUID_RDSEED_BIT;
221
11
      if(flags7 & x86_CPUID_7_bits::ADX)
222
11
         features_detected |= CPUID::CPUID_ADX_BIT;
223
11
      if(flags7 & x86_CPUID_7_bits::SHA)
224
0
         features_detected |= CPUID::CPUID_SHA_BIT;
225
11
      }
226
227
   /*
228
   * If we don't have access to CPUID, we can still safely assume that
229
   * any x86-64 processor has SSE2 and RDTSC
230
   */
231
11
#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
232
11
   if(features_detected == 0)
233
0
      {
234
0
      features_detected |= CPUID::CPUID_SSE2_BIT;
235
0
      features_detected |= CPUID::CPUID_RDTSC_BIT;
236
0
      }
237
11
#endif
238
239
11
   return features_detected;
240
11
   }
241
242
#endif
243
244
}