Coverage Report

Created: 2021-05-04 09:02

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
#include <botan/mem_ops.h>
10
#include <botan/internal/loadstor.h>
11
12
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13
14
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15
  #include <intrin.h>
16
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17
  #include <ia32intrin.h>
18
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19
  #include <cpuid.h>
20
#endif
21
22
#endif
23
24
namespace Botan {
25
26
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27
28
namespace {
29
30
void invoke_cpuid(uint32_t type, uint32_t out[4])
31
22
   {
32
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
33
   __cpuid((int*)out, type);
34
35
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
36
   __cpuid(out, type);
37
38
#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
39
22
   asm("cpuid\n\t"
40
22
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])
41
22
       : "0" (type));
42
43
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
44
   __get_cpuid(type, out, out+1, out+2, out+3);
45
#else
46
   #warning "No way of calling x86 cpuid instruction for this compiler"
47
   clear_mem(out, 4);
48
#endif
49
22
   }
50
51
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4])
52
11
   {
53
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
54
   __cpuidex((int*)out, type, level);
55
56
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
57
   __cpuidex((int*)out, type, level);
58
59
#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
60
11
   asm("cpuid\n\t"
61
11
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])     \
62
11
       : "0" (type), "2" (level));
63
64
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
65
   __cpuid_count(type, level, out[0], out[1], out[2], out[3]);
66
#else
67
   #warning "No way of calling x86 cpuid instruction for this compiler"
68
   clear_mem(out, 4);
69
#endif
70
11
   }
71
72
}
73
74
uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
75
11
   {
76
11
   uint64_t features_detected = 0;
77
11
   uint32_t cpuid[4] = { 0 };
78
79
   // CPUID 0: vendor identification, max sublevel
80
11
   invoke_cpuid(0, cpuid);
81
82
11
   const uint32_t max_supported_sublevel = cpuid[0];
83
84
11
   const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
85
11
   const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
86
11
   const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
87
11
   const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
88
89
11
   if(max_supported_sublevel >= 1)
90
11
      {
91
      // CPUID 1: feature bits
92
11
      invoke_cpuid(1, cpuid);
93
11
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
94
95
11
      enum x86_CPUID_1_bits : uint64_t {
96
11
         RDTSC = (1ULL << 4),
97
11
         SSE2 = (1ULL << 26),
98
11
         CLMUL = (1ULL << 33),
99
11
         SSSE3 = (1ULL << 41),
100
11
         SSE41 = (1ULL << 51),
101
11
         SSE42 = (1ULL << 52),
102
11
         AESNI = (1ULL << 57),
103
11
         RDRAND = (1ULL << 62)
104
11
      };
105
106
11
      if(flags0 & x86_CPUID_1_bits::RDTSC)
107
11
         features_detected |= CPUID::CPUID_RDTSC_BIT;
108
11
      if(flags0 & x86_CPUID_1_bits::SSE2)
109
11
         features_detected |= CPUID::CPUID_SSE2_BIT;
110
11
      if(flags0 & x86_CPUID_1_bits::CLMUL)
111
11
         features_detected |= CPUID::CPUID_CLMUL_BIT;
112
11
      if(flags0 & x86_CPUID_1_bits::SSSE3)
113
11
         features_detected |= CPUID::CPUID_SSSE3_BIT;
114
11
      if(flags0 & x86_CPUID_1_bits::SSE41)
115
11
         features_detected |= CPUID::CPUID_SSE41_BIT;
116
11
      if(flags0 & x86_CPUID_1_bits::SSE42)
117
11
         features_detected |= CPUID::CPUID_SSE42_BIT;
118
11
      if(flags0 & x86_CPUID_1_bits::AESNI)
119
11
         features_detected |= CPUID::CPUID_AESNI_BIT;
120
11
      if(flags0 & x86_CPUID_1_bits::RDRAND)
121
11
         features_detected |= CPUID::CPUID_RDRAND_BIT;
122
11
      }
123
124
11
   if(is_intel)
125
11
      {
126
      // Intel cache line size is in cpuid(1) output
127
11
      *cache_line_size = 8 * get_byte<2>(cpuid[1]);
128
11
      }
129
0
   else if(is_amd)
130
0
      {
131
      // AMD puts it in vendor zone
132
0
      invoke_cpuid(0x80000005, cpuid);
133
0
      *cache_line_size = get_byte<3>(cpuid[2]);
134
0
      }
135
136
11
   if(max_supported_sublevel >= 7)
137
11
      {
138
11
      clear_mem(cpuid, 4);
139
11
      invoke_cpuid_sublevel(7, 0, cpuid);
140
141
11
      enum x86_CPUID_7_bits : uint64_t {
142
11
         BMI1 = (1ULL << 3),
143
11
         AVX2 = (1ULL << 5),
144
11
         BMI2 = (1ULL << 8),
145
11
         AVX512_F = (1ULL << 16),
146
11
         AVX512_DQ = (1ULL << 17),
147
11
         RDSEED = (1ULL << 18),
148
11
         ADX = (1ULL << 19),
149
11
         AVX512_IFMA = (1ULL << 21),
150
11
         SHA = (1ULL << 29),
151
11
         AVX512_BW = (1ULL << 30),
152
11
         AVX512_VL = (1ULL << 31),
153
11
         AVX512_VBMI = (1ULL << 33),
154
11
         AVX512_VBMI2 = (1ULL << 38),
155
11
         AVX512_VAES = (1ULL << 41),
156
11
         AVX512_VCLMUL = (1ULL << 42),
157
11
         AVX512_VBITALG = (1ULL << 44),
158
11
      };
159
160
11
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
161
162
11
      if(flags7 & x86_CPUID_7_bits::AVX2)
163
11
         features_detected |= CPUID::CPUID_AVX2_BIT;
164
11
      if(flags7 & x86_CPUID_7_bits::BMI1)
165
11
         {
166
11
         features_detected |= CPUID::CPUID_BMI1_BIT;
167
         /*
168
         We only set the BMI2 bit if BMI1 is also supported, so BMI2
169
         code can safely use both extensions. No known processor
170
         implements BMI2 but not BMI1.
171
         */
172
11
         if(flags7 & x86_CPUID_7_bits::BMI2)
173
11
            {
174
11
            features_detected |= CPUID::CPUID_BMI2_BIT;
175
176
            /*
177
            Up until Zen3, AMD CPUs with BMI2 support had microcoded
178
            pdep/pext, which works but is very slow.
179
180
            TODO: check for Zen3 here
181
            */
182
11
            if(is_intel)
183
11
               {
184
11
               features_detected |= CPUID::CPUID_FAST_PDEP_BIT;
185
11
               }
186
11
            }
187
11
         }
188
189
11
      if(flags7 & x86_CPUID_7_bits::AVX512_F)
190
0
         {
191
0
         features_detected |= CPUID::CPUID_AVX512F_BIT;
192
193
0
         if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
194
0
            features_detected |= CPUID::CPUID_AVX512DQ_BIT;
195
0
         if(flags7 & x86_CPUID_7_bits::AVX512_BW)
196
0
            features_detected |= CPUID::CPUID_AVX512BW_BIT;
197
198
0
         const uint64_t ICELAKE_FLAGS =
199
0
            x86_CPUID_7_bits::AVX512_F |
200
0
            x86_CPUID_7_bits::AVX512_DQ |
201
0
            x86_CPUID_7_bits::AVX512_IFMA |
202
0
            x86_CPUID_7_bits::AVX512_BW |
203
0
            x86_CPUID_7_bits::AVX512_VL |
204
0
            x86_CPUID_7_bits::AVX512_VBMI |
205
0
            x86_CPUID_7_bits::AVX512_VBMI2 |
206
0
            x86_CPUID_7_bits::AVX512_VBITALG;
207
208
0
         if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
209
0
            features_detected |= CPUID::CPUID_AVX512_ICL_BIT;
210
211
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
212
0
            features_detected |= CPUID::CPUID_AVX512_AES_BIT;
213
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
214
0
            features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
215
0
         }
216
217
11
      if(flags7 & x86_CPUID_7_bits::RDSEED)
218
0
         features_detected |= CPUID::CPUID_RDSEED_BIT;
219
11
      if(flags7 & x86_CPUID_7_bits::ADX)
220
0
         features_detected |= CPUID::CPUID_ADX_BIT;
221
11
      if(flags7 & x86_CPUID_7_bits::SHA)
222
0
         features_detected |= CPUID::CPUID_SHA_BIT;
223
11
      }
224
225
   /*
226
   * If we don't have access to CPUID, we can still safely assume that
227
   * any x86-64 processor has SSE2 and RDTSC
228
   */
229
11
#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
230
11
   if(features_detected == 0)
231
0
      {
232
0
      features_detected |= CPUID::CPUID_SSE2_BIT;
233
0
      features_detected |= CPUID::CPUID_RDTSC_BIT;
234
0
      }
235
11
#endif
236
237
11
   return features_detected;
238
11
   }
239
240
#endif
241
242
}