Coverage Report

Created: 2021-10-13 08:49

/src/botan/src/lib/utils/cpuid/cpuid_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/cpuid.h>
9
#include <botan/mem_ops.h>
10
#include <botan/internal/loadstor.h>
11
12
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13
14
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15
  #include <intrin.h>
16
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17
  #include <ia32intrin.h>
18
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19
  #include <cpuid.h>
20
#endif
21
22
#endif
23
24
namespace Botan {
25
26
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27
28
namespace {
29
30
void invoke_cpuid(uint32_t type, uint32_t out[4])
31
22
   {
32
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) || defined(BOTAN_BUILD_COMPILER_IS_INTEL)
33
   __cpuid((int*)out, type);
34
35
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
36
22
   __get_cpuid(type, out, out+1, out+2, out+3);
37
38
#elif defined(BOTAN_USE_GCC_INLINE_ASM)
39
   asm("cpuid\n\t"
40
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])
41
       : "0" (type));
42
43
#else
44
   #warning "No way of calling x86 cpuid instruction for this compiler"
45
   clear_mem(out, 4);
46
#endif
47
22
   }
48
49
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4])
50
11
   {
51
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
52
   __cpuidex((int*)out, type, level);
53
54
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
55
11
   __cpuid_count(type, level, out[0], out[1], out[2], out[3]);
56
57
#elif defined(BOTAN_USE_GCC_INLINE_ASM)
58
   asm("cpuid\n\t"
59
       : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3])     \
60
       : "0" (type), "2" (level));
61
62
#else
63
   #warning "No way of calling x86 cpuid instruction for this compiler"
64
   clear_mem(out, 4);
65
#endif
66
11
   }
67
68
}
69
70
uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
71
11
   {
72
11
   uint64_t features_detected = 0;
73
11
   uint32_t cpuid[4] = { 0 };
74
75
   // CPUID 0: vendor identification, max sublevel
76
11
   invoke_cpuid(0, cpuid);
77
78
11
   const uint32_t max_supported_sublevel = cpuid[0];
79
80
11
   const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
81
11
   const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
82
11
   const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
83
11
   const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
84
85
11
   if(max_supported_sublevel >= 1)
86
11
      {
87
      // CPUID 1: feature bits
88
11
      invoke_cpuid(1, cpuid);
89
11
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
90
91
11
      enum x86_CPUID_1_bits : uint64_t {
92
11
         RDTSC = (1ULL << 4),
93
11
         SSE2 = (1ULL << 26),
94
11
         CLMUL = (1ULL << 33),
95
11
         SSSE3 = (1ULL << 41),
96
11
         SSE41 = (1ULL << 51),
97
11
         SSE42 = (1ULL << 52),
98
11
         AESNI = (1ULL << 57),
99
11
         RDRAND = (1ULL << 62)
100
11
      };
101
102
11
      if(flags0 & x86_CPUID_1_bits::RDTSC)
103
11
         features_detected |= CPUID::CPUID_RDTSC_BIT;
104
11
      if(flags0 & x86_CPUID_1_bits::SSE2)
105
11
         features_detected |= CPUID::CPUID_SSE2_BIT;
106
11
      if(flags0 & x86_CPUID_1_bits::CLMUL)
107
11
         features_detected |= CPUID::CPUID_CLMUL_BIT;
108
11
      if(flags0 & x86_CPUID_1_bits::SSSE3)
109
11
         features_detected |= CPUID::CPUID_SSSE3_BIT;
110
11
      if(flags0 & x86_CPUID_1_bits::SSE41)
111
11
         features_detected |= CPUID::CPUID_SSE41_BIT;
112
11
      if(flags0 & x86_CPUID_1_bits::SSE42)
113
11
         features_detected |= CPUID::CPUID_SSE42_BIT;
114
11
      if(flags0 & x86_CPUID_1_bits::AESNI)
115
11
         features_detected |= CPUID::CPUID_AESNI_BIT;
116
11
      if(flags0 & x86_CPUID_1_bits::RDRAND)
117
11
         features_detected |= CPUID::CPUID_RDRAND_BIT;
118
11
      }
119
120
11
   if(is_intel)
121
11
      {
122
      // Intel cache line size is in cpuid(1) output
123
11
      *cache_line_size = 8 * get_byte<2>(cpuid[1]);
124
11
      }
125
0
   else if(is_amd)
126
0
      {
127
      // AMD puts it in vendor zone
128
0
      invoke_cpuid(0x80000005, cpuid);
129
0
      *cache_line_size = get_byte<3>(cpuid[2]);
130
0
      }
131
132
11
   if(max_supported_sublevel >= 7)
133
11
      {
134
11
      clear_mem(cpuid, 4);
135
11
      invoke_cpuid_sublevel(7, 0, cpuid);
136
137
11
      enum x86_CPUID_7_bits : uint64_t {
138
11
         BMI1 = (1ULL << 3),
139
11
         AVX2 = (1ULL << 5),
140
11
         BMI2 = (1ULL << 8),
141
11
         AVX512_F = (1ULL << 16),
142
11
         AVX512_DQ = (1ULL << 17),
143
11
         RDSEED = (1ULL << 18),
144
11
         ADX = (1ULL << 19),
145
11
         AVX512_IFMA = (1ULL << 21),
146
11
         SHA = (1ULL << 29),
147
11
         AVX512_BW = (1ULL << 30),
148
11
         AVX512_VL = (1ULL << 31),
149
11
         AVX512_VBMI = (1ULL << 33),
150
11
         AVX512_VBMI2 = (1ULL << 38),
151
11
         AVX512_VAES = (1ULL << 41),
152
11
         AVX512_VCLMUL = (1ULL << 42),
153
11
         AVX512_VBITALG = (1ULL << 44),
154
11
      };
155
156
11
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
157
158
11
      if(flags7 & x86_CPUID_7_bits::AVX2)
159
11
         features_detected |= CPUID::CPUID_AVX2_BIT;
160
11
      if(flags7 & x86_CPUID_7_bits::BMI1)
161
11
         {
162
11
         features_detected |= CPUID::CPUID_BMI1_BIT;
163
         /*
164
         We only set the BMI2 bit if BMI1 is also supported, so BMI2
165
         code can safely use both extensions. No known processor
166
         implements BMI2 but not BMI1.
167
         */
168
11
         if(flags7 & x86_CPUID_7_bits::BMI2)
169
11
            {
170
11
            features_detected |= CPUID::CPUID_BMI2_BIT;
171
172
            /*
173
            Up until Zen3, AMD CPUs with BMI2 support had microcoded
174
            pdep/pext, which works but is very slow.
175
176
            TODO: check for Zen3 here
177
            */
178
11
            if(is_intel)
179
11
               {
180
11
               features_detected |= CPUID::CPUID_FAST_PDEP_BIT;
181
11
               }
182
11
            }
183
11
         }
184
185
11
      if(flags7 & x86_CPUID_7_bits::AVX512_F)
186
0
         {
187
0
         features_detected |= CPUID::CPUID_AVX512F_BIT;
188
189
0
         if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
190
0
            features_detected |= CPUID::CPUID_AVX512DQ_BIT;
191
0
         if(flags7 & x86_CPUID_7_bits::AVX512_BW)
192
0
            features_detected |= CPUID::CPUID_AVX512BW_BIT;
193
194
0
         const uint64_t ICELAKE_FLAGS =
195
0
            x86_CPUID_7_bits::AVX512_F |
196
0
            x86_CPUID_7_bits::AVX512_DQ |
197
0
            x86_CPUID_7_bits::AVX512_IFMA |
198
0
            x86_CPUID_7_bits::AVX512_BW |
199
0
            x86_CPUID_7_bits::AVX512_VL |
200
0
            x86_CPUID_7_bits::AVX512_VBMI |
201
0
            x86_CPUID_7_bits::AVX512_VBMI2 |
202
0
            x86_CPUID_7_bits::AVX512_VBITALG;
203
204
0
         if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
205
0
            features_detected |= CPUID::CPUID_AVX512_ICL_BIT;
206
207
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
208
0
            features_detected |= CPUID::CPUID_AVX512_AES_BIT;
209
0
         if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
210
0
            features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
211
0
         }
212
213
11
      if(flags7 & x86_CPUID_7_bits::RDSEED)
214
11
         features_detected |= CPUID::CPUID_RDSEED_BIT;
215
11
      if(flags7 & x86_CPUID_7_bits::ADX)
216
11
         features_detected |= CPUID::CPUID_ADX_BIT;
217
11
      if(flags7 & x86_CPUID_7_bits::SHA)
218
0
         features_detected |= CPUID::CPUID_SHA_BIT;
219
11
      }
220
221
   /*
222
   * If we don't have access to CPUID, we can still safely assume that
223
   * any x86-64 processor has SSE2 and RDTSC
224
   */
225
11
#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
226
11
   if(features_detected == 0)
227
0
      {
228
0
      features_detected |= CPUID::CPUID_SSE2_BIT;
229
0
      features_detected |= CPUID::CPUID_RDTSC_BIT;
230
0
      }
231
11
#endif
232
233
11
   return features_detected;
234
11
   }
235
236
#endif
237
238
}