/rust/registry/src/index.crates.io-6f17d22bba15001f/ring-0.17.14/crypto/cpu_intel.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include <ring-core/base.h> |
16 | | |
17 | | |
18 | | #if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64)) |
19 | | |
20 | | #if defined(_MSC_VER) && !defined(__clang__) |
21 | | #pragma warning(push, 3) |
22 | | #include <immintrin.h> |
23 | | #include <intrin.h> |
24 | | #pragma warning(pop) |
25 | | #endif |
26 | | |
27 | | #include "internal.h" |
28 | | |
29 | | |
// OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX
// is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through
// |*out_edx|.
static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
                          uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) {
#if defined(_MSC_VER) && !defined(__clang__)
  // MSVC has no GCC-style inline assembly, so use the |__cpuid| intrinsic.
  // Per the MSVC documentation it executes cpuid with ECX set to zero,
  // matching the assembly paths below.
  int tmp[4];
  __cpuid(tmp, (int)leaf);
  *out_eax = (uint32_t)tmp[0];
  *out_ebx = (uint32_t)tmp[1];
  *out_ecx = (uint32_t)tmp[2];
  *out_edx = (uint32_t)tmp[3];
#elif defined(__pic__) && defined(OPENSSL_32_BIT)
  // Inline assembly may not clobber the PIC register. For 32-bit, this is EBX.
  // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602.
  //
  // EBX is preserved by parking it in EDI around the cpuid instruction; the
  // EBX result is therefore read out of EDI (the "=D" operand) after the
  // xchg, so EBX itself never appears in the constraint list.
  __asm__ volatile (
    "xor %%ecx, %%ecx\n"
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(*out_eax), "=D"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
    : "a"(leaf)
  );
#else
  // Generic GCC/Clang path: zero ECX explicitly (cpuid sub-leaf selector),
  // then let the compiler bind all four result registers directly.
  __asm__ volatile (
    "xor %%ecx, %%ecx\n"
    "cpuid\n"
    : "=a"(*out_eax), "=b"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
    : "a"(leaf)
  );
#endif
}
62 | | |
// OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR).
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
//
// The caller must ensure OSXSAVE is set (CPUID.1:ECX bit 27) before calling,
// otherwise xgetbv raises #UD.
//
// See https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
static uint64_t OPENSSL_xgetbv(uint32_t xcr) {
#if defined(_MSC_VER) && !defined(__clang__)
  // MSVC intrinsic already returns the full 64-bit XCR value.
  return (uint64_t)_xgetbv(xcr);
#else
  // xgetbv reads the XCR selected by ECX into EDX:EAX; reassemble the pair
  // into a single 64-bit value.
  uint32_t eax, edx;
  __asm__ volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
  return (((uint64_t)edx) << 32) | eax;
#endif
}
76 | | |
// OPENSSL_cpuid_setup probes the CPU with cpuid/xgetbv and fills in the four
// capability words: [0] = leaf-1 EDX (with bit 30 repurposed as "is Intel"),
// [1] = leaf-1 ECX, [2] = leaf-7 EBX, [3] = leaf-7 ECX. Feature bits whose
// register state the OS has not enabled (per XCR0) are masked off.
void OPENSSL_cpuid_setup(uint32_t OPENSSL_ia32cap_P[4]) {
  uint32_t a, b, c, d;

  // Leaf 0: vendor string and the highest supported basic leaf.
  OPENSSL_cpuid(&a, &b, &c, &d, 0);
  const uint32_t max_leaf = a;
  const int vendor_is_intel = b == 0x756e6547 /* Genu */ &&
                              d == 0x49656e69 /* ineI */ &&
                              c == 0x6c65746e /* ntel */;

  // Leaf 7 (sub-leaf 0): structured extended feature flags, if available.
  uint32_t ext_ebx = 0;
  uint32_t ext_ecx = 0;
  if (max_leaf >= 7) {
    OPENSSL_cpuid(&a, &b, &c, &d, 7);
    ext_ebx = b;
    ext_ecx = c;
  }

  // Leaf 1: family/model signature plus the base feature flags in ECX/EDX.
  OPENSSL_cpuid(&a, &b, &c, &d, 1);

  // Decode the display family/model per the Intel manual: the extended
  // family field is added only when the base family is 15, and the extended
  // model field extends families 6 and 15.
  const uint32_t base_family = (a >> 8) & 15;
  const uint32_t base_model = (a >> 4) & 15;
  uint32_t family = base_family;
  uint32_t model = base_model;
  if (base_family == 15) {
    family += (a >> 20) & 255;
  }
  if (base_family == 6 || base_family == 15) {
    model |= ((a >> 16) & 15) << 4;
  }

  // Reserved bit #30 is repurposed to signal an Intel CPU.
  if (vendor_is_intel) {
    d |= 1u << 30;
  } else {
    d &= ~(1u << 30);
  }

  // XCR0 may only be queried when the OSXSAVE bit (ECX bit 27) is set.
  uint64_t xcr0 = 0;
  if (c & (1u << 27)) {
    xcr0 = OPENSSL_xgetbv(0);
  }

  // See Intel manual, volume 1, section 14.3: without XCR0.SSE|AVX (bits 1-2)
  // the YMM registers cannot be used, so drop every 256-bit feature.
  if ((xcr0 & 6) != 6) {
    c &= ~((1u << 28) |   // AVX
           (1u << 12) |   // FMA
           (1u << 11));   // AMD XOP
    ext_ebx &= ~(1u << 5);            // AVX2
    ext_ecx &= ~((1u << 9) |          // VAES
                 (1u << 10));         // VPCLMULQDQ
  }

  // See Intel manual, volume 1, sections 15.2 ("Detection of AVX-512
  // Foundation Instructions") through 15.4 ("Detection of Intel AVX-512
  // Instruction Groups Operating at 256 and 128-bit Vector Lengths").
  //
  // Without XCR0.111xx11x, no AVX512 feature can be used. This includes ZMM
  // registers, masking, SIMD registers 16-31 (even if accessed as YMM or
  // XMM), and EVEX-coded instructions (even on YMM or XMM). Even if only
  // XCR0.ZMM_Hi256 is missing, it isn't valid to use AVX512 features on
  // shorter vectors, since AVX512 ties everything to the availability of
  // 512-bit vectors. See the above-mentioned sections of the Intel manual,
  // which say that *all* these XCR0 bits must be checked even when just using
  // 128-bit or 256-bit vectors, and also volume 2a section 2.7.11 ("#UD
  // Equations for EVEX") which says that all EVEX-coded instructions raise an
  // undefined-instruction exception if any of these XCR0 bits is zero.
  //
  // AVX10 fixes this by reorganizing the features that used to be part of
  // "AVX512" and allowing them to be used independently of 512-bit support.
  // TODO: add AVX10 detection.
  if ((xcr0 & 0xe6) != 0xe6) {
    ext_ebx &= ~((1u << 16) |   // AVX512F
                 (1u << 17) |   // AVX512DQ
                 (1u << 21) |   // AVX512IFMA
                 (1u << 26) |   // AVX512PF
                 (1u << 27) |   // AVX512ER
                 (1u << 28) |   // AVX512CD
                 (1u << 30) |   // AVX512BW
                 (1u << 31));   // AVX512VL
    ext_ecx &= ~((1u << 1) |    // AVX512VBMI
                 (1u << 6) |    // AVX512VBMI2
                 (1u << 11) |   // AVX512VNNI
                 (1u << 12) |   // AVX512BITALG
                 (1u << 14));   // AVX512VPOPCNTDQ
  }

  // Repurpose the bit for the removed MPX feature to indicate when using zmm
  // registers should be avoided even when they are supported. (When set,
  // AVX512 features can still be used, but only using ymm or xmm registers.)
  // Skylake suffered from severe downclocking when zmm registers were used,
  // which affected unrelated code running on the system, making zmm registers
  // not too useful outside of benchmarks. The situation improved
  // significantly by Ice Lake, but a small amount of downclocking remained.
  // (See
  // https://lore.kernel.org/linux-crypto/e8ce1146-3952-6977-1d0e-a22758e58914@intel.com/)
  // We take a conservative approach of not allowing zmm registers until after
  // Ice Lake and Tiger Lake, i.e. until Sapphire Rapids on the server side.
  //
  // AMD CPUs, which support AVX512 starting with Zen 4, have not been
  // reported to have any downclocking problem when zmm registers are used.
  int avoid_zmm = 0;
  if (vendor_is_intel && family == 6) {
    switch (model) {
      case 85:    // Skylake, Cascade Lake, Cooper Lake (server)
      case 106:   // Ice Lake (server)
      case 108:   // Ice Lake (micro server)
      case 125:   // Ice Lake (client)
      case 126:   // Ice Lake (mobile)
      case 140:   // Tiger Lake (mobile)
      case 141:   // Tiger Lake (client)
        avoid_zmm = 1;
        break;
      default:
        break;
    }
  }
  if (avoid_zmm) {
    ext_ebx |= 1u << 14;
  } else {
    ext_ebx &= ~(1u << 14);
  }

  OPENSSL_ia32cap_P[0] = d;
  OPENSSL_ia32cap_P[1] = c;
  OPENSSL_ia32cap_P[2] = ext_ebx;
  OPENSSL_ia32cap_P[3] = ext_ecx;
}
197 | | |
198 | | #endif // !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64) |