Coverage Report

Created: 2025-07-11 06:15

/rust/registry/src/index.crates.io-6f17d22bba15001f/ring-0.17.14/src/cpu/intel.rs
Source (every instrumented line in this file has an execution count of 0)
// Copyright 2016-2021 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use cfg_if::cfg_if;

mod abi_assumptions {
    use core::mem::size_of;

    // TODO: Support targets that do not have SSE and SSE2 enabled, such as
    // x86_64-unknown-linux-none. See
    // https://github.com/briansmith/ring/issues/1793#issuecomment-1793243725,
    // https://github.com/briansmith/ring/issues/1832,
    // https://github.com/briansmith/ring/issues/1833.
    const _ASSUMES_SSE2: () =
        assert!(cfg!(target_feature = "sse") && cfg!(target_feature = "sse2"));

    #[cfg(target_arch = "x86_64")]
    const _ASSUMED_POINTER_SIZE: usize = 8;
    #[cfg(target_arch = "x86")]
    const _ASSUMED_POINTER_SIZE: usize = 4;
    const _ASSUMED_USIZE_SIZE: () = assert!(size_of::<usize>() == _ASSUMED_POINTER_SIZE);
    const _ASSUMED_REF_SIZE: () = assert!(size_of::<&'static u8>() == _ASSUMED_POINTER_SIZE);

    const _ASSUMED_ENDIANNESS: () = assert!(cfg!(target_endian = "little"));
}
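The `abi_assumptions` module relies on evaluating `assert!` inside a `const` item so that a violated assumption fails the build instead of surfacing at run time. A minimal standalone sketch of that pattern (illustrative only, not code from the crate):

    // Const-assertion pattern: the assert is evaluated at compile time,
    // so a false condition aborts compilation rather than panicking at run time.
    const _LITTLE_ENDIAN_ONLY: () = assert!(cfg!(target_endian = "little"));
    const _USIZE_AT_LEAST_32_BITS: () = assert!(core::mem::size_of::<usize>() >= 4);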

pub(super) mod featureflags {
    use super::super::CAPS_STATIC;
    use crate::{
        cpu,
        polyfill::{once_cell::race, usize_from_u32},
    };
    use core::num::NonZeroUsize;

    pub(in super::super) fn get_or_init() -> cpu::Features {
        // SAFETY: `OPENSSL_cpuid_setup` must be called only in
        // `INIT.call_once()` below.
        prefixed_extern! {
            fn OPENSSL_cpuid_setup(out: &mut [u32; 4]);
        }

        let _: NonZeroUsize = FEATURES.get_or_init(|| {
            let mut cpuid = [0; 4];
            // SAFETY: We assume that it is safe to execute CPUID and XGETBV.
            unsafe {
                OPENSSL_cpuid_setup(&mut cpuid);
            }
            let detected = super::cpuid_to_caps_and_set_c_flags(&cpuid);
            let merged = CAPS_STATIC | detected;

            let merged = usize_from_u32(merged) | (1 << (super::Shift::Initialized as u32));
            NonZeroUsize::new(merged).unwrap() // Can't fail because we just set a bit.
        });

        // SAFETY: We initialized the CPU features as required.
        // `INIT.call_once` has `happens-before` semantics.
        unsafe { cpu::Features::new_after_feature_flags_written_and_synced_unchecked() }
    }

    pub(in super::super) fn get(_cpu_features: cpu::Features) -> u32 {
        // SAFETY: Since only `get_or_init()` could have created
        // `_cpu_features`, and it only does so after `FEATURES.get_or_init()`,
        // we know we are reading from `FEATURES` after initializing it.
        //
        // Also, 0 means "no features detected" to users, which is designed to
        // be a safe configuration.
        let features = FEATURES.get().map(NonZeroUsize::get).unwrap_or(0);

        // The truncation is lossless, as we set the value with a u32.
        #[allow(clippy::cast_possible_truncation)]
        let features = features as u32;

        features
    }

    static FEATURES: race::OnceNonZeroUsize = race::OnceNonZeroUsize::new();

    #[cfg(target_arch = "x86")]
    #[rustfmt::skip]
    pub const STATIC_DETECTED: u32 = 0
        | (if cfg!(target_feature = "sse2") { super::Sse2::mask() } else { 0 })
        ;

    // Limited to x86_64-v2 features.
    // TODO: Add missing x86-64-v3 features if we find real-world use of x86-64-v3.
    // TODO: Add all features we use.
    #[cfg(target_arch = "x86_64")]
    #[rustfmt::skip]
    pub const STATIC_DETECTED: u32 = 0
        | if cfg!(target_feature = "sse4.1") { super::Sse41::mask() } else { 0 }
        | if cfg!(target_feature = "ssse3") { super::Ssse3::mask() } else { 0 }
        ;

    pub const FORCE_DYNAMIC_DETECTION: u32 = 0;
}
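As a rough usage sketch (not code from the crate), a caller elsewhere in the `cpu` module tree would first obtain a `cpu::Features` token from `get_or_init()`, which performs the one-time `OPENSSL_cpuid_setup` probe, and then read the cached capability bits with `get()`; the `mask()` accessor is assumed to be the same one used in `STATIC_DETECTED` above:

    // Hypothetical caller inside the crate's `cpu` module tree.
    let cpu_features = featureflags::get_or_init(); // runs CPUID/XGETBV at most once
    let bits: u32 = featureflags::get(cpu_features); // cached capability bits
    let ssse3_detected = bits & Ssse3::mask() != 0;  // bit test via the feature's mask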

fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 {
    // "Intel" citations are for "Intel 64 and IA-32 Architectures Software
    // Developer’s Manual", Combined Volumes, December 2024.
    // "AMD" citations are for "AMD64 Technology AMD64 Architecture
    // Programmer’s Manual, Volumes 1-5" Revision 4.08 April 2024.

    // The `prefixed_extern!` uses below assume this
    #[cfg(target_arch = "x86_64")]
    use core::{mem::align_of, sync::atomic::AtomicU32};
    #[cfg(target_arch = "x86_64")]
    const _ATOMIC32_ALIGNMENT_EQUALS_U32_ALIGNMENT: () =
        assert!(align_of::<AtomicU32>() == align_of::<u32>());

    fn check(leaf: u32, bit: u32) -> bool {
        let shifted = 1 << bit;
        (leaf & shifted) == shifted
    }
    fn set(out: &mut u32, shift: Shift) {
        let shifted = 1 << (shift as u32);
        debug_assert_eq!(*out & shifted, 0);
        *out |= shifted;
        debug_assert_eq!(*out & shifted, shifted);
    }

    #[cfg(target_arch = "x86_64")]
    let is_intel = check(cpuid[0], 30); // Synthesized by `OPENSSL_cpuid_setup`

    // CPUID leaf 1.
    let leaf1_ecx = cpuid[1];

    // Intel: "Structured Extended Feature Flags Enumeration Leaf"
    #[cfg(target_arch = "x86_64")]
    let (extended_features_ebx, extended_features_ecx) = (cpuid[2], cpuid[3]);

    let mut caps = 0;

    // AMD: "Collectively the SSE1, [...] are referred to as the legacy SSE
    // instructions. All legacy SSE instructions support 128-bit vector
    // operands."

    // Intel: "11.6.2 Checking for Intel SSE and SSE2 Support"
    // We have to assume the prerequisites for SSE/SSE2 are met since we're
    // already almost definitely using SSE registers if these target features
    // are enabled.
    //
    // These also seem to help ensure CMOV support; there doesn't seem to be
    // a `cfg!(target_feature = "cmov")`. It is likely that removing these
    // assertions will remove the requirement for CMOV. With or without
    // CMOV, it is likely that some of our timing side channel prevention does
    // not work. Presumably the people who delete these are verifying that it
    // all works fine.
    const _SSE_REQUIRED: () = assert!(cfg!(target_feature = "sse"));
    const _SSE2_REQUIRED: () = assert!(cfg!(target_feature = "sse2"));

    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
    {
        // If somebody is trying to compile for an x86 target without SSE2
        // and they deleted the `_SSE2_REQUIRED` const assertion above then
        // they're probably trying to support a Linux/BSD/etc. distro that
        // tries to support ancient x86 systems without SSE/SSE2. Try to
        // reduce the harm caused, by implementing dynamic feature detection
        // for them so that most systems will work like normal.
        //
        // Note that an x86-64 target with SSE2 disabled by default, usually a
        // `-none-` target, will not support dynamically-detected use of SIMD
        // registers via CPUID. A whole different mechanism is needed to
        // support them. Same for i*86-*-none targets.
        let leaf1_edx = cpuid[0];
        let sse1_available = check(leaf1_edx, 25);
        let sse2_available = check(leaf1_edx, 26);
        if sse1_available && sse2_available {
            set(&mut caps, Shift::Sse2);
        }
    }

    // Sometimes people delete the `_SSE_REQUIRED`/`_SSE2_REQUIRED` const
    // assertions in an attempt to support pre-SSE2 32-bit x86 systems. If they
    // do, hopefully they won't delete these redundant assertions, so that
    // x86_64 isn't affected.
    #[cfg(target_arch = "x86_64")]
    const _SSE2_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2"));
    #[cfg(target_arch = "x86_64")]
    const _SSE_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse"));

    // Intel: "12.7.2 Checking for SSSE3 Support"
    // If/when we support dynamic detection of SSE/SSE2, make this conditional
    // on SSE/SSE2.
    if check(leaf1_ecx, 9) {
        set(&mut caps, Shift::Ssse3);
    }

    // Intel: "12.12.2 Checking for Intel SSE4.1 Support"
    // If/when we support dynamic detection of SSE/SSE2, make this conditional
    // on SSE/SSE2.
    // XXX: We don't check for SSE3 and we're not sure if it is compatible for
    //      us to do so; does AMD advertise SSE3? TODO: address this.
    // XXX: We don't condition this on SSSE3 being available. TODO: address
    //      this.
    #[cfg(target_arch = "x86_64")]
    if check(leaf1_ecx, 19) {
        set(&mut caps, Shift::Sse41);
    }

    // AMD: "The extended SSE instructions include [...]."

    // Intel: "14.3 DETECTION OF INTEL AVX INSTRUCTIONS"
    // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
    // support AVX state.
    let avx_available = check(leaf1_ecx, 28);
    if avx_available {
        set(&mut caps, Shift::Avx);
    }

    #[cfg(target_arch = "x86_64")]
    if avx_available {
        // The Intel docs don't seem to document the detection. The instruction
        // definitions of the VEX.256 instructions reference the
        // VAES/VPCLMULQDQ features and the documentation for the extended
        // features gives the values. We combine these into one feature because
        // we never use them independently.
        let vaes_available = check(extended_features_ecx, 9);
        let vclmul_available = check(extended_features_ecx, 10);
        if vaes_available && vclmul_available {
            set(&mut caps, Shift::VAesClmul);
        }
    }

    // "14.7.1 Detection of Intel AVX2 Hardware support"
    // XXX: We don't condition AVX2 on AVX. TODO: Address this.
    // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
    // support AVX state.
    #[cfg(target_arch = "x86_64")]
    if check(extended_features_ebx, 5) {
        set(&mut caps, Shift::Avx2);

        // Declared as `uint32_t` in the C code.
        prefixed_extern! {
            static avx2_available: AtomicU32;
        }
        // SAFETY: The C code only reads `avx2_available`, and its reads are
        // synchronized through the `OnceNonZeroUsize` Acquire/Release
        // semantics as we ensure we have a `cpu::Features` instance before
        // calling into the C code.
        let flag = unsafe { &avx2_available };
        flag.store(1, core::sync::atomic::Ordering::Relaxed);
    }

    // Intel: "12.13.4 Checking for Intel AES-NI Support"
    // If/when we support dynamic detection of SSE/SSE2, revisit this.
    // TODO: Clarify "interesting" states like (!SSE && AVX && AES-NI)
    // and AES-NI & !AVX.
    // Each check of `ClMul`, `Aes`, and `Sha` must be paired with a check for
    // an AVX feature (e.g. `Avx`) or an SSE feature (e.g. `Ssse3`), as every
    // use will either be supported by SSE* or AVX* instructions. We then
    // assume that those supporting instructions' prerequisites (e.g. OS
    // support for AVX or SSE state, respectively) are the only prerequisites
    // for these features.
    if check(leaf1_ecx, 1) {
        set(&mut caps, Shift::ClMul);
    }
    if check(leaf1_ecx, 25) {
        set(&mut caps, Shift::Aes);
    }
    // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
    // static feature detection for this.
    #[cfg(target_arch = "x86_64")]
    if check(extended_features_ebx, 29) {
        set(&mut caps, Shift::Sha);
    }

    #[cfg(target_arch = "x86_64")]
    {
        if is_intel {
            set(&mut caps, Shift::IntelCpu);
        }

        if check(leaf1_ecx, 22) {
            set(&mut caps, Shift::Movbe);
        }

        let adx_available = check(extended_features_ebx, 19);
        if adx_available {
            set(&mut caps, Shift::Adx);
        }

        // Some 6th Generation (Skylake) CPUs claim to support BMI1 and BMI2
        // when they don't; see erratum "SKD052". The Intel document at
        // https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/6th-gen-core-u-y-spec-update.pdf
        // contains the footnote "Affects 6th Generation Intel Pentium processor
        // family and Intel Celeron processor family". Further research indicates
        // that Skylake Pentium/Celeron do not implement AVX or ADX. It turns
        // out that we only use BMI1 and BMI2 in combination with ADX and/or
        // AVX.
        //
        // rust `std::arch::is_x86_feature_detected` does a very similar thing
        // but only looks at AVX, not ADX. Note that they reference an older
        // version of the erratum labeled SKL052.
        let believe_bmi_bits = !is_intel || (adx_available || avx_available);

        if check(extended_features_ebx, 3) && believe_bmi_bits {
            set(&mut caps, Shift::Bmi1);
        }

        let bmi2_available = check(extended_features_ebx, 8) && believe_bmi_bits;
        if bmi2_available {
            set(&mut caps, Shift::Bmi2);
        }

        if adx_available && bmi2_available {
            // Declared as `uint32_t` in the C code.
            prefixed_extern! {
                static adx_bmi2_available: AtomicU32;
            }
            // SAFETY: The C code only reads `adx_bmi2_available`, and its
            // reads are synchronized through the `OnceNonZeroUsize`
            // Acquire/Release semantics as we ensure we have a
            // `cpu::Features` instance before calling into the C code.
            let flag = unsafe { &adx_bmi2_available };
            flag.store(1, core::sync::atomic::Ordering::Relaxed);
        }
    }

    caps
}
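To make the `check`/`set` helpers above concrete: `check` tests one CPUID bit in a leaf register, and `set` records a capability at its `Shift` position, asserting it was not already set. A small worked example with made-up register values (illustrative only; the helpers are local to the function above):

    // Pretend leaf-1 ECX has only the SSSE3 (bit 9) and AVX (bit 28) bits set.
    let leaf1_ecx = (1u32 << 9) | (1u32 << 28);
    assert!(check(leaf1_ecx, 9));   // SSSE3 bit present
    assert!(!check(leaf1_ecx, 19)); // SSE4.1 bit absent
    let mut caps = 0u32;
    set(&mut caps, Shift::Ssse3);   // record SSSE3 at its Shift position
    assert_eq!(caps, 1 << (Shift::Ssse3 as u32));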

impl_get_feature! {
    features: [
        { ("x86_64") => VAesClmul },
        { ("x86", "x86_64") => ClMul },
        { ("x86", "x86_64") => Ssse3 },
        { ("x86_64") => Sse41 },
        { ("x86_64") => Movbe },
        { ("x86", "x86_64") => Aes },
        { ("x86", "x86_64") => Avx },
        { ("x86_64") => Bmi1 },
        { ("x86_64") => Avx2 },
        { ("x86_64") => Bmi2 },
        { ("x86_64") => Adx },
        // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
        // static feature detection for this.
        { ("x86_64") => Sha },
        // x86_64 can just assume SSE2 is available.
        { ("x86") => Sse2 },
    ],
}

cfg_if! {
    if #[cfg(target_arch = "x86_64")] {
        #[derive(Clone, Copy)]
        pub(crate) struct IntelCpu(super::Features);

        impl super::GetFeature<IntelCpu> for super::features::Values {
            fn get_feature(&self) -> Option<IntelCpu> {
                const MASK: u32 = 1 << (Shift::IntelCpu as u32);
                if (self.values() & MASK) == MASK {
                    Some(IntelCpu(self.cpu()))
                } else {
                    None
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    // This should always pass on any x86 system except very, very old ones.
    #[cfg(target_arch = "x86")]
    #[test]
    fn x86_has_sse2() {
        use super::*;
        use crate::cpu::{self, GetFeature as _};
        assert!(matches!(cpu::features().get_feature(), Some(Sse2 { .. })))
    }
}
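Hypothetically, code elsewhere in the crate uses the same `GetFeature` pattern as the test above to pick an implementation at run time. A sketch, assuming an `Avx` type generated by `impl_get_feature!` just like `Sse2` (illustrative only, not part of this file):

    // Illustrative dispatch on a dynamically detected feature.
    fn _choose_impl(cpu: crate::cpu::Features) {
        use crate::cpu::GetFeature as _;
        if let Some(Avx { .. }) = cpu.get_feature() {
            // take an AVX code path
        } else {
            // fall back to the SSE2 baseline
        }
    }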