Coverage Report

Created: 2021-11-03 07:11

/rust/registry/src/github.com-1ecc6299db9ec823/memchr-2.2.1/src/x86/mod.rs
Line
Count
Source (jump to first uncovered line)
1
use fallback;
2
3
// We only use AVX when we can detect at runtime whether it's available, which
4
// requires std.
5
#[cfg(feature = "use_std")]
6
mod avx;
7
mod sse2;
8
9
// This macro employs a gcc-like "ifunc" trick whereby, upon first calling
10
// `memchr` (for example), CPU feature detection will be performed at runtime
11
// to determine the best implementation to use. After CPU feature detection
12
// is done, we replace `memchr`'s function pointer with the selection. Upon
13
// subsequent invocations, the CPU-specific routine is invoked directly, which
14
// skips the CPU feature detection and subsequent branch that's required.
15
//
16
// While this typically doesn't matter for rare occurrences or when used on
17
// larger haystacks, `memchr` can be called in tight loops where the overhead
18
// of this branch can actually add up *and is measurable*. This trick was
19
// necessary to bring this implementation up to glibc's speeds for the 'tiny'
20
// benchmarks, for example.
21
//
22
// At some point, I expect the Rust ecosystem will get a nice macro for doing
23
// exactly this, at which point, we can replace our hand-jammed version of it.
24
//
25
// N.B. The ifunc strategy does prevent function inlining of course, but on
26
// modern CPUs, you'll probably end up with the AVX2 implementation, which
27
// probably can't be inlined anyway---unless you've compiled your entire
28
// program with AVX2 enabled. However, even then, the various memchr
29
// implementations aren't exactly small, so inlining might not help anyway!
30
#[cfg(feature = "use_std")]
31
macro_rules! ifunc {
32
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
33
        use std::mem;
34
        use std::sync::atomic::{AtomicPtr, Ordering};
35
36
        type FnRaw = *mut ();
37
38
        static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
39
40
8
        fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
41
8
            let fun =
42
0
                if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
43
8
                    avx::$name as FnRaw
44
0
                } else if cfg!(memchr_runtime_sse2) {
45
0
                    sse2::$name as FnRaw
46
                } else {
47
0
                    fallback::$name as FnRaw
48
                };
49
8
            FN.store(fun as FnRaw, Ordering::Relaxed);
50
8
            unsafe {
51
8
                mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
52
8
            }
53
8
        }
Unexecuted instantiation: memchr::x86::memchr3::detect
Unexecuted instantiation: memchr::x86::memrchr::detect
Unexecuted instantiation: memchr::x86::memchr2::detect
Unexecuted instantiation: memchr::x86::memrchr3::detect
memchr::x86::memchr::detect
Line
Count
Source
40
8
        fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
41
8
            let fun =
42
0
                if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
43
8
                    avx::$name as FnRaw
44
0
                } else if cfg!(memchr_runtime_sse2) {
45
0
                    sse2::$name as FnRaw
46
                } else {
47
0
                    fallback::$name as FnRaw
48
                };
49
8
            FN.store(fun as FnRaw, Ordering::Relaxed);
50
8
            unsafe {
51
8
                mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
52
8
            }
53
8
        }
Unexecuted instantiation: memchr::x86::memrchr2::detect
54
55
        unsafe {
56
            let fun = FN.load(Ordering::Relaxed);
57
            mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
58
        }
59
    }}
60
}
61
62
// When std isn't available to provide runtime CPU feature detection, or if
63
// runtime CPU feature detection has been explicitly disabled, then just call
64
// our optimized SSE2 routine directly. SSE2 is available on all x86_64 targets,
65
// so no CPU feature detection is necessary.
66
#[cfg(not(feature = "use_std"))]
67
macro_rules! ifunc {
68
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
69
        if cfg!(memchr_runtime_sse2) {
70
            unsafe { sse2::$name($($needle),+, $haystack) }
71
        } else {
72
            fallback::$name($($needle),+, $haystack)
73
        }
74
    }}
75
}
76
77
#[inline(always)]
78
18.5M
pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
79
18.5M
    ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
80
18.5M
}
81
82
#[inline(always)]
83
0
pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
84
0
    ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memchr2, haystack, n1, n2)
85
0
}
86
87
#[inline(always)]
88
0
pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
89
0
    ifunc!(fn(u8, u8, u8, &[u8]) -> Option<usize>, memchr3, haystack, n1, n2, n3)
90
0
}
91
92
#[inline(always)]
93
0
pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
94
0
    ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
95
0
}
96
97
#[inline(always)]
98
0
pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
99
0
    ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memrchr2, haystack, n1, n2)
100
0
}
101
102
#[inline(always)]
103
0
pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
104
0
    ifunc!(fn(u8, u8, u8, &[u8]) -> Option<usize>, memrchr3, haystack, n1, n2, n3)
105
0
}