Coverage Report

Created: 2024-10-16 07:58

/rust/registry/src/index.crates.io-6f17d22bba15001f/memchr-2.7.4/src/arch/generic/memchr.rs
Line
Count
Source (jump to first uncovered line)
1
/*!
2
Generic crate-internal routines for the `memchr` family of functions.
3
*/
4
5
// What follows is a vector algorithm generic over the specific vector
6
// type to detect the position of one, two or three needles in a haystack.
7
// From what I know, this is a "classic" algorithm, although I don't
8
// believe it has been published in any peer reviewed journal. I believe
9
// it can be found in places like glibc and Go's standard library. It
10
// appears to be well known and is elaborated on in more detail here:
11
// https://gms.tf/stdfind-and-memchr-optimizations.html
12
//
13
// While the routine below is fairly long and perhaps intimidating, the basic
14
// idea is actually very simple and can be expressed straight-forwardly in
15
// pseudo code. The pseudo code below is written for 128 bit vectors, but the
16
// actual code below works for anything that implements the Vector trait.
17
//
18
//     needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
19
//     // Note: shift amount is in bytes
20
//
21
//     while i <= haystack.len() - 16:
22
//       // A 16 byte vector. Each byte in chunk corresponds to a byte in
23
//       // the haystack.
24
//       chunk = haystack[i:i+16]
25
//       // Compare bytes in needle with bytes in chunk. The result is a 16
26
//       // byte chunk where each byte is 0xFF if the corresponding bytes
27
//       // in needle and chunk were equal, or 0x00 otherwise.
28
//       eqs = cmpeq(needle, chunk)
29
//       // Return a 32 bit integer where the most significant 16 bits
30
//       // are always 0 and the lower 16 bits correspond to whether the
31
//       // most significant bit in the corresponding byte in `eqs` is set.
32
//       // In other words, `mask as u16` has bit i set if and only if
33
//       // needle[i] == chunk[i].
34
//       mask = movemask(eqs)
35
//
36
//       // Mask is 0 if there is no match, and non-zero otherwise.
37
//       if mask != 0:
38
//         // trailing_zeros tells us the position of the least significant
39
//         // bit that is set.
40
//         return i + trailing_zeros(mask)
41
//
42
//     // haystack length may not be a multiple of 16, so search the rest.
43
//     while i < haystack.len():
44
//       if haystack[i] == n1:
45
//         return i
46
//
47
//     // No match found.
48
//     return NULL
49
//
50
// In fact, we could loosely translate the above code to Rust line-for-line
51
// and it would be a pretty fast algorithm. But, we pull out all the stops
52
// to go as fast as possible:
53
//
54
// 1. We use aligned loads. That is, we do some finagling to make sure our
55
//    primary loop not only proceeds in increments of 16 bytes, but that
56
//    the address of haystack's pointer that we dereference is aligned to
57
//    16 bytes. 16 is a magic number here because it is the size of SSE2
58
//    128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
59
//    Therefore, to get aligned loads, our pointer's address must be evenly
60
//    divisible by 16.
61
// 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
62
//    kind of like loop unrolling, but we combine the equality comparisons
63
//    using a vector OR such that we only need to extract a single mask to
64
//    determine whether a match exists or not. If so, then we do some
65
//    book-keeping to determine the precise location but otherwise mush on.
66
// 3. We use our "chunk" comparison routine in as many places as possible,
67
//    even if it means using unaligned loads. In particular, if haystack
68
//    starts with an unaligned address, then we do an unaligned load to
69
//    search the first 16 bytes. We then start our primary loop at the
70
//    smallest subsequent aligned address, which will actually overlap with
71
//    previously searched bytes. But we're OK with that. We do a similar
72
//    dance at the end of our primary loop. Finally, to avoid a
73
//    byte-at-a-time loop at the end, we do a final 16 byte unaligned load
74
//    that may overlap with a previous load. This is OK because it converts
75
//    a loop into a small number of very fast vector instructions. The overlap
76
//    is OK because we know the place where the overlap occurs does not
77
//    contain a match.
78
//
79
// And that's pretty much all there is to it. Note that since the below is
80
// generic and since it's meant to be inlined into routines with a
81
// `#[target_feature(enable = "...")]` annotation, we must mark all routines as
82
// both unsafe and `#[inline(always)]`.
83
//
84
// The fact that the code below is generic does somewhat inhibit us. For
85
// example, I've noticed that introducing an uninlineable `#[cold]` function to
86
// handle the match case in the loop generates tighter assembly, but there is
87
// no way to do this in the generic code below because the generic code doesn't
88
// know what `target_feature` annotation to apply to the uninlineable function.
89
// We could make such functions part of the `Vector` trait, but we instead live
90
// with the slightly sub-optimal codegen for now since it doesn't seem to have
91
// a noticeable perf difference.
92
93
use crate::{
94
    ext::Pointer,
95
    vector::{MoveMask, Vector},
96
};
97
98
/// Finds all occurrences of a single byte in a haystack.
99
#[derive(Clone, Copy, Debug)]
100
pub(crate) struct One<V> {
101
    s1: u8,
102
    v1: V,
103
}
104
105
impl<V: Vector> One<V> {
106
    /// The number of bytes we examine per each iteration of our search loop.
107
    const LOOP_SIZE: usize = 4 * V::BYTES;
108
109
    /// Create a new searcher that finds occurrences of the byte given.
110
    #[inline(always)]
111
1.79M
    pub(crate) unsafe fn new(needle: u8) -> One<V> {
112
1.79M
        One { s1: needle, v1: V::splat(needle) }
113
1.79M
    }
<memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::new
Line
Count
Source
111
895k
    pub(crate) unsafe fn new(needle: u8) -> One<V> {
112
895k
        One { s1: needle, v1: V::splat(needle) }
113
895k
    }
<memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::new
Line
Count
Source
111
895k
    pub(crate) unsafe fn new(needle: u8) -> One<V> {
112
895k
        One { s1: needle, v1: V::splat(needle) }
113
895k
    }
114
115
    /// Returns the needle given to `One::new`.
116
    #[inline(always)]
117
27.2k
    pub(crate) fn needle1(&self) -> u8 {
118
27.2k
        self.s1
119
27.2k
    }
<memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::needle1
Line
Count
Source
117
27.2k
    pub(crate) fn needle1(&self) -> u8 {
118
27.2k
        self.s1
119
27.2k
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::needle1
120
121
    /// Return a pointer to the first occurrence of the needle in the given
122
    /// haystack. If no such occurrence exists, then `None` is returned.
123
    ///
124
    /// When a match is found, the pointer returned is guaranteed to be
125
    /// `>= start` and `< end`.
126
    ///
127
    /// # Safety
128
    ///
129
    /// * It must be the case that `start < end` and that the distance between
130
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
131
    /// to do at least an unaligned load of `V` at `start`.
132
    /// * Both `start` and `end` must be valid for reads.
133
    /// * Both `start` and `end` must point to an initialized value.
134
    /// * Both `start` and `end` must point to the same allocated object and
135
    /// must either be in bounds or at most one byte past the end of the
136
    /// allocated object.
137
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
138
    /// object.
139
    /// * The distance between `start` and `end` must not overflow `isize`.
140
    /// * The distance being in bounds must not rely on "wrapping around" the
141
    /// address space.
142
    #[inline(always)]
143
891k
    pub(crate) unsafe fn find_raw(
144
891k
        &self,
145
891k
        start: *const u8,
146
891k
        end: *const u8,
147
891k
    ) -> Option<*const u8> {
148
891k
        // If we want to support vectors bigger than 256 bits, we probably
149
891k
        // need to move up to using a u64 for the masks used below. Currently
150
891k
        // they are 32 bits, which means we're SOL for vectors that need masks
151
891k
        // bigger than 32 bits. Overall unclear until there's a use case.
152
891k
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
153
154
891k
        let topos = V::Mask::first_offset;
155
891k
        let len = end.distance(start);
156
891k
        debug_assert!(
157
0
            len >= V::BYTES,
158
0
            "haystack has length {}, but must be at least {}",
159
            len,
160
            V::BYTES
161
        );
162
163
        // Search a possibly unaligned chunk at `start`. This covers any part
164
        // of the haystack prior to where aligned loads can start.
165
891k
        if let Some(cur) = self.search_chunk(start, topos) {
166
891k
            return Some(cur);
167
0
        }
168
0
        // Set `cur` to the first V-aligned pointer greater than `start`.
169
0
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
170
0
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
171
0
        if len >= Self::LOOP_SIZE {
172
0
            while cur <= end.sub(Self::LOOP_SIZE) {
173
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
174
175
0
                let a = V::load_aligned(cur);
176
0
                let b = V::load_aligned(cur.add(1 * V::BYTES));
177
0
                let c = V::load_aligned(cur.add(2 * V::BYTES));
178
0
                let d = V::load_aligned(cur.add(3 * V::BYTES));
179
0
                let eqa = self.v1.cmpeq(a);
180
0
                let eqb = self.v1.cmpeq(b);
181
0
                let eqc = self.v1.cmpeq(c);
182
0
                let eqd = self.v1.cmpeq(d);
183
0
                let or1 = eqa.or(eqb);
184
0
                let or2 = eqc.or(eqd);
185
0
                let or3 = or1.or(or2);
186
0
                if or3.movemask_will_have_non_zero() {
187
0
                    let mask = eqa.movemask();
188
0
                    if mask.has_non_zero() {
189
0
                        return Some(cur.add(topos(mask)));
190
0
                    }
191
0
192
0
                    let mask = eqb.movemask();
193
0
                    if mask.has_non_zero() {
194
0
                        return Some(cur.add(1 * V::BYTES).add(topos(mask)));
195
0
                    }
196
0
197
0
                    let mask = eqc.movemask();
198
0
                    if mask.has_non_zero() {
199
0
                        return Some(cur.add(2 * V::BYTES).add(topos(mask)));
200
0
                    }
201
0
202
0
                    let mask = eqd.movemask();
203
0
                    debug_assert!(mask.has_non_zero());
204
0
                    return Some(cur.add(3 * V::BYTES).add(topos(mask)));
205
0
                }
206
0
                cur = cur.add(Self::LOOP_SIZE);
207
            }
208
0
        }
209
        // Handle any leftovers after the aligned loop above. We use unaligned
210
        // loads here, but I believe we are guaranteed that they are aligned
211
        // since `cur` is aligned.
212
0
        while cur <= end.sub(V::BYTES) {
213
0
            debug_assert!(end.distance(cur) >= V::BYTES);
214
0
            if let Some(cur) = self.search_chunk(cur, topos) {
215
0
                return Some(cur);
216
0
            }
217
0
            cur = cur.add(V::BYTES);
218
        }
219
        // Finally handle any remaining bytes less than the size of V. In this
220
        // case, our pointer may indeed be unaligned and the load may overlap
221
        // with the previous one. But that's okay since we know the previous
222
        // load didn't lead to a match (otherwise we wouldn't be here).
223
0
        if cur < end {
224
0
            debug_assert!(end.distance(cur) < V::BYTES);
225
0
            cur = cur.sub(V::BYTES - end.distance(cur));
226
0
            debug_assert_eq!(end.distance(cur), V::BYTES);
227
0
            return self.search_chunk(cur, topos);
228
0
        }
229
0
        None
230
891k
    }
<memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::find_raw
Line
Count
Source
143
57.8k
    pub(crate) unsafe fn find_raw(
144
57.8k
        &self,
145
57.8k
        start: *const u8,
146
57.8k
        end: *const u8,
147
57.8k
    ) -> Option<*const u8> {
148
57.8k
        // If we want to support vectors bigger than 256 bits, we probably
149
57.8k
        // need to move up to using a u64 for the masks used below. Currently
150
57.8k
        // they are 32 bits, which means we're SOL for vectors that need masks
151
57.8k
        // bigger than 32 bits. Overall unclear until there's a use case.
152
57.8k
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
153
154
57.8k
        let topos = V::Mask::first_offset;
155
57.8k
        let len = end.distance(start);
156
57.8k
        debug_assert!(
157
0
            len >= V::BYTES,
158
0
            "haystack has length {}, but must be at least {}",
159
            len,
160
            V::BYTES
161
        );
162
163
        // Search a possibly unaligned chunk at `start`. This covers any part
164
        // of the haystack prior to where aligned loads can start.
165
57.8k
        if let Some(cur) = self.search_chunk(start, topos) {
166
57.8k
            return Some(cur);
167
0
        }
168
0
        // Set `cur` to the first V-aligned pointer greater than `start`.
169
0
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
170
0
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
171
0
        if len >= Self::LOOP_SIZE {
172
0
            while cur <= end.sub(Self::LOOP_SIZE) {
173
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
174
175
0
                let a = V::load_aligned(cur);
176
0
                let b = V::load_aligned(cur.add(1 * V::BYTES));
177
0
                let c = V::load_aligned(cur.add(2 * V::BYTES));
178
0
                let d = V::load_aligned(cur.add(3 * V::BYTES));
179
0
                let eqa = self.v1.cmpeq(a);
180
0
                let eqb = self.v1.cmpeq(b);
181
0
                let eqc = self.v1.cmpeq(c);
182
0
                let eqd = self.v1.cmpeq(d);
183
0
                let or1 = eqa.or(eqb);
184
0
                let or2 = eqc.or(eqd);
185
0
                let or3 = or1.or(or2);
186
0
                if or3.movemask_will_have_non_zero() {
187
0
                    let mask = eqa.movemask();
188
0
                    if mask.has_non_zero() {
189
0
                        return Some(cur.add(topos(mask)));
190
0
                    }
191
0
192
0
                    let mask = eqb.movemask();
193
0
                    if mask.has_non_zero() {
194
0
                        return Some(cur.add(1 * V::BYTES).add(topos(mask)));
195
0
                    }
196
0
197
0
                    let mask = eqc.movemask();
198
0
                    if mask.has_non_zero() {
199
0
                        return Some(cur.add(2 * V::BYTES).add(topos(mask)));
200
0
                    }
201
0
202
0
                    let mask = eqd.movemask();
203
0
                    debug_assert!(mask.has_non_zero());
204
0
                    return Some(cur.add(3 * V::BYTES).add(topos(mask)));
205
0
                }
206
0
                cur = cur.add(Self::LOOP_SIZE);
207
            }
208
0
        }
209
        // Handle any leftovers after the aligned loop above. We use unaligned
210
        // loads here, but I believe we are guaranteed that they are aligned
211
        // since `cur` is aligned.
212
0
        while cur <= end.sub(V::BYTES) {
213
0
            debug_assert!(end.distance(cur) >= V::BYTES);
214
0
            if let Some(cur) = self.search_chunk(cur, topos) {
215
0
                return Some(cur);
216
0
            }
217
0
            cur = cur.add(V::BYTES);
218
        }
219
        // Finally handle any remaining bytes less than the size of V. In this
220
        // case, our pointer may indeed be unaligned and the load may overlap
221
        // with the previous one. But that's okay since we know the previous
222
        // load didn't lead to a match (otherwise we wouldn't be here).
223
0
        if cur < end {
224
0
            debug_assert!(end.distance(cur) < V::BYTES);
225
0
            cur = cur.sub(V::BYTES - end.distance(cur));
226
0
            debug_assert_eq!(end.distance(cur), V::BYTES);
227
0
            return self.search_chunk(cur, topos);
228
0
        }
229
0
        None
230
57.8k
    }
<memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::find_raw
Line
Count
Source
143
833k
    pub(crate) unsafe fn find_raw(
144
833k
        &self,
145
833k
        start: *const u8,
146
833k
        end: *const u8,
147
833k
    ) -> Option<*const u8> {
148
833k
        // If we want to support vectors bigger than 256 bits, we probably
149
833k
        // need to move up to using a u64 for the masks used below. Currently
150
833k
        // they are 32 bits, which means we're SOL for vectors that need masks
151
833k
        // bigger than 32 bits. Overall unclear until there's a use case.
152
833k
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
153
154
833k
        let topos = V::Mask::first_offset;
155
833k
        let len = end.distance(start);
156
833k
        debug_assert!(
157
0
            len >= V::BYTES,
158
0
            "haystack has length {}, but must be at least {}",
159
            len,
160
            V::BYTES
161
        );
162
163
        // Search a possibly unaligned chunk at `start`. This covers any part
164
        // of the haystack prior to where aligned loads can start.
165
833k
        if let Some(cur) = self.search_chunk(start, topos) {
166
833k
            return Some(cur);
167
0
        }
168
0
        // Set `cur` to the first V-aligned pointer greater than `start`.
169
0
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
170
0
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
171
0
        if len >= Self::LOOP_SIZE {
172
0
            while cur <= end.sub(Self::LOOP_SIZE) {
173
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
174
175
0
                let a = V::load_aligned(cur);
176
0
                let b = V::load_aligned(cur.add(1 * V::BYTES));
177
0
                let c = V::load_aligned(cur.add(2 * V::BYTES));
178
0
                let d = V::load_aligned(cur.add(3 * V::BYTES));
179
0
                let eqa = self.v1.cmpeq(a);
180
0
                let eqb = self.v1.cmpeq(b);
181
0
                let eqc = self.v1.cmpeq(c);
182
0
                let eqd = self.v1.cmpeq(d);
183
0
                let or1 = eqa.or(eqb);
184
0
                let or2 = eqc.or(eqd);
185
0
                let or3 = or1.or(or2);
186
0
                if or3.movemask_will_have_non_zero() {
187
0
                    let mask = eqa.movemask();
188
0
                    if mask.has_non_zero() {
189
0
                        return Some(cur.add(topos(mask)));
190
0
                    }
191
0
192
0
                    let mask = eqb.movemask();
193
0
                    if mask.has_non_zero() {
194
0
                        return Some(cur.add(1 * V::BYTES).add(topos(mask)));
195
0
                    }
196
0
197
0
                    let mask = eqc.movemask();
198
0
                    if mask.has_non_zero() {
199
0
                        return Some(cur.add(2 * V::BYTES).add(topos(mask)));
200
0
                    }
201
0
202
0
                    let mask = eqd.movemask();
203
0
                    debug_assert!(mask.has_non_zero());
204
0
                    return Some(cur.add(3 * V::BYTES).add(topos(mask)));
205
0
                }
206
0
                cur = cur.add(Self::LOOP_SIZE);
207
            }
208
0
        }
209
        // Handle any leftovers after the aligned loop above. We use unaligned
210
        // loads here, but I believe we are guaranteed that they are aligned
211
        // since `cur` is aligned.
212
0
        while cur <= end.sub(V::BYTES) {
213
0
            debug_assert!(end.distance(cur) >= V::BYTES);
214
0
            if let Some(cur) = self.search_chunk(cur, topos) {
215
0
                return Some(cur);
216
0
            }
217
0
            cur = cur.add(V::BYTES);
218
        }
219
        // Finally handle any remaining bytes less than the size of V. In this
220
        // case, our pointer may indeed be unaligned and the load may overlap
221
        // with the previous one. But that's okay since we know the previous
222
        // load didn't lead to a match (otherwise we wouldn't be here).
223
0
        if cur < end {
224
0
            debug_assert!(end.distance(cur) < V::BYTES);
225
0
            cur = cur.sub(V::BYTES - end.distance(cur));
226
0
            debug_assert_eq!(end.distance(cur), V::BYTES);
227
0
            return self.search_chunk(cur, topos);
228
0
        }
229
0
        None
230
833k
    }
231
232
    /// Return a pointer to the last occurrence of the needle in the given
233
    /// haystack. If no such occurrence exists, then `None` is returned.
234
    ///
235
    /// When a match is found, the pointer returned is guaranteed to be
236
    /// `>= start` and `< end`.
237
    ///
238
    /// # Safety
239
    ///
240
    /// * It must be the case that `start < end` and that the distance between
241
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
242
    /// to do at least an unaligned load of `V` at `start`.
243
    /// * Both `start` and `end` must be valid for reads.
244
    /// * Both `start` and `end` must point to an initialized value.
245
    /// * Both `start` and `end` must point to the same allocated object and
246
    /// must either be in bounds or at most one byte past the end of the
247
    /// allocated object.
248
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
249
    /// object.
250
    /// * The distance between `start` and `end` must not overflow `isize`.
251
    /// * The distance being in bounds must not rely on "wrapping around" the
252
    /// address space.
253
    #[inline(always)]
254
0
    pub(crate) unsafe fn rfind_raw(
255
0
        &self,
256
0
        start: *const u8,
257
0
        end: *const u8,
258
0
    ) -> Option<*const u8> {
259
0
        // If we want to support vectors bigger than 256 bits, we probably
260
0
        // need to move up to using a u64 for the masks used below. Currently
261
0
        // they are 32 bits, which means we're SOL for vectors that need masks
262
0
        // bigger than 32 bits. Overall unclear until there's a use case.
263
0
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
264
265
0
        let topos = V::Mask::last_offset;
266
0
        let len = end.distance(start);
267
0
        debug_assert!(
268
0
            len >= V::BYTES,
269
0
            "haystack has length {}, but must be at least {}",
270
            len,
271
            V::BYTES
272
        );
273
274
0
        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
275
0
            return Some(cur);
276
0
        }
277
0
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
278
0
        debug_assert!(start <= cur && cur <= end);
279
0
        if len >= Self::LOOP_SIZE {
280
0
            while cur >= start.add(Self::LOOP_SIZE) {
281
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
282
283
0
                cur = cur.sub(Self::LOOP_SIZE);
284
0
                let a = V::load_aligned(cur);
285
0
                let b = V::load_aligned(cur.add(1 * V::BYTES));
286
0
                let c = V::load_aligned(cur.add(2 * V::BYTES));
287
0
                let d = V::load_aligned(cur.add(3 * V::BYTES));
288
0
                let eqa = self.v1.cmpeq(a);
289
0
                let eqb = self.v1.cmpeq(b);
290
0
                let eqc = self.v1.cmpeq(c);
291
0
                let eqd = self.v1.cmpeq(d);
292
0
                let or1 = eqa.or(eqb);
293
0
                let or2 = eqc.or(eqd);
294
0
                let or3 = or1.or(or2);
295
0
                if or3.movemask_will_have_non_zero() {
296
0
                    let mask = eqd.movemask();
297
0
                    if mask.has_non_zero() {
298
0
                        return Some(cur.add(3 * V::BYTES).add(topos(mask)));
299
0
                    }
300
0
301
0
                    let mask = eqc.movemask();
302
0
                    if mask.has_non_zero() {
303
0
                        return Some(cur.add(2 * V::BYTES).add(topos(mask)));
304
0
                    }
305
0
306
0
                    let mask = eqb.movemask();
307
0
                    if mask.has_non_zero() {
308
0
                        return Some(cur.add(1 * V::BYTES).add(topos(mask)));
309
0
                    }
310
0
311
0
                    let mask = eqa.movemask();
312
0
                    debug_assert!(mask.has_non_zero());
313
0
                    return Some(cur.add(topos(mask)));
314
0
                }
315
            }
316
0
        }
317
0
        while cur >= start.add(V::BYTES) {
318
0
            debug_assert!(cur.distance(start) >= V::BYTES);
319
0
            cur = cur.sub(V::BYTES);
320
0
            if let Some(cur) = self.search_chunk(cur, topos) {
321
0
                return Some(cur);
322
0
            }
323
        }
324
0
        if cur > start {
325
0
            debug_assert!(cur.distance(start) < V::BYTES);
326
0
            return self.search_chunk(start, topos);
327
0
        }
328
0
        None
329
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::rfind_raw
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::rfind_raw
330
331
    /// Return a count of all matching bytes in the given haystack.
332
    ///
333
    /// # Safety
334
    ///
335
    /// * It must be the case that `start < end` and that the distance between
336
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
337
    /// to do at least an unaligned load of `V` at `start`.
338
    /// * Both `start` and `end` must be valid for reads.
339
    /// * Both `start` and `end` must point to an initialized value.
340
    /// * Both `start` and `end` must point to the same allocated object and
341
    /// must either be in bounds or at most one byte past the end of the
342
    /// allocated object.
343
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
344
    /// object.
345
    /// * The distance between `start` and `end` must not overflow `isize`.
346
    /// * The distance being in bounds must not rely on "wrapping around" the
347
    /// address space.
348
    #[inline(always)]
349
0
    pub(crate) unsafe fn count_raw(
350
0
        &self,
351
0
        start: *const u8,
352
0
        end: *const u8,
353
0
    ) -> usize {
354
0
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
355
356
0
        let confirm = |b| b == self.needle1();
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::count_raw::{closure#0}
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::count_raw::{closure#0}
357
0
        let len = end.distance(start);
358
0
        debug_assert!(
359
0
            len >= V::BYTES,
360
0
            "haystack has length {}, but must be at least {}",
361
            len,
362
            V::BYTES
363
        );
364
365
        // Set `cur` to the first V-aligned pointer greater than `start`.
366
0
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
367
0
        // Count any matching bytes before we start our aligned loop.
368
0
        let mut count = count_byte_by_byte(start, cur, confirm);
369
0
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
370
0
        if len >= Self::LOOP_SIZE {
371
0
            while cur <= end.sub(Self::LOOP_SIZE) {
372
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
373
374
0
                let a = V::load_aligned(cur);
375
0
                let b = V::load_aligned(cur.add(1 * V::BYTES));
376
0
                let c = V::load_aligned(cur.add(2 * V::BYTES));
377
0
                let d = V::load_aligned(cur.add(3 * V::BYTES));
378
0
                let eqa = self.v1.cmpeq(a);
379
0
                let eqb = self.v1.cmpeq(b);
380
0
                let eqc = self.v1.cmpeq(c);
381
0
                let eqd = self.v1.cmpeq(d);
382
0
                count += eqa.movemask().count_ones();
383
0
                count += eqb.movemask().count_ones();
384
0
                count += eqc.movemask().count_ones();
385
0
                count += eqd.movemask().count_ones();
386
0
                cur = cur.add(Self::LOOP_SIZE);
387
            }
388
0
        }
389
        // Handle any leftovers after the aligned loop above. We use unaligned
390
        // loads here, but I believe we are guaranteed that they are aligned
391
        // since `cur` is aligned.
392
0
        while cur <= end.sub(V::BYTES) {
393
0
            debug_assert!(end.distance(cur) >= V::BYTES);
394
0
            let chunk = V::load_unaligned(cur);
395
0
            count += self.v1.cmpeq(chunk).movemask().count_ones();
396
0
            cur = cur.add(V::BYTES);
397
        }
398
        // And finally count any leftovers that weren't caught above.
399
0
        count += count_byte_by_byte(cur, end, confirm);
400
0
        count
401
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::count_raw
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::count_raw
402
403
    /// Search `V::BYTES` starting at `cur` via an unaligned load.
404
    ///
405
    /// `mask_to_offset` should be a function that converts a `movemask` to
406
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
407
    /// match location if one is found. Generally it is expected to use either
408
    /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether
409
    /// one is implementing a forward or reverse search, respectively.
410
    ///
411
    /// # Safety
412
    ///
413
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
414
    /// load of size `V::BYTES` at `cur`.
415
    #[inline(always)]
416
891k
    unsafe fn search_chunk(
417
891k
        &self,
418
891k
        cur: *const u8,
419
891k
        mask_to_offset: impl Fn(V::Mask) -> usize,
420
891k
    ) -> Option<*const u8> {
421
891k
        let chunk = V::load_unaligned(cur);
422
891k
        let mask = self.v1.cmpeq(chunk).movemask();
423
891k
        if mask.has_non_zero() {
424
891k
            Some(cur.add(mask_to_offset(mask)))
425
        } else {
426
0
            None
427
        }
428
891k
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::last_offset>
<memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::first_offset>
Line
Count
Source
416
57.8k
    unsafe fn search_chunk(
417
57.8k
        &self,
418
57.8k
        cur: *const u8,
419
57.8k
        mask_to_offset: impl Fn(V::Mask) -> usize,
420
57.8k
    ) -> Option<*const u8> {
421
57.8k
        let chunk = V::load_unaligned(cur);
422
57.8k
        let mask = self.v1.cmpeq(chunk).movemask();
423
57.8k
        if mask.has_non_zero() {
424
57.8k
            Some(cur.add(mask_to_offset(mask)))
425
        } else {
426
0
            None
427
        }
428
57.8k
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::last_offset>
<memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::first_offset>
Line
Count
Source
416
833k
    unsafe fn search_chunk(
417
833k
        &self,
418
833k
        cur: *const u8,
419
833k
        mask_to_offset: impl Fn(V::Mask) -> usize,
420
833k
    ) -> Option<*const u8> {
421
833k
        let chunk = V::load_unaligned(cur);
422
833k
        let mask = self.v1.cmpeq(chunk).movemask();
423
833k
        if mask.has_non_zero() {
424
833k
            Some(cur.add(mask_to_offset(mask)))
425
        } else {
426
0
            None
427
        }
428
833k
    }
429
}
430
431
/// Finds all occurrences of two bytes in a haystack.
432
///
433
/// That is, this reports matches of one of two possible bytes. For example,
434
/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
435
/// `4` and `5`.
436
#[derive(Clone, Copy, Debug)]
437
pub(crate) struct Two<V> {
438
    s1: u8,
439
    s2: u8,
440
    v1: V,
441
    v2: V,
442
}
443
444
impl<V: Vector> Two<V> {
445
    /// The number of bytes we examine per each iteration of our search loop.
446
    const LOOP_SIZE: usize = 2 * V::BYTES;
447
448
    /// Create a new searcher that finds occurrences of the byte given.
449
    #[inline(always)]
450
0
    pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two<V> {
451
0
        Two {
452
0
            s1: needle1,
453
0
            s2: needle2,
454
0
            v1: V::splat(needle1),
455
0
            v2: V::splat(needle2),
456
0
        }
457
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m128i>>::new
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m256i>>::new
458
459
    /// Returns the first needle given to `Two::new`.
460
    #[inline(always)]
461
0
    pub(crate) fn needle1(&self) -> u8 {
462
0
        self.s1
463
0
    }
464
465
    /// Returns the second needle given to `Two::new`.
466
    #[inline(always)]
467
0
    pub(crate) fn needle2(&self) -> u8 {
468
0
        self.s2
469
0
    }
470
471
    /// Return a pointer to the first occurrence of one of the needles in the
472
    /// given haystack. If no such occurrence exists, then `None` is returned.
473
    ///
474
    /// When a match is found, the pointer returned is guaranteed to be
475
    /// `>= start` and `< end`.
476
    ///
477
    /// # Safety
478
    ///
479
    /// * It must be the case that `start < end` and that the distance between
480
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
481
    /// to do at least an unaligned load of `V` at `start`.
482
    /// * Both `start` and `end` must be valid for reads.
483
    /// * Both `start` and `end` must point to an initialized value.
484
    /// * Both `start` and `end` must point to the same allocated object and
485
    /// must either be in bounds or at most one byte past the end of the
486
    /// allocated object.
487
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
488
    /// object.
489
    /// * The distance between `start` and `end` must not overflow `isize`.
490
    /// * The distance being in bounds must not rely on "wrapping around" the
491
    /// address space.
492
    #[inline(always)]
493
0
    pub(crate) unsafe fn find_raw(
494
0
        &self,
495
0
        start: *const u8,
496
0
        end: *const u8,
497
0
    ) -> Option<*const u8> {
498
0
        // If we want to support vectors bigger than 256 bits, we probably
499
0
        // need to move up to using a u64 for the masks used below. Currently
500
0
        // they are 32 bits, which means we're SOL for vectors that need masks
501
0
        // bigger than 32 bits. Overall unclear until there's a use case.
502
0
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
503
504
0
        let topos = V::Mask::first_offset;
505
0
        let len = end.distance(start);
506
0
        debug_assert!(
507
0
            len >= V::BYTES,
508
0
            "haystack has length {}, but must be at least {}",
509
            len,
510
            V::BYTES
511
        );
512
513
        // Search a possibly unaligned chunk at `start`. This covers any part
514
        // of the haystack prior to where aligned loads can start.
515
0
        if let Some(cur) = self.search_chunk(start, topos) {
516
0
            return Some(cur);
517
0
        }
518
0
        // Set `cur` to the first V-aligned pointer greater than `start`.
519
0
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
520
0
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
521
0
        if len >= Self::LOOP_SIZE {
522
0
            while cur <= end.sub(Self::LOOP_SIZE) {
523
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
524
525
0
                let a = V::load_aligned(cur);
526
0
                let b = V::load_aligned(cur.add(V::BYTES));
527
0
                let eqa1 = self.v1.cmpeq(a);
528
0
                let eqb1 = self.v1.cmpeq(b);
529
0
                let eqa2 = self.v2.cmpeq(a);
530
0
                let eqb2 = self.v2.cmpeq(b);
531
0
                let or1 = eqa1.or(eqb1);
532
0
                let or2 = eqa2.or(eqb2);
533
0
                let or3 = or1.or(or2);
534
0
                if or3.movemask_will_have_non_zero() {
535
0
                    let mask = eqa1.movemask().or(eqa2.movemask());
536
0
                    if mask.has_non_zero() {
537
0
                        return Some(cur.add(topos(mask)));
538
0
                    }
539
0
540
0
                    let mask = eqb1.movemask().or(eqb2.movemask());
541
0
                    debug_assert!(mask.has_non_zero());
542
0
                    return Some(cur.add(V::BYTES).add(topos(mask)));
543
0
                }
544
0
                cur = cur.add(Self::LOOP_SIZE);
545
            }
546
0
        }
547
        // Handle any leftovers after the aligned loop above. We use unaligned
548
        // loads here, but I believe we are guaranteed that they are aligned
549
        // since `cur` is aligned.
550
0
        while cur <= end.sub(V::BYTES) {
551
0
            debug_assert!(end.distance(cur) >= V::BYTES);
552
0
            if let Some(cur) = self.search_chunk(cur, topos) {
553
0
                return Some(cur);
554
0
            }
555
0
            cur = cur.add(V::BYTES);
556
        }
557
        // Finally handle any remaining bytes less than the size of V. In this
558
        // case, our pointer may indeed be unaligned and the load may overlap
559
        // with the previous one. But that's okay since we know the previous
560
        // load didn't lead to a match (otherwise we wouldn't be here).
561
0
        if cur < end {
562
0
            debug_assert!(end.distance(cur) < V::BYTES);
563
0
            cur = cur.sub(V::BYTES - end.distance(cur));
564
0
            debug_assert_eq!(end.distance(cur), V::BYTES);
565
0
            return self.search_chunk(cur, topos);
566
0
        }
567
0
        None
568
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m128i>>::find_raw
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m256i>>::find_raw
569
570
    /// Return a pointer to the last occurrence of the needle in the given
571
    /// haystack. If no such occurrence exists, then `None` is returned.
572
    ///
573
    /// When a match is found, the pointer returned is guaranteed to be
574
    /// `>= start` and `< end`.
575
    ///
576
    /// # Safety
577
    ///
578
    /// * It must be the case that `start < end` and that the distance between
579
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
580
    /// to do at least an unaligned load of `V` at `start`.
581
    /// * Both `start` and `end` must be valid for reads.
582
    /// * Both `start` and `end` must point to an initialized value.
583
    /// * Both `start` and `end` must point to the same allocated object and
584
    /// must either be in bounds or at most one byte past the end of the
585
    /// allocated object.
586
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
587
    /// object.
588
    /// * The distance between `start` and `end` must not overflow `isize`.
589
    /// * The distance being in bounds must not rely on "wrapping around" the
590
    /// address space.
591
    #[inline(always)]
592
0
    pub(crate) unsafe fn rfind_raw(
593
0
        &self,
594
0
        start: *const u8,
595
0
        end: *const u8,
596
0
    ) -> Option<*const u8> {
597
0
        // If we want to support vectors bigger than 256 bits, we probably
598
0
        // need to move up to using a u64 for the masks used below. Currently
599
0
        // they are 32 bits, which means we're SOL for vectors that need masks
600
0
        // bigger than 32 bits. Overall unclear until there's a use case.
601
0
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
602
603
0
        let topos = V::Mask::last_offset;
604
0
        let len = end.distance(start);
605
0
        debug_assert!(
606
0
            len >= V::BYTES,
607
0
            "haystack has length {}, but must be at least {}",
608
            len,
609
            V::BYTES
610
        );
611
612
0
        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
613
0
            return Some(cur);
614
0
        }
615
0
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
616
0
        debug_assert!(start <= cur && cur <= end);
617
0
        if len >= Self::LOOP_SIZE {
618
0
            while cur >= start.add(Self::LOOP_SIZE) {
619
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
620
621
0
                cur = cur.sub(Self::LOOP_SIZE);
622
0
                let a = V::load_aligned(cur);
623
0
                let b = V::load_aligned(cur.add(V::BYTES));
624
0
                let eqa1 = self.v1.cmpeq(a);
625
0
                let eqb1 = self.v1.cmpeq(b);
626
0
                let eqa2 = self.v2.cmpeq(a);
627
0
                let eqb2 = self.v2.cmpeq(b);
628
0
                let or1 = eqa1.or(eqb1);
629
0
                let or2 = eqa2.or(eqb2);
630
0
                let or3 = or1.or(or2);
631
0
                if or3.movemask_will_have_non_zero() {
632
0
                    let mask = eqb1.movemask().or(eqb2.movemask());
633
0
                    if mask.has_non_zero() {
634
0
                        return Some(cur.add(V::BYTES).add(topos(mask)));
635
0
                    }
636
0
637
0
                    let mask = eqa1.movemask().or(eqa2.movemask());
638
0
                    debug_assert!(mask.has_non_zero());
639
0
                    return Some(cur.add(topos(mask)));
640
0
                }
641
            }
642
0
        }
643
0
        while cur >= start.add(V::BYTES) {
644
0
            debug_assert!(cur.distance(start) >= V::BYTES);
645
0
            cur = cur.sub(V::BYTES);
646
0
            if let Some(cur) = self.search_chunk(cur, topos) {
647
0
                return Some(cur);
648
0
            }
649
        }
650
0
        if cur > start {
651
0
            debug_assert!(cur.distance(start) < V::BYTES);
652
0
            return self.search_chunk(start, topos);
653
0
        }
654
0
        None
655
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m128i>>::rfind_raw
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m256i>>::rfind_raw
656
657
    /// Search `V::BYTES` starting at `cur` via an unaligned load.
658
    ///
659
    /// `mask_to_offset` should be a function that converts a `movemask` to
660
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
661
    /// match location if one is found. Generally it is expected to use either
662
    /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether
663
    /// one is implementing a forward or reverse search, respectively.
664
    ///
665
    /// # Safety
666
    ///
667
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
668
    /// load of size `V::BYTES` at `cur`.
669
    #[inline(always)]
670
0
    unsafe fn search_chunk(
671
0
        &self,
672
0
        cur: *const u8,
673
0
        mask_to_offset: impl Fn(V::Mask) -> usize,
674
0
    ) -> Option<*const u8> {
675
0
        let chunk = V::load_unaligned(cur);
676
0
        let eq1 = self.v1.cmpeq(chunk);
677
0
        let eq2 = self.v2.cmpeq(chunk);
678
0
        let mask = eq1.or(eq2).movemask();
679
0
        if mask.has_non_zero() {
680
0
            let mask1 = eq1.movemask();
681
0
            let mask2 = eq2.movemask();
682
0
            Some(cur.add(mask_to_offset(mask1.or(mask2))))
683
        } else {
684
0
            None
685
        }
686
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m128i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::last_offset>
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m128i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::first_offset>
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m256i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::last_offset>
Unexecuted instantiation: <memchr::arch::generic::memchr::Two<core::core_arch::x86::__m256i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::first_offset>
687
}
688
689
/// Finds all occurrences of two bytes in a haystack.
690
///
691
/// That is, this reports matches of one of two possible bytes. For example,
692
/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
693
/// `4` and `5`.
694
#[derive(Clone, Copy, Debug)]
695
pub(crate) struct Three<V> {
696
    s1: u8,
697
    s2: u8,
698
    s3: u8,
699
    v1: V,
700
    v2: V,
701
    v3: V,
702
}
703
704
impl<V: Vector> Three<V> {
705
    /// The number of bytes we examine per each iteration of our search loop.
706
    const LOOP_SIZE: usize = 2 * V::BYTES;
707
708
    /// Create a new searcher that finds occurrences of the byte given.
709
    #[inline(always)]
710
0
    pub(crate) unsafe fn new(
711
0
        needle1: u8,
712
0
        needle2: u8,
713
0
        needle3: u8,
714
0
    ) -> Three<V> {
715
0
        Three {
716
0
            s1: needle1,
717
0
            s2: needle2,
718
0
            s3: needle3,
719
0
            v1: V::splat(needle1),
720
0
            v2: V::splat(needle2),
721
0
            v3: V::splat(needle3),
722
0
        }
723
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m128i>>::new
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m256i>>::new
724
725
    /// Returns the first needle given to `Three::new`.
726
    #[inline(always)]
727
0
    pub(crate) fn needle1(&self) -> u8 {
728
0
        self.s1
729
0
    }
730
731
    /// Returns the second needle given to `Three::new`.
732
    #[inline(always)]
733
0
    pub(crate) fn needle2(&self) -> u8 {
734
0
        self.s2
735
0
    }
736
737
    /// Returns the third needle given to `Three::new`.
738
    #[inline(always)]
739
0
    pub(crate) fn needle3(&self) -> u8 {
740
0
        self.s3
741
0
    }
742
743
    /// Return a pointer to the first occurrence of one of the needles in the
744
    /// given haystack. If no such occurrence exists, then `None` is returned.
745
    ///
746
    /// When a match is found, the pointer returned is guaranteed to be
747
    /// `>= start` and `< end`.
748
    ///
749
    /// # Safety
750
    ///
751
    /// * It must be the case that `start < end` and that the distance between
752
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
753
    /// to do at least an unaligned load of `V` at `start`.
754
    /// * Both `start` and `end` must be valid for reads.
755
    /// * Both `start` and `end` must point to an initialized value.
756
    /// * Both `start` and `end` must point to the same allocated object and
757
    /// must either be in bounds or at most one byte past the end of the
758
    /// allocated object.
759
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
760
    /// object.
761
    /// * The distance between `start` and `end` must not overflow `isize`.
762
    /// * The distance being in bounds must not rely on "wrapping around" the
763
    /// address space.
764
    #[inline(always)]
765
0
    pub(crate) unsafe fn find_raw(
766
0
        &self,
767
0
        start: *const u8,
768
0
        end: *const u8,
769
0
    ) -> Option<*const u8> {
770
0
        // If we want to support vectors bigger than 256 bits, we probably
771
0
        // need to move up to using a u64 for the masks used below. Currently
772
0
        // they are 32 bits, which means we're SOL for vectors that need masks
773
0
        // bigger than 32 bits. Overall unclear until there's a use case.
774
0
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
775
776
0
        let topos = V::Mask::first_offset;
777
0
        let len = end.distance(start);
778
0
        debug_assert!(
779
0
            len >= V::BYTES,
780
0
            "haystack has length {}, but must be at least {}",
781
            len,
782
            V::BYTES
783
        );
784
785
        // Search a possibly unaligned chunk at `start`. This covers any part
786
        // of the haystack prior to where aligned loads can start.
787
0
        if let Some(cur) = self.search_chunk(start, topos) {
788
0
            return Some(cur);
789
0
        }
790
0
        // Set `cur` to the first V-aligned pointer greater than `start`.
791
0
        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN));
792
0
        debug_assert!(cur > start && end.sub(V::BYTES) >= start);
793
0
        if len >= Self::LOOP_SIZE {
794
0
            while cur <= end.sub(Self::LOOP_SIZE) {
795
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
796
797
0
                let a = V::load_aligned(cur);
798
0
                let b = V::load_aligned(cur.add(V::BYTES));
799
0
                let eqa1 = self.v1.cmpeq(a);
800
0
                let eqb1 = self.v1.cmpeq(b);
801
0
                let eqa2 = self.v2.cmpeq(a);
802
0
                let eqb2 = self.v2.cmpeq(b);
803
0
                let eqa3 = self.v3.cmpeq(a);
804
0
                let eqb3 = self.v3.cmpeq(b);
805
0
                let or1 = eqa1.or(eqb1);
806
0
                let or2 = eqa2.or(eqb2);
807
0
                let or3 = eqa3.or(eqb3);
808
0
                let or4 = or1.or(or2);
809
0
                let or5 = or3.or(or4);
810
0
                if or5.movemask_will_have_non_zero() {
811
0
                    let mask = eqa1
812
0
                        .movemask()
813
0
                        .or(eqa2.movemask())
814
0
                        .or(eqa3.movemask());
815
0
                    if mask.has_non_zero() {
816
0
                        return Some(cur.add(topos(mask)));
817
0
                    }
818
0
819
0
                    let mask = eqb1
820
0
                        .movemask()
821
0
                        .or(eqb2.movemask())
822
0
                        .or(eqb3.movemask());
823
0
                    debug_assert!(mask.has_non_zero());
824
0
                    return Some(cur.add(V::BYTES).add(topos(mask)));
825
0
                }
826
0
                cur = cur.add(Self::LOOP_SIZE);
827
            }
828
0
        }
829
        // Handle any leftovers after the aligned loop above. We use unaligned
830
        // loads here, but I believe we are guaranteed that they are aligned
831
        // since `cur` is aligned.
832
0
        while cur <= end.sub(V::BYTES) {
833
0
            debug_assert!(end.distance(cur) >= V::BYTES);
834
0
            if let Some(cur) = self.search_chunk(cur, topos) {
835
0
                return Some(cur);
836
0
            }
837
0
            cur = cur.add(V::BYTES);
838
        }
839
        // Finally handle any remaining bytes less than the size of V. In this
840
        // case, our pointer may indeed be unaligned and the load may overlap
841
        // with the previous one. But that's okay since we know the previous
842
        // load didn't lead to a match (otherwise we wouldn't be here).
843
0
        if cur < end {
844
0
            debug_assert!(end.distance(cur) < V::BYTES);
845
0
            cur = cur.sub(V::BYTES - end.distance(cur));
846
0
            debug_assert_eq!(end.distance(cur), V::BYTES);
847
0
            return self.search_chunk(cur, topos);
848
0
        }
849
0
        None
850
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m128i>>::find_raw
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m256i>>::find_raw
851
852
    /// Return a pointer to the last occurrence of the needle in the given
853
    /// haystack. If no such occurrence exists, then `None` is returned.
854
    ///
855
    /// When a match is found, the pointer returned is guaranteed to be
856
    /// `>= start` and `< end`.
857
    ///
858
    /// # Safety
859
    ///
860
    /// * It must be the case that `start < end` and that the distance between
861
    /// them is at least equal to `V::BYTES`. That is, it must always be valid
862
    /// to do at least an unaligned load of `V` at `start`.
863
    /// * Both `start` and `end` must be valid for reads.
864
    /// * Both `start` and `end` must point to an initialized value.
865
    /// * Both `start` and `end` must point to the same allocated object and
866
    /// must either be in bounds or at most one byte past the end of the
867
    /// allocated object.
868
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
869
    /// object.
870
    /// * The distance between `start` and `end` must not overflow `isize`.
871
    /// * The distance being in bounds must not rely on "wrapping around" the
872
    /// address space.
873
    #[inline(always)]
874
0
    pub(crate) unsafe fn rfind_raw(
875
0
        &self,
876
0
        start: *const u8,
877
0
        end: *const u8,
878
0
    ) -> Option<*const u8> {
879
0
        // If we want to support vectors bigger than 256 bits, we probably
880
0
        // need to move up to using a u64 for the masks used below. Currently
881
0
        // they are 32 bits, which means we're SOL for vectors that need masks
882
0
        // bigger than 32 bits. Overall unclear until there's a use case.
883
0
        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes");
884
885
0
        let topos = V::Mask::last_offset;
886
0
        let len = end.distance(start);
887
0
        debug_assert!(
888
0
            len >= V::BYTES,
889
0
            "haystack has length {}, but must be at least {}",
890
            len,
891
            V::BYTES
892
        );
893
894
0
        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) {
895
0
            return Some(cur);
896
0
        }
897
0
        let mut cur = end.sub(end.as_usize() & V::ALIGN);
898
0
        debug_assert!(start <= cur && cur <= end);
899
0
        if len >= Self::LOOP_SIZE {
900
0
            while cur >= start.add(Self::LOOP_SIZE) {
901
0
                debug_assert_eq!(0, cur.as_usize() % V::BYTES);
902
903
0
                cur = cur.sub(Self::LOOP_SIZE);
904
0
                let a = V::load_aligned(cur);
905
0
                let b = V::load_aligned(cur.add(V::BYTES));
906
0
                let eqa1 = self.v1.cmpeq(a);
907
0
                let eqb1 = self.v1.cmpeq(b);
908
0
                let eqa2 = self.v2.cmpeq(a);
909
0
                let eqb2 = self.v2.cmpeq(b);
910
0
                let eqa3 = self.v3.cmpeq(a);
911
0
                let eqb3 = self.v3.cmpeq(b);
912
0
                let or1 = eqa1.or(eqb1);
913
0
                let or2 = eqa2.or(eqb2);
914
0
                let or3 = eqa3.or(eqb3);
915
0
                let or4 = or1.or(or2);
916
0
                let or5 = or3.or(or4);
917
0
                if or5.movemask_will_have_non_zero() {
918
0
                    let mask = eqb1
919
0
                        .movemask()
920
0
                        .or(eqb2.movemask())
921
0
                        .or(eqb3.movemask());
922
0
                    if mask.has_non_zero() {
923
0
                        return Some(cur.add(V::BYTES).add(topos(mask)));
924
0
                    }
925
0
926
0
                    let mask = eqa1
927
0
                        .movemask()
928
0
                        .or(eqa2.movemask())
929
0
                        .or(eqa3.movemask());
930
0
                    debug_assert!(mask.has_non_zero());
931
0
                    return Some(cur.add(topos(mask)));
932
0
                }
933
            }
934
0
        }
935
0
        while cur >= start.add(V::BYTES) {
936
0
            debug_assert!(cur.distance(start) >= V::BYTES);
937
0
            cur = cur.sub(V::BYTES);
938
0
            if let Some(cur) = self.search_chunk(cur, topos) {
939
0
                return Some(cur);
940
0
            }
941
        }
942
0
        if cur > start {
943
0
            debug_assert!(cur.distance(start) < V::BYTES);
944
0
            return self.search_chunk(start, topos);
945
0
        }
946
0
        None
947
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m128i>>::rfind_raw
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m256i>>::rfind_raw
948
949
    /// Search `V::BYTES` starting at `cur` via an unaligned load.
950
    ///
951
    /// `mask_to_offset` should be a function that converts a `movemask` to
952
    /// an offset such that `cur.add(offset)` corresponds to a pointer to the
953
    /// match location if one is found. Generally it is expected to use either
954
    /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether
955
    /// one is implementing a forward or reverse search, respectively.
956
    ///
957
    /// # Safety
958
    ///
959
    /// `cur` must be a valid pointer and it must be valid to do an unaligned
960
    /// load of size `V::BYTES` at `cur`.
961
    #[inline(always)]
962
0
    unsafe fn search_chunk(
963
0
        &self,
964
0
        cur: *const u8,
965
0
        mask_to_offset: impl Fn(V::Mask) -> usize,
966
0
    ) -> Option<*const u8> {
967
0
        let chunk = V::load_unaligned(cur);
968
0
        let eq1 = self.v1.cmpeq(chunk);
969
0
        let eq2 = self.v2.cmpeq(chunk);
970
0
        let eq3 = self.v3.cmpeq(chunk);
971
0
        let mask = eq1.or(eq2).or(eq3).movemask();
972
0
        if mask.has_non_zero() {
973
0
            let mask1 = eq1.movemask();
974
0
            let mask2 = eq2.movemask();
975
0
            let mask3 = eq3.movemask();
976
0
            Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3))))
977
        } else {
978
0
            None
979
        }
980
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m128i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::last_offset>
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m128i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::first_offset>
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m256i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::last_offset>
Unexecuted instantiation: <memchr::arch::generic::memchr::Three<core::core_arch::x86::__m256i>>::search_chunk::<<memchr::vector::SensibleMoveMask as memchr::vector::MoveMask>::first_offset>
981
}
982
983
/// An iterator over all occurrences of a set of bytes in a haystack.
984
///
985
/// This iterator implements the routines necessary to provide a
986
/// `DoubleEndedIterator` impl, which means it can also be used to find
987
/// occurrences in reverse order.
988
///
989
/// The lifetime parameters are as follows:
990
///
991
/// * `'h` refers to the lifetime of the haystack being searched.
992
///
993
/// This type is intended to be used to implement all iterators for the
994
/// `memchr` family of functions. It handles a tiny bit of marginally tricky
995
/// raw pointer math, but otherwise expects the caller to provide `find_raw`
996
/// and `rfind_raw` routines for each call of `next` and `next_back`,
997
/// respectively.
998
#[derive(Clone, Debug)]
999
pub(crate) struct Iter<'h> {
1000
    /// The original starting point into the haystack. We use this to convert
1001
    /// pointers to offsets.
1002
    original_start: *const u8,
1003
    /// The current starting point into the haystack. That is, where the next
1004
    /// search will begin.
1005
    start: *const u8,
1006
    /// The current ending point into the haystack. That is, where the next
1007
    /// reverse search will begin.
1008
    end: *const u8,
1009
    /// A marker for tracking the lifetime of the start/cur_start/cur_end
1010
    /// pointers above, which all point into the haystack.
1011
    haystack: core::marker::PhantomData<&'h [u8]>,
1012
}
1013
1014
// SAFETY: Iter contains no shared references to anything that performs any
1015
// interior mutations. Also, the lifetime guarantees that Iter will not outlive
1016
// the haystack.
1017
unsafe impl<'h> Send for Iter<'h> {}
1018
1019
// SAFETY: Iter perform no interior mutations, therefore no explicit
1020
// synchronization is necessary. Also, the lifetime guarantees that Iter will
1021
// not outlive the haystack.
1022
unsafe impl<'h> Sync for Iter<'h> {}
1023
1024
impl<'h> Iter<'h> {
1025
    /// Create a new generic memchr iterator.
1026
    #[inline(always)]
1027
0
    pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> {
1028
0
        Iter {
1029
0
            original_start: haystack.as_ptr(),
1030
0
            start: haystack.as_ptr(),
1031
0
            end: haystack.as_ptr().wrapping_add(haystack.len()),
1032
0
            haystack: core::marker::PhantomData,
1033
0
        }
1034
0
    }
1035
1036
    /// Returns the next occurrence in the forward direction.
1037
    ///
1038
    /// # Safety
1039
    ///
1040
    /// Callers must ensure that if a pointer is returned from the closure
1041
    /// provided, then it must be greater than or equal to the start pointer
1042
    /// and less than the end pointer.
1043
    #[inline(always)]
1044
0
    pub(crate) unsafe fn next(
1045
0
        &mut self,
1046
0
        mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1047
0
    ) -> Option<usize> {
1048
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
1049
        // We only ever modify start/end corresponding to a matching offset
1050
        // found between start and end. Thus all changes to start/end maintain
1051
        // our safety requirements.
1052
        //
1053
        // The only other assumption we rely on is that the pointer returned
1054
        // by `find_raw` satisfies `self.start <= found < self.end`, and that
1055
        // safety contract is forwarded to the caller.
1056
0
        let found = find_raw(self.start, self.end)?;
1057
0
        let result = found.distance(self.original_start);
1058
0
        self.start = found.add(1);
1059
0
        Some(result)
1060
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::memchr::Memchr as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::all::memchr::TwoIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::memchr::Memchr2 as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::x86_64::avx2::memchr::TwoIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::x86_64::sse2::memchr::TwoIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::all::memchr::ThreeIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::memchr::Memchr3 as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::x86_64::avx2::memchr::ThreeIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::x86_64::sse2::memchr::ThreeIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::memchr::Memchr as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::all::memchr::OneIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::x86_64::avx2::memchr::OneIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next::<<memchr::arch::x86_64::sse2::memchr::OneIter as core::iter::traits::iterator::Iterator>::next::{closure#0}>
1061
1062
    /// Returns the number of remaining elements in this iterator.
1063
    #[inline(always)]
1064
0
    pub(crate) fn count(
1065
0
        self,
1066
0
        mut count_raw: impl FnMut(*const u8, *const u8) -> usize,
1067
0
    ) -> usize {
1068
0
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
1069
0
        // We only ever modify start/end corresponding to a matching offset
1070
0
        // found between start and end. Thus all changes to start/end maintain
1071
0
        // our safety requirements.
1072
0
        count_raw(self.start, self.end)
1073
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::count::<<memchr::memchr::Memchr as core::iter::traits::iterator::Iterator>::count::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::count::<<memchr::arch::all::memchr::OneIter as core::iter::traits::iterator::Iterator>::count::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::count::<<memchr::arch::x86_64::avx2::memchr::OneIter as core::iter::traits::iterator::Iterator>::count::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::count::<<memchr::arch::x86_64::sse2::memchr::OneIter as core::iter::traits::iterator::Iterator>::count::{closure#0}>
1074
1075
    /// Returns the next occurrence in reverse.
1076
    ///
1077
    /// # Safety
1078
    ///
1079
    /// Callers must ensure that if a pointer is returned from the closure
1080
    /// provided, then it must be greater than or equal to the start pointer
1081
    /// and less than the end pointer.
1082
    #[inline(always)]
1083
0
    pub(crate) unsafe fn next_back(
1084
0
        &mut self,
1085
0
        mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1086
0
    ) -> Option<usize> {
1087
        // SAFETY: Pointers are derived directly from the same &[u8] haystack.
1088
        // We only ever modify start/end corresponding to a matching offset
1089
        // found between start and end. Thus all changes to start/end maintain
1090
        // our safety requirements.
1091
        //
1092
        // The only other assumption we rely on is that the pointer returned
1093
        // by `rfind_raw` satisfies `self.start <= found < self.end`, and that
1094
        // safety contract is forwarded to the caller.
1095
0
        let found = rfind_raw(self.start, self.end)?;
1096
0
        let result = found.distance(self.original_start);
1097
0
        self.end = found;
1098
0
        Some(result)
1099
0
    }
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::memchr::Memchr as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::all::memchr::OneIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::x86_64::avx2::memchr::OneIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::x86_64::sse2::memchr::OneIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::all::memchr::TwoIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::memchr::Memchr2 as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::x86_64::avx2::memchr::TwoIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::x86_64::sse2::memchr::TwoIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::all::memchr::ThreeIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::memchr::Memchr3 as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::x86_64::avx2::memchr::ThreeIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
Unexecuted instantiation: <memchr::arch::generic::memchr::Iter>::next_back::<<memchr::arch::x86_64::sse2::memchr::ThreeIter as core::iter::traits::double_ended::DoubleEndedIterator>::next_back::{closure#0}>
1100
1101
    /// Provides an implementation of `Iterator::size_hint`.
1102
    #[inline(always)]
1103
0
    pub(crate) fn size_hint(&self) -> (usize, Option<usize>) {
1104
0
        (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize())))
1105
0
    }
1106
}
1107
1108
/// Search a slice using a function that operates on raw pointers.
1109
///
1110
/// Given a function to search a contiguous sequence of memory for the location
1111
/// of a non-empty set of bytes, this will execute that search on a slice of
1112
/// bytes. The pointer returned by the given function will be converted to an
1113
/// offset relative to the starting point of the given slice. That is, if a
1114
/// match is found, the offset returned by this routine is guaranteed to be a
1115
/// valid index into `haystack`.
1116
///
1117
/// Callers may use this for a forward or reverse search.
1118
///
1119
/// # Safety
1120
///
1121
/// Callers must ensure that if a pointer is returned by `find_raw`, then the
1122
/// pointer must be greater than or equal to the starting pointer and less than
1123
/// the end pointer.
1124
#[inline(always)]
1125
895k
pub(crate) unsafe fn search_slice_with_raw(
1126
895k
    haystack: &[u8],
1127
895k
    mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1128
895k
) -> Option<usize> {
1129
895k
    // SAFETY: We rely on `find_raw` to return a correct and valid pointer, but
1130
895k
    // otherwise, `start` and `end` are valid due to the guarantees provided by
1131
895k
    // a &[u8].
1132
895k
    let start = haystack.as_ptr();
1133
895k
    let end = start.add(haystack.len());
1134
895k
    let found = find_raw(start, end)?;
1135
895k
    Some(found.distance(start))
1136
895k
}
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr::{closure#0}>
memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr::{closure#0}>
Line
Count
Source
1125
895k
pub(crate) unsafe fn search_slice_with_raw(
1126
895k
    haystack: &[u8],
1127
895k
    mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>,
1128
895k
) -> Option<usize> {
1129
895k
    // SAFETY: We rely on `find_raw` to return a correct and valid pointer, but
1130
895k
    // otherwise, `start` and `end` are valid due to the guarantees provided by
1131
895k
    // a &[u8].
1132
895k
    let start = haystack.as_ptr();
1133
895k
    let end = start.add(haystack.len());
1134
895k
    let found = find_raw(start, end)?;
1135
895k
    Some(found.distance(start))
1136
895k
}
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr2::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr2::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::all::memchr::One>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::all::memchr::One>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::avx2::memchr::One>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::avx2::memchr::One>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::sse2::memchr::One>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::sse2::memchr::One>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::all::memchr::Two>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::all::memchr::Two>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::avx2::memchr::Two>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::avx2::memchr::Two>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::sse2::memchr::Two>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::sse2::memchr::Two>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::all::memchr::Three>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::all::memchr::Three>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::avx2::memchr::Three>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::avx2::memchr::Three>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::sse2::memchr::Three>::find::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<<memchr::arch::x86_64::sse2::memchr::Three>::rfind::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr2::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memchr3::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memrchr::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memrchr2::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::search_slice_with_raw::<memchr::memchr::memrchr3::{closure#0}>
1137
1138
/// Performs a forward byte-at-a-time loop until either `ptr >= end_ptr` or
1139
/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
1140
/// returned. If the latter occurs, then the pointer at which `confirm` returns
1141
/// `true` is returned.
1142
///
1143
/// # Safety
1144
///
1145
/// Callers must provide valid pointers and they must satisfy `start_ptr <=
1146
/// ptr` and `ptr <= end_ptr`.
1147
#[inline(always)]
1148
4.42k
pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>(
1149
4.42k
    start: *const u8,
1150
4.42k
    end: *const u8,
1151
4.42k
    confirm: F,
1152
4.42k
) -> Option<*const u8> {
1153
4.42k
    debug_assert!(start <= end);
1154
4.42k
    let mut ptr = start;
1155
27.2k
    while ptr < end {
1156
27.2k
        if confirm(*ptr) {
1157
4.42k
            return Some(ptr);
1158
22.8k
        }
1159
22.8k
        ptr = ptr.offset(1);
1160
    }
1161
0
    None
1162
4.42k
}
Unexecuted instantiation: memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::all::memchr::One>::find_raw::{closure#0}>
memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::x86_64::avx2::memchr::One>::find_raw::{closure#0}>
Line
Count
Source
1148
4.42k
pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>(
1149
4.42k
    start: *const u8,
1150
4.42k
    end: *const u8,
1151
4.42k
    confirm: F,
1152
4.42k
) -> Option<*const u8> {
1153
4.42k
    debug_assert!(start <= end);
1154
4.42k
    let mut ptr = start;
1155
27.2k
    while ptr < end {
1156
27.2k
        if confirm(*ptr) {
1157
4.42k
            return Some(ptr);
1158
22.8k
        }
1159
22.8k
        ptr = ptr.offset(1);
1160
    }
1161
0
    None
1162
4.42k
}
Unexecuted instantiation: memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::x86_64::sse2::memchr::One>::find_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::all::memchr::Two>::find_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::x86_64::avx2::memchr::Two>::find_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::x86_64::sse2::memchr::Two>::find_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::all::memchr::Three>::find_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::x86_64::avx2::memchr::Three>::find_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::fwd_byte_by_byte::<<memchr::arch::x86_64::sse2::memchr::Three>::find_raw::{closure#0}>
1163
1164
/// Performs a reverse byte-at-a-time loop until either `ptr < start_ptr` or
1165
/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is
1166
/// returned. If the latter occurs, then the pointer at which `confirm` returns
1167
/// `true` is returned.
1168
///
1169
/// # Safety
1170
///
1171
/// Callers must provide valid pointers and they must satisfy `start_ptr <=
1172
/// ptr` and `ptr <= end_ptr`.
1173
#[inline(always)]
1174
0
pub(crate) unsafe fn rev_byte_by_byte<F: Fn(u8) -> bool>(
1175
0
    start: *const u8,
1176
0
    end: *const u8,
1177
0
    confirm: F,
1178
0
) -> Option<*const u8> {
1179
0
    debug_assert!(start <= end);
1180
1181
0
    let mut ptr = end;
1182
0
    while ptr > start {
1183
0
        ptr = ptr.offset(-1);
1184
0
        if confirm(*ptr) {
1185
0
            return Some(ptr);
1186
0
        }
1187
    }
1188
0
    None
1189
0
}
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::all::memchr::One>::rfind_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::x86_64::avx2::memchr::One>::rfind_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::x86_64::sse2::memchr::One>::rfind_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::all::memchr::Two>::rfind_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::x86_64::avx2::memchr::Two>::rfind_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::x86_64::sse2::memchr::Two>::rfind_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::all::memchr::Three>::rfind_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::x86_64::avx2::memchr::Three>::rfind_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::rev_byte_by_byte::<<memchr::arch::x86_64::sse2::memchr::Three>::rfind_raw::{closure#0}>
1190
1191
/// Performs a forward byte-at-a-time loop until `ptr >= end_ptr` and returns
1192
/// the number of times `confirm(*ptr)` returns `true`.
1193
///
1194
/// # Safety
1195
///
1196
/// Callers must provide valid pointers and they must satisfy `start_ptr <=
1197
/// ptr` and `ptr <= end_ptr`.
1198
#[inline(always)]
1199
0
pub(crate) unsafe fn count_byte_by_byte<F: Fn(u8) -> bool>(
1200
0
    start: *const u8,
1201
0
    end: *const u8,
1202
0
    confirm: F,
1203
0
) -> usize {
1204
0
    debug_assert!(start <= end);
1205
0
    let mut ptr = start;
1206
0
    let mut count = 0;
1207
0
    while ptr < end {
1208
0
        if confirm(*ptr) {
1209
0
            count += 1;
1210
0
        }
1211
0
        ptr = ptr.offset(1);
1212
    }
1213
0
    count
1214
0
}
Unexecuted instantiation: memchr::arch::generic::memchr::count_byte_by_byte::<<memchr::arch::generic::memchr::One<core::core_arch::x86::__m128i>>::count_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::count_byte_by_byte::<<memchr::arch::generic::memchr::One<core::core_arch::x86::__m256i>>::count_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::count_byte_by_byte::<<memchr::arch::x86_64::avx2::memchr::One>::count_raw::{closure#0}>
Unexecuted instantiation: memchr::arch::generic::memchr::count_byte_by_byte::<<memchr::arch::x86_64::sse2::memchr::One>::count_raw::{closure#0}>