Coverage Report

Created: 2026-05-30 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/memchr-2.8.1/src/memmem/searcher.rs
Line
Count
Source
1
use crate::arch::all::{
2
    packedpair::{HeuristicFrequencyRank, Pair},
3
    rabinkarp, twoway,
4
};
5
6
#[cfg(target_arch = "aarch64")]
7
use crate::arch::aarch64::neon::packedpair as neon;
8
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
9
use crate::arch::wasm32::simd128::packedpair as simd128;
10
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
11
use crate::arch::x86_64::{
12
    avx2::packedpair as avx2, sse2::packedpair as sse2,
13
};
14
15
/// A "meta" substring searcher.
16
///
17
/// To a first approximation, this chooses what it believes to be the "best"
18
/// substring search implemnetation based on the needle at construction time.
19
/// Then, every call to `find` will execute that particular implementation. To
20
/// a second approximation, multiple substring search algorithms may be used,
21
/// depending on the haystack. For example, for supremely short haystacks,
22
/// Rabin-Karp is typically used.
23
///
24
/// See the documentation on `Prefilter` for an explanation of the dispatching
25
/// mechanism. The quick summary is that an enum has too much overhead and
26
/// we can't use dynamic dispatch via traits because we need to work in a
27
/// core-only environment. (Dynamic dispatch works in core-only, but you
28
/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter
29
/// requires `alloc`.) So instead, we use a union and an appropriately paired
30
/// free function to read from the correct field on the union and execute the
31
/// chosen substring search implementation.
32
#[derive(Clone)]
33
pub(crate) struct Searcher {
34
    call: SearcherKindFn,
35
    kind: SearcherKind,
36
    rabinkarp: rabinkarp::Finder,
37
}
38
39
impl Searcher {
40
    /// Creates a new "meta" substring searcher that attempts to choose the
41
    /// best algorithm based on the needle, heuristics and what the current
42
    /// target supports.
43
    #[inline]
44
0
    pub(crate) fn new<R: HeuristicFrequencyRank>(
45
0
        prefilter: PrefilterConfig,
46
0
        ranker: R,
47
0
        needle: &[u8],
48
0
    ) -> Searcher {
49
0
        let rabinkarp = rabinkarp::Finder::new(needle);
50
0
        if needle.len() <= 1 {
51
0
            return if needle.is_empty() {
52
                trace!("building empty substring searcher");
53
0
                Searcher {
54
0
                    call: searcher_kind_empty,
55
0
                    kind: SearcherKind { empty: () },
56
0
                    rabinkarp,
57
0
                }
58
            } else {
59
                trace!("building one-byte substring searcher");
60
0
                debug_assert_eq!(1, needle.len());
61
0
                Searcher {
62
0
                    call: searcher_kind_one_byte,
63
0
                    kind: SearcherKind { one_byte: needle[0] },
64
0
                    rabinkarp,
65
0
                }
66
            };
67
0
        }
68
0
        let pair = match Pair::with_ranker(needle, &ranker) {
69
0
            Some(pair) => pair,
70
0
            None => return Searcher::twoway(needle, rabinkarp, None),
71
        };
72
0
        debug_assert_ne!(
73
0
            pair.index1(),
74
0
            pair.index2(),
75
0
            "pair offsets should not be equivalent"
76
        );
77
        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
78
        {
79
0
            if let Some(pp) = avx2::Finder::with_pair(needle, pair) {
80
0
                if do_packed_search(needle) {
81
                    trace!("building x86_64 AVX2 substring searcher");
82
0
                    let kind = SearcherKind { avx2: pp };
83
0
                    Searcher { call: searcher_kind_avx2, kind, rabinkarp }
84
0
                } else if prefilter.is_none() {
85
0
                    Searcher::twoway(needle, rabinkarp, None)
86
                } else {
87
0
                    let prestrat = Prefilter::avx2(pp, needle);
88
0
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
89
                }
90
0
            } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) {
91
0
                if do_packed_search(needle) {
92
                    trace!("building x86_64 SSE2 substring searcher");
93
0
                    let kind = SearcherKind { sse2: pp };
94
0
                    Searcher { call: searcher_kind_sse2, kind, rabinkarp }
95
0
                } else if prefilter.is_none() {
96
0
                    Searcher::twoway(needle, rabinkarp, None)
97
                } else {
98
0
                    let prestrat = Prefilter::sse2(pp, needle);
99
0
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
100
                }
101
0
            } else if prefilter.is_none() {
102
0
                Searcher::twoway(needle, rabinkarp, None)
103
            } else {
104
                // We're pretty unlikely to get to this point, but it is
105
                // possible to be running on x86_64 without SSE2. Namely, it's
106
                // really up to the OS whether it wants to support vector
107
                // registers or not.
108
0
                let prestrat = Prefilter::fallback(ranker, pair, needle);
109
0
                Searcher::twoway(needle, rabinkarp, prestrat)
110
            }
111
        }
112
        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
113
        {
114
            if let Some(pp) = simd128::Finder::with_pair(needle, pair) {
115
                if do_packed_search(needle) {
116
                    trace!("building wasm32 simd128 substring searcher");
117
                    let kind = SearcherKind { simd128: pp };
118
                    Searcher { call: searcher_kind_simd128, kind, rabinkarp }
119
                } else if prefilter.is_none() {
120
                    Searcher::twoway(needle, rabinkarp, None)
121
                } else {
122
                    let prestrat = Prefilter::simd128(pp, needle);
123
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
124
                }
125
            } else if prefilter.is_none() {
126
                Searcher::twoway(needle, rabinkarp, None)
127
            } else {
128
                let prestrat = Prefilter::fallback(ranker, pair, needle);
129
                Searcher::twoway(needle, rabinkarp, prestrat)
130
            }
131
        }
132
        #[cfg(target_arch = "aarch64")]
133
        {
134
            if let Some(pp) = neon::Finder::with_pair(needle, pair) {
135
                if do_packed_search(needle) {
136
                    trace!("building aarch64 neon substring searcher");
137
                    let kind = SearcherKind { neon: pp };
138
                    Searcher { call: searcher_kind_neon, kind, rabinkarp }
139
                } else if prefilter.is_none() {
140
                    Searcher::twoway(needle, rabinkarp, None)
141
                } else {
142
                    let prestrat = Prefilter::neon(pp, needle);
143
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
144
                }
145
            } else if prefilter.is_none() {
146
                Searcher::twoway(needle, rabinkarp, None)
147
            } else {
148
                let prestrat = Prefilter::fallback(ranker, pair, needle);
149
                Searcher::twoway(needle, rabinkarp, prestrat)
150
            }
151
        }
152
        #[cfg(not(any(
153
            all(target_arch = "x86_64", target_feature = "sse2"),
154
            all(target_arch = "wasm32", target_feature = "simd128"),
155
            target_arch = "aarch64"
156
        )))]
157
        {
158
            if prefilter.is_none() {
159
                Searcher::twoway(needle, rabinkarp, None)
160
            } else {
161
                let prestrat = Prefilter::fallback(ranker, pair, needle);
162
                Searcher::twoway(needle, rabinkarp, prestrat)
163
            }
164
        }
165
0
    }
166
167
    /// Creates a new searcher that always uses the Two-Way algorithm. This is
168
    /// typically used when vector algorithms are unavailable or inappropriate.
169
    /// (For example, when the needle is "too long.")
170
    ///
171
    /// If a prefilter is given, then the searcher returned will be accelerated
172
    /// by the prefilter.
173
    #[inline]
174
0
    fn twoway(
175
0
        needle: &[u8],
176
0
        rabinkarp: rabinkarp::Finder,
177
0
        prestrat: Option<Prefilter>,
178
0
    ) -> Searcher {
179
0
        let finder = twoway::Finder::new(needle);
180
0
        match prestrat {
181
            None => {
182
                trace!("building scalar two-way substring searcher");
183
0
                let kind = SearcherKind { two_way: finder };
184
0
                Searcher { call: searcher_kind_two_way, kind, rabinkarp }
185
            }
186
0
            Some(prestrat) => {
187
                trace!(
188
                    "building scalar two-way \
189
                     substring searcher with a prefilter"
190
                );
191
0
                let two_way_with_prefilter =
192
0
                    TwoWayWithPrefilter { finder, prestrat };
193
0
                let kind = SearcherKind { two_way_with_prefilter };
194
0
                Searcher {
195
0
                    call: searcher_kind_two_way_with_prefilter,
196
0
                    kind,
197
0
                    rabinkarp,
198
0
                }
199
            }
200
        }
201
0
    }
202
203
    /// Searches the given haystack for the given needle. The needle given
204
    /// should be the same as the needle that this finder was initialized
205
    /// with.
206
    ///
207
    /// Inlining this can lead to big wins for latency, and #[inline] doesn't
208
    /// seem to be enough in some cases.
209
    #[inline(always)]
210
0
    pub(crate) fn find(
211
0
        &self,
212
0
        prestate: &mut PrefilterState,
213
0
        haystack: &[u8],
214
0
        needle: &[u8],
215
0
    ) -> Option<usize> {
216
0
        if haystack.len() < needle.len() {
217
0
            None
218
        } else {
219
            // SAFETY: By construction, we've ensured that the function
220
            // in `self.call` is properly paired with the union used in
221
            // `self.kind`.
222
0
            unsafe { (self.call)(self, prestate, haystack, needle) }
223
        }
224
0
    }
225
}
226
227
impl core::fmt::Debug for Searcher {
228
0
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
229
0
        f.debug_struct("Searcher")
230
0
            .field("call", &"<searcher function>")
231
0
            .field("kind", &"<searcher kind union>")
232
0
            .field("rabinkarp", &self.rabinkarp)
233
0
            .finish()
234
0
    }
235
}
236
237
/// A union indicating one of several possible substring search implementations
238
/// that are in active use.
239
///
240
/// This union should only be read by one of the functions prefixed with
241
/// `searcher_kind_`. Namely, the correct function is meant to be paired with
242
/// the union by the caller, such that the function always reads from the
243
/// designated union field.
244
#[derive(Clone, Copy)]
245
union SearcherKind {
246
    empty: (),
247
    one_byte: u8,
248
    two_way: twoway::Finder,
249
    two_way_with_prefilter: TwoWayWithPrefilter,
250
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
251
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
252
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
253
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
254
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
255
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
256
    #[cfg(target_arch = "aarch64")]
257
    neon: crate::arch::aarch64::neon::packedpair::Finder,
258
}
259
260
/// A two-way substring searcher with a prefilter.
261
#[derive(Copy, Clone, Debug)]
262
struct TwoWayWithPrefilter {
263
    finder: twoway::Finder,
264
    prestrat: Prefilter,
265
}
266
267
/// The type of a substring search function.
268
///
269
/// # Safety
270
///
271
/// When using a function of this type, callers must ensure that the correct
272
/// function is paired with the value populated in `SearcherKind` union.
273
type SearcherKindFn = unsafe fn(
274
    searcher: &Searcher,
275
    prestate: &mut PrefilterState,
276
    haystack: &[u8],
277
    needle: &[u8],
278
) -> Option<usize>;
279
280
/// Reads from the `empty` field of `SearcherKind` to handle the case of
281
/// searching for the empty needle. Works on all platforms.
282
///
283
/// # Safety
284
///
285
/// Callers must ensure that the `searcher.kind.empty` union field is set.
286
0
unsafe fn searcher_kind_empty(
287
0
    _searcher: &Searcher,
288
0
    _prestate: &mut PrefilterState,
289
0
    _haystack: &[u8],
290
0
    _needle: &[u8],
291
0
) -> Option<usize> {
292
0
    Some(0)
293
0
}
294
295
/// Reads from the `one_byte` field of `SearcherKind` to handle the case of
296
/// searching for a single byte needle. Works on all platforms.
297
///
298
/// # Safety
299
///
300
/// Callers must ensure that the `searcher.kind.one_byte` union field is set.
301
0
unsafe fn searcher_kind_one_byte(
302
0
    searcher: &Searcher,
303
0
    _prestate: &mut PrefilterState,
304
0
    haystack: &[u8],
305
0
    _needle: &[u8],
306
0
) -> Option<usize> {
307
0
    let needle = searcher.kind.one_byte;
308
0
    crate::memchr(needle, haystack)
309
0
}
310
311
/// Reads from the `two_way` field of `SearcherKind` to handle the case of
312
/// searching for an arbitrary needle without prefilter acceleration. Works on
313
/// all platforms.
314
///
315
/// # Safety
316
///
317
/// Callers must ensure that the `searcher.kind.two_way` union field is set.
318
0
unsafe fn searcher_kind_two_way(
319
0
    searcher: &Searcher,
320
0
    _prestate: &mut PrefilterState,
321
0
    haystack: &[u8],
322
0
    needle: &[u8],
323
0
) -> Option<usize> {
324
0
    if rabinkarp::is_fast(haystack, needle) {
325
0
        searcher.rabinkarp.find(haystack, needle)
326
    } else {
327
0
        searcher.kind.two_way.find(haystack, needle)
328
    }
329
0
}
330
331
/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle
332
/// the case of searching for an arbitrary needle with prefilter acceleration.
333
/// Works on all platforms.
334
///
335
/// # Safety
336
///
337
/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union
338
/// field is set.
339
0
unsafe fn searcher_kind_two_way_with_prefilter(
340
0
    searcher: &Searcher,
341
0
    prestate: &mut PrefilterState,
342
0
    haystack: &[u8],
343
0
    needle: &[u8],
344
0
) -> Option<usize> {
345
0
    if rabinkarp::is_fast(haystack, needle) {
346
0
        searcher.rabinkarp.find(haystack, needle)
347
    } else {
348
0
        let TwoWayWithPrefilter { ref finder, ref prestrat } =
349
0
            searcher.kind.two_way_with_prefilter;
350
0
        let pre = Pre { prestate, prestrat };
351
0
        finder.find_with_prefilter(Some(pre), haystack, needle)
352
    }
353
0
}
354
355
/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2
356
/// vectorized substring search implementation.
357
///
358
/// # Safety
359
///
360
/// Callers must ensure that the `searcher.kind.sse2` union field is set.
361
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
362
0
unsafe fn searcher_kind_sse2(
363
0
    searcher: &Searcher,
364
0
    _prestate: &mut PrefilterState,
365
0
    haystack: &[u8],
366
0
    needle: &[u8],
367
0
) -> Option<usize> {
368
0
    let finder = &searcher.kind.sse2;
369
0
    if haystack.len() < finder.min_haystack_len() {
370
0
        searcher.rabinkarp.find(haystack, needle)
371
    } else {
372
0
        finder.find(haystack, needle)
373
    }
374
0
}
375
376
/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2
377
/// vectorized substring search implementation.
378
///
379
/// # Safety
380
///
381
/// Callers must ensure that the `searcher.kind.avx2` union field is set.
382
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
383
0
unsafe fn searcher_kind_avx2(
384
0
    searcher: &Searcher,
385
0
    _prestate: &mut PrefilterState,
386
0
    haystack: &[u8],
387
0
    needle: &[u8],
388
0
) -> Option<usize> {
389
0
    let finder = &searcher.kind.avx2;
390
0
    if haystack.len() < finder.min_haystack_len() {
391
0
        searcher.rabinkarp.find(haystack, needle)
392
    } else {
393
0
        finder.find(haystack, needle)
394
    }
395
0
}
396
397
/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32
398
/// simd128 vectorized substring search implementation.
399
///
400
/// # Safety
401
///
402
/// Callers must ensure that the `searcher.kind.simd128` union field is set.
403
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
404
unsafe fn searcher_kind_simd128(
405
    searcher: &Searcher,
406
    _prestate: &mut PrefilterState,
407
    haystack: &[u8],
408
    needle: &[u8],
409
) -> Option<usize> {
410
    let finder = &searcher.kind.simd128;
411
    if haystack.len() < finder.min_haystack_len() {
412
        searcher.rabinkarp.find(haystack, needle)
413
    } else {
414
        finder.find(haystack, needle)
415
    }
416
}
417
418
/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon
419
/// vectorized substring search implementation.
420
///
421
/// # Safety
422
///
423
/// Callers must ensure that the `searcher.kind.neon` union field is set.
424
#[cfg(target_arch = "aarch64")]
425
unsafe fn searcher_kind_neon(
426
    searcher: &Searcher,
427
    _prestate: &mut PrefilterState,
428
    haystack: &[u8],
429
    needle: &[u8],
430
) -> Option<usize> {
431
    let finder = &searcher.kind.neon;
432
    if haystack.len() < finder.min_haystack_len() {
433
        searcher.rabinkarp.find(haystack, needle)
434
    } else {
435
        finder.find(haystack, needle)
436
    }
437
}
438
439
/// A reverse substring searcher.
440
#[derive(Clone, Debug)]
441
pub(crate) struct SearcherRev {
442
    kind: SearcherRevKind,
443
    rabinkarp: rabinkarp::FinderRev,
444
}
445
446
/// The kind of the reverse searcher.
447
///
448
/// For the reverse case, we don't do any SIMD acceleration or prefilters.
449
/// There is no specific technical reason why we don't, but rather don't do it
450
/// because it's not clear it's worth the extra code to do so. If you have a
451
/// use case for it, please file an issue.
452
///
453
/// We also don't do the union trick as we do with the forward case and
454
/// prefilters. Basically for the same reason we don't have prefilters or
455
/// vector algorithms for reverse searching: it's not clear it's worth doing.
456
/// Please file an issue if you have a compelling use case for fast reverse
457
/// substring search.
458
#[derive(Clone, Debug)]
459
enum SearcherRevKind {
460
    Empty,
461
    OneByte { needle: u8 },
462
    TwoWay { finder: twoway::FinderRev },
463
}
464
465
impl SearcherRev {
466
    /// Creates a new searcher for finding occurrences of the given needle in
467
    /// reverse. That is, it reports the last (instead of the first) occurrence
468
    /// of a needle in a haystack.
469
    #[inline]
470
0
    pub(crate) fn new(needle: &[u8]) -> SearcherRev {
471
0
        let kind = if needle.len() <= 1 {
472
0
            if needle.is_empty() {
473
                trace!("building empty reverse substring searcher");
474
0
                SearcherRevKind::Empty
475
            } else {
476
                trace!("building one-byte reverse substring searcher");
477
0
                debug_assert_eq!(1, needle.len());
478
0
                SearcherRevKind::OneByte { needle: needle[0] }
479
            }
480
        } else {
481
            trace!("building scalar two-way reverse substring searcher");
482
0
            let finder = twoway::FinderRev::new(needle);
483
0
            SearcherRevKind::TwoWay { finder }
484
        };
485
0
        let rabinkarp = rabinkarp::FinderRev::new(needle);
486
0
        SearcherRev { kind, rabinkarp }
487
0
    }
488
489
    /// Searches the given haystack for the last occurrence of the given
490
    /// needle. The needle given should be the same as the needle that this
491
    /// finder was initialized with.
492
    #[inline]
493
0
    pub(crate) fn rfind(
494
0
        &self,
495
0
        haystack: &[u8],
496
0
        needle: &[u8],
497
0
    ) -> Option<usize> {
498
0
        if haystack.len() < needle.len() {
499
0
            return None;
500
0
        }
501
0
        match self.kind {
502
0
            SearcherRevKind::Empty => Some(haystack.len()),
503
0
            SearcherRevKind::OneByte { needle } => {
504
0
                crate::memrchr(needle, haystack)
505
            }
506
0
            SearcherRevKind::TwoWay { ref finder } => {
507
0
                if rabinkarp::is_fast(haystack, needle) {
508
0
                    self.rabinkarp.rfind(haystack, needle)
509
                } else {
510
0
                    finder.rfind(haystack, needle)
511
                }
512
            }
513
        }
514
0
    }
515
}
516
517
/// Prefilter controls whether heuristics are used to accelerate searching.
518
///
519
/// A prefilter refers to the idea of detecting candidate matches very quickly,
520
/// and then confirming whether those candidates are full matches. This
521
/// idea can be quite effective since it's often the case that looking for
522
/// candidates can be a lot faster than running a complete substring search
523
/// over the entire input. Namely, looking for candidates can be done with
524
/// extremely fast vectorized code.
525
///
526
/// The downside of a prefilter is that it assumes false positives (which are
527
/// candidates generated by a prefilter that aren't matches) are somewhat rare
528
/// relative to the frequency of full matches. That is, if a lot of false
529
/// positives are generated, then it's possible for search time to be worse
530
/// than if the prefilter wasn't enabled in the first place.
531
///
532
/// Another downside of a prefilter is that it can result in highly variable
533
/// performance, where some cases are extraordinarily fast and others aren't.
534
/// Typically, variable performance isn't a problem, but it may be for your use
535
/// case.
536
///
537
/// The use of prefilters in this implementation does use a heuristic to detect
538
/// when a prefilter might not be carrying its weight, and will dynamically
539
/// disable its use. Nevertheless, this configuration option gives callers
540
/// the ability to disable prefilters if you have knowledge that they won't be
541
/// useful.
542
#[derive(Clone, Copy, Debug)]
543
#[non_exhaustive]
544
pub enum PrefilterConfig {
545
    /// Never used a prefilter in substring search.
546
    None,
547
    /// Automatically detect whether a heuristic prefilter should be used. If
548
    /// it is used, then heuristics will be used to dynamically disable the
549
    /// prefilter if it is believed to not be carrying its weight.
550
    Auto,
551
}
552
553
impl Default for PrefilterConfig {
554
0
    fn default() -> PrefilterConfig {
555
0
        PrefilterConfig::Auto
556
0
    }
557
}
558
559
impl PrefilterConfig {
560
    /// Returns true when this prefilter is set to the `None` variant.
561
0
    fn is_none(&self) -> bool {
562
0
        matches!(*self, PrefilterConfig::None)
563
0
    }
564
}
565
566
/// The implementation of a prefilter.
567
///
568
/// This type encapsulates dispatch to one of several possible choices for a
569
/// prefilter. Generally speaking, all prefilters have the same approximate
570
/// algorithm: they choose a couple of bytes from the needle that are believed
571
/// to be rare, use a fast vector algorithm to look for those bytes and return
572
/// positions as candidates for some substring search algorithm (currently only
573
/// Two-Way) to confirm as a match or not.
574
///
575
/// The differences between the algorithms are actually at the vector
576
/// implementation level. Namely, we need different routines based on both
577
/// which target architecture we're on and what CPU features are supported.
578
///
579
/// The straight-forwardly obvious approach here is to use an enum, and make
580
/// `Prefilter::find` do case analysis to determine which algorithm was
581
/// selected and invoke it. However, I've observed that this leads to poor
582
/// codegen in some cases, especially in latency sensitive benchmarks. That is,
583
/// this approach comes with overhead that I wasn't able to eliminate.
584
///
585
/// The second obvious approach is to use dynamic dispatch with traits. Doing
586
/// that in this context where `Prefilter` owns the selection generally
587
/// requires heap allocation, and this code is designed to run in core-only
588
/// environments.
589
///
590
/// So we settle on using a union (that's `PrefilterKind`) and a function
591
/// pointer (that's `PrefilterKindFn`). We select the right function pointer
592
/// based on which field in the union we set, and that function in turn
593
/// knows which field of the union to access. The downside of this approach
594
/// is that it forces us to think about safety, but the upside is that
595
/// there are some nice latency improvements to benchmarks. (Especially the
596
/// `memmem/sliceslice/short` benchmark.)
597
///
598
/// In cases where we've selected a vector algorithm and the haystack given
599
/// is too short, we fallback to the scalar version of `memchr` on the
600
/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive
601
/// byte-at-a-time loop because it will read in `usize`-sized chunks at a
602
/// time.)
603
#[derive(Clone, Copy)]
604
struct Prefilter {
605
    call: PrefilterKindFn,
606
    kind: PrefilterKind,
607
    rarest_byte: u8,
608
    rarest_offset: u8,
609
}
610
611
impl Prefilter {
612
    /// Return a "fallback" prefilter, but only if it is believed to be
613
    /// effective.
614
    #[inline]
615
0
    fn fallback<R: HeuristicFrequencyRank>(
616
0
        ranker: R,
617
0
        pair: Pair,
618
0
        needle: &[u8],
619
0
    ) -> Option<Prefilter> {
620
        /// The maximum frequency rank permitted for the fallback prefilter.
621
        /// If the rarest byte in the needle has a frequency rank above this
622
        /// value, then no prefilter is used if the fallback prefilter would
623
        /// otherwise be selected.
624
        const MAX_FALLBACK_RANK: u8 = 250;
625
626
        trace!("building fallback prefilter");
627
0
        let rarest_offset = pair.index1();
628
0
        let rarest_byte = needle[usize::from(rarest_offset)];
629
0
        let rarest_rank = ranker.rank(rarest_byte);
630
0
        if rarest_rank > MAX_FALLBACK_RANK {
631
0
            None
632
        } else {
633
0
            let finder =
634
0
                crate::arch::all::packedpair::Finder::with_pair(needle, pair)?;
635
0
            let call = prefilter_kind_fallback;
636
0
            let kind = PrefilterKind { fallback: finder };
637
0
            Some(Prefilter { call, kind, rarest_byte, rarest_offset })
638
        }
639
0
    }
640
641
    /// Return a prefilter using a x86_64 SSE2 vector algorithm.
642
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
643
    #[inline]
644
0
    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter {
645
        trace!("building x86_64 SSE2 prefilter");
646
0
        let rarest_offset = finder.pair().index1();
647
0
        let rarest_byte = needle[usize::from(rarest_offset)];
648
0
        Prefilter {
649
0
            call: prefilter_kind_sse2,
650
0
            kind: PrefilterKind { sse2: finder },
651
0
            rarest_byte,
652
0
            rarest_offset,
653
0
        }
654
0
    }
655
656
    /// Return a prefilter using a x86_64 AVX2 vector algorithm.
657
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
658
    #[inline]
659
0
    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter {
660
        trace!("building x86_64 AVX2 prefilter");
661
0
        let rarest_offset = finder.pair().index1();
662
0
        let rarest_byte = needle[usize::from(rarest_offset)];
663
0
        Prefilter {
664
0
            call: prefilter_kind_avx2,
665
0
            kind: PrefilterKind { avx2: finder },
666
0
            rarest_byte,
667
0
            rarest_offset,
668
0
        }
669
0
    }
670
671
    /// Return a prefilter using a wasm32 simd128 vector algorithm.
672
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
673
    #[inline]
674
    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter {
675
        trace!("building wasm32 simd128 prefilter");
676
        let rarest_offset = finder.pair().index1();
677
        let rarest_byte = needle[usize::from(rarest_offset)];
678
        Prefilter {
679
            call: prefilter_kind_simd128,
680
            kind: PrefilterKind { simd128: finder },
681
            rarest_byte,
682
            rarest_offset,
683
        }
684
    }
685
686
    /// Return a prefilter using a aarch64 neon vector algorithm.
687
    #[cfg(target_arch = "aarch64")]
688
    #[inline]
689
    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter {
690
        trace!("building aarch64 neon prefilter");
691
        let rarest_offset = finder.pair().index1();
692
        let rarest_byte = needle[usize::from(rarest_offset)];
693
        Prefilter {
694
            call: prefilter_kind_neon,
695
            kind: PrefilterKind { neon: finder },
696
            rarest_byte,
697
            rarest_offset,
698
        }
699
    }
700
701
    /// Return a *candidate* position for a match.
702
    ///
703
    /// When this returns an offset, it implies that a match could begin at
704
    /// that offset, but it may not. That is, it is possible for a false
705
    /// positive to be returned.
706
    ///
707
    /// When `None` is returned, then it is guaranteed that there are no
708
    /// matches for the needle in the given haystack. That is, it is impossible
709
    /// for a false negative to be returned.
710
    ///
711
    /// The purpose of this routine is to look for candidate matching positions
712
    /// as quickly as possible before running a (likely) slower confirmation
713
    /// step.
714
    #[inline]
715
0
    fn find(&self, haystack: &[u8]) -> Option<usize> {
716
        // SAFETY: By construction, we've ensured that the function in
717
        // `self.call` is properly paired with the union used in `self.kind`.
718
0
        unsafe { (self.call)(self, haystack) }
719
0
    }
720
721
    /// A "simple" prefilter that just looks for the occurrence of the rarest
722
    /// byte from the needle. This is generally only used for very small
723
    /// haystacks.
724
    #[inline]
725
0
    fn find_simple(&self, haystack: &[u8]) -> Option<usize> {
726
        // We don't use crate::memchr here because the haystack should be small
727
        // enough that memchr won't be able to use vector routines anyway. So
728
        // we just skip straight to the fallback implementation which is likely
729
        // faster. (A byte-at-a-time loop is only used when the haystack is
730
        // smaller than `size_of::<usize>()`.)
731
0
        crate::arch::all::memchr::One::new(self.rarest_byte)
732
0
            .find(haystack)
733
0
            .map(|i| i.saturating_sub(usize::from(self.rarest_offset)))
734
0
    }
735
}
736
737
impl core::fmt::Debug for Prefilter {
738
0
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
739
0
        f.debug_struct("Prefilter")
740
0
            .field("call", &"<prefilter function>")
741
0
            .field("kind", &"<prefilter kind union>")
742
0
            .field("rarest_byte", &self.rarest_byte)
743
0
            .field("rarest_offset", &self.rarest_offset)
744
0
            .finish()
745
0
    }
746
}
747
748
/// A union indicating one of several possible prefilters that are in active
749
/// use.
750
///
751
/// This union should only be read by one of the functions prefixed with
752
/// `prefilter_kind_`. Namely, the correct function is meant to be paired with
753
/// the union by the caller, such that the function always reads from the
754
/// designated union field.
755
#[derive(Clone, Copy)]
756
union PrefilterKind {
757
    fallback: crate::arch::all::packedpair::Finder,
758
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
759
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
760
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
761
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
762
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
763
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
764
    #[cfg(target_arch = "aarch64")]
765
    neon: crate::arch::aarch64::neon::packedpair::Finder,
766
}
767
768
/// The type of a prefilter function.
769
///
770
/// # Safety
771
///
772
/// When using a function of this type, callers must ensure that the correct
773
/// function is paired with the value populated in `PrefilterKind` union.
774
type PrefilterKindFn =
775
    unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>;
776
777
/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback
778
/// prefilter. Works on all platforms.
779
///
780
/// # Safety
781
///
782
/// Callers must ensure that the `strat.kind.fallback` union field is set.
783
0
unsafe fn prefilter_kind_fallback(
784
0
    strat: &Prefilter,
785
0
    haystack: &[u8],
786
0
) -> Option<usize> {
787
0
    strat.kind.fallback.find_prefilter(haystack)
788
0
}
789
790
/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2
791
/// prefilter.
792
///
793
/// # Safety
794
///
795
/// Callers must ensure that the `strat.kind.sse2` union field is set.
796
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
797
0
unsafe fn prefilter_kind_sse2(
798
0
    strat: &Prefilter,
799
0
    haystack: &[u8],
800
0
) -> Option<usize> {
801
0
    let finder = &strat.kind.sse2;
802
0
    if haystack.len() < finder.min_haystack_len() {
803
0
        strat.find_simple(haystack)
804
    } else {
805
0
        finder.find_prefilter(haystack)
806
    }
807
0
}
808
809
/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2
810
/// prefilter.
811
///
812
/// # Safety
813
///
814
/// Callers must ensure that the `strat.kind.avx2` union field is set.
815
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
816
0
unsafe fn prefilter_kind_avx2(
817
0
    strat: &Prefilter,
818
0
    haystack: &[u8],
819
0
) -> Option<usize> {
820
0
    let finder = &strat.kind.avx2;
821
0
    if haystack.len() < finder.min_haystack_len() {
822
0
        strat.find_simple(haystack)
823
    } else {
824
0
        finder.find_prefilter(haystack)
825
    }
826
0
}
827
828
/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32
829
/// simd128 prefilter.
830
///
831
/// # Safety
832
///
833
/// Callers must ensure that the `strat.kind.simd128` union field is set.
834
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
835
unsafe fn prefilter_kind_simd128(
836
    strat: &Prefilter,
837
    haystack: &[u8],
838
) -> Option<usize> {
839
    let finder = &strat.kind.simd128;
840
    if haystack.len() < finder.min_haystack_len() {
841
        strat.find_simple(haystack)
842
    } else {
843
        finder.find_prefilter(haystack)
844
    }
845
}
846
847
/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon
848
/// prefilter.
849
///
850
/// # Safety
851
///
852
/// Callers must ensure that the `strat.kind.neon` union field is set.
853
#[cfg(target_arch = "aarch64")]
854
unsafe fn prefilter_kind_neon(
855
    strat: &Prefilter,
856
    haystack: &[u8],
857
) -> Option<usize> {
858
    let finder = &strat.kind.neon;
859
    if haystack.len() < finder.min_haystack_len() {
860
        strat.find_simple(haystack)
861
    } else {
862
        finder.find_prefilter(haystack)
863
    }
864
}
865
866
/// PrefilterState tracks state associated with the effectiveness of a
867
/// prefilter. It is used to track how many bytes, on average, are skipped by
868
/// the prefilter. If this average dips below a certain threshold over time,
869
/// then the state renders the prefilter inert and stops using it.
870
///
871
/// A prefilter state should be created for each search. (Where creating an
872
/// iterator is treated as a single search.) A prefilter state should only be
873
/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert
874
/// `PrefilterState`.
875
#[derive(Clone, Copy, Debug)]
876
pub(crate) struct PrefilterState {
877
    /// The number of skips that has been executed. This is always 1 greater
878
    /// than the actual number of skips. The special sentinel value of 0
879
    /// indicates that the prefilter is inert. This is useful to avoid
880
    /// additional checks to determine whether the prefilter is still
881
    /// "effective." Once a prefilter becomes inert, it should no longer be
882
    /// used (according to our heuristics).
883
    skips: u32,
884
    /// The total number of bytes that have been skipped.
885
    skipped: u32,
886
}
887
888
impl PrefilterState {
889
    /// The minimum number of skip attempts to try before considering whether
890
    /// a prefilter is effective or not.
891
    const MIN_SKIPS: u32 = 50;
892
893
    /// The minimum amount of bytes that skipping must average.
894
    ///
895
    /// This value was chosen based on varying it and checking
896
    /// the microbenchmarks. In particular, this can impact the
897
    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
898
    /// too low.
899
    const MIN_SKIP_BYTES: u32 = 8;
900
901
    /// Create a fresh prefilter state.
902
    #[inline]
903
0
    pub(crate) fn new() -> PrefilterState {
904
0
        PrefilterState { skips: 1, skipped: 0 }
905
0
    }
906
907
    /// Update this state with the number of bytes skipped on the last
908
    /// invocation of the prefilter.
909
    #[inline]
910
0
    fn update(&mut self, skipped: usize) {
911
0
        self.skips = self.skips.saturating_add(1);
912
        // We need to do this dance since it's technically possible for
913
        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
914
        // size of a prefilter state.)
915
0
        self.skipped = match u32::try_from(skipped) {
916
0
            Err(_) => core::u32::MAX,
917
0
            Ok(skipped) => self.skipped.saturating_add(skipped),
918
        };
919
0
    }
920
921
    /// Return true if and only if this state indicates that a prefilter is
922
    /// still effective.
923
    #[inline]
924
0
    fn is_effective(&mut self) -> bool {
925
0
        if self.is_inert() {
926
0
            return false;
927
0
        }
928
0
        if self.skips() < PrefilterState::MIN_SKIPS {
929
0
            return true;
930
0
        }
931
0
        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
932
0
            return true;
933
0
        }
934
935
        // We're inert.
936
0
        self.skips = 0;
937
0
        false
938
0
    }
939
940
    /// Returns true if the prefilter this state represents should no longer
941
    /// be used.
942
    #[inline]
943
0
    fn is_inert(&self) -> bool {
944
0
        self.skips == 0
945
0
    }
946
947
    /// Returns the total number of times the prefilter has been used.
948
    #[inline]
949
0
    fn skips(&self) -> u32 {
950
        // Remember, `0` is a sentinel value indicating inertness, so we
951
        // always need to subtract `1` to get our actual number of skips.
952
0
        self.skips.saturating_sub(1)
953
0
    }
954
}
955
956
/// A combination of prefilter effectiveness state and the prefilter itself.
957
#[derive(Debug)]
958
pub(crate) struct Pre<'a> {
959
    /// State that tracks the effectiveness of a prefilter.
960
    prestate: &'a mut PrefilterState,
961
    /// The actual prefilter.
962
    prestrat: &'a Prefilter,
963
}
964
965
impl<'a> Pre<'a> {
966
    /// Call this prefilter on the given haystack with the given needle.
967
    #[inline]
968
0
    pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> {
969
0
        let result = self.prestrat.find(haystack);
970
0
        self.prestate.update(result.unwrap_or(haystack.len()));
971
0
        result
972
0
    }
973
974
    /// Return true if and only if this prefilter should be used.
975
    #[inline]
976
0
    pub(crate) fn is_effective(&mut self) -> bool {
977
0
        self.prestate.is_effective()
978
0
    }
979
}
980
981
/// Returns true if the needle has the right characteristics for a vector
982
/// algorithm to handle the entirety of substring search.
983
///
984
/// Vector algorithms can be used for prefilters for other substring search
985
/// algorithms (like Two-Way), but they can also be used for substring search
986
/// on their own. When used for substring search, vector algorithms will
987
/// quickly identify candidate match positions (just like in the prefilter
988
/// case), but instead of returning the candidate position they will try to
989
/// confirm the match themselves. Confirmation happens via `memcmp`. This
990
/// works well for short needles, but can break down when many false candidate
991
/// positions are generated for large needles. Thus, we only permit vector
992
/// algorithms to own substring search when the needle is of a certain length.
993
#[inline]
994
0
fn do_packed_search(needle: &[u8]) -> bool {
995
    /// The minimum length of a needle required for this algorithm. The minimum
996
    /// is 2 since a length of 1 should just use memchr and a length of 0 isn't
997
    /// a case handled by this searcher.
998
    const MIN_LEN: usize = 2;
999
1000
    /// The maximum length of a needle required for this algorithm.
1001
    ///
1002
    /// In reality, there is no hard max here. The code below can handle any
1003
    /// length needle. (Perhaps that suggests there are missing optimizations.)
1004
    /// Instead, this is a heuristic and a bound guaranteeing our linear time
1005
    /// complexity.
1006
    ///
1007
    /// It is a heuristic because when a candidate match is found, memcmp is
1008
    /// run. For very large needles with lots of false positives, memcmp can
1009
    /// make the code run quite slow.
1010
    ///
1011
    /// It is a bound because the worst case behavior with memcmp is
1012
    /// multiplicative in the size of the needle and haystack, and we want
1013
    /// to keep that additive. This bound ensures we still meet that bound
1014
    /// theoretically, since it's just a constant. We aren't acting in bad
1015
    /// faith here, memcmp on tiny needles is so fast that even in pathological
1016
    /// cases (see pathological vector benchmarks), this is still just as fast
1017
    /// or faster in practice.
1018
    ///
1019
    /// This specific number was chosen by tweaking a bit and running
1020
    /// benchmarks. The rare-medium-needle, for example, gets about 5% faster
1021
    /// by using this algorithm instead of a prefilter-accelerated Two-Way.
1022
    /// There's also a theoretical desire to keep this number reasonably
1023
    /// low, to mitigate the impact of pathological cases. I did try 64, and
1024
    /// some benchmarks got a little better, and others (particularly the
1025
    /// pathological ones), got a lot worse. So... 32 it is?
1026
    const MAX_LEN: usize = 32;
1027
0
    MIN_LEN <= needle.len() && needle.len() <= MAX_LEN
1028
0
}