Coverage Report

Created: 2025-07-23 06:18

/rust/registry/src/index.crates.io-6f17d22bba15001f/memchr-2.5.0/src/memmem/mod.rs
Line
Count
Source (jump to first uncovered line)
1
/*!
2
This module provides forward and reverse substring search routines.
3
4
Unlike the standard library's substring search routines, these work on
5
arbitrary bytes. For all non-empty needles, these routines will report exactly
6
the same values as the corresponding routines in the standard library. For
7
the empty needle, the standard library reports matches only at valid UTF-8
8
boundaries, where as these routines will report matches at every position.
9
10
Other than being able to work on arbitrary bytes, the primary reason to prefer
11
these routines over the standard library routines is that these will generally
12
be faster. In some cases, significantly so.
13
14
# Example: iterating over substring matches
15
16
This example shows how to use [`find_iter`] to find occurrences of a substring
17
in a haystack.
18
19
```
20
use memchr::memmem;
21
22
let haystack = b"foo bar foo baz foo";
23
24
let mut it = memmem::find_iter(haystack, "foo");
25
assert_eq!(Some(0), it.next());
26
assert_eq!(Some(8), it.next());
27
assert_eq!(Some(16), it.next());
28
assert_eq!(None, it.next());
29
```
30
31
# Example: iterating over substring matches in reverse
32
33
This example shows how to use [`rfind_iter`] to find occurrences of a substring
34
in a haystack starting from the end of the haystack.
35
36
**NOTE:** This module does not implement double ended iterators, so reverse
37
searches aren't done by calling `rev` on a forward iterator.
38
39
```
40
use memchr::memmem;
41
42
let haystack = b"foo bar foo baz foo";
43
44
let mut it = memmem::rfind_iter(haystack, "foo");
45
assert_eq!(Some(16), it.next());
46
assert_eq!(Some(8), it.next());
47
assert_eq!(Some(0), it.next());
48
assert_eq!(None, it.next());
49
```
50
51
# Example: repeating a search for the same needle
52
53
It may be possible for the overhead of constructing a substring searcher to be
54
measurable in some workloads. In cases where the same needle is used to search
55
many haystacks, it is possible to do construction once and thus to avoid it for
56
subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for
57
reverse searches).
58
59
```
60
use memchr::memmem;
61
62
let finder = memmem::Finder::new("foo");
63
64
assert_eq!(Some(4), finder.find(b"baz foo quux"));
65
assert_eq!(None, finder.find(b"quux baz bar"));
66
```
67
*/
68
69
pub use self::prefilter::Prefilter;
70
71
use crate::{
72
    cow::CowBytes,
73
    memmem::{
74
        prefilter::{Pre, PrefilterFn, PrefilterState},
75
        rabinkarp::NeedleHash,
76
        rarebytes::RareNeedleBytes,
77
    },
78
};
79
80
/// Defines a suite of quickcheck properties for forward and reverse
81
/// substring searching.
82
///
83
/// This is defined in this specific spot so that it can be used freely among
84
/// the different substring search implementations. I couldn't be bothered to
85
/// fight with the macro-visibility rules enough to figure out how to stuff it
86
/// somewhere more convenient.
87
#[cfg(all(test, feature = "std"))]
88
macro_rules! define_memmem_quickcheck_tests {
89
    ($fwd:expr, $rev:expr) => {
90
        use crate::memmem::proptests;
91
92
        quickcheck::quickcheck! {
93
            fn qc_fwd_prefix_is_substring(bs: Vec<u8>) -> bool {
94
                proptests::prefix_is_substring(false, &bs, $fwd)
95
            }
96
97
            fn qc_fwd_suffix_is_substring(bs: Vec<u8>) -> bool {
98
                proptests::suffix_is_substring(false, &bs, $fwd)
99
            }
100
101
            fn qc_fwd_matches_naive(
102
                haystack: Vec<u8>,
103
                needle: Vec<u8>
104
            ) -> bool {
105
                proptests::matches_naive(false, &haystack, &needle, $fwd)
106
            }
107
108
            fn qc_rev_prefix_is_substring(bs: Vec<u8>) -> bool {
109
                proptests::prefix_is_substring(true, &bs, $rev)
110
            }
111
112
            fn qc_rev_suffix_is_substring(bs: Vec<u8>) -> bool {
113
                proptests::suffix_is_substring(true, &bs, $rev)
114
            }
115
116
            fn qc_rev_matches_naive(
117
                haystack: Vec<u8>,
118
                needle: Vec<u8>
119
            ) -> bool {
120
                proptests::matches_naive(true, &haystack, &needle, $rev)
121
            }
122
        }
123
    };
124
}
125
126
/// Defines a suite of "simple" hand-written tests for a substring
127
/// implementation.
128
///
129
/// This is defined here for the same reason that
130
/// define_memmem_quickcheck_tests is defined here.
131
#[cfg(test)]
132
macro_rules! define_memmem_simple_tests {
133
    ($fwd:expr, $rev:expr) => {
134
        use crate::memmem::testsimples;
135
136
        #[test]
137
        fn simple_forward() {
138
            testsimples::run_search_tests_fwd($fwd);
139
        }
140
141
        #[test]
142
        fn simple_reverse() {
143
            testsimples::run_search_tests_rev($rev);
144
        }
145
    };
146
}
147
148
mod byte_frequencies;
149
#[cfg(memchr_runtime_simd)]
150
mod genericsimd;
151
mod prefilter;
152
mod rabinkarp;
153
mod rarebytes;
154
mod twoway;
155
mod util;
156
#[cfg(memchr_runtime_simd)]
157
mod vector;
158
#[cfg(all(memchr_runtime_wasm128))]
159
mod wasm;
160
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
161
mod x86;
162
163
/// Returns an iterator over all non-overlapping occurrences of a substring in
164
/// a haystack.
165
///
166
/// # Complexity
167
///
168
/// This routine is guaranteed to have worst case linear time complexity
169
/// with respect to both the needle and the haystack. That is, this runs
170
/// in `O(needle.len() + haystack.len())` time.
171
///
172
/// This routine is also guaranteed to have worst case constant space
173
/// complexity.
174
///
175
/// # Examples
176
///
177
/// Basic usage:
178
///
179
/// ```
180
/// use memchr::memmem;
181
///
182
/// let haystack = b"foo bar foo baz foo";
183
/// let mut it = memmem::find_iter(haystack, b"foo");
184
/// assert_eq!(Some(0), it.next());
185
/// assert_eq!(Some(8), it.next());
186
/// assert_eq!(Some(16), it.next());
187
/// assert_eq!(None, it.next());
188
/// ```
189
#[inline]
190
0
pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
191
0
    haystack: &'h [u8],
192
0
    needle: &'n N,
193
0
) -> FindIter<'h, 'n> {
194
0
    FindIter::new(haystack, Finder::new(needle))
195
0
}
196
197
/// Returns a reverse iterator over all non-overlapping occurrences of a
198
/// substring in a haystack.
199
///
200
/// # Complexity
201
///
202
/// This routine is guaranteed to have worst case linear time complexity
203
/// with respect to both the needle and the haystack. That is, this runs
204
/// in `O(needle.len() + haystack.len())` time.
205
///
206
/// This routine is also guaranteed to have worst case constant space
207
/// complexity.
208
///
209
/// # Examples
210
///
211
/// Basic usage:
212
///
213
/// ```
214
/// use memchr::memmem;
215
///
216
/// let haystack = b"foo bar foo baz foo";
217
/// let mut it = memmem::rfind_iter(haystack, b"foo");
218
/// assert_eq!(Some(16), it.next());
219
/// assert_eq!(Some(8), it.next());
220
/// assert_eq!(Some(0), it.next());
221
/// assert_eq!(None, it.next());
222
/// ```
223
#[inline]
224
0
pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
225
0
    haystack: &'h [u8],
226
0
    needle: &'n N,
227
0
) -> FindRevIter<'h, 'n> {
228
0
    FindRevIter::new(haystack, FinderRev::new(needle))
229
0
}
230
231
/// Returns the index of the first occurrence of the given needle.
232
///
233
/// Note that if you're are searching for the same needle in many different
234
/// small haystacks, it may be faster to initialize a [`Finder`] once,
235
/// and reuse it for each search.
236
///
237
/// # Complexity
238
///
239
/// This routine is guaranteed to have worst case linear time complexity
240
/// with respect to both the needle and the haystack. That is, this runs
241
/// in `O(needle.len() + haystack.len())` time.
242
///
243
/// This routine is also guaranteed to have worst case constant space
244
/// complexity.
245
///
246
/// # Examples
247
///
248
/// Basic usage:
249
///
250
/// ```
251
/// use memchr::memmem;
252
///
253
/// let haystack = b"foo bar baz";
254
/// assert_eq!(Some(0), memmem::find(haystack, b"foo"));
255
/// assert_eq!(Some(4), memmem::find(haystack, b"bar"));
256
/// assert_eq!(None, memmem::find(haystack, b"quux"));
257
/// ```
258
#[inline]
259
0
pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
260
0
    if haystack.len() < 64 {
261
0
        rabinkarp::find(haystack, needle)
262
    } else {
263
0
        Finder::new(needle).find(haystack)
264
    }
265
0
}
266
267
/// Returns the index of the last occurrence of the given needle.
268
///
269
/// Note that if you're are searching for the same needle in many different
270
/// small haystacks, it may be faster to initialize a [`FinderRev`] once,
271
/// and reuse it for each search.
272
///
273
/// # Complexity
274
///
275
/// This routine is guaranteed to have worst case linear time complexity
276
/// with respect to both the needle and the haystack. That is, this runs
277
/// in `O(needle.len() + haystack.len())` time.
278
///
279
/// This routine is also guaranteed to have worst case constant space
280
/// complexity.
281
///
282
/// # Examples
283
///
284
/// Basic usage:
285
///
286
/// ```
287
/// use memchr::memmem;
288
///
289
/// let haystack = b"foo bar baz";
290
/// assert_eq!(Some(0), memmem::rfind(haystack, b"foo"));
291
/// assert_eq!(Some(4), memmem::rfind(haystack, b"bar"));
292
/// assert_eq!(Some(8), memmem::rfind(haystack, b"ba"));
293
/// assert_eq!(None, memmem::rfind(haystack, b"quux"));
294
/// ```
295
#[inline]
296
0
pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
297
0
    if haystack.len() < 64 {
298
0
        rabinkarp::rfind(haystack, needle)
299
    } else {
300
0
        FinderRev::new(needle).rfind(haystack)
301
    }
302
0
}
303
304
/// An iterator over non-overlapping substring matches.
305
///
306
/// Matches are reported by the byte offset at which they begin.
307
///
308
/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
309
/// needle.
310
#[derive(Debug)]
311
pub struct FindIter<'h, 'n> {
312
    haystack: &'h [u8],
313
    prestate: PrefilterState,
314
    finder: Finder<'n>,
315
    pos: usize,
316
}
317
318
impl<'h, 'n> FindIter<'h, 'n> {
319
    #[inline(always)]
320
0
    pub(crate) fn new(
321
0
        haystack: &'h [u8],
322
0
        finder: Finder<'n>,
323
0
    ) -> FindIter<'h, 'n> {
324
0
        let prestate = finder.searcher.prefilter_state();
325
0
        FindIter { haystack, prestate, finder, pos: 0 }
326
0
    }
327
328
    /// Convert this iterator into its owned variant, such that it no longer
329
    /// borrows the finder and needle.
330
    ///
331
    /// If this is already an owned iterator, then this is a no-op. Otherwise,
332
    /// this copies the needle.
333
    ///
334
    /// This is only available when the `std` feature is enabled.
335
    #[cfg(feature = "std")]
336
    #[inline]
337
0
    pub fn into_owned(self) -> FindIter<'h, 'static> {
338
0
        FindIter {
339
0
            haystack: self.haystack,
340
0
            prestate: self.prestate,
341
0
            finder: self.finder.into_owned(),
342
0
            pos: self.pos,
343
0
        }
344
0
    }
345
}
346
347
impl<'h, 'n> Iterator for FindIter<'h, 'n> {
348
    type Item = usize;
349
350
0
    fn next(&mut self) -> Option<usize> {
351
0
        if self.pos > self.haystack.len() {
352
0
            return None;
353
0
        }
354
0
        let result = self
355
0
            .finder
356
0
            .searcher
357
0
            .find(&mut self.prestate, &self.haystack[self.pos..]);
358
0
        match result {
359
0
            None => None,
360
0
            Some(i) => {
361
0
                let pos = self.pos + i;
362
0
                self.pos = pos + core::cmp::max(1, self.finder.needle().len());
363
0
                Some(pos)
364
            }
365
        }
366
0
    }
367
}
368
369
/// An iterator over non-overlapping substring matches in reverse.
370
///
371
/// Matches are reported by the byte offset at which they begin.
372
///
373
/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
374
/// needle.
375
#[derive(Debug)]
376
pub struct FindRevIter<'h, 'n> {
377
    haystack: &'h [u8],
378
    finder: FinderRev<'n>,
379
    /// When searching with an empty needle, this gets set to `None` after
380
    /// we've yielded the last element at `0`.
381
    pos: Option<usize>,
382
}
383
384
impl<'h, 'n> FindRevIter<'h, 'n> {
385
    #[inline(always)]
386
0
    pub(crate) fn new(
387
0
        haystack: &'h [u8],
388
0
        finder: FinderRev<'n>,
389
0
    ) -> FindRevIter<'h, 'n> {
390
0
        let pos = Some(haystack.len());
391
0
        FindRevIter { haystack, finder, pos }
392
0
    }
393
394
    /// Convert this iterator into its owned variant, such that it no longer
395
    /// borrows the finder and needle.
396
    ///
397
    /// If this is already an owned iterator, then this is a no-op. Otherwise,
398
    /// this copies the needle.
399
    ///
400
    /// This is only available when the `std` feature is enabled.
401
    #[cfg(feature = "std")]
402
    #[inline]
403
0
    pub fn into_owned(self) -> FindRevIter<'h, 'static> {
404
0
        FindRevIter {
405
0
            haystack: self.haystack,
406
0
            finder: self.finder.into_owned(),
407
0
            pos: self.pos,
408
0
        }
409
0
    }
410
}
411
412
impl<'h, 'n> Iterator for FindRevIter<'h, 'n> {
413
    type Item = usize;
414
415
0
    fn next(&mut self) -> Option<usize> {
416
0
        let pos = match self.pos {
417
0
            None => return None,
418
0
            Some(pos) => pos,
419
0
        };
420
0
        let result = self.finder.rfind(&self.haystack[..pos]);
421
0
        match result {
422
0
            None => None,
423
0
            Some(i) => {
424
0
                if pos == i {
425
0
                    self.pos = pos.checked_sub(1);
426
0
                } else {
427
0
                    self.pos = Some(i);
428
0
                }
429
0
                Some(i)
430
            }
431
        }
432
0
    }
433
}
434
435
/// A single substring searcher fixed to a particular needle.
436
///
437
/// The purpose of this type is to permit callers to construct a substring
438
/// searcher that can be used to search haystacks without the overhead of
439
/// constructing the searcher in the first place. This is a somewhat niche
440
/// concern when it's necessary to re-use the same needle to search multiple
441
/// different haystacks with as little overhead as possible. In general, using
442
/// [`find`] is good enough, but `Finder` is useful when you can meaningfully
443
/// observe searcher construction time in a profile.
444
///
445
/// When the `std` feature is enabled, then this type has an `into_owned`
446
/// version which permits building a `Finder` that is not connected to
447
/// the lifetime of its needle.
448
#[derive(Clone, Debug)]
449
pub struct Finder<'n> {
450
    searcher: Searcher<'n>,
451
}
452
453
impl<'n> Finder<'n> {
454
    /// Create a new finder for the given needle.
455
    #[inline]
456
0
    pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> {
457
0
        FinderBuilder::new().build_forward(needle)
458
0
    }
459
460
    /// Returns the index of the first occurrence of this needle in the given
461
    /// haystack.
462
    ///
463
    /// # Complexity
464
    ///
465
    /// This routine is guaranteed to have worst case linear time complexity
466
    /// with respect to both the needle and the haystack. That is, this runs
467
    /// in `O(needle.len() + haystack.len())` time.
468
    ///
469
    /// This routine is also guaranteed to have worst case constant space
470
    /// complexity.
471
    ///
472
    /// # Examples
473
    ///
474
    /// Basic usage:
475
    ///
476
    /// ```
477
    /// use memchr::memmem::Finder;
478
    ///
479
    /// let haystack = b"foo bar baz";
480
    /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
481
    /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
482
    /// assert_eq!(None, Finder::new("quux").find(haystack));
483
    /// ```
484
0
    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
485
0
        self.searcher.find(&mut self.searcher.prefilter_state(), haystack)
486
0
    }
487
488
    /// Returns an iterator over all occurrences of a substring in a haystack.
489
    ///
490
    /// # Complexity
491
    ///
492
    /// This routine is guaranteed to have worst case linear time complexity
493
    /// with respect to both the needle and the haystack. That is, this runs
494
    /// in `O(needle.len() + haystack.len())` time.
495
    ///
496
    /// This routine is also guaranteed to have worst case constant space
497
    /// complexity.
498
    ///
499
    /// # Examples
500
    ///
501
    /// Basic usage:
502
    ///
503
    /// ```
504
    /// use memchr::memmem::Finder;
505
    ///
506
    /// let haystack = b"foo bar foo baz foo";
507
    /// let finder = Finder::new(b"foo");
508
    /// let mut it = finder.find_iter(haystack);
509
    /// assert_eq!(Some(0), it.next());
510
    /// assert_eq!(Some(8), it.next());
511
    /// assert_eq!(Some(16), it.next());
512
    /// assert_eq!(None, it.next());
513
    /// ```
514
    #[inline]
515
0
    pub fn find_iter<'a, 'h>(
516
0
        &'a self,
517
0
        haystack: &'h [u8],
518
0
    ) -> FindIter<'h, 'a> {
519
0
        FindIter::new(haystack, self.as_ref())
520
0
    }
521
522
    /// Convert this finder into its owned variant, such that it no longer
523
    /// borrows the needle.
524
    ///
525
    /// If this is already an owned finder, then this is a no-op. Otherwise,
526
    /// this copies the needle.
527
    ///
528
    /// This is only available when the `std` feature is enabled.
529
    #[cfg(feature = "std")]
530
    #[inline]
531
0
    pub fn into_owned(self) -> Finder<'static> {
532
0
        Finder { searcher: self.searcher.into_owned() }
533
0
    }
534
535
    /// Convert this finder into its borrowed variant.
536
    ///
537
    /// This is primarily useful if your finder is owned and you'd like to
538
    /// store its borrowed variant in some intermediate data structure.
539
    ///
540
    /// Note that the lifetime parameter of the returned finder is tied to the
541
    /// lifetime of `self`, and may be shorter than the `'n` lifetime of the
542
    /// needle itself. Namely, a finder's needle can be either borrowed or
543
    /// owned, so the lifetime of the needle returned must necessarily be the
544
    /// shorter of the two.
545
    #[inline]
546
0
    pub fn as_ref(&self) -> Finder<'_> {
547
0
        Finder { searcher: self.searcher.as_ref() }
548
0
    }
549
550
    /// Returns the needle that this finder searches for.
551
    ///
552
    /// Note that the lifetime of the needle returned is tied to the lifetime
553
    /// of the finder, and may be shorter than the `'n` lifetime. Namely, a
554
    /// finder's needle can be either borrowed or owned, so the lifetime of the
555
    /// needle returned must necessarily be the shorter of the two.
556
    #[inline]
557
0
    pub fn needle(&self) -> &[u8] {
558
0
        self.searcher.needle()
559
0
    }
560
}
561
562
/// A single substring reverse searcher fixed to a particular needle.
563
///
564
/// The purpose of this type is to permit callers to construct a substring
565
/// searcher that can be used to search haystacks without the overhead of
566
/// constructing the searcher in the first place. This is a somewhat niche
567
/// concern when it's necessary to re-use the same needle to search multiple
568
/// different haystacks with as little overhead as possible. In general,
569
/// using [`rfind`] is good enough, but `FinderRev` is useful when you can
570
/// meaningfully observe searcher construction time in a profile.
571
///
572
/// When the `std` feature is enabled, then this type has an `into_owned`
573
/// version which permits building a `FinderRev` that is not connected to
574
/// the lifetime of its needle.
575
#[derive(Clone, Debug)]
576
pub struct FinderRev<'n> {
577
    searcher: SearcherRev<'n>,
578
}
579
580
impl<'n> FinderRev<'n> {
581
    /// Create a new reverse finder for the given needle.
582
    #[inline]
583
0
    pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> {
584
0
        FinderBuilder::new().build_reverse(needle)
585
0
    }
586
587
    /// Returns the index of the last occurrence of this needle in the given
588
    /// haystack.
589
    ///
590
    /// The haystack may be any type that can be cheaply converted into a
591
    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
592
    ///
593
    /// # Complexity
594
    ///
595
    /// This routine is guaranteed to have worst case linear time complexity
596
    /// with respect to both the needle and the haystack. That is, this runs
597
    /// in `O(needle.len() + haystack.len())` time.
598
    ///
599
    /// This routine is also guaranteed to have worst case constant space
600
    /// complexity.
601
    ///
602
    /// # Examples
603
    ///
604
    /// Basic usage:
605
    ///
606
    /// ```
607
    /// use memchr::memmem::FinderRev;
608
    ///
609
    /// let haystack = b"foo bar baz";
610
    /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack));
611
    /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack));
612
    /// assert_eq!(None, FinderRev::new("quux").rfind(haystack));
613
    /// ```
614
0
    pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
615
0
        self.searcher.rfind(haystack.as_ref())
616
0
    }
617
618
    /// Returns a reverse iterator over all occurrences of a substring in a
619
    /// haystack.
620
    ///
621
    /// # Complexity
622
    ///
623
    /// This routine is guaranteed to have worst case linear time complexity
624
    /// with respect to both the needle and the haystack. That is, this runs
625
    /// in `O(needle.len() + haystack.len())` time.
626
    ///
627
    /// This routine is also guaranteed to have worst case constant space
628
    /// complexity.
629
    ///
630
    /// # Examples
631
    ///
632
    /// Basic usage:
633
    ///
634
    /// ```
635
    /// use memchr::memmem::FinderRev;
636
    ///
637
    /// let haystack = b"foo bar foo baz foo";
638
    /// let finder = FinderRev::new(b"foo");
639
    /// let mut it = finder.rfind_iter(haystack);
640
    /// assert_eq!(Some(16), it.next());
641
    /// assert_eq!(Some(8), it.next());
642
    /// assert_eq!(Some(0), it.next());
643
    /// assert_eq!(None, it.next());
644
    /// ```
645
    #[inline]
646
0
    pub fn rfind_iter<'a, 'h>(
647
0
        &'a self,
648
0
        haystack: &'h [u8],
649
0
    ) -> FindRevIter<'h, 'a> {
650
0
        FindRevIter::new(haystack, self.as_ref())
651
0
    }
652
653
    /// Convert this finder into its owned variant, such that it no longer
654
    /// borrows the needle.
655
    ///
656
    /// If this is already an owned finder, then this is a no-op. Otherwise,
657
    /// this copies the needle.
658
    ///
659
    /// This is only available when the `std` feature is enabled.
660
    #[cfg(feature = "std")]
661
    #[inline]
662
0
    pub fn into_owned(self) -> FinderRev<'static> {
663
0
        FinderRev { searcher: self.searcher.into_owned() }
664
0
    }
665
666
    /// Convert this finder into its borrowed variant.
667
    ///
668
    /// This is primarily useful if your finder is owned and you'd like to
669
    /// store its borrowed variant in some intermediate data structure.
670
    ///
671
    /// Note that the lifetime parameter of the returned finder is tied to the
672
    /// lifetime of `self`, and may be shorter than the `'n` lifetime of the
673
    /// needle itself. Namely, a finder's needle can be either borrowed or
674
    /// owned, so the lifetime of the needle returned must necessarily be the
675
    /// shorter of the two.
676
    #[inline]
677
0
    pub fn as_ref(&self) -> FinderRev<'_> {
678
0
        FinderRev { searcher: self.searcher.as_ref() }
679
0
    }
680
681
    /// Returns the needle that this finder searches for.
682
    ///
683
    /// Note that the lifetime of the needle returned is tied to the lifetime
684
    /// of the finder, and may be shorter than the `'n` lifetime. Namely, a
685
    /// finder's needle can be either borrowed or owned, so the lifetime of the
686
    /// needle returned must necessarily be the shorter of the two.
687
    #[inline]
688
0
    pub fn needle(&self) -> &[u8] {
689
0
        self.searcher.needle()
690
0
    }
691
}
692
693
/// A builder for constructing non-default forward or reverse memmem finders.
694
///
695
/// A builder is primarily useful for configuring a substring searcher.
696
/// Currently, the only configuration exposed is the ability to disable
697
/// heuristic prefilters used to speed up certain searches.
698
#[derive(Clone, Debug, Default)]
699
pub struct FinderBuilder {
700
    config: SearcherConfig,
701
}
702
703
impl FinderBuilder {
704
    /// Create a new finder builder with default settings.
705
0
    pub fn new() -> FinderBuilder {
706
0
        FinderBuilder::default()
707
0
    }
708
709
    /// Build a forward finder using the given needle from the current
710
    /// settings.
711
0
    pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>(
712
0
        &self,
713
0
        needle: &'n B,
714
0
    ) -> Finder<'n> {
715
0
        Finder { searcher: Searcher::new(self.config, needle.as_ref()) }
716
0
    }
717
718
    /// Build a reverse finder using the given needle from the current
719
    /// settings.
720
0
    pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>(
721
0
        &self,
722
0
        needle: &'n B,
723
0
    ) -> FinderRev<'n> {
724
0
        FinderRev { searcher: SearcherRev::new(needle.as_ref()) }
725
0
    }
726
727
    /// Configure the prefilter setting for the finder.
728
    ///
729
    /// See the documentation for [`Prefilter`] for more discussion on why
730
    /// you might want to configure this.
731
0
    pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder {
732
0
        self.config.prefilter = prefilter;
733
0
        self
734
0
    }
735
}
736
737
/// The internal implementation of a forward substring searcher.
738
///
739
/// The reality is that this is a "meta" searcher. Namely, depending on a
740
/// variety of parameters (CPU support, target, needle size, haystack size and
741
/// even dynamic properties such as prefilter effectiveness), the actual
742
/// algorithm employed to do substring search may change.
743
#[derive(Clone, Debug)]
744
struct Searcher<'n> {
745
    /// The actual needle we're searching for.
746
    ///
747
    /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is
748
    /// specialized to a single variant (the borrowed form).
749
    needle: CowBytes<'n>,
750
    /// A collection of facts computed on the needle that are useful for more
751
    /// than one substring search algorithm.
752
    ninfo: NeedleInfo,
753
    /// A prefilter function, if it was deemed appropriate.
754
    ///
755
    /// Some substring search implementations (like Two-Way) benefit greatly
756
    /// if we can quickly find candidate starting positions for a match.
757
    prefn: Option<PrefilterFn>,
758
    /// The actual substring implementation in use.
759
    kind: SearcherKind,
760
}
761
762
/// A collection of facts computed about a search needle.
763
///
764
/// We group these things together because it's useful to be able to hand them
765
/// to prefilters or substring algorithms that want them.
766
#[derive(Clone, Copy, Debug)]
767
pub(crate) struct NeedleInfo {
768
    /// The offsets of "rare" bytes detected in the needle.
769
    ///
770
    /// This is meant to be a heuristic in order to maximize the effectiveness
771
    /// of vectorized code. Namely, vectorized code tends to focus on only
772
    /// one or two bytes. If we pick bytes from the needle that occur
773
    /// infrequently, then more time will be spent in the vectorized code and
774
    /// will likely make the overall search (much) faster.
775
    ///
776
    /// Of course, this is only a heuristic based on a background frequency
777
    /// distribution of bytes. But it tends to work very well in practice.
778
    pub(crate) rarebytes: RareNeedleBytes,
779
    /// A Rabin-Karp hash of the needle.
780
    ///
781
    /// This is store here instead of in a more specific Rabin-Karp search
782
    /// since Rabin-Karp may be used even if another SearchKind corresponds
783
    /// to some other search implementation. e.g., If measurements suggest RK
784
    /// is faster in some cases or if a search implementation can't handle
785
    /// particularly small haystack. (Moreover, we cannot use RK *generally*,
786
    /// since its worst case time is multiplicative. Instead, we only use it
787
    /// some small haystacks, where "small" is a constant.)
788
    pub(crate) nhash: NeedleHash,
789
}
790
791
/// Configuration for substring search.
792
#[derive(Clone, Copy, Debug, Default)]
793
struct SearcherConfig {
794
    /// This permits changing the behavior of the prefilter, since it can have
795
    /// a variable impact on performance.
796
    prefilter: Prefilter,
797
}
798
799
#[derive(Clone, Debug)]
800
enum SearcherKind {
801
    /// A special case for empty needles. An empty needle always matches, even
802
    /// in an empty haystack.
803
    Empty,
804
    /// This is used whenever the needle is a single byte. In this case, we
805
    /// always use memchr.
806
    OneByte(u8),
807
    /// Two-Way is the generic work horse and is what provides our additive
808
    /// linear time guarantee. In general, it's used when the needle is bigger
809
    /// than 8 bytes or so.
810
    TwoWay(twoway::Forward),
811
    #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
812
    GenericSIMD128(x86::sse::Forward),
813
    #[cfg(memchr_runtime_wasm128)]
814
    GenericSIMD128(wasm::Forward),
815
    #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
816
    GenericSIMD256(x86::avx::Forward),
817
}
818
819
impl<'n> Searcher<'n> {
820
0
    fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
821
        use self::SearcherKind::*;
822
823
0
        let ninfo = NeedleInfo::new(needle);
824
0
        let mk = |kind: SearcherKind| {
825
0
            let prefn = prefilter::forward(
826
0
                &config.prefilter,
827
0
                &ninfo.rarebytes,
828
0
                needle,
829
0
            );
830
0
            Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
831
0
        };
832
0
        if needle.len() == 0 {
833
0
            return mk(Empty);
834
0
        }
835
0
        if needle.len() == 1 {
836
0
            return mk(OneByte(needle[0]));
837
0
        }
838
        #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
839
        {
840
0
            if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) {
841
0
                return mk(GenericSIMD256(fwd));
842
0
            } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) {
843
0
                return mk(GenericSIMD128(fwd));
844
0
            }
845
0
        }
846
0
        #[cfg(all(target_arch = "wasm32", memchr_runtime_simd))]
847
0
        {
848
0
            if let Some(fwd) = wasm::Forward::new(&ninfo, needle) {
849
0
                return mk(GenericSIMD128(fwd));
850
0
            }
851
0
        }
852
0
853
0
        mk(TwoWay(twoway::Forward::new(needle)))
854
0
    }
855
856
    /// Return a fresh prefilter state that can be used with this searcher.
857
    /// A prefilter state is used to track the effectiveness of a searcher's
858
    /// prefilter for speeding up searches. Therefore, the prefilter state
859
    /// should generally be reused on subsequent searches (such as in an
860
    /// iterator). For searches on a different haystack, then a new prefilter
861
    /// state should be used.
862
    ///
863
    /// This always initializes a valid (but possibly inert) prefilter state
864
    /// even if this searcher does not have a prefilter enabled.
865
0
    fn prefilter_state(&self) -> PrefilterState {
866
0
        if self.prefn.is_none() {
867
0
            PrefilterState::inert()
868
        } else {
869
0
            PrefilterState::new()
870
        }
871
0
    }
872
873
0
    fn needle(&self) -> &[u8] {
874
0
        self.needle.as_slice()
875
0
    }
876
877
0
    fn as_ref(&self) -> Searcher<'_> {
878
        use self::SearcherKind::*;
879
880
0
        let kind = match self.kind {
881
0
            Empty => Empty,
882
0
            OneByte(b) => OneByte(b),
883
0
            TwoWay(tw) => TwoWay(tw),
884
            #[cfg(all(not(miri), memchr_runtime_simd))]
885
0
            GenericSIMD128(gs) => GenericSIMD128(gs),
886
            #[cfg(all(
887
                not(miri),
888
                target_arch = "x86_64",
889
                memchr_runtime_simd
890
            ))]
891
0
            GenericSIMD256(gs) => GenericSIMD256(gs),
892
        };
893
0
        Searcher {
894
0
            needle: CowBytes::new(self.needle()),
895
0
            ninfo: self.ninfo,
896
0
            prefn: self.prefn,
897
0
            kind,
898
0
        }
899
0
    }
900
901
    #[cfg(feature = "std")]
902
0
    fn into_owned(self) -> Searcher<'static> {
903
        use self::SearcherKind::*;
904
905
0
        let kind = match self.kind {
906
0
            Empty => Empty,
907
0
            OneByte(b) => OneByte(b),
908
0
            TwoWay(tw) => TwoWay(tw),
909
            #[cfg(all(not(miri), memchr_runtime_simd))]
910
0
            GenericSIMD128(gs) => GenericSIMD128(gs),
911
            #[cfg(all(
912
                not(miri),
913
                target_arch = "x86_64",
914
                memchr_runtime_simd
915
            ))]
916
0
            GenericSIMD256(gs) => GenericSIMD256(gs),
917
        };
918
0
        Searcher {
919
0
            needle: self.needle.into_owned(),
920
0
            ninfo: self.ninfo,
921
0
            prefn: self.prefn,
922
0
            kind,
923
0
        }
924
0
    }
925
926
    /// Implements forward substring search by selecting the implementation
927
    /// chosen at construction and executing it on the given haystack with the
928
    /// prefilter's current state of effectiveness.
929
    #[inline(always)]
930
0
    fn find(
931
0
        &self,
932
0
        state: &mut PrefilterState,
933
0
        haystack: &[u8],
934
0
    ) -> Option<usize> {
935
        use self::SearcherKind::*;
936
937
0
        let needle = self.needle();
938
0
        if haystack.len() < needle.len() {
939
0
            return None;
940
0
        }
941
0
        match self.kind {
942
0
            Empty => Some(0),
943
0
            OneByte(b) => crate::memchr(b, haystack),
944
0
            TwoWay(ref tw) => {
945
0
                // For very short haystacks (e.g., where the prefilter probably
946
0
                // can't run), it's faster to just run RK.
947
0
                if rabinkarp::is_fast(haystack, needle) {
948
0
                    rabinkarp::find_with(&self.ninfo.nhash, haystack, needle)
949
                } else {
950
0
                    self.find_tw(tw, state, haystack, needle)
951
                }
952
            }
953
            #[cfg(all(not(miri), memchr_runtime_simd))]
954
0
            GenericSIMD128(ref gs) => {
955
0
                // The SIMD matcher can't handle particularly short haystacks,
956
0
                // so we fall back to RK in these cases.
957
0
                if haystack.len() < gs.min_haystack_len() {
958
0
                    rabinkarp::find_with(&self.ninfo.nhash, haystack, needle)
959
                } else {
960
0
                    gs.find(haystack, needle)
961
                }
962
            }
963
            #[cfg(all(
964
                not(miri),
965
                target_arch = "x86_64",
966
                memchr_runtime_simd
967
            ))]
968
0
            GenericSIMD256(ref gs) => {
969
0
                // The SIMD matcher can't handle particularly short haystacks,
970
0
                // so we fall back to RK in these cases.
971
0
                if haystack.len() < gs.min_haystack_len() {
972
0
                    rabinkarp::find_with(&self.ninfo.nhash, haystack, needle)
973
                } else {
974
0
                    gs.find(haystack, needle)
975
                }
976
            }
977
        }
978
0
    }
979
980
    /// Calls Two-Way on the given haystack/needle.
981
    ///
982
    /// This is marked as unlineable since it seems to have a better overall
983
    /// effect on benchmarks. However, this is one of those cases where
984
    /// inlining it results an improvement in other benchmarks too, so I
985
    /// suspect we just don't have enough data yet to make the right call here.
986
    ///
987
    /// I suspect the main problem is that this function contains two different
988
    /// inlined copies of Two-Way: one with and one without prefilters enabled.
989
    #[inline(never)]
990
0
    fn find_tw(
991
0
        &self,
992
0
        tw: &twoway::Forward,
993
0
        state: &mut PrefilterState,
994
0
        haystack: &[u8],
995
0
        needle: &[u8],
996
0
    ) -> Option<usize> {
997
0
        if let Some(prefn) = self.prefn {
998
            // We used to look at the length of a haystack here. That is, if
999
            // it was too small, then don't bother with the prefilter. But two
1000
            // things changed: the prefilter falls back to memchr for small
1001
            // haystacks, and, above, Rabin-Karp is employed for tiny haystacks
1002
            // anyway.
1003
0
            if state.is_effective() {
1004
0
                let mut pre = Pre { state, prefn, ninfo: &self.ninfo };
1005
0
                return tw.find(Some(&mut pre), haystack, needle);
1006
0
            }
1007
0
        }
1008
0
        tw.find(None, haystack, needle)
1009
0
    }
1010
}
1011
1012
impl NeedleInfo {
1013
0
    pub(crate) fn new(needle: &[u8]) -> NeedleInfo {
1014
0
        NeedleInfo {
1015
0
            rarebytes: RareNeedleBytes::forward(needle),
1016
0
            nhash: NeedleHash::forward(needle),
1017
0
        }
1018
0
    }
1019
}
1020
1021
/// The internal implementation of a reverse substring searcher.
1022
///
1023
/// See the forward searcher docs for more details. Currently, the reverse
1024
/// searcher is considerably simpler since it lacks prefilter support. This
1025
/// was done because it adds a lot of code, and more surface area to test. And
1026
/// in particular, it's not clear whether a prefilter on reverse searching is
1027
/// worth it. (If you have a compelling use case, please file an issue!)
1028
#[derive(Clone, Debug)]
1029
struct SearcherRev<'n> {
1030
    /// The actual needle we're searching for.
1031
    needle: CowBytes<'n>,
1032
    /// A Rabin-Karp hash of the needle.
1033
    nhash: NeedleHash,
1034
    /// The actual substring implementation in use.
1035
    kind: SearcherRevKind,
1036
}
1037
1038
#[derive(Clone, Debug)]
1039
enum SearcherRevKind {
1040
    /// A special case for empty needles. An empty needle always matches, even
1041
    /// in an empty haystack.
1042
    Empty,
1043
    /// This is used whenever the needle is a single byte. In this case, we
1044
    /// always use memchr.
1045
    OneByte(u8),
1046
    /// Two-Way is the generic work horse and is what provides our additive
1047
    /// linear time guarantee. In general, it's used when the needle is bigger
1048
    /// than 8 bytes or so.
1049
    TwoWay(twoway::Reverse),
1050
}
1051
1052
impl<'n> SearcherRev<'n> {
1053
0
    fn new(needle: &'n [u8]) -> SearcherRev<'n> {
1054
        use self::SearcherRevKind::*;
1055
1056
0
        let kind = if needle.len() == 0 {
1057
0
            Empty
1058
0
        } else if needle.len() == 1 {
1059
0
            OneByte(needle[0])
1060
        } else {
1061
0
            TwoWay(twoway::Reverse::new(needle))
1062
        };
1063
0
        SearcherRev {
1064
0
            needle: CowBytes::new(needle),
1065
0
            nhash: NeedleHash::reverse(needle),
1066
0
            kind,
1067
0
        }
1068
0
    }
1069
1070
0
    fn needle(&self) -> &[u8] {
1071
0
        self.needle.as_slice()
1072
0
    }
1073
1074
0
    fn as_ref(&self) -> SearcherRev<'_> {
1075
        use self::SearcherRevKind::*;
1076
1077
0
        let kind = match self.kind {
1078
0
            Empty => Empty,
1079
0
            OneByte(b) => OneByte(b),
1080
0
            TwoWay(tw) => TwoWay(tw),
1081
        };
1082
0
        SearcherRev {
1083
0
            needle: CowBytes::new(self.needle()),
1084
0
            nhash: self.nhash,
1085
0
            kind,
1086
0
        }
1087
0
    }
1088
1089
    #[cfg(feature = "std")]
1090
0
    fn into_owned(self) -> SearcherRev<'static> {
1091
        use self::SearcherRevKind::*;
1092
1093
0
        let kind = match self.kind {
1094
0
            Empty => Empty,
1095
0
            OneByte(b) => OneByte(b),
1096
0
            TwoWay(tw) => TwoWay(tw),
1097
        };
1098
0
        SearcherRev {
1099
0
            needle: self.needle.into_owned(),
1100
0
            nhash: self.nhash,
1101
0
            kind,
1102
0
        }
1103
0
    }
1104
1105
    /// Implements reverse substring search by selecting the implementation
1106
    /// chosen at construction and executing it on the given haystack with the
1107
    /// prefilter's current state of effectiveness.
1108
    #[inline(always)]
1109
0
    fn rfind(&self, haystack: &[u8]) -> Option<usize> {
1110
        use self::SearcherRevKind::*;
1111
1112
0
        let needle = self.needle();
1113
0
        if haystack.len() < needle.len() {
1114
0
            return None;
1115
0
        }
1116
0
        match self.kind {
1117
0
            Empty => Some(haystack.len()),
1118
0
            OneByte(b) => crate::memrchr(b, haystack),
1119
0
            TwoWay(ref tw) => {
1120
0
                // For very short haystacks (e.g., where the prefilter probably
1121
0
                // can't run), it's faster to just run RK.
1122
0
                if rabinkarp::is_fast(haystack, needle) {
1123
0
                    rabinkarp::rfind_with(&self.nhash, haystack, needle)
1124
                } else {
1125
0
                    tw.rfind(haystack, needle)
1126
                }
1127
            }
1128
        }
1129
0
    }
1130
}
1131
1132
/// This module defines some generic quickcheck properties useful for testing
1133
/// any substring search algorithm. It also runs those properties for the
1134
/// top-level public API memmem routines. (The properties are also used to
1135
/// test various substring search implementations more granularly elsewhere as
1136
/// well.)
1137
#[cfg(all(test, feature = "std", not(miri)))]
1138
mod proptests {
1139
    // N.B. This defines the quickcheck tests using the properties defined
1140
    // below. Because of macro-visibility weirdness, the actual macro is
1141
    // defined at the top of this file.
1142
    define_memmem_quickcheck_tests!(super::find, super::rfind);
1143
1144
    /// Check that every prefix of the given byte string is a substring.
1145
    pub(crate) fn prefix_is_substring(
1146
        reverse: bool,
1147
        bs: &[u8],
1148
        mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1149
    ) -> bool {
1150
        if bs.is_empty() {
1151
            return true;
1152
        }
1153
        for i in 0..(bs.len() - 1) {
1154
            let prefix = &bs[..i];
1155
            if reverse {
1156
                assert_eq!(naive_rfind(bs, prefix), search(bs, prefix));
1157
            } else {
1158
                assert_eq!(naive_find(bs, prefix), search(bs, prefix));
1159
            }
1160
        }
1161
        true
1162
    }
1163
1164
    /// Check that every suffix of the given byte string is a substring.
1165
    pub(crate) fn suffix_is_substring(
1166
        reverse: bool,
1167
        bs: &[u8],
1168
        mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1169
    ) -> bool {
1170
        if bs.is_empty() {
1171
            return true;
1172
        }
1173
        for i in 0..(bs.len() - 1) {
1174
            let suffix = &bs[i..];
1175
            if reverse {
1176
                assert_eq!(naive_rfind(bs, suffix), search(bs, suffix));
1177
            } else {
1178
                assert_eq!(naive_find(bs, suffix), search(bs, suffix));
1179
            }
1180
        }
1181
        true
1182
    }
1183
1184
    /// Check that naive substring search matches the result of the given search
1185
    /// algorithm.
1186
    pub(crate) fn matches_naive(
1187
        reverse: bool,
1188
        haystack: &[u8],
1189
        needle: &[u8],
1190
        mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1191
    ) -> bool {
1192
        if reverse {
1193
            naive_rfind(haystack, needle) == search(haystack, needle)
1194
        } else {
1195
            naive_find(haystack, needle) == search(haystack, needle)
1196
        }
1197
    }
1198
1199
    /// Naively search forwards for the given needle in the given haystack.
1200
    fn naive_find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
1201
        if needle.is_empty() {
1202
            return Some(0);
1203
        } else if haystack.len() < needle.len() {
1204
            return None;
1205
        }
1206
        for i in 0..(haystack.len() - needle.len() + 1) {
1207
            if needle == &haystack[i..i + needle.len()] {
1208
                return Some(i);
1209
            }
1210
        }
1211
        None
1212
    }
1213
1214
    /// Naively search in reverse for the given needle in the given haystack.
1215
    fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
1216
        if needle.is_empty() {
1217
            return Some(haystack.len());
1218
        } else if haystack.len() < needle.len() {
1219
            return None;
1220
        }
1221
        for i in (0..(haystack.len() - needle.len() + 1)).rev() {
1222
            if needle == &haystack[i..i + needle.len()] {
1223
                return Some(i);
1224
            }
1225
        }
1226
        None
1227
    }
1228
}
1229
1230
/// This module defines some hand-written "simple" substring tests. It
1231
/// also provides routines for easily running them on any substring search
1232
/// implementation.
1233
#[cfg(test)]
1234
mod testsimples {
1235
    define_memmem_simple_tests!(super::find, super::rfind);
1236
1237
    /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple.
1238
    type SearchTest =
1239
        (&'static str, &'static str, Option<usize>, Option<usize>);
1240
1241
    const SEARCH_TESTS: &'static [SearchTest] = &[
1242
        ("", "", Some(0), Some(0)),
1243
        ("", "a", Some(0), Some(1)),
1244
        ("", "ab", Some(0), Some(2)),
1245
        ("", "abc", Some(0), Some(3)),
1246
        ("a", "", None, None),
1247
        ("a", "a", Some(0), Some(0)),
1248
        ("a", "aa", Some(0), Some(1)),
1249
        ("a", "ba", Some(1), Some(1)),
1250
        ("a", "bba", Some(2), Some(2)),
1251
        ("a", "bbba", Some(3), Some(3)),
1252
        ("a", "bbbab", Some(3), Some(3)),
1253
        ("a", "bbbabb", Some(3), Some(3)),
1254
        ("a", "bbbabbb", Some(3), Some(3)),
1255
        ("a", "bbbbbb", None, None),
1256
        ("ab", "", None, None),
1257
        ("ab", "a", None, None),
1258
        ("ab", "b", None, None),
1259
        ("ab", "ab", Some(0), Some(0)),
1260
        ("ab", "aab", Some(1), Some(1)),
1261
        ("ab", "aaab", Some(2), Some(2)),
1262
        ("ab", "abaab", Some(0), Some(3)),
1263
        ("ab", "baaab", Some(3), Some(3)),
1264
        ("ab", "acb", None, None),
1265
        ("ab", "abba", Some(0), Some(0)),
1266
        ("abc", "ab", None, None),
1267
        ("abc", "abc", Some(0), Some(0)),
1268
        ("abc", "abcz", Some(0), Some(0)),
1269
        ("abc", "abczz", Some(0), Some(0)),
1270
        ("abc", "zabc", Some(1), Some(1)),
1271
        ("abc", "zzabc", Some(2), Some(2)),
1272
        ("abc", "azbc", None, None),
1273
        ("abc", "abzc", None, None),
1274
        ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)),
1275
        ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)),
1276
        ("xyz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", Some(32), Some(32)),
1277
        // Failures caught by quickcheck.
1278
        ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)),
1279
        ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None),
1280
    ];
1281
1282
    /// Run the substring search tests. `search` should be a closure that
1283
    /// accepts a haystack and a needle and returns the starting position
1284
    /// of the first occurrence of needle in the haystack, or `None` if one
1285
    /// doesn't exist.
1286
    pub(crate) fn run_search_tests_fwd(
1287
        mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1288
    ) {
1289
        for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS {
1290
            let (n, h) = (needle.as_bytes(), haystack.as_bytes());
1291
            assert_eq!(
1292
                expected_fwd,
1293
                search(h, n),
1294
                "needle: {:?}, haystack: {:?}, expected: {:?}",
1295
                n,
1296
                h,
1297
                expected_fwd
1298
            );
1299
        }
1300
    }
1301
1302
    /// Run the substring search tests. `search` should be a closure that
1303
    /// accepts a haystack and a needle and returns the starting position of
1304
    /// the last occurrence of needle in the haystack, or `None` if one doesn't
1305
    /// exist.
1306
    pub(crate) fn run_search_tests_rev(
1307
        mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1308
    ) {
1309
        for &(needle, haystack, _, expected_rev) in SEARCH_TESTS {
1310
            let (n, h) = (needle.as_bytes(), haystack.as_bytes());
1311
            assert_eq!(
1312
                expected_rev,
1313
                search(h, n),
1314
                "needle: {:?}, haystack: {:?}, expected: {:?}",
1315
                n,
1316
                h,
1317
                expected_rev
1318
            );
1319
        }
1320
    }
1321
}