/rust/registry/src/index.crates.io-6f17d22bba15001f/memchr-2.7.4/src/memmem/mod.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /*! |
2 | | This module provides forward and reverse substring search routines. |
3 | | |
4 | | Unlike the standard library's substring search routines, these work on |
5 | | arbitrary bytes. For all non-empty needles, these routines will report exactly |
6 | | the same values as the corresponding routines in the standard library. For |
7 | | the empty needle, the standard library reports matches only at valid UTF-8 |
8 | | boundaries, where as these routines will report matches at every position. |
9 | | |
10 | | Other than being able to work on arbitrary bytes, the primary reason to prefer |
11 | | these routines over the standard library routines is that these will generally |
12 | | be faster. In some cases, significantly so. |
13 | | |
14 | | # Example: iterating over substring matches |
15 | | |
16 | | This example shows how to use [`find_iter`] to find occurrences of a substring |
17 | | in a haystack. |
18 | | |
19 | | ``` |
20 | | use memchr::memmem; |
21 | | |
22 | | let haystack = b"foo bar foo baz foo"; |
23 | | |
24 | | let mut it = memmem::find_iter(haystack, "foo"); |
25 | | assert_eq!(Some(0), it.next()); |
26 | | assert_eq!(Some(8), it.next()); |
27 | | assert_eq!(Some(16), it.next()); |
28 | | assert_eq!(None, it.next()); |
29 | | ``` |
30 | | |
31 | | # Example: iterating over substring matches in reverse |
32 | | |
33 | | This example shows how to use [`rfind_iter`] to find occurrences of a substring |
34 | | in a haystack starting from the end of the haystack. |
35 | | |
36 | | **NOTE:** This module does not implement double ended iterators, so reverse |
37 | | searches aren't done by calling `rev` on a forward iterator. |
38 | | |
39 | | ``` |
40 | | use memchr::memmem; |
41 | | |
42 | | let haystack = b"foo bar foo baz foo"; |
43 | | |
44 | | let mut it = memmem::rfind_iter(haystack, "foo"); |
45 | | assert_eq!(Some(16), it.next()); |
46 | | assert_eq!(Some(8), it.next()); |
47 | | assert_eq!(Some(0), it.next()); |
48 | | assert_eq!(None, it.next()); |
49 | | ``` |
50 | | |
51 | | # Example: repeating a search for the same needle |
52 | | |
53 | | It may be possible for the overhead of constructing a substring searcher to be |
54 | | measurable in some workloads. In cases where the same needle is used to search |
55 | | many haystacks, it is possible to do construction once and thus to avoid it for |
56 | | subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for |
57 | | reverse searches). |
58 | | |
59 | | ``` |
60 | | use memchr::memmem; |
61 | | |
62 | | let finder = memmem::Finder::new("foo"); |
63 | | |
64 | | assert_eq!(Some(4), finder.find(b"baz foo quux")); |
65 | | assert_eq!(None, finder.find(b"quux baz bar")); |
66 | | ``` |
67 | | */ |
68 | | |
69 | | pub use crate::memmem::searcher::PrefilterConfig as Prefilter; |
70 | | |
71 | | // This is exported here for use in the crate::arch::all::twoway |
72 | | // implementation. This is essentially an abstraction breaker. Namely, the |
73 | | // public API of twoway doesn't support providing a prefilter, but its crate |
74 | | // internal API does. The main reason for this is that I didn't want to do the |
75 | | // API design required to support it without a concrete use case. |
76 | | pub(crate) use crate::memmem::searcher::Pre; |
77 | | |
78 | | use crate::{ |
79 | | arch::all::{ |
80 | | packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, |
81 | | rabinkarp, |
82 | | }, |
83 | | cow::CowBytes, |
84 | | memmem::searcher::{PrefilterState, Searcher, SearcherRev}, |
85 | | }; |
86 | | |
87 | | mod searcher; |
88 | | |
89 | | /// Returns an iterator over all non-overlapping occurrences of a substring in |
90 | | /// a haystack. |
91 | | /// |
92 | | /// # Complexity |
93 | | /// |
94 | | /// This routine is guaranteed to have worst case linear time complexity |
95 | | /// with respect to both the needle and the haystack. That is, this runs |
96 | | /// in `O(needle.len() + haystack.len())` time. |
97 | | /// |
98 | | /// This routine is also guaranteed to have worst case constant space |
99 | | /// complexity. |
100 | | /// |
101 | | /// # Examples |
102 | | /// |
103 | | /// Basic usage: |
104 | | /// |
105 | | /// ``` |
106 | | /// use memchr::memmem; |
107 | | /// |
108 | | /// let haystack = b"foo bar foo baz foo"; |
109 | | /// let mut it = memmem::find_iter(haystack, b"foo"); |
110 | | /// assert_eq!(Some(0), it.next()); |
111 | | /// assert_eq!(Some(8), it.next()); |
112 | | /// assert_eq!(Some(16), it.next()); |
113 | | /// assert_eq!(None, it.next()); |
114 | | /// ``` |
115 | | #[inline] |
116 | 0 | pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( |
117 | 0 | haystack: &'h [u8], |
118 | 0 | needle: &'n N, |
119 | 0 | ) -> FindIter<'h, 'n> { |
120 | 0 | FindIter::new(haystack, Finder::new(needle)) |
121 | 0 | } |
122 | | |
123 | | /// Returns a reverse iterator over all non-overlapping occurrences of a |
124 | | /// substring in a haystack. |
125 | | /// |
126 | | /// # Complexity |
127 | | /// |
128 | | /// This routine is guaranteed to have worst case linear time complexity |
129 | | /// with respect to both the needle and the haystack. That is, this runs |
130 | | /// in `O(needle.len() + haystack.len())` time. |
131 | | /// |
132 | | /// This routine is also guaranteed to have worst case constant space |
133 | | /// complexity. |
134 | | /// |
135 | | /// # Examples |
136 | | /// |
137 | | /// Basic usage: |
138 | | /// |
139 | | /// ``` |
140 | | /// use memchr::memmem; |
141 | | /// |
142 | | /// let haystack = b"foo bar foo baz foo"; |
143 | | /// let mut it = memmem::rfind_iter(haystack, b"foo"); |
144 | | /// assert_eq!(Some(16), it.next()); |
145 | | /// assert_eq!(Some(8), it.next()); |
146 | | /// assert_eq!(Some(0), it.next()); |
147 | | /// assert_eq!(None, it.next()); |
148 | | /// ``` |
149 | | #[inline] |
150 | 0 | pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( |
151 | 0 | haystack: &'h [u8], |
152 | 0 | needle: &'n N, |
153 | 0 | ) -> FindRevIter<'h, 'n> { |
154 | 0 | FindRevIter::new(haystack, FinderRev::new(needle)) |
155 | 0 | } |
156 | | |
157 | | /// Returns the index of the first occurrence of the given needle. |
158 | | /// |
159 | | /// Note that if you're are searching for the same needle in many different |
160 | | /// small haystacks, it may be faster to initialize a [`Finder`] once, |
161 | | /// and reuse it for each search. |
162 | | /// |
163 | | /// # Complexity |
164 | | /// |
165 | | /// This routine is guaranteed to have worst case linear time complexity |
166 | | /// with respect to both the needle and the haystack. That is, this runs |
167 | | /// in `O(needle.len() + haystack.len())` time. |
168 | | /// |
169 | | /// This routine is also guaranteed to have worst case constant space |
170 | | /// complexity. |
171 | | /// |
172 | | /// # Examples |
173 | | /// |
174 | | /// Basic usage: |
175 | | /// |
176 | | /// ``` |
177 | | /// use memchr::memmem; |
178 | | /// |
179 | | /// let haystack = b"foo bar baz"; |
180 | | /// assert_eq!(Some(0), memmem::find(haystack, b"foo")); |
181 | | /// assert_eq!(Some(4), memmem::find(haystack, b"bar")); |
182 | | /// assert_eq!(None, memmem::find(haystack, b"quux")); |
183 | | /// ``` |
184 | | #[inline] |
185 | 0 | pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { |
186 | 0 | if haystack.len() < 64 { |
187 | 0 | rabinkarp::Finder::new(needle).find(haystack, needle) |
188 | | } else { |
189 | 0 | Finder::new(needle).find(haystack) |
190 | | } |
191 | 0 | } |
192 | | |
193 | | /// Returns the index of the last occurrence of the given needle. |
194 | | /// |
195 | | /// Note that if you're are searching for the same needle in many different |
196 | | /// small haystacks, it may be faster to initialize a [`FinderRev`] once, |
197 | | /// and reuse it for each search. |
198 | | /// |
199 | | /// # Complexity |
200 | | /// |
201 | | /// This routine is guaranteed to have worst case linear time complexity |
202 | | /// with respect to both the needle and the haystack. That is, this runs |
203 | | /// in `O(needle.len() + haystack.len())` time. |
204 | | /// |
205 | | /// This routine is also guaranteed to have worst case constant space |
206 | | /// complexity. |
207 | | /// |
208 | | /// # Examples |
209 | | /// |
210 | | /// Basic usage: |
211 | | /// |
212 | | /// ``` |
213 | | /// use memchr::memmem; |
214 | | /// |
215 | | /// let haystack = b"foo bar baz"; |
216 | | /// assert_eq!(Some(0), memmem::rfind(haystack, b"foo")); |
217 | | /// assert_eq!(Some(4), memmem::rfind(haystack, b"bar")); |
218 | | /// assert_eq!(Some(8), memmem::rfind(haystack, b"ba")); |
219 | | /// assert_eq!(None, memmem::rfind(haystack, b"quux")); |
220 | | /// ``` |
221 | | #[inline] |
222 | 0 | pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { |
223 | 0 | if haystack.len() < 64 { |
224 | 0 | rabinkarp::FinderRev::new(needle).rfind(haystack, needle) |
225 | | } else { |
226 | 0 | FinderRev::new(needle).rfind(haystack) |
227 | | } |
228 | 0 | } |
229 | | |
230 | | /// An iterator over non-overlapping substring matches. |
231 | | /// |
232 | | /// Matches are reported by the byte offset at which they begin. |
233 | | /// |
234 | | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the |
235 | | /// needle. |
236 | | #[derive(Debug, Clone)] |
237 | | pub struct FindIter<'h, 'n> { |
238 | | haystack: &'h [u8], |
239 | | prestate: PrefilterState, |
240 | | finder: Finder<'n>, |
241 | | pos: usize, |
242 | | } |
243 | | |
244 | | impl<'h, 'n> FindIter<'h, 'n> { |
245 | | #[inline(always)] |
246 | 0 | pub(crate) fn new( |
247 | 0 | haystack: &'h [u8], |
248 | 0 | finder: Finder<'n>, |
249 | 0 | ) -> FindIter<'h, 'n> { |
250 | 0 | let prestate = PrefilterState::new(); |
251 | 0 | FindIter { haystack, prestate, finder, pos: 0 } |
252 | 0 | } |
253 | | |
254 | | /// Convert this iterator into its owned variant, such that it no longer |
255 | | /// borrows the finder and needle. |
256 | | /// |
257 | | /// If this is already an owned iterator, then this is a no-op. Otherwise, |
258 | | /// this copies the needle. |
259 | | /// |
260 | | /// This is only available when the `alloc` feature is enabled. |
261 | | #[cfg(feature = "alloc")] |
262 | | #[inline] |
263 | 0 | pub fn into_owned(self) -> FindIter<'h, 'static> { |
264 | 0 | FindIter { |
265 | 0 | haystack: self.haystack, |
266 | 0 | prestate: self.prestate, |
267 | 0 | finder: self.finder.into_owned(), |
268 | 0 | pos: self.pos, |
269 | 0 | } |
270 | 0 | } |
271 | | } |
272 | | |
273 | | impl<'h, 'n> Iterator for FindIter<'h, 'n> { |
274 | | type Item = usize; |
275 | | |
276 | 0 | fn next(&mut self) -> Option<usize> { |
277 | 0 | let needle = self.finder.needle(); |
278 | 0 | let haystack = self.haystack.get(self.pos..)?; |
279 | 0 | let idx = |
280 | 0 | self.finder.searcher.find(&mut self.prestate, haystack, needle)?; |
281 | | |
282 | 0 | let pos = self.pos + idx; |
283 | 0 | self.pos = pos + needle.len().max(1); |
284 | 0 |
|
285 | 0 | Some(pos) |
286 | 0 | } |
287 | | |
288 | 0 | fn size_hint(&self) -> (usize, Option<usize>) { |
289 | 0 | // The largest possible number of non-overlapping matches is the |
290 | 0 | // quotient of the haystack and the needle (or the length of the |
291 | 0 | // haystack, if the needle is empty) |
292 | 0 | match self.haystack.len().checked_sub(self.pos) { |
293 | 0 | None => (0, Some(0)), |
294 | 0 | Some(haystack_len) => match self.finder.needle().len() { |
295 | | // Empty needles always succeed and match at every point |
296 | | // (including the very end) |
297 | 0 | 0 => ( |
298 | 0 | haystack_len.saturating_add(1), |
299 | 0 | haystack_len.checked_add(1), |
300 | 0 | ), |
301 | 0 | needle_len => (0, Some(haystack_len / needle_len)), |
302 | | }, |
303 | | } |
304 | 0 | } |
305 | | } |
306 | | |
307 | | /// An iterator over non-overlapping substring matches in reverse. |
308 | | /// |
309 | | /// Matches are reported by the byte offset at which they begin. |
310 | | /// |
311 | | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the |
312 | | /// needle. |
313 | | #[derive(Clone, Debug)] |
314 | | pub struct FindRevIter<'h, 'n> { |
315 | | haystack: &'h [u8], |
316 | | finder: FinderRev<'n>, |
317 | | /// When searching with an empty needle, this gets set to `None` after |
318 | | /// we've yielded the last element at `0`. |
319 | | pos: Option<usize>, |
320 | | } |
321 | | |
322 | | impl<'h, 'n> FindRevIter<'h, 'n> { |
323 | | #[inline(always)] |
324 | 0 | pub(crate) fn new( |
325 | 0 | haystack: &'h [u8], |
326 | 0 | finder: FinderRev<'n>, |
327 | 0 | ) -> FindRevIter<'h, 'n> { |
328 | 0 | let pos = Some(haystack.len()); |
329 | 0 | FindRevIter { haystack, finder, pos } |
330 | 0 | } |
331 | | |
332 | | /// Convert this iterator into its owned variant, such that it no longer |
333 | | /// borrows the finder and needle. |
334 | | /// |
335 | | /// If this is already an owned iterator, then this is a no-op. Otherwise, |
336 | | /// this copies the needle. |
337 | | /// |
338 | | /// This is only available when the `std` feature is enabled. |
339 | | #[cfg(feature = "alloc")] |
340 | | #[inline] |
341 | 0 | pub fn into_owned(self) -> FindRevIter<'h, 'static> { |
342 | 0 | FindRevIter { |
343 | 0 | haystack: self.haystack, |
344 | 0 | finder: self.finder.into_owned(), |
345 | 0 | pos: self.pos, |
346 | 0 | } |
347 | 0 | } |
348 | | } |
349 | | |
350 | | impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { |
351 | | type Item = usize; |
352 | | |
353 | 0 | fn next(&mut self) -> Option<usize> { |
354 | 0 | let pos = match self.pos { |
355 | 0 | None => return None, |
356 | 0 | Some(pos) => pos, |
357 | 0 | }; |
358 | 0 | let result = self.finder.rfind(&self.haystack[..pos]); |
359 | 0 | match result { |
360 | 0 | None => None, |
361 | 0 | Some(i) => { |
362 | 0 | if pos == i { |
363 | 0 | self.pos = pos.checked_sub(1); |
364 | 0 | } else { |
365 | 0 | self.pos = Some(i); |
366 | 0 | } |
367 | 0 | Some(i) |
368 | | } |
369 | | } |
370 | 0 | } |
371 | | } |
372 | | |
373 | | /// A single substring searcher fixed to a particular needle. |
374 | | /// |
375 | | /// The purpose of this type is to permit callers to construct a substring |
376 | | /// searcher that can be used to search haystacks without the overhead of |
377 | | /// constructing the searcher in the first place. This is a somewhat niche |
378 | | /// concern when it's necessary to re-use the same needle to search multiple |
379 | | /// different haystacks with as little overhead as possible. In general, using |
380 | | /// [`find`] is good enough, but `Finder` is useful when you can meaningfully |
381 | | /// observe searcher construction time in a profile. |
382 | | /// |
383 | | /// When the `std` feature is enabled, then this type has an `into_owned` |
384 | | /// version which permits building a `Finder` that is not connected to |
385 | | /// the lifetime of its needle. |
386 | | #[derive(Clone, Debug)] |
387 | | pub struct Finder<'n> { |
388 | | needle: CowBytes<'n>, |
389 | | searcher: Searcher, |
390 | | } |
391 | | |
392 | | impl<'n> Finder<'n> { |
393 | | /// Create a new finder for the given needle. |
394 | | #[inline] |
395 | 0 | pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> { |
396 | 0 | FinderBuilder::new().build_forward(needle) |
397 | 0 | } |
398 | | |
399 | | /// Returns the index of the first occurrence of this needle in the given |
400 | | /// haystack. |
401 | | /// |
402 | | /// # Complexity |
403 | | /// |
404 | | /// This routine is guaranteed to have worst case linear time complexity |
405 | | /// with respect to both the needle and the haystack. That is, this runs |
406 | | /// in `O(needle.len() + haystack.len())` time. |
407 | | /// |
408 | | /// This routine is also guaranteed to have worst case constant space |
409 | | /// complexity. |
410 | | /// |
411 | | /// # Examples |
412 | | /// |
413 | | /// Basic usage: |
414 | | /// |
415 | | /// ``` |
416 | | /// use memchr::memmem::Finder; |
417 | | /// |
418 | | /// let haystack = b"foo bar baz"; |
419 | | /// assert_eq!(Some(0), Finder::new("foo").find(haystack)); |
420 | | /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); |
421 | | /// assert_eq!(None, Finder::new("quux").find(haystack)); |
422 | | /// ``` |
423 | | #[inline] |
424 | 0 | pub fn find(&self, haystack: &[u8]) -> Option<usize> { |
425 | 0 | let mut prestate = PrefilterState::new(); |
426 | 0 | let needle = self.needle.as_slice(); |
427 | 0 | self.searcher.find(&mut prestate, haystack, needle) |
428 | 0 | } |
429 | | |
430 | | /// Returns an iterator over all occurrences of a substring in a haystack. |
431 | | /// |
432 | | /// # Complexity |
433 | | /// |
434 | | /// This routine is guaranteed to have worst case linear time complexity |
435 | | /// with respect to both the needle and the haystack. That is, this runs |
436 | | /// in `O(needle.len() + haystack.len())` time. |
437 | | /// |
438 | | /// This routine is also guaranteed to have worst case constant space |
439 | | /// complexity. |
440 | | /// |
441 | | /// # Examples |
442 | | /// |
443 | | /// Basic usage: |
444 | | /// |
445 | | /// ``` |
446 | | /// use memchr::memmem::Finder; |
447 | | /// |
448 | | /// let haystack = b"foo bar foo baz foo"; |
449 | | /// let finder = Finder::new(b"foo"); |
450 | | /// let mut it = finder.find_iter(haystack); |
451 | | /// assert_eq!(Some(0), it.next()); |
452 | | /// assert_eq!(Some(8), it.next()); |
453 | | /// assert_eq!(Some(16), it.next()); |
454 | | /// assert_eq!(None, it.next()); |
455 | | /// ``` |
456 | | #[inline] |
457 | 0 | pub fn find_iter<'a, 'h>( |
458 | 0 | &'a self, |
459 | 0 | haystack: &'h [u8], |
460 | 0 | ) -> FindIter<'h, 'a> { |
461 | 0 | FindIter::new(haystack, self.as_ref()) |
462 | 0 | } |
463 | | |
464 | | /// Convert this finder into its owned variant, such that it no longer |
465 | | /// borrows the needle. |
466 | | /// |
467 | | /// If this is already an owned finder, then this is a no-op. Otherwise, |
468 | | /// this copies the needle. |
469 | | /// |
470 | | /// This is only available when the `alloc` feature is enabled. |
471 | | #[cfg(feature = "alloc")] |
472 | | #[inline] |
473 | 0 | pub fn into_owned(self) -> Finder<'static> { |
474 | 0 | Finder { |
475 | 0 | needle: self.needle.into_owned(), |
476 | 0 | searcher: self.searcher.clone(), |
477 | 0 | } |
478 | 0 | } |
479 | | |
480 | | /// Convert this finder into its borrowed variant. |
481 | | /// |
482 | | /// This is primarily useful if your finder is owned and you'd like to |
483 | | /// store its borrowed variant in some intermediate data structure. |
484 | | /// |
485 | | /// Note that the lifetime parameter of the returned finder is tied to the |
486 | | /// lifetime of `self`, and may be shorter than the `'n` lifetime of the |
487 | | /// needle itself. Namely, a finder's needle can be either borrowed or |
488 | | /// owned, so the lifetime of the needle returned must necessarily be the |
489 | | /// shorter of the two. |
490 | | #[inline] |
491 | 0 | pub fn as_ref(&self) -> Finder<'_> { |
492 | 0 | Finder { |
493 | 0 | needle: CowBytes::new(self.needle()), |
494 | 0 | searcher: self.searcher.clone(), |
495 | 0 | } |
496 | 0 | } |
497 | | |
498 | | /// Returns the needle that this finder searches for. |
499 | | /// |
500 | | /// Note that the lifetime of the needle returned is tied to the lifetime |
501 | | /// of the finder, and may be shorter than the `'n` lifetime. Namely, a |
502 | | /// finder's needle can be either borrowed or owned, so the lifetime of the |
503 | | /// needle returned must necessarily be the shorter of the two. |
504 | | #[inline] |
505 | 0 | pub fn needle(&self) -> &[u8] { |
506 | 0 | self.needle.as_slice() |
507 | 0 | } |
508 | | } |
509 | | |
510 | | /// A single substring reverse searcher fixed to a particular needle. |
511 | | /// |
512 | | /// The purpose of this type is to permit callers to construct a substring |
513 | | /// searcher that can be used to search haystacks without the overhead of |
514 | | /// constructing the searcher in the first place. This is a somewhat niche |
515 | | /// concern when it's necessary to re-use the same needle to search multiple |
516 | | /// different haystacks with as little overhead as possible. In general, |
517 | | /// using [`rfind`] is good enough, but `FinderRev` is useful when you can |
518 | | /// meaningfully observe searcher construction time in a profile. |
519 | | /// |
520 | | /// When the `std` feature is enabled, then this type has an `into_owned` |
521 | | /// version which permits building a `FinderRev` that is not connected to |
522 | | /// the lifetime of its needle. |
523 | | #[derive(Clone, Debug)] |
524 | | pub struct FinderRev<'n> { |
525 | | needle: CowBytes<'n>, |
526 | | searcher: SearcherRev, |
527 | | } |
528 | | |
529 | | impl<'n> FinderRev<'n> { |
530 | | /// Create a new reverse finder for the given needle. |
531 | | #[inline] |
532 | 0 | pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> { |
533 | 0 | FinderBuilder::new().build_reverse(needle) |
534 | 0 | } |
535 | | |
536 | | /// Returns the index of the last occurrence of this needle in the given |
537 | | /// haystack. |
538 | | /// |
539 | | /// The haystack may be any type that can be cheaply converted into a |
540 | | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
541 | | /// |
542 | | /// # Complexity |
543 | | /// |
544 | | /// This routine is guaranteed to have worst case linear time complexity |
545 | | /// with respect to both the needle and the haystack. That is, this runs |
546 | | /// in `O(needle.len() + haystack.len())` time. |
547 | | /// |
548 | | /// This routine is also guaranteed to have worst case constant space |
549 | | /// complexity. |
550 | | /// |
551 | | /// # Examples |
552 | | /// |
553 | | /// Basic usage: |
554 | | /// |
555 | | /// ``` |
556 | | /// use memchr::memmem::FinderRev; |
557 | | /// |
558 | | /// let haystack = b"foo bar baz"; |
559 | | /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack)); |
560 | | /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack)); |
561 | | /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); |
562 | | /// ``` |
563 | 0 | pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { |
564 | 0 | self.searcher.rfind(haystack.as_ref(), self.needle.as_slice()) |
565 | 0 | } |
566 | | |
567 | | /// Returns a reverse iterator over all occurrences of a substring in a |
568 | | /// haystack. |
569 | | /// |
570 | | /// # Complexity |
571 | | /// |
572 | | /// This routine is guaranteed to have worst case linear time complexity |
573 | | /// with respect to both the needle and the haystack. That is, this runs |
574 | | /// in `O(needle.len() + haystack.len())` time. |
575 | | /// |
576 | | /// This routine is also guaranteed to have worst case constant space |
577 | | /// complexity. |
578 | | /// |
579 | | /// # Examples |
580 | | /// |
581 | | /// Basic usage: |
582 | | /// |
583 | | /// ``` |
584 | | /// use memchr::memmem::FinderRev; |
585 | | /// |
586 | | /// let haystack = b"foo bar foo baz foo"; |
587 | | /// let finder = FinderRev::new(b"foo"); |
588 | | /// let mut it = finder.rfind_iter(haystack); |
589 | | /// assert_eq!(Some(16), it.next()); |
590 | | /// assert_eq!(Some(8), it.next()); |
591 | | /// assert_eq!(Some(0), it.next()); |
592 | | /// assert_eq!(None, it.next()); |
593 | | /// ``` |
594 | | #[inline] |
595 | 0 | pub fn rfind_iter<'a, 'h>( |
596 | 0 | &'a self, |
597 | 0 | haystack: &'h [u8], |
598 | 0 | ) -> FindRevIter<'h, 'a> { |
599 | 0 | FindRevIter::new(haystack, self.as_ref()) |
600 | 0 | } |
601 | | |
602 | | /// Convert this finder into its owned variant, such that it no longer |
603 | | /// borrows the needle. |
604 | | /// |
605 | | /// If this is already an owned finder, then this is a no-op. Otherwise, |
606 | | /// this copies the needle. |
607 | | /// |
608 | | /// This is only available when the `std` feature is enabled. |
609 | | #[cfg(feature = "alloc")] |
610 | | #[inline] |
611 | 0 | pub fn into_owned(self) -> FinderRev<'static> { |
612 | 0 | FinderRev { |
613 | 0 | needle: self.needle.into_owned(), |
614 | 0 | searcher: self.searcher.clone(), |
615 | 0 | } |
616 | 0 | } |
617 | | |
618 | | /// Convert this finder into its borrowed variant. |
619 | | /// |
620 | | /// This is primarily useful if your finder is owned and you'd like to |
621 | | /// store its borrowed variant in some intermediate data structure. |
622 | | /// |
623 | | /// Note that the lifetime parameter of the returned finder is tied to the |
624 | | /// lifetime of `self`, and may be shorter than the `'n` lifetime of the |
625 | | /// needle itself. Namely, a finder's needle can be either borrowed or |
626 | | /// owned, so the lifetime of the needle returned must necessarily be the |
627 | | /// shorter of the two. |
628 | | #[inline] |
629 | 0 | pub fn as_ref(&self) -> FinderRev<'_> { |
630 | 0 | FinderRev { |
631 | 0 | needle: CowBytes::new(self.needle()), |
632 | 0 | searcher: self.searcher.clone(), |
633 | 0 | } |
634 | 0 | } |
635 | | |
636 | | /// Returns the needle that this finder searches for. |
637 | | /// |
638 | | /// Note that the lifetime of the needle returned is tied to the lifetime |
639 | | /// of the finder, and may be shorter than the `'n` lifetime. Namely, a |
640 | | /// finder's needle can be either borrowed or owned, so the lifetime of the |
641 | | /// needle returned must necessarily be the shorter of the two. |
642 | | #[inline] |
643 | 0 | pub fn needle(&self) -> &[u8] { |
644 | 0 | self.needle.as_slice() |
645 | 0 | } |
646 | | } |
647 | | |
648 | | /// A builder for constructing non-default forward or reverse memmem finders. |
649 | | /// |
650 | | /// A builder is primarily useful for configuring a substring searcher. |
651 | | /// Currently, the only configuration exposed is the ability to disable |
652 | | /// heuristic prefilters used to speed up certain searches. |
653 | | #[derive(Clone, Debug, Default)] |
654 | | pub struct FinderBuilder { |
655 | | prefilter: Prefilter, |
656 | | } |
657 | | |
658 | | impl FinderBuilder { |
659 | | /// Create a new finder builder with default settings. |
660 | 0 | pub fn new() -> FinderBuilder { |
661 | 0 | FinderBuilder::default() |
662 | 0 | } |
663 | | |
664 | | /// Build a forward finder using the given needle from the current |
665 | | /// settings. |
666 | 0 | pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>( |
667 | 0 | &self, |
668 | 0 | needle: &'n B, |
669 | 0 | ) -> Finder<'n> { |
670 | 0 | self.build_forward_with_ranker(DefaultFrequencyRank, needle) |
671 | 0 | } |
672 | | |
673 | | /// Build a forward finder using the given needle and a custom heuristic for |
674 | | /// determining the frequency of a given byte in the dataset. |
675 | | /// See [`HeuristicFrequencyRank`] for more details. |
676 | 0 | pub fn build_forward_with_ranker< |
677 | 0 | 'n, |
678 | 0 | R: HeuristicFrequencyRank, |
679 | 0 | B: ?Sized + AsRef<[u8]>, |
680 | 0 | >( |
681 | 0 | &self, |
682 | 0 | ranker: R, |
683 | 0 | needle: &'n B, |
684 | 0 | ) -> Finder<'n> { |
685 | 0 | let needle = needle.as_ref(); |
686 | 0 | Finder { |
687 | 0 | needle: CowBytes::new(needle), |
688 | 0 | searcher: Searcher::new(self.prefilter, ranker, needle), |
689 | 0 | } |
690 | 0 | } |
691 | | |
692 | | /// Build a reverse finder using the given needle from the current |
693 | | /// settings. |
694 | 0 | pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>( |
695 | 0 | &self, |
696 | 0 | needle: &'n B, |
697 | 0 | ) -> FinderRev<'n> { |
698 | 0 | let needle = needle.as_ref(); |
699 | 0 | FinderRev { |
700 | 0 | needle: CowBytes::new(needle), |
701 | 0 | searcher: SearcherRev::new(needle), |
702 | 0 | } |
703 | 0 | } |
704 | | |
705 | | /// Configure the prefilter setting for the finder. |
706 | | /// |
707 | | /// See the documentation for [`Prefilter`] for more discussion on why |
708 | | /// you might want to configure this. |
709 | 0 | pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { |
710 | 0 | self.prefilter = prefilter; |
711 | 0 | self |
712 | 0 | } |
713 | | } |
714 | | |
715 | | #[cfg(test)] |
716 | | mod tests { |
717 | | use super::*; |
718 | | |
719 | | define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h))); |
720 | | define_substring_reverse_quickcheck!(|h, n| Some( |
721 | | FinderRev::new(n).rfind(h) |
722 | | )); |
723 | | |
724 | | #[test] |
725 | | fn forward() { |
726 | | crate::tests::substring::Runner::new() |
727 | | .fwd(|h, n| Some(Finder::new(n).find(h))) |
728 | | .run(); |
729 | | } |
730 | | |
731 | | #[test] |
732 | | fn reverse() { |
733 | | crate::tests::substring::Runner::new() |
734 | | .rev(|h, n| Some(FinderRev::new(n).rfind(h))) |
735 | | .run(); |
736 | | } |
737 | | } |