/rust/registry/src/index.crates.io-1949cf8c6b5b557f/memchr-2.7.6/src/memmem/mod.rs
Line  | Count  | Source  | 
1  |  | /*!  | 
2  |  | This module provides forward and reverse substring search routines.  | 
3  |  |  | 
4  |  | Unlike the standard library's substring search routines, these work on  | 
5  |  | arbitrary bytes. For all non-empty needles, these routines will report exactly  | 
6  |  | the same values as the corresponding routines in the standard library. For  | 
7  |  | the empty needle, the standard library reports matches only at valid UTF-8  | 
8  |  | boundaries, where as these routines will report matches at every position.  | 
9  |  |  | 
10  |  | Other than being able to work on arbitrary bytes, the primary reason to prefer  | 
11  |  | these routines over the standard library routines is that these will generally  | 
12  |  | be faster. In some cases, significantly so.  | 
13  |  |  | 
14  |  | # Example: iterating over substring matches  | 
15  |  |  | 
16  |  | This example shows how to use [`find_iter`] to find occurrences of a substring  | 
17  |  | in a haystack.  | 
18  |  |  | 
19  |  | ```  | 
20  |  | use memchr::memmem;  | 
21  |  |  | 
22  |  | let haystack = b"foo bar foo baz foo";  | 
23  |  |  | 
24  |  | let mut it = memmem::find_iter(haystack, "foo");  | 
25  |  | assert_eq!(Some(0), it.next());  | 
26  |  | assert_eq!(Some(8), it.next());  | 
27  |  | assert_eq!(Some(16), it.next());  | 
28  |  | assert_eq!(None, it.next());  | 
29  |  | ```  | 
30  |  |  | 
31  |  | # Example: iterating over substring matches in reverse  | 
32  |  |  | 
33  |  | This example shows how to use [`rfind_iter`] to find occurrences of a substring  | 
34  |  | in a haystack starting from the end of the haystack.  | 
35  |  |  | 
36  |  | **NOTE:** This module does not implement double ended iterators, so reverse  | 
37  |  | searches aren't done by calling `rev` on a forward iterator.  | 
38  |  |  | 
39  |  | ```  | 
40  |  | use memchr::memmem;  | 
41  |  |  | 
42  |  | let haystack = b"foo bar foo baz foo";  | 
43  |  |  | 
44  |  | let mut it = memmem::rfind_iter(haystack, "foo");  | 
45  |  | assert_eq!(Some(16), it.next());  | 
46  |  | assert_eq!(Some(8), it.next());  | 
47  |  | assert_eq!(Some(0), it.next());  | 
48  |  | assert_eq!(None, it.next());  | 
49  |  | ```  | 
50  |  |  | 
51  |  | # Example: repeating a search for the same needle  | 
52  |  |  | 
53  |  | It may be possible for the overhead of constructing a substring searcher to be  | 
54  |  | measurable in some workloads. In cases where the same needle is used to search  | 
55  |  | many haystacks, it is possible to do construction once and thus to avoid it for  | 
56  |  | subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for  | 
57  |  | reverse searches).  | 
58  |  |  | 
59  |  | ```  | 
60  |  | use memchr::memmem;  | 
61  |  |  | 
62  |  | let finder = memmem::Finder::new("foo"); | 
63  |  |  | 
64  |  | assert_eq!(Some(4), finder.find(b"baz foo quux"));  | 
65  |  | assert_eq!(None, finder.find(b"quux baz bar"));  | 
66  |  | ```  | 
67  |  | */  | 
68  |  |  | 
69  |  | pub use crate::memmem::searcher::PrefilterConfig as Prefilter;  | 
70  |  |  | 
71  |  | // This is exported here for use in the crate::arch::all::twoway  | 
72  |  | // implementation. This is essentially an abstraction breaker. Namely, the  | 
73  |  | // public API of twoway doesn't support providing a prefilter, but its crate  | 
74  |  | // internal API does. The main reason for this is that I didn't want to do the  | 
75  |  | // API design required to support it without a concrete use case.  | 
76  |  | pub(crate) use crate::memmem::searcher::Pre;  | 
77  |  |  | 
78  |  | use crate::{ | 
79  |  |     arch::all::{ | 
80  |  |         packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, | 
81  |  |         rabinkarp,  | 
82  |  |     },  | 
83  |  |     cow::CowBytes,  | 
84  |  |     memmem::searcher::{PrefilterState, Searcher, SearcherRev}, | 
85  |  | };  | 
86  |  |  | 
87  |  | mod searcher;  | 
88  |  |  | 
89  |  | /// Returns an iterator over all non-overlapping occurrences of a substring in  | 
90  |  | /// a haystack.  | 
91  |  | ///  | 
92  |  | /// # Complexity  | 
93  |  | ///  | 
94  |  | /// This routine is guaranteed to have worst case linear time complexity  | 
95  |  | /// with respect to both the needle and the haystack. That is, this runs  | 
96  |  | /// in `O(needle.len() + haystack.len())` time.  | 
97  |  | ///  | 
98  |  | /// This routine is also guaranteed to have worst case constant space  | 
99  |  | /// complexity.  | 
100  |  | ///  | 
101  |  | /// # Examples  | 
102  |  | ///  | 
103  |  | /// Basic usage:  | 
104  |  | ///  | 
105  |  | /// ```  | 
106  |  | /// use memchr::memmem;  | 
107  |  | ///  | 
108  |  | /// let haystack = b"foo bar foo baz foo";  | 
109  |  | /// let mut it = memmem::find_iter(haystack, b"foo");  | 
110  |  | /// assert_eq!(Some(0), it.next());  | 
111  |  | /// assert_eq!(Some(8), it.next());  | 
112  |  | /// assert_eq!(Some(16), it.next());  | 
113  |  | /// assert_eq!(None, it.next());  | 
114  |  | /// ```  | 
115  |  | #[inline]  | 
116  | 0  | pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(  | 
117  | 0  |     haystack: &'h [u8],  | 
118  | 0  |     needle: &'n N,  | 
119  | 0  | ) -> FindIter<'h, 'n> { | 
120  | 0  |     FindIter::new(haystack, Finder::new(needle))  | 
121  | 0  | }  | 
122  |  |  | 
123  |  | /// Returns a reverse iterator over all non-overlapping occurrences of a  | 
124  |  | /// substring in a haystack.  | 
125  |  | ///  | 
126  |  | /// # Complexity  | 
127  |  | ///  | 
128  |  | /// This routine is guaranteed to have worst case linear time complexity  | 
129  |  | /// with respect to both the needle and the haystack. That is, this runs  | 
130  |  | /// in `O(needle.len() + haystack.len())` time.  | 
131  |  | ///  | 
132  |  | /// This routine is also guaranteed to have worst case constant space  | 
133  |  | /// complexity.  | 
134  |  | ///  | 
135  |  | /// # Examples  | 
136  |  | ///  | 
137  |  | /// Basic usage:  | 
138  |  | ///  | 
139  |  | /// ```  | 
140  |  | /// use memchr::memmem;  | 
141  |  | ///  | 
142  |  | /// let haystack = b"foo bar foo baz foo";  | 
143  |  | /// let mut it = memmem::rfind_iter(haystack, b"foo");  | 
144  |  | /// assert_eq!(Some(16), it.next());  | 
145  |  | /// assert_eq!(Some(8), it.next());  | 
146  |  | /// assert_eq!(Some(0), it.next());  | 
147  |  | /// assert_eq!(None, it.next());  | 
148  |  | /// ```  | 
149  |  | #[inline]  | 
150  | 0  | pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(  | 
151  | 0  |     haystack: &'h [u8],  | 
152  | 0  |     needle: &'n N,  | 
153  | 0  | ) -> FindRevIter<'h, 'n> { | 
154  | 0  |     FindRevIter::new(haystack, FinderRev::new(needle))  | 
155  | 0  | }  | 
156  |  |  | 
157  |  | /// Returns the index of the first occurrence of the given needle.  | 
158  |  | ///  | 
159  |  | /// Note that if you're are searching for the same needle in many different  | 
160  |  | /// small haystacks, it may be faster to initialize a [`Finder`] once,  | 
161  |  | /// and reuse it for each search.  | 
162  |  | ///  | 
163  |  | /// # Complexity  | 
164  |  | ///  | 
165  |  | /// This routine is guaranteed to have worst case linear time complexity  | 
166  |  | /// with respect to both the needle and the haystack. That is, this runs  | 
167  |  | /// in `O(needle.len() + haystack.len())` time.  | 
168  |  | ///  | 
169  |  | /// This routine is also guaranteed to have worst case constant space  | 
170  |  | /// complexity.  | 
171  |  | ///  | 
172  |  | /// # Examples  | 
173  |  | ///  | 
174  |  | /// Basic usage:  | 
175  |  | ///  | 
176  |  | /// ```  | 
177  |  | /// use memchr::memmem;  | 
178  |  | ///  | 
179  |  | /// let haystack = b"foo bar baz";  | 
180  |  | /// assert_eq!(Some(0), memmem::find(haystack, b"foo"));  | 
181  |  | /// assert_eq!(Some(4), memmem::find(haystack, b"bar"));  | 
182  |  | /// assert_eq!(None, memmem::find(haystack, b"quux"));  | 
183  |  | /// ```  | 
184  |  | #[inline]  | 
185  | 0  | pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { | 
186  | 0  |     if haystack.len() < 64 { | 
187  | 0  |         rabinkarp::Finder::new(needle).find(haystack, needle)  | 
188  |  |     } else { | 
189  | 0  |         Finder::new(needle).find(haystack)  | 
190  |  |     }  | 
191  | 0  | }  | 
192  |  |  | 
193  |  | /// Returns the index of the last occurrence of the given needle.  | 
194  |  | ///  | 
195  |  | /// Note that if you're are searching for the same needle in many different  | 
196  |  | /// small haystacks, it may be faster to initialize a [`FinderRev`] once,  | 
197  |  | /// and reuse it for each search.  | 
198  |  | ///  | 
199  |  | /// # Complexity  | 
200  |  | ///  | 
201  |  | /// This routine is guaranteed to have worst case linear time complexity  | 
202  |  | /// with respect to both the needle and the haystack. That is, this runs  | 
203  |  | /// in `O(needle.len() + haystack.len())` time.  | 
204  |  | ///  | 
205  |  | /// This routine is also guaranteed to have worst case constant space  | 
206  |  | /// complexity.  | 
207  |  | ///  | 
208  |  | /// # Examples  | 
209  |  | ///  | 
210  |  | /// Basic usage:  | 
211  |  | ///  | 
212  |  | /// ```  | 
213  |  | /// use memchr::memmem;  | 
214  |  | ///  | 
215  |  | /// let haystack = b"foo bar baz";  | 
216  |  | /// assert_eq!(Some(0), memmem::rfind(haystack, b"foo"));  | 
217  |  | /// assert_eq!(Some(4), memmem::rfind(haystack, b"bar"));  | 
218  |  | /// assert_eq!(Some(8), memmem::rfind(haystack, b"ba"));  | 
219  |  | /// assert_eq!(None, memmem::rfind(haystack, b"quux"));  | 
220  |  | /// ```  | 
221  |  | #[inline]  | 
222  | 0  | pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { | 
223  | 0  |     if haystack.len() < 64 { | 
224  | 0  |         rabinkarp::FinderRev::new(needle).rfind(haystack, needle)  | 
225  |  |     } else { | 
226  | 0  |         FinderRev::new(needle).rfind(haystack)  | 
227  |  |     }  | 
228  | 0  | }  | 
229  |  |  | 
230  |  | /// An iterator over non-overlapping substring matches.  | 
231  |  | ///  | 
232  |  | /// Matches are reported by the byte offset at which they begin.  | 
233  |  | ///  | 
234  |  | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the  | 
235  |  | /// needle.  | 
236  |  | #[derive(Debug, Clone)]  | 
237  |  | pub struct FindIter<'h, 'n> { | 
238  |  |     haystack: &'h [u8],  | 
239  |  |     prestate: PrefilterState,  | 
240  |  |     finder: Finder<'n>,  | 
241  |  |     pos: usize,  | 
242  |  | }  | 
243  |  |  | 
244  |  | impl<'h, 'n> FindIter<'h, 'n> { | 
245  |  |     #[inline(always)]  | 
246  | 0  |     pub(crate) fn new(  | 
247  | 0  |         haystack: &'h [u8],  | 
248  | 0  |         finder: Finder<'n>,  | 
249  | 0  |     ) -> FindIter<'h, 'n> { | 
250  | 0  |         let prestate = PrefilterState::new();  | 
251  | 0  |         FindIter { haystack, prestate, finder, pos: 0 } | 
252  | 0  |     }  | 
253  |  |  | 
254  |  |     /// Convert this iterator into its owned variant, such that it no longer  | 
255  |  |     /// borrows the finder and needle.  | 
256  |  |     ///  | 
257  |  |     /// If this is already an owned iterator, then this is a no-op. Otherwise,  | 
258  |  |     /// this copies the needle.  | 
259  |  |     ///  | 
260  |  |     /// This is only available when the `alloc` feature is enabled.  | 
261  |  |     #[cfg(feature = "alloc")]  | 
262  |  |     #[inline]  | 
263  | 0  |     pub fn into_owned(self) -> FindIter<'h, 'static> { | 
264  | 0  |         FindIter { | 
265  | 0  |             haystack: self.haystack,  | 
266  | 0  |             prestate: self.prestate,  | 
267  | 0  |             finder: self.finder.into_owned(),  | 
268  | 0  |             pos: self.pos,  | 
269  | 0  |         }  | 
270  | 0  |     }  | 
271  |  | }  | 
272  |  |  | 
273  |  | impl<'h, 'n> Iterator for FindIter<'h, 'n> { | 
274  |  |     type Item = usize;  | 
275  |  |  | 
276  | 0  |     fn next(&mut self) -> Option<usize> { | 
277  | 0  |         let needle = self.finder.needle();  | 
278  | 0  |         let haystack = self.haystack.get(self.pos..)?;  | 
279  | 0  |         let idx =  | 
280  | 0  |             self.finder.searcher.find(&mut self.prestate, haystack, needle)?;  | 
281  |  |  | 
282  | 0  |         let pos = self.pos + idx;  | 
283  | 0  |         self.pos = pos + needle.len().max(1);  | 
284  |  |  | 
285  | 0  |         Some(pos)  | 
286  | 0  |     }  | 
287  |  |  | 
288  | 0  |     fn size_hint(&self) -> (usize, Option<usize>) { | 
289  |  |         // The largest possible number of non-overlapping matches is the  | 
290  |  |         // quotient of the haystack and the needle (or the length of the  | 
291  |  |         // haystack, if the needle is empty)  | 
292  | 0  |         match self.haystack.len().checked_sub(self.pos) { | 
293  | 0  |             None => (0, Some(0)),  | 
294  | 0  |             Some(haystack_len) => match self.finder.needle().len() { | 
295  |  |                 // Empty needles always succeed and match at every point  | 
296  |  |                 // (including the very end)  | 
297  | 0  |                 0 => (  | 
298  | 0  |                     haystack_len.saturating_add(1),  | 
299  | 0  |                     haystack_len.checked_add(1),  | 
300  | 0  |                 ),  | 
301  | 0  |                 needle_len => (0, Some(haystack_len / needle_len)),  | 
302  |  |             },  | 
303  |  |         }  | 
304  | 0  |     }  | 
305  |  | }  | 
306  |  |  | 
307  |  | /// An iterator over non-overlapping substring matches in reverse.  | 
308  |  | ///  | 
309  |  | /// Matches are reported by the byte offset at which they begin.  | 
310  |  | ///  | 
311  |  | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the  | 
312  |  | /// needle.  | 
313  |  | #[derive(Clone, Debug)]  | 
314  |  | pub struct FindRevIter<'h, 'n> { | 
315  |  |     haystack: &'h [u8],  | 
316  |  |     finder: FinderRev<'n>,  | 
317  |  |     /// When searching with an empty needle, this gets set to `None` after  | 
318  |  |     /// we've yielded the last element at `0`.  | 
319  |  |     pos: Option<usize>,  | 
320  |  | }  | 
321  |  |  | 
322  |  | impl<'h, 'n> FindRevIter<'h, 'n> { | 
323  |  |     #[inline(always)]  | 
324  | 0  |     pub(crate) fn new(  | 
325  | 0  |         haystack: &'h [u8],  | 
326  | 0  |         finder: FinderRev<'n>,  | 
327  | 0  |     ) -> FindRevIter<'h, 'n> { | 
328  | 0  |         let pos = Some(haystack.len());  | 
329  | 0  |         FindRevIter { haystack, finder, pos } | 
330  | 0  |     }  | 
331  |  |  | 
332  |  |     /// Convert this iterator into its owned variant, such that it no longer  | 
333  |  |     /// borrows the finder and needle.  | 
334  |  |     ///  | 
335  |  |     /// If this is already an owned iterator, then this is a no-op. Otherwise,  | 
336  |  |     /// this copies the needle.  | 
337  |  |     ///  | 
338  |  |     /// This is only available when the `std` feature is enabled.  | 
339  |  |     #[cfg(feature = "alloc")]  | 
340  |  |     #[inline]  | 
341  | 0  |     pub fn into_owned(self) -> FindRevIter<'h, 'static> { | 
342  | 0  |         FindRevIter { | 
343  | 0  |             haystack: self.haystack,  | 
344  | 0  |             finder: self.finder.into_owned(),  | 
345  | 0  |             pos: self.pos,  | 
346  | 0  |         }  | 
347  | 0  |     }  | 
348  |  | }  | 
349  |  |  | 
350  |  | impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { | 
351  |  |     type Item = usize;  | 
352  |  |  | 
353  | 0  |     fn next(&mut self) -> Option<usize> { | 
354  | 0  |         let pos = match self.pos { | 
355  | 0  |             None => return None,  | 
356  | 0  |             Some(pos) => pos,  | 
357  |  |         };  | 
358  | 0  |         let result = self.finder.rfind(&self.haystack[..pos]);  | 
359  | 0  |         match result { | 
360  | 0  |             None => None,  | 
361  | 0  |             Some(i) => { | 
362  | 0  |                 if pos == i { | 
363  | 0  |                     self.pos = pos.checked_sub(1);  | 
364  | 0  |                 } else { | 
365  | 0  |                     self.pos = Some(i);  | 
366  | 0  |                 }  | 
367  | 0  |                 Some(i)  | 
368  |  |             }  | 
369  |  |         }  | 
370  | 0  |     }  | 
371  |  | }  | 
372  |  |  | 
373  |  | /// A single substring searcher fixed to a particular needle.  | 
374  |  | ///  | 
375  |  | /// The purpose of this type is to permit callers to construct a substring  | 
376  |  | /// searcher that can be used to search haystacks without the overhead of  | 
377  |  | /// constructing the searcher in the first place. This is a somewhat niche  | 
378  |  | /// concern when it's necessary to re-use the same needle to search multiple  | 
379  |  | /// different haystacks with as little overhead as possible. In general, using  | 
380  |  | /// [`find`] is good enough, but `Finder` is useful when you can meaningfully  | 
381  |  | /// observe searcher construction time in a profile.  | 
382  |  | ///  | 
383  |  | /// When the `std` feature is enabled, then this type has an `into_owned`  | 
384  |  | /// version which permits building a `Finder` that is not connected to  | 
385  |  | /// the lifetime of its needle.  | 
386  |  | #[derive(Clone, Debug)]  | 
387  |  | pub struct Finder<'n> { | 
388  |  |     needle: CowBytes<'n>,  | 
389  |  |     searcher: Searcher,  | 
390  |  | }  | 
391  |  |  | 
392  |  | impl<'n> Finder<'n> { | 
393  |  |     /// Create a new finder for the given needle.  | 
394  |  |     #[inline]  | 
395  | 0  |     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> { | 
396  | 0  |         FinderBuilder::new().build_forward(needle)  | 
397  | 0  |     }  | 
398  |  |  | 
399  |  |     /// Returns the index of the first occurrence of this needle in the given  | 
400  |  |     /// haystack.  | 
401  |  |     ///  | 
402  |  |     /// # Complexity  | 
403  |  |     ///  | 
404  |  |     /// This routine is guaranteed to have worst case linear time complexity  | 
405  |  |     /// with respect to both the needle and the haystack. That is, this runs  | 
406  |  |     /// in `O(needle.len() + haystack.len())` time.  | 
407  |  |     ///  | 
408  |  |     /// This routine is also guaranteed to have worst case constant space  | 
409  |  |     /// complexity.  | 
410  |  |     ///  | 
411  |  |     /// # Examples  | 
412  |  |     ///  | 
413  |  |     /// Basic usage:  | 
414  |  |     ///  | 
415  |  |     /// ```  | 
416  |  |     /// use memchr::memmem::Finder;  | 
417  |  |     ///  | 
418  |  |     /// let haystack = b"foo bar baz";  | 
419  |  |     /// assert_eq!(Some(0), Finder::new("foo").find(haystack)); | 
420  |  |     /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); | 
421  |  |     /// assert_eq!(None, Finder::new("quux").find(haystack)); | 
422  |  |     /// ```  | 
423  |  |     #[inline]  | 
424  | 0  |     pub fn find(&self, haystack: &[u8]) -> Option<usize> { | 
425  | 0  |         let mut prestate = PrefilterState::new();  | 
426  | 0  |         let needle = self.needle.as_slice();  | 
427  | 0  |         self.searcher.find(&mut prestate, haystack, needle)  | 
428  | 0  |     }  | 
429  |  |  | 
430  |  |     /// Returns an iterator over all occurrences of a substring in a haystack.  | 
431  |  |     ///  | 
432  |  |     /// # Complexity  | 
433  |  |     ///  | 
434  |  |     /// This routine is guaranteed to have worst case linear time complexity  | 
435  |  |     /// with respect to both the needle and the haystack. That is, this runs  | 
436  |  |     /// in `O(needle.len() + haystack.len())` time.  | 
437  |  |     ///  | 
438  |  |     /// This routine is also guaranteed to have worst case constant space  | 
439  |  |     /// complexity.  | 
440  |  |     ///  | 
441  |  |     /// # Examples  | 
442  |  |     ///  | 
443  |  |     /// Basic usage:  | 
444  |  |     ///  | 
445  |  |     /// ```  | 
446  |  |     /// use memchr::memmem::Finder;  | 
447  |  |     ///  | 
448  |  |     /// let haystack = b"foo bar foo baz foo";  | 
449  |  |     /// let finder = Finder::new(b"foo");  | 
450  |  |     /// let mut it = finder.find_iter(haystack);  | 
451  |  |     /// assert_eq!(Some(0), it.next());  | 
452  |  |     /// assert_eq!(Some(8), it.next());  | 
453  |  |     /// assert_eq!(Some(16), it.next());  | 
454  |  |     /// assert_eq!(None, it.next());  | 
455  |  |     /// ```  | 
456  |  |     #[inline]  | 
457  | 0  |     pub fn find_iter<'a, 'h>(  | 
458  | 0  |         &'a self,  | 
459  | 0  |         haystack: &'h [u8],  | 
460  | 0  |     ) -> FindIter<'h, 'a> { | 
461  | 0  |         FindIter::new(haystack, self.as_ref())  | 
462  | 0  |     }  | 
463  |  |  | 
464  |  |     /// Convert this finder into its owned variant, such that it no longer  | 
465  |  |     /// borrows the needle.  | 
466  |  |     ///  | 
467  |  |     /// If this is already an owned finder, then this is a no-op. Otherwise,  | 
468  |  |     /// this copies the needle.  | 
469  |  |     ///  | 
470  |  |     /// This is only available when the `alloc` feature is enabled.  | 
471  |  |     #[cfg(feature = "alloc")]  | 
472  |  |     #[inline]  | 
473  | 0  |     pub fn into_owned(self) -> Finder<'static> { | 
474  | 0  |         Finder { | 
475  | 0  |             needle: self.needle.into_owned(),  | 
476  | 0  |             searcher: self.searcher.clone(),  | 
477  | 0  |         }  | 
478  | 0  |     }  | 
479  |  |  | 
480  |  |     /// Convert this finder into its borrowed variant.  | 
481  |  |     ///  | 
482  |  |     /// This is primarily useful if your finder is owned and you'd like to  | 
483  |  |     /// store its borrowed variant in some intermediate data structure.  | 
484  |  |     ///  | 
485  |  |     /// Note that the lifetime parameter of the returned finder is tied to the  | 
486  |  |     /// lifetime of `self`, and may be shorter than the `'n` lifetime of the  | 
487  |  |     /// needle itself. Namely, a finder's needle can be either borrowed or  | 
488  |  |     /// owned, so the lifetime of the needle returned must necessarily be the  | 
489  |  |     /// shorter of the two.  | 
490  |  |     #[inline]  | 
491  | 0  |     pub fn as_ref(&self) -> Finder<'_> { | 
492  | 0  |         Finder { | 
493  | 0  |             needle: CowBytes::new(self.needle()),  | 
494  | 0  |             searcher: self.searcher.clone(),  | 
495  | 0  |         }  | 
496  | 0  |     }  | 
497  |  |  | 
498  |  |     /// Returns the needle that this finder searches for.  | 
499  |  |     ///  | 
500  |  |     /// Note that the lifetime of the needle returned is tied to the lifetime  | 
501  |  |     /// of the finder, and may be shorter than the `'n` lifetime. Namely, a  | 
502  |  |     /// finder's needle can be either borrowed or owned, so the lifetime of the  | 
503  |  |     /// needle returned must necessarily be the shorter of the two.  | 
504  |  |     #[inline]  | 
505  | 0  |     pub fn needle(&self) -> &[u8] { | 
506  | 0  |         self.needle.as_slice()  | 
507  | 0  |     }  | 
508  |  | }  | 
509  |  |  | 
510  |  | /// A single substring reverse searcher fixed to a particular needle.  | 
511  |  | ///  | 
512  |  | /// The purpose of this type is to permit callers to construct a substring  | 
513  |  | /// searcher that can be used to search haystacks without the overhead of  | 
514  |  | /// constructing the searcher in the first place. This is a somewhat niche  | 
515  |  | /// concern when it's necessary to re-use the same needle to search multiple  | 
516  |  | /// different haystacks with as little overhead as possible. In general,  | 
517  |  | /// using [`rfind`] is good enough, but `FinderRev` is useful when you can  | 
518  |  | /// meaningfully observe searcher construction time in a profile.  | 
519  |  | ///  | 
520  |  | /// When the `std` feature is enabled, then this type has an `into_owned`  | 
521  |  | /// version which permits building a `FinderRev` that is not connected to  | 
522  |  | /// the lifetime of its needle.  | 
523  |  | #[derive(Clone, Debug)]  | 
524  |  | pub struct FinderRev<'n> { | 
525  |  |     needle: CowBytes<'n>,  | 
526  |  |     searcher: SearcherRev,  | 
527  |  | }  | 
528  |  |  | 
529  |  | impl<'n> FinderRev<'n> { | 
530  |  |     /// Create a new reverse finder for the given needle.  | 
531  |  |     #[inline]  | 
532  | 0  |     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> { | 
533  | 0  |         FinderBuilder::new().build_reverse(needle)  | 
534  | 0  |     }  | 
535  |  |  | 
536  |  |     /// Returns the index of the last occurrence of this needle in the given  | 
537  |  |     /// haystack.  | 
538  |  |     ///  | 
539  |  |     /// The haystack may be any type that can be cheaply converted into a  | 
540  |  |     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.  | 
541  |  |     ///  | 
542  |  |     /// # Complexity  | 
543  |  |     ///  | 
544  |  |     /// This routine is guaranteed to have worst case linear time complexity  | 
545  |  |     /// with respect to both the needle and the haystack. That is, this runs  | 
546  |  |     /// in `O(needle.len() + haystack.len())` time.  | 
547  |  |     ///  | 
548  |  |     /// This routine is also guaranteed to have worst case constant space  | 
549  |  |     /// complexity.  | 
550  |  |     ///  | 
551  |  |     /// # Examples  | 
552  |  |     ///  | 
553  |  |     /// Basic usage:  | 
554  |  |     ///  | 
555  |  |     /// ```  | 
556  |  |     /// use memchr::memmem::FinderRev;  | 
557  |  |     ///  | 
558  |  |     /// let haystack = b"foo bar baz";  | 
559  |  |     /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack)); | 
560  |  |     /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack)); | 
561  |  |     /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); | 
562  |  |     /// ```  | 
563  | 0  |     pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { | 
564  | 0  |         self.searcher.rfind(haystack.as_ref(), self.needle.as_slice())  | 
565  | 0  |     }  | 
566  |  |  | 
567  |  |     /// Returns a reverse iterator over all occurrences of a substring in a  | 
568  |  |     /// haystack.  | 
569  |  |     ///  | 
570  |  |     /// # Complexity  | 
571  |  |     ///  | 
572  |  |     /// This routine is guaranteed to have worst case linear time complexity  | 
573  |  |     /// with respect to both the needle and the haystack. That is, this runs  | 
574  |  |     /// in `O(needle.len() + haystack.len())` time.  | 
575  |  |     ///  | 
576  |  |     /// This routine is also guaranteed to have worst case constant space  | 
577  |  |     /// complexity.  | 
578  |  |     ///  | 
579  |  |     /// # Examples  | 
580  |  |     ///  | 
581  |  |     /// Basic usage:  | 
582  |  |     ///  | 
583  |  |     /// ```  | 
584  |  |     /// use memchr::memmem::FinderRev;  | 
585  |  |     ///  | 
586  |  |     /// let haystack = b"foo bar foo baz foo";  | 
587  |  |     /// let finder = FinderRev::new(b"foo");  | 
588  |  |     /// let mut it = finder.rfind_iter(haystack);  | 
589  |  |     /// assert_eq!(Some(16), it.next());  | 
590  |  |     /// assert_eq!(Some(8), it.next());  | 
591  |  |     /// assert_eq!(Some(0), it.next());  | 
592  |  |     /// assert_eq!(None, it.next());  | 
593  |  |     /// ```  | 
594  |  |     #[inline]  | 
595  | 0  |     pub fn rfind_iter<'a, 'h>(  | 
596  | 0  |         &'a self,  | 
597  | 0  |         haystack: &'h [u8],  | 
598  | 0  |     ) -> FindRevIter<'h, 'a> { | 
599  | 0  |         FindRevIter::new(haystack, self.as_ref())  | 
600  | 0  |     }  | 
601  |  |  | 
602  |  |     /// Convert this finder into its owned variant, such that it no longer  | 
603  |  |     /// borrows the needle.  | 
604  |  |     ///  | 
605  |  |     /// If this is already an owned finder, then this is a no-op. Otherwise,  | 
606  |  |     /// this copies the needle.  | 
607  |  |     ///  | 
608  |  |     /// This is only available when the `std` feature is enabled.  | 
609  |  |     #[cfg(feature = "alloc")]  | 
610  |  |     #[inline]  | 
611  | 0  |     pub fn into_owned(self) -> FinderRev<'static> { | 
612  | 0  |         FinderRev { | 
613  | 0  |             needle: self.needle.into_owned(),  | 
614  | 0  |             searcher: self.searcher.clone(),  | 
615  | 0  |         }  | 
616  | 0  |     }  | 
617  |  |  | 
618  |  |     /// Convert this finder into its borrowed variant.  | 
619  |  |     ///  | 
620  |  |     /// This is primarily useful if your finder is owned and you'd like to  | 
621  |  |     /// store its borrowed variant in some intermediate data structure.  | 
622  |  |     ///  | 
623  |  |     /// Note that the lifetime parameter of the returned finder is tied to the  | 
624  |  |     /// lifetime of `self`, and may be shorter than the `'n` lifetime of the  | 
625  |  |     /// needle itself. Namely, a finder's needle can be either borrowed or  | 
626  |  |     /// owned, so the lifetime of the needle returned must necessarily be the  | 
627  |  |     /// shorter of the two.  | 
628  |  |     #[inline]  | 
629  | 0  |     pub fn as_ref(&self) -> FinderRev<'_> { | 
630  | 0  |         FinderRev { | 
631  | 0  |             needle: CowBytes::new(self.needle()),  | 
632  | 0  |             searcher: self.searcher.clone(),  | 
633  | 0  |         }  | 
634  | 0  |     }  | 
635  |  |  | 
636  |  |     /// Returns the needle that this finder searches for.  | 
637  |  |     ///  | 
638  |  |     /// Note that the lifetime of the needle returned is tied to the lifetime  | 
639  |  |     /// of the finder, and may be shorter than the `'n` lifetime. Namely, a  | 
640  |  |     /// finder's needle can be either borrowed or owned, so the lifetime of the  | 
641  |  |     /// needle returned must necessarily be the shorter of the two.  | 
642  |  |     #[inline]  | 
643  | 0  |     pub fn needle(&self) -> &[u8] { | 
644  | 0  |         self.needle.as_slice()  | 
645  | 0  |     }  | 
646  |  | }  | 
647  |  |  | 
648  |  | /// A builder for constructing non-default forward or reverse memmem finders.  | 
649  |  | ///  | 
650  |  | /// A builder is primarily useful for configuring a substring searcher.  | 
651  |  | /// Currently, the only configuration exposed is the ability to disable  | 
652  |  | /// heuristic prefilters used to speed up certain searches.  | 
653  |  | #[derive(Clone, Debug, Default)]  | 
654  |  | pub struct FinderBuilder { | 
655  |  |     prefilter: Prefilter,  | 
656  |  | }  | 
657  |  |  | 
658  |  | impl FinderBuilder { | 
659  |  |     /// Create a new finder builder with default settings.  | 
660  | 0  |     pub fn new() -> FinderBuilder { | 
661  | 0  |         FinderBuilder::default()  | 
662  | 0  |     }  | 
663  |  |  | 
664  |  |     /// Build a forward finder using the given needle from the current  | 
665  |  |     /// settings.  | 
666  | 0  |     pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>(  | 
667  | 0  |         &self,  | 
668  | 0  |         needle: &'n B,  | 
669  | 0  |     ) -> Finder<'n> { | 
670  | 0  |         self.build_forward_with_ranker(DefaultFrequencyRank, needle)  | 
671  | 0  |     }  | 
672  |  |  | 
673  |  |     /// Build a forward finder using the given needle and a custom heuristic for  | 
674  |  |     /// determining the frequency of a given byte in the dataset.  | 
675  |  |     /// See [`HeuristicFrequencyRank`] for more details.  | 
676  | 0  |     pub fn build_forward_with_ranker<  | 
677  | 0  |         'n,  | 
678  | 0  |         R: HeuristicFrequencyRank,  | 
679  | 0  |         B: ?Sized + AsRef<[u8]>,  | 
680  | 0  |     >(  | 
681  | 0  |         &self,  | 
682  | 0  |         ranker: R,  | 
683  | 0  |         needle: &'n B,  | 
684  | 0  |     ) -> Finder<'n> { | 
685  | 0  |         let needle = needle.as_ref();  | 
686  | 0  |         Finder { | 
687  | 0  |             needle: CowBytes::new(needle),  | 
688  | 0  |             searcher: Searcher::new(self.prefilter, ranker, needle),  | 
689  | 0  |         }  | 
690  | 0  |     }  | 
691  |  |  | 
692  |  |     /// Build a reverse finder using the given needle from the current  | 
693  |  |     /// settings.  | 
694  | 0  |     pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>(  | 
695  | 0  |         &self,  | 
696  | 0  |         needle: &'n B,  | 
697  | 0  |     ) -> FinderRev<'n> { | 
698  | 0  |         let needle = needle.as_ref();  | 
699  | 0  |         FinderRev { | 
700  | 0  |             needle: CowBytes::new(needle),  | 
701  | 0  |             searcher: SearcherRev::new(needle),  | 
702  | 0  |         }  | 
703  | 0  |     }  | 
704  |  |  | 
705  |  |     /// Configure the prefilter setting for the finder.  | 
706  |  |     ///  | 
707  |  |     /// See the documentation for [`Prefilter`] for more discussion on why  | 
708  |  |     /// you might want to configure this.  | 
709  | 0  |     pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { | 
710  | 0  |         self.prefilter = prefilter;  | 
711  | 0  |         self  | 
712  | 0  |     }  | 
713  |  | }  | 
714  |  |  | 
715  |  | #[cfg(test)]  | 
716  |  | mod tests { | 
717  |  |     use super::*;  | 
718  |  |  | 
719  |  |     define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h)));  | 
720  |  |     define_substring_reverse_quickcheck!(|h, n| Some(  | 
721  |  |         FinderRev::new(n).rfind(h)  | 
722  |  |     ));  | 
723  |  |  | 
724  |  |     #[test]  | 
725  |  |     fn forward() { | 
726  |  |         crate::tests::substring::Runner::new()  | 
727  |  |             .fwd(|h, n| Some(Finder::new(n).find(h)))  | 
728  |  |             .run();  | 
729  |  |     }  | 
730  |  |  | 
731  |  |     #[test]  | 
732  |  |     fn reverse() { | 
733  |  |         crate::tests::substring::Runner::new()  | 
734  |  |             .rev(|h, n| Some(FinderRev::new(n).rfind(h)))  | 
735  |  |             .run();  | 
736  |  |     }  | 
737  |  | }  |