Coverage Report

Created: 2025-12-31 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-1.12.2/src/regexset/bytes.rs
Line
Count
Source
1
use alloc::string::String;
2
3
use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
4
5
use crate::{bytes::RegexSetBuilder, Error};
6
7
/// Match multiple, possibly overlapping, regexes in a single search.
8
///
9
/// A regex set corresponds to the union of zero or more regular expressions.
10
/// That is, a regex set will match a haystack when at least one of its
11
/// constituent regexes matches. A regex set as it's formulated here provides a
12
/// touch more power: it will also report *which* regular expressions in the
13
/// set match. Indeed, this is the key difference between regex sets and a
14
/// single `Regex` with many alternates, since only one alternate can match at
15
/// a time.
16
///
17
/// For example, consider regular expressions to match email addresses and
18
/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
19
/// regex set is constructed from those regexes, then searching the haystack
20
/// `foo@example.com` will report both regexes as matching. Of course, one
21
/// could accomplish this by compiling each regex on its own and doing two
22
/// searches over the haystack. The key advantage of using a regex set is
23
/// that it will report the matching regexes using a *single pass through the
24
/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
25
/// (like a URL router for a complex web application or a user agent matcher),
26
/// then a regex set *can* realize huge performance gains.
27
///
28
/// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet`
29
/// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this
30
/// `RegexSet` is permitted to match invalid UTF-8.
31
///
32
/// # Limitations
33
///
34
/// Regex sets are limited to answering the following two questions:
35
///
36
/// 1. Does any regex in the set match?
37
/// 2. If so, which regexes in the set match?
38
///
39
/// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask
40
/// (1) instead of (2) since the matching engines can stop after the first
41
/// match is found.
42
///
43
/// You cannot directly extract [`Match`][crate::bytes::Match] or
44
/// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need
45
/// these operations, the recommended approach is to compile each pattern in
46
/// the set independently and scan the exact same haystack a second time with
47
/// those independently compiled patterns:
48
///
49
/// ```
50
/// use regex::bytes::{Regex, RegexSet};
51
///
52
/// let patterns = ["foo", "bar"];
53
/// // Both patterns will match different ranges of this string.
54
/// let hay = b"barfoo";
55
///
56
/// // Compile a set matching any of our patterns.
57
/// let set = RegexSet::new(patterns).unwrap();
58
/// // Compile each pattern independently.
59
/// let regexes: Vec<_> = set
60
///     .patterns()
61
///     .iter()
62
///     .map(|pat| Regex::new(pat).unwrap())
63
///     .collect();
64
///
65
/// // Match against the whole set first and identify the individual
66
/// // matching patterns.
67
/// let matches: Vec<&[u8]> = set
68
///     .matches(hay)
69
///     .into_iter()
70
///     // Dereference the match index to get the corresponding
71
///     // compiled pattern.
72
///     .map(|index| &regexes[index])
73
///     // To get match locations or any other info, we then have to search the
74
///     // exact same haystack again, using our separately-compiled pattern.
75
///     .map(|re| re.find(hay).unwrap().as_bytes())
76
///     .collect();
77
///
78
/// // Matches arrive in the order the constituent patterns were declared,
79
/// // not the order they appear in the haystack.
80
/// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches);
81
/// ```
82
///
83
/// # Performance
84
///
85
/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
86
/// search takes `O(m * n)` time, where `m` is proportional to the size of the
87
/// regex set and `n` is proportional to the length of the haystack.
88
///
89
/// # Trait implementations
90
///
91
/// The `Default` trait is implemented for `RegexSet`. The default value
92
/// is an empty set. An empty set can also be explicitly constructed via
93
/// [`RegexSet::empty`].
94
///
95
/// # Example
96
///
97
/// This shows how the above two regexes (for matching email addresses and
98
/// domains) might work:
99
///
100
/// ```
101
/// use regex::bytes::RegexSet;
102
///
103
/// let set = RegexSet::new(&[
104
///     r"[a-z]+@[a-z]+\.(com|org|net)",
105
///     r"[a-z]+\.(com|org|net)",
106
/// ]).unwrap();
107
///
108
/// // Ask whether any regexes in the set match.
109
/// assert!(set.is_match(b"foo@example.com"));
110
///
111
/// // Identify which regexes in the set match.
112
/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
113
/// assert_eq!(vec![0, 1], matches);
114
///
115
/// // Try again, but with a haystack that only matches one of the regexes.
116
/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
117
/// assert_eq!(vec![1], matches);
118
///
119
/// // Try again, but with a haystack that doesn't match any regex in the set.
120
/// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
121
/// assert!(matches.is_empty());
122
/// ```
123
///
124
/// Note that it would be possible to adapt the above example to using `Regex`
125
/// with an expression like:
126
///
127
/// ```text
128
/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
129
/// ```
130
///
131
/// After a match, one could then inspect the capture groups to figure out
132
/// which alternates matched. The problem is that it is hard to make this
133
/// approach scale when there are many regexes since the overlap between each
134
/// alternate isn't always obvious to reason about.
135
#[derive(Clone)]
136
pub struct RegexSet {
137
    pub(crate) meta: meta::Regex,
138
    pub(crate) patterns: alloc::sync::Arc<[String]>,
139
}
140
141
impl RegexSet {
142
    /// Create a new regex set with the given regular expressions.
143
    ///
144
    /// This takes an iterator of `S`, where `S` is something that can produce
145
    /// a `&str`. If any of the strings in the iterator are not valid regular
146
    /// expressions, then an error is returned.
147
    ///
148
    /// # Example
149
    ///
150
    /// Create a new regex set from an iterator of strings:
151
    ///
152
    /// ```
153
    /// use regex::bytes::RegexSet;
154
    ///
155
    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
156
    /// assert!(set.is_match(b"foo"));
157
    /// ```
158
0
    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
159
0
    where
160
0
        S: AsRef<str>,
161
0
        I: IntoIterator<Item = S>,
162
    {
163
0
        RegexSetBuilder::new(exprs).build()
164
0
    }
165
166
    /// Create a new empty regex set.
167
    ///
168
    /// An empty regex set never matches anything.
169
    ///
170
    /// This is a convenience function for `RegexSet::new([])`, but doesn't
171
    /// require one to specify the type of the input.
172
    ///
173
    /// # Example
174
    ///
175
    /// ```
176
    /// use regex::bytes::RegexSet;
177
    ///
178
    /// let set = RegexSet::empty();
179
    /// assert!(set.is_empty());
180
    /// // an empty set matches nothing
181
    /// assert!(!set.is_match(b""));
182
    /// ```
183
0
    pub fn empty() -> RegexSet {
184
0
        let empty: [&str; 0] = [];
185
0
        RegexSetBuilder::new(empty).build().unwrap()
186
0
    }
187
188
    /// Returns true if and only if one of the regexes in this set matches
189
    /// the haystack given.
190
    ///
191
    /// This method should be preferred if you only need to test whether any
192
    /// of the regexes in the set match, but don't care about *which*
193
    /// regexes matched. This is because the underlying matching engine will
194
    /// quit immediately after seeing the first match instead of continuing to
195
    /// find all matches.
196
    ///
197
    /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
198
    /// expression is unanchored by default. That is, if the regex does not
199
    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
200
    /// to match anywhere in the haystack.
201
    ///
202
    /// # Example
203
    ///
204
    /// Tests whether a set matches somewhere in a haystack:
205
    ///
206
    /// ```
207
    /// use regex::bytes::RegexSet;
208
    ///
209
    /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
210
    /// assert!(set.is_match(b"foo"));
211
    /// assert!(!set.is_match("☃".as_bytes()));
212
    /// ```
213
    #[inline]
214
0
    pub fn is_match(&self, haystack: &[u8]) -> bool {
215
0
        self.is_match_at(haystack, 0)
216
0
    }
217
218
    /// Returns true if and only if one of the regexes in this set matches the
219
    /// haystack given, with the search starting at the offset given.
220
    ///
221
    /// The significance of the starting point is that it takes the surrounding
222
    /// context into consideration. For example, the `\A` anchor can only
223
    /// match when `start == 0`.
224
    ///
225
    /// # Panics
226
    ///
227
    /// This panics when `start >= haystack.len() + 1`.
228
    ///
229
    /// # Example
230
    ///
231
    /// This example shows the significance of `start`. Namely, consider a
232
    /// haystack `foobar` and a desire to execute a search starting at offset
233
    /// `3`. You could search a substring explicitly, but then the look-around
234
    /// assertions won't work correctly. Instead, you can use this method to
235
    /// specify the start position of a search.
236
    ///
237
    /// ```
238
    /// use regex::bytes::RegexSet;
239
    ///
240
    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
241
    /// let hay = b"foobar";
242
    /// // We get a match here, but it's probably not intended.
243
    /// assert!(set.is_match(&hay[3..]));
244
    /// // No match because the assertions take the context into account.
245
    /// assert!(!set.is_match_at(hay, 3));
246
    /// ```
247
    #[inline]
248
0
    pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool {
249
0
        self.meta.is_match(Input::new(haystack).span(start..haystack.len()))
250
0
    }
251
252
    /// Returns the set of regexes that match in the given haystack.
253
    ///
254
    /// The set returned contains the index of each regex that matches in
255
    /// the given haystack. The index is in correspondence with the order of
256
    /// regular expressions given to `RegexSet`'s constructor.
257
    ///
258
    /// The set can also be used to iterate over the matched indices. The order
259
    /// of iteration is always ascending with respect to the matching indices.
260
    ///
261
    /// Note that as with searches using [`Regex`](crate::bytes::Regex), the
262
    /// expression is unanchored by default. That is, if the regex does not
263
    /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
264
    /// to match anywhere in the haystack.
265
    ///
266
    /// # Example
267
    ///
268
    /// Tests which regular expressions match the given haystack:
269
    ///
270
    /// ```
271
    /// use regex::bytes::RegexSet;
272
    ///
273
    /// let set = RegexSet::new([
274
    ///     r"\w+",
275
    ///     r"\d+",
276
    ///     r"\pL+",
277
    ///     r"foo",
278
    ///     r"bar",
279
    ///     r"barfoo",
280
    ///     r"foobar",
281
    /// ]).unwrap();
282
    /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect();
283
    /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
284
    ///
285
    /// // You can also test whether a particular regex matched:
286
    /// let matches = set.matches(b"foobar");
287
    /// assert!(!matches.matched(5));
288
    /// assert!(matches.matched(6));
289
    /// ```
290
    #[inline]
291
0
    pub fn matches(&self, haystack: &[u8]) -> SetMatches {
292
0
        self.matches_at(haystack, 0)
293
0
    }
294
295
    /// Returns the set of regexes that match in the given haystack.
296
    ///
297
    /// The set returned contains the index of each regex that matches in
298
    /// the given haystack. The index is in correspondence with the order of
299
    /// regular expressions given to `RegexSet`'s constructor.
300
    ///
301
    /// The set can also be used to iterate over the matched indices. The order
302
    /// of iteration is always ascending with respect to the matching indices.
303
    ///
304
    /// The significance of the starting point is that it takes the surrounding
305
    /// context into consideration. For example, the `\A` anchor can only
306
    /// match when `start == 0`.
307
    ///
308
    /// # Panics
309
    ///
310
    /// This panics when `start >= haystack.len() + 1`.
311
    ///
312
    /// # Example
313
    ///
314
    /// Tests which regular expressions match the given haystack:
315
    ///
316
    /// ```
317
    /// use regex::bytes::RegexSet;
318
    ///
319
    /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
320
    /// let hay = b"foobar";
321
    /// // We get matches here, but it's probably not intended.
322
    /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect();
323
    /// assert_eq!(matches, vec![0, 1]);
324
    /// // No matches because the assertions take the context into account.
325
    /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect();
326
    /// assert_eq!(matches, vec![]);
327
    /// ```
328
    #[inline]
329
0
    pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches {
330
0
        let input = Input::new(haystack).span(start..haystack.len());
331
0
        let mut patset = PatternSet::new(self.meta.pattern_len());
332
0
        self.meta.which_overlapping_matches(&input, &mut patset);
333
0
        SetMatches(patset)
334
0
    }
335
336
    /// Returns the same as matches, but starts the search at the given
337
    /// offset and stores the matches into the slice given.
338
    ///
339
    /// The significance of the starting point is that it takes the surrounding
340
    /// context into consideration. For example, the `\A` anchor can only
341
    /// match when `start == 0`.
342
    ///
343
    /// `matches` must have a length that is at least the number of regexes
344
    /// in this set.
345
    ///
346
    /// This method returns true if and only if at least one member of
347
    /// `matches` is true after executing the set against `haystack`.
348
    #[doc(hidden)]
349
    #[inline]
350
0
    pub fn matches_read_at(
351
0
        &self,
352
0
        matches: &mut [bool],
353
0
        haystack: &[u8],
354
0
        start: usize,
355
0
    ) -> bool {
356
        // This is pretty dumb. We should try to fix this, but the
357
        // regex-automata API doesn't provide a way to store matches in an
358
        // arbitrary &mut [bool]. Thankfully, this API is doc(hidden) and
359
        // thus not public... But regex-capi currently uses it. We should
360
        // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet
361
        // is in regex-automata, not regex. So maybe we should just accept a
362
        // 'SetMatches', which is basically just a newtype around PatternSet.
363
0
        let mut patset = PatternSet::new(self.meta.pattern_len());
364
0
        let mut input = Input::new(haystack);
365
0
        input.set_start(start);
366
0
        self.meta.which_overlapping_matches(&input, &mut patset);
367
0
        for pid in patset.iter() {
368
0
            matches[pid] = true;
369
0
        }
370
0
        !patset.is_empty()
371
0
    }
372
373
    /// An alias for `matches_read_at` to preserve backward compatibility.
374
    ///
375
    /// The `regex-capi` crate used this method, so to avoid breaking that
376
    /// crate, we continue to export it as an undocumented API.
377
    #[doc(hidden)]
378
    #[inline]
379
0
    pub fn read_matches_at(
380
0
        &self,
381
0
        matches: &mut [bool],
382
0
        haystack: &[u8],
383
0
        start: usize,
384
0
    ) -> bool {
385
0
        self.matches_read_at(matches, haystack, start)
386
0
    }
387
388
    /// Returns the total number of regexes in this set.
389
    ///
390
    /// # Example
391
    ///
392
    /// ```
393
    /// use regex::bytes::RegexSet;
394
    ///
395
    /// assert_eq!(0, RegexSet::empty().len());
396
    /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
397
    /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
398
    /// ```
399
    #[inline]
400
0
    pub fn len(&self) -> usize {
401
0
        self.meta.pattern_len()
402
0
    }
403
404
    /// Returns `true` if this set contains no regexes.
405
    ///
406
    /// # Example
407
    ///
408
    /// ```
409
    /// use regex::bytes::RegexSet;
410
    ///
411
    /// assert!(RegexSet::empty().is_empty());
412
    /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
413
    /// ```
414
    #[inline]
415
0
    pub fn is_empty(&self) -> bool {
416
0
        self.meta.pattern_len() == 0
417
0
    }
418
419
    /// Returns the regex patterns that this regex set was constructed from.
420
    ///
421
    /// This function can be used to determine the pattern for a match. The
422
    /// slice returned has exactly as many patterns as were given to this set,
423
    /// and the order of the slice is the same as the order of the patterns
424
    /// provided to the set.
425
    ///
426
    /// # Example
427
    ///
428
    /// ```
429
    /// use regex::bytes::RegexSet;
430
    ///
431
    /// let set = RegexSet::new(&[
432
    ///     r"\w+",
433
    ///     r"\d+",
434
    ///     r"\pL+",
435
    ///     r"foo",
436
    ///     r"bar",
437
    ///     r"barfoo",
438
    ///     r"foobar",
439
    /// ]).unwrap();
440
    /// let matches: Vec<_> = set
441
    ///     .matches(b"foobar")
442
    ///     .into_iter()
443
    ///     .map(|index| &set.patterns()[index])
444
    ///     .collect();
445
    /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
446
    /// ```
447
    #[inline]
448
0
    pub fn patterns(&self) -> &[String] {
449
0
        &self.patterns
450
0
    }
451
}
452
453
impl Default for RegexSet {
454
0
    fn default() -> Self {
455
0
        RegexSet::empty()
456
0
    }
457
}
458
459
/// A set of matches returned by a regex set.
460
///
461
/// Values of this type are constructed by [`RegexSet::matches`].
462
#[derive(Clone, Debug)]
463
pub struct SetMatches(PatternSet);
464
465
impl SetMatches {
466
    /// Whether this set contains any matches.
467
    ///
468
    /// # Example
469
    ///
470
    /// ```
471
    /// use regex::bytes::RegexSet;
472
    ///
473
    /// let set = RegexSet::new(&[
474
    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
475
    ///     r"[a-z]+\.(com|org|net)",
476
    /// ]).unwrap();
477
    /// let matches = set.matches(b"foo@example.com");
478
    /// assert!(matches.matched_any());
479
    /// ```
480
    #[inline]
481
0
    pub fn matched_any(&self) -> bool {
482
0
        !self.0.is_empty()
483
0
    }
484
485
    /// Whether all patterns in this set matched.
486
    ///
487
    /// # Example
488
    ///
489
    /// ```
490
    /// use regex::bytes::RegexSet;
491
    ///
492
    /// let set = RegexSet::new(&[
493
    ///     r"^foo",
494
    ///     r"[a-z]+\.com",
495
    /// ]).unwrap();
496
    /// let matches = set.matches(b"foo.example.com");
497
    /// assert!(matches.matched_all());
498
    /// ```
499
0
    pub fn matched_all(&self) -> bool {
500
0
        self.0.is_full()
501
0
    }
502
503
    /// Whether the regex at the given index matched.
504
    ///
505
    /// The index for a regex is determined by its insertion order upon the
506
    /// initial construction of a `RegexSet`, starting at `0`.
507
    ///
508
    /// # Panics
509
    ///
510
    /// If `index` is greater than or equal to the number of regexes in the
511
    /// original set that produced these matches. Equivalently, when `index`
512
    /// is greater than or equal to [`SetMatches::len`].
513
    ///
514
    /// # Example
515
    ///
516
    /// ```
517
    /// use regex::bytes::RegexSet;
518
    ///
519
    /// let set = RegexSet::new([
520
    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
521
    ///     r"[a-z]+\.(com|org|net)",
522
    /// ]).unwrap();
523
    /// let matches = set.matches(b"example.com");
524
    /// assert!(!matches.matched(0));
525
    /// assert!(matches.matched(1));
526
    /// ```
527
    #[inline]
528
0
    pub fn matched(&self, index: usize) -> bool {
529
0
        self.0.contains(PatternID::new_unchecked(index))
530
0
    }
531
532
    /// The total number of regexes in the set that created these matches.
533
    ///
534
    /// **WARNING:** This always returns the same value as [`RegexSet::len`].
535
    /// In particular, it does *not* return the number of elements yielded by
536
    /// [`SetMatches::iter`]. The only way to determine the total number of
537
    /// matched regexes is to iterate over them.
538
    ///
539
    /// # Example
540
    ///
541
    /// Notice that this method returns the total number of regexes in the
542
    /// original set, and *not* the total number of regexes that matched.
543
    ///
544
    /// ```
545
    /// use regex::bytes::RegexSet;
546
    ///
547
    /// let set = RegexSet::new([
548
    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
549
    ///     r"[a-z]+\.(com|org|net)",
550
    /// ]).unwrap();
551
    /// let matches = set.matches(b"example.com");
552
    /// // Total number of patterns that matched.
553
    /// assert_eq!(1, matches.iter().count());
554
    /// // Total number of patterns in the set.
555
    /// assert_eq!(2, matches.len());
556
    /// ```
557
    #[inline]
558
0
    pub fn len(&self) -> usize {
559
0
        self.0.capacity()
560
0
    }
561
562
    /// Returns an iterator over the indices of the regexes that matched.
563
    ///
564
    /// This will always produce matches in ascending order, where the index
565
    /// yielded corresponds to the index of the regex that matched with respect
566
    /// to its position when initially building the set.
567
    ///
568
    /// # Example
569
    ///
570
    /// ```
571
    /// use regex::bytes::RegexSet;
572
    ///
573
    /// let set = RegexSet::new([
574
    ///     r"[0-9]",
575
    ///     r"[a-z]",
576
    ///     r"[A-Z]",
577
    ///     r"\p{Greek}",
578
    /// ]).unwrap();
579
    /// let hay = "βa1".as_bytes();
580
    /// let matches: Vec<_> = set.matches(hay).iter().collect();
581
    /// assert_eq!(matches, vec![0, 1, 3]);
582
    /// ```
583
    ///
584
    /// Note that `SetMatches` also implements the `IntoIterator` trait, so
585
    /// this method is not always needed. For example:
586
    ///
587
    /// ```
588
    /// use regex::bytes::RegexSet;
589
    ///
590
    /// let set = RegexSet::new([
591
    ///     r"[0-9]",
592
    ///     r"[a-z]",
593
    ///     r"[A-Z]",
594
    ///     r"\p{Greek}",
595
    /// ]).unwrap();
596
    /// let hay = "βa1".as_bytes();
597
    /// let mut matches = vec![];
598
    /// for index in set.matches(hay) {
599
    ///     matches.push(index);
600
    /// }
601
    /// assert_eq!(matches, vec![0, 1, 3]);
602
    /// ```
603
    #[inline]
604
0
    pub fn iter(&self) -> SetMatchesIter<'_> {
605
0
        SetMatchesIter(self.0.iter())
606
0
    }
607
}
608
609
impl IntoIterator for SetMatches {
610
    type IntoIter = SetMatchesIntoIter;
611
    type Item = usize;
612
613
0
    fn into_iter(self) -> Self::IntoIter {
614
0
        let it = 0..self.0.capacity();
615
0
        SetMatchesIntoIter { patset: self.0, it }
616
0
    }
617
}
618
619
impl<'a> IntoIterator for &'a SetMatches {
620
    type IntoIter = SetMatchesIter<'a>;
621
    type Item = usize;
622
623
0
    fn into_iter(self) -> Self::IntoIter {
624
0
        self.iter()
625
0
    }
626
}
627
628
/// An owned iterator over the set of matches from a regex set.
629
///
630
/// This will always produce matches in ascending order of index, where the
631
/// index corresponds to the index of the regex that matched with respect to
632
/// its position when initially building the set.
633
///
634
/// This iterator is created by calling `SetMatches::into_iter` via the
635
/// `IntoIterator` trait. This is automatically done in `for` loops.
636
///
637
/// # Example
638
///
639
/// ```
640
/// use regex::bytes::RegexSet;
641
///
642
/// let set = RegexSet::new([
643
///     r"[0-9]",
644
///     r"[a-z]",
645
///     r"[A-Z]",
646
///     r"\p{Greek}",
647
/// ]).unwrap();
648
/// let hay = "βa1".as_bytes();
649
/// let mut matches = vec![];
650
/// for index in set.matches(hay) {
651
///     matches.push(index);
652
/// }
653
/// assert_eq!(matches, vec![0, 1, 3]);
654
/// ```
655
#[derive(Debug)]
656
pub struct SetMatchesIntoIter {
657
    patset: PatternSet,
658
    it: core::ops::Range<usize>,
659
}
660
661
impl Iterator for SetMatchesIntoIter {
662
    type Item = usize;
663
664
0
    fn next(&mut self) -> Option<usize> {
665
        loop {
666
0
            let id = self.it.next()?;
667
0
            if self.patset.contains(PatternID::new_unchecked(id)) {
668
0
                return Some(id);
669
0
            }
670
        }
671
0
    }
672
673
0
    fn size_hint(&self) -> (usize, Option<usize>) {
674
0
        self.it.size_hint()
675
0
    }
676
}
677
678
impl DoubleEndedIterator for SetMatchesIntoIter {
679
0
    fn next_back(&mut self) -> Option<usize> {
680
        loop {
681
0
            let id = self.it.next_back()?;
682
0
            if self.patset.contains(PatternID::new_unchecked(id)) {
683
0
                return Some(id);
684
0
            }
685
        }
686
0
    }
687
}
688
689
impl core::iter::FusedIterator for SetMatchesIntoIter {}
690
691
/// A borrowed iterator over the set of matches from a regex set.
692
///
693
/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
694
/// created this iterator.
695
///
696
/// This will always produce matches in ascending order, where the index
697
/// corresponds to the index of the regex that matched with respect to its
698
/// position when initially building the set.
699
///
700
/// This iterator is created by the [`SetMatches::iter`] method.
701
#[derive(Clone, Debug)]
702
pub struct SetMatchesIter<'a>(PatternSetIter<'a>);
703
704
impl<'a> Iterator for SetMatchesIter<'a> {
705
    type Item = usize;
706
707
0
    fn next(&mut self) -> Option<usize> {
708
0
        self.0.next().map(|pid| pid.as_usize())
709
0
    }
710
711
0
    fn size_hint(&self) -> (usize, Option<usize>) {
712
0
        self.0.size_hint()
713
0
    }
714
}
715
716
impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
717
0
    fn next_back(&mut self) -> Option<usize> {
718
0
        self.0.next_back().map(|pid| pid.as_usize())
719
0
    }
720
}
721
722
impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}
723
724
impl core::fmt::Debug for RegexSet {
725
0
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
726
0
        write!(f, "RegexSet({:?})", self.patterns())
727
0
    }
728
}