Coverage Report

Created: 2026-02-14 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/regex/regex-automata/src/meta/stopat.rs
Line
Count
Source
1
/*!
2
This module defines two bespoke forward DFA search routines. One for the lazy
3
DFA and one for the fully compiled DFA. These routines differ from the normal
4
ones by reporting the position at which the search terminates when a match
5
*isn't* found.
6
7
This position at which a search terminates is useful in contexts where the meta
8
regex engine runs optimizations that could go quadratic if we aren't careful.
9
Namely, a regex search *could* scan to the end of the haystack only to report a
10
non-match. If the caller doesn't know that the search scanned to the end of the
11
haystack, it might restart the search at the next literal candidate it finds
12
and repeat the process.
13
14
Providing the caller with the position at which the search stopped provides a
15
way for the caller to determine the point at which subsequent scans should not
16
pass. This is principally used in the "reverse inner" optimization, which works
17
like this:
18
19
1. Look for a match of an inner literal. Say, 'Z' in '\w+Z\d+'.
20
2. At the spot where 'Z' matches, do a reverse anchored search from there for
21
'\w+'.
22
3. If the reverse search matches, it corresponds to the start position of a
23
(possible) match. At this point, do a forward anchored search to find the end
24
position. If an end position is found, then we have a match and we know its
25
bounds.
26
27
If the forward anchored search in (3) searches the entire rest of the haystack
28
but reports a non-match, then a naive implementation of the above will continue
29
back at step 1 looking for more candidates. There might still be a match to be
30
found! It's possible. But we already scanned the whole haystack. So if we keep
31
repeating the process, then we might wind up taking quadratic time in the size
32
of the haystack, which is not great.
33
34
So if the forward anchored search in (3) reports the position at which it
35
stops, then we can detect whether quadratic behavior might be occurring in
36
steps (1) and (2). For (1), it occurs if the literal candidate found occurs
37
*before* the end of the previous search in (3), since that means we're now
38
going to look for another match in a place where the forward search has already
39
scanned. It is *correct* to do so, but our technique has become inefficient.
40
For (2), quadratic behavior occurs similarly when its reverse search extends
41
past the point where the previous forward search in (3) terminated. Indeed, to
42
implement (2), we use the sibling 'limited' module for ensuring our reverse
43
scan doesn't go further than we want.
44
45
See the 'opt/reverse-inner' benchmarks in rebar for a real demonstration of
46
how quadratic behavior is mitigated.
47
*/
48
49
use crate::{meta::error::RetryFailError, HalfMatch, Input, MatchError};
50
51
#[cfg(feature = "dfa-build")]
52
8.94k
pub(crate) fn dfa_try_search_half_fwd(
53
8.94k
    dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
54
8.94k
    input: &Input<'_>,
55
8.94k
) -> Result<Result<HalfMatch, usize>, RetryFailError> {
56
    use crate::dfa::{accel, Automaton};
57
58
8.94k
    let mut mat = None;
59
8.94k
    let mut sid = dfa.start_state_forward(input)?;
60
8.93k
    let mut at = input.start();
61
49.9k
    while at < input.end() {
62
49.5k
        sid = dfa.next_state(sid, input.haystack()[at]);
63
49.5k
        if dfa.is_special_state(sid) {
64
21.7k
            if dfa.is_match_state(sid) {
65
9.86k
                let pattern = dfa.match_pattern(sid, 0);
66
9.86k
                mat = Some(HalfMatch::new(pattern, at));
67
9.86k
                if input.get_earliest() {
68
131
                    return Ok(mat.ok_or(at));
69
9.73k
                }
70
9.73k
                if dfa.is_accel_state(sid) {
71
736
                    let needs = dfa.accelerator(sid);
72
736
                    at = accel::find_fwd(needs, input.haystack(), at)
73
736
                        .unwrap_or(input.end());
74
736
                    continue;
75
9.00k
                }
76
11.8k
            } else if dfa.is_accel_state(sid) {
77
2.96k
                let needs = dfa.accelerator(sid);
78
2.96k
                at = accel::find_fwd(needs, input.haystack(), at)
79
2.96k
                    .unwrap_or(input.end());
80
2.96k
                continue;
81
8.87k
            } else if dfa.is_dead_state(sid) {
82
8.38k
                return Ok(mat.ok_or(at));
83
487
            } else if dfa.is_quit_state(sid) {
84
39
                return Err(MatchError::quit(input.haystack()[at], at).into());
85
            } else {
86
                // Ideally we wouldn't use a DFA that specialized start states
87
                // and thus 'is_start_state()' could never be true here, but in
88
                // practice we reuse the DFA created for the full regex which
89
                // will specialize start states whenever there is a prefilter.
90
448
                debug_assert!(dfa.is_start_state(sid));
91
            }
92
27.8k
        }
93
37.3k
        at += 1;
94
    }
95
376
    dfa_eoi_fwd(dfa, input, &mut sid, &mut mat)?;
96
376
    Ok(mat.ok_or(at))
97
8.94k
}
98
99
#[cfg(feature = "hybrid")]
100
28.8k
pub(crate) fn hybrid_try_search_half_fwd(
101
28.8k
    dfa: &crate::hybrid::dfa::DFA,
102
28.8k
    cache: &mut crate::hybrid::dfa::Cache,
103
28.8k
    input: &Input<'_>,
104
28.8k
) -> Result<Result<HalfMatch, usize>, RetryFailError> {
105
28.8k
    let mut mat = None;
106
28.8k
    let mut sid = dfa.start_state_forward(cache, input)?;
107
28.8k
    let mut at = input.start();
108
403k
    while at < input.end() {
109
403k
        sid = dfa
110
403k
            .next_state(cache, sid, input.haystack()[at])
111
403k
            .map_err(|_| MatchError::gave_up(at))?;
112
403k
        if sid.is_tagged() {
113
39.8k
            if sid.is_match() {
114
10.7k
                let pattern = dfa.match_pattern(cache, sid, 0);
115
10.7k
                mat = Some(HalfMatch::new(pattern, at));
116
10.7k
                if input.get_earliest() {
117
62
                    return Ok(mat.ok_or(at));
118
10.6k
                }
119
29.1k
            } else if sid.is_dead() {
120
28.4k
                return Ok(mat.ok_or(at));
121
727
            } else if sid.is_quit() {
122
67
                return Err(MatchError::quit(input.haystack()[at], at).into());
123
            } else {
124
                // We should NEVER get an unknown state ID back from
125
                // dfa.next_state().
126
660
                debug_assert!(!sid.is_unknown());
127
                // Ideally we wouldn't use a lazy DFA that specialized start
128
                // states and thus 'sid.is_start()' could never be true here,
129
                // but in practice we reuse the lazy DFA created for the full
130
                // regex which will specialize start states whenever there is
131
                // a prefilter.
132
660
                debug_assert!(sid.is_start());
133
            }
134
363k
        }
135
374k
        at += 1;
136
    }
137
324
    hybrid_eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?;
138
324
    Ok(mat.ok_or(at))
139
28.8k
}
140
141
#[cfg(feature = "dfa-build")]
142
#[cfg_attr(feature = "perf-inline", inline(always))]
143
376
fn dfa_eoi_fwd(
144
376
    dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
145
376
    input: &Input<'_>,
146
376
    sid: &mut crate::util::primitives::StateID,
147
376
    mat: &mut Option<HalfMatch>,
148
376
) -> Result<(), MatchError> {
149
    use crate::dfa::Automaton;
150
151
376
    let sp = input.get_span();
152
376
    match input.haystack().get(sp.end) {
153
0
        Some(&b) => {
154
0
            *sid = dfa.next_state(*sid, b);
155
0
            if dfa.is_match_state(*sid) {
156
0
                let pattern = dfa.match_pattern(*sid, 0);
157
0
                *mat = Some(HalfMatch::new(pattern, sp.end));
158
0
            } else if dfa.is_quit_state(*sid) {
159
0
                return Err(MatchError::quit(b, sp.end));
160
0
            }
161
        }
162
        None => {
163
376
            *sid = dfa.next_eoi_state(*sid);
164
376
            if dfa.is_match_state(*sid) {
165
149
                let pattern = dfa.match_pattern(*sid, 0);
166
149
                *mat = Some(HalfMatch::new(pattern, input.haystack().len()));
167
227
            }
168
            // N.B. We don't have to check 'is_quit' here because the EOI
169
            // transition can never lead to a quit state.
170
376
            debug_assert!(!dfa.is_quit_state(*sid));
171
        }
172
    }
173
376
    Ok(())
174
376
}
175
176
#[cfg(feature = "hybrid")]
177
#[cfg_attr(feature = "perf-inline", inline(always))]
178
324
fn hybrid_eoi_fwd(
179
324
    dfa: &crate::hybrid::dfa::DFA,
180
324
    cache: &mut crate::hybrid::dfa::Cache,
181
324
    input: &Input<'_>,
182
324
    sid: &mut crate::hybrid::LazyStateID,
183
324
    mat: &mut Option<HalfMatch>,
184
324
) -> Result<(), MatchError> {
185
324
    let sp = input.get_span();
186
324
    match input.haystack().get(sp.end) {
187
0
        Some(&b) => {
188
0
            *sid = dfa
189
0
                .next_state(cache, *sid, b)
190
0
                .map_err(|_| MatchError::gave_up(sp.end))?;
191
0
            if sid.is_match() {
192
0
                let pattern = dfa.match_pattern(cache, *sid, 0);
193
0
                *mat = Some(HalfMatch::new(pattern, sp.end));
194
0
            } else if sid.is_quit() {
195
0
                return Err(MatchError::quit(b, sp.end));
196
0
            }
197
        }
198
        None => {
199
324
            *sid = dfa
200
324
                .next_eoi_state(cache, *sid)
201
324
                .map_err(|_| MatchError::gave_up(input.haystack().len()))?;
202
324
            if sid.is_match() {
203
80
                let pattern = dfa.match_pattern(cache, *sid, 0);
204
80
                *mat = Some(HalfMatch::new(pattern, input.haystack().len()));
205
244
            }
206
            // N.B. We don't have to check 'is_quit' here because the EOI
207
            // transition can never lead to a quit state.
208
324
            debug_assert!(!sid.is_quit());
209
        }
210
    }
211
324
    Ok(())
212
324
}