/src/regex/regex-automata/src/dfa/search.rs
Line | Count | Source |
1 | | use crate::{ |
2 | | dfa::{ |
3 | | accel, |
4 | | automaton::{Automaton, OverlappingState}, |
5 | | }, |
6 | | util::{ |
7 | | prefilter::Prefilter, |
8 | | primitives::StateID, |
9 | | search::{Anchored, HalfMatch, Input, Span}, |
10 | | }, |
11 | | MatchError, |
12 | | }; |
13 | | |
14 | | #[inline(never)] |
15 | 105k | pub fn find_fwd<A: Automaton + ?Sized>( |
16 | 105k | dfa: &A, |
17 | 105k | input: &Input<'_>, |
18 | 105k | ) -> Result<Option<HalfMatch>, MatchError> { |
19 | 105k | if input.is_done() { |
20 | 0 | return Ok(None); |
21 | 105k | } |
22 | 105k | let pre = if input.get_anchored().is_anchored() { |
23 | 207 | None |
24 | | } else { |
25 | 104k | dfa.get_prefilter() |
26 | | }; |
27 | | // Searching with a pattern ID is always anchored, so we should never use |
28 | | // a prefilter. |
29 | 105k | if pre.is_some() { |
30 | 26.5k | if input.get_earliest() { |
31 | 10.9k | find_fwd_imp(dfa, input, pre, true) |
32 | | } else { |
33 | 15.6k | find_fwd_imp(dfa, input, pre, false) |
34 | | } |
35 | | } else { |
36 | 78.4k | if input.get_earliest() { |
37 | 17.5k | find_fwd_imp(dfa, input, None, true) |
38 | | } else { |
39 | 60.9k | find_fwd_imp(dfa, input, None, false) |
40 | | } |
41 | | } |
42 | 105k | } regex_automata::dfa::search::find_fwd::<®ex_automata::dfa::dense::DFA<alloc::vec::Vec<u32>>> Line | Count | Source | 15 | 73.1k | pub fn find_fwd<A: Automaton + ?Sized>( | 16 | 73.1k | dfa: &A, | 17 | 73.1k | input: &Input<'_>, | 18 | 73.1k | ) -> Result<Option<HalfMatch>, MatchError> { | 19 | 73.1k | if input.is_done() { | 20 | 0 | return Ok(None); | 21 | 73.1k | } | 22 | 73.1k | let pre = if input.get_anchored().is_anchored() { | 23 | 207 | None | 24 | | } else { | 25 | 72.9k | dfa.get_prefilter() | 26 | | }; | 27 | | // Searching with a pattern ID is always anchored, so we should never use | 28 | | // a prefilter. | 29 | 73.1k | if pre.is_some() { | 30 | 26.5k | if input.get_earliest() { | 31 | 10.9k | find_fwd_imp(dfa, input, pre, true) | 32 | | } else { | 33 | 15.6k | find_fwd_imp(dfa, input, pre, false) | 34 | | } | 35 | | } else { | 36 | 46.5k | if input.get_earliest() { | 37 | 17.5k | find_fwd_imp(dfa, input, None, true) | 38 | | } else { | 39 | 29.0k | find_fwd_imp(dfa, input, None, false) | 40 | | } | 41 | | } | 42 | 73.1k | } |
regex_automata::dfa::search::find_fwd::<®ex_automata::dfa::dense::DFA<&[u32]>> Line | Count | Source | 15 | 13.2k | pub fn find_fwd<A: Automaton + ?Sized>( | 16 | 13.2k | dfa: &A, | 17 | 13.2k | input: &Input<'_>, | 18 | 13.2k | ) -> Result<Option<HalfMatch>, MatchError> { | 19 | 13.2k | if input.is_done() { | 20 | 0 | return Ok(None); | 21 | 13.2k | } | 22 | 13.2k | let pre = if input.get_anchored().is_anchored() { | 23 | 0 | None | 24 | | } else { | 25 | 13.2k | dfa.get_prefilter() | 26 | | }; | 27 | | // Searching with a pattern ID is always anchored, so we should never use | 28 | | // a prefilter. | 29 | 13.2k | if pre.is_some() { | 30 | 0 | if input.get_earliest() { | 31 | 0 | find_fwd_imp(dfa, input, pre, true) | 32 | | } else { | 33 | 0 | find_fwd_imp(dfa, input, pre, false) | 34 | | } | 35 | | } else { | 36 | 13.2k | if input.get_earliest() { | 37 | 0 | find_fwd_imp(dfa, input, None, true) | 38 | | } else { | 39 | 13.2k | find_fwd_imp(dfa, input, None, false) | 40 | | } | 41 | | } | 42 | 13.2k | } |
regex_automata::dfa::search::find_fwd::<®ex_automata::dfa::sparse::DFA<&[u8]>> Line | Count | Source | 15 | 18.7k | pub fn find_fwd<A: Automaton + ?Sized>( | 16 | 18.7k | dfa: &A, | 17 | 18.7k | input: &Input<'_>, | 18 | 18.7k | ) -> Result<Option<HalfMatch>, MatchError> { | 19 | 18.7k | if input.is_done() { | 20 | 0 | return Ok(None); | 21 | 18.7k | } | 22 | 18.7k | let pre = if input.get_anchored().is_anchored() { | 23 | 0 | None | 24 | | } else { | 25 | 18.7k | dfa.get_prefilter() | 26 | | }; | 27 | | // Searching with a pattern ID is always anchored, so we should never use | 28 | | // a prefilter. | 29 | 18.7k | if pre.is_some() { | 30 | 0 | if input.get_earliest() { | 31 | 0 | find_fwd_imp(dfa, input, pre, true) | 32 | | } else { | 33 | 0 | find_fwd_imp(dfa, input, pre, false) | 34 | | } | 35 | | } else { | 36 | 18.7k | if input.get_earliest() { | 37 | 0 | find_fwd_imp(dfa, input, None, true) | 38 | | } else { | 39 | 18.7k | find_fwd_imp(dfa, input, None, false) | 40 | | } | 41 | | } | 42 | 18.7k | } |
|
43 | | |
44 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
45 | 105k | fn find_fwd_imp<A: Automaton + ?Sized>( |
46 | 105k | dfa: &A, |
47 | 105k | input: &Input<'_>, |
48 | 105k | pre: Option<&'_ Prefilter>, |
49 | 105k | earliest: bool, |
50 | 105k | ) -> Result<Option<HalfMatch>, MatchError> { |
51 | | // See 'prefilter_restart' docs for explanation. |
52 | 105k | let universal_start = dfa.universal_start_state(Anchored::No).is_some(); |
53 | 105k | let mut mat = None; |
54 | 105k | let mut sid = init_fwd(dfa, input)?; |
55 | 104k | let mut at = input.start(); |
56 | | // This could just be a closure, but then I think it would be unsound |
57 | | // because it would need to be safe to invoke. This way, the lack of safety |
58 | | // is clearer in the code below. |
59 | | macro_rules! next_unchecked { |
60 | | ($sid:expr, $at:expr) => {{ |
61 | | let byte = *input.haystack().get_unchecked($at); |
62 | | dfa.next_state_unchecked($sid, byte) |
63 | | }}; |
64 | | } |
65 | | |
66 | 104k | if let Some(ref pre) = pre { |
67 | 26.5k | let span = Span::from(at..input.end()); |
68 | | // If a prefilter doesn't report false positives, then we don't need to |
69 | | // touch the DFA at all. However, since all matches include the pattern |
70 | | // ID, and the prefilter infrastructure doesn't report pattern IDs, we |
71 | | // limit this optimization to cases where there is exactly one pattern. |
72 | | // In that case, any match must be the 0th pattern. |
73 | 26.5k | match pre.find(input.haystack(), span) { |
74 | 8.39k | None => return Ok(mat), |
75 | 18.1k | Some(ref span) => { |
76 | 18.1k | at = span.start; |
77 | 18.1k | if !universal_start { |
78 | 2.61k | sid = prefilter_restart(dfa, &input, at)?; |
79 | 15.5k | } |
80 | | } |
81 | | } |
82 | 78.0k | } |
83 | 999k | while at < input.end() { |
84 | | // SAFETY: There are two safety invariants we need to uphold here in |
85 | | // the loops below: that 'sid' and 'prev_sid' are valid state IDs |
86 | | // for this DFA, and that 'at' is a valid index into 'haystack'. |
87 | | // For the former, we rely on the invariant that next_state* and |
88 | | // start_state_forward always returns a valid state ID (given a valid |
89 | | // state ID in the former case). For the latter safety invariant, we |
90 | | // always guard unchecked access with a check that 'at' is less than |
91 | | // 'end', where 'end <= haystack.len()'. In the unrolled loop below, we |
92 | | // ensure that 'at' is always in bounds. |
93 | | // |
94 | | // PERF: See a similar comment in src/hybrid/search.rs that justifies |
95 | | // this extra work to make the search loop fast. The same reasoning and |
96 | | // benchmarks apply here. |
97 | | let mut prev_sid; |
98 | 1.42M | while at < input.end() { |
99 | 1.42M | prev_sid = unsafe { next_unchecked!(sid, at) }; |
100 | 1.42M | if dfa.is_special_state(prev_sid) || at + 3 >= input.end() { |
101 | 816k | core::mem::swap(&mut prev_sid, &mut sid); |
102 | 816k | break; |
103 | 606k | } |
104 | 606k | at += 1; |
105 | | |
106 | 606k | sid = unsafe { next_unchecked!(prev_sid, at) }; |
107 | 606k | if dfa.is_special_state(sid) { |
108 | 68.8k | break; |
109 | 537k | } |
110 | 537k | at += 1; |
111 | | |
112 | 537k | prev_sid = unsafe { next_unchecked!(sid, at) }; |
113 | 537k | if dfa.is_special_state(prev_sid) { |
114 | 40.2k | core::mem::swap(&mut prev_sid, &mut sid); |
115 | 40.2k | break; |
116 | 497k | } |
117 | 497k | at += 1; |
118 | | |
119 | 497k | sid = unsafe { next_unchecked!(prev_sid, at) }; |
120 | 497k | if dfa.is_special_state(sid) { |
121 | 30.6k | break; |
122 | 467k | } |
123 | 467k | at += 1; |
124 | | } |
125 | 957k | if dfa.is_special_state(sid) { |
126 | 946k | if dfa.is_start_state(sid) { |
127 | 316k | if let Some(ref pre) = pre { |
128 | 205k | let span = Span::from(at..input.end()); |
129 | 205k | match pre.find(input.haystack(), span) { |
130 | 3.46k | None => return Ok(mat), |
131 | 202k | Some(ref span) => { |
132 | | // We want to skip any update to 'at' below |
133 | | // at the end of this iteration and just |
134 | | // jump immediately back to the next state |
135 | | // transition at the leading position of the |
136 | | // candidate match. |
137 | | // |
138 | | // ... but only if we actually made progress |
139 | | // with our prefilter, otherwise if the start |
140 | | // state has a self-loop, we can get stuck. |
141 | 202k | if span.start > at { |
142 | 92.2k | at = span.start; |
143 | 92.2k | if !universal_start { |
144 | 25.3k | sid = prefilter_restart(dfa, &input, at)?; |
145 | 66.8k | } |
146 | 91.8k | continue; |
147 | 110k | } |
148 | | } |
149 | | } |
150 | 111k | } else if dfa.is_accel_state(sid) { |
151 | 19.8k | let needles = dfa.accelerator(sid); |
152 | 19.8k | at = accel::find_fwd(needles, input.haystack(), at + 1) |
153 | 19.8k | .unwrap_or(input.end()); |
154 | 19.8k | continue; |
155 | 91.2k | } |
156 | 629k | } else if dfa.is_match_state(sid) { |
157 | 491k | let pattern = dfa.match_pattern(sid, 0); |
158 | 491k | mat = Some(HalfMatch::new(pattern, at)); |
159 | 491k | if earliest { |
160 | 6.82k | return Ok(mat); |
161 | 485k | } |
162 | 485k | if dfa.is_accel_state(sid) { |
163 | 90.2k | let needles = dfa.accelerator(sid); |
164 | 90.2k | at = accel::find_fwd(needles, input.haystack(), at + 1) |
165 | 90.2k | .unwrap_or(input.end()); |
166 | 90.2k | continue; |
167 | 394k | } |
168 | 138k | } else if dfa.is_accel_state(sid) { |
169 | 95.1k | let needs = dfa.accelerator(sid); |
170 | 95.1k | at = accel::find_fwd(needs, input.haystack(), at + 1) |
171 | 95.1k | .unwrap_or(input.end()); |
172 | 95.1k | continue; |
173 | 42.8k | } else if dfa.is_dead_state(sid) { |
174 | 19.6k | return Ok(mat); |
175 | | } else { |
176 | | // It's important that this is a debug_assert, since this can |
177 | | // actually be tripped even if DFA::from_bytes succeeds and |
178 | | // returns a supposedly valid DFA. |
179 | 23.1k | return Err(MatchError::quit(input.haystack()[at], at)); |
180 | | } |
181 | 10.6k | } |
182 | 606k | at += 1; |
183 | | } |
184 | 42.5k | eoi_fwd(dfa, input, &mut sid, &mut mat)?; |
185 | 42.5k | Ok(mat) |
186 | 105k | } regex_automata::dfa::search::find_fwd_imp::<®ex_automata::dfa::dense::DFA<alloc::vec::Vec<u32>>> Line | Count | Source | 45 | 73.1k | fn find_fwd_imp<A: Automaton + ?Sized>( | 46 | 73.1k | dfa: &A, | 47 | 73.1k | input: &Input<'_>, | 48 | 73.1k | pre: Option<&'_ Prefilter>, | 49 | 73.1k | earliest: bool, | 50 | 73.1k | ) -> Result<Option<HalfMatch>, MatchError> { | 51 | | // See 'prefilter_restart' docs for explanation. | 52 | 73.1k | let universal_start = dfa.universal_start_state(Anchored::No).is_some(); | 53 | 73.1k | let mut mat = None; | 54 | 73.1k | let mut sid = init_fwd(dfa, input)?; | 55 | 73.1k | let mut at = input.start(); | 56 | | // This could just be a closure, but then I think it would be unsound | 57 | | // because it would need to be safe to invoke. This way, the lack of safety | 58 | | // is clearer in the code below. | 59 | | macro_rules! next_unchecked { | 60 | | ($sid:expr, $at:expr) => {{ | 61 | | let byte = *input.haystack().get_unchecked($at); | 62 | | dfa.next_state_unchecked($sid, byte) | 63 | | }}; | 64 | | } | 65 | | | 66 | 73.1k | if let Some(ref pre) = pre { | 67 | 26.5k | let span = Span::from(at..input.end()); | 68 | | // If a prefilter doesn't report false positives, then we don't need to | 69 | | // touch the DFA at all. However, since all matches include the pattern | 70 | | // ID, and the prefilter infrastructure doesn't report pattern IDs, we | 71 | | // limit this optimization to cases where there is exactly one pattern. | 72 | | // In that case, any match must be the 0th pattern. | 73 | 26.5k | match pre.find(input.haystack(), span) { | 74 | 8.39k | None => return Ok(mat), | 75 | 18.1k | Some(ref span) => { | 76 | 18.1k | at = span.start; | 77 | 18.1k | if !universal_start { | 78 | 2.61k | sid = prefilter_restart(dfa, &input, at)?; | 79 | 15.5k | } | 80 | | } | 81 | | } | 82 | 46.5k | } | 83 | 417k | while at < input.end() { | 84 | | // SAFETY: There are two safety invariants we need to uphold here in | 85 | | // the loops below: that 'sid' and 'prev_sid' are valid state IDs | 86 | | // for this DFA, and that 'at' is a valid index into 'haystack'. | 87 | | // For the former, we rely on the invariant that next_state* and | 88 | | // start_state_forward always returns a valid state ID (given a valid | 89 | | // state ID in the former case). For the latter safety invariant, we | 90 | | // always guard unchecked access with a check that 'at' is less than | 91 | | // 'end', where 'end <= haystack.len()'. In the unrolled loop below, we | 92 | | // ensure that 'at' is always in bounds. | 93 | | // | 94 | | // PERF: See a similar comment in src/hybrid/search.rs that justifies | 95 | | // this extra work to make the search loop fast. The same reasoning and | 96 | | // benchmarks apply here. | 97 | | let mut prev_sid; | 98 | 859k | while at < input.end() { | 99 | 858k | prev_sid = unsafe { next_unchecked!(sid, at) }; | 100 | 858k | if dfa.is_special_state(prev_sid) || at + 3 >= input.end() { | 101 | 260k | core::mem::swap(&mut prev_sid, &mut sid); | 102 | 260k | break; | 103 | 598k | } | 104 | 598k | at += 1; | 105 | | | 106 | 598k | sid = unsafe { next_unchecked!(prev_sid, at) }; | 107 | 598k | if dfa.is_special_state(sid) { | 108 | 63.4k | break; | 109 | 534k | } | 110 | 534k | at += 1; | 111 | | | 112 | 534k | prev_sid = unsafe { next_unchecked!(sid, at) }; | 113 | 534k | if dfa.is_special_state(prev_sid) { | 114 | 39.8k | core::mem::swap(&mut prev_sid, &mut sid); | 115 | 39.8k | break; | 116 | 494k | } | 117 | 494k | at += 1; | 118 | | | 119 | 494k | sid = unsafe { next_unchecked!(prev_sid, at) }; | 120 | 494k | if dfa.is_special_state(sid) { | 121 | 30.3k | break; | 122 | 464k | } | 123 | 464k | at += 1; | 124 | | } | 125 | 394k | if dfa.is_special_state(sid) { | 126 | 385k | if dfa.is_start_state(sid) { | 127 | 206k | if let Some(ref pre) = pre { | 128 | 205k | let span = Span::from(at..input.end()); | 129 | 205k | match pre.find(input.haystack(), span) { | 130 | 3.46k | None => return Ok(mat), | 131 | 202k | Some(ref span) => { | 132 | | // We want to skip any update to 'at' below | 133 | | // at the end of this iteration and just | 134 | | // jump immediately back to the next state | 135 | | // transition at the leading position of the | 136 | | // candidate match. | 137 | | // | 138 | | // ... but only if we actually made progress | 139 | | // with our prefilter, otherwise if the start | 140 | | // state has a self-loop, we can get stuck. | 141 | 202k | if span.start > at { | 142 | 92.2k | at = span.start; | 143 | 92.2k | if !universal_start { | 144 | 25.3k | sid = prefilter_restart(dfa, &input, at)?; | 145 | 66.8k | } | 146 | 91.8k | continue; | 147 | 110k | } | 148 | | } | 149 | | } | 150 | 636 | } else if dfa.is_accel_state(sid) { | 151 | 0 | let needles = dfa.accelerator(sid); | 152 | 0 | at = accel::find_fwd(needles, input.haystack(), at + 1) | 153 | 0 | .unwrap_or(input.end()); | 154 | 0 | continue; | 155 | 636 | } | 156 | 178k | } else if dfa.is_match_state(sid) { | 157 | 95.1k | let pattern = dfa.match_pattern(sid, 0); | 158 | 95.1k | mat = Some(HalfMatch::new(pattern, at)); | 159 | 95.1k | if earliest { | 160 | 6.82k | return Ok(mat); | 161 | 88.3k | } | 162 | 88.3k | if dfa.is_accel_state(sid) { | 163 | 7.06k | let needles = dfa.accelerator(sid); | 164 | 7.06k | at = accel::find_fwd(needles, input.haystack(), at + 1) | 165 | 7.06k | .unwrap_or(input.end()); | 166 | 7.06k | continue; | 167 | 81.2k | } | 168 | 83.8k | } else if dfa.is_accel_state(sid) { | 169 | 52.2k | let needs = dfa.accelerator(sid); | 170 | 52.2k | at = accel::find_fwd(needs, input.haystack(), at + 1) | 171 | 52.2k | .unwrap_or(input.end()); | 172 | 52.2k | continue; | 173 | 31.5k | } else if dfa.is_dead_state(sid) { | 174 | 8.41k | return Ok(mat); | 175 | | } else { | 176 | | // It's important that this is a debug_assert, since this can | 177 | | // actually be tripped even if DFA::from_bytes succeeds and | 178 | | // returns a supposedly valid DFA. | 179 | 23.1k | return Err(MatchError::quit(input.haystack()[at], at)); | 180 | | } | 181 | 9.49k | } | 182 | 201k | at += 1; | 183 | | } | 184 | 22.2k | eoi_fwd(dfa, input, &mut sid, &mut mat)?; | 185 | 22.2k | Ok(mat) | 186 | 73.1k | } |
regex_automata::dfa::search::find_fwd_imp::<®ex_automata::dfa::dense::DFA<&[u32]>> Line | Count | Source | 45 | 13.2k | fn find_fwd_imp<A: Automaton + ?Sized>( | 46 | 13.2k | dfa: &A, | 47 | 13.2k | input: &Input<'_>, | 48 | 13.2k | pre: Option<&'_ Prefilter>, | 49 | 13.2k | earliest: bool, | 50 | 13.2k | ) -> Result<Option<HalfMatch>, MatchError> { | 51 | | // See 'prefilter_restart' docs for explanation. | 52 | 13.2k | let universal_start = dfa.universal_start_state(Anchored::No).is_some(); | 53 | 13.2k | let mut mat = None; | 54 | 13.2k | let mut sid = init_fwd(dfa, input)?; | 55 | 12.9k | let mut at = input.start(); | 56 | | // This could just be a closure, but then I think it would be unsound | 57 | | // because it would need to be safe to invoke. This way, the lack of safety | 58 | | // is clearer in the code below. | 59 | | macro_rules! next_unchecked { | 60 | | ($sid:expr, $at:expr) => {{ | 61 | | let byte = *input.haystack().get_unchecked($at); | 62 | | dfa.next_state_unchecked($sid, byte) | 63 | | }}; | 64 | | } | 65 | | | 66 | 12.9k | if let Some(ref pre) = pre { | 67 | 0 | let span = Span::from(at..input.end()); | 68 | | // If a prefilter doesn't report false positives, then we don't need to | 69 | | // touch the DFA at all. However, since all matches include the pattern | 70 | | // ID, and the prefilter infrastructure doesn't report pattern IDs, we | 71 | | // limit this optimization to cases where there is exactly one pattern. | 72 | | // In that case, any match must be the 0th pattern. | 73 | 0 | match pre.find(input.haystack(), span) { | 74 | 0 | None => return Ok(mat), | 75 | 0 | Some(ref span) => { | 76 | 0 | at = span.start; | 77 | 0 | if !universal_start { | 78 | 0 | sid = prefilter_restart(dfa, &input, at)?; | 79 | 0 | } | 80 | | } | 81 | | } | 82 | 12.9k | } | 83 | 514k | while at < input.end() { | 84 | | // SAFETY: There are two safety invariants we need to uphold here in | 85 | | // the loops below: that 'sid' and 'prev_sid' are valid state IDs | 86 | | // for this DFA, and that 'at' is a valid index into 'haystack'. | 87 | | // For the former, we rely on the invariant that next_state* and | 88 | | // start_state_forward always returns a valid state ID (given a valid | 89 | | // state ID in the former case). For the latter safety invariant, we | 90 | | // always guard unchecked access with a check that 'at' is less than | 91 | | // 'end', where 'end <= haystack.len()'. In the unrolled loop below, we | 92 | | // ensure that 'at' is always in bounds. | 93 | | // | 94 | | // PERF: See a similar comment in src/hybrid/search.rs that justifies | 95 | | // this extra work to make the search loop fast. The same reasoning and | 96 | | // benchmarks apply here. | 97 | | let mut prev_sid; | 98 | 504k | while at < input.end() { | 99 | 504k | prev_sid = unsafe { next_unchecked!(sid, at) }; | 100 | 504k | if dfa.is_special_state(prev_sid) || at + 3 >= input.end() { | 101 | 496k | core::mem::swap(&mut prev_sid, &mut sid); | 102 | 496k | break; | 103 | 8.30k | } | 104 | 8.30k | at += 1; | 105 | | | 106 | 8.30k | sid = unsafe { next_unchecked!(prev_sid, at) }; | 107 | 8.30k | if dfa.is_special_state(sid) { | 108 | 5.15k | break; | 109 | 3.14k | } | 110 | 3.14k | at += 1; | 111 | | | 112 | 3.14k | prev_sid = unsafe { next_unchecked!(sid, at) }; | 113 | 3.14k | if dfa.is_special_state(prev_sid) { | 114 | 449 | core::mem::swap(&mut prev_sid, &mut sid); | 115 | 449 | break; | 116 | 2.70k | } | 117 | 2.70k | at += 1; | 118 | | | 119 | 2.70k | sid = unsafe { next_unchecked!(prev_sid, at) }; | 120 | 2.70k | if dfa.is_special_state(sid) { | 121 | 326 | break; | 122 | 2.37k | } | 123 | 2.37k | at += 1; | 124 | | } | 125 | 502k | if dfa.is_special_state(sid) { | 126 | 501k | if dfa.is_start_state(sid) { | 127 | 103k | if let Some(ref pre) = pre { | 128 | 0 | let span = Span::from(at..input.end()); | 129 | 0 | match pre.find(input.haystack(), span) { | 130 | 0 | None => return Ok(mat), | 131 | 0 | Some(ref span) => { | 132 | | // We want to skip any update to 'at' below | 133 | | // at the end of this iteration and just | 134 | | // jump immediately back to the next state | 135 | | // transition at the leading position of the | 136 | | // candidate match. | 137 | | // | 138 | | // ... but only if we actually made progress | 139 | | // with our prefilter, otherwise if the start | 140 | | // state has a self-loop, we can get stuck. | 141 | 0 | if span.start > at { | 142 | 0 | at = span.start; | 143 | 0 | if !universal_start { | 144 | 0 | sid = prefilter_restart(dfa, &input, at)?; | 145 | 0 | } | 146 | 0 | continue; | 147 | 0 | } | 148 | | } | 149 | | } | 150 | 103k | } else if dfa.is_accel_state(sid) { | 151 | 13.7k | let needles = dfa.accelerator(sid); | 152 | 13.7k | at = accel::find_fwd(needles, input.haystack(), at + 1) | 153 | 13.7k | .unwrap_or(input.end()); | 154 | 13.7k | continue; | 155 | 89.9k | } | 156 | 397k | } else if dfa.is_match_state(sid) { | 157 | 379k | let pattern = dfa.match_pattern(sid, 0); | 158 | 379k | mat = Some(HalfMatch::new(pattern, at)); | 159 | 379k | if earliest { | 160 | 0 | return Ok(mat); | 161 | 379k | } | 162 | 379k | if dfa.is_accel_state(sid) { | 163 | 83.2k | let needles = dfa.accelerator(sid); | 164 | 83.2k | at = accel::find_fwd(needles, input.haystack(), at + 1) | 165 | 83.2k | .unwrap_or(input.end()); | 166 | 83.2k | continue; | 167 | 296k | } | 168 | 18.1k | } else if dfa.is_accel_state(sid) { | 169 | 17.3k | let needs = dfa.accelerator(sid); | 170 | 17.3k | at = accel::find_fwd(needs, input.haystack(), at + 1) | 171 | 17.3k | .unwrap_or(input.end()); | 172 | 17.3k | continue; | 173 | 838 | } else if dfa.is_dead_state(sid) { | 174 | 808 | return Ok(mat); | 175 | | } else { | 176 | | // It's important that this is a debug_assert, since this can | 177 | | // actually be tripped even if DFA::from_bytes succeeds and | 178 | | // returns a supposedly valid DFA. | 179 | 30 | return Err(MatchError::quit(input.haystack()[at], at)); | 180 | | } | 181 | 1.05k | } | 182 | 387k | at += 1; | 183 | | } | 184 | 12.1k | eoi_fwd(dfa, input, &mut sid, &mut mat)?; | 185 | 12.1k | Ok(mat) | 186 | 13.2k | } |
regex_automata::dfa::search::find_fwd_imp::<®ex_automata::dfa::sparse::DFA<&[u8]>> Line | Count | Source | 45 | 18.7k | fn find_fwd_imp<A: Automaton + ?Sized>( | 46 | 18.7k | dfa: &A, | 47 | 18.7k | input: &Input<'_>, | 48 | 18.7k | pre: Option<&'_ Prefilter>, | 49 | 18.7k | earliest: bool, | 50 | 18.7k | ) -> Result<Option<HalfMatch>, MatchError> { | 51 | | // See 'prefilter_restart' docs for explanation. | 52 | 18.7k | let universal_start = dfa.universal_start_state(Anchored::No).is_some(); | 53 | 18.7k | let mut mat = None; | 54 | 18.7k | let mut sid = init_fwd(dfa, input)?; | 55 | 18.5k | let mut at = input.start(); | 56 | | // This could just be a closure, but then I think it would be unsound | 57 | | // because it would need to be safe to invoke. This way, the lack of safety | 58 | | // is clearer in the code below. | 59 | | macro_rules! next_unchecked { | 60 | | ($sid:expr, $at:expr) => {{ | 61 | | let byte = *input.haystack().get_unchecked($at); | 62 | | dfa.next_state_unchecked($sid, byte) | 63 | | }}; | 64 | | } | 65 | | | 66 | 18.5k | if let Some(ref pre) = pre { | 67 | 0 | let span = Span::from(at..input.end()); | 68 | | // If a prefilter doesn't report false positives, then we don't need to | 69 | | // touch the DFA at all. However, since all matches include the pattern | 70 | | // ID, and the prefilter infrastructure doesn't report pattern IDs, we | 71 | | // limit this optimization to cases where there is exactly one pattern. | 72 | | // In that case, any match must be the 0th pattern. | 73 | 0 | match pre.find(input.haystack(), span) { | 74 | 0 | None => return Ok(mat), | 75 | 0 | Some(ref span) => { | 76 | 0 | at = span.start; | 77 | 0 | if !universal_start { | 78 | 0 | sid = prefilter_restart(dfa, &input, at)?; | 79 | 0 | } | 80 | | } | 81 | | } | 82 | 18.5k | } | 83 | 68.1k | while at < input.end() { | 84 | | // SAFETY: There are two safety invariants we need to uphold here in | 85 | | // the loops below: that 'sid' and 'prev_sid' are valid state IDs | 86 | | // for this DFA, and that 'at' is a valid index into 'haystack'. | 87 | | // For the former, we rely on the invariant that next_state* and | 88 | | // start_state_forward always returns a valid state ID (given a valid | 89 | | // state ID in the former case). For the latter safety invariant, we | 90 | | // always guard unchecked access with a check that 'at' is less than | 91 | | // 'end', where 'end <= haystack.len()'. In the unrolled loop below, we | 92 | | // ensure that 'at' is always in bounds. | 93 | | // | 94 | | // PERF: See a similar comment in src/hybrid/search.rs that justifies | 95 | | // this extra work to make the search loop fast. The same reasoning and | 96 | | // benchmarks apply here. | 97 | | let mut prev_sid; | 98 | 60.3k | while at < input.end() { | 99 | 60.3k | prev_sid = unsafe { next_unchecked!(sid, at) }; | 100 | 60.3k | if dfa.is_special_state(prev_sid) || at + 3 >= input.end() { | 101 | 59.9k | core::mem::swap(&mut prev_sid, &mut sid); | 102 | 59.9k | break; | 103 | 444 | } | 104 | 444 | at += 1; | 105 | | | 106 | 444 | sid = unsafe { next_unchecked!(prev_sid, at) }; | 107 | 444 | if dfa.is_special_state(sid) { | 108 | 176 | break; | 109 | 268 | } | 110 | 268 | at += 1; | 111 | | | 112 | 268 | prev_sid = unsafe { next_unchecked!(sid, at) }; | 113 | 268 | if dfa.is_special_state(prev_sid) { | 114 | 1 | core::mem::swap(&mut prev_sid, &mut sid); | 115 | 1 | break; | 116 | 267 | } | 117 | 267 | at += 1; | 118 | | | 119 | 267 | sid = unsafe { next_unchecked!(prev_sid, at) }; | 120 | 267 | if dfa.is_special_state(sid) { | 121 | 2 | break; | 122 | 265 | } | 123 | 265 | at += 1; | 124 | | } | 125 | 60.1k | if dfa.is_special_state(sid) { | 126 | 60.0k | if dfa.is_start_state(sid) { | 127 | 6.74k | if let Some(ref pre) = pre { | 128 | 0 | let span = Span::from(at..input.end()); | 129 | 0 | match pre.find(input.haystack(), span) { | 130 | 0 | None => return Ok(mat), | 131 | 0 | Some(ref span) => { | 132 | | // We want to skip any update to 'at' below | 133 | | // at the end of this iteration and just | 134 | | // jump immediately back to the next state | 135 | | // transition at the leading position of the | 136 | | // candidate match. | 137 | | // | 138 | | // ... but only if we actually made progress | 139 | | // with our prefilter, otherwise if the start | 140 | | // state has a self-loop, we can get stuck. | 141 | 0 | if span.start > at { | 142 | 0 | at = span.start; | 143 | 0 | if !universal_start { | 144 | 0 | sid = prefilter_restart(dfa, &input, at)?; | 145 | 0 | } | 146 | 0 | continue; | 147 | 0 | } | 148 | | } | 149 | | } | 150 | 6.74k | } else if dfa.is_accel_state(sid) { | 151 | 6.15k | let needles = dfa.accelerator(sid); | 152 | 6.15k | at = accel::find_fwd(needles, input.haystack(), at + 1) | 153 | 6.15k | .unwrap_or(input.end()); | 154 | 6.15k | continue; | 155 | 582 | } | 156 | 53.3k | } else if dfa.is_match_state(sid) { | 157 | 17.2k | let pattern = dfa.match_pattern(sid, 0); | 158 | 17.2k | mat = Some(HalfMatch::new(pattern, at)); | 159 | 17.2k | if earliest { | 160 | 0 | return Ok(mat); | 161 | 17.2k | } | 162 | 17.2k | if dfa.is_accel_state(sid) { | 163 | 0 | let needles = dfa.accelerator(sid); | 164 | 0 | at = accel::find_fwd(needles, input.haystack(), at + 1) | 165 | 0 | .unwrap_or(input.end()); | 166 | 0 | continue; | 167 | 17.2k | } | 168 | 36.0k | } else if dfa.is_accel_state(sid) { | 169 | 25.5k | let needs = dfa.accelerator(sid); | 170 | 25.5k | at = accel::find_fwd(needs, input.haystack(), at + 1) | 171 | 25.5k | .unwrap_or(input.end()); | 172 | 25.5k | continue; | 173 | 10.4k | } else if dfa.is_dead_state(sid) { | 174 | 10.4k | return Ok(mat); | 175 | | } else { | 176 | | // It's important that this is a debug_assert, since this can | 177 | | // actually be tripped even if DFA::from_bytes succeeds and | 178 | | // returns a supposedly valid DFA. | 179 | 15 | return Err(MatchError::quit(input.haystack()[at], at)); | 180 | | } | 181 | 75 | } | 182 | 17.9k | at += 1; | 183 | | } | 184 | 8.05k | eoi_fwd(dfa, input, &mut sid, &mut mat)?; | 185 | 8.05k | Ok(mat) | 186 | 18.7k | } |
|
187 | | |
188 | | #[inline(never)] |
189 | 4.94k | pub fn find_rev<A: Automaton + ?Sized>( |
190 | 4.94k | dfa: &A, |
191 | 4.94k | input: &Input<'_>, |
192 | 4.94k | ) -> Result<Option<HalfMatch>, MatchError> { |
193 | 4.94k | if input.is_done() { |
194 | 0 | return Ok(None); |
195 | 4.94k | } |
196 | 4.94k | if input.get_earliest() { |
197 | 724 | find_rev_imp(dfa, input, true) |
198 | | } else { |
199 | 4.22k | find_rev_imp(dfa, input, false) |
200 | | } |
201 | 4.94k | } |
202 | | |
203 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
204 | 4.94k | fn find_rev_imp<A: Automaton + ?Sized>( |
205 | 4.94k | dfa: &A, |
206 | 4.94k | input: &Input<'_>, |
207 | 4.94k | earliest: bool, |
208 | 4.94k | ) -> Result<Option<HalfMatch>, MatchError> { |
209 | 4.94k | let mut mat = None; |
210 | 4.94k | let mut sid = init_rev(dfa, input)?; |
211 | | // In reverse search, the loop below can't handle the case of searching an |
212 | | // empty slice. Ideally we could write something congruent to the forward |
213 | | // search, i.e., 'while at >= start', but 'start' might be 0. Since we use |
214 | | // an unsigned offset, 'at >= 0' is trivially always true. We could avoid |
215 | | // this extra case handling by using a signed offset, but Rust makes it |
216 | | // annoying to do. So... We just handle the empty case separately. |
217 | 4.94k | if input.start() == input.end() { |
218 | 252 | eoi_rev(dfa, input, &mut sid, &mut mat)?; |
219 | 252 | return Ok(mat); |
220 | 4.69k | } |
221 | | |
222 | 4.69k | let mut at = input.end() - 1; |
223 | | macro_rules! next_unchecked { |
224 | | ($sid:expr, $at:expr) => {{ |
225 | | let byte = *input.haystack().get_unchecked($at); |
226 | | dfa.next_state_unchecked($sid, byte) |
227 | | }}; |
228 | | } |
229 | | loop { |
230 | | // SAFETY: See comments in 'find_fwd' for a safety argument. |
231 | | let mut prev_sid; |
232 | 143k | while at >= input.start() { |
233 | 143k | prev_sid = unsafe { next_unchecked!(sid, at) }; |
234 | 143k | if dfa.is_special_state(prev_sid) |
235 | 57.1k | || at <= input.start().saturating_add(3) |
236 | | { |
237 | 88.7k | core::mem::swap(&mut prev_sid, &mut sid); |
238 | 88.7k | break; |
239 | 55.0k | } |
240 | 55.0k | at -= 1; |
241 | | |
242 | 55.0k | sid = unsafe { next_unchecked!(prev_sid, at) }; |
243 | 55.0k | if dfa.is_special_state(sid) { |
244 | 17.3k | break; |
245 | 37.6k | } |
246 | 37.6k | at -= 1; |
247 | | |
248 | 37.6k | prev_sid = unsafe { next_unchecked!(sid, at) }; |
249 | 37.6k | if dfa.is_special_state(prev_sid) { |
250 | 7.85k | core::mem::swap(&mut prev_sid, &mut sid); |
251 | 7.85k | break; |
252 | 29.8k | } |
253 | 29.8k | at -= 1; |
254 | | |
255 | 29.8k | sid = unsafe { next_unchecked!(prev_sid, at) }; |
256 | 29.8k | if dfa.is_special_state(sid) { |
257 | 8.64k | break; |
258 | 21.1k | } |
259 | 21.1k | at -= 1; |
260 | | } |
261 | 122k | if dfa.is_special_state(sid) { |
262 | 120k | if dfa.is_start_state(sid) { |
263 | 0 | if dfa.is_accel_state(sid) { |
264 | 0 | let needles = dfa.accelerator(sid); |
265 | 0 | at = accel::find_rev(needles, input.haystack(), at) |
266 | 0 | .map(|i| i + 1) |
267 | 0 | .unwrap_or(input.start()); |
268 | 0 | } |
269 | 120k | } else if dfa.is_match_state(sid) { |
270 | 82.7k | let pattern = dfa.match_pattern(sid, 0); |
271 | | // Since reverse searches report the beginning of a match |
272 | | // and the beginning is inclusive (not exclusive like the |
273 | | // end of a match), we add 1 to make it inclusive. |
274 | 82.7k | mat = Some(HalfMatch::new(pattern, at + 1)); |
275 | 82.7k | if earliest { |
276 | 191 | return Ok(mat); |
277 | 82.5k | } |
278 | 82.5k | if dfa.is_accel_state(sid) { |
279 | 6.12k | let needles = dfa.accelerator(sid); |
280 | 6.12k | at = accel::find_rev(needles, input.haystack(), at) |
281 | 6.12k | .map(|i| i + 1) |
282 | 6.12k | .unwrap_or(input.start()); |
283 | 76.4k | } |
284 | 37.7k | } else if dfa.is_accel_state(sid) { |
285 | 35.7k | let needles = dfa.accelerator(sid); |
286 | | // If the accelerator returns nothing, why don't we quit the |
287 | | // search? Well, if the accelerator doesn't find anything, that |
288 | | // doesn't mean we don't have a match. It just means that we |
289 | | // can't leave the current state given one of the 255 possible |
290 | | // byte values. However, there might be an EOI transition. So |
291 | | // we set 'at' to the end of the haystack, which will cause |
292 | | // this loop to stop and fall down into the EOI transition. |
293 | 35.7k | at = accel::find_rev(needles, input.haystack(), at) |
294 | 35.7k | .map(|i| i + 1) |
295 | 35.7k | .unwrap_or(input.start()); |
296 | 1.95k | } else if dfa.is_dead_state(sid) { |
297 | 1.29k | return Ok(mat); |
298 | | } else { |
299 | 664 | return Err(MatchError::quit(input.haystack()[at], at)); |
300 | | } |
301 | 2.11k | } |
302 | 120k | if at == input.start() { |
303 | 2.54k | break; |
304 | 117k | } |
305 | 117k | at -= 1; |
306 | | } |
307 | 2.54k | eoi_rev(dfa, input, &mut sid, &mut mat)?; |
308 | 2.54k | Ok(mat) |
309 | 4.94k | } |
310 | | |
311 | | #[inline(never)] |
312 | 0 | pub fn find_overlapping_fwd<A: Automaton + ?Sized>( |
313 | 0 | dfa: &A, |
314 | 0 | input: &Input<'_>, |
315 | 0 | state: &mut OverlappingState, |
316 | 0 | ) -> Result<(), MatchError> { |
317 | 0 | state.mat = None; |
318 | 0 | if input.is_done() { |
319 | 0 | return Ok(()); |
320 | 0 | } |
321 | 0 | let pre = if input.get_anchored().is_anchored() { |
322 | 0 | None |
323 | | } else { |
324 | 0 | dfa.get_prefilter() |
325 | | }; |
326 | 0 | if pre.is_some() { |
327 | 0 | find_overlapping_fwd_imp(dfa, input, pre, state) |
328 | | } else { |
329 | 0 | find_overlapping_fwd_imp(dfa, input, None, state) |
330 | | } |
331 | 0 | } |
332 | | |
333 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
334 | 0 | fn find_overlapping_fwd_imp<A: Automaton + ?Sized>( |
335 | 0 | dfa: &A, |
336 | 0 | input: &Input<'_>, |
337 | 0 | pre: Option<&'_ Prefilter>, |
338 | 0 | state: &mut OverlappingState, |
339 | 0 | ) -> Result<(), MatchError> { |
340 | | // See 'prefilter_restart' docs for explanation. |
341 | 0 | let universal_start = dfa.universal_start_state(Anchored::No).is_some(); |
342 | 0 | let mut sid = match state.id { |
343 | | None => { |
344 | 0 | state.at = input.start(); |
345 | 0 | init_fwd(dfa, input)? |
346 | | } |
347 | 0 | Some(sid) => { |
348 | 0 | if let Some(match_index) = state.next_match_index { |
349 | 0 | let match_len = dfa.match_len(sid); |
350 | 0 | if match_index < match_len { |
351 | 0 | state.next_match_index = Some(match_index + 1); |
352 | 0 | let pattern = dfa.match_pattern(sid, match_index); |
353 | 0 | state.mat = Some(HalfMatch::new(pattern, state.at)); |
354 | 0 | return Ok(()); |
355 | 0 | } |
356 | 0 | } |
357 | | // Once we've reported all matches at a given position, we need to |
358 | | // advance the search to the next position. |
359 | 0 | state.at += 1; |
360 | 0 | if state.at > input.end() { |
361 | 0 | return Ok(()); |
362 | 0 | } |
363 | 0 | sid |
364 | | } |
365 | | }; |
366 | | |
367 | | // NOTE: We don't optimize the crap out of this routine primarily because |
368 | | // it seems like most find_overlapping searches will have higher match |
369 | | // counts, and thus, throughput is perhaps not as important. But if you |
370 | | // have a use case for something faster, feel free to file an issue. |
371 | 0 | while state.at < input.end() { |
372 | 0 | sid = dfa.next_state(sid, input.haystack()[state.at]); |
373 | 0 | if dfa.is_special_state(sid) { |
374 | 0 | state.id = Some(sid); |
375 | 0 | if dfa.is_start_state(sid) { |
376 | 0 | if let Some(ref pre) = pre { |
377 | 0 | let span = Span::from(state.at..input.end()); |
378 | 0 | match pre.find(input.haystack(), span) { |
379 | 0 | None => return Ok(()), |
380 | 0 | Some(ref span) => { |
381 | 0 | if span.start > state.at { |
382 | 0 | state.at = span.start; |
383 | 0 | if !universal_start { |
384 | 0 | sid = prefilter_restart( |
385 | 0 | dfa, &input, state.at, |
386 | 0 | )?; |
387 | 0 | } |
388 | 0 | continue; |
389 | 0 | } |
390 | | } |
391 | | } |
392 | 0 | } else if dfa.is_accel_state(sid) { |
393 | 0 | let needles = dfa.accelerator(sid); |
394 | 0 | state.at = accel::find_fwd( |
395 | 0 | needles, |
396 | 0 | input.haystack(), |
397 | 0 | state.at + 1, |
398 | 0 | ) |
399 | 0 | .unwrap_or(input.end()); |
400 | 0 | continue; |
401 | 0 | } |
402 | 0 | } else if dfa.is_match_state(sid) { |
403 | 0 | state.next_match_index = Some(1); |
404 | 0 | let pattern = dfa.match_pattern(sid, 0); |
405 | 0 | state.mat = Some(HalfMatch::new(pattern, state.at)); |
406 | 0 | return Ok(()); |
407 | 0 | } else if dfa.is_accel_state(sid) { |
408 | 0 | let needs = dfa.accelerator(sid); |
409 | | // If the accelerator returns nothing, why don't we quit the |
410 | | // search? Well, if the accelerator doesn't find anything, that |
411 | | // doesn't mean we don't have a match. It just means that we |
412 | | // can't leave the current state given one of the 255 possible |
413 | | // byte values. However, there might be an EOI transition. So |
414 | | // we set 'at' to the end of the haystack, which will cause |
415 | | // this loop to stop and fall down into the EOI transition. |
416 | 0 | state.at = |
417 | 0 | accel::find_fwd(needs, input.haystack(), state.at + 1) |
418 | 0 | .unwrap_or(input.end()); |
419 | 0 | continue; |
420 | 0 | } else if dfa.is_dead_state(sid) { |
421 | 0 | return Ok(()); |
422 | | } else { |
423 | 0 | return Err(MatchError::quit( |
424 | 0 | input.haystack()[state.at], |
425 | 0 | state.at, |
426 | 0 | )); |
427 | | } |
428 | 0 | } |
429 | 0 | state.at += 1; |
430 | | } |
431 | | |
432 | 0 | let result = eoi_fwd(dfa, input, &mut sid, &mut state.mat); |
433 | 0 | state.id = Some(sid); |
434 | 0 | if state.mat.is_some() { |
435 | 0 | // '1' is always correct here since if we get to this point, this |
436 | 0 | // always corresponds to the first (index '0') match discovered at |
437 | 0 | // this position. So the next match to report at this position (if |
438 | 0 | // it exists) is at index '1'. |
439 | 0 | state.next_match_index = Some(1); |
440 | 0 | } |
441 | 0 | result |
442 | 0 | } |
443 | | |
444 | | #[inline(never)] |
445 | | pub(crate) fn find_overlapping_rev<A: Automaton + ?Sized>( |
446 | | dfa: &A, |
447 | | input: &Input<'_>, |
448 | | state: &mut OverlappingState, |
449 | | ) -> Result<(), MatchError> { |
450 | | state.mat = None; |
451 | | if input.is_done() { |
452 | | return Ok(()); |
453 | | } |
454 | | let mut sid = match state.id { |
455 | | None => { |
456 | | let sid = init_rev(dfa, input)?; |
457 | | state.id = Some(sid); |
458 | | if input.start() == input.end() { |
459 | | state.rev_eoi = true; |
460 | | } else { |
461 | | state.at = input.end() - 1; |
462 | | } |
463 | | sid |
464 | | } |
465 | | Some(sid) => { |
466 | | if let Some(match_index) = state.next_match_index { |
467 | | let match_len = dfa.match_len(sid); |
468 | | if match_index < match_len { |
469 | | state.next_match_index = Some(match_index + 1); |
470 | | let pattern = dfa.match_pattern(sid, match_index); |
471 | | state.mat = Some(HalfMatch::new(pattern, state.at)); |
472 | | return Ok(()); |
473 | | } |
474 | | } |
475 | | // Once we've reported all matches at a given position, we need |
476 | | // to advance the search to the next position. However, if we've |
477 | | // already followed the EOI transition, then we know we're done |
478 | | // with the search and there cannot be any more matches to report. |
479 | | if state.rev_eoi { |
480 | | return Ok(()); |
481 | | } else if state.at == input.start() { |
482 | | // At this point, we should follow the EOI transition. This |
483 | | // will cause us the skip the main loop below and fall through |
484 | | // to the final 'eoi_rev' transition. |
485 | | state.rev_eoi = true; |
486 | | } else { |
487 | | // We haven't hit the end of the search yet, so move on. |
488 | | state.at -= 1; |
489 | | } |
490 | | sid |
491 | | } |
492 | | }; |
493 | | while !state.rev_eoi { |
494 | | sid = dfa.next_state(sid, input.haystack()[state.at]); |
495 | | if dfa.is_special_state(sid) { |
496 | | state.id = Some(sid); |
497 | | if dfa.is_start_state(sid) { |
498 | | if dfa.is_accel_state(sid) { |
499 | | let needles = dfa.accelerator(sid); |
500 | | state.at = |
501 | | accel::find_rev(needles, input.haystack(), state.at) |
502 | | .map(|i| i + 1) |
503 | | .unwrap_or(input.start()); |
504 | | } |
505 | | } else if dfa.is_match_state(sid) { |
506 | | state.next_match_index = Some(1); |
507 | | let pattern = dfa.match_pattern(sid, 0); |
508 | | state.mat = Some(HalfMatch::new(pattern, state.at + 1)); |
509 | | return Ok(()); |
510 | | } else if dfa.is_accel_state(sid) { |
511 | | let needles = dfa.accelerator(sid); |
512 | | // If the accelerator returns nothing, why don't we quit the |
513 | | // search? Well, if the accelerator doesn't find anything, that |
514 | | // doesn't mean we don't have a match. It just means that we |
515 | | // can't leave the current state given one of the 255 possible |
516 | | // byte values. However, there might be an EOI transition. So |
517 | | // we set 'at' to the end of the haystack, which will cause |
518 | | // this loop to stop and fall down into the EOI transition. |
519 | | state.at = |
520 | | accel::find_rev(needles, input.haystack(), state.at) |
521 | | .map(|i| i + 1) |
522 | | .unwrap_or(input.start()); |
523 | | } else if dfa.is_dead_state(sid) { |
524 | | return Ok(()); |
525 | | } else { |
526 | | return Err(MatchError::quit( |
527 | | input.haystack()[state.at], |
528 | | state.at, |
529 | | )); |
530 | | } |
531 | | } |
532 | | if state.at == input.start() { |
533 | | break; |
534 | | } |
535 | | state.at -= 1; |
536 | | } |
537 | | |
538 | | let result = eoi_rev(dfa, input, &mut sid, &mut state.mat); |
539 | | state.rev_eoi = true; |
540 | | state.id = Some(sid); |
541 | | if state.mat.is_some() { |
542 | | // '1' is always correct here since if we get to this point, this |
543 | | // always corresponds to the first (index '0') match discovered at |
544 | | // this position. So the next match to report at this position (if |
545 | | // it exists) is at index '1'. |
546 | | state.next_match_index = Some(1); |
547 | | } |
548 | | result |
549 | | } |
550 | | |
551 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
552 | 133k | fn init_fwd<A: Automaton + ?Sized>( |
553 | 133k | dfa: &A, |
554 | 133k | input: &Input<'_>, |
555 | 133k | ) -> Result<StateID, MatchError> { |
556 | 133k | let sid = dfa.start_state_forward(input)?; |
557 | | // Start states can never be match states, since all matches are delayed |
558 | | // by 1 byte. |
559 | 132k | debug_assert!(!dfa.is_match_state(sid)); |
560 | 132k | Ok(sid) |
561 | 133k | } Unexecuted instantiation: regex_automata::dfa::search::init_fwd::<regex_automata::dfa::dense::DFA<alloc::vec::Vec<u32>>> regex_automata::dfa::search::init_fwd::<®ex_automata::dfa::dense::DFA<alloc::vec::Vec<u32>>> Line | Count | Source | 552 | 101k | fn init_fwd<A: Automaton + ?Sized>( | 553 | 101k | dfa: &A, | 554 | 101k | input: &Input<'_>, | 555 | 101k | ) -> Result<StateID, MatchError> { | 556 | 101k | let sid = dfa.start_state_forward(input)?; | 557 | | // Start states can never be match states, since all matches are delayed | 558 | | // by 1 byte. | 559 | 100k | debug_assert!(!dfa.is_match_state(sid)); | 560 | 100k | Ok(sid) | 561 | 101k | } |
regex_automata::dfa::search::init_fwd::<®ex_automata::dfa::dense::DFA<&[u32]>> Line | Count | Source | 552 | 13.2k | fn init_fwd<A: Automaton + ?Sized>( | 553 | 13.2k | dfa: &A, | 554 | 13.2k | input: &Input<'_>, | 555 | 13.2k | ) -> Result<StateID, MatchError> { | 556 | 13.2k | let sid = dfa.start_state_forward(input)?; | 557 | | // Start states can never be match states, since all matches are delayed | 558 | | // by 1 byte. | 559 | 12.9k | debug_assert!(!dfa.is_match_state(sid)); | 560 | 12.9k | Ok(sid) | 561 | 13.2k | } |
regex_automata::dfa::search::init_fwd::<®ex_automata::dfa::sparse::DFA<&[u8]>> Line | Count | Source | 552 | 18.7k | fn init_fwd<A: Automaton + ?Sized>( | 553 | 18.7k | dfa: &A, | 554 | 18.7k | input: &Input<'_>, | 555 | 18.7k | ) -> Result<StateID, MatchError> { | 556 | 18.7k | let sid = dfa.start_state_forward(input)?; | 557 | | // Start states can never be match states, since all matches are delayed | 558 | | // by 1 byte. | 559 | 18.5k | debug_assert!(!dfa.is_match_state(sid)); | 560 | 18.5k | Ok(sid) | 561 | 18.7k | } |
|
562 | | |
563 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
564 | 4.94k | fn init_rev<A: Automaton + ?Sized>( |
565 | 4.94k | dfa: &A, |
566 | 4.94k | input: &Input<'_>, |
567 | 4.94k | ) -> Result<StateID, MatchError> { |
568 | 4.94k | let sid = dfa.start_state_reverse(input)?; |
569 | | // Start states can never be match states, since all matches are delayed |
570 | | // by 1 byte. |
571 | 4.94k | debug_assert!(!dfa.is_match_state(sid)); |
572 | 4.94k | Ok(sid) |
573 | 4.94k | } |
574 | | |
575 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
576 | 42.5k | fn eoi_fwd<A: Automaton + ?Sized>( |
577 | 42.5k | dfa: &A, |
578 | 42.5k | input: &Input<'_>, |
579 | 42.5k | sid: &mut StateID, |
580 | 42.5k | mat: &mut Option<HalfMatch>, |
581 | 42.5k | ) -> Result<(), MatchError> { |
582 | 42.5k | let sp = input.get_span(); |
583 | 42.5k | match input.haystack().get(sp.end) { |
584 | 0 | Some(&b) => { |
585 | 0 | *sid = dfa.next_state(*sid, b); |
586 | 0 | if dfa.is_match_state(*sid) { |
587 | 0 | let pattern = dfa.match_pattern(*sid, 0); |
588 | 0 | *mat = Some(HalfMatch::new(pattern, sp.end)); |
589 | 0 | } else if dfa.is_quit_state(*sid) { |
590 | 0 | return Err(MatchError::quit(b, sp.end)); |
591 | 0 | } |
592 | | } |
593 | | None => { |
594 | 42.5k | *sid = dfa.next_eoi_state(*sid); |
595 | 42.5k | if dfa.is_match_state(*sid) { |
596 | 10.7k | let pattern = dfa.match_pattern(*sid, 0); |
597 | 10.7k | *mat = Some(HalfMatch::new(pattern, input.haystack().len())); |
598 | 31.7k | } |
599 | | } |
600 | | } |
601 | 42.5k | Ok(()) |
602 | 42.5k | } Unexecuted instantiation: regex_automata::dfa::search::eoi_fwd::<regex_automata::dfa::dense::DFA<alloc::vec::Vec<u32>>> regex_automata::dfa::search::eoi_fwd::<®ex_automata::dfa::dense::DFA<alloc::vec::Vec<u32>>> Line | Count | Source | 576 | 22.2k | fn eoi_fwd<A: Automaton + ?Sized>( | 577 | 22.2k | dfa: &A, | 578 | 22.2k | input: &Input<'_>, | 579 | 22.2k | sid: &mut StateID, | 580 | 22.2k | mat: &mut Option<HalfMatch>, | 581 | 22.2k | ) -> Result<(), MatchError> { | 582 | 22.2k | let sp = input.get_span(); | 583 | 22.2k | match input.haystack().get(sp.end) { | 584 | 0 | Some(&b) => { | 585 | 0 | *sid = dfa.next_state(*sid, b); | 586 | 0 | if dfa.is_match_state(*sid) { | 587 | 0 | let pattern = dfa.match_pattern(*sid, 0); | 588 | 0 | *mat = Some(HalfMatch::new(pattern, sp.end)); | 589 | 0 | } else if dfa.is_quit_state(*sid) { | 590 | 0 | return Err(MatchError::quit(b, sp.end)); | 591 | 0 | } | 592 | | } | 593 | | None => { | 594 | 22.2k | *sid = dfa.next_eoi_state(*sid); | 595 | 22.2k | if dfa.is_match_state(*sid) { | 596 | 10.6k | let pattern = dfa.match_pattern(*sid, 0); | 597 | 10.6k | *mat = Some(HalfMatch::new(pattern, input.haystack().len())); | 598 | 11.6k | } | 599 | | } | 600 | | } | 601 | 22.2k | Ok(()) | 602 | 22.2k | } |
regex_automata::dfa::search::eoi_fwd::<®ex_automata::dfa::dense::DFA<&[u32]>> Line | Count | Source | 576 | 12.1k | fn eoi_fwd<A: Automaton + ?Sized>( | 577 | 12.1k | dfa: &A, | 578 | 12.1k | input: &Input<'_>, | 579 | 12.1k | sid: &mut StateID, | 580 | 12.1k | mat: &mut Option<HalfMatch>, | 581 | 12.1k | ) -> Result<(), MatchError> { | 582 | 12.1k | let sp = input.get_span(); | 583 | 12.1k | match input.haystack().get(sp.end) { | 584 | 0 | Some(&b) => { | 585 | 0 | *sid = dfa.next_state(*sid, b); | 586 | 0 | if dfa.is_match_state(*sid) { | 587 | 0 | let pattern = dfa.match_pattern(*sid, 0); | 588 | 0 | *mat = Some(HalfMatch::new(pattern, sp.end)); | 589 | 0 | } else if dfa.is_quit_state(*sid) { | 590 | 0 | return Err(MatchError::quit(b, sp.end)); | 591 | 0 | } | 592 | | } | 593 | | None => { | 594 | 12.1k | *sid = dfa.next_eoi_state(*sid); | 595 | 12.1k | if dfa.is_match_state(*sid) { | 596 | 28 | let pattern = dfa.match_pattern(*sid, 0); | 597 | 28 | *mat = Some(HalfMatch::new(pattern, input.haystack().len())); | 598 | 12.1k | } | 599 | | } | 600 | | } | 601 | 12.1k | Ok(()) | 602 | 12.1k | } |
regex_automata::dfa::search::eoi_fwd::<®ex_automata::dfa::sparse::DFA<&[u8]>> Line | Count | Source | 576 | 8.05k | fn eoi_fwd<A: Automaton + ?Sized>( | 577 | 8.05k | dfa: &A, | 578 | 8.05k | input: &Input<'_>, | 579 | 8.05k | sid: &mut StateID, | 580 | 8.05k | mat: &mut Option<HalfMatch>, | 581 | 8.05k | ) -> Result<(), MatchError> { | 582 | 8.05k | let sp = input.get_span(); | 583 | 8.05k | match input.haystack().get(sp.end) { | 584 | 0 | Some(&b) => { | 585 | 0 | *sid = dfa.next_state(*sid, b); | 586 | 0 | if dfa.is_match_state(*sid) { | 587 | 0 | let pattern = dfa.match_pattern(*sid, 0); | 588 | 0 | *mat = Some(HalfMatch::new(pattern, sp.end)); | 589 | 0 | } else if dfa.is_quit_state(*sid) { | 590 | 0 | return Err(MatchError::quit(b, sp.end)); | 591 | 0 | } | 592 | | } | 593 | | None => { | 594 | 8.05k | *sid = dfa.next_eoi_state(*sid); | 595 | 8.05k | if dfa.is_match_state(*sid) { | 596 | 109 | let pattern = dfa.match_pattern(*sid, 0); | 597 | 109 | *mat = Some(HalfMatch::new(pattern, input.haystack().len())); | 598 | 7.94k | } | 599 | | } | 600 | | } | 601 | 8.05k | Ok(()) | 602 | 8.05k | } |
|
603 | | |
604 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
605 | 2.80k | fn eoi_rev<A: Automaton + ?Sized>( |
606 | 2.80k | dfa: &A, |
607 | 2.80k | input: &Input<'_>, |
608 | 2.80k | sid: &mut StateID, |
609 | 2.80k | mat: &mut Option<HalfMatch>, |
610 | 2.80k | ) -> Result<(), MatchError> { |
611 | 2.80k | let sp = input.get_span(); |
612 | 2.80k | if sp.start > 0 { |
613 | 0 | let byte = input.haystack()[sp.start - 1]; |
614 | 0 | *sid = dfa.next_state(*sid, byte); |
615 | 0 | if dfa.is_match_state(*sid) { |
616 | 0 | let pattern = dfa.match_pattern(*sid, 0); |
617 | 0 | *mat = Some(HalfMatch::new(pattern, sp.start)); |
618 | 0 | } else if dfa.is_quit_state(*sid) { |
619 | 0 | return Err(MatchError::quit(byte, sp.start - 1)); |
620 | 0 | } |
621 | | } else { |
622 | 2.80k | *sid = dfa.next_eoi_state(*sid); |
623 | 2.80k | if dfa.is_match_state(*sid) { |
624 | 1.60k | let pattern = dfa.match_pattern(*sid, 0); |
625 | 1.60k | *mat = Some(HalfMatch::new(pattern, 0)); |
626 | 1.60k | } |
627 | | } |
628 | 2.80k | Ok(()) |
629 | 2.80k | } |
630 | | |
631 | | /// Re-compute the starting state that a DFA should be in after finding a |
632 | | /// prefilter candidate match at the position `at`. |
633 | | /// |
634 | | /// The function with the same name has a bit more docs in hybrid/search.rs. |
635 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
636 | 27.9k | fn prefilter_restart<A: Automaton + ?Sized>( |
637 | 27.9k | dfa: &A, |
638 | 27.9k | input: &Input<'_>, |
639 | 27.9k | at: usize, |
640 | 27.9k | ) -> Result<StateID, MatchError> { |
641 | 27.9k | let mut input = input.clone(); |
642 | 27.9k | input.set_start(at); |
643 | 27.9k | init_fwd(dfa, &input) |
644 | 27.9k | } Unexecuted instantiation: regex_automata::dfa::search::prefilter_restart::<regex_automata::dfa::dense::DFA<alloc::vec::Vec<u32>>> regex_automata::dfa::search::prefilter_restart::<®ex_automata::dfa::dense::DFA<alloc::vec::Vec<u32>>> Line | Count | Source | 636 | 27.9k | fn prefilter_restart<A: Automaton + ?Sized>( | 637 | 27.9k | dfa: &A, | 638 | 27.9k | input: &Input<'_>, | 639 | 27.9k | at: usize, | 640 | 27.9k | ) -> Result<StateID, MatchError> { | 641 | 27.9k | let mut input = input.clone(); | 642 | 27.9k | input.set_start(at); | 643 | 27.9k | init_fwd(dfa, &input) | 644 | 27.9k | } |
Unexecuted instantiation: regex_automata::dfa::search::prefilter_restart::<®ex_automata::dfa::dense::DFA<&[u32]>> Unexecuted instantiation: regex_automata::dfa::search::prefilter_restart::<®ex_automata::dfa::sparse::DFA<&[u8]>> |