/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-automata-0.4.9/src/meta/wrappers.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /*! |
2 | | This module contains a boat load of wrappers around each of our internal regex |
3 | | engines. They encapsulate a few things: |
4 | | |
5 | | 1. The wrappers manage the conditional existence of the regex engine. Namely, |
6 | | the PikeVM is the only required regex engine. The rest are optional. These |
7 | | wrappers present a uniform API regardless of which engines are available. And |
8 | | availability might be determined by compile time features or by dynamic |
9 | | configuration via `meta::Config`. Encapsulating the conditional compilation |
10 | | features is in particular a huge simplification for the higher level code that |
11 | | composes these engines. |
12 | | 2. The wrappers manage construction of each engine, including skipping it if |
13 | | the engine is unavailable or configured to not be used. |
14 | | 3. The wrappers manage whether an engine *can* be used for a particular |
15 | | search configuration. For example, `BoundedBacktracker::get` only returns a |
16 | | backtracking engine when the haystack is bigger than the maximum supported |
17 | | length. The wrappers also sometimes take a position on when an engine *ought* |
18 | | to be used, but only in cases where the logic is extremely local to the engine |
19 | | itself. Otherwise, things like "choose between the backtracker and the one-pass |
20 | | DFA" are managed by the higher level meta strategy code. |
21 | | |
22 | | There are also corresponding wrappers for the various `Cache` types for each |
23 | | regex engine that needs them. If an engine is unavailable or not used, then a |
24 | | cache for it will *not* actually be allocated. |
25 | | */ |
26 | | |
27 | | use alloc::vec::Vec; |
28 | | |
29 | | use crate::{ |
30 | | meta::{ |
31 | | error::{BuildError, RetryError, RetryFailError}, |
32 | | regex::RegexInfo, |
33 | | }, |
34 | | nfa::thompson::{pikevm, NFA}, |
35 | | util::{prefilter::Prefilter, primitives::NonMaxUsize}, |
36 | | HalfMatch, Input, Match, MatchKind, PatternID, PatternSet, |
37 | | }; |
38 | | |
39 | | #[cfg(feature = "dfa-build")] |
40 | | use crate::dfa; |
41 | | #[cfg(feature = "dfa-onepass")] |
42 | | use crate::dfa::onepass; |
43 | | #[cfg(feature = "hybrid")] |
44 | | use crate::hybrid; |
45 | | #[cfg(feature = "nfa-backtrack")] |
46 | | use crate::nfa::thompson::backtrack; |
47 | | |
48 | | #[derive(Debug)] |
49 | | pub(crate) struct PikeVM(PikeVMEngine); |
50 | | |
51 | | impl PikeVM { |
52 | 0 | pub(crate) fn new( |
53 | 0 | info: &RegexInfo, |
54 | 0 | pre: Option<Prefilter>, |
55 | 0 | nfa: &NFA, |
56 | 0 | ) -> Result<PikeVM, BuildError> { |
57 | 0 | PikeVMEngine::new(info, pre, nfa).map(PikeVM) |
58 | 0 | } |
59 | | |
60 | 0 | pub(crate) fn create_cache(&self) -> PikeVMCache { |
61 | 0 | PikeVMCache::new(self) |
62 | 0 | } |
63 | | |
64 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
65 | 0 | pub(crate) fn get(&self) -> &PikeVMEngine { |
66 | 0 | &self.0 |
67 | 0 | } |
68 | | } |
69 | | |
70 | | #[derive(Debug)] |
71 | | pub(crate) struct PikeVMEngine(pikevm::PikeVM); |
72 | | |
73 | | impl PikeVMEngine { |
74 | 0 | pub(crate) fn new( |
75 | 0 | info: &RegexInfo, |
76 | 0 | pre: Option<Prefilter>, |
77 | 0 | nfa: &NFA, |
78 | 0 | ) -> Result<PikeVMEngine, BuildError> { |
79 | 0 | let pikevm_config = pikevm::Config::new() |
80 | 0 | .match_kind(info.config().get_match_kind()) |
81 | 0 | .prefilter(pre); |
82 | 0 | let engine = pikevm::Builder::new() |
83 | 0 | .configure(pikevm_config) |
84 | 0 | .build_from_nfa(nfa.clone()) |
85 | 0 | .map_err(BuildError::nfa)?; |
86 | | debug!("PikeVM built"); |
87 | 0 | Ok(PikeVMEngine(engine)) |
88 | 0 | } |
89 | | |
90 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
91 | 0 | pub(crate) fn is_match( |
92 | 0 | &self, |
93 | 0 | cache: &mut PikeVMCache, |
94 | 0 | input: &Input<'_>, |
95 | 0 | ) -> bool { |
96 | 0 | self.0.is_match(cache.0.as_mut().unwrap(), input.clone()) |
97 | 0 | } |
98 | | |
99 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
100 | 0 | pub(crate) fn search_slots( |
101 | 0 | &self, |
102 | 0 | cache: &mut PikeVMCache, |
103 | 0 | input: &Input<'_>, |
104 | 0 | slots: &mut [Option<NonMaxUsize>], |
105 | 0 | ) -> Option<PatternID> { |
106 | 0 | self.0.search_slots(cache.0.as_mut().unwrap(), input, slots) |
107 | 0 | } |
108 | | |
109 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
110 | 0 | pub(crate) fn which_overlapping_matches( |
111 | 0 | &self, |
112 | 0 | cache: &mut PikeVMCache, |
113 | 0 | input: &Input<'_>, |
114 | 0 | patset: &mut PatternSet, |
115 | 0 | ) { |
116 | 0 | self.0.which_overlapping_matches( |
117 | 0 | cache.0.as_mut().unwrap(), |
118 | 0 | input, |
119 | 0 | patset, |
120 | 0 | ) |
121 | 0 | } |
122 | | } |
123 | | |
124 | | #[derive(Clone, Debug)] |
125 | | pub(crate) struct PikeVMCache(Option<pikevm::Cache>); |
126 | | |
127 | | impl PikeVMCache { |
128 | 0 | pub(crate) fn none() -> PikeVMCache { |
129 | 0 | PikeVMCache(None) |
130 | 0 | } |
131 | | |
132 | 0 | pub(crate) fn new(builder: &PikeVM) -> PikeVMCache { |
133 | 0 | PikeVMCache(Some(builder.get().0.create_cache())) |
134 | 0 | } |
135 | | |
136 | 0 | pub(crate) fn reset(&mut self, builder: &PikeVM) { |
137 | 0 | self.0.as_mut().unwrap().reset(&builder.get().0); |
138 | 0 | } |
139 | | |
140 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
141 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
142 | 0 | } |
143 | | } |
144 | | |
145 | | #[derive(Debug)] |
146 | | pub(crate) struct BoundedBacktracker(Option<BoundedBacktrackerEngine>); |
147 | | |
148 | | impl BoundedBacktracker { |
149 | 0 | pub(crate) fn new( |
150 | 0 | info: &RegexInfo, |
151 | 0 | pre: Option<Prefilter>, |
152 | 0 | nfa: &NFA, |
153 | 0 | ) -> Result<BoundedBacktracker, BuildError> { |
154 | 0 | BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker) |
155 | 0 | } |
156 | | |
157 | 0 | pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache { |
158 | 0 | BoundedBacktrackerCache::new(self) |
159 | 0 | } |
160 | | |
161 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
162 | 0 | pub(crate) fn get( |
163 | 0 | &self, |
164 | 0 | input: &Input<'_>, |
165 | 0 | ) -> Option<&BoundedBacktrackerEngine> { |
166 | 0 | let engine = self.0.as_ref()?; |
167 | | // It is difficult to make the backtracker give up early if it is |
168 | | // guaranteed to eventually wind up in a match state. This is because |
169 | | // of the greedy nature of a backtracker: it just blindly mushes |
170 | | // forward. Every other regex engine is able to give up more quickly, |
171 | | // so even if the backtracker might be able to zip through faster than |
172 | | // (say) the PikeVM, we prefer the theoretical benefit that some other |
173 | | // engine might be able to scan much less of the haystack than the |
174 | | // backtracker. |
175 | | // |
176 | | // Now, if the haystack is really short already, then we allow the |
177 | | // backtracker to run. (This hasn't been litigated quantitatively with |
178 | | // benchmarks. Just a hunch.) |
179 | 0 | if input.get_earliest() && input.haystack().len() > 128 { |
180 | 0 | return None; |
181 | 0 | } |
182 | 0 | // If the backtracker is just going to return an error because the |
183 | 0 | // haystack is too long, then obviously do not use it. |
184 | 0 | if input.get_span().len() > engine.max_haystack_len() { |
185 | 0 | return None; |
186 | 0 | } |
187 | 0 | Some(engine) |
188 | 0 | } |
189 | | } |
190 | | |
191 | | #[derive(Debug)] |
192 | | pub(crate) struct BoundedBacktrackerEngine( |
193 | | #[cfg(feature = "nfa-backtrack")] backtrack::BoundedBacktracker, |
194 | | #[cfg(not(feature = "nfa-backtrack"))] (), |
195 | | ); |
196 | | |
197 | | impl BoundedBacktrackerEngine { |
198 | 0 | pub(crate) fn new( |
199 | 0 | info: &RegexInfo, |
200 | 0 | pre: Option<Prefilter>, |
201 | 0 | nfa: &NFA, |
202 | 0 | ) -> Result<Option<BoundedBacktrackerEngine>, BuildError> { |
203 | 0 | #[cfg(feature = "nfa-backtrack")] |
204 | 0 | { |
205 | 0 | if !info.config().get_backtrack() |
206 | 0 | || info.config().get_match_kind() != MatchKind::LeftmostFirst |
207 | | { |
208 | 0 | return Ok(None); |
209 | 0 | } |
210 | 0 | let backtrack_config = backtrack::Config::new().prefilter(pre); |
211 | 0 | let engine = backtrack::Builder::new() |
212 | 0 | .configure(backtrack_config) |
213 | 0 | .build_from_nfa(nfa.clone()) |
214 | 0 | .map_err(BuildError::nfa)?; |
215 | | debug!( |
216 | | "BoundedBacktracker built (max haystack length: {:?})", |
217 | | engine.max_haystack_len() |
218 | | ); |
219 | 0 | Ok(Some(BoundedBacktrackerEngine(engine))) |
220 | | } |
221 | | #[cfg(not(feature = "nfa-backtrack"))] |
222 | | { |
223 | | Ok(None) |
224 | | } |
225 | 0 | } |
226 | | |
227 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
228 | 0 | pub(crate) fn is_match( |
229 | 0 | &self, |
230 | 0 | cache: &mut BoundedBacktrackerCache, |
231 | 0 | input: &Input<'_>, |
232 | 0 | ) -> bool { |
233 | 0 | #[cfg(feature = "nfa-backtrack")] |
234 | 0 | { |
235 | 0 | // OK because we only permit access to this engine when we know |
236 | 0 | // the haystack is short enough for the backtracker to run without |
237 | 0 | // reporting an error. |
238 | 0 | self.0 |
239 | 0 | .try_is_match(cache.0.as_mut().unwrap(), input.clone()) |
240 | 0 | .unwrap() |
241 | 0 | } |
242 | 0 | #[cfg(not(feature = "nfa-backtrack"))] |
243 | 0 | { |
244 | 0 | // Impossible to reach because this engine is never constructed |
245 | 0 | // if the requisite features aren't enabled. |
246 | 0 | unreachable!() |
247 | 0 | } |
248 | 0 | } |
249 | | |
250 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
251 | 0 | pub(crate) fn search_slots( |
252 | 0 | &self, |
253 | 0 | cache: &mut BoundedBacktrackerCache, |
254 | 0 | input: &Input<'_>, |
255 | 0 | slots: &mut [Option<NonMaxUsize>], |
256 | 0 | ) -> Option<PatternID> { |
257 | 0 | #[cfg(feature = "nfa-backtrack")] |
258 | 0 | { |
259 | 0 | // OK because we only permit access to this engine when we know |
260 | 0 | // the haystack is short enough for the backtracker to run without |
261 | 0 | // reporting an error. |
262 | 0 | self.0 |
263 | 0 | .try_search_slots(cache.0.as_mut().unwrap(), input, slots) |
264 | 0 | .unwrap() |
265 | 0 | } |
266 | 0 | #[cfg(not(feature = "nfa-backtrack"))] |
267 | 0 | { |
268 | 0 | // Impossible to reach because this engine is never constructed |
269 | 0 | // if the requisite features aren't enabled. |
270 | 0 | unreachable!() |
271 | 0 | } |
272 | 0 | } |
273 | | |
274 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
275 | 0 | fn max_haystack_len(&self) -> usize { |
276 | 0 | #[cfg(feature = "nfa-backtrack")] |
277 | 0 | { |
278 | 0 | self.0.max_haystack_len() |
279 | 0 | } |
280 | 0 | #[cfg(not(feature = "nfa-backtrack"))] |
281 | 0 | { |
282 | 0 | // Impossible to reach because this engine is never constructed |
283 | 0 | // if the requisite features aren't enabled. |
284 | 0 | unreachable!() |
285 | 0 | } |
286 | 0 | } |
287 | | } |
288 | | |
289 | | #[derive(Clone, Debug)] |
290 | | pub(crate) struct BoundedBacktrackerCache( |
291 | | #[cfg(feature = "nfa-backtrack")] Option<backtrack::Cache>, |
292 | | #[cfg(not(feature = "nfa-backtrack"))] (), |
293 | | ); |
294 | | |
295 | | impl BoundedBacktrackerCache { |
296 | 0 | pub(crate) fn none() -> BoundedBacktrackerCache { |
297 | 0 | #[cfg(feature = "nfa-backtrack")] |
298 | 0 | { |
299 | 0 | BoundedBacktrackerCache(None) |
300 | 0 | } |
301 | 0 | #[cfg(not(feature = "nfa-backtrack"))] |
302 | 0 | { |
303 | 0 | BoundedBacktrackerCache(()) |
304 | 0 | } |
305 | 0 | } |
306 | | |
307 | 0 | pub(crate) fn new( |
308 | 0 | builder: &BoundedBacktracker, |
309 | 0 | ) -> BoundedBacktrackerCache { |
310 | 0 | #[cfg(feature = "nfa-backtrack")] |
311 | 0 | { |
312 | 0 | BoundedBacktrackerCache( |
313 | 0 | builder.0.as_ref().map(|e| e.0.create_cache()), |
314 | 0 | ) |
315 | 0 | } |
316 | 0 | #[cfg(not(feature = "nfa-backtrack"))] |
317 | 0 | { |
318 | 0 | BoundedBacktrackerCache(()) |
319 | 0 | } |
320 | 0 | } |
321 | | |
322 | 0 | pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) { |
323 | | #[cfg(feature = "nfa-backtrack")] |
324 | 0 | if let Some(ref e) = builder.0 { |
325 | 0 | self.0.as_mut().unwrap().reset(&e.0); |
326 | 0 | } |
327 | 0 | } |
328 | | |
329 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
330 | 0 | #[cfg(feature = "nfa-backtrack")] |
331 | 0 | { |
332 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
333 | 0 | } |
334 | 0 | #[cfg(not(feature = "nfa-backtrack"))] |
335 | 0 | { |
336 | 0 | 0 |
337 | 0 | } |
338 | 0 | } |
339 | | } |
340 | | |
341 | | #[derive(Debug)] |
342 | | pub(crate) struct OnePass(Option<OnePassEngine>); |
343 | | |
344 | | impl OnePass { |
345 | 0 | pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass { |
346 | 0 | OnePass(OnePassEngine::new(info, nfa)) |
347 | 0 | } |
348 | | |
349 | 0 | pub(crate) fn create_cache(&self) -> OnePassCache { |
350 | 0 | OnePassCache::new(self) |
351 | 0 | } |
352 | | |
353 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
354 | 0 | pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> { |
355 | 0 | let engine = self.0.as_ref()?; |
356 | 0 | if !input.get_anchored().is_anchored() |
357 | 0 | && !engine.get_nfa().is_always_start_anchored() |
358 | | { |
359 | 0 | return None; |
360 | 0 | } |
361 | 0 | Some(engine) |
362 | 0 | } |
363 | | |
364 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
365 | 0 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
366 | 0 | } |
367 | | } |
368 | | |
369 | | #[derive(Debug)] |
370 | | pub(crate) struct OnePassEngine( |
371 | | #[cfg(feature = "dfa-onepass")] onepass::DFA, |
372 | | #[cfg(not(feature = "dfa-onepass"))] (), |
373 | | ); |
374 | | |
375 | | impl OnePassEngine { |
376 | 0 | pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> Option<OnePassEngine> { |
377 | 0 | #[cfg(feature = "dfa-onepass")] |
378 | 0 | { |
379 | 0 | if !info.config().get_onepass() { |
380 | 0 | return None; |
381 | 0 | } |
382 | 0 | // In order to even attempt building a one-pass DFA, we require |
383 | 0 | // that we either have at least one explicit capturing group or |
384 | 0 | // there's a Unicode word boundary somewhere. If we don't have |
385 | 0 | // either of these things, then the lazy DFA will almost certainly |
386 | 0 | // be useable and be much faster. The only case where it might |
387 | 0 | // not is if the lazy DFA isn't utilizing its cache effectively, |
388 | 0 | // but in those cases, the underlying regex is almost certainly |
389 | 0 | // not one-pass or is too big to fit within the current one-pass |
390 | 0 | // implementation limits. |
391 | 0 | if info.props_union().explicit_captures_len() == 0 |
392 | 0 | && !info.props_union().look_set().contains_word_unicode() |
393 | | { |
394 | | debug!("not building OnePass because it isn't worth it"); |
395 | 0 | return None; |
396 | 0 | } |
397 | 0 | let onepass_config = onepass::Config::new() |
398 | 0 | .match_kind(info.config().get_match_kind()) |
399 | 0 | // Like for the lazy DFA, we unconditionally enable this |
400 | 0 | // because it doesn't cost much and makes the API more |
401 | 0 | // flexible. |
402 | 0 | .starts_for_each_pattern(true) |
403 | 0 | .byte_classes(info.config().get_byte_classes()) |
404 | 0 | .size_limit(info.config().get_onepass_size_limit()); |
405 | 0 | let result = onepass::Builder::new() |
406 | 0 | .configure(onepass_config) |
407 | 0 | .build_from_nfa(nfa.clone()); |
408 | 0 | let engine = match result { |
409 | 0 | Ok(engine) => engine, |
410 | 0 | Err(_err) => { |
411 | 0 | debug!("OnePass failed to build: {}", _err); |
412 | 0 | return None; |
413 | | } |
414 | | }; |
415 | | debug!("OnePass built, {} bytes", engine.memory_usage()); |
416 | 0 | Some(OnePassEngine(engine)) |
417 | | } |
418 | | #[cfg(not(feature = "dfa-onepass"))] |
419 | | { |
420 | | None |
421 | | } |
422 | 0 | } |
423 | | |
424 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
425 | 0 | pub(crate) fn search_slots( |
426 | 0 | &self, |
427 | 0 | cache: &mut OnePassCache, |
428 | 0 | input: &Input<'_>, |
429 | 0 | slots: &mut [Option<NonMaxUsize>], |
430 | 0 | ) -> Option<PatternID> { |
431 | 0 | #[cfg(feature = "dfa-onepass")] |
432 | 0 | { |
433 | 0 | // OK because we only permit getting a OnePassEngine when we know |
434 | 0 | // the search is anchored and thus an error cannot occur. |
435 | 0 | self.0 |
436 | 0 | .try_search_slots(cache.0.as_mut().unwrap(), input, slots) |
437 | 0 | .unwrap() |
438 | 0 | } |
439 | 0 | #[cfg(not(feature = "dfa-onepass"))] |
440 | 0 | { |
441 | 0 | // Impossible to reach because this engine is never constructed |
442 | 0 | // if the requisite features aren't enabled. |
443 | 0 | unreachable!() |
444 | 0 | } |
445 | 0 | } |
446 | | |
447 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
448 | 0 | #[cfg(feature = "dfa-onepass")] |
449 | 0 | { |
450 | 0 | self.0.memory_usage() |
451 | 0 | } |
452 | 0 | #[cfg(not(feature = "dfa-onepass"))] |
453 | 0 | { |
454 | 0 | // Impossible to reach because this engine is never constructed |
455 | 0 | // if the requisite features aren't enabled. |
456 | 0 | unreachable!() |
457 | 0 | } |
458 | 0 | } |
459 | | |
460 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
461 | 0 | fn get_nfa(&self) -> &NFA { |
462 | 0 | #[cfg(feature = "dfa-onepass")] |
463 | 0 | { |
464 | 0 | self.0.get_nfa() |
465 | 0 | } |
466 | 0 | #[cfg(not(feature = "dfa-onepass"))] |
467 | 0 | { |
468 | 0 | // Impossible to reach because this engine is never constructed |
469 | 0 | // if the requisite features aren't enabled. |
470 | 0 | unreachable!() |
471 | 0 | } |
472 | 0 | } |
473 | | } |
474 | | |
475 | | #[derive(Clone, Debug)] |
476 | | pub(crate) struct OnePassCache( |
477 | | #[cfg(feature = "dfa-onepass")] Option<onepass::Cache>, |
478 | | #[cfg(not(feature = "dfa-onepass"))] (), |
479 | | ); |
480 | | |
481 | | impl OnePassCache { |
482 | 0 | pub(crate) fn none() -> OnePassCache { |
483 | 0 | #[cfg(feature = "dfa-onepass")] |
484 | 0 | { |
485 | 0 | OnePassCache(None) |
486 | 0 | } |
487 | 0 | #[cfg(not(feature = "dfa-onepass"))] |
488 | 0 | { |
489 | 0 | OnePassCache(()) |
490 | 0 | } |
491 | 0 | } |
492 | | |
493 | 0 | pub(crate) fn new(builder: &OnePass) -> OnePassCache { |
494 | 0 | #[cfg(feature = "dfa-onepass")] |
495 | 0 | { |
496 | 0 | OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
497 | 0 | } |
498 | 0 | #[cfg(not(feature = "dfa-onepass"))] |
499 | 0 | { |
500 | 0 | OnePassCache(()) |
501 | 0 | } |
502 | 0 | } |
503 | | |
504 | 0 | pub(crate) fn reset(&mut self, builder: &OnePass) { |
505 | | #[cfg(feature = "dfa-onepass")] |
506 | 0 | if let Some(ref e) = builder.0 { |
507 | 0 | self.0.as_mut().unwrap().reset(&e.0); |
508 | 0 | } |
509 | 0 | } |
510 | | |
511 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
512 | 0 | #[cfg(feature = "dfa-onepass")] |
513 | 0 | { |
514 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
515 | 0 | } |
516 | 0 | #[cfg(not(feature = "dfa-onepass"))] |
517 | 0 | { |
518 | 0 | 0 |
519 | 0 | } |
520 | 0 | } |
521 | | } |
522 | | |
523 | | #[derive(Debug)] |
524 | | pub(crate) struct Hybrid(Option<HybridEngine>); |
525 | | |
526 | | impl Hybrid { |
527 | 0 | pub(crate) fn none() -> Hybrid { |
528 | 0 | Hybrid(None) |
529 | 0 | } |
530 | | |
531 | 0 | pub(crate) fn new( |
532 | 0 | info: &RegexInfo, |
533 | 0 | pre: Option<Prefilter>, |
534 | 0 | nfa: &NFA, |
535 | 0 | nfarev: &NFA, |
536 | 0 | ) -> Hybrid { |
537 | 0 | Hybrid(HybridEngine::new(info, pre, nfa, nfarev)) |
538 | 0 | } |
539 | | |
540 | 0 | pub(crate) fn create_cache(&self) -> HybridCache { |
541 | 0 | HybridCache::new(self) |
542 | 0 | } |
543 | | |
544 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
545 | 0 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> { |
546 | 0 | let engine = self.0.as_ref()?; |
547 | 0 | Some(engine) |
548 | 0 | } |
549 | | |
550 | 0 | pub(crate) fn is_some(&self) -> bool { |
551 | 0 | self.0.is_some() |
552 | 0 | } |
553 | | } |
554 | | |
555 | | #[derive(Debug)] |
556 | | pub(crate) struct HybridEngine( |
557 | | #[cfg(feature = "hybrid")] hybrid::regex::Regex, |
558 | | #[cfg(not(feature = "hybrid"))] (), |
559 | | ); |
560 | | |
561 | | impl HybridEngine { |
562 | 0 | pub(crate) fn new( |
563 | 0 | info: &RegexInfo, |
564 | 0 | pre: Option<Prefilter>, |
565 | 0 | nfa: &NFA, |
566 | 0 | nfarev: &NFA, |
567 | 0 | ) -> Option<HybridEngine> { |
568 | 0 | #[cfg(feature = "hybrid")] |
569 | 0 | { |
570 | 0 | if !info.config().get_hybrid() { |
571 | 0 | return None; |
572 | 0 | } |
573 | 0 | let dfa_config = hybrid::dfa::Config::new() |
574 | 0 | .match_kind(info.config().get_match_kind()) |
575 | 0 | .prefilter(pre.clone()) |
576 | 0 | // Enabling this is necessary for ensuring we can service any |
577 | 0 | // kind of 'Input' search without error. For the lazy DFA, |
578 | 0 | // this is not particularly costly, since the start states are |
579 | 0 | // generated lazily. |
580 | 0 | .starts_for_each_pattern(true) |
581 | 0 | .byte_classes(info.config().get_byte_classes()) |
582 | 0 | .unicode_word_boundary(true) |
583 | 0 | .specialize_start_states(pre.is_some()) |
584 | 0 | .cache_capacity(info.config().get_hybrid_cache_capacity()) |
585 | 0 | // This makes it possible for building a lazy DFA to |
586 | 0 | // fail even though the NFA has already been built. Namely, |
587 | 0 | // if the cache capacity is too small to fit some minimum |
588 | 0 | // number of states (which is small, like 4 or 5), then the |
589 | 0 | // DFA will refuse to build. |
590 | 0 | // |
591 | 0 | // We shouldn't enable this to make building always work, since |
592 | 0 | // this could cause the allocation of a cache bigger than the |
593 | 0 | // provided capacity amount. |
594 | 0 | // |
595 | 0 | // This is effectively the only reason why building a lazy DFA |
596 | 0 | // could fail. If it does, then we simply suppress the error |
597 | 0 | // and return None. |
598 | 0 | .skip_cache_capacity_check(false) |
599 | 0 | // This and enabling heuristic Unicode word boundary support |
600 | 0 | // above make it so the lazy DFA can quit at match time. |
601 | 0 | .minimum_cache_clear_count(Some(3)) |
602 | 0 | .minimum_bytes_per_state(Some(10)); |
603 | 0 | let result = hybrid::dfa::Builder::new() |
604 | 0 | .configure(dfa_config.clone()) |
605 | 0 | .build_from_nfa(nfa.clone()); |
606 | 0 | let fwd = match result { |
607 | 0 | Ok(fwd) => fwd, |
608 | 0 | Err(_err) => { |
609 | 0 | debug!("forward lazy DFA failed to build: {}", _err); |
610 | 0 | return None; |
611 | | } |
612 | | }; |
613 | 0 | let result = hybrid::dfa::Builder::new() |
614 | 0 | .configure( |
615 | 0 | dfa_config |
616 | 0 | .clone() |
617 | 0 | .match_kind(MatchKind::All) |
618 | 0 | .prefilter(None) |
619 | 0 | .specialize_start_states(false), |
620 | 0 | ) |
621 | 0 | .build_from_nfa(nfarev.clone()); |
622 | 0 | let rev = match result { |
623 | 0 | Ok(rev) => rev, |
624 | 0 | Err(_err) => { |
625 | 0 | debug!("reverse lazy DFA failed to build: {}", _err); |
626 | 0 | return None; |
627 | | } |
628 | | }; |
629 | 0 | let engine = |
630 | 0 | hybrid::regex::Builder::new().build_from_dfas(fwd, rev); |
631 | 0 | debug!("lazy DFA built"); |
632 | 0 | Some(HybridEngine(engine)) |
633 | | } |
634 | | #[cfg(not(feature = "hybrid"))] |
635 | | { |
636 | | None |
637 | | } |
638 | 0 | } |
639 | | |
640 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
641 | 0 | pub(crate) fn try_search( |
642 | 0 | &self, |
643 | 0 | cache: &mut HybridCache, |
644 | 0 | input: &Input<'_>, |
645 | 0 | ) -> Result<Option<Match>, RetryFailError> { |
646 | 0 | #[cfg(feature = "hybrid")] |
647 | 0 | { |
648 | 0 | let cache = cache.0.as_mut().unwrap(); |
649 | 0 | self.0.try_search(cache, input).map_err(|e| e.into()) |
650 | 0 | } |
651 | 0 | #[cfg(not(feature = "hybrid"))] |
652 | 0 | { |
653 | 0 | // Impossible to reach because this engine is never constructed |
654 | 0 | // if the requisite features aren't enabled. |
655 | 0 | unreachable!() |
656 | 0 | } |
657 | 0 | } |
658 | | |
659 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
660 | 0 | pub(crate) fn try_search_half_fwd( |
661 | 0 | &self, |
662 | 0 | cache: &mut HybridCache, |
663 | 0 | input: &Input<'_>, |
664 | 0 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
665 | 0 | #[cfg(feature = "hybrid")] |
666 | 0 | { |
667 | 0 | let fwd = self.0.forward(); |
668 | 0 | let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; |
669 | 0 | fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into()) |
670 | 0 | } |
671 | 0 | #[cfg(not(feature = "hybrid"))] |
672 | 0 | { |
673 | 0 | // Impossible to reach because this engine is never constructed |
674 | 0 | // if the requisite features aren't enabled. |
675 | 0 | unreachable!() |
676 | 0 | } |
677 | 0 | } |
678 | | |
679 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
680 | 0 | pub(crate) fn try_search_half_fwd_stopat( |
681 | 0 | &self, |
682 | 0 | cache: &mut HybridCache, |
683 | 0 | input: &Input<'_>, |
684 | 0 | ) -> Result<Result<HalfMatch, usize>, RetryFailError> { |
685 | 0 | #[cfg(feature = "hybrid")] |
686 | 0 | { |
687 | 0 | let dfa = self.0.forward(); |
688 | 0 | let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0; |
689 | 0 | crate::meta::stopat::hybrid_try_search_half_fwd( |
690 | 0 | dfa, &mut cache, input, |
691 | 0 | ) |
692 | 0 | } |
693 | 0 | #[cfg(not(feature = "hybrid"))] |
694 | 0 | { |
695 | 0 | // Impossible to reach because this engine is never constructed |
696 | 0 | // if the requisite features aren't enabled. |
697 | 0 | unreachable!() |
698 | 0 | } |
699 | 0 | } |
700 | | |
701 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
702 | 0 | pub(crate) fn try_search_half_rev( |
703 | 0 | &self, |
704 | 0 | cache: &mut HybridCache, |
705 | 0 | input: &Input<'_>, |
706 | 0 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
707 | 0 | #[cfg(feature = "hybrid")] |
708 | 0 | { |
709 | 0 | let rev = self.0.reverse(); |
710 | 0 | let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1; |
711 | 0 | rev.try_search_rev(&mut revcache, input).map_err(|e| e.into()) |
712 | 0 | } |
713 | 0 | #[cfg(not(feature = "hybrid"))] |
714 | 0 | { |
715 | 0 | // Impossible to reach because this engine is never constructed |
716 | 0 | // if the requisite features aren't enabled. |
717 | 0 | unreachable!() |
718 | 0 | } |
719 | 0 | } |
720 | | |
721 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
722 | 0 | pub(crate) fn try_search_half_rev_limited( |
723 | 0 | &self, |
724 | 0 | cache: &mut HybridCache, |
725 | 0 | input: &Input<'_>, |
726 | 0 | min_start: usize, |
727 | 0 | ) -> Result<Option<HalfMatch>, RetryError> { |
728 | 0 | #[cfg(feature = "hybrid")] |
729 | 0 | { |
730 | 0 | let dfa = self.0.reverse(); |
731 | 0 | let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1; |
732 | 0 | crate::meta::limited::hybrid_try_search_half_rev( |
733 | 0 | dfa, &mut cache, input, min_start, |
734 | 0 | ) |
735 | 0 | } |
736 | 0 | #[cfg(not(feature = "hybrid"))] |
737 | 0 | { |
738 | 0 | // Impossible to reach because this engine is never constructed |
739 | 0 | // if the requisite features aren't enabled. |
740 | 0 | unreachable!() |
741 | 0 | } |
742 | 0 | } |
743 | | |
744 | | #[inline] |
745 | 0 | pub(crate) fn try_which_overlapping_matches( |
746 | 0 | &self, |
747 | 0 | cache: &mut HybridCache, |
748 | 0 | input: &Input<'_>, |
749 | 0 | patset: &mut PatternSet, |
750 | 0 | ) -> Result<(), RetryFailError> { |
751 | 0 | #[cfg(feature = "hybrid")] |
752 | 0 | { |
753 | 0 | let fwd = self.0.forward(); |
754 | 0 | let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; |
755 | 0 | fwd.try_which_overlapping_matches(&mut fwdcache, input, patset) |
756 | 0 | .map_err(|e| e.into()) |
757 | 0 | } |
758 | 0 | #[cfg(not(feature = "hybrid"))] |
759 | 0 | { |
760 | 0 | // Impossible to reach because this engine is never constructed |
761 | 0 | // if the requisite features aren't enabled. |
762 | 0 | unreachable!() |
763 | 0 | } |
764 | 0 | } |
765 | | } |
766 | | |
767 | | #[derive(Clone, Debug)] |
768 | | pub(crate) struct HybridCache( |
769 | | #[cfg(feature = "hybrid")] Option<hybrid::regex::Cache>, |
770 | | #[cfg(not(feature = "hybrid"))] (), |
771 | | ); |
772 | | |
773 | | impl HybridCache { |
774 | 0 | pub(crate) fn none() -> HybridCache { |
775 | 0 | #[cfg(feature = "hybrid")] |
776 | 0 | { |
777 | 0 | HybridCache(None) |
778 | 0 | } |
779 | 0 | #[cfg(not(feature = "hybrid"))] |
780 | 0 | { |
781 | 0 | HybridCache(()) |
782 | 0 | } |
783 | 0 | } |
784 | | |
785 | 0 | pub(crate) fn new(builder: &Hybrid) -> HybridCache { |
786 | 0 | #[cfg(feature = "hybrid")] |
787 | 0 | { |
788 | 0 | HybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
789 | 0 | } |
790 | 0 | #[cfg(not(feature = "hybrid"))] |
791 | 0 | { |
792 | 0 | HybridCache(()) |
793 | 0 | } |
794 | 0 | } |
795 | | |
796 | 0 | pub(crate) fn reset(&mut self, builder: &Hybrid) { |
797 | | #[cfg(feature = "hybrid")] |
798 | 0 | if let Some(ref e) = builder.0 { |
799 | 0 | self.0.as_mut().unwrap().reset(&e.0); |
800 | 0 | } |
801 | 0 | } |
802 | | |
803 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
804 | 0 | #[cfg(feature = "hybrid")] |
805 | 0 | { |
806 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
807 | 0 | } |
808 | 0 | #[cfg(not(feature = "hybrid"))] |
809 | 0 | { |
810 | 0 | 0 |
811 | 0 | } |
812 | 0 | } |
813 | | } |
814 | | |
815 | | #[derive(Debug)] |
816 | | pub(crate) struct DFA(Option<DFAEngine>); |
817 | | |
818 | | impl DFA { |
819 | 0 | pub(crate) fn none() -> DFA { |
820 | 0 | DFA(None) |
821 | 0 | } |
822 | | |
823 | 0 | pub(crate) fn new( |
824 | 0 | info: &RegexInfo, |
825 | 0 | pre: Option<Prefilter>, |
826 | 0 | nfa: &NFA, |
827 | 0 | nfarev: &NFA, |
828 | 0 | ) -> DFA { |
829 | 0 | DFA(DFAEngine::new(info, pre, nfa, nfarev)) |
830 | 0 | } |
831 | | |
832 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
833 | 0 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> { |
834 | 0 | let engine = self.0.as_ref()?; |
835 | 0 | Some(engine) |
836 | 0 | } |
837 | | |
838 | 0 | pub(crate) fn is_some(&self) -> bool { |
839 | 0 | self.0.is_some() |
840 | 0 | } |
841 | | |
842 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
843 | 0 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
844 | 0 | } |
845 | | } |
846 | | |
847 | | #[derive(Debug)] |
848 | | pub(crate) struct DFAEngine( |
849 | | #[cfg(feature = "dfa-build")] dfa::regex::Regex, |
850 | | #[cfg(not(feature = "dfa-build"))] (), |
851 | | ); |
852 | | |
853 | | impl DFAEngine { |
854 | 0 | pub(crate) fn new( |
855 | 0 | info: &RegexInfo, |
856 | 0 | pre: Option<Prefilter>, |
857 | 0 | nfa: &NFA, |
858 | 0 | nfarev: &NFA, |
859 | 0 | ) -> Option<DFAEngine> { |
860 | 0 | #[cfg(feature = "dfa-build")] |
861 | 0 | { |
862 | 0 | if !info.config().get_dfa() { |
863 | 0 | return None; |
864 | 0 | } |
865 | 0 | // If our NFA is anything but small, don't even bother with a DFA. |
866 | 0 | if let Some(state_limit) = info.config().get_dfa_state_limit() { |
867 | 0 | if nfa.states().len() > state_limit { |
868 | 0 | debug!( |
869 | 0 | "skipping full DFA because NFA has {} states, \ |
870 | 0 | which exceeds the heuristic limit of {}", |
871 | 0 | nfa.states().len(), |
872 | 0 | state_limit, |
873 | 0 | ); |
874 | 0 | return None; |
875 | 0 | } |
876 | 0 | } |
877 | 0 | // We cut the size limit in four because the total heap used by |
878 | 0 | // DFA construction is determinization aux memory and the DFA |
879 | 0 | // itself, and those things are configured independently in the |
880 | 0 | // lower level DFA builder API. And then split that in two because |
881 | 0 | // of forward and reverse DFAs. |
882 | 0 | let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4); |
883 | 0 | let dfa_config = dfa::dense::Config::new() |
884 | 0 | .match_kind(info.config().get_match_kind()) |
885 | 0 | .prefilter(pre.clone()) |
886 | 0 | // Enabling this is necessary for ensuring we can service any |
887 | 0 | // kind of 'Input' search without error. For the full DFA, this |
888 | 0 | // can be quite costly. But since we have such a small bound |
889 | 0 | // on the size of the DFA, in practice, any multl-regexes are |
890 | 0 | // probably going to blow the limit anyway. |
891 | 0 | .starts_for_each_pattern(true) |
892 | 0 | .byte_classes(info.config().get_byte_classes()) |
893 | 0 | .unicode_word_boundary(true) |
894 | 0 | .specialize_start_states(pre.is_some()) |
895 | 0 | .determinize_size_limit(size_limit) |
896 | 0 | .dfa_size_limit(size_limit); |
897 | 0 | let result = dfa::dense::Builder::new() |
898 | 0 | .configure(dfa_config.clone()) |
899 | 0 | .build_from_nfa(&nfa); |
900 | 0 | let fwd = match result { |
901 | 0 | Ok(fwd) => fwd, |
902 | 0 | Err(_err) => { |
903 | 0 | debug!("forward full DFA failed to build: {}", _err); |
904 | 0 | return None; |
905 | 0 | } |
906 | 0 | }; |
907 | 0 | let result = dfa::dense::Builder::new() |
908 | 0 | .configure( |
909 | 0 | dfa_config |
910 | 0 | .clone() |
911 | 0 | // We never need unanchored reverse searches, so |
912 | 0 | // there's no point in building it into the DFA, which |
913 | 0 | // WILL take more space. (This isn't done for the lazy |
914 | 0 | // DFA because the DFA is, well, lazy. It doesn't pay |
915 | 0 | // the cost for supporting unanchored searches unless |
916 | 0 | // you actually do an unanchored search, which we |
917 | 0 | // don't.) |
918 | 0 | .start_kind(dfa::StartKind::Anchored) |
919 | 0 | .match_kind(MatchKind::All) |
920 | 0 | .prefilter(None) |
921 | 0 | .specialize_start_states(false), |
922 | 0 | ) |
923 | 0 | .build_from_nfa(&nfarev); |
924 | 0 | let rev = match result { |
925 | 0 | Ok(rev) => rev, |
926 | 0 | Err(_err) => { |
927 | 0 | debug!("reverse full DFA failed to build: {}", _err); |
928 | 0 | return None; |
929 | 0 | } |
930 | 0 | }; |
931 | 0 | let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev); |
932 | 0 | debug!( |
933 | 0 | "fully compiled forward and reverse DFAs built, {} bytes", |
934 | 0 | engine.forward().memory_usage() |
935 | 0 | + engine.reverse().memory_usage(), |
936 | 0 | ); |
937 | 0 | Some(DFAEngine(engine)) |
938 | 0 | } |
939 | 0 | #[cfg(not(feature = "dfa-build"))] |
940 | 0 | { |
941 | 0 | None |
942 | 0 | } |
943 | 0 | } |
944 | | |
945 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
946 | 0 | pub(crate) fn try_search( |
947 | 0 | &self, |
948 | 0 | input: &Input<'_>, |
949 | 0 | ) -> Result<Option<Match>, RetryFailError> { |
950 | 0 | #[cfg(feature = "dfa-build")] |
951 | 0 | { |
952 | 0 | self.0.try_search(input).map_err(|e| e.into()) |
953 | 0 | } |
954 | 0 | #[cfg(not(feature = "dfa-build"))] |
955 | 0 | { |
956 | 0 | // Impossible to reach because this engine is never constructed |
957 | 0 | // if the requisite features aren't enabled. |
958 | 0 | unreachable!() |
959 | | } |
960 | | } |
961 | | |
962 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
963 | 0 | pub(crate) fn try_search_half_fwd( |
964 | 0 | &self, |
965 | 0 | input: &Input<'_>, |
966 | 0 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
967 | 0 | #[cfg(feature = "dfa-build")] |
968 | 0 | { |
969 | 0 | use crate::dfa::Automaton; |
970 | 0 | self.0.forward().try_search_fwd(input).map_err(|e| e.into()) |
971 | 0 | } |
972 | 0 | #[cfg(not(feature = "dfa-build"))] |
973 | 0 | { |
974 | 0 | // Impossible to reach because this engine is never constructed |
975 | 0 | // if the requisite features aren't enabled. |
976 | 0 | unreachable!() |
977 | | } |
978 | | } |
979 | | |
980 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
981 | 0 | pub(crate) fn try_search_half_fwd_stopat( |
982 | 0 | &self, |
983 | 0 | input: &Input<'_>, |
984 | 0 | ) -> Result<Result<HalfMatch, usize>, RetryFailError> { |
985 | 0 | #[cfg(feature = "dfa-build")] |
986 | 0 | { |
987 | 0 | let dfa = self.0.forward(); |
988 | 0 | crate::meta::stopat::dfa_try_search_half_fwd(dfa, input) |
989 | 0 | } |
990 | 0 | #[cfg(not(feature = "dfa-build"))] |
991 | 0 | { |
992 | 0 | // Impossible to reach because this engine is never constructed |
993 | 0 | // if the requisite features aren't enabled. |
994 | 0 | unreachable!() |
995 | | } |
996 | | } |
997 | | |
998 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
999 | 0 | pub(crate) fn try_search_half_rev( |
1000 | 0 | &self, |
1001 | 0 | input: &Input<'_>, |
1002 | 0 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
1003 | 0 | #[cfg(feature = "dfa-build")] |
1004 | 0 | { |
1005 | 0 | use crate::dfa::Automaton; |
1006 | 0 | self.0.reverse().try_search_rev(&input).map_err(|e| e.into()) |
1007 | 0 | } |
1008 | 0 | #[cfg(not(feature = "dfa-build"))] |
1009 | 0 | { |
1010 | 0 | // Impossible to reach because this engine is never constructed |
1011 | 0 | // if the requisite features aren't enabled. |
1012 | 0 | unreachable!() |
1013 | | } |
1014 | | } |
1015 | | |
1016 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1017 | 0 | pub(crate) fn try_search_half_rev_limited( |
1018 | 0 | &self, |
1019 | 0 | input: &Input<'_>, |
1020 | 0 | min_start: usize, |
1021 | 0 | ) -> Result<Option<HalfMatch>, RetryError> { |
1022 | 0 | #[cfg(feature = "dfa-build")] |
1023 | 0 | { |
1024 | 0 | let dfa = self.0.reverse(); |
1025 | 0 | crate::meta::limited::dfa_try_search_half_rev( |
1026 | 0 | dfa, input, min_start, |
1027 | 0 | ) |
1028 | 0 | } |
1029 | 0 | #[cfg(not(feature = "dfa-build"))] |
1030 | 0 | { |
1031 | 0 | // Impossible to reach because this engine is never constructed |
1032 | 0 | // if the requisite features aren't enabled. |
1033 | 0 | unreachable!() |
1034 | | } |
1035 | | } |
1036 | | |
1037 | | #[inline] |
1038 | 0 | pub(crate) fn try_which_overlapping_matches( |
1039 | 0 | &self, |
1040 | 0 | input: &Input<'_>, |
1041 | 0 | patset: &mut PatternSet, |
1042 | 0 | ) -> Result<(), RetryFailError> { |
1043 | 0 | #[cfg(feature = "dfa-build")] |
1044 | 0 | { |
1045 | 0 | use crate::dfa::Automaton; |
1046 | 0 | self.0 |
1047 | 0 | .forward() |
1048 | 0 | .try_which_overlapping_matches(input, patset) |
1049 | 0 | .map_err(|e| e.into()) |
1050 | 0 | } |
1051 | 0 | #[cfg(not(feature = "dfa-build"))] |
1052 | 0 | { |
1053 | 0 | // Impossible to reach because this engine is never constructed |
1054 | 0 | // if the requisite features aren't enabled. |
1055 | 0 | unreachable!() |
1056 | | } |
1057 | | } |
1058 | | |
1059 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
1060 | 0 | #[cfg(feature = "dfa-build")] |
1061 | 0 | { |
1062 | 0 | self.0.forward().memory_usage() + self.0.reverse().memory_usage() |
1063 | 0 | } |
1064 | 0 | #[cfg(not(feature = "dfa-build"))] |
1065 | 0 | { |
1066 | 0 | // Impossible to reach because this engine is never constructed |
1067 | 0 | // if the requisite features aren't enabled. |
1068 | 0 | unreachable!() |
1069 | | } |
1070 | | } |
1071 | | } |
1072 | | |
1073 | | #[derive(Debug)] |
1074 | | pub(crate) struct ReverseHybrid(Option<ReverseHybridEngine>); |
1075 | | |
1076 | | impl ReverseHybrid { |
1077 | 0 | pub(crate) fn none() -> ReverseHybrid { |
1078 | 0 | ReverseHybrid(None) |
1079 | 0 | } |
1080 | | |
1081 | 0 | pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid { |
1082 | 0 | ReverseHybrid(ReverseHybridEngine::new(info, nfarev)) |
1083 | 0 | } |
1084 | | |
1085 | 0 | pub(crate) fn create_cache(&self) -> ReverseHybridCache { |
1086 | 0 | ReverseHybridCache::new(self) |
1087 | 0 | } |
1088 | | |
1089 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1090 | 0 | pub(crate) fn get( |
1091 | 0 | &self, |
1092 | 0 | _input: &Input<'_>, |
1093 | 0 | ) -> Option<&ReverseHybridEngine> { |
1094 | 0 | let engine = self.0.as_ref()?; |
1095 | 0 | Some(engine) |
1096 | 0 | } |
1097 | | } |
1098 | | |
1099 | | #[derive(Debug)] |
1100 | | pub(crate) struct ReverseHybridEngine( |
1101 | | #[cfg(feature = "hybrid")] hybrid::dfa::DFA, |
1102 | | #[cfg(not(feature = "hybrid"))] (), |
1103 | | ); |
1104 | | |
1105 | | impl ReverseHybridEngine { |
1106 | 0 | pub(crate) fn new( |
1107 | 0 | info: &RegexInfo, |
1108 | 0 | nfarev: &NFA, |
1109 | 0 | ) -> Option<ReverseHybridEngine> { |
1110 | 0 | #[cfg(feature = "hybrid")] |
1111 | 0 | { |
1112 | 0 | if !info.config().get_hybrid() { |
1113 | 0 | return None; |
1114 | 0 | } |
1115 | 0 | // Since we only use this for reverse searches, we can hard-code |
1116 | 0 | // a number of things like match semantics, prefilters, starts |
1117 | 0 | // for each pattern and so on. |
1118 | 0 | let dfa_config = hybrid::dfa::Config::new() |
1119 | 0 | .match_kind(MatchKind::All) |
1120 | 0 | .prefilter(None) |
1121 | 0 | .starts_for_each_pattern(false) |
1122 | 0 | .byte_classes(info.config().get_byte_classes()) |
1123 | 0 | .unicode_word_boundary(true) |
1124 | 0 | .specialize_start_states(false) |
1125 | 0 | .cache_capacity(info.config().get_hybrid_cache_capacity()) |
1126 | 0 | .skip_cache_capacity_check(false) |
1127 | 0 | .minimum_cache_clear_count(Some(3)) |
1128 | 0 | .minimum_bytes_per_state(Some(10)); |
1129 | 0 | let result = hybrid::dfa::Builder::new() |
1130 | 0 | .configure(dfa_config) |
1131 | 0 | .build_from_nfa(nfarev.clone()); |
1132 | 0 | let rev = match result { |
1133 | 0 | Ok(rev) => rev, |
1134 | 0 | Err(_err) => { |
1135 | 0 | debug!("lazy reverse DFA failed to build: {}", _err); |
1136 | 0 | return None; |
1137 | | } |
1138 | | }; |
1139 | | debug!("lazy reverse DFA built"); |
1140 | 0 | Some(ReverseHybridEngine(rev)) |
1141 | | } |
1142 | | #[cfg(not(feature = "hybrid"))] |
1143 | | { |
1144 | | None |
1145 | | } |
1146 | 0 | } |
1147 | | |
1148 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1149 | 0 | pub(crate) fn try_search_half_rev_limited( |
1150 | 0 | &self, |
1151 | 0 | cache: &mut ReverseHybridCache, |
1152 | 0 | input: &Input<'_>, |
1153 | 0 | min_start: usize, |
1154 | 0 | ) -> Result<Option<HalfMatch>, RetryError> { |
1155 | 0 | #[cfg(feature = "hybrid")] |
1156 | 0 | { |
1157 | 0 | let dfa = &self.0; |
1158 | 0 | let mut cache = cache.0.as_mut().unwrap(); |
1159 | 0 | crate::meta::limited::hybrid_try_search_half_rev( |
1160 | 0 | dfa, &mut cache, input, min_start, |
1161 | 0 | ) |
1162 | 0 | } |
1163 | 0 | #[cfg(not(feature = "hybrid"))] |
1164 | 0 | { |
1165 | 0 | // Impossible to reach because this engine is never constructed |
1166 | 0 | // if the requisite features aren't enabled. |
1167 | 0 | unreachable!() |
1168 | 0 | } |
1169 | 0 | } |
1170 | | } |
1171 | | |
1172 | | #[derive(Clone, Debug)] |
1173 | | pub(crate) struct ReverseHybridCache( |
1174 | | #[cfg(feature = "hybrid")] Option<hybrid::dfa::Cache>, |
1175 | | #[cfg(not(feature = "hybrid"))] (), |
1176 | | ); |
1177 | | |
1178 | | impl ReverseHybridCache { |
1179 | 0 | pub(crate) fn none() -> ReverseHybridCache { |
1180 | 0 | #[cfg(feature = "hybrid")] |
1181 | 0 | { |
1182 | 0 | ReverseHybridCache(None) |
1183 | 0 | } |
1184 | 0 | #[cfg(not(feature = "hybrid"))] |
1185 | 0 | { |
1186 | 0 | ReverseHybridCache(()) |
1187 | 0 | } |
1188 | 0 | } |
1189 | | |
1190 | 0 | pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache { |
1191 | 0 | #[cfg(feature = "hybrid")] |
1192 | 0 | { |
1193 | 0 | ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
1194 | 0 | } |
1195 | 0 | #[cfg(not(feature = "hybrid"))] |
1196 | 0 | { |
1197 | 0 | ReverseHybridCache(()) |
1198 | 0 | } |
1199 | 0 | } |
1200 | | |
1201 | 0 | pub(crate) fn reset(&mut self, builder: &ReverseHybrid) { |
1202 | | #[cfg(feature = "hybrid")] |
1203 | 0 | if let Some(ref e) = builder.0 { |
1204 | 0 | self.0.as_mut().unwrap().reset(&e.0); |
1205 | 0 | } |
1206 | 0 | } |
1207 | | |
1208 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
1209 | 0 | #[cfg(feature = "hybrid")] |
1210 | 0 | { |
1211 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
1212 | 0 | } |
1213 | 0 | #[cfg(not(feature = "hybrid"))] |
1214 | 0 | { |
1215 | 0 | 0 |
1216 | 0 | } |
1217 | 0 | } |
1218 | | } |
1219 | | |
1220 | | #[derive(Debug)] |
1221 | | pub(crate) struct ReverseDFA(Option<ReverseDFAEngine>); |
1222 | | |
1223 | | impl ReverseDFA { |
1224 | 0 | pub(crate) fn none() -> ReverseDFA { |
1225 | 0 | ReverseDFA(None) |
1226 | 0 | } |
1227 | | |
1228 | 0 | pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA { |
1229 | 0 | ReverseDFA(ReverseDFAEngine::new(info, nfarev)) |
1230 | 0 | } |
1231 | | |
1232 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1233 | 0 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> { |
1234 | 0 | let engine = self.0.as_ref()?; |
1235 | 0 | Some(engine) |
1236 | 0 | } |
1237 | | |
1238 | 0 | pub(crate) fn is_some(&self) -> bool { |
1239 | 0 | self.0.is_some() |
1240 | 0 | } |
1241 | | |
1242 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
1243 | 0 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
1244 | 0 | } |
1245 | | } |
1246 | | |
1247 | | #[derive(Debug)] |
1248 | | pub(crate) struct ReverseDFAEngine( |
1249 | | #[cfg(feature = "dfa-build")] dfa::dense::DFA<Vec<u32>>, |
1250 | | #[cfg(not(feature = "dfa-build"))] (), |
1251 | | ); |
1252 | | |
1253 | | impl ReverseDFAEngine { |
1254 | 0 | pub(crate) fn new( |
1255 | 0 | info: &RegexInfo, |
1256 | 0 | nfarev: &NFA, |
1257 | 0 | ) -> Option<ReverseDFAEngine> { |
1258 | 0 | #[cfg(feature = "dfa-build")] |
1259 | 0 | { |
1260 | 0 | if !info.config().get_dfa() { |
1261 | 0 | return None; |
1262 | 0 | } |
1263 | 0 | // If our NFA is anything but small, don't even bother with a DFA. |
1264 | 0 | if let Some(state_limit) = info.config().get_dfa_state_limit() { |
1265 | 0 | if nfarev.states().len() > state_limit { |
1266 | 0 | debug!( |
1267 | 0 | "skipping full reverse DFA because NFA has {} states, \ |
1268 | 0 | which exceeds the heuristic limit of {}", |
1269 | 0 | nfarev.states().len(), |
1270 | 0 | state_limit, |
1271 | 0 | ); |
1272 | 0 | return None; |
1273 | 0 | } |
1274 | 0 | } |
1275 | 0 | // We cut the size limit in two because the total heap used by DFA |
1276 | 0 | // construction is determinization aux memory and the DFA itself, |
1277 | 0 | // and those things are configured independently in the lower level |
1278 | 0 | // DFA builder API. |
1279 | 0 | let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2); |
1280 | 0 | // Since we only use this for reverse searches, we can hard-code |
1281 | 0 | // a number of things like match semantics, prefilters, starts |
1282 | 0 | // for each pattern and so on. We also disable acceleration since |
1283 | 0 | // it's incompatible with limited searches (which is the only |
1284 | 0 | // operation we support for this kind of engine at the moment). |
1285 | 0 | let dfa_config = dfa::dense::Config::new() |
1286 | 0 | .match_kind(MatchKind::All) |
1287 | 0 | .prefilter(None) |
1288 | 0 | .accelerate(false) |
1289 | 0 | .start_kind(dfa::StartKind::Anchored) |
1290 | 0 | .starts_for_each_pattern(false) |
1291 | 0 | .byte_classes(info.config().get_byte_classes()) |
1292 | 0 | .unicode_word_boundary(true) |
1293 | 0 | .specialize_start_states(false) |
1294 | 0 | .determinize_size_limit(size_limit) |
1295 | 0 | .dfa_size_limit(size_limit); |
1296 | 0 | let result = dfa::dense::Builder::new() |
1297 | 0 | .configure(dfa_config) |
1298 | 0 | .build_from_nfa(&nfarev); |
1299 | 0 | let rev = match result { |
1300 | 0 | Ok(rev) => rev, |
1301 | 0 | Err(_err) => { |
1302 | 0 | debug!("full reverse DFA failed to build: {}", _err); |
1303 | 0 | return None; |
1304 | 0 | } |
1305 | 0 | }; |
1306 | 0 | debug!( |
1307 | 0 | "fully compiled reverse DFA built, {} bytes", |
1308 | 0 | rev.memory_usage() |
1309 | 0 | ); |
1310 | 0 | Some(ReverseDFAEngine(rev)) |
1311 | 0 | } |
1312 | 0 | #[cfg(not(feature = "dfa-build"))] |
1313 | 0 | { |
1314 | 0 | None |
1315 | 0 | } |
1316 | 0 | } |
1317 | | |
1318 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1319 | 0 | pub(crate) fn try_search_half_rev_limited( |
1320 | 0 | &self, |
1321 | 0 | input: &Input<'_>, |
1322 | 0 | min_start: usize, |
1323 | 0 | ) -> Result<Option<HalfMatch>, RetryError> { |
1324 | 0 | #[cfg(feature = "dfa-build")] |
1325 | 0 | { |
1326 | 0 | let dfa = &self.0; |
1327 | 0 | crate::meta::limited::dfa_try_search_half_rev( |
1328 | 0 | dfa, input, min_start, |
1329 | 0 | ) |
1330 | 0 | } |
1331 | 0 | #[cfg(not(feature = "dfa-build"))] |
1332 | 0 | { |
1333 | 0 | // Impossible to reach because this engine is never constructed |
1334 | 0 | // if the requisite features aren't enabled. |
1335 | 0 | unreachable!() |
1336 | | } |
1337 | | } |
1338 | | |
1339 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
1340 | 0 | #[cfg(feature = "dfa-build")] |
1341 | 0 | { |
1342 | 0 | self.0.memory_usage() |
1343 | 0 | } |
1344 | 0 | #[cfg(not(feature = "dfa-build"))] |
1345 | 0 | { |
1346 | 0 | // Impossible to reach because this engine is never constructed |
1347 | 0 | // if the requisite features aren't enabled. |
1348 | 0 | unreachable!() |
1349 | | } |
1350 | | } |
1351 | | } |