/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-automata-0.4.13/src/meta/wrappers.rs
Line | Count | Source |
1 | | /*! |
2 | | This module contains a boat load of wrappers around each of our internal regex |
3 | | engines. They encapsulate a few things: |
4 | | |
5 | | 1. The wrappers manage the conditional existence of the regex engine. Namely, |
6 | | the PikeVM is the only required regex engine. The rest are optional. These |
7 | | wrappers present a uniform API regardless of which engines are available. And |
8 | | availability might be determined by compile time features or by dynamic |
9 | | configuration via `meta::Config`. Encapsulating the conditional compilation |
10 | | features is in particular a huge simplification for the higher level code that |
11 | | composes these engines. |
12 | | 2. The wrappers manage construction of each engine, including skipping it if |
13 | | the engine is unavailable or configured to not be used. |
14 | | 3. The wrappers manage whether an engine *can* be used for a particular |
15 | | search configuration. For example, `BoundedBacktracker::get` only returns a |
16 | | backtracking engine when the haystack is bigger than the maximum supported |
17 | | length. The wrappers also sometimes take a position on when an engine *ought* |
18 | | to be used, but only in cases where the logic is extremely local to the engine |
19 | | itself. Otherwise, things like "choose between the backtracker and the one-pass |
20 | | DFA" are managed by the higher level meta strategy code. |
21 | | |
22 | | There are also corresponding wrappers for the various `Cache` types for each |
23 | | regex engine that needs them. If an engine is unavailable or not used, then a |
24 | | cache for it will *not* actually be allocated. |
25 | | */ |
26 | | |
27 | | use alloc::vec::Vec; |
28 | | |
29 | | use crate::{ |
30 | | meta::{ |
31 | | error::{BuildError, RetryError, RetryFailError}, |
32 | | regex::RegexInfo, |
33 | | }, |
34 | | nfa::thompson::{pikevm, NFA}, |
35 | | util::{prefilter::Prefilter, primitives::NonMaxUsize}, |
36 | | HalfMatch, Input, Match, MatchKind, PatternID, PatternSet, |
37 | | }; |
38 | | |
39 | | #[cfg(feature = "dfa-build")] |
40 | | use crate::dfa; |
41 | | #[cfg(feature = "dfa-onepass")] |
42 | | use crate::dfa::onepass; |
43 | | #[cfg(feature = "hybrid")] |
44 | | use crate::hybrid; |
45 | | #[cfg(feature = "nfa-backtrack")] |
46 | | use crate::nfa::thompson::backtrack; |
47 | | |
48 | | #[derive(Debug)] |
49 | | pub(crate) struct PikeVM(PikeVMEngine); |
50 | | |
51 | | impl PikeVM { |
52 | 0 | pub(crate) fn new( |
53 | 0 | info: &RegexInfo, |
54 | 0 | pre: Option<Prefilter>, |
55 | 0 | nfa: &NFA, |
56 | 0 | ) -> Result<PikeVM, BuildError> { |
57 | 0 | PikeVMEngine::new(info, pre, nfa).map(PikeVM) |
58 | 0 | } |
59 | | |
60 | 0 | pub(crate) fn create_cache(&self) -> PikeVMCache { |
61 | 0 | PikeVMCache::none() |
62 | 0 | } |
63 | | |
64 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
65 | 0 | pub(crate) fn get(&self) -> &PikeVMEngine { |
66 | 0 | &self.0 |
67 | 0 | } |
68 | | } |
69 | | |
70 | | #[derive(Debug)] |
71 | | pub(crate) struct PikeVMEngine(pikevm::PikeVM); |
72 | | |
73 | | impl PikeVMEngine { |
74 | 0 | pub(crate) fn new( |
75 | 0 | info: &RegexInfo, |
76 | 0 | pre: Option<Prefilter>, |
77 | 0 | nfa: &NFA, |
78 | 0 | ) -> Result<PikeVMEngine, BuildError> { |
79 | 0 | let pikevm_config = pikevm::Config::new() |
80 | 0 | .match_kind(info.config().get_match_kind()) |
81 | 0 | .prefilter(pre); |
82 | 0 | let engine = pikevm::Builder::new() |
83 | 0 | .configure(pikevm_config) |
84 | 0 | .build_from_nfa(nfa.clone()) |
85 | 0 | .map_err(BuildError::nfa)?; |
86 | | debug!("PikeVM built"); |
87 | 0 | Ok(PikeVMEngine(engine)) |
88 | 0 | } |
89 | | |
90 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
91 | 0 | pub(crate) fn is_match( |
92 | 0 | &self, |
93 | 0 | cache: &mut PikeVMCache, |
94 | 0 | input: &Input<'_>, |
95 | 0 | ) -> bool { |
96 | 0 | self.0.is_match(cache.get(&self.0), input.clone()) |
97 | 0 | } |
98 | | |
99 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
100 | 0 | pub(crate) fn search_slots( |
101 | 0 | &self, |
102 | 0 | cache: &mut PikeVMCache, |
103 | 0 | input: &Input<'_>, |
104 | 0 | slots: &mut [Option<NonMaxUsize>], |
105 | 0 | ) -> Option<PatternID> { |
106 | 0 | self.0.search_slots(cache.get(&self.0), input, slots) |
107 | 0 | } |
108 | | |
109 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
110 | 0 | pub(crate) fn which_overlapping_matches( |
111 | 0 | &self, |
112 | 0 | cache: &mut PikeVMCache, |
113 | 0 | input: &Input<'_>, |
114 | 0 | patset: &mut PatternSet, |
115 | 0 | ) { |
116 | 0 | self.0.which_overlapping_matches(cache.get(&self.0), input, patset) |
117 | 0 | } |
118 | | } |
119 | | |
120 | | #[derive(Clone, Debug)] |
121 | | pub(crate) struct PikeVMCache(Option<pikevm::Cache>); |
122 | | |
123 | | impl PikeVMCache { |
124 | 0 | pub(crate) fn none() -> PikeVMCache { |
125 | 0 | PikeVMCache(None) |
126 | 0 | } |
127 | | |
128 | 0 | pub(crate) fn reset(&mut self, builder: &PikeVM) { |
129 | 0 | self.get(&builder.get().0).reset(&builder.get().0); |
130 | 0 | } |
131 | | |
132 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
133 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
134 | 0 | } |
135 | | |
136 | 0 | fn get(&mut self, vm: &pikevm::PikeVM) -> &mut pikevm::Cache { |
137 | 0 | self.0.get_or_insert_with(|| vm.create_cache()) |
138 | 0 | } |
139 | | } |
140 | | |
141 | | #[derive(Debug)] |
142 | | pub(crate) struct BoundedBacktracker(Option<BoundedBacktrackerEngine>); |
143 | | |
144 | | impl BoundedBacktracker { |
145 | 0 | pub(crate) fn new( |
146 | 0 | info: &RegexInfo, |
147 | 0 | pre: Option<Prefilter>, |
148 | 0 | nfa: &NFA, |
149 | 0 | ) -> Result<BoundedBacktracker, BuildError> { |
150 | 0 | BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker) |
151 | 0 | } |
152 | | |
153 | 0 | pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache { |
154 | 0 | BoundedBacktrackerCache::none() |
155 | 0 | } |
156 | | |
157 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
158 | 0 | pub(crate) fn get( |
159 | 0 | &self, |
160 | 0 | input: &Input<'_>, |
161 | 0 | ) -> Option<&BoundedBacktrackerEngine> { |
162 | 0 | let engine = self.0.as_ref()?; |
163 | | // It is difficult to make the backtracker give up early if it is |
164 | | // guaranteed to eventually wind up in a match state. This is because |
165 | | // of the greedy nature of a backtracker: it just blindly mushes |
166 | | // forward. Every other regex engine is able to give up more quickly, |
167 | | // so even if the backtracker might be able to zip through faster than |
168 | | // (say) the PikeVM, we prefer the theoretical benefit that some other |
169 | | // engine might be able to scan much less of the haystack than the |
170 | | // backtracker. |
171 | | // |
172 | | // Now, if the haystack is really short already, then we allow the |
173 | | // backtracker to run. (This hasn't been litigated quantitatively with |
174 | | // benchmarks. Just a hunch.) |
175 | 0 | if input.get_earliest() && input.haystack().len() > 128 { |
176 | 0 | return None; |
177 | 0 | } |
178 | | // If the backtracker is just going to return an error because the |
179 | | // haystack is too long, then obviously do not use it. |
180 | 0 | if input.get_span().len() > engine.max_haystack_len() { |
181 | 0 | return None; |
182 | 0 | } |
183 | 0 | Some(engine) |
184 | 0 | } |
185 | | } |
186 | | |
187 | | #[derive(Debug)] |
188 | | pub(crate) struct BoundedBacktrackerEngine( |
189 | | #[cfg(feature = "nfa-backtrack")] backtrack::BoundedBacktracker, |
190 | | #[cfg(not(feature = "nfa-backtrack"))] (), |
191 | | ); |
192 | | |
193 | | impl BoundedBacktrackerEngine { |
194 | 0 | pub(crate) fn new( |
195 | 0 | info: &RegexInfo, |
196 | 0 | pre: Option<Prefilter>, |
197 | 0 | nfa: &NFA, |
198 | 0 | ) -> Result<Option<BoundedBacktrackerEngine>, BuildError> { |
199 | | #[cfg(feature = "nfa-backtrack")] |
200 | | { |
201 | 0 | if !info.config().get_backtrack() |
202 | 0 | || info.config().get_match_kind() != MatchKind::LeftmostFirst |
203 | | { |
204 | 0 | return Ok(None); |
205 | 0 | } |
206 | 0 | let backtrack_config = backtrack::Config::new().prefilter(pre); |
207 | 0 | let engine = backtrack::Builder::new() |
208 | 0 | .configure(backtrack_config) |
209 | 0 | .build_from_nfa(nfa.clone()) |
210 | 0 | .map_err(BuildError::nfa)?; |
211 | | debug!( |
212 | | "BoundedBacktracker built (max haystack length: {:?})", |
213 | | engine.max_haystack_len() |
214 | | ); |
215 | 0 | Ok(Some(BoundedBacktrackerEngine(engine))) |
216 | | } |
217 | | #[cfg(not(feature = "nfa-backtrack"))] |
218 | | { |
219 | | Ok(None) |
220 | | } |
221 | 0 | } |
222 | | |
223 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
224 | 0 | pub(crate) fn is_match( |
225 | 0 | &self, |
226 | 0 | cache: &mut BoundedBacktrackerCache, |
227 | 0 | input: &Input<'_>, |
228 | 0 | ) -> bool { |
229 | | #[cfg(feature = "nfa-backtrack")] |
230 | | { |
231 | | // OK because we only permit access to this engine when we know |
232 | | // the haystack is short enough for the backtracker to run without |
233 | | // reporting an error. |
234 | 0 | self.0.try_is_match(cache.get(&self.0), input.clone()).unwrap() |
235 | | } |
236 | | #[cfg(not(feature = "nfa-backtrack"))] |
237 | | { |
238 | | // Impossible to reach because this engine is never constructed |
239 | | // if the requisite features aren't enabled. |
240 | | unreachable!() |
241 | | } |
242 | 0 | } |
243 | | |
244 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
245 | 0 | pub(crate) fn search_slots( |
246 | 0 | &self, |
247 | 0 | cache: &mut BoundedBacktrackerCache, |
248 | 0 | input: &Input<'_>, |
249 | 0 | slots: &mut [Option<NonMaxUsize>], |
250 | 0 | ) -> Option<PatternID> { |
251 | | #[cfg(feature = "nfa-backtrack")] |
252 | | { |
253 | | // OK because we only permit access to this engine when we know |
254 | | // the haystack is short enough for the backtracker to run without |
255 | | // reporting an error. |
256 | 0 | self.0.try_search_slots(cache.get(&self.0), input, slots).unwrap() |
257 | | } |
258 | | #[cfg(not(feature = "nfa-backtrack"))] |
259 | | { |
260 | | // Impossible to reach because this engine is never constructed |
261 | | // if the requisite features aren't enabled. |
262 | | unreachable!() |
263 | | } |
264 | 0 | } |
265 | | |
266 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
267 | 0 | fn max_haystack_len(&self) -> usize { |
268 | | #[cfg(feature = "nfa-backtrack")] |
269 | | { |
270 | 0 | self.0.max_haystack_len() |
271 | | } |
272 | | #[cfg(not(feature = "nfa-backtrack"))] |
273 | | { |
274 | | // Impossible to reach because this engine is never constructed |
275 | | // if the requisite features aren't enabled. |
276 | | unreachable!() |
277 | | } |
278 | 0 | } |
279 | | } |
280 | | |
281 | | #[derive(Clone, Debug)] |
282 | | pub(crate) struct BoundedBacktrackerCache( |
283 | | #[cfg(feature = "nfa-backtrack")] Option<backtrack::Cache>, |
284 | | #[cfg(not(feature = "nfa-backtrack"))] (), |
285 | | ); |
286 | | |
287 | | impl BoundedBacktrackerCache { |
288 | 0 | pub(crate) fn none() -> BoundedBacktrackerCache { |
289 | | #[cfg(feature = "nfa-backtrack")] |
290 | | { |
291 | 0 | BoundedBacktrackerCache(None) |
292 | | } |
293 | | #[cfg(not(feature = "nfa-backtrack"))] |
294 | | { |
295 | | BoundedBacktrackerCache(()) |
296 | | } |
297 | 0 | } |
298 | | |
299 | 0 | pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) { |
300 | | #[cfg(feature = "nfa-backtrack")] |
301 | 0 | if let Some(ref e) = builder.0 { |
302 | 0 | self.get(&e.0).reset(&e.0); |
303 | 0 | } |
304 | 0 | } |
305 | | |
306 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
307 | | #[cfg(feature = "nfa-backtrack")] |
308 | | { |
309 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
310 | | } |
311 | | #[cfg(not(feature = "nfa-backtrack"))] |
312 | | { |
313 | | 0 |
314 | | } |
315 | 0 | } |
316 | | |
317 | | #[cfg(feature = "nfa-backtrack")] |
318 | 0 | fn get( |
319 | 0 | &mut self, |
320 | 0 | bb: &backtrack::BoundedBacktracker, |
321 | 0 | ) -> &mut backtrack::Cache { |
322 | 0 | self.0.get_or_insert_with(|| bb.create_cache()) |
323 | 0 | } |
324 | | } |
325 | | |
326 | | #[derive(Debug)] |
327 | | pub(crate) struct OnePass(Option<OnePassEngine>); |
328 | | |
329 | | impl OnePass { |
330 | 0 | pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass { |
331 | 0 | OnePass(OnePassEngine::new(info, nfa)) |
332 | 0 | } |
333 | | |
334 | 0 | pub(crate) fn create_cache(&self) -> OnePassCache { |
335 | 0 | OnePassCache::new(self) |
336 | 0 | } |
337 | | |
338 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
339 | 0 | pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> { |
340 | 0 | let engine = self.0.as_ref()?; |
341 | 0 | if !input.get_anchored().is_anchored() |
342 | 0 | && !engine.get_nfa().is_always_start_anchored() |
343 | | { |
344 | 0 | return None; |
345 | 0 | } |
346 | 0 | Some(engine) |
347 | 0 | } |
348 | | |
349 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
350 | 0 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
351 | 0 | } |
352 | | } |
353 | | |
354 | | #[derive(Debug)] |
355 | | pub(crate) struct OnePassEngine( |
356 | | #[cfg(feature = "dfa-onepass")] onepass::DFA, |
357 | | #[cfg(not(feature = "dfa-onepass"))] (), |
358 | | ); |
359 | | |
360 | | impl OnePassEngine { |
361 | 0 | pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> Option<OnePassEngine> { |
362 | | #[cfg(feature = "dfa-onepass")] |
363 | | { |
364 | 0 | if !info.config().get_onepass() { |
365 | 0 | return None; |
366 | 0 | } |
367 | | // In order to even attempt building a one-pass DFA, we require |
368 | | // that we either have at least one explicit capturing group or |
369 | | // there's a Unicode word boundary somewhere. If we don't have |
370 | | // either of these things, then the lazy DFA will almost certainly |
371 | | // be usable and be much faster. The only case where it might |
372 | | // not is if the lazy DFA isn't utilizing its cache effectively, |
373 | | // but in those cases, the underlying regex is almost certainly |
374 | | // not one-pass or is too big to fit within the current one-pass |
375 | | // implementation limits. |
376 | 0 | if info.props_union().explicit_captures_len() == 0 |
377 | 0 | && !info.props_union().look_set().contains_word_unicode() |
378 | | { |
379 | | debug!("not building OnePass because it isn't worth it"); |
380 | 0 | return None; |
381 | 0 | } |
382 | 0 | let onepass_config = onepass::Config::new() |
383 | 0 | .match_kind(info.config().get_match_kind()) |
384 | | // Like for the lazy DFA, we unconditionally enable this |
385 | | // because it doesn't cost much and makes the API more |
386 | | // flexible. |
387 | 0 | .starts_for_each_pattern(true) |
388 | 0 | .byte_classes(info.config().get_byte_classes()) |
389 | 0 | .size_limit(info.config().get_onepass_size_limit()); |
390 | 0 | let result = onepass::Builder::new() |
391 | 0 | .configure(onepass_config) |
392 | 0 | .build_from_nfa(nfa.clone()); |
393 | 0 | let engine = match result { |
394 | 0 | Ok(engine) => engine, |
395 | 0 | Err(_err) => { |
396 | | debug!("OnePass failed to build: {_err}"); |
397 | 0 | return None; |
398 | | } |
399 | | }; |
400 | | debug!("OnePass built, {} bytes", engine.memory_usage()); |
401 | 0 | Some(OnePassEngine(engine)) |
402 | | } |
403 | | #[cfg(not(feature = "dfa-onepass"))] |
404 | | { |
405 | | None |
406 | | } |
407 | 0 | } |
408 | | |
409 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
410 | 0 | pub(crate) fn search_slots( |
411 | 0 | &self, |
412 | 0 | cache: &mut OnePassCache, |
413 | 0 | input: &Input<'_>, |
414 | 0 | slots: &mut [Option<NonMaxUsize>], |
415 | 0 | ) -> Option<PatternID> { |
416 | | #[cfg(feature = "dfa-onepass")] |
417 | | { |
418 | | // OK because we only permit getting a OnePassEngine when we know |
419 | | // the search is anchored and thus an error cannot occur. |
420 | 0 | self.0 |
421 | 0 | .try_search_slots(cache.0.as_mut().unwrap(), input, slots) |
422 | 0 | .unwrap() |
423 | | } |
424 | | #[cfg(not(feature = "dfa-onepass"))] |
425 | | { |
426 | | // Impossible to reach because this engine is never constructed |
427 | | // if the requisite features aren't enabled. |
428 | | unreachable!() |
429 | | } |
430 | 0 | } |
431 | | |
432 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
433 | | #[cfg(feature = "dfa-onepass")] |
434 | | { |
435 | 0 | self.0.memory_usage() |
436 | | } |
437 | | #[cfg(not(feature = "dfa-onepass"))] |
438 | | { |
439 | | // Impossible to reach because this engine is never constructed |
440 | | // if the requisite features aren't enabled. |
441 | | unreachable!() |
442 | | } |
443 | 0 | } |
444 | | |
445 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
446 | 0 | fn get_nfa(&self) -> &NFA { |
447 | | #[cfg(feature = "dfa-onepass")] |
448 | | { |
449 | 0 | self.0.get_nfa() |
450 | | } |
451 | | #[cfg(not(feature = "dfa-onepass"))] |
452 | | { |
453 | | // Impossible to reach because this engine is never constructed |
454 | | // if the requisite features aren't enabled. |
455 | | unreachable!() |
456 | | } |
457 | 0 | } |
458 | | } |
459 | | |
460 | | #[derive(Clone, Debug)] |
461 | | pub(crate) struct OnePassCache( |
462 | | #[cfg(feature = "dfa-onepass")] Option<onepass::Cache>, |
463 | | #[cfg(not(feature = "dfa-onepass"))] (), |
464 | | ); |
465 | | |
466 | | impl OnePassCache { |
467 | 0 | pub(crate) fn none() -> OnePassCache { |
468 | | #[cfg(feature = "dfa-onepass")] |
469 | | { |
470 | 0 | OnePassCache(None) |
471 | | } |
472 | | #[cfg(not(feature = "dfa-onepass"))] |
473 | | { |
474 | | OnePassCache(()) |
475 | | } |
476 | 0 | } |
477 | | |
478 | 0 | pub(crate) fn new(builder: &OnePass) -> OnePassCache { |
479 | | #[cfg(feature = "dfa-onepass")] |
480 | | { |
481 | 0 | OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
482 | | } |
483 | | #[cfg(not(feature = "dfa-onepass"))] |
484 | | { |
485 | | OnePassCache(()) |
486 | | } |
487 | 0 | } |
488 | | |
489 | 0 | pub(crate) fn reset(&mut self, builder: &OnePass) { |
490 | | #[cfg(feature = "dfa-onepass")] |
491 | 0 | if let Some(ref e) = builder.0 { |
492 | 0 | self.0.as_mut().unwrap().reset(&e.0); |
493 | 0 | } |
494 | 0 | } |
495 | | |
496 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
497 | | #[cfg(feature = "dfa-onepass")] |
498 | | { |
499 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
500 | | } |
501 | | #[cfg(not(feature = "dfa-onepass"))] |
502 | | { |
503 | | 0 |
504 | | } |
505 | 0 | } |
506 | | } |
507 | | |
508 | | #[derive(Debug)] |
509 | | pub(crate) struct Hybrid(Option<HybridEngine>); |
510 | | |
511 | | impl Hybrid { |
512 | 0 | pub(crate) fn none() -> Hybrid { |
513 | 0 | Hybrid(None) |
514 | 0 | } |
515 | | |
516 | 0 | pub(crate) fn new( |
517 | 0 | info: &RegexInfo, |
518 | 0 | pre: Option<Prefilter>, |
519 | 0 | nfa: &NFA, |
520 | 0 | nfarev: &NFA, |
521 | 0 | ) -> Hybrid { |
522 | 0 | Hybrid(HybridEngine::new(info, pre, nfa, nfarev)) |
523 | 0 | } |
524 | | |
525 | 0 | pub(crate) fn create_cache(&self) -> HybridCache { |
526 | 0 | HybridCache::new(self) |
527 | 0 | } |
528 | | |
529 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
530 | 0 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> { |
531 | 0 | let engine = self.0.as_ref()?; |
532 | 0 | Some(engine) |
533 | 0 | } |
534 | | |
535 | 0 | pub(crate) fn is_some(&self) -> bool { |
536 | 0 | self.0.is_some() |
537 | 0 | } |
538 | | } |
539 | | |
540 | | #[derive(Debug)] |
541 | | pub(crate) struct HybridEngine( |
542 | | #[cfg(feature = "hybrid")] hybrid::regex::Regex, |
543 | | #[cfg(not(feature = "hybrid"))] (), |
544 | | ); |
545 | | |
546 | | impl HybridEngine { |
547 | 0 | pub(crate) fn new( |
548 | 0 | info: &RegexInfo, |
549 | 0 | pre: Option<Prefilter>, |
550 | 0 | nfa: &NFA, |
551 | 0 | nfarev: &NFA, |
552 | 0 | ) -> Option<HybridEngine> { |
553 | | #[cfg(feature = "hybrid")] |
554 | | { |
555 | 0 | if !info.config().get_hybrid() { |
556 | 0 | return None; |
557 | 0 | } |
558 | 0 | let dfa_config = hybrid::dfa::Config::new() |
559 | 0 | .match_kind(info.config().get_match_kind()) |
560 | 0 | .prefilter(pre.clone()) |
561 | | // Enabling this is necessary for ensuring we can service any |
562 | | // kind of 'Input' search without error. For the lazy DFA, |
563 | | // this is not particularly costly, since the start states are |
564 | | // generated lazily. |
565 | 0 | .starts_for_each_pattern(true) |
566 | 0 | .byte_classes(info.config().get_byte_classes()) |
567 | 0 | .unicode_word_boundary(true) |
568 | 0 | .specialize_start_states(pre.is_some()) |
569 | 0 | .cache_capacity(info.config().get_hybrid_cache_capacity()) |
570 | | // This makes it possible for building a lazy DFA to |
571 | | // fail even though the NFA has already been built. Namely, |
572 | | // if the cache capacity is too small to fit some minimum |
573 | | // number of states (which is small, like 4 or 5), then the |
574 | | // DFA will refuse to build. |
575 | | // |
576 | | // We shouldn't enable this to make building always work, since |
577 | | // this could cause the allocation of a cache bigger than the |
578 | | // provided capacity amount. |
579 | | // |
580 | | // This is effectively the only reason why building a lazy DFA |
581 | | // could fail. If it does, then we simply suppress the error |
582 | | // and return None. |
583 | 0 | .skip_cache_capacity_check(false) |
584 | | // This and enabling heuristic Unicode word boundary support |
585 | | // above make it so the lazy DFA can quit at match time. |
586 | 0 | .minimum_cache_clear_count(Some(3)) |
587 | 0 | .minimum_bytes_per_state(Some(10)); |
588 | 0 | let result = hybrid::dfa::Builder::new() |
589 | 0 | .configure(dfa_config.clone()) |
590 | 0 | .build_from_nfa(nfa.clone()); |
591 | 0 | let fwd = match result { |
592 | 0 | Ok(fwd) => fwd, |
593 | 0 | Err(_err) => { |
594 | | debug!("forward lazy DFA failed to build: {_err}"); |
595 | 0 | return None; |
596 | | } |
597 | | }; |
598 | 0 | let result = hybrid::dfa::Builder::new() |
599 | 0 | .configure( |
600 | 0 | dfa_config |
601 | 0 | .clone() |
602 | 0 | .match_kind(MatchKind::All) |
603 | 0 | .prefilter(None) |
604 | 0 | .specialize_start_states(false), |
605 | 0 | ) |
606 | 0 | .build_from_nfa(nfarev.clone()); |
607 | 0 | let rev = match result { |
608 | 0 | Ok(rev) => rev, |
609 | 0 | Err(_err) => { |
610 | | debug!("reverse lazy DFA failed to build: {_err}"); |
611 | 0 | return None; |
612 | | } |
613 | | }; |
614 | 0 | let engine = |
615 | 0 | hybrid::regex::Builder::new().build_from_dfas(fwd, rev); |
616 | | debug!("lazy DFA built"); |
617 | 0 | Some(HybridEngine(engine)) |
618 | | } |
619 | | #[cfg(not(feature = "hybrid"))] |
620 | | { |
621 | | None |
622 | | } |
623 | 0 | } |
624 | | |
625 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
626 | 0 | pub(crate) fn try_search( |
627 | 0 | &self, |
628 | 0 | cache: &mut HybridCache, |
629 | 0 | input: &Input<'_>, |
630 | 0 | ) -> Result<Option<Match>, RetryFailError> { |
631 | | #[cfg(feature = "hybrid")] |
632 | | { |
633 | 0 | let cache = cache.0.as_mut().unwrap(); |
634 | 0 | self.0.try_search(cache, input).map_err(|e| e.into()) |
635 | | } |
636 | | #[cfg(not(feature = "hybrid"))] |
637 | | { |
638 | | // Impossible to reach because this engine is never constructed |
639 | | // if the requisite features aren't enabled. |
640 | | unreachable!() |
641 | | } |
642 | 0 | } |
643 | | |
644 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
645 | 0 | pub(crate) fn try_search_half_fwd( |
646 | 0 | &self, |
647 | 0 | cache: &mut HybridCache, |
648 | 0 | input: &Input<'_>, |
649 | 0 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
650 | | #[cfg(feature = "hybrid")] |
651 | | { |
652 | 0 | let fwd = self.0.forward(); |
653 | 0 | let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; |
654 | 0 | fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into()) |
655 | | } |
656 | | #[cfg(not(feature = "hybrid"))] |
657 | | { |
658 | | // Impossible to reach because this engine is never constructed |
659 | | // if the requisite features aren't enabled. |
660 | | unreachable!() |
661 | | } |
662 | 0 | } |
663 | | |
664 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
665 | 0 | pub(crate) fn try_search_half_fwd_stopat( |
666 | 0 | &self, |
667 | 0 | cache: &mut HybridCache, |
668 | 0 | input: &Input<'_>, |
669 | 0 | ) -> Result<Result<HalfMatch, usize>, RetryFailError> { |
670 | | #[cfg(feature = "hybrid")] |
671 | | { |
672 | 0 | let dfa = self.0.forward(); |
673 | 0 | let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0; |
674 | 0 | crate::meta::stopat::hybrid_try_search_half_fwd( |
675 | 0 | dfa, &mut cache, input, |
676 | | ) |
677 | | } |
678 | | #[cfg(not(feature = "hybrid"))] |
679 | | { |
680 | | // Impossible to reach because this engine is never constructed |
681 | | // if the requisite features aren't enabled. |
682 | | unreachable!() |
683 | | } |
684 | 0 | } |
685 | | |
686 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
687 | 0 | pub(crate) fn try_search_half_rev( |
688 | 0 | &self, |
689 | 0 | cache: &mut HybridCache, |
690 | 0 | input: &Input<'_>, |
691 | 0 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
692 | | #[cfg(feature = "hybrid")] |
693 | | { |
694 | 0 | let rev = self.0.reverse(); |
695 | 0 | let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1; |
696 | 0 | rev.try_search_rev(&mut revcache, input).map_err(|e| e.into()) |
697 | | } |
698 | | #[cfg(not(feature = "hybrid"))] |
699 | | { |
700 | | // Impossible to reach because this engine is never constructed |
701 | | // if the requisite features aren't enabled. |
702 | | unreachable!() |
703 | | } |
704 | 0 | } |
705 | | |
706 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
707 | 0 | pub(crate) fn try_search_half_rev_limited( |
708 | 0 | &self, |
709 | 0 | cache: &mut HybridCache, |
710 | 0 | input: &Input<'_>, |
711 | 0 | min_start: usize, |
712 | 0 | ) -> Result<Option<HalfMatch>, RetryError> { |
713 | | #[cfg(feature = "hybrid")] |
714 | | { |
715 | 0 | let dfa = self.0.reverse(); |
716 | 0 | let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1; |
717 | 0 | crate::meta::limited::hybrid_try_search_half_rev( |
718 | 0 | dfa, &mut cache, input, min_start, |
719 | | ) |
720 | | } |
721 | | #[cfg(not(feature = "hybrid"))] |
722 | | { |
723 | | // Impossible to reach because this engine is never constructed |
724 | | // if the requisite features aren't enabled. |
725 | | unreachable!() |
726 | | } |
727 | 0 | } |
728 | | |
729 | | #[inline] |
730 | 0 | pub(crate) fn try_which_overlapping_matches( |
731 | 0 | &self, |
732 | 0 | cache: &mut HybridCache, |
733 | 0 | input: &Input<'_>, |
734 | 0 | patset: &mut PatternSet, |
735 | 0 | ) -> Result<(), RetryFailError> { |
736 | | #[cfg(feature = "hybrid")] |
737 | | { |
738 | 0 | let fwd = self.0.forward(); |
739 | 0 | let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; |
740 | 0 | fwd.try_which_overlapping_matches(&mut fwdcache, input, patset) |
741 | 0 | .map_err(|e| e.into()) |
742 | | } |
743 | | #[cfg(not(feature = "hybrid"))] |
744 | | { |
745 | | // Impossible to reach because this engine is never constructed |
746 | | // if the requisite features aren't enabled. |
747 | | unreachable!() |
748 | | } |
749 | 0 | } |
750 | | } |
751 | | |
752 | | #[derive(Clone, Debug)] |
753 | | pub(crate) struct HybridCache( |
754 | | #[cfg(feature = "hybrid")] Option<hybrid::regex::Cache>, |
755 | | #[cfg(not(feature = "hybrid"))] (), |
756 | | ); |
757 | | |
758 | | impl HybridCache { |
759 | 0 | pub(crate) fn none() -> HybridCache { |
760 | | #[cfg(feature = "hybrid")] |
761 | | { |
762 | 0 | HybridCache(None) |
763 | | } |
764 | | #[cfg(not(feature = "hybrid"))] |
765 | | { |
766 | | HybridCache(()) |
767 | | } |
768 | 0 | } |
769 | | |
770 | 0 | pub(crate) fn new(builder: &Hybrid) -> HybridCache { |
771 | | #[cfg(feature = "hybrid")] |
772 | | { |
773 | 0 | HybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
774 | | } |
775 | | #[cfg(not(feature = "hybrid"))] |
776 | | { |
777 | | HybridCache(()) |
778 | | } |
779 | 0 | } |
780 | | |
781 | 0 | pub(crate) fn reset(&mut self, builder: &Hybrid) { |
782 | | #[cfg(feature = "hybrid")] |
783 | 0 | if let Some(ref e) = builder.0 { |
784 | 0 | self.0.as_mut().unwrap().reset(&e.0); |
785 | 0 | } |
786 | 0 | } |
787 | | |
788 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
789 | | #[cfg(feature = "hybrid")] |
790 | | { |
791 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
792 | | } |
793 | | #[cfg(not(feature = "hybrid"))] |
794 | | { |
795 | | 0 |
796 | | } |
797 | 0 | } |
798 | | } |
799 | | |
800 | | #[derive(Debug)] |
801 | | pub(crate) struct DFA(Option<DFAEngine>); |
802 | | |
803 | | impl DFA { |
804 | 0 | pub(crate) fn none() -> DFA { |
805 | 0 | DFA(None) |
806 | 0 | } |
807 | | |
808 | 0 | pub(crate) fn new( |
809 | 0 | info: &RegexInfo, |
810 | 0 | pre: Option<Prefilter>, |
811 | 0 | nfa: &NFA, |
812 | 0 | nfarev: &NFA, |
813 | 0 | ) -> DFA { |
814 | 0 | DFA(DFAEngine::new(info, pre, nfa, nfarev)) |
815 | 0 | } |
816 | | |
817 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
818 | 0 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> { |
819 | 0 | let engine = self.0.as_ref()?; |
820 | 0 | Some(engine) |
821 | 0 | } |
822 | | |
823 | 0 | pub(crate) fn is_some(&self) -> bool { |
824 | 0 | self.0.is_some() |
825 | 0 | } |
826 | | |
827 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
828 | 0 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
829 | 0 | } |
830 | | } |
831 | | |
832 | | #[derive(Debug)] |
833 | | pub(crate) struct DFAEngine( |
834 | | #[cfg(feature = "dfa-build")] dfa::regex::Regex, |
835 | | #[cfg(not(feature = "dfa-build"))] (), |
836 | | ); |
837 | | |
838 | | impl DFAEngine { |
839 | 0 | pub(crate) fn new( |
840 | 0 | info: &RegexInfo, |
841 | 0 | pre: Option<Prefilter>, |
842 | 0 | nfa: &NFA, |
843 | 0 | nfarev: &NFA, |
844 | 0 | ) -> Option<DFAEngine> { |
845 | | #[cfg(feature = "dfa-build")] |
846 | | { |
847 | | if !info.config().get_dfa() { |
848 | | return None; |
849 | | } |
850 | | // If our NFA is anything but small, don't even bother with a DFA. |
851 | | if let Some(state_limit) = info.config().get_dfa_state_limit() { |
852 | | if nfa.states().len() > state_limit { |
853 | | debug!( |
854 | | "skipping full DFA because NFA has {} states, \ |
855 | | which exceeds the heuristic limit of {}", |
856 | | nfa.states().len(), |
857 | | state_limit, |
858 | | ); |
859 | | return None; |
860 | | } |
861 | | } |
862 | | // We cut the size limit in four because the total heap used by |
863 | | // DFA construction is determinization aux memory and the DFA |
864 | | // itself, and those things are configured independently in the |
865 | | // lower level DFA builder API. And then split that in two because |
866 | | // of forward and reverse DFAs. |
867 | | let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4); |
868 | | let dfa_config = dfa::dense::Config::new() |
869 | | .match_kind(info.config().get_match_kind()) |
870 | | .prefilter(pre.clone()) |
871 | | // Enabling this is necessary for ensuring we can service any |
872 | | // kind of 'Input' search without error. For the full DFA, this |
873 | | // can be quite costly. But since we have such a small bound |
874 | | // on the size of the DFA, in practice, any multi-regexes are |
875 | | // probably going to blow the limit anyway. |
876 | | .starts_for_each_pattern(true) |
877 | | .byte_classes(info.config().get_byte_classes()) |
878 | | .unicode_word_boundary(true) |
879 | | .specialize_start_states(pre.is_some()) |
880 | | .determinize_size_limit(size_limit) |
881 | | .dfa_size_limit(size_limit); |
882 | | let result = dfa::dense::Builder::new() |
883 | | .configure(dfa_config.clone()) |
884 | | .build_from_nfa(&nfa); |
885 | | let fwd = match result { |
886 | | Ok(fwd) => fwd, |
887 | | Err(_err) => { |
888 | | debug!("forward full DFA failed to build: {_err}"); |
889 | | return None; |
890 | | } |
891 | | }; |
892 | | let result = dfa::dense::Builder::new() |
893 | | .configure( |
894 | | dfa_config |
895 | | .clone() |
896 | | // We never need unanchored reverse searches, so |
897 | | // there's no point in building it into the DFA, which |
898 | | // WILL take more space. (This isn't done for the lazy |
899 | | // DFA because the DFA is, well, lazy. It doesn't pay |
900 | | // the cost for supporting unanchored searches unless |
901 | | // you actually do an unanchored search, which we |
902 | | // don't.) |
903 | | .start_kind(dfa::StartKind::Anchored) |
904 | | .match_kind(MatchKind::All) |
905 | | .prefilter(None) |
906 | | .specialize_start_states(false), |
907 | | ) |
908 | | .build_from_nfa(&nfarev); |
909 | | let rev = match result { |
910 | | Ok(rev) => rev, |
911 | | Err(_err) => { |
912 | | debug!("reverse full DFA failed to build: {_err}"); |
913 | | return None; |
914 | | } |
915 | | }; |
916 | | let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev); |
917 | | debug!( |
918 | | "fully compiled forward and reverse DFAs built, {} bytes", |
919 | | engine.forward().memory_usage() |
920 | | + engine.reverse().memory_usage(), |
921 | | ); |
922 | | Some(DFAEngine(engine)) |
923 | | } |
924 | | #[cfg(not(feature = "dfa-build"))] |
925 | | { |
926 | 0 | None |
927 | | } |
928 | 0 | } |
929 | | |
930 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
931 | 0 | pub(crate) fn try_search( |
932 | 0 | &self, |
933 | 0 | input: &Input<'_>, |
934 | 0 | ) -> Result<Option<Match>, RetryFailError> { |
935 | | #[cfg(feature = "dfa-build")] |
936 | | { |
937 | | self.0.try_search(input).map_err(|e| e.into()) |
938 | | } |
939 | | #[cfg(not(feature = "dfa-build"))] |
940 | | { |
941 | | // Impossible to reach because this engine is never constructed |
942 | | // if the requisite features aren't enabled. |
943 | 0 | unreachable!() |
944 | | } |
945 | | } |
946 | | |
947 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
948 | 0 | pub(crate) fn try_search_half_fwd( |
949 | 0 | &self, |
950 | 0 | input: &Input<'_>, |
951 | 0 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
952 | | #[cfg(feature = "dfa-build")] |
953 | | { |
954 | | use crate::dfa::Automaton; |
955 | | self.0.forward().try_search_fwd(input).map_err(|e| e.into()) |
956 | | } |
957 | | #[cfg(not(feature = "dfa-build"))] |
958 | | { |
959 | | // Impossible to reach because this engine is never constructed |
960 | | // if the requisite features aren't enabled. |
961 | 0 | unreachable!() |
962 | | } |
963 | | } |
964 | | |
965 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
966 | 0 | pub(crate) fn try_search_half_fwd_stopat( |
967 | 0 | &self, |
968 | 0 | input: &Input<'_>, |
969 | 0 | ) -> Result<Result<HalfMatch, usize>, RetryFailError> { |
970 | | #[cfg(feature = "dfa-build")] |
971 | | { |
972 | | let dfa = self.0.forward(); |
973 | | crate::meta::stopat::dfa_try_search_half_fwd(dfa, input) |
974 | | } |
975 | | #[cfg(not(feature = "dfa-build"))] |
976 | | { |
977 | | // Impossible to reach because this engine is never constructed |
978 | | // if the requisite features aren't enabled. |
979 | 0 | unreachable!() |
980 | | } |
981 | | } |
982 | | |
983 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
984 | 0 | pub(crate) fn try_search_half_rev( |
985 | 0 | &self, |
986 | 0 | input: &Input<'_>, |
987 | 0 | ) -> Result<Option<HalfMatch>, RetryFailError> { |
988 | | #[cfg(feature = "dfa-build")] |
989 | | { |
990 | | use crate::dfa::Automaton; |
991 | | self.0.reverse().try_search_rev(&input).map_err(|e| e.into()) |
992 | | } |
993 | | #[cfg(not(feature = "dfa-build"))] |
994 | | { |
995 | | // Impossible to reach because this engine is never constructed |
996 | | // if the requisite features aren't enabled. |
997 | 0 | unreachable!() |
998 | | } |
999 | | } |
1000 | | |
1001 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1002 | 0 | pub(crate) fn try_search_half_rev_limited( |
1003 | 0 | &self, |
1004 | 0 | input: &Input<'_>, |
1005 | 0 | min_start: usize, |
1006 | 0 | ) -> Result<Option<HalfMatch>, RetryError> { |
1007 | | #[cfg(feature = "dfa-build")] |
1008 | | { |
1009 | | let dfa = self.0.reverse(); |
1010 | | crate::meta::limited::dfa_try_search_half_rev( |
1011 | | dfa, input, min_start, |
1012 | | ) |
1013 | | } |
1014 | | #[cfg(not(feature = "dfa-build"))] |
1015 | | { |
1016 | | // Impossible to reach because this engine is never constructed |
1017 | | // if the requisite features aren't enabled. |
1018 | 0 | unreachable!() |
1019 | | } |
1020 | | } |
1021 | | |
1022 | | #[inline] |
1023 | 0 | pub(crate) fn try_which_overlapping_matches( |
1024 | 0 | &self, |
1025 | 0 | input: &Input<'_>, |
1026 | 0 | patset: &mut PatternSet, |
1027 | 0 | ) -> Result<(), RetryFailError> { |
1028 | | #[cfg(feature = "dfa-build")] |
1029 | | { |
1030 | | use crate::dfa::Automaton; |
1031 | | self.0 |
1032 | | .forward() |
1033 | | .try_which_overlapping_matches(input, patset) |
1034 | | .map_err(|e| e.into()) |
1035 | | } |
1036 | | #[cfg(not(feature = "dfa-build"))] |
1037 | | { |
1038 | | // Impossible to reach because this engine is never constructed |
1039 | | // if the requisite features aren't enabled. |
1040 | 0 | unreachable!() |
1041 | | } |
1042 | | } |
1043 | | |
1044 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
1045 | | #[cfg(feature = "dfa-build")] |
1046 | | { |
1047 | | self.0.forward().memory_usage() + self.0.reverse().memory_usage() |
1048 | | } |
1049 | | #[cfg(not(feature = "dfa-build"))] |
1050 | | { |
1051 | | // Impossible to reach because this engine is never constructed |
1052 | | // if the requisite features aren't enabled. |
1053 | 0 | unreachable!() |
1054 | | } |
1055 | | } |
1056 | | } |
1057 | | |
1058 | | #[derive(Debug)] |
1059 | | pub(crate) struct ReverseHybrid(Option<ReverseHybridEngine>); |
1060 | | |
1061 | | impl ReverseHybrid { |
1062 | 0 | pub(crate) fn none() -> ReverseHybrid { |
1063 | 0 | ReverseHybrid(None) |
1064 | 0 | } |
1065 | | |
1066 | 0 | pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid { |
1067 | 0 | ReverseHybrid(ReverseHybridEngine::new(info, nfarev)) |
1068 | 0 | } |
1069 | | |
1070 | 0 | pub(crate) fn create_cache(&self) -> ReverseHybridCache { |
1071 | 0 | ReverseHybridCache::new(self) |
1072 | 0 | } |
1073 | | |
1074 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1075 | 0 | pub(crate) fn get( |
1076 | 0 | &self, |
1077 | 0 | _input: &Input<'_>, |
1078 | 0 | ) -> Option<&ReverseHybridEngine> { |
1079 | 0 | let engine = self.0.as_ref()?; |
1080 | 0 | Some(engine) |
1081 | 0 | } |
1082 | | } |
1083 | | |
1084 | | #[derive(Debug)] |
1085 | | pub(crate) struct ReverseHybridEngine( |
1086 | | #[cfg(feature = "hybrid")] hybrid::dfa::DFA, |
1087 | | #[cfg(not(feature = "hybrid"))] (), |
1088 | | ); |
1089 | | |
1090 | | impl ReverseHybridEngine { |
1091 | 0 | pub(crate) fn new( |
1092 | 0 | info: &RegexInfo, |
1093 | 0 | nfarev: &NFA, |
1094 | 0 | ) -> Option<ReverseHybridEngine> { |
1095 | | #[cfg(feature = "hybrid")] |
1096 | | { |
1097 | 0 | if !info.config().get_hybrid() { |
1098 | 0 | return None; |
1099 | 0 | } |
1100 | | // Since we only use this for reverse searches, we can hard-code |
1101 | | // a number of things like match semantics, prefilters, starts |
1102 | | // for each pattern and so on. |
1103 | 0 | let dfa_config = hybrid::dfa::Config::new() |
1104 | 0 | .match_kind(MatchKind::All) |
1105 | 0 | .prefilter(None) |
1106 | 0 | .starts_for_each_pattern(false) |
1107 | 0 | .byte_classes(info.config().get_byte_classes()) |
1108 | 0 | .unicode_word_boundary(true) |
1109 | 0 | .specialize_start_states(false) |
1110 | 0 | .cache_capacity(info.config().get_hybrid_cache_capacity()) |
1111 | 0 | .skip_cache_capacity_check(false) |
1112 | 0 | .minimum_cache_clear_count(Some(3)) |
1113 | 0 | .minimum_bytes_per_state(Some(10)); |
1114 | 0 | let result = hybrid::dfa::Builder::new() |
1115 | 0 | .configure(dfa_config) |
1116 | 0 | .build_from_nfa(nfarev.clone()); |
1117 | 0 | let rev = match result { |
1118 | 0 | Ok(rev) => rev, |
1119 | 0 | Err(_err) => { |
1120 | | debug!("lazy reverse DFA failed to build: {_err}"); |
1121 | 0 | return None; |
1122 | | } |
1123 | | }; |
1124 | | debug!("lazy reverse DFA built"); |
1125 | 0 | Some(ReverseHybridEngine(rev)) |
1126 | | } |
1127 | | #[cfg(not(feature = "hybrid"))] |
1128 | | { |
1129 | | None |
1130 | | } |
1131 | 0 | } |
1132 | | |
1133 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1134 | 0 | pub(crate) fn try_search_half_rev_limited( |
1135 | 0 | &self, |
1136 | 0 | cache: &mut ReverseHybridCache, |
1137 | 0 | input: &Input<'_>, |
1138 | 0 | min_start: usize, |
1139 | 0 | ) -> Result<Option<HalfMatch>, RetryError> { |
1140 | | #[cfg(feature = "hybrid")] |
1141 | | { |
1142 | 0 | let dfa = &self.0; |
1143 | 0 | let mut cache = cache.0.as_mut().unwrap(); |
1144 | 0 | crate::meta::limited::hybrid_try_search_half_rev( |
1145 | 0 | dfa, &mut cache, input, min_start, |
1146 | | ) |
1147 | | } |
1148 | | #[cfg(not(feature = "hybrid"))] |
1149 | | { |
1150 | | // Impossible to reach because this engine is never constructed |
1151 | | // if the requisite features aren't enabled. |
1152 | | unreachable!() |
1153 | | } |
1154 | 0 | } |
1155 | | } |
1156 | | |
1157 | | #[derive(Clone, Debug)] |
1158 | | pub(crate) struct ReverseHybridCache( |
1159 | | #[cfg(feature = "hybrid")] Option<hybrid::dfa::Cache>, |
1160 | | #[cfg(not(feature = "hybrid"))] (), |
1161 | | ); |
1162 | | |
1163 | | impl ReverseHybridCache { |
1164 | 0 | pub(crate) fn none() -> ReverseHybridCache { |
1165 | | #[cfg(feature = "hybrid")] |
1166 | | { |
1167 | 0 | ReverseHybridCache(None) |
1168 | | } |
1169 | | #[cfg(not(feature = "hybrid"))] |
1170 | | { |
1171 | | ReverseHybridCache(()) |
1172 | | } |
1173 | 0 | } |
1174 | | |
1175 | 0 | pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache { |
1176 | | #[cfg(feature = "hybrid")] |
1177 | | { |
1178 | 0 | ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) |
1179 | | } |
1180 | | #[cfg(not(feature = "hybrid"))] |
1181 | | { |
1182 | | ReverseHybridCache(()) |
1183 | | } |
1184 | 0 | } |
1185 | | |
1186 | 0 | pub(crate) fn reset(&mut self, builder: &ReverseHybrid) { |
1187 | | #[cfg(feature = "hybrid")] |
1188 | 0 | if let Some(ref e) = builder.0 { |
1189 | 0 | self.0.as_mut().unwrap().reset(&e.0); |
1190 | 0 | } |
1191 | 0 | } |
1192 | | |
1193 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
1194 | | #[cfg(feature = "hybrid")] |
1195 | | { |
1196 | 0 | self.0.as_ref().map_or(0, |c| c.memory_usage()) |
1197 | | } |
1198 | | #[cfg(not(feature = "hybrid"))] |
1199 | | { |
1200 | | 0 |
1201 | | } |
1202 | 0 | } |
1203 | | } |
1204 | | |
1205 | | #[derive(Debug)] |
1206 | | pub(crate) struct ReverseDFA(Option<ReverseDFAEngine>); |
1207 | | |
1208 | | impl ReverseDFA { |
1209 | 0 | pub(crate) fn none() -> ReverseDFA { |
1210 | 0 | ReverseDFA(None) |
1211 | 0 | } |
1212 | | |
1213 | 0 | pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA { |
1214 | 0 | ReverseDFA(ReverseDFAEngine::new(info, nfarev)) |
1215 | 0 | } |
1216 | | |
1217 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1218 | 0 | pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> { |
1219 | 0 | let engine = self.0.as_ref()?; |
1220 | 0 | Some(engine) |
1221 | 0 | } |
1222 | | |
1223 | 0 | pub(crate) fn is_some(&self) -> bool { |
1224 | 0 | self.0.is_some() |
1225 | 0 | } |
1226 | | |
1227 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
1228 | 0 | self.0.as_ref().map_or(0, |e| e.memory_usage()) |
1229 | 0 | } |
1230 | | } |
1231 | | |
1232 | | #[derive(Debug)] |
1233 | | pub(crate) struct ReverseDFAEngine( |
1234 | | #[cfg(feature = "dfa-build")] dfa::dense::DFA<Vec<u32>>, |
1235 | | #[cfg(not(feature = "dfa-build"))] (), |
1236 | | ); |
1237 | | |
1238 | | impl ReverseDFAEngine { |
1239 | 0 | pub(crate) fn new( |
1240 | 0 | info: &RegexInfo, |
1241 | 0 | nfarev: &NFA, |
1242 | 0 | ) -> Option<ReverseDFAEngine> { |
1243 | | #[cfg(feature = "dfa-build")] |
1244 | | { |
1245 | | if !info.config().get_dfa() { |
1246 | | return None; |
1247 | | } |
1248 | | // If our NFA is anything but small, don't even bother with a DFA. |
1249 | | if let Some(state_limit) = info.config().get_dfa_state_limit() { |
1250 | | if nfarev.states().len() > state_limit { |
1251 | | debug!( |
1252 | | "skipping full reverse DFA because NFA has {} states, \ |
1253 | | which exceeds the heuristic limit of {}", |
1254 | | nfarev.states().len(), |
1255 | | state_limit, |
1256 | | ); |
1257 | | return None; |
1258 | | } |
1259 | | } |
1260 | | // We cut the size limit in two because the total heap used by DFA |
1261 | | // construction is determinization aux memory and the DFA itself, |
1262 | | // and those things are configured independently in the lower level |
1263 | | // DFA builder API. |
1264 | | let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2); |
1265 | | // Since we only use this for reverse searches, we can hard-code |
1266 | | // a number of things like match semantics, prefilters, starts |
1267 | | // for each pattern and so on. We also disable acceleration since |
1268 | | // it's incompatible with limited searches (which is the only |
1269 | | // operation we support for this kind of engine at the moment). |
1270 | | let dfa_config = dfa::dense::Config::new() |
1271 | | .match_kind(MatchKind::All) |
1272 | | .prefilter(None) |
1273 | | .accelerate(false) |
1274 | | .start_kind(dfa::StartKind::Anchored) |
1275 | | .starts_for_each_pattern(false) |
1276 | | .byte_classes(info.config().get_byte_classes()) |
1277 | | .unicode_word_boundary(true) |
1278 | | .specialize_start_states(false) |
1279 | | .determinize_size_limit(size_limit) |
1280 | | .dfa_size_limit(size_limit); |
1281 | | let result = dfa::dense::Builder::new() |
1282 | | .configure(dfa_config) |
1283 | | .build_from_nfa(&nfarev); |
1284 | | let rev = match result { |
1285 | | Ok(rev) => rev, |
1286 | | Err(_err) => { |
1287 | | debug!("full reverse DFA failed to build: {_err}"); |
1288 | | return None; |
1289 | | } |
1290 | | }; |
1291 | | debug!( |
1292 | | "fully compiled reverse DFA built, {} bytes", |
1293 | | rev.memory_usage() |
1294 | | ); |
1295 | | Some(ReverseDFAEngine(rev)) |
1296 | | } |
1297 | | #[cfg(not(feature = "dfa-build"))] |
1298 | | { |
1299 | 0 | None |
1300 | | } |
1301 | 0 | } |
1302 | | |
1303 | | #[cfg_attr(feature = "perf-inline", inline(always))] |
1304 | 0 | pub(crate) fn try_search_half_rev_limited( |
1305 | 0 | &self, |
1306 | 0 | input: &Input<'_>, |
1307 | 0 | min_start: usize, |
1308 | 0 | ) -> Result<Option<HalfMatch>, RetryError> { |
1309 | | #[cfg(feature = "dfa-build")] |
1310 | | { |
1311 | | let dfa = &self.0; |
1312 | | crate::meta::limited::dfa_try_search_half_rev( |
1313 | | dfa, input, min_start, |
1314 | | ) |
1315 | | } |
1316 | | #[cfg(not(feature = "dfa-build"))] |
1317 | | { |
1318 | | // Impossible to reach because this engine is never constructed |
1319 | | // if the requisite features aren't enabled. |
1320 | 0 | unreachable!() |
1321 | | } |
1322 | | } |
1323 | | |
1324 | 0 | pub(crate) fn memory_usage(&self) -> usize { |
1325 | | #[cfg(feature = "dfa-build")] |
1326 | | { |
1327 | | self.0.memory_usage() |
1328 | | } |
1329 | | #[cfg(not(feature = "dfa-build"))] |
1330 | | { |
1331 | | // Impossible to reach because this engine is never constructed |
1332 | | // if the requisite features aren't enabled. |
1333 | 0 | unreachable!() |
1334 | | } |
1335 | | } |
1336 | | } |