/src/regex/regex-automata/src/meta/error.rs
Line | Count | Source |
1 | | use regex_syntax::{ast, hir}; |
2 | | |
3 | | use crate::{nfa, util::search::MatchError, PatternID}; |
4 | | |
5 | | /// An error that occurs when construction of a `Regex` fails. |
6 | | /// |
7 | | /// A build error is generally a result of one of two possible failure |
8 | | /// modes. First is a parse or syntax error in the concrete syntax of a |
9 | | /// pattern. Second is that the construction of the underlying regex matcher |
10 | | /// fails, usually because it gets too big with respect to limits like |
11 | | /// [`Config::nfa_size_limit`](crate::meta::Config::nfa_size_limit). |
12 | | /// |
13 | | /// This error provides very little introspection capabilities. You can: |
14 | | /// |
15 | | /// * Ask for the [`PatternID`] of the pattern that caused an error, if one |
16 | | /// is available. This is available for things like syntax errors, but not for |
17 | | /// cases where build limits are exceeded. |
18 | | /// * Ask for the underlying syntax error, but only if the error is a syntax |
19 | | /// error. |
20 | | /// * Ask for a human readable message corresponding to the underlying error. |
21 | | /// * The `BuildError::source` method (from the `std::error::Error` |
22 | | /// trait implementation) may be used to query for an underlying error if one |
23 | | /// exists. There are no API guarantees about which error is returned. |
24 | | /// |
25 | | /// When the `std` feature is enabled, this implements `std::error::Error`. |
26 | | #[derive(Clone, Debug)] |
27 | | pub struct BuildError { |
28 | | kind: BuildErrorKind, |
29 | | } |
30 | | |
31 | | #[derive(Clone, Debug)] |
32 | | enum BuildErrorKind { |
33 | | Syntax { pid: PatternID, err: regex_syntax::Error }, |
34 | | NFA(nfa::thompson::BuildError), |
35 | | } |
36 | | |
37 | | impl BuildError { |
38 | | /// If it is known which pattern ID caused this build error to occur, then |
39 | | /// this method returns it. |
40 | | /// |
41 | | /// Some errors are not associated with a particular pattern. However, any |
42 | | /// errors that occur as part of parsing a pattern are guaranteed to be |
43 | | /// associated with a pattern ID. |
44 | | /// |
45 | | /// # Example |
46 | | /// |
47 | | /// ``` |
48 | | /// use regex_automata::{meta::Regex, PatternID}; |
49 | | /// |
50 | | /// let err = Regex::new_many(&["a", "b", r"\p{Foo}", "c"]).unwrap_err(); |
51 | | /// assert_eq!(Some(PatternID::must(2)), err.pattern()); |
52 | | /// ``` |
53 | 0 | pub fn pattern(&self) -> Option<PatternID> { |
54 | 0 | match self.kind { |
55 | 0 | BuildErrorKind::Syntax { pid, .. } => Some(pid), |
56 | 0 | _ => None, |
57 | | } |
58 | 0 | } |
59 | | |
60 | | /// If this error occurred because the regex exceeded the configured size |
61 | | /// limit before being built, then this returns the configured size limit. |
62 | | /// |
63 | | /// The limit returned is what was configured, and corresponds to the |
64 | | /// maximum amount of heap usage in bytes. |
65 | 10.7k | pub fn size_limit(&self) -> Option<usize> { |
66 | 10.7k | match self.kind { |
67 | 2.09k | BuildErrorKind::NFA(ref err) => err.size_limit(), |
68 | 8.65k | _ => None, |
69 | | } |
70 | 10.7k | } |
71 | | |
72 | | /// If this error corresponds to a syntax error, then a reference to it is |
73 | | /// returned by this method. |
74 | 8.65k | pub fn syntax_error(&self) -> Option<®ex_syntax::Error> { |
75 | 8.65k | match self.kind { |
76 | 8.65k | BuildErrorKind::Syntax { ref err, .. } => Some(err), |
77 | 0 | _ => None, |
78 | | } |
79 | 8.65k | } |
80 | | |
81 | 6.13k | pub(crate) fn ast(pid: PatternID, err: ast::Error) -> BuildError { |
82 | 6.13k | let err = regex_syntax::Error::from(err); |
83 | 6.13k | BuildError { kind: BuildErrorKind::Syntax { pid, err } } |
84 | 6.13k | } |
85 | | |
86 | 2.52k | pub(crate) fn hir(pid: PatternID, err: hir::Error) -> BuildError { |
87 | 2.52k | let err = regex_syntax::Error::from(err); |
88 | 2.52k | BuildError { kind: BuildErrorKind::Syntax { pid, err } } |
89 | 2.52k | } |
90 | | |
91 | 2.09k | pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError { |
92 | 2.09k | BuildError { kind: BuildErrorKind::NFA(err) } |
93 | 2.09k | } |
94 | | } |
95 | | |
#[cfg(feature = "std")]
impl std::error::Error for BuildError {
    /// Returns the wrapped error. Every kind of build error has one, so this
    /// never returns `None`.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match &self.kind {
            BuildErrorKind::Syntax { err, .. } => err,
            BuildErrorKind::NFA(err) => err,
        };
        Some(cause)
    }
}
105 | | |
106 | | impl core::fmt::Display for BuildError { |
107 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
108 | 0 | match self.kind { |
109 | 0 | BuildErrorKind::Syntax { pid, .. } => { |
110 | 0 | write!(f, "error parsing pattern {}", pid.as_usize()) |
111 | | } |
112 | 0 | BuildErrorKind::NFA(_) => write!(f, "error building NFA"), |
113 | | } |
114 | 0 | } |
115 | | } |
116 | | |
117 | | /// An error that occurs when a search should be retried. |
118 | | /// |
119 | | /// This retry error distinguishes between two different failure modes. |
120 | | /// |
121 | | /// The first is one where potential quadratic behavior has been detected. |
122 | | /// In this case, whatever optimization that led to this behavior should be |
123 | | /// stopped, and the next best strategy should be used. |
124 | | /// |
125 | | /// The second indicates that the underlying regex engine has failed for some |
126 | | /// reason. This usually occurs because either a lazy DFA's cache has become |
127 | | /// ineffective or because a non-ASCII byte has been seen *and* a Unicode word |
128 | | /// boundary was used in one of the patterns. In this failure case, a different |
129 | | /// regex engine that won't fail in these ways (PikeVM, backtracker or the |
130 | | /// one-pass DFA) should be used. |
131 | | /// |
132 | | /// This is an internal error only and should never bleed into the public |
133 | | /// API. |
134 | | #[derive(Debug)] |
135 | | pub(crate) enum RetryError { |
136 | | Quadratic(RetryQuadraticError), |
137 | | Fail(RetryFailError), |
138 | | } |
139 | | |
140 | | #[cfg(feature = "std")] |
141 | | impl std::error::Error for RetryError {} |
142 | | |
143 | | impl core::fmt::Display for RetryError { |
144 | 0 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
145 | 0 | match *self { |
146 | 0 | RetryError::Quadratic(ref err) => err.fmt(f), |
147 | 0 | RetryError::Fail(ref err) => err.fmt(f), |
148 | | } |
149 | 0 | } |
150 | | } |
151 | | |
152 | | impl From<MatchError> for RetryError { |
153 | 1.41k | fn from(merr: MatchError) -> RetryError { |
154 | 1.41k | RetryError::Fail(RetryFailError::from(merr)) |
155 | 1.41k | } |
156 | | } |
157 | | |
/// Signals that potentially quadratic behavior was detected while applying
/// either the "reverse suffix" or the "reverse inner" optimization.
///
/// A caller that sees this error should give up on the "reverse"
/// optimization and run a normal forward search instead.
#[derive(Debug)]
pub(crate) struct RetryQuadraticError(());

impl RetryQuadraticError {
    /// Creates the error. It carries nothing beyond its unit payload.
    pub(crate) fn new() -> RetryQuadraticError {
        Self(())
    }
}

#[cfg(feature = "std")]
impl std::error::Error for RetryQuadraticError {}

impl core::fmt::Display for RetryQuadraticError {
    /// Writes a fixed message; there is no per-error detail to report.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("regex engine gave up to avoid quadratic behavior")
    }
}
180 | | |
181 | | impl From<RetryQuadraticError> for RetryError { |
182 | 0 | fn from(err: RetryQuadraticError) -> RetryError { |
183 | 0 | RetryError::Quadratic(err) |
184 | 0 | } |
185 | | } |
186 | | |
/// Signals that a regex engine "gave up" for some reason before it finished
/// a search. The usual causes are heuristic Unicode word boundary support
/// or ineffective cache usage in the lazy DFA.
///
/// A caller that sees this error should retry the search with a different
/// regex engine.
///
/// Convenient `From` impls exist to convert a `MatchError` into this error
/// automatically. That conversion is sound because the internals of the
/// meta regex engine guarantee that errors like `HaystackTooLong` and
/// `UnsupportedAnchored` never occur. What remains is `Quit` and `GaveUp`,
/// and both of those correspond to this "failure" error.
#[derive(Debug)]
pub(crate) struct RetryFailError {
    /// The offset reported in the `Display` message as the point of failure.
    offset: usize,
}

impl RetryFailError {
    /// Creates a failure error that records the given offset.
    pub(crate) fn from_offset(offset: usize) -> RetryFailError {
        Self { offset }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for RetryFailError {}

impl core::fmt::Display for RetryFailError {
    /// Writes a message that includes the recorded offset.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Self { offset } = self;
        f.write_fmt(format_args!(
            "regex engine failed at offset {:?}",
            offset
        ))
    }
}
218 | | |
219 | | impl From<RetryFailError> for RetryError { |
220 | 163 | fn from(err: RetryFailError) -> RetryError { |
221 | 163 | RetryError::Fail(err) |
222 | 163 | } |
223 | | } |
224 | | |
225 | | impl From<MatchError> for RetryFailError { |
226 | 36.8k | fn from(merr: MatchError) -> RetryFailError { |
227 | | use crate::util::search::MatchErrorKind::*; |
228 | | |
229 | 36.8k | match *merr.kind() { |
230 | 36.6k | Quit { offset, .. } => RetryFailError::from_offset(offset), |
231 | 170 | GaveUp { offset } => RetryFailError::from_offset(offset), |
232 | | // These can never occur because we avoid them by construction |
233 | | // or with higher level control flow logic. For example, the |
234 | | // backtracker's wrapper will never hand out a backtracker engine |
235 | | // when the haystack would be too long. |
236 | | HaystackTooLong { .. } | UnsupportedAnchored { .. } => { |
237 | 0 | unreachable!("found impossible error in meta engine: {merr}") |
238 | | } |
239 | | } |
240 | 36.8k | } |
241 | | } |