/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-syntax-0.6.29/src/hir/mod.rs
Line | Count | Source |
1 | | /*! |
2 | | Defines a high-level intermediate representation for regular expressions. |
3 | | */ |
4 | | use std::char; |
5 | | use std::cmp; |
6 | | use std::error; |
7 | | use std::fmt; |
8 | | use std::result; |
9 | | use std::u8; |
10 | | |
11 | | use crate::ast::Span; |
12 | | use crate::hir::interval::{Interval, IntervalSet, IntervalSetIter}; |
13 | | use crate::unicode; |
14 | | |
15 | | pub use crate::hir::visitor::{visit, Visitor}; |
16 | | pub use crate::unicode::CaseFoldError; |
17 | | |
18 | | mod interval; |
19 | | pub mod literal; |
20 | | pub mod print; |
21 | | pub mod translate; |
22 | | mod visitor; |
23 | | |
24 | | /// An error that can occur while translating an `Ast` to a `Hir`. |
25 | | #[derive(Clone, Debug, Eq, PartialEq)] |
26 | | pub struct Error { |
27 | | /// The kind of error. |
28 | | kind: ErrorKind, |
29 | | /// The original pattern that the translator's Ast was parsed from. Every |
30 | | /// span in an error is a valid range into this string. |
31 | | pattern: String, |
32 | | /// The span of this error, derived from the Ast given to the translator. |
33 | | span: Span, |
34 | | } |
35 | | |
36 | | impl Error { |
37 | | /// Return the type of this error. |
38 | 0 | pub fn kind(&self) -> &ErrorKind { |
39 | 0 | &self.kind |
40 | 0 | } |
41 | | |
42 | | /// The original pattern string in which this error occurred. |
43 | | /// |
44 | | /// Every span reported by this error is reported in terms of this string. |
45 | 0 | pub fn pattern(&self) -> &str { |
46 | 0 | &self.pattern |
47 | 0 | } |
48 | | |
49 | | /// Return the span at which this error occurred. |
50 | 0 | pub fn span(&self) -> &Span { |
51 | 0 | &self.span |
52 | 0 | } |
53 | | } |
54 | | |
55 | | /// The type of an error that occurred while building an `Hir`. |
56 | | #[derive(Clone, Debug, Eq, PartialEq)] |
57 | | pub enum ErrorKind { |
58 | | /// This error occurs when a Unicode feature is used when Unicode |
59 | | /// support is disabled. For example `(?-u:\pL)` would trigger this error. |
60 | | UnicodeNotAllowed, |
61 | | /// This error occurs when translating a pattern that could match a byte |
62 | | /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled. |
63 | | InvalidUtf8, |
64 | | /// This occurs when an unrecognized Unicode property name could not |
65 | | /// be found. |
66 | | UnicodePropertyNotFound, |
67 | | /// This occurs when an unrecognized Unicode property value could not |
68 | | /// be found. |
69 | | UnicodePropertyValueNotFound, |
70 | | /// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or |
71 | | /// `\d`) could not be found. This can occur when the `unicode-perl` |
72 | | /// crate feature is not enabled. |
73 | | UnicodePerlClassNotFound, |
74 | | /// This occurs when the Unicode simple case mapping tables are not |
75 | | /// available, and the regular expression required Unicode aware case |
76 | | /// insensitivity. |
77 | | UnicodeCaseUnavailable, |
78 | | /// This occurs when the translator attempts to construct a character class |
79 | | /// that is empty. |
80 | | /// |
81 | | /// Note that this restriction in the translator may be removed in the |
82 | | /// future. |
83 | | EmptyClassNotAllowed, |
84 | | /// Hints that destructuring should not be exhaustive. |
85 | | /// |
86 | | /// This enum may grow additional variants, so this makes sure clients |
87 | | /// don't count on exhaustive matching. (Otherwise, adding a new variant |
88 | | /// could break existing code.) |
89 | | #[doc(hidden)] |
90 | | __Nonexhaustive, |
91 | | } |
92 | | |
93 | | impl ErrorKind { |
94 | | // TODO: Remove this method entirely on the next breaking semver release. |
95 | | #[allow(deprecated)] |
96 | 0 | fn description(&self) -> &str { |
97 | | use self::ErrorKind::*; |
98 | 0 | match *self { |
99 | 0 | UnicodeNotAllowed => "Unicode not allowed here", |
100 | 0 | InvalidUtf8 => "pattern can match invalid UTF-8", |
101 | 0 | UnicodePropertyNotFound => "Unicode property not found", |
102 | 0 | UnicodePropertyValueNotFound => "Unicode property value not found", |
103 | | UnicodePerlClassNotFound => { |
104 | 0 | "Unicode-aware Perl class not found \ |
105 | 0 | (make sure the unicode-perl feature is enabled)" |
106 | | } |
107 | | UnicodeCaseUnavailable => { |
108 | 0 | "Unicode-aware case insensitivity matching is not available \ |
109 | 0 | (make sure the unicode-case feature is enabled)" |
110 | | } |
111 | 0 | EmptyClassNotAllowed => "empty character classes are not allowed", |
112 | 0 | __Nonexhaustive => unreachable!(), |
113 | | } |
114 | 0 | } |
115 | | } |
116 | | |
117 | | impl error::Error for Error { |
118 | | // TODO: Remove this method entirely on the next breaking semver release. |
119 | | #[allow(deprecated)] |
120 | 0 | fn description(&self) -> &str { |
121 | 0 | self.kind.description() |
122 | 0 | } |
123 | | } |
124 | | |
125 | | impl fmt::Display for Error { |
126 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
127 | 0 | crate::error::Formatter::from(self).fmt(f) |
128 | 0 | } |
129 | | } |
130 | | |
131 | | impl fmt::Display for ErrorKind { |
132 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
133 | | // TODO: Remove this on the next breaking semver release. |
134 | | #[allow(deprecated)] |
135 | 0 | f.write_str(self.description()) |
136 | 0 | } |
137 | | } |
138 | | |
139 | | /// A high-level intermediate representation (HIR) for a regular expression. |
140 | | /// |
141 | | /// The HIR of a regular expression represents an intermediate step between its |
142 | | /// abstract syntax (a structured description of the concrete syntax) and |
143 | | /// compiled byte codes. The purpose of HIR is to make regular expressions |
144 | | /// easier to analyze. In particular, the AST is much more complex than the |
145 | | /// HIR. For example, while an AST supports arbitrarily nested character |
146 | | /// classes, the HIR will flatten all nested classes into a single set. The HIR |
147 | | /// will also "compile away" every flag present in the concrete syntax. For |
148 | | /// example, users of HIR expressions never need to worry about case folding; |
149 | | /// it is handled automatically by the translator (e.g., by translating `(?i)A` |
150 | | /// to `[aA]`). |
151 | | /// |
152 | | /// If the HIR was produced by a translator that disallows invalid UTF-8, then |
153 | | /// the HIR is guaranteed to match UTF-8 exclusively. |
154 | | /// |
155 | | /// This type defines its own destructor that uses constant stack space and |
156 | | /// heap space proportional to the size of the HIR. |
157 | | /// |
158 | | /// The specific type of an HIR expression can be accessed via its `kind` |
159 | | /// or `into_kind` methods. This extra level of indirection exists for two |
160 | | /// reasons: |
161 | | /// |
162 | | /// 1. Construction of an HIR expression *must* use the constructor methods |
163 | | /// on this `Hir` type instead of building the `HirKind` values directly. |
164 | | /// This permits construction to enforce invariants like "concatenations |
165 | | /// always consist of two or more sub-expressions." |
166 | | /// 2. Every HIR expression contains attributes that are defined inductively, |
167 | | /// and can be computed cheaply during the construction process. For |
168 | | /// example, one such attribute is whether the expression must match at the |
169 | | /// beginning of the text. |
170 | | /// |
171 | | /// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular |
172 | | /// expression pattern string, and uses constant stack space and heap space |
173 | | /// proportional to the size of the `Hir`. |
174 | | #[derive(Clone, Debug, Eq, PartialEq)] |
175 | | pub struct Hir { |
176 | | /// The underlying HIR kind. |
177 | | kind: HirKind, |
178 | | /// Analysis info about this HIR, computed during construction. |
179 | | info: HirInfo, |
180 | | } |
181 | | |
182 | | /// The kind of an arbitrary `Hir` expression. |
183 | | #[derive(Clone, Debug, Eq, PartialEq)] |
184 | | pub enum HirKind { |
185 | | /// The empty regular expression, which matches everything, including the |
186 | | /// empty string. |
187 | | Empty, |
188 | | /// A single literal character that matches exactly this character. |
189 | | Literal(Literal), |
190 | | /// A single character class that matches any of the characters in the |
191 | | /// class. A class can either consist of Unicode scalar values as |
192 | | /// characters, or it can use bytes. |
193 | | Class(Class), |
194 | | /// An anchor assertion. An anchor assertion match always has zero length. |
195 | | Anchor(Anchor), |
196 | | /// A word boundary assertion, which may or may not be Unicode aware. A |
197 | | /// word boundary assertion match always has zero length. |
198 | | WordBoundary(WordBoundary), |
199 | | /// A repetition operation applied to a child expression. |
200 | | Repetition(Repetition), |
201 | | /// A possibly capturing group, which contains a child expression. |
202 | | Group(Group), |
203 | | /// A concatenation of expressions. A concatenation always has at least two |
204 | | /// child expressions. |
205 | | /// |
206 | | /// A concatenation matches only if each of its child expression matches |
207 | | /// one after the other. |
208 | | Concat(Vec<Hir>), |
209 | | /// An alternation of expressions. An alternation always has at least two |
210 | | /// child expressions. |
211 | | /// |
212 | | /// An alternation matches only if at least one of its child expression |
213 | | /// matches. If multiple expressions match, then the leftmost is preferred. |
214 | | Alternation(Vec<Hir>), |
215 | | } |
216 | | |
217 | | impl Hir { |
218 | | /// Returns a reference to the underlying HIR kind. |
219 | 8.85k | pub fn kind(&self) -> &HirKind { |
220 | 8.85k | &self.kind |
221 | 8.85k | } |
222 | | |
223 | | /// Consumes ownership of this HIR expression and returns its underlying |
224 | | /// `HirKind`. |
225 | 0 | pub fn into_kind(mut self) -> HirKind { |
226 | | use std::mem; |
227 | 0 | mem::replace(&mut self.kind, HirKind::Empty) |
228 | 0 | } |
229 | | |
230 | | /// Returns an empty HIR expression. |
231 | | /// |
232 | | /// An empty HIR expression always matches, including the empty string. |
233 | 675 | pub fn empty() -> Hir { |
234 | 675 | let mut info = HirInfo::new(); |
235 | 675 | info.set_always_utf8(true); |
236 | 675 | info.set_all_assertions(true); |
237 | 675 | info.set_anchored_start(false); |
238 | 675 | info.set_anchored_end(false); |
239 | 675 | info.set_line_anchored_start(false); |
240 | 675 | info.set_line_anchored_end(false); |
241 | 675 | info.set_any_anchored_start(false); |
242 | 675 | info.set_any_anchored_end(false); |
243 | 675 | info.set_match_empty(true); |
244 | 675 | info.set_literal(false); |
245 | 675 | info.set_alternation_literal(false); |
246 | 675 | Hir { kind: HirKind::Empty, info } |
247 | 675 | } |
248 | | |
249 | | /// Creates a literal HIR expression. |
250 | | /// |
251 | | /// If the given literal has a `Byte` variant with an ASCII byte, then this |
252 | | /// method panics. This enforces the invariant that `Byte` variants are |
253 | | /// only used to express matching of invalid UTF-8. |
254 | 465 | pub fn literal(lit: Literal) -> Hir { |
255 | 465 | if let Literal::Byte(b) = lit { |
256 | 0 | assert!(b > 0x7F); |
257 | 465 | } |
258 | | |
259 | 465 | let mut info = HirInfo::new(); |
260 | 465 | info.set_always_utf8(lit.is_unicode()); |
261 | 465 | info.set_all_assertions(false); |
262 | 465 | info.set_anchored_start(false); |
263 | 465 | info.set_anchored_end(false); |
264 | 465 | info.set_line_anchored_start(false); |
265 | 465 | info.set_line_anchored_end(false); |
266 | 465 | info.set_any_anchored_start(false); |
267 | 465 | info.set_any_anchored_end(false); |
268 | 465 | info.set_match_empty(false); |
269 | 465 | info.set_literal(true); |
270 | 465 | info.set_alternation_literal(true); |
271 | 465 | Hir { kind: HirKind::Literal(lit), info } |
272 | 465 | } |
273 | | |
274 | | /// Creates a class HIR expression. |
275 | 175 | pub fn class(class: Class) -> Hir { |
276 | 175 | let mut info = HirInfo::new(); |
277 | 175 | info.set_always_utf8(class.is_always_utf8()); |
278 | 175 | info.set_all_assertions(false); |
279 | 175 | info.set_anchored_start(false); |
280 | 175 | info.set_anchored_end(false); |
281 | 175 | info.set_line_anchored_start(false); |
282 | 175 | info.set_line_anchored_end(false); |
283 | 175 | info.set_any_anchored_start(false); |
284 | 175 | info.set_any_anchored_end(false); |
285 | 175 | info.set_match_empty(false); |
286 | 175 | info.set_literal(false); |
287 | 175 | info.set_alternation_literal(false); |
288 | 175 | Hir { kind: HirKind::Class(class), info } |
289 | 175 | } |
290 | | |
291 | | /// Creates an anchor assertion HIR expression. |
292 | 30 | pub fn anchor(anchor: Anchor) -> Hir { |
293 | 30 | let mut info = HirInfo::new(); |
294 | 30 | info.set_always_utf8(true); |
295 | 30 | info.set_all_assertions(true); |
296 | 30 | info.set_anchored_start(false); |
297 | 30 | info.set_anchored_end(false); |
298 | 30 | info.set_line_anchored_start(false); |
299 | 30 | info.set_line_anchored_end(false); |
300 | 30 | info.set_any_anchored_start(false); |
301 | 30 | info.set_any_anchored_end(false); |
302 | 30 | info.set_match_empty(true); |
303 | 30 | info.set_literal(false); |
304 | 30 | info.set_alternation_literal(false); |
305 | 30 | if let Anchor::StartText = anchor { |
306 | 15 | info.set_anchored_start(true); |
307 | 15 | info.set_line_anchored_start(true); |
308 | 15 | info.set_any_anchored_start(true); |
309 | 15 | } |
310 | 30 | if let Anchor::EndText = anchor { |
311 | 15 | info.set_anchored_end(true); |
312 | 15 | info.set_line_anchored_end(true); |
313 | 15 | info.set_any_anchored_end(true); |
314 | 15 | } |
315 | 30 | if let Anchor::StartLine = anchor { |
316 | 0 | info.set_line_anchored_start(true); |
317 | 30 | } |
318 | 30 | if let Anchor::EndLine = anchor { |
319 | 0 | info.set_line_anchored_end(true); |
320 | 30 | } |
321 | 30 | Hir { kind: HirKind::Anchor(anchor), info } |
322 | 30 | } |
323 | | |
324 | | /// Creates a word boundary assertion HIR expression. |
325 | 0 | pub fn word_boundary(word_boundary: WordBoundary) -> Hir { |
326 | 0 | let mut info = HirInfo::new(); |
327 | 0 | info.set_always_utf8(true); |
328 | 0 | info.set_all_assertions(true); |
329 | 0 | info.set_anchored_start(false); |
330 | 0 | info.set_anchored_end(false); |
331 | 0 | info.set_line_anchored_start(false); |
332 | 0 | info.set_line_anchored_end(false); |
333 | 0 | info.set_any_anchored_start(false); |
334 | 0 | info.set_any_anchored_end(false); |
335 | 0 | info.set_literal(false); |
336 | 0 | info.set_alternation_literal(false); |
337 | | // A negated word boundary matches '', so that's fine. But \b does not |
338 | | // match \b, so why do we say it can match the empty string? Well, |
339 | | // because, if you search for \b against 'a', it will report [0, 0) and |
340 | | // [1, 1) as matches, and both of those matches correspond to the empty |
341 | | // string. Thus, only *certain* empty strings match \b, which similarly |
342 | | // applies to \B. |
343 | 0 | info.set_match_empty(true); |
344 | | // Negated ASCII word boundaries can match invalid UTF-8. |
345 | 0 | if let WordBoundary::AsciiNegate = word_boundary { |
346 | 0 | info.set_always_utf8(false); |
347 | 0 | } |
348 | 0 | Hir { kind: HirKind::WordBoundary(word_boundary), info } |
349 | 0 | } |
350 | | |
351 | | /// Creates a repetition HIR expression. |
352 | 325 | pub fn repetition(rep: Repetition) -> Hir { |
353 | 325 | let mut info = HirInfo::new(); |
354 | 325 | info.set_always_utf8(rep.hir.is_always_utf8()); |
355 | 325 | info.set_all_assertions(rep.hir.is_all_assertions()); |
356 | | // If this operator can match the empty string, then it can never |
357 | | // be anchored. |
358 | 325 | info.set_anchored_start( |
359 | 325 | !rep.is_match_empty() && rep.hir.is_anchored_start(), |
360 | | ); |
361 | 325 | info.set_anchored_end( |
362 | 325 | !rep.is_match_empty() && rep.hir.is_anchored_end(), |
363 | | ); |
364 | 325 | info.set_line_anchored_start( |
365 | 325 | !rep.is_match_empty() && rep.hir.is_anchored_start(), |
366 | | ); |
367 | 325 | info.set_line_anchored_end( |
368 | 325 | !rep.is_match_empty() && rep.hir.is_anchored_end(), |
369 | | ); |
370 | 325 | info.set_any_anchored_start(rep.hir.is_any_anchored_start()); |
371 | 325 | info.set_any_anchored_end(rep.hir.is_any_anchored_end()); |
372 | 325 | info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty()); |
373 | 325 | info.set_literal(false); |
374 | 325 | info.set_alternation_literal(false); |
375 | 325 | Hir { kind: HirKind::Repetition(rep), info } |
376 | 325 | } |
377 | | |
378 | | /// Creates a group HIR expression. |
379 | 90 | pub fn group(group: Group) -> Hir { |
380 | 90 | let mut info = HirInfo::new(); |
381 | 90 | info.set_always_utf8(group.hir.is_always_utf8()); |
382 | 90 | info.set_all_assertions(group.hir.is_all_assertions()); |
383 | 90 | info.set_anchored_start(group.hir.is_anchored_start()); |
384 | 90 | info.set_anchored_end(group.hir.is_anchored_end()); |
385 | 90 | info.set_line_anchored_start(group.hir.is_line_anchored_start()); |
386 | 90 | info.set_line_anchored_end(group.hir.is_line_anchored_end()); |
387 | 90 | info.set_any_anchored_start(group.hir.is_any_anchored_start()); |
388 | 90 | info.set_any_anchored_end(group.hir.is_any_anchored_end()); |
389 | 90 | info.set_match_empty(group.hir.is_match_empty()); |
390 | 90 | info.set_literal(false); |
391 | 90 | info.set_alternation_literal(false); |
392 | 90 | Hir { kind: HirKind::Group(group), info } |
393 | 90 | } |
394 | | |
395 | | /// Returns the concatenation of the given expressions. |
396 | | /// |
397 | | /// This flattens the concatenation as appropriate. |
398 | 100 | pub fn concat(mut exprs: Vec<Hir>) -> Hir { |
399 | 100 | match exprs.len() { |
400 | 0 | 0 => Hir::empty(), |
401 | 0 | 1 => exprs.pop().unwrap(), |
402 | | _ => { |
403 | 100 | let mut info = HirInfo::new(); |
404 | 100 | info.set_always_utf8(true); |
405 | 100 | info.set_all_assertions(true); |
406 | 100 | info.set_any_anchored_start(false); |
407 | 100 | info.set_any_anchored_end(false); |
408 | 100 | info.set_match_empty(true); |
409 | 100 | info.set_literal(true); |
410 | 100 | info.set_alternation_literal(true); |
411 | | |
412 | | // Some attributes require analyzing all sub-expressions. |
413 | 835 | for e in &exprs { |
414 | 735 | let x = info.is_always_utf8() && e.is_always_utf8(); |
415 | 735 | info.set_always_utf8(x); |
416 | | |
417 | 735 | let x = info.is_all_assertions() && e.is_all_assertions(); |
418 | 735 | info.set_all_assertions(x); |
419 | | |
420 | 735 | let x = info.is_any_anchored_start() |
421 | 480 | || e.is_any_anchored_start(); |
422 | 735 | info.set_any_anchored_start(x); |
423 | | |
424 | 735 | let x = |
425 | 735 | info.is_any_anchored_end() || e.is_any_anchored_end(); |
426 | 735 | info.set_any_anchored_end(x); |
427 | | |
428 | 735 | let x = info.is_match_empty() && e.is_match_empty(); |
429 | 735 | info.set_match_empty(x); |
430 | | |
431 | 735 | let x = info.is_literal() && e.is_literal(); |
432 | 735 | info.set_literal(x); |
433 | | |
434 | 735 | let x = info.is_alternation_literal() |
435 | 295 | && e.is_alternation_literal(); |
436 | 735 | info.set_alternation_literal(x); |
437 | | } |
438 | | // Anchored attributes require something slightly more |
439 | | // sophisticated. Normally, WLOG, to determine whether an |
440 | | // expression is anchored to the start, we'd only need to check |
441 | | // the first expression of a concatenation. However, |
442 | | // expressions like `$\b^` are still anchored to the start, |
443 | | // but the first expression in the concatenation *isn't* |
444 | | // anchored to the start. So the "first" expression to look at |
445 | | // is actually one that is either not an assertion or is |
446 | | // specifically the StartText assertion. |
447 | 100 | info.set_anchored_start( |
448 | 100 | exprs |
449 | 100 | .iter() |
450 | 100 | .take_while(|e| { |
451 | 100 | e.is_anchored_start() || e.is_all_assertions() |
452 | 100 | }) |
453 | 100 | .any(|e| e.is_anchored_start()), |
454 | | ); |
455 | | // Similarly for the end anchor, but in reverse. |
456 | 100 | info.set_anchored_end( |
457 | 100 | exprs |
458 | 100 | .iter() |
459 | 100 | .rev() |
460 | 100 | .take_while(|e| { |
461 | 100 | e.is_anchored_end() || e.is_all_assertions() |
462 | 100 | }) |
463 | 100 | .any(|e| e.is_anchored_end()), |
464 | | ); |
465 | | // Repeat the process for line anchors. |
466 | 100 | info.set_line_anchored_start( |
467 | 100 | exprs |
468 | 100 | .iter() |
469 | 100 | .take_while(|e| { |
470 | 100 | e.is_line_anchored_start() || e.is_all_assertions() |
471 | 100 | }) |
472 | 100 | .any(|e| e.is_line_anchored_start()), |
473 | | ); |
474 | 100 | info.set_line_anchored_end( |
475 | 100 | exprs |
476 | 100 | .iter() |
477 | 100 | .rev() |
478 | 100 | .take_while(|e| { |
479 | 100 | e.is_line_anchored_end() || e.is_all_assertions() |
480 | 100 | }) |
481 | 100 | .any(|e| e.is_line_anchored_end()), |
482 | | ); |
483 | 100 | Hir { kind: HirKind::Concat(exprs), info } |
484 | | } |
485 | | } |
486 | 100 | } |
487 | | |
488 | | /// Returns the alternation of the given expressions. |
489 | | /// |
490 | | /// This flattens the alternation as appropriate. |
491 | 10 | pub fn alternation(mut exprs: Vec<Hir>) -> Hir { |
492 | 10 | match exprs.len() { |
493 | 0 | 0 => Hir::empty(), |
494 | 0 | 1 => exprs.pop().unwrap(), |
495 | | _ => { |
496 | 10 | let mut info = HirInfo::new(); |
497 | 10 | info.set_always_utf8(true); |
498 | 10 | info.set_all_assertions(true); |
499 | 10 | info.set_anchored_start(true); |
500 | 10 | info.set_anchored_end(true); |
501 | 10 | info.set_line_anchored_start(true); |
502 | 10 | info.set_line_anchored_end(true); |
503 | 10 | info.set_any_anchored_start(false); |
504 | 10 | info.set_any_anchored_end(false); |
505 | 10 | info.set_match_empty(false); |
506 | 10 | info.set_literal(false); |
507 | 10 | info.set_alternation_literal(true); |
508 | | |
509 | | // Some attributes require analyzing all sub-expressions. |
510 | 40 | for e in &exprs { |
511 | 30 | let x = info.is_always_utf8() && e.is_always_utf8(); |
512 | 30 | info.set_always_utf8(x); |
513 | | |
514 | 30 | let x = info.is_all_assertions() && e.is_all_assertions(); |
515 | 30 | info.set_all_assertions(x); |
516 | | |
517 | 30 | let x = info.is_anchored_start() && e.is_anchored_start(); |
518 | 30 | info.set_anchored_start(x); |
519 | | |
520 | 30 | let x = info.is_anchored_end() && e.is_anchored_end(); |
521 | 30 | info.set_anchored_end(x); |
522 | | |
523 | 30 | let x = info.is_line_anchored_start() |
524 | 10 | && e.is_line_anchored_start(); |
525 | 30 | info.set_line_anchored_start(x); |
526 | | |
527 | 30 | let x = info.is_line_anchored_end() |
528 | 10 | && e.is_line_anchored_end(); |
529 | 30 | info.set_line_anchored_end(x); |
530 | | |
531 | 30 | let x = info.is_any_anchored_start() |
532 | 30 | || e.is_any_anchored_start(); |
533 | 30 | info.set_any_anchored_start(x); |
534 | | |
535 | 30 | let x = |
536 | 30 | info.is_any_anchored_end() || e.is_any_anchored_end(); |
537 | 30 | info.set_any_anchored_end(x); |
538 | | |
539 | 30 | let x = info.is_match_empty() || e.is_match_empty(); |
540 | 30 | info.set_match_empty(x); |
541 | | |
542 | 30 | let x = info.is_alternation_literal() && e.is_literal(); |
543 | 30 | info.set_alternation_literal(x); |
544 | | } |
545 | 10 | Hir { kind: HirKind::Alternation(exprs), info } |
546 | | } |
547 | | } |
548 | 10 | } |
549 | | |
550 | | /// Build an HIR expression for `.`. |
551 | | /// |
552 | | /// A `.` expression matches any character except for `\n`. To build an |
553 | | /// expression that matches any character, including `\n`, use the `any` |
554 | | /// method. |
555 | | /// |
556 | | /// If `bytes` is `true`, then this assumes characters are limited to a |
557 | | /// single byte. |
558 | 5 | pub fn dot(bytes: bool) -> Hir { |
559 | 5 | if bytes { |
560 | 0 | let mut cls = ClassBytes::empty(); |
561 | 0 | cls.push(ClassBytesRange::new(b'\0', b'\x09')); |
562 | 0 | cls.push(ClassBytesRange::new(b'\x0B', b'\xFF')); |
563 | 0 | Hir::class(Class::Bytes(cls)) |
564 | | } else { |
565 | 5 | let mut cls = ClassUnicode::empty(); |
566 | 5 | cls.push(ClassUnicodeRange::new('\0', '\x09')); |
567 | 5 | cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}')); |
568 | 5 | Hir::class(Class::Unicode(cls)) |
569 | | } |
570 | 5 | } |
571 | | |
572 | | /// Build an HIR expression for `(?s).`. |
573 | | /// |
574 | | /// A `(?s).` expression matches any character, including `\n`. To build an |
575 | | /// expression that matches any character except for `\n`, then use the |
576 | | /// `dot` method. |
577 | | /// |
578 | | /// If `bytes` is `true`, then this assumes characters are limited to a |
579 | | /// single byte. |
580 | 0 | pub fn any(bytes: bool) -> Hir { |
581 | 0 | if bytes { |
582 | 0 | let mut cls = ClassBytes::empty(); |
583 | 0 | cls.push(ClassBytesRange::new(b'\0', b'\xFF')); |
584 | 0 | Hir::class(Class::Bytes(cls)) |
585 | | } else { |
586 | 0 | let mut cls = ClassUnicode::empty(); |
587 | 0 | cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}')); |
588 | 0 | Hir::class(Class::Unicode(cls)) |
589 | | } |
590 | 0 | } |
591 | | |
592 | | /// Return true if and only if this HIR will always match valid UTF-8. |
593 | | /// |
594 | | /// When this returns false, then it is possible for this HIR expression |
595 | | /// to match invalid UTF-8. |
596 | 1.19k | pub fn is_always_utf8(&self) -> bool { |
597 | 1.19k | self.info.is_always_utf8() |
598 | 1.19k | } |
599 | | |
600 | | /// Returns true if and only if this entire HIR expression is made up of |
601 | | /// zero-width assertions. |
602 | | /// |
603 | | /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but |
604 | | /// not `^a`. |
605 | 880 | pub fn is_all_assertions(&self) -> bool { |
606 | 880 | self.info.is_all_assertions() |
607 | 880 | } |
608 | | |
609 | | /// Return true if and only if this HIR is required to match from the |
610 | | /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`, |
611 | | /// `^foo|^bar` but not `^foo|bar`. |
612 | 395 | pub fn is_anchored_start(&self) -> bool { |
613 | 395 | self.info.is_anchored_start() |
614 | 395 | } |
615 | | |
616 | | /// Return true if and only if this HIR is required to match at the end |
617 | | /// of text. This includes expressions like `foo$`, `(foo|bar)$`, |
618 | | /// `foo$|bar$` but not `foo$|bar`. |
619 | 395 | pub fn is_anchored_end(&self) -> bool { |
620 | 395 | self.info.is_anchored_end() |
621 | 395 | } |
622 | | |
623 | | /// Return true if and only if this HIR is required to match from the |
624 | | /// beginning of text or the beginning of a line. This includes expressions |
625 | | /// like `^foo`, `(?m)^foo`, `^(foo|bar)`, `^(foo|bar)`, `(?m)^foo|^bar` |
626 | | /// but not `^foo|bar` or `(?m)^foo|bar`. |
627 | | /// |
628 | | /// Note that if `is_anchored_start` is `true`, then |
629 | | /// `is_line_anchored_start` will also be `true`. The reverse implication |
630 | | /// is not true. For example, `(?m)^foo` is line anchored, but not |
631 | | /// `is_anchored_start`. |
632 | 215 | pub fn is_line_anchored_start(&self) -> bool { |
633 | 215 | self.info.is_line_anchored_start() |
634 | 215 | } |
635 | | |
636 | | /// Return true if and only if this HIR is required to match at the |
637 | | /// end of text or the end of a line. This includes expressions like |
638 | | /// `foo$`, `(?m)foo$`, `(foo|bar)$`, `(?m)(foo|bar)$`, `foo$|bar$`, |
639 | | /// `(?m)(foo|bar)$`, but not `foo$|bar` or `(?m)foo$|bar`. |
640 | | /// |
641 | | /// Note that if `is_anchored_end` is `true`, then |
642 | | /// `is_line_anchored_end` will also be `true`. The reverse implication |
643 | | /// is not true. For example, `(?m)foo$` is line anchored, but not |
644 | | /// `is_anchored_end`. |
645 | 215 | pub fn is_line_anchored_end(&self) -> bool { |
646 | 215 | self.info.is_line_anchored_end() |
647 | 215 | } |
648 | | |
649 | | /// Return true if and only if this HIR contains any sub-expression that |
650 | | /// is required to match at the beginning of text. Specifically, this |
651 | | /// returns true if the `^` symbol (when multiline mode is disabled) or the |
652 | | /// `\A` escape appear anywhere in the regex. |
653 | 925 | pub fn is_any_anchored_start(&self) -> bool { |
654 | 925 | self.info.is_any_anchored_start() |
655 | 925 | } |
656 | | |
657 | | /// Return true if and only if this HIR contains any sub-expression that is |
658 | | /// required to match at the end of text. Specifically, this returns true |
659 | | /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape |
660 | | /// appear anywhere in the regex. |
661 | 1.18k | pub fn is_any_anchored_end(&self) -> bool { |
662 | 1.18k | self.info.is_any_anchored_end() |
663 | 1.18k | } |
664 | | |
665 | | /// Return true if and only if the empty string is part of the language |
666 | | /// matched by this regular expression. |
667 | | /// |
668 | | /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\b` |
669 | | /// and `\B`, but not `a` or `a+`. |
670 | 360 | pub fn is_match_empty(&self) -> bool { |
671 | 360 | self.info.is_match_empty() |
672 | 360 | } |
673 | | |
674 | | /// Return true if and only if this HIR is a simple literal. This is only |
675 | | /// true when this HIR expression is either itself a `Literal` or a |
676 | | /// concatenation of only `Literal`s. |
677 | | /// |
678 | | /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`, |
679 | | /// `` are not (even though that contain sub-expressions that are literals). |
680 | 325 | pub fn is_literal(&self) -> bool { |
681 | 325 | self.info.is_literal() |
682 | 325 | } |
683 | | |
684 | | /// Return true if and only if this HIR is either a simple literal or an |
685 | | /// alternation of simple literals. This is only |
686 | | /// true when this HIR expression is either itself a `Literal` or a |
687 | | /// concatenation of only `Literal`s or an alternation of only `Literal`s. |
688 | | /// |
689 | | /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation |
690 | | /// literals, but `f+`, `(foo)`, `foo()`, `` |
691 | | /// are not (even though that contain sub-expressions that are literals). |
692 | 310 | pub fn is_alternation_literal(&self) -> bool { |
693 | 310 | self.info.is_alternation_literal() |
694 | 310 | } |
695 | | } |
696 | | |
697 | | impl HirKind { |
698 | | /// Return true if and only if this HIR is the empty regular expression. |
699 | | /// |
700 | | /// Note that this is not defined inductively. That is, it only tests if |
701 | | /// this kind is the `Empty` variant. To get the inductive definition, |
702 | | /// use the `is_match_empty` method on [`Hir`](struct.Hir.html). |
703 | 735 | pub fn is_empty(&self) -> bool { |
704 | 735 | match *self { |
705 | 0 | HirKind::Empty => true, |
706 | 735 | _ => false, |
707 | | } |
708 | 735 | } |
709 | | |
710 | | /// Returns true if and only if this kind has any (including possibly |
711 | | /// empty) subexpressions. |
712 | 695 | pub fn has_subexprs(&self) -> bool { |
713 | 695 | match *self { |
714 | | HirKind::Empty |
715 | | | HirKind::Literal(_) |
716 | | | HirKind::Class(_) |
717 | | | HirKind::Anchor(_) |
718 | 655 | | HirKind::WordBoundary(_) => false, |
719 | | HirKind::Group(_) |
720 | | | HirKind::Repetition(_) |
721 | | | HirKind::Concat(_) |
722 | 40 | | HirKind::Alternation(_) => true, |
723 | | } |
724 | 695 | } |
725 | | } |
726 | | |
727 | | /// Print a display representation of this Hir. |
728 | | /// |
729 | | /// The result of this is a valid regular expression pattern string. |
730 | | /// |
731 | | /// This implementation uses constant stack space and heap space proportional |
732 | | /// to the size of the `Hir`. |
733 | | impl fmt::Display for Hir { |
734 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
735 | | use crate::hir::print::Printer; |
736 | 0 | Printer::new().print(self, f) |
737 | 0 | } |
738 | | } |
739 | | |
740 | | /// The high-level intermediate representation of a literal. |
741 | | /// |
742 | | /// A literal corresponds to a single character, where a character is either |
743 | | /// defined by a Unicode scalar value or an arbitrary byte. Unicode characters |
744 | | /// are preferred whenever possible. In particular, a `Byte` variant is only |
745 | | /// ever produced when it could match invalid UTF-8. |
746 | | #[derive(Clone, Debug, Eq, PartialEq)] |
747 | | pub enum Literal { |
748 | | /// A single character represented by a Unicode scalar value. |
749 | | Unicode(char), |
750 | | /// A single character represented by an arbitrary byte. |
751 | | Byte(u8), |
752 | | } |
753 | | |
754 | | impl Literal { |
755 | | /// Returns true if and only if this literal corresponds to a Unicode |
756 | | /// scalar value. |
757 | 465 | pub fn is_unicode(&self) -> bool { |
758 | 0 | match *self { |
759 | 465 | Literal::Unicode(_) => true, |
760 | 0 | Literal::Byte(b) if b <= 0x7F => true, |
761 | 0 | Literal::Byte(_) => false, |
762 | | } |
763 | 465 | } |
764 | | } |
765 | | |
766 | | /// The high-level intermediate representation of a character class. |
767 | | /// |
768 | | /// A character class corresponds to a set of characters. A character is either |
769 | | /// defined by a Unicode scalar value or a byte. Unicode characters are used |
770 | | /// by default, while bytes are used when Unicode mode (via the `u` flag) is |
771 | | /// disabled. |
772 | | /// |
773 | | /// A character class, regardless of its character type, is represented by a |
774 | | /// sequence of non-overlapping non-adjacent ranges of characters. |
775 | | /// |
776 | | /// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may |
777 | | /// be produced even when it exclusively matches valid UTF-8. This is because |
778 | | /// a `Bytes` variant represents an intention by the author of the regular |
779 | | /// expression to disable Unicode mode, which in turn impacts the semantics of |
780 | | /// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not |
781 | | /// match the same set of strings. |
782 | | #[derive(Clone, Debug, Eq, PartialEq)] |
783 | | pub enum Class { |
784 | | /// A set of characters represented by Unicode scalar values. |
785 | | Unicode(ClassUnicode), |
786 | | /// A set of characters represented by arbitrary bytes (one byte per |
787 | | /// character). |
788 | | Bytes(ClassBytes), |
789 | | } |
790 | | |
791 | | impl Class { |
792 | | /// Apply Unicode simple case folding to this character class, in place. |
793 | | /// The character class will be expanded to include all simple case folded |
794 | | /// character variants. |
795 | | /// |
796 | | /// If this is a byte oriented character class, then this will be limited |
797 | | /// to the ASCII ranges `A-Z` and `a-z`. |
798 | 0 | pub fn case_fold_simple(&mut self) { |
799 | 0 | match *self { |
800 | 0 | Class::Unicode(ref mut x) => x.case_fold_simple(), |
801 | 0 | Class::Bytes(ref mut x) => x.case_fold_simple(), |
802 | | } |
803 | 0 | } |
804 | | |
805 | | /// Negate this character class in place. |
806 | | /// |
807 | | /// After completion, this character class will contain precisely the |
808 | | /// characters that weren't previously in the class. |
809 | 0 | pub fn negate(&mut self) { |
810 | 0 | match *self { |
811 | 0 | Class::Unicode(ref mut x) => x.negate(), |
812 | 0 | Class::Bytes(ref mut x) => x.negate(), |
813 | | } |
814 | 0 | } |
815 | | |
816 | | /// Returns true if and only if this character class will only ever match |
817 | | /// valid UTF-8. |
818 | | /// |
819 | | /// A character class can match invalid UTF-8 only when the following |
820 | | /// conditions are met: |
821 | | /// |
822 | | /// 1. The translator was configured to permit generating an expression |
823 | | /// that can match invalid UTF-8. (By default, this is disabled.) |
824 | | /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete |
825 | | /// syntax or in the parser builder. By default, Unicode mode is |
826 | | /// enabled. |
827 | 175 | pub fn is_always_utf8(&self) -> bool { |
828 | 175 | match *self { |
829 | 175 | Class::Unicode(_) => true, |
830 | 0 | Class::Bytes(ref x) => x.is_all_ascii(), |
831 | | } |
832 | 175 | } |
833 | | } |
834 | | |
835 | | /// A set of characters represented by Unicode scalar values. |
836 | | #[derive(Clone, Debug, Eq, PartialEq)] |
837 | | pub struct ClassUnicode { |
838 | | set: IntervalSet<ClassUnicodeRange>, |
839 | | } |
840 | | |
841 | | impl ClassUnicode { |
842 | | /// Create a new class from a sequence of ranges. |
843 | | /// |
844 | | /// The given ranges do not need to be in any specific order, and ranges |
845 | | /// may overlap. |
846 | 325 | pub fn new<I>(ranges: I) -> ClassUnicode |
847 | 325 | where |
848 | 325 | I: IntoIterator<Item = ClassUnicodeRange>, |
849 | | { |
850 | 325 | ClassUnicode { set: IntervalSet::new(ranges) } |
851 | 325 | } <regex_syntax::hir::ClassUnicode>::new::<alloc::vec::Vec<regex_syntax::hir::ClassUnicodeRange>> Line | Count | Source | 846 | 175 | pub fn new<I>(ranges: I) -> ClassUnicode | 847 | 175 | where | 848 | 175 | I: IntoIterator<Item = ClassUnicodeRange>, | 849 | | { | 850 | 175 | ClassUnicode { set: IntervalSet::new(ranges) } | 851 | 175 | } |
<regex_syntax::hir::ClassUnicode>::new::<core::iter::adapters::map::Map<core::slice::iter::Iter<(char, char)>, <regex_syntax::hir::translate::TranslatorI>::hir_ascii_unicode_class::{closure#0}>>Line | Count | Source | 846 | 150 | pub fn new<I>(ranges: I) -> ClassUnicode | 847 | 150 | where | 848 | 150 | I: IntoIterator<Item = ClassUnicodeRange>, | 849 | | { | 850 | 150 | ClassUnicode { set: IntervalSet::new(ranges) } | 851 | 150 | } |
|
852 | | |
853 | | /// Create a new class with no ranges. |
854 | 175 | pub fn empty() -> ClassUnicode { |
855 | 175 | ClassUnicode::new(vec![]) |
856 | 175 | } |
857 | | |
858 | | /// Add a new range to this set. |
859 | 50 | pub fn push(&mut self, range: ClassUnicodeRange) { |
860 | 50 | self.set.push(range); |
861 | 50 | } |
862 | | |
863 | | /// Return an iterator over all ranges in this class. |
864 | | /// |
865 | | /// The iterator yields ranges in ascending order. |
866 | 255 | pub fn iter(&self) -> ClassUnicodeIter<'_> { |
867 | 255 | ClassUnicodeIter(self.set.iter()) |
868 | 255 | } |
869 | | |
870 | | /// Return the underlying ranges as a slice. |
871 | 695 | pub fn ranges(&self) -> &[ClassUnicodeRange] { |
872 | 695 | self.set.intervals() |
873 | 695 | } |
874 | | |
875 | | /// Expand this character class such that it contains all case folded |
876 | | /// characters, according to Unicode's "simple" mapping. For example, if |
877 | | /// this class consists of the range `a-z`, then applying case folding will |
878 | | /// result in the class containing both the ranges `a-z` and `A-Z`. |
879 | | /// |
880 | | /// # Panics |
881 | | /// |
882 | | /// This routine panics when the case mapping data necessary for this |
883 | | /// routine to complete is unavailable. This occurs when the `unicode-case` |
884 | | /// feature is not enabled. |
885 | | /// |
886 | | /// Callers should prefer using `try_case_fold_simple` instead, which will |
887 | | /// return an error instead of panicking. |
888 | 0 | pub fn case_fold_simple(&mut self) { |
889 | 0 | self.set |
890 | 0 | .case_fold_simple() |
891 | 0 | .expect("unicode-case feature must be enabled"); |
892 | 0 | } |
893 | | |
894 | | /// Expand this character class such that it contains all case folded |
895 | | /// characters, according to Unicode's "simple" mapping. For example, if |
896 | | /// this class consists of the range `a-z`, then applying case folding will |
897 | | /// result in the class containing both the ranges `a-z` and `A-Z`. |
898 | | /// |
899 | | /// # Error |
900 | | /// |
901 | | /// This routine returns an error when the case mapping data necessary |
902 | | /// for this routine to complete is unavailable. This occurs when the |
903 | | /// `unicode-case` feature is not enabled. |
904 | 0 | pub fn try_case_fold_simple( |
905 | 0 | &mut self, |
906 | 0 | ) -> result::Result<(), CaseFoldError> { |
907 | 0 | self.set.case_fold_simple() |
908 | 0 | } |
909 | | |
910 | | /// Negate this character class. |
911 | | /// |
912 | | /// For all `c` where `c` is a Unicode scalar value, if `c` was in this |
913 | | /// set, then it will not be in this set after negation. |
914 | 0 | pub fn negate(&mut self) { |
915 | 0 | self.set.negate(); |
916 | 0 | } |
917 | | |
918 | | /// Union this character class with the given character class, in place. |
919 | 150 | pub fn union(&mut self, other: &ClassUnicode) { |
920 | 150 | self.set.union(&other.set); |
921 | 150 | } |
922 | | |
923 | | /// Intersect this character class with the given character class, in |
924 | | /// place. |
925 | 0 | pub fn intersect(&mut self, other: &ClassUnicode) { |
926 | 0 | self.set.intersect(&other.set); |
927 | 0 | } |
928 | | |
929 | | /// Subtract the given character class from this character class, in place. |
930 | 0 | pub fn difference(&mut self, other: &ClassUnicode) { |
931 | 0 | self.set.difference(&other.set); |
932 | 0 | } |
933 | | |
934 | | /// Compute the symmetric difference of the given character classes, in |
935 | | /// place. |
936 | | /// |
937 | | /// This computes the symmetric difference of two character classes. This |
938 | | /// removes all elements in this class that are also in the given class, |
939 | | /// but all adds all elements from the given class that aren't in this |
940 | | /// class. That is, the class will contain all elements in either class, |
941 | | /// but will not contain any elements that are in both classes. |
942 | 0 | pub fn symmetric_difference(&mut self, other: &ClassUnicode) { |
943 | 0 | self.set.symmetric_difference(&other.set); |
944 | 0 | } |
945 | | |
946 | | /// Returns true if and only if this character class will either match |
947 | | /// nothing or only ASCII bytes. Stated differently, this returns false |
948 | | /// if and only if this class contains a non-ASCII codepoint. |
949 | 0 | pub fn is_all_ascii(&self) -> bool { |
950 | 0 | self.set.intervals().last().map_or(true, |r| r.end <= '\x7F') |
951 | 0 | } |
952 | | } |
953 | | |
954 | | /// An iterator over all ranges in a Unicode character class. |
955 | | /// |
956 | | /// The lifetime `'a` refers to the lifetime of the underlying class. |
957 | | #[derive(Debug)] |
958 | | pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>); |
959 | | |
960 | | impl<'a> Iterator for ClassUnicodeIter<'a> { |
961 | | type Item = &'a ClassUnicodeRange; |
962 | | |
963 | 715 | fn next(&mut self) -> Option<&'a ClassUnicodeRange> { |
964 | 715 | self.0.next() |
965 | 715 | } |
966 | | } |
967 | | |
968 | | /// A single range of characters represented by Unicode scalar values. |
969 | | /// |
970 | | /// The range is closed. That is, the start and end of the range are included |
971 | | /// in the range. |
972 | | #[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)] |
973 | | pub struct ClassUnicodeRange { |
974 | | start: char, |
975 | | end: char, |
976 | | } |
977 | | |
978 | | impl fmt::Debug for ClassUnicodeRange { |
979 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
980 | 0 | let start = if !self.start.is_whitespace() && !self.start.is_control() |
981 | | { |
982 | 0 | self.start.to_string() |
983 | | } else { |
984 | 0 | format!("0x{:X}", self.start as u32) |
985 | | }; |
986 | 0 | let end = if !self.end.is_whitespace() && !self.end.is_control() { |
987 | 0 | self.end.to_string() |
988 | | } else { |
989 | 0 | format!("0x{:X}", self.end as u32) |
990 | | }; |
991 | 0 | f.debug_struct("ClassUnicodeRange") |
992 | 0 | .field("start", &start) |
993 | 0 | .field("end", &end) |
994 | 0 | .finish() |
995 | 0 | } |
996 | | } |
997 | | |
998 | | impl Interval for ClassUnicodeRange { |
999 | | type Bound = char; |
1000 | | |
1001 | | #[inline] |
1002 | 2.58k | fn lower(&self) -> char { |
1003 | 2.58k | self.start |
1004 | 2.58k | } |
1005 | | #[inline] |
1006 | 2.58k | fn upper(&self) -> char { |
1007 | 2.58k | self.end |
1008 | 2.58k | } |
1009 | | #[inline] |
1010 | 1.23k | fn set_lower(&mut self, bound: char) { |
1011 | 1.23k | self.start = bound; |
1012 | 1.23k | } |
1013 | | #[inline] |
1014 | 1.23k | fn set_upper(&mut self, bound: char) { |
1015 | 1.23k | self.end = bound; |
1016 | 1.23k | } |
1017 | | |
1018 | | /// Apply simple case folding to this Unicode scalar value range. |
1019 | | /// |
1020 | | /// Additional ranges are appended to the given vector. Canonical ordering |
1021 | | /// is *not* maintained in the given vector. |
1022 | 0 | fn case_fold_simple( |
1023 | 0 | &self, |
1024 | 0 | ranges: &mut Vec<ClassUnicodeRange>, |
1025 | 0 | ) -> Result<(), unicode::CaseFoldError> { |
1026 | 0 | if !unicode::contains_simple_case_mapping(self.start, self.end)? { |
1027 | 0 | return Ok(()); |
1028 | 0 | } |
1029 | 0 | let start = self.start as u32; |
1030 | 0 | let end = (self.end as u32).saturating_add(1); |
1031 | 0 | let mut next_simple_cp = None; |
1032 | 0 | for cp in (start..end).filter_map(char::from_u32) { |
1033 | 0 | if next_simple_cp.map_or(false, |next| cp < next) { |
1034 | 0 | continue; |
1035 | 0 | } |
1036 | 0 | let it = match unicode::simple_fold(cp)? { |
1037 | 0 | Ok(it) => it, |
1038 | 0 | Err(next) => { |
1039 | 0 | next_simple_cp = next; |
1040 | 0 | continue; |
1041 | | } |
1042 | | }; |
1043 | 0 | for cp_folded in it { |
1044 | 0 | ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded)); |
1045 | 0 | } |
1046 | | } |
1047 | 0 | Ok(()) |
1048 | 0 | } |
1049 | | } |
1050 | | |
1051 | | impl ClassUnicodeRange { |
1052 | | /// Create a new Unicode scalar value range for a character class. |
1053 | | /// |
1054 | | /// The returned range is always in a canonical form. That is, the range |
1055 | | /// returned always satisfies the invariant that `start <= end`. |
1056 | 775 | pub fn new(start: char, end: char) -> ClassUnicodeRange { |
1057 | 775 | ClassUnicodeRange::create(start, end) |
1058 | 775 | } |
1059 | | |
1060 | | /// Return the start of this range. |
1061 | | /// |
1062 | | /// The start of a range is always less than or equal to the end of the |
1063 | | /// range. |
1064 | 945 | pub fn start(&self) -> char { |
1065 | 945 | self.start |
1066 | 945 | } |
1067 | | |
1068 | | /// Return the end of this range. |
1069 | | /// |
1070 | | /// The end of a range is always greater than or equal to the start of the |
1071 | | /// range. |
1072 | 945 | pub fn end(&self) -> char { |
1073 | 945 | self.end |
1074 | 945 | } |
1075 | | } |
1076 | | |
1077 | | /// A set of characters represented by arbitrary bytes (where one byte |
1078 | | /// corresponds to one character). |
1079 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1080 | | pub struct ClassBytes { |
1081 | | set: IntervalSet<ClassBytesRange>, |
1082 | | } |
1083 | | |
1084 | | impl ClassBytes { |
1085 | | /// Create a new class from a sequence of ranges. |
1086 | | /// |
1087 | | /// The given ranges do not need to be in any specific order, and ranges |
1088 | | /// may overlap. |
1089 | 0 | pub fn new<I>(ranges: I) -> ClassBytes |
1090 | 0 | where |
1091 | 0 | I: IntoIterator<Item = ClassBytesRange>, |
1092 | | { |
1093 | 0 | ClassBytes { set: IntervalSet::new(ranges) } |
1094 | 0 | } Unexecuted instantiation: <regex_syntax::hir::ClassBytes>::new::<alloc::vec::Vec<regex_syntax::hir::ClassBytesRange>> Unexecuted instantiation: <regex_syntax::hir::ClassBytes>::new::<core::iter::adapters::map::Map<core::slice::iter::Iter<(char, char)>, <regex_syntax::hir::translate::TranslatorI>::hir_ascii_byte_class::{closure#0}>> |
1095 | | |
1096 | | /// Create a new class with no ranges. |
1097 | 0 | pub fn empty() -> ClassBytes { |
1098 | 0 | ClassBytes::new(vec![]) |
1099 | 0 | } |
1100 | | |
1101 | | /// Add a new range to this set. |
1102 | 0 | pub fn push(&mut self, range: ClassBytesRange) { |
1103 | 0 | self.set.push(range); |
1104 | 0 | } |
1105 | | |
1106 | | /// Return an iterator over all ranges in this class. |
1107 | | /// |
1108 | | /// The iterator yields ranges in ascending order. |
1109 | 0 | pub fn iter(&self) -> ClassBytesIter<'_> { |
1110 | 0 | ClassBytesIter(self.set.iter()) |
1111 | 0 | } |
1112 | | |
1113 | | /// Return the underlying ranges as a slice. |
1114 | 0 | pub fn ranges(&self) -> &[ClassBytesRange] { |
1115 | 0 | self.set.intervals() |
1116 | 0 | } |
1117 | | |
1118 | | /// Expand this character class such that it contains all case folded |
1119 | | /// characters. For example, if this class consists of the range `a-z`, |
1120 | | /// then applying case folding will result in the class containing both the |
1121 | | /// ranges `a-z` and `A-Z`. |
1122 | | /// |
1123 | | /// Note that this only applies ASCII case folding, which is limited to the |
1124 | | /// characters `a-z` and `A-Z`. |
1125 | 0 | pub fn case_fold_simple(&mut self) { |
1126 | 0 | self.set.case_fold_simple().expect("ASCII case folding never fails"); |
1127 | 0 | } |
1128 | | |
1129 | | /// Negate this byte class. |
1130 | | /// |
1131 | | /// For all `b` where `b` is a any byte, if `b` was in this set, then it |
1132 | | /// will not be in this set after negation. |
1133 | 0 | pub fn negate(&mut self) { |
1134 | 0 | self.set.negate(); |
1135 | 0 | } |
1136 | | |
1137 | | /// Union this byte class with the given byte class, in place. |
1138 | 0 | pub fn union(&mut self, other: &ClassBytes) { |
1139 | 0 | self.set.union(&other.set); |
1140 | 0 | } |
1141 | | |
1142 | | /// Intersect this byte class with the given byte class, in place. |
1143 | 0 | pub fn intersect(&mut self, other: &ClassBytes) { |
1144 | 0 | self.set.intersect(&other.set); |
1145 | 0 | } |
1146 | | |
1147 | | /// Subtract the given byte class from this byte class, in place. |
1148 | 0 | pub fn difference(&mut self, other: &ClassBytes) { |
1149 | 0 | self.set.difference(&other.set); |
1150 | 0 | } |
1151 | | |
1152 | | /// Compute the symmetric difference of the given byte classes, in place. |
1153 | | /// |
1154 | | /// This computes the symmetric difference of two byte classes. This |
1155 | | /// removes all elements in this class that are also in the given class, |
1156 | | /// but all adds all elements from the given class that aren't in this |
1157 | | /// class. That is, the class will contain all elements in either class, |
1158 | | /// but will not contain any elements that are in both classes. |
1159 | 0 | pub fn symmetric_difference(&mut self, other: &ClassBytes) { |
1160 | 0 | self.set.symmetric_difference(&other.set); |
1161 | 0 | } |
1162 | | |
1163 | | /// Returns true if and only if this character class will either match |
1164 | | /// nothing or only ASCII bytes. Stated differently, this returns false |
1165 | | /// if and only if this class contains a non-ASCII byte. |
1166 | 0 | pub fn is_all_ascii(&self) -> bool { |
1167 | 0 | self.set.intervals().last().map_or(true, |r| r.end <= 0x7F) |
1168 | 0 | } |
1169 | | } |
1170 | | |
1171 | | /// An iterator over all ranges in a byte character class. |
1172 | | /// |
1173 | | /// The lifetime `'a` refers to the lifetime of the underlying class. |
1174 | | #[derive(Debug)] |
1175 | | pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>); |
1176 | | |
1177 | | impl<'a> Iterator for ClassBytesIter<'a> { |
1178 | | type Item = &'a ClassBytesRange; |
1179 | | |
1180 | 0 | fn next(&mut self) -> Option<&'a ClassBytesRange> { |
1181 | 0 | self.0.next() |
1182 | 0 | } |
1183 | | } |
1184 | | |
1185 | | /// A single range of characters represented by arbitrary bytes. |
1186 | | /// |
1187 | | /// The range is closed. That is, the start and end of the range are included |
1188 | | /// in the range. |
1189 | | #[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)] |
1190 | | pub struct ClassBytesRange { |
1191 | | start: u8, |
1192 | | end: u8, |
1193 | | } |
1194 | | |
1195 | | impl Interval for ClassBytesRange { |
1196 | | type Bound = u8; |
1197 | | |
1198 | | #[inline] |
1199 | 0 | fn lower(&self) -> u8 { |
1200 | 0 | self.start |
1201 | 0 | } |
1202 | | #[inline] |
1203 | 0 | fn upper(&self) -> u8 { |
1204 | 0 | self.end |
1205 | 0 | } |
1206 | | #[inline] |
1207 | 0 | fn set_lower(&mut self, bound: u8) { |
1208 | 0 | self.start = bound; |
1209 | 0 | } |
1210 | | #[inline] |
1211 | 0 | fn set_upper(&mut self, bound: u8) { |
1212 | 0 | self.end = bound; |
1213 | 0 | } |
1214 | | |
1215 | | /// Apply simple case folding to this byte range. Only ASCII case mappings |
1216 | | /// (for a-z) are applied. |
1217 | | /// |
1218 | | /// Additional ranges are appended to the given vector. Canonical ordering |
1219 | | /// is *not* maintained in the given vector. |
1220 | 0 | fn case_fold_simple( |
1221 | 0 | &self, |
1222 | 0 | ranges: &mut Vec<ClassBytesRange>, |
1223 | 0 | ) -> Result<(), unicode::CaseFoldError> { |
1224 | 0 | if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) { |
1225 | 0 | let lower = cmp::max(self.start, b'a'); |
1226 | 0 | let upper = cmp::min(self.end, b'z'); |
1227 | 0 | ranges.push(ClassBytesRange::new(lower - 32, upper - 32)); |
1228 | 0 | } |
1229 | 0 | if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) { |
1230 | 0 | let lower = cmp::max(self.start, b'A'); |
1231 | 0 | let upper = cmp::min(self.end, b'Z'); |
1232 | 0 | ranges.push(ClassBytesRange::new(lower + 32, upper + 32)); |
1233 | 0 | } |
1234 | 0 | Ok(()) |
1235 | 0 | } |
1236 | | } |
1237 | | |
1238 | | impl ClassBytesRange { |
1239 | | /// Create a new byte range for a character class. |
1240 | | /// |
1241 | | /// The returned range is always in a canonical form. That is, the range |
1242 | | /// returned always satisfies the invariant that `start <= end`. |
1243 | 0 | pub fn new(start: u8, end: u8) -> ClassBytesRange { |
1244 | 0 | ClassBytesRange::create(start, end) |
1245 | 0 | } |
1246 | | |
1247 | | /// Return the start of this range. |
1248 | | /// |
1249 | | /// The start of a range is always less than or equal to the end of the |
1250 | | /// range. |
1251 | 0 | pub fn start(&self) -> u8 { |
1252 | 0 | self.start |
1253 | 0 | } |
1254 | | |
1255 | | /// Return the end of this range. |
1256 | | /// |
1257 | | /// The end of a range is always greater than or equal to the start of the |
1258 | | /// range. |
1259 | 0 | pub fn end(&self) -> u8 { |
1260 | 0 | self.end |
1261 | 0 | } |
1262 | | } |
1263 | | |
1264 | | impl fmt::Debug for ClassBytesRange { |
1265 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
1266 | 0 | let mut debug = f.debug_struct("ClassBytesRange"); |
1267 | 0 | if self.start <= 0x7F { |
1268 | 0 | debug.field("start", &(self.start as char)); |
1269 | 0 | } else { |
1270 | 0 | debug.field("start", &self.start); |
1271 | 0 | } |
1272 | 0 | if self.end <= 0x7F { |
1273 | 0 | debug.field("end", &(self.end as char)); |
1274 | 0 | } else { |
1275 | 0 | debug.field("end", &self.end); |
1276 | 0 | } |
1277 | 0 | debug.finish() |
1278 | 0 | } |
1279 | | } |
1280 | | |
1281 | | /// The high-level intermediate representation for an anchor assertion. |
1282 | | /// |
1283 | | /// A matching anchor assertion is always zero-length. |
1284 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1285 | | pub enum Anchor { |
1286 | | /// Match the beginning of a line or the beginning of text. Specifically, |
1287 | | /// this matches at the starting position of the input, or at the position |
1288 | | /// immediately following a `\n` character. |
1289 | | StartLine, |
1290 | | /// Match the end of a line or the end of text. Specifically, |
1291 | | /// this matches at the end position of the input, or at the position |
1292 | | /// immediately preceding a `\n` character. |
1293 | | EndLine, |
1294 | | /// Match the beginning of text. Specifically, this matches at the starting |
1295 | | /// position of the input. |
1296 | | StartText, |
1297 | | /// Match the end of text. Specifically, this matches at the ending |
1298 | | /// position of the input. |
1299 | | EndText, |
1300 | | } |
1301 | | |
1302 | | /// The high-level intermediate representation for a word-boundary assertion. |
1303 | | /// |
1304 | | /// A matching word boundary assertion is always zero-length. |
1305 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1306 | | pub enum WordBoundary { |
1307 | | /// Match a Unicode-aware word boundary. That is, this matches a position |
1308 | | /// where the left adjacent character and right adjacent character |
1309 | | /// correspond to a word and non-word or a non-word and word character. |
1310 | | Unicode, |
1311 | | /// Match a Unicode-aware negation of a word boundary. |
1312 | | UnicodeNegate, |
1313 | | /// Match an ASCII-only word boundary. That is, this matches a position |
1314 | | /// where the left adjacent character and right adjacent character |
1315 | | /// correspond to a word and non-word or a non-word and word character. |
1316 | | Ascii, |
1317 | | /// Match an ASCII-only negation of a word boundary. |
1318 | | AsciiNegate, |
1319 | | } |
1320 | | |
1321 | | impl WordBoundary { |
1322 | | /// Returns true if and only if this word boundary assertion is negated. |
1323 | 0 | pub fn is_negated(&self) -> bool { |
1324 | 0 | match *self { |
1325 | 0 | WordBoundary::Unicode | WordBoundary::Ascii => false, |
1326 | 0 | WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true, |
1327 | | } |
1328 | 0 | } |
1329 | | } |
1330 | | |
1331 | | /// The high-level intermediate representation for a group. |
1332 | | /// |
1333 | | /// This represents one of three possible group types: |
1334 | | /// |
1335 | | /// 1. A non-capturing group (e.g., `(?:expr)`). |
1336 | | /// 2. A capturing group (e.g., `(expr)`). |
1337 | | /// 3. A named capturing group (e.g., `(?P<name>expr)`). |
1338 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1339 | | pub struct Group { |
1340 | | /// The kind of this group. If it is a capturing group, then the kind |
1341 | | /// contains the capture group index (and the name, if it is a named |
1342 | | /// group). |
1343 | | pub kind: GroupKind, |
1344 | | /// The expression inside the capturing group, which may be empty. |
1345 | | pub hir: Box<Hir>, |
1346 | | } |
1347 | | |
1348 | | /// The kind of group. |
1349 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1350 | | pub enum GroupKind { |
1351 | | /// A normal unnamed capturing group. |
1352 | | /// |
1353 | | /// The value is the capture index of the group. |
1354 | | CaptureIndex(u32), |
1355 | | /// A named capturing group. |
1356 | | CaptureName { |
1357 | | /// The name of the group. |
1358 | | name: String, |
1359 | | /// The capture index of the group. |
1360 | | index: u32, |
1361 | | }, |
1362 | | /// A non-capturing group. |
1363 | | NonCapturing, |
1364 | | } |
1365 | | |
1366 | | /// The high-level intermediate representation of a repetition operator. |
1367 | | /// |
1368 | | /// A repetition operator permits the repetition of an arbitrary |
1369 | | /// sub-expression. |
1370 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1371 | | pub struct Repetition { |
1372 | | /// The kind of this repetition operator. |
1373 | | pub kind: RepetitionKind, |
1374 | | /// Whether this repetition operator is greedy or not. A greedy operator |
1375 | | /// will match as much as it can. A non-greedy operator will match as |
1376 | | /// little as it can. |
1377 | | /// |
1378 | | /// Typically, operators are greedy by default and are only non-greedy when |
1379 | | /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is |
1380 | | /// not. However, this can be inverted via the `U` "ungreedy" flag. |
1381 | | pub greedy: bool, |
1382 | | /// The expression being repeated. |
1383 | | pub hir: Box<Hir>, |
1384 | | } |
1385 | | |
1386 | | impl Repetition { |
1387 | | /// Returns true if and only if this repetition operator makes it possible |
1388 | | /// to match the empty string. |
1389 | | /// |
1390 | | /// Note that this is not defined inductively. For example, while `a*` |
1391 | | /// will report `true`, `()+` will not, even though `()` matches the empty |
1392 | | /// string and one or more occurrences of something that matches the empty |
1393 | | /// string will always match the empty string. In order to get the |
1394 | | /// inductive definition, see the corresponding method on |
1395 | | /// [`Hir`](struct.Hir.html). |
1396 | 1.62k | pub fn is_match_empty(&self) -> bool { |
1397 | 0 | match self.kind { |
1398 | 450 | RepetitionKind::ZeroOrOne => true, |
1399 | 875 | RepetitionKind::ZeroOrMore => true, |
1400 | 300 | RepetitionKind::OneOrMore => false, |
1401 | 0 | RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0, |
1402 | 0 | RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0, |
1403 | 0 | RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0, |
1404 | | } |
1405 | 1.62k | } |
1406 | | } |
1407 | | |
1408 | | /// The kind of a repetition operator. |
1409 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1410 | | pub enum RepetitionKind { |
1411 | | /// Matches a sub-expression zero or one times. |
1412 | | ZeroOrOne, |
1413 | | /// Matches a sub-expression zero or more times. |
1414 | | ZeroOrMore, |
1415 | | /// Matches a sub-expression one or more times. |
1416 | | OneOrMore, |
1417 | | /// Matches a sub-expression within a bounded range of times. |
1418 | | Range(RepetitionRange), |
1419 | | } |
1420 | | |
1421 | | /// The kind of a counted repetition operator. |
1422 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1423 | | pub enum RepetitionRange { |
1424 | | /// Matches a sub-expression exactly this many times. |
1425 | | Exactly(u32), |
1426 | | /// Matches a sub-expression at least this many times. |
1427 | | AtLeast(u32), |
1428 | | /// Matches a sub-expression at least `m` times and at most `n` times. |
1429 | | Bounded(u32, u32), |
1430 | | } |
1431 | | |
1432 | | /// A custom `Drop` impl is used for `HirKind` such that it uses constant stack |
1433 | | /// space but heap space proportional to the depth of the total `Hir`. |
1434 | | impl Drop for Hir { |
1435 | 2.84k | fn drop(&mut self) { |
1436 | | use std::mem; |
1437 | | |
1438 | 2.84k | match *self.kind() { |
1439 | | HirKind::Empty |
1440 | | | HirKind::Literal(_) |
1441 | | | HirKind::Class(_) |
1442 | | | HirKind::Anchor(_) |
1443 | 1.91k | | HirKind::WordBoundary(_) => return, |
1444 | 200 | HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return, |
1445 | 495 | HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return, |
1446 | 220 | HirKind::Concat(ref x) if x.is_empty() => return, |
1447 | 20 | HirKind::Alternation(ref x) if x.is_empty() => return, |
1448 | 55 | _ => {} |
1449 | | } |
1450 | | |
1451 | 55 | let mut stack = vec![mem::replace(self, Hir::empty())]; |
1452 | 2.15k | while let Some(mut expr) = stack.pop() { |
1453 | 2.10k | match expr.kind { |
1454 | | HirKind::Empty |
1455 | | | HirKind::Literal(_) |
1456 | | | HirKind::Class(_) |
1457 | | | HirKind::Anchor(_) |
1458 | 1.25k | | HirKind::WordBoundary(_) => {} |
1459 | 200 | HirKind::Group(ref mut x) => { |
1460 | 200 | stack.push(mem::replace(&mut x.hir, Hir::empty())); |
1461 | 200 | } |
1462 | 420 | HirKind::Repetition(ref mut x) => { |
1463 | 420 | stack.push(mem::replace(&mut x.hir, Hir::empty())); |
1464 | 420 | } |
1465 | 205 | HirKind::Concat(ref mut x) => { |
1466 | 205 | stack.extend(x.drain(..)); |
1467 | 205 | } |
1468 | 20 | HirKind::Alternation(ref mut x) => { |
1469 | 20 | stack.extend(x.drain(..)); |
1470 | 20 | } |
1471 | | } |
1472 | | } |
1473 | 2.84k | } |
1474 | | } |
1475 | | |
1476 | | /// A type that documents various attributes of an HIR expression. |
1477 | | /// |
1478 | | /// These attributes are typically defined inductively on the HIR. |
1479 | | #[derive(Clone, Debug, Eq, PartialEq)] |
1480 | | struct HirInfo { |
1481 | | /// Represent yes/no questions by a bitfield to conserve space, since |
1482 | | /// this is included in every HIR expression. |
1483 | | /// |
1484 | | /// If more attributes need to be added, it is OK to increase the size of |
1485 | | /// this as appropriate. |
1486 | | bools: u16, |
1487 | | } |
1488 | | |
1489 | | // A simple macro for defining bitfield accessors/mutators. |
1490 | | macro_rules! define_bool { |
1491 | | ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => { |
1492 | 11.8k | fn $is_fn_name(&self) -> bool { |
1493 | 11.8k | self.bools & (0b1 << $bit) > 0 |
1494 | 11.8k | } <regex_syntax::hir::HirInfo>::is_literal Line | Count | Source | 1492 | 1.06k | fn $is_fn_name(&self) -> bool { | 1493 | 1.06k | self.bools & (0b1 << $bit) > 0 | 1494 | 1.06k | } |
<regex_syntax::hir::HirInfo>::is_always_utf8 Line | Count | Source | 1492 | 1.96k | fn $is_fn_name(&self) -> bool { | 1493 | 1.96k | self.bools & (0b1 << $bit) > 0 | 1494 | 1.96k | } |
<regex_syntax::hir::HirInfo>::is_match_empty Line | Count | Source | 1492 | 1.12k | fn $is_fn_name(&self) -> bool { | 1493 | 1.12k | self.bools & (0b1 << $bit) > 0 | 1494 | 1.12k | } |
<regex_syntax::hir::HirInfo>::is_anchored_start Line | Count | Source | 1492 | 425 | fn $is_fn_name(&self) -> bool { | 1493 | 425 | self.bools & (0b1 << $bit) > 0 | 1494 | 425 | } |
<regex_syntax::hir::HirInfo>::is_any_anchored_end Line | Count | Source | 1492 | 1.94k | fn $is_fn_name(&self) -> bool { | 1493 | 1.94k | self.bools & (0b1 << $bit) > 0 | 1494 | 1.94k | } |
<regex_syntax::hir::HirInfo>::is_any_anchored_start Line | Count | Source | 1492 | 1.69k | fn $is_fn_name(&self) -> bool { | 1493 | 1.69k | self.bools & (0b1 << $bit) > 0 | 1494 | 1.69k | } |
<regex_syntax::hir::HirInfo>::is_alternation_literal Line | Count | Source | 1492 | 1.07k | fn $is_fn_name(&self) -> bool { | 1493 | 1.07k | self.bools & (0b1 << $bit) > 0 | 1494 | 1.07k | } |
<regex_syntax::hir::HirInfo>::is_line_anchored_start Line | Count | Source | 1492 | 245 | fn $is_fn_name(&self) -> bool { | 1493 | 245 | self.bools & (0b1 << $bit) > 0 | 1494 | 245 | } |
<regex_syntax::hir::HirInfo>::is_anchored_end Line | Count | Source | 1492 | 425 | fn $is_fn_name(&self) -> bool { | 1493 | 425 | self.bools & (0b1 << $bit) > 0 | 1494 | 425 | } |
<regex_syntax::hir::HirInfo>::is_all_assertions Line | Count | Source | 1492 | 1.64k | fn $is_fn_name(&self) -> bool { | 1493 | 1.64k | self.bools & (0b1 << $bit) > 0 | 1494 | 1.64k | } |
<regex_syntax::hir::HirInfo>::is_line_anchored_end Line | Count | Source | 1492 | 245 | fn $is_fn_name(&self) -> bool { | 1493 | 245 | self.bools & (0b1 << $bit) > 0 | 1494 | 245 | } |
|
1495 | | |
1496 | 26.1k | fn $set_fn_name(&mut self, yes: bool) { |
1497 | 26.1k | if yes { |
1498 | 6.71k | self.bools |= 1 << $bit; |
1499 | 19.3k | } else { |
1500 | 19.3k | self.bools &= !(1 << $bit); |
1501 | 19.3k | } |
1502 | 26.1k | } <regex_syntax::hir::HirInfo>::set_literal Line | Count | Source | 1496 | 2.60k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 2.60k | if yes { | 1498 | 790 | self.bools |= 1 << $bit; | 1499 | 1.81k | } else { | 1500 | 1.81k | self.bools &= !(1 << $bit); | 1501 | 1.81k | } | 1502 | 2.60k | } |
<regex_syntax::hir::HirInfo>::set_always_utf8 Line | Count | Source | 1496 | 2.63k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 2.63k | if yes { | 1498 | 2.63k | self.bools |= 1 << $bit; | 1499 | 2.63k | } else { | 1500 | 0 | self.bools &= !(1 << $bit); | 1501 | 0 | } | 1502 | 2.63k | } |
<regex_syntax::hir::HirInfo>::set_match_empty Line | Count | Source | 1496 | 2.63k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 2.63k | if yes { | 1498 | 1.15k | self.bools |= 1 << $bit; | 1499 | 1.48k | } else { | 1500 | 1.48k | self.bools &= !(1 << $bit); | 1501 | 1.48k | } | 1502 | 2.63k | } |
<regex_syntax::hir::HirInfo>::set_anchored_end Line | Count | Source | 1496 | 1.91k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 1.91k | if yes { | 1498 | 40 | self.bools |= 1 << $bit; | 1499 | 1.87k | } else { | 1500 | 1.87k | self.bools &= !(1 << $bit); | 1501 | 1.87k | } | 1502 | 1.91k | } |
<regex_syntax::hir::HirInfo>::set_all_assertions Line | Count | Source | 1496 | 2.63k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 2.63k | if yes { | 1498 | 830 | self.bools |= 1 << $bit; | 1499 | 1.80k | } else { | 1500 | 1.80k | self.bools &= !(1 << $bit); | 1501 | 1.80k | } | 1502 | 2.63k | } |
<regex_syntax::hir::HirInfo>::set_anchored_start Line | Count | Source | 1496 | 1.91k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 1.91k | if yes { | 1498 | 40 | self.bools |= 1 << $bit; | 1499 | 1.87k | } else { | 1500 | 1.87k | self.bools &= !(1 << $bit); | 1501 | 1.87k | } | 1502 | 1.91k | } |
<regex_syntax::hir::HirInfo>::set_any_anchored_end Line | Count | Source | 1496 | 2.65k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 2.65k | if yes { | 1498 | 30 | self.bools |= 1 << $bit; | 1499 | 2.62k | } else { | 1500 | 2.62k | self.bools &= !(1 << $bit); | 1501 | 2.62k | } | 1502 | 2.65k | } |
<regex_syntax::hir::HirInfo>::set_line_anchored_end Line | Count | Source | 1496 | 1.91k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 1.91k | if yes { | 1498 | 40 | self.bools |= 1 << $bit; | 1499 | 1.87k | } else { | 1500 | 1.87k | self.bools &= !(1 << $bit); | 1501 | 1.87k | } | 1502 | 1.91k | } |
<regex_syntax::hir::HirInfo>::set_any_anchored_start Line | Count | Source | 1496 | 2.65k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 2.65k | if yes { | 1498 | 285 | self.bools |= 1 << $bit; | 1499 | 2.36k | } else { | 1500 | 2.36k | self.bools &= !(1 << $bit); | 1501 | 2.36k | } | 1502 | 2.65k | } |
<regex_syntax::hir::HirInfo>::set_alternation_literal Line | Count | Source | 1496 | 2.63k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 2.63k | if yes { | 1498 | 830 | self.bools |= 1 << $bit; | 1499 | 1.80k | } else { | 1500 | 1.80k | self.bools &= !(1 << $bit); | 1501 | 1.80k | } | 1502 | 2.63k | } |
<regex_syntax::hir::HirInfo>::set_line_anchored_start Line | Count | Source | 1496 | 1.91k | fn $set_fn_name(&mut self, yes: bool) { | 1497 | 1.91k | if yes { | 1498 | 40 | self.bools |= 1 << $bit; | 1499 | 1.87k | } else { | 1500 | 1.87k | self.bools &= !(1 << $bit); | 1501 | 1.87k | } | 1502 | 1.91k | } |
|
1503 | | }; |
1504 | | } |
1505 | | |
1506 | | impl HirInfo { |
1507 | 1.87k | fn new() -> HirInfo { |
1508 | 1.87k | HirInfo { bools: 0 } |
1509 | 1.87k | } |
1510 | | |
1511 | | define_bool!(0, is_always_utf8, set_always_utf8); |
1512 | | define_bool!(1, is_all_assertions, set_all_assertions); |
1513 | | define_bool!(2, is_anchored_start, set_anchored_start); |
1514 | | define_bool!(3, is_anchored_end, set_anchored_end); |
1515 | | define_bool!(4, is_line_anchored_start, set_line_anchored_start); |
1516 | | define_bool!(5, is_line_anchored_end, set_line_anchored_end); |
1517 | | define_bool!(6, is_any_anchored_start, set_any_anchored_start); |
1518 | | define_bool!(7, is_any_anchored_end, set_any_anchored_end); |
1519 | | define_bool!(8, is_match_empty, set_match_empty); |
1520 | | define_bool!(9, is_literal, set_literal); |
1521 | | define_bool!(10, is_alternation_literal, set_alternation_literal); |
1522 | | } |
1523 | | |
1524 | | #[cfg(test)] |
1525 | | mod tests { |
1526 | | use super::*; |
1527 | | |
1528 | | fn uclass(ranges: &[(char, char)]) -> ClassUnicode { |
1529 | | let ranges: Vec<ClassUnicodeRange> = ranges |
1530 | | .iter() |
1531 | | .map(|&(s, e)| ClassUnicodeRange::new(s, e)) |
1532 | | .collect(); |
1533 | | ClassUnicode::new(ranges) |
1534 | | } |
1535 | | |
1536 | | fn bclass(ranges: &[(u8, u8)]) -> ClassBytes { |
1537 | | let ranges: Vec<ClassBytesRange> = |
1538 | | ranges.iter().map(|&(s, e)| ClassBytesRange::new(s, e)).collect(); |
1539 | | ClassBytes::new(ranges) |
1540 | | } |
1541 | | |
1542 | | fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> { |
1543 | | cls.iter().map(|x| (x.start(), x.end())).collect() |
1544 | | } |
1545 | | |
1546 | | #[cfg(feature = "unicode-case")] |
1547 | | fn ucasefold(cls: &ClassUnicode) -> ClassUnicode { |
1548 | | let mut cls_ = cls.clone(); |
1549 | | cls_.case_fold_simple(); |
1550 | | cls_ |
1551 | | } |
1552 | | |
1553 | | fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode { |
1554 | | let mut cls_ = cls1.clone(); |
1555 | | cls_.union(cls2); |
1556 | | cls_ |
1557 | | } |
1558 | | |
1559 | | fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode { |
1560 | | let mut cls_ = cls1.clone(); |
1561 | | cls_.intersect(cls2); |
1562 | | cls_ |
1563 | | } |
1564 | | |
1565 | | fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode { |
1566 | | let mut cls_ = cls1.clone(); |
1567 | | cls_.difference(cls2); |
1568 | | cls_ |
1569 | | } |
1570 | | |
1571 | | fn usymdifference( |
1572 | | cls1: &ClassUnicode, |
1573 | | cls2: &ClassUnicode, |
1574 | | ) -> ClassUnicode { |
1575 | | let mut cls_ = cls1.clone(); |
1576 | | cls_.symmetric_difference(cls2); |
1577 | | cls_ |
1578 | | } |
1579 | | |
1580 | | fn unegate(cls: &ClassUnicode) -> ClassUnicode { |
1581 | | let mut cls_ = cls.clone(); |
1582 | | cls_.negate(); |
1583 | | cls_ |
1584 | | } |
1585 | | |
1586 | | fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> { |
1587 | | cls.iter().map(|x| (x.start(), x.end())).collect() |
1588 | | } |
1589 | | |
1590 | | fn bcasefold(cls: &ClassBytes) -> ClassBytes { |
1591 | | let mut cls_ = cls.clone(); |
1592 | | cls_.case_fold_simple(); |
1593 | | cls_ |
1594 | | } |
1595 | | |
1596 | | fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes { |
1597 | | let mut cls_ = cls1.clone(); |
1598 | | cls_.union(cls2); |
1599 | | cls_ |
1600 | | } |
1601 | | |
1602 | | fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes { |
1603 | | let mut cls_ = cls1.clone(); |
1604 | | cls_.intersect(cls2); |
1605 | | cls_ |
1606 | | } |
1607 | | |
1608 | | fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes { |
1609 | | let mut cls_ = cls1.clone(); |
1610 | | cls_.difference(cls2); |
1611 | | cls_ |
1612 | | } |
1613 | | |
1614 | | fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes { |
1615 | | let mut cls_ = cls1.clone(); |
1616 | | cls_.symmetric_difference(cls2); |
1617 | | cls_ |
1618 | | } |
1619 | | |
1620 | | fn bnegate(cls: &ClassBytes) -> ClassBytes { |
1621 | | let mut cls_ = cls.clone(); |
1622 | | cls_.negate(); |
1623 | | cls_ |
1624 | | } |
1625 | | |
1626 | | #[test] |
1627 | | fn class_range_canonical_unicode() { |
1628 | | let range = ClassUnicodeRange::new('\u{00FF}', '\0'); |
1629 | | assert_eq!('\0', range.start()); |
1630 | | assert_eq!('\u{00FF}', range.end()); |
1631 | | } |
1632 | | |
1633 | | #[test] |
1634 | | fn class_range_canonical_bytes() { |
1635 | | let range = ClassBytesRange::new(b'\xFF', b'\0'); |
1636 | | assert_eq!(b'\0', range.start()); |
1637 | | assert_eq!(b'\xFF', range.end()); |
1638 | | } |
1639 | | |
1640 | | #[test] |
1641 | | fn class_canonicalize_unicode() { |
1642 | | let cls = uclass(&[('a', 'c'), ('x', 'z')]); |
1643 | | let expected = vec![('a', 'c'), ('x', 'z')]; |
1644 | | assert_eq!(expected, uranges(&cls)); |
1645 | | |
1646 | | let cls = uclass(&[('x', 'z'), ('a', 'c')]); |
1647 | | let expected = vec![('a', 'c'), ('x', 'z')]; |
1648 | | assert_eq!(expected, uranges(&cls)); |
1649 | | |
1650 | | let cls = uclass(&[('x', 'z'), ('w', 'y')]); |
1651 | | let expected = vec![('w', 'z')]; |
1652 | | assert_eq!(expected, uranges(&cls)); |
1653 | | |
1654 | | let cls = uclass(&[ |
1655 | | ('c', 'f'), |
1656 | | ('a', 'g'), |
1657 | | ('d', 'j'), |
1658 | | ('a', 'c'), |
1659 | | ('m', 'p'), |
1660 | | ('l', 's'), |
1661 | | ]); |
1662 | | let expected = vec![('a', 'j'), ('l', 's')]; |
1663 | | assert_eq!(expected, uranges(&cls)); |
1664 | | |
1665 | | let cls = uclass(&[('x', 'z'), ('u', 'w')]); |
1666 | | let expected = vec![('u', 'z')]; |
1667 | | assert_eq!(expected, uranges(&cls)); |
1668 | | |
1669 | | let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]); |
1670 | | let expected = vec![('\x00', '\u{10FFFF}')]; |
1671 | | assert_eq!(expected, uranges(&cls)); |
1672 | | |
1673 | | let cls = uclass(&[('a', 'a'), ('b', 'b')]); |
1674 | | let expected = vec![('a', 'b')]; |
1675 | | assert_eq!(expected, uranges(&cls)); |
1676 | | } |
1677 | | |
1678 | | #[test] |
1679 | | fn class_canonicalize_bytes() { |
1680 | | let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]); |
1681 | | let expected = vec![(b'a', b'c'), (b'x', b'z')]; |
1682 | | assert_eq!(expected, branges(&cls)); |
1683 | | |
1684 | | let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]); |
1685 | | let expected = vec![(b'a', b'c'), (b'x', b'z')]; |
1686 | | assert_eq!(expected, branges(&cls)); |
1687 | | |
1688 | | let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]); |
1689 | | let expected = vec![(b'w', b'z')]; |
1690 | | assert_eq!(expected, branges(&cls)); |
1691 | | |
1692 | | let cls = bclass(&[ |
1693 | | (b'c', b'f'), |
1694 | | (b'a', b'g'), |
1695 | | (b'd', b'j'), |
1696 | | (b'a', b'c'), |
1697 | | (b'm', b'p'), |
1698 | | (b'l', b's'), |
1699 | | ]); |
1700 | | let expected = vec![(b'a', b'j'), (b'l', b's')]; |
1701 | | assert_eq!(expected, branges(&cls)); |
1702 | | |
1703 | | let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]); |
1704 | | let expected = vec![(b'u', b'z')]; |
1705 | | assert_eq!(expected, branges(&cls)); |
1706 | | |
1707 | | let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]); |
1708 | | let expected = vec![(b'\x00', b'\xFF')]; |
1709 | | assert_eq!(expected, branges(&cls)); |
1710 | | |
1711 | | let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]); |
1712 | | let expected = vec![(b'a', b'b')]; |
1713 | | assert_eq!(expected, branges(&cls)); |
1714 | | } |
1715 | | |
1716 | | #[test] |
1717 | | #[cfg(feature = "unicode-case")] |
1718 | | fn class_case_fold_unicode() { |
1719 | | let cls = uclass(&[ |
1720 | | ('C', 'F'), |
1721 | | ('A', 'G'), |
1722 | | ('D', 'J'), |
1723 | | ('A', 'C'), |
1724 | | ('M', 'P'), |
1725 | | ('L', 'S'), |
1726 | | ('c', 'f'), |
1727 | | ]); |
1728 | | let expected = uclass(&[ |
1729 | | ('A', 'J'), |
1730 | | ('L', 'S'), |
1731 | | ('a', 'j'), |
1732 | | ('l', 's'), |
1733 | | ('\u{17F}', '\u{17F}'), |
1734 | | ]); |
1735 | | assert_eq!(expected, ucasefold(&cls)); |
1736 | | |
1737 | | let cls = uclass(&[('A', 'Z')]); |
1738 | | let expected = uclass(&[ |
1739 | | ('A', 'Z'), |
1740 | | ('a', 'z'), |
1741 | | ('\u{17F}', '\u{17F}'), |
1742 | | ('\u{212A}', '\u{212A}'), |
1743 | | ]); |
1744 | | assert_eq!(expected, ucasefold(&cls)); |
1745 | | |
1746 | | let cls = uclass(&[('a', 'z')]); |
1747 | | let expected = uclass(&[ |
1748 | | ('A', 'Z'), |
1749 | | ('a', 'z'), |
1750 | | ('\u{17F}', '\u{17F}'), |
1751 | | ('\u{212A}', '\u{212A}'), |
1752 | | ]); |
1753 | | assert_eq!(expected, ucasefold(&cls)); |
1754 | | |
1755 | | let cls = uclass(&[('A', 'A'), ('_', '_')]); |
1756 | | let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]); |
1757 | | assert_eq!(expected, ucasefold(&cls)); |
1758 | | |
1759 | | let cls = uclass(&[('A', 'A'), ('=', '=')]); |
1760 | | let expected = uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]); |
1761 | | assert_eq!(expected, ucasefold(&cls)); |
1762 | | |
1763 | | let cls = uclass(&[('\x00', '\x10')]); |
1764 | | assert_eq!(cls, ucasefold(&cls)); |
1765 | | |
1766 | | let cls = uclass(&[('k', 'k')]); |
1767 | | let expected = |
1768 | | uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]); |
1769 | | assert_eq!(expected, ucasefold(&cls)); |
1770 | | |
1771 | | let cls = uclass(&[('@', '@')]); |
1772 | | assert_eq!(cls, ucasefold(&cls)); |
1773 | | } |
1774 | | |
1775 | | #[test] |
1776 | | #[cfg(not(feature = "unicode-case"))] |
1777 | | fn class_case_fold_unicode_disabled() { |
1778 | | let mut cls = uclass(&[ |
1779 | | ('C', 'F'), |
1780 | | ('A', 'G'), |
1781 | | ('D', 'J'), |
1782 | | ('A', 'C'), |
1783 | | ('M', 'P'), |
1784 | | ('L', 'S'), |
1785 | | ('c', 'f'), |
1786 | | ]); |
1787 | | assert!(cls.try_case_fold_simple().is_err()); |
1788 | | } |
1789 | | |
1790 | | #[test] |
1791 | | #[should_panic] |
1792 | | #[cfg(not(feature = "unicode-case"))] |
1793 | | fn class_case_fold_unicode_disabled_panics() { |
1794 | | let mut cls = uclass(&[ |
1795 | | ('C', 'F'), |
1796 | | ('A', 'G'), |
1797 | | ('D', 'J'), |
1798 | | ('A', 'C'), |
1799 | | ('M', 'P'), |
1800 | | ('L', 'S'), |
1801 | | ('c', 'f'), |
1802 | | ]); |
1803 | | cls.case_fold_simple(); |
1804 | | } |
1805 | | |
1806 | | #[test] |
1807 | | fn class_case_fold_bytes() { |
1808 | | let cls = bclass(&[ |
1809 | | (b'C', b'F'), |
1810 | | (b'A', b'G'), |
1811 | | (b'D', b'J'), |
1812 | | (b'A', b'C'), |
1813 | | (b'M', b'P'), |
1814 | | (b'L', b'S'), |
1815 | | (b'c', b'f'), |
1816 | | ]); |
1817 | | let expected = |
1818 | | bclass(&[(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')]); |
1819 | | assert_eq!(expected, bcasefold(&cls)); |
1820 | | |
1821 | | let cls = bclass(&[(b'A', b'Z')]); |
1822 | | let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]); |
1823 | | assert_eq!(expected, bcasefold(&cls)); |
1824 | | |
1825 | | let cls = bclass(&[(b'a', b'z')]); |
1826 | | let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]); |
1827 | | assert_eq!(expected, bcasefold(&cls)); |
1828 | | |
1829 | | let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]); |
1830 | | let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]); |
1831 | | assert_eq!(expected, bcasefold(&cls)); |
1832 | | |
1833 | | let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]); |
1834 | | let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]); |
1835 | | assert_eq!(expected, bcasefold(&cls)); |
1836 | | |
1837 | | let cls = bclass(&[(b'\x00', b'\x10')]); |
1838 | | assert_eq!(cls, bcasefold(&cls)); |
1839 | | |
1840 | | let cls = bclass(&[(b'k', b'k')]); |
1841 | | let expected = bclass(&[(b'K', b'K'), (b'k', b'k')]); |
1842 | | assert_eq!(expected, bcasefold(&cls)); |
1843 | | |
1844 | | let cls = bclass(&[(b'@', b'@')]); |
1845 | | assert_eq!(cls, bcasefold(&cls)); |
1846 | | } |
1847 | | |
1848 | | #[test] |
1849 | | fn class_negate_unicode() { |
1850 | | let cls = uclass(&[('a', 'a')]); |
1851 | | let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]); |
1852 | | assert_eq!(expected, unegate(&cls)); |
1853 | | |
1854 | | let cls = uclass(&[('a', 'a'), ('b', 'b')]); |
1855 | | let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]); |
1856 | | assert_eq!(expected, unegate(&cls)); |
1857 | | |
1858 | | let cls = uclass(&[('a', 'c'), ('x', 'z')]); |
1859 | | let expected = uclass(&[ |
1860 | | ('\x00', '\x60'), |
1861 | | ('\x64', '\x77'), |
1862 | | ('\x7B', '\u{10FFFF}'), |
1863 | | ]); |
1864 | | assert_eq!(expected, unegate(&cls)); |
1865 | | |
1866 | | let cls = uclass(&[('\x00', 'a')]); |
1867 | | let expected = uclass(&[('\x62', '\u{10FFFF}')]); |
1868 | | assert_eq!(expected, unegate(&cls)); |
1869 | | |
1870 | | let cls = uclass(&[('a', '\u{10FFFF}')]); |
1871 | | let expected = uclass(&[('\x00', '\x60')]); |
1872 | | assert_eq!(expected, unegate(&cls)); |
1873 | | |
1874 | | let cls = uclass(&[('\x00', '\u{10FFFF}')]); |
1875 | | let expected = uclass(&[]); |
1876 | | assert_eq!(expected, unegate(&cls)); |
1877 | | |
1878 | | let cls = uclass(&[]); |
1879 | | let expected = uclass(&[('\x00', '\u{10FFFF}')]); |
1880 | | assert_eq!(expected, unegate(&cls)); |
1881 | | |
1882 | | let cls = |
1883 | | uclass(&[('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')]); |
1884 | | let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]); |
1885 | | assert_eq!(expected, unegate(&cls)); |
1886 | | |
1887 | | let cls = uclass(&[('\x00', '\u{D7FF}')]); |
1888 | | let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]); |
1889 | | assert_eq!(expected, unegate(&cls)); |
1890 | | |
1891 | | let cls = uclass(&[('\x00', '\u{D7FE}')]); |
1892 | | let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]); |
1893 | | assert_eq!(expected, unegate(&cls)); |
1894 | | |
1895 | | let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]); |
1896 | | let expected = uclass(&[('\x00', '\u{D7FF}')]); |
1897 | | assert_eq!(expected, unegate(&cls)); |
1898 | | |
1899 | | let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]); |
1900 | | let expected = uclass(&[('\x00', '\u{E000}')]); |
1901 | | assert_eq!(expected, unegate(&cls)); |
1902 | | } |
1903 | | |
1904 | | #[test] |
1905 | | fn class_negate_bytes() { |
1906 | | let cls = bclass(&[(b'a', b'a')]); |
1907 | | let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]); |
1908 | | assert_eq!(expected, bnegate(&cls)); |
1909 | | |
1910 | | let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]); |
1911 | | let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]); |
1912 | | assert_eq!(expected, bnegate(&cls)); |
1913 | | |
1914 | | let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]); |
1915 | | let expected = bclass(&[ |
1916 | | (b'\x00', b'\x60'), |
1917 | | (b'\x64', b'\x77'), |
1918 | | (b'\x7B', b'\xFF'), |
1919 | | ]); |
1920 | | assert_eq!(expected, bnegate(&cls)); |
1921 | | |
1922 | | let cls = bclass(&[(b'\x00', b'a')]); |
1923 | | let expected = bclass(&[(b'\x62', b'\xFF')]); |
1924 | | assert_eq!(expected, bnegate(&cls)); |
1925 | | |
1926 | | let cls = bclass(&[(b'a', b'\xFF')]); |
1927 | | let expected = bclass(&[(b'\x00', b'\x60')]); |
1928 | | assert_eq!(expected, bnegate(&cls)); |
1929 | | |
1930 | | let cls = bclass(&[(b'\x00', b'\xFF')]); |
1931 | | let expected = bclass(&[]); |
1932 | | assert_eq!(expected, bnegate(&cls)); |
1933 | | |
1934 | | let cls = bclass(&[]); |
1935 | | let expected = bclass(&[(b'\x00', b'\xFF')]); |
1936 | | assert_eq!(expected, bnegate(&cls)); |
1937 | | |
1938 | | let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]); |
1939 | | let expected = bclass(&[(b'\xFE', b'\xFE')]); |
1940 | | assert_eq!(expected, bnegate(&cls)); |
1941 | | } |
1942 | | |
1943 | | #[test] |
1944 | | fn class_union_unicode() { |
1945 | | let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]); |
1946 | | let cls2 = uclass(&[('a', 'z')]); |
1947 | | let expected = uclass(&[('a', 'z'), ('A', 'C')]); |
1948 | | assert_eq!(expected, uunion(&cls1, &cls2)); |
1949 | | } |
1950 | | |
1951 | | #[test] |
1952 | | fn class_union_bytes() { |
1953 | | let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]); |
1954 | | let cls2 = bclass(&[(b'a', b'z')]); |
1955 | | let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]); |
1956 | | assert_eq!(expected, bunion(&cls1, &cls2)); |
1957 | | } |
1958 | | |
1959 | | #[test] |
1960 | | fn class_intersect_unicode() { |
1961 | | let cls1 = uclass(&[]); |
1962 | | let cls2 = uclass(&[('a', 'a')]); |
1963 | | let expected = uclass(&[]); |
1964 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
1965 | | |
1966 | | let cls1 = uclass(&[('a', 'a')]); |
1967 | | let cls2 = uclass(&[('a', 'a')]); |
1968 | | let expected = uclass(&[('a', 'a')]); |
1969 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
1970 | | |
1971 | | let cls1 = uclass(&[('a', 'a')]); |
1972 | | let cls2 = uclass(&[('b', 'b')]); |
1973 | | let expected = uclass(&[]); |
1974 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
1975 | | |
1976 | | let cls1 = uclass(&[('a', 'a')]); |
1977 | | let cls2 = uclass(&[('a', 'c')]); |
1978 | | let expected = uclass(&[('a', 'a')]); |
1979 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
1980 | | |
1981 | | let cls1 = uclass(&[('a', 'b')]); |
1982 | | let cls2 = uclass(&[('a', 'c')]); |
1983 | | let expected = uclass(&[('a', 'b')]); |
1984 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
1985 | | |
1986 | | let cls1 = uclass(&[('a', 'b')]); |
1987 | | let cls2 = uclass(&[('b', 'c')]); |
1988 | | let expected = uclass(&[('b', 'b')]); |
1989 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
1990 | | |
1991 | | let cls1 = uclass(&[('a', 'b')]); |
1992 | | let cls2 = uclass(&[('c', 'd')]); |
1993 | | let expected = uclass(&[]); |
1994 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
1995 | | |
1996 | | let cls1 = uclass(&[('b', 'c')]); |
1997 | | let cls2 = uclass(&[('a', 'd')]); |
1998 | | let expected = uclass(&[('b', 'c')]); |
1999 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
2000 | | |
2001 | | let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); |
2002 | | let cls2 = uclass(&[('a', 'h')]); |
2003 | | let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); |
2004 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
2005 | | |
2006 | | let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); |
2007 | | let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); |
2008 | | let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); |
2009 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
2010 | | |
2011 | | let cls1 = uclass(&[('a', 'b'), ('g', 'h')]); |
2012 | | let cls2 = uclass(&[('d', 'e'), ('k', 'l')]); |
2013 | | let expected = uclass(&[]); |
2014 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
2015 | | |
2016 | | let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); |
2017 | | let cls2 = uclass(&[('h', 'h')]); |
2018 | | let expected = uclass(&[('h', 'h')]); |
2019 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
2020 | | |
2021 | | let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]); |
2022 | | let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]); |
2023 | | let expected = uclass(&[]); |
2024 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
2025 | | |
2026 | | let cls1 = uclass(&[('a', 'b'), ('c', 'd'), ('e', 'f')]); |
2027 | | let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]); |
2028 | | let expected = uclass(&[('b', 'f')]); |
2029 | | assert_eq!(expected, uintersect(&cls1, &cls2)); |
2030 | | } |
2031 | | |
2032 | | #[test] |
2033 | | fn class_intersect_bytes() { |
2034 | | let cls1 = bclass(&[]); |
2035 | | let cls2 = bclass(&[(b'a', b'a')]); |
2036 | | let expected = bclass(&[]); |
2037 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2038 | | |
2039 | | let cls1 = bclass(&[(b'a', b'a')]); |
2040 | | let cls2 = bclass(&[(b'a', b'a')]); |
2041 | | let expected = bclass(&[(b'a', b'a')]); |
2042 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2043 | | |
2044 | | let cls1 = bclass(&[(b'a', b'a')]); |
2045 | | let cls2 = bclass(&[(b'b', b'b')]); |
2046 | | let expected = bclass(&[]); |
2047 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2048 | | |
2049 | | let cls1 = bclass(&[(b'a', b'a')]); |
2050 | | let cls2 = bclass(&[(b'a', b'c')]); |
2051 | | let expected = bclass(&[(b'a', b'a')]); |
2052 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2053 | | |
2054 | | let cls1 = bclass(&[(b'a', b'b')]); |
2055 | | let cls2 = bclass(&[(b'a', b'c')]); |
2056 | | let expected = bclass(&[(b'a', b'b')]); |
2057 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2058 | | |
2059 | | let cls1 = bclass(&[(b'a', b'b')]); |
2060 | | let cls2 = bclass(&[(b'b', b'c')]); |
2061 | | let expected = bclass(&[(b'b', b'b')]); |
2062 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2063 | | |
2064 | | let cls1 = bclass(&[(b'a', b'b')]); |
2065 | | let cls2 = bclass(&[(b'c', b'd')]); |
2066 | | let expected = bclass(&[]); |
2067 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2068 | | |
2069 | | let cls1 = bclass(&[(b'b', b'c')]); |
2070 | | let cls2 = bclass(&[(b'a', b'd')]); |
2071 | | let expected = bclass(&[(b'b', b'c')]); |
2072 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2073 | | |
2074 | | let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); |
2075 | | let cls2 = bclass(&[(b'a', b'h')]); |
2076 | | let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); |
2077 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2078 | | |
2079 | | let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); |
2080 | | let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); |
2081 | | let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); |
2082 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2083 | | |
2084 | | let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]); |
2085 | | let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]); |
2086 | | let expected = bclass(&[]); |
2087 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2088 | | |
2089 | | let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); |
2090 | | let cls2 = bclass(&[(b'h', b'h')]); |
2091 | | let expected = bclass(&[(b'h', b'h')]); |
2092 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2093 | | |
2094 | | let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]); |
2095 | | let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]); |
2096 | | let expected = bclass(&[]); |
2097 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2098 | | |
2099 | | let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]); |
2100 | | let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]); |
2101 | | let expected = bclass(&[(b'b', b'f')]); |
2102 | | assert_eq!(expected, bintersect(&cls1, &cls2)); |
2103 | | } |
2104 | | |
2105 | | #[test] |
2106 | | fn class_difference_unicode() { |
2107 | | let cls1 = uclass(&[('a', 'a')]); |
2108 | | let cls2 = uclass(&[('a', 'a')]); |
2109 | | let expected = uclass(&[]); |
2110 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2111 | | |
2112 | | let cls1 = uclass(&[('a', 'a')]); |
2113 | | let cls2 = uclass(&[]); |
2114 | | let expected = uclass(&[('a', 'a')]); |
2115 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2116 | | |
2117 | | let cls1 = uclass(&[]); |
2118 | | let cls2 = uclass(&[('a', 'a')]); |
2119 | | let expected = uclass(&[]); |
2120 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2121 | | |
2122 | | let cls1 = uclass(&[('a', 'z')]); |
2123 | | let cls2 = uclass(&[('a', 'a')]); |
2124 | | let expected = uclass(&[('b', 'z')]); |
2125 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2126 | | |
2127 | | let cls1 = uclass(&[('a', 'z')]); |
2128 | | let cls2 = uclass(&[('z', 'z')]); |
2129 | | let expected = uclass(&[('a', 'y')]); |
2130 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2131 | | |
2132 | | let cls1 = uclass(&[('a', 'z')]); |
2133 | | let cls2 = uclass(&[('m', 'm')]); |
2134 | | let expected = uclass(&[('a', 'l'), ('n', 'z')]); |
2135 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2136 | | |
2137 | | let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]); |
2138 | | let cls2 = uclass(&[('a', 'z')]); |
2139 | | let expected = uclass(&[]); |
2140 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2141 | | |
2142 | | let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]); |
2143 | | let cls2 = uclass(&[('d', 'v')]); |
2144 | | let expected = uclass(&[('a', 'c')]); |
2145 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2146 | | |
2147 | | let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]); |
2148 | | let cls2 = uclass(&[('b', 'g'), ('s', 'u')]); |
2149 | | let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]); |
2150 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2151 | | |
2152 | | let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]); |
2153 | | let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]); |
2154 | | let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]); |
2155 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2156 | | |
2157 | | let cls1 = uclass(&[('x', 'z')]); |
2158 | | let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]); |
2159 | | let expected = uclass(&[('x', 'z')]); |
2160 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2161 | | |
2162 | | let cls1 = uclass(&[('a', 'z')]); |
2163 | | let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]); |
2164 | | let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]); |
2165 | | assert_eq!(expected, udifference(&cls1, &cls2)); |
2166 | | } |
2167 | | |
2168 | | #[test] |
2169 | | fn class_difference_bytes() { |
2170 | | let cls1 = bclass(&[(b'a', b'a')]); |
2171 | | let cls2 = bclass(&[(b'a', b'a')]); |
2172 | | let expected = bclass(&[]); |
2173 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2174 | | |
2175 | | let cls1 = bclass(&[(b'a', b'a')]); |
2176 | | let cls2 = bclass(&[]); |
2177 | | let expected = bclass(&[(b'a', b'a')]); |
2178 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2179 | | |
2180 | | let cls1 = bclass(&[]); |
2181 | | let cls2 = bclass(&[(b'a', b'a')]); |
2182 | | let expected = bclass(&[]); |
2183 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2184 | | |
2185 | | let cls1 = bclass(&[(b'a', b'z')]); |
2186 | | let cls2 = bclass(&[(b'a', b'a')]); |
2187 | | let expected = bclass(&[(b'b', b'z')]); |
2188 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2189 | | |
2190 | | let cls1 = bclass(&[(b'a', b'z')]); |
2191 | | let cls2 = bclass(&[(b'z', b'z')]); |
2192 | | let expected = bclass(&[(b'a', b'y')]); |
2193 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2194 | | |
2195 | | let cls1 = bclass(&[(b'a', b'z')]); |
2196 | | let cls2 = bclass(&[(b'm', b'm')]); |
2197 | | let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]); |
2198 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2199 | | |
2200 | | let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]); |
2201 | | let cls2 = bclass(&[(b'a', b'z')]); |
2202 | | let expected = bclass(&[]); |
2203 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2204 | | |
2205 | | let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]); |
2206 | | let cls2 = bclass(&[(b'd', b'v')]); |
2207 | | let expected = bclass(&[(b'a', b'c')]); |
2208 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2209 | | |
2210 | | let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]); |
2211 | | let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]); |
2212 | | let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]); |
2213 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2214 | | |
2215 | | let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]); |
2216 | | let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]); |
2217 | | let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]); |
2218 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2219 | | |
2220 | | let cls1 = bclass(&[(b'x', b'z')]); |
2221 | | let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]); |
2222 | | let expected = bclass(&[(b'x', b'z')]); |
2223 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2224 | | |
2225 | | let cls1 = bclass(&[(b'a', b'z')]); |
2226 | | let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]); |
2227 | | let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]); |
2228 | | assert_eq!(expected, bdifference(&cls1, &cls2)); |
2229 | | } |
2230 | | |
2231 | | #[test] |
2232 | | fn class_symmetric_difference_unicode() { |
2233 | | let cls1 = uclass(&[('a', 'm')]); |
2234 | | let cls2 = uclass(&[('g', 't')]); |
2235 | | let expected = uclass(&[('a', 'f'), ('n', 't')]); |
2236 | | assert_eq!(expected, usymdifference(&cls1, &cls2)); |
2237 | | } |
2238 | | |
2239 | | #[test] |
2240 | | fn class_symmetric_difference_bytes() { |
2241 | | let cls1 = bclass(&[(b'a', b'm')]); |
2242 | | let cls2 = bclass(&[(b'g', b't')]); |
2243 | | let expected = bclass(&[(b'a', b'f'), (b'n', b't')]); |
2244 | | assert_eq!(expected, bsymdifference(&cls1, &cls2)); |
2245 | | } |
2246 | | |
2247 | | #[test] |
2248 | | #[should_panic] |
2249 | | fn hir_byte_literal_non_ascii() { |
2250 | | Hir::literal(Literal::Byte(b'a')); |
2251 | | } |
2252 | | |
2253 | | // We use a thread with an explicit stack size to test that our destructor |
2254 | | // for Hir can handle arbitrarily sized expressions in constant stack |
2255 | | // space. In case we run on a platform without threads (WASM?), we limit |
2256 | | // this test to Windows/Unix. |
2257 | | #[test] |
2258 | | #[cfg(any(unix, windows))] |
2259 | | fn no_stack_overflow_on_drop() { |
2260 | | use std::thread; |
2261 | | |
2262 | | let run = || { |
2263 | | let mut expr = Hir::empty(); |
2264 | | for _ in 0..100 { |
2265 | | expr = Hir::group(Group { |
2266 | | kind: GroupKind::NonCapturing, |
2267 | | hir: Box::new(expr), |
2268 | | }); |
2269 | | expr = Hir::repetition(Repetition { |
2270 | | kind: RepetitionKind::ZeroOrOne, |
2271 | | greedy: true, |
2272 | | hir: Box::new(expr), |
2273 | | }); |
2274 | | |
2275 | | expr = Hir { |
2276 | | kind: HirKind::Concat(vec![expr]), |
2277 | | info: HirInfo::new(), |
2278 | | }; |
2279 | | expr = Hir { |
2280 | | kind: HirKind::Alternation(vec![expr]), |
2281 | | info: HirInfo::new(), |
2282 | | }; |
2283 | | } |
2284 | | assert!(!expr.kind.is_empty()); |
2285 | | }; |
2286 | | |
2287 | | // We run our test on a thread with a small stack size so we can |
2288 | | // force the issue more easily. |
2289 | | // |
2290 | | // NOTE(2023-03-21): See the corresponding test in 'crate::ast::tests' |
2291 | | // for context on the specific stack size chosen here. |
2292 | | thread::Builder::new() |
2293 | | .stack_size(16 << 10) |
2294 | | .spawn(run) |
2295 | | .unwrap() |
2296 | | .join() |
2297 | | .unwrap(); |
2298 | | } |
2299 | | } |