/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-syntax-0.6.29/src/hir/translate.rs
Line | Count | Source |
1 | | /*! |
2 | | Defines a translator that converts an `Ast` to an `Hir`. |
3 | | */ |
4 | | |
5 | | use std::cell::{Cell, RefCell}; |
6 | | use std::result; |
7 | | |
8 | | use crate::ast::{self, Ast, Span, Visitor}; |
9 | | use crate::hir::{self, Error, ErrorKind, Hir}; |
10 | | use crate::unicode::{self, ClassQuery}; |
11 | | |
12 | | type Result<T> = result::Result<T, Error>; |
13 | | |
/// A builder for constructing an AST->HIR translator.
///
/// The builder only records settings; `build` copies them into each
/// `Translator` it creates.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether a built translator may produce HIR that can match invalid
    /// UTF-8. `new` initializes this to `false`.
    allow_invalid_utf8: bool,
    /// The flag settings a built translator starts out with.
    flags: Flags,
}
20 | | |
21 | | impl Default for TranslatorBuilder { |
22 | 0 | fn default() -> TranslatorBuilder { |
23 | 0 | TranslatorBuilder::new() |
24 | 0 | } |
25 | | } |
26 | | |
27 | | impl TranslatorBuilder { |
28 | | /// Create a new translator builder with a default c onfiguration. |
29 | 0 | pub fn new() -> TranslatorBuilder { |
30 | 0 | TranslatorBuilder { |
31 | 0 | allow_invalid_utf8: false, |
32 | 0 | flags: Flags::default(), |
33 | 0 | } |
34 | 0 | } |
35 | | |
36 | | /// Build a translator using the current configuration. |
37 | 0 | pub fn build(&self) -> Translator { |
38 | 0 | Translator { |
39 | 0 | stack: RefCell::new(vec![]), |
40 | 0 | flags: Cell::new(self.flags), |
41 | 0 | allow_invalid_utf8: self.allow_invalid_utf8, |
42 | 0 | } |
43 | 0 | } |
44 | | |
45 | | /// When enabled, translation will permit the construction of a regular |
46 | | /// expression that may match invalid UTF-8. |
47 | | /// |
48 | | /// When disabled (the default), the translator is guaranteed to produce |
49 | | /// an expression that will only ever match valid UTF-8 (otherwise, the |
50 | | /// translator will return an error). |
51 | | /// |
52 | | /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII |
53 | | /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause |
54 | | /// the parser to return an error. Namely, a negated ASCII word boundary |
55 | | /// can result in matching positions that aren't valid UTF-8 boundaries. |
56 | 0 | pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder { |
57 | 0 | self.allow_invalid_utf8 = yes; |
58 | 0 | self |
59 | 0 | } |
60 | | |
61 | | /// Enable or disable the case insensitive flag (`i`) by default. |
62 | 0 | pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder { |
63 | 0 | self.flags.case_insensitive = if yes { Some(true) } else { None }; |
64 | 0 | self |
65 | 0 | } |
66 | | |
67 | | /// Enable or disable the multi-line matching flag (`m`) by default. |
68 | 0 | pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder { |
69 | 0 | self.flags.multi_line = if yes { Some(true) } else { None }; |
70 | 0 | self |
71 | 0 | } |
72 | | |
73 | | /// Enable or disable the "dot matches any character" flag (`s`) by |
74 | | /// default. |
75 | 0 | pub fn dot_matches_new_line( |
76 | 0 | &mut self, |
77 | 0 | yes: bool, |
78 | 0 | ) -> &mut TranslatorBuilder { |
79 | 0 | self.flags.dot_matches_new_line = if yes { Some(true) } else { None }; |
80 | 0 | self |
81 | 0 | } |
82 | | |
83 | | /// Enable or disable the "swap greed" flag (`U`) by default. |
84 | 0 | pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder { |
85 | 0 | self.flags.swap_greed = if yes { Some(true) } else { None }; |
86 | 0 | self |
87 | 0 | } |
88 | | |
89 | | /// Enable or disable the Unicode flag (`u`) by default. |
90 | 0 | pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder { |
91 | 0 | self.flags.unicode = if yes { None } else { Some(false) }; |
92 | 0 | self |
93 | 0 | } |
94 | | } |
95 | | |
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    ///
    /// Wrapped in a `RefCell` so frames can be pushed and popped through a
    /// shared reference to the translator.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    ///
    /// Wrapped in a `Cell` so the active flags can be replaced through a
    /// shared reference to the translator.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
113 | | |
114 | | impl Translator { |
115 | | /// Create a new translator using the default configuration. |
116 | 0 | pub fn new() -> Translator { |
117 | 0 | TranslatorBuilder::new().build() |
118 | 0 | } |
119 | | |
120 | | /// Translate the given abstract syntax tree (AST) into a high level |
121 | | /// intermediate representation (HIR). |
122 | | /// |
123 | | /// If there was a problem doing the translation, then an HIR-specific |
124 | | /// error is returned. |
125 | | /// |
126 | | /// The original pattern string used to produce the `Ast` *must* also be |
127 | | /// provided. The translator does not use the pattern string during any |
128 | | /// correct translation, but is used for error reporting. |
129 | 0 | pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> { |
130 | 0 | ast::visit(ast, TranslatorI::new(self, pattern)) |
131 | 0 | } |
132 | | } |
133 | | |
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a non-empty concatenation is observed. After
    /// visiting every sub-expression in the concatenation, the translator's
    /// stack is popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever a non-empty alternation is observed. After
    /// visiting every sub-expression in the alternation, the translator's
    /// stack is popped until it sees an Alternation frame.
    Alternation,
}
185 | | |
186 | | impl HirFrame { |
187 | | /// Assert that the current stack frame is an Hir expression and return it. |
188 | 0 | fn unwrap_expr(self) -> Hir { |
189 | 0 | match self { |
190 | 0 | HirFrame::Expr(expr) => expr, |
191 | 0 | _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self), |
192 | | } |
193 | 0 | } |
194 | | |
195 | | /// Assert that the current stack frame is a Unicode class expression and |
196 | | /// return it. |
197 | 0 | fn unwrap_class_unicode(self) -> hir::ClassUnicode { |
198 | 0 | match self { |
199 | 0 | HirFrame::ClassUnicode(cls) => cls, |
200 | 0 | _ => panic!( |
201 | 0 | "tried to unwrap Unicode class \ |
202 | 0 | from HirFrame, got: {:?}", |
203 | | self |
204 | | ), |
205 | | } |
206 | 0 | } |
207 | | |
208 | | /// Assert that the current stack frame is a byte class expression and |
209 | | /// return it. |
210 | 0 | fn unwrap_class_bytes(self) -> hir::ClassBytes { |
211 | 0 | match self { |
212 | 0 | HirFrame::ClassBytes(cls) => cls, |
213 | 0 | _ => panic!( |
214 | 0 | "tried to unwrap byte class \ |
215 | 0 | from HirFrame, got: {:?}", |
216 | | self |
217 | | ), |
218 | | } |
219 | 0 | } |
220 | | |
221 | | /// Assert that the current stack frame is a group indicator and return |
222 | | /// its corresponding flags (the flags that were active at the time the |
223 | | /// group was entered). |
224 | 0 | fn unwrap_group(self) -> Flags { |
225 | 0 | match self { |
226 | 0 | HirFrame::Group { old_flags } => old_flags, |
227 | | _ => { |
228 | 0 | panic!("tried to unwrap group from HirFrame, got: {:?}", self) |
229 | | } |
230 | | } |
231 | 0 | } |
232 | | } |
233 | | |
234 | | impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { |
235 | | type Output = Hir; |
236 | | type Err = Error; |
237 | | |
238 | 0 | fn finish(self) -> Result<Hir> { |
239 | | // ... otherwise, we should have exactly one HIR on the stack. |
240 | 0 | assert_eq!(self.trans().stack.borrow().len(), 1); |
241 | 0 | Ok(self.pop().unwrap().unwrap_expr()) |
242 | 0 | } |
243 | | |
244 | 0 | fn visit_pre(&mut self, ast: &Ast) -> Result<()> { |
245 | 0 | match *ast { |
246 | | Ast::Class(ast::Class::Bracketed(_)) => { |
247 | 0 | if self.flags().unicode() { |
248 | 0 | let cls = hir::ClassUnicode::empty(); |
249 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
250 | 0 | } else { |
251 | 0 | let cls = hir::ClassBytes::empty(); |
252 | 0 | self.push(HirFrame::ClassBytes(cls)); |
253 | 0 | } |
254 | | } |
255 | 0 | Ast::Group(ref x) => { |
256 | 0 | let old_flags = x |
257 | 0 | .flags() |
258 | 0 | .map(|ast| self.set_flags(ast)) |
259 | 0 | .unwrap_or_else(|| self.flags()); |
260 | 0 | self.push(HirFrame::Group { old_flags }); |
261 | | } |
262 | 0 | Ast::Concat(ref x) if x.asts.is_empty() => {} |
263 | 0 | Ast::Concat(_) => { |
264 | 0 | self.push(HirFrame::Concat); |
265 | 0 | } |
266 | 0 | Ast::Alternation(ref x) if x.asts.is_empty() => {} |
267 | 0 | Ast::Alternation(_) => { |
268 | 0 | self.push(HirFrame::Alternation); |
269 | 0 | } |
270 | 0 | _ => {} |
271 | | } |
272 | 0 | Ok(()) |
273 | 0 | } |
274 | | |
275 | 0 | fn visit_post(&mut self, ast: &Ast) -> Result<()> { |
276 | 0 | match *ast { |
277 | 0 | Ast::Empty(_) => { |
278 | 0 | self.push(HirFrame::Expr(Hir::empty())); |
279 | 0 | } |
280 | 0 | Ast::Flags(ref x) => { |
281 | 0 | self.set_flags(&x.flags); |
282 | 0 | // Flags in the AST are generally considered directives and |
283 | 0 | // not actual sub-expressions. However, they can be used in |
284 | 0 | // the concrete syntax like `((?i))`, and we need some kind of |
285 | 0 | // indication of an expression there, and Empty is the correct |
286 | 0 | // choice. |
287 | 0 | // |
288 | 0 | // There can also be things like `(?i)+`, but we rule those out |
289 | 0 | // in the parser. In the future, we might allow them for |
290 | 0 | // consistency sake. |
291 | 0 | self.push(HirFrame::Expr(Hir::empty())); |
292 | 0 | } |
293 | 0 | Ast::Literal(ref x) => { |
294 | 0 | self.push(HirFrame::Expr(self.hir_literal(x)?)); |
295 | | } |
296 | 0 | Ast::Dot(span) => { |
297 | 0 | self.push(HirFrame::Expr(self.hir_dot(span)?)); |
298 | | } |
299 | 0 | Ast::Assertion(ref x) => { |
300 | 0 | self.push(HirFrame::Expr(self.hir_assertion(x)?)); |
301 | | } |
302 | 0 | Ast::Class(ast::Class::Perl(ref x)) => { |
303 | 0 | if self.flags().unicode() { |
304 | 0 | let cls = self.hir_perl_unicode_class(x)?; |
305 | 0 | let hcls = hir::Class::Unicode(cls); |
306 | 0 | self.push(HirFrame::Expr(Hir::class(hcls))); |
307 | 0 | } else { |
308 | 0 | let cls = self.hir_perl_byte_class(x); |
309 | 0 | let hcls = hir::Class::Bytes(cls); |
310 | 0 | self.push(HirFrame::Expr(Hir::class(hcls))); |
311 | 0 | } |
312 | | } |
313 | 0 | Ast::Class(ast::Class::Unicode(ref x)) => { |
314 | 0 | let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); |
315 | 0 | self.push(HirFrame::Expr(Hir::class(cls))); |
316 | | } |
317 | 0 | Ast::Class(ast::Class::Bracketed(ref ast)) => { |
318 | 0 | if self.flags().unicode() { |
319 | 0 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
320 | 0 | self.unicode_fold_and_negate( |
321 | 0 | &ast.span, |
322 | 0 | ast.negated, |
323 | 0 | &mut cls, |
324 | 0 | )?; |
325 | 0 | if cls.ranges().is_empty() { |
326 | 0 | return Err(self.error( |
327 | 0 | ast.span, |
328 | 0 | ErrorKind::EmptyClassNotAllowed, |
329 | 0 | )); |
330 | 0 | } |
331 | 0 | let expr = Hir::class(hir::Class::Unicode(cls)); |
332 | 0 | self.push(HirFrame::Expr(expr)); |
333 | | } else { |
334 | 0 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
335 | 0 | self.bytes_fold_and_negate( |
336 | 0 | &ast.span, |
337 | 0 | ast.negated, |
338 | 0 | &mut cls, |
339 | 0 | )?; |
340 | 0 | if cls.ranges().is_empty() { |
341 | 0 | return Err(self.error( |
342 | 0 | ast.span, |
343 | 0 | ErrorKind::EmptyClassNotAllowed, |
344 | 0 | )); |
345 | 0 | } |
346 | | |
347 | 0 | let expr = Hir::class(hir::Class::Bytes(cls)); |
348 | 0 | self.push(HirFrame::Expr(expr)); |
349 | | } |
350 | | } |
351 | 0 | Ast::Repetition(ref x) => { |
352 | 0 | let expr = self.pop().unwrap().unwrap_expr(); |
353 | 0 | self.push(HirFrame::Expr(self.hir_repetition(x, expr))); |
354 | 0 | } |
355 | 0 | Ast::Group(ref x) => { |
356 | 0 | let expr = self.pop().unwrap().unwrap_expr(); |
357 | 0 | let old_flags = self.pop().unwrap().unwrap_group(); |
358 | 0 | self.trans().flags.set(old_flags); |
359 | 0 | self.push(HirFrame::Expr(self.hir_group(x, expr))); |
360 | 0 | } |
361 | | Ast::Concat(_) => { |
362 | 0 | let mut exprs = vec![]; |
363 | 0 | while let Some(HirFrame::Expr(expr)) = self.pop() { |
364 | 0 | if !expr.kind().is_empty() { |
365 | 0 | exprs.push(expr); |
366 | 0 | } |
367 | | } |
368 | 0 | exprs.reverse(); |
369 | 0 | self.push(HirFrame::Expr(Hir::concat(exprs))); |
370 | | } |
371 | | Ast::Alternation(_) => { |
372 | 0 | let mut exprs = vec![]; |
373 | 0 | while let Some(HirFrame::Expr(expr)) = self.pop() { |
374 | 0 | exprs.push(expr); |
375 | 0 | } |
376 | 0 | exprs.reverse(); |
377 | 0 | self.push(HirFrame::Expr(Hir::alternation(exprs))); |
378 | | } |
379 | | } |
380 | 0 | Ok(()) |
381 | 0 | } |
382 | | |
383 | 0 | fn visit_class_set_item_pre( |
384 | 0 | &mut self, |
385 | 0 | ast: &ast::ClassSetItem, |
386 | 0 | ) -> Result<()> { |
387 | 0 | match *ast { |
388 | | ast::ClassSetItem::Bracketed(_) => { |
389 | 0 | if self.flags().unicode() { |
390 | 0 | let cls = hir::ClassUnicode::empty(); |
391 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
392 | 0 | } else { |
393 | 0 | let cls = hir::ClassBytes::empty(); |
394 | 0 | self.push(HirFrame::ClassBytes(cls)); |
395 | 0 | } |
396 | | } |
397 | | // We needn't handle the Union case here since the visitor will |
398 | | // do it for us. |
399 | 0 | _ => {} |
400 | | } |
401 | 0 | Ok(()) |
402 | 0 | } |
403 | | |
404 | 0 | fn visit_class_set_item_post( |
405 | 0 | &mut self, |
406 | 0 | ast: &ast::ClassSetItem, |
407 | 0 | ) -> Result<()> { |
408 | 0 | match *ast { |
409 | 0 | ast::ClassSetItem::Empty(_) => {} |
410 | 0 | ast::ClassSetItem::Literal(ref x) => { |
411 | 0 | if self.flags().unicode() { |
412 | 0 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
413 | 0 | cls.push(hir::ClassUnicodeRange::new(x.c, x.c)); |
414 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
415 | 0 | } else { |
416 | 0 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
417 | 0 | let byte = self.class_literal_byte(x)?; |
418 | 0 | cls.push(hir::ClassBytesRange::new(byte, byte)); |
419 | 0 | self.push(HirFrame::ClassBytes(cls)); |
420 | | } |
421 | | } |
422 | 0 | ast::ClassSetItem::Range(ref x) => { |
423 | 0 | if self.flags().unicode() { |
424 | 0 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
425 | 0 | cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c)); |
426 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
427 | 0 | } else { |
428 | 0 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
429 | 0 | let start = self.class_literal_byte(&x.start)?; |
430 | 0 | let end = self.class_literal_byte(&x.end)?; |
431 | 0 | cls.push(hir::ClassBytesRange::new(start, end)); |
432 | 0 | self.push(HirFrame::ClassBytes(cls)); |
433 | | } |
434 | | } |
435 | 0 | ast::ClassSetItem::Ascii(ref x) => { |
436 | 0 | if self.flags().unicode() { |
437 | 0 | let xcls = self.hir_ascii_unicode_class(x)?; |
438 | 0 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
439 | 0 | cls.union(&xcls); |
440 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
441 | | } else { |
442 | 0 | let xcls = self.hir_ascii_byte_class(x)?; |
443 | 0 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
444 | 0 | cls.union(&xcls); |
445 | 0 | self.push(HirFrame::ClassBytes(cls)); |
446 | | } |
447 | | } |
448 | 0 | ast::ClassSetItem::Unicode(ref x) => { |
449 | 0 | let xcls = self.hir_unicode_class(x)?; |
450 | 0 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
451 | 0 | cls.union(&xcls); |
452 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
453 | | } |
454 | 0 | ast::ClassSetItem::Perl(ref x) => { |
455 | 0 | if self.flags().unicode() { |
456 | 0 | let xcls = self.hir_perl_unicode_class(x)?; |
457 | 0 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
458 | 0 | cls.union(&xcls); |
459 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
460 | 0 | } else { |
461 | 0 | let xcls = self.hir_perl_byte_class(x); |
462 | 0 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
463 | 0 | cls.union(&xcls); |
464 | 0 | self.push(HirFrame::ClassBytes(cls)); |
465 | 0 | } |
466 | | } |
467 | 0 | ast::ClassSetItem::Bracketed(ref ast) => { |
468 | 0 | if self.flags().unicode() { |
469 | 0 | let mut cls1 = self.pop().unwrap().unwrap_class_unicode(); |
470 | 0 | self.unicode_fold_and_negate( |
471 | 0 | &ast.span, |
472 | 0 | ast.negated, |
473 | 0 | &mut cls1, |
474 | 0 | )?; |
475 | | |
476 | 0 | let mut cls2 = self.pop().unwrap().unwrap_class_unicode(); |
477 | 0 | cls2.union(&cls1); |
478 | 0 | self.push(HirFrame::ClassUnicode(cls2)); |
479 | | } else { |
480 | 0 | let mut cls1 = self.pop().unwrap().unwrap_class_bytes(); |
481 | 0 | self.bytes_fold_and_negate( |
482 | 0 | &ast.span, |
483 | 0 | ast.negated, |
484 | 0 | &mut cls1, |
485 | 0 | )?; |
486 | | |
487 | 0 | let mut cls2 = self.pop().unwrap().unwrap_class_bytes(); |
488 | 0 | cls2.union(&cls1); |
489 | 0 | self.push(HirFrame::ClassBytes(cls2)); |
490 | | } |
491 | | } |
492 | | // This is handled automatically by the visitor. |
493 | 0 | ast::ClassSetItem::Union(_) => {} |
494 | | } |
495 | 0 | Ok(()) |
496 | 0 | } |
497 | | |
498 | 0 | fn visit_class_set_binary_op_pre( |
499 | 0 | &mut self, |
500 | 0 | _op: &ast::ClassSetBinaryOp, |
501 | 0 | ) -> Result<()> { |
502 | 0 | if self.flags().unicode() { |
503 | 0 | let cls = hir::ClassUnicode::empty(); |
504 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
505 | 0 | } else { |
506 | 0 | let cls = hir::ClassBytes::empty(); |
507 | 0 | self.push(HirFrame::ClassBytes(cls)); |
508 | 0 | } |
509 | 0 | Ok(()) |
510 | 0 | } |
511 | | |
512 | 0 | fn visit_class_set_binary_op_in( |
513 | 0 | &mut self, |
514 | 0 | _op: &ast::ClassSetBinaryOp, |
515 | 0 | ) -> Result<()> { |
516 | 0 | if self.flags().unicode() { |
517 | 0 | let cls = hir::ClassUnicode::empty(); |
518 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
519 | 0 | } else { |
520 | 0 | let cls = hir::ClassBytes::empty(); |
521 | 0 | self.push(HirFrame::ClassBytes(cls)); |
522 | 0 | } |
523 | 0 | Ok(()) |
524 | 0 | } |
525 | | |
526 | 0 | fn visit_class_set_binary_op_post( |
527 | 0 | &mut self, |
528 | 0 | op: &ast::ClassSetBinaryOp, |
529 | 0 | ) -> Result<()> { |
530 | | use crate::ast::ClassSetBinaryOpKind::*; |
531 | | |
532 | 0 | if self.flags().unicode() { |
533 | 0 | let mut rhs = self.pop().unwrap().unwrap_class_unicode(); |
534 | 0 | let mut lhs = self.pop().unwrap().unwrap_class_unicode(); |
535 | 0 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
536 | 0 | if self.flags().case_insensitive() { |
537 | 0 | rhs.try_case_fold_simple().map_err(|_| { |
538 | 0 | self.error( |
539 | 0 | op.rhs.span().clone(), |
540 | 0 | ErrorKind::UnicodeCaseUnavailable, |
541 | | ) |
542 | 0 | })?; |
543 | 0 | lhs.try_case_fold_simple().map_err(|_| { |
544 | 0 | self.error( |
545 | 0 | op.lhs.span().clone(), |
546 | 0 | ErrorKind::UnicodeCaseUnavailable, |
547 | | ) |
548 | 0 | })?; |
549 | 0 | } |
550 | 0 | match op.kind { |
551 | 0 | Intersection => lhs.intersect(&rhs), |
552 | 0 | Difference => lhs.difference(&rhs), |
553 | 0 | SymmetricDifference => lhs.symmetric_difference(&rhs), |
554 | | } |
555 | 0 | cls.union(&lhs); |
556 | 0 | self.push(HirFrame::ClassUnicode(cls)); |
557 | | } else { |
558 | 0 | let mut rhs = self.pop().unwrap().unwrap_class_bytes(); |
559 | 0 | let mut lhs = self.pop().unwrap().unwrap_class_bytes(); |
560 | 0 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
561 | 0 | if self.flags().case_insensitive() { |
562 | 0 | rhs.case_fold_simple(); |
563 | 0 | lhs.case_fold_simple(); |
564 | 0 | } |
565 | 0 | match op.kind { |
566 | 0 | Intersection => lhs.intersect(&rhs), |
567 | 0 | Difference => lhs.difference(&rhs), |
568 | 0 | SymmetricDifference => lhs.symmetric_difference(&rhs), |
569 | | } |
570 | 0 | cls.union(&lhs); |
571 | 0 | self.push(HirFrame::ClassBytes(cls)); |
572 | | } |
573 | 0 | Ok(()) |
574 | 0 | } |
575 | | } |
576 | | |
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose frame stack and flags this value operates on.
    trans: &'t Translator,
    /// The original pattern string; copied into any error that is created.
    pattern: &'p str,
}
588 | | |
589 | | impl<'t, 'p> TranslatorI<'t, 'p> { |
590 | | /// Build a new internal translator. |
591 | 0 | fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { |
592 | 0 | TranslatorI { trans, pattern } |
593 | 0 | } |
594 | | |
595 | | /// Return a reference to the underlying translator. |
596 | 0 | fn trans(&self) -> &Translator { |
597 | 0 | &self.trans |
598 | 0 | } |
599 | | |
600 | | /// Push the given frame on to the call stack. |
601 | 0 | fn push(&self, frame: HirFrame) { |
602 | 0 | self.trans().stack.borrow_mut().push(frame); |
603 | 0 | } |
604 | | |
605 | | /// Pop the top of the call stack. If the call stack is empty, return None. |
606 | 0 | fn pop(&self) -> Option<HirFrame> { |
607 | 0 | self.trans().stack.borrow_mut().pop() |
608 | 0 | } |
609 | | |
610 | | /// Create a new error with the given span and error type. |
611 | 0 | fn error(&self, span: Span, kind: ErrorKind) -> Error { |
612 | 0 | Error { kind, pattern: self.pattern.to_string(), span } |
613 | 0 | } |
614 | | |
615 | | /// Return a copy of the active flags. |
616 | 0 | fn flags(&self) -> Flags { |
617 | 0 | self.trans().flags.get() |
618 | 0 | } |
619 | | |
620 | | /// Set the flags of this translator from the flags set in the given AST. |
621 | | /// Then, return the old flags. |
622 | 0 | fn set_flags(&self, ast_flags: &ast::Flags) -> Flags { |
623 | 0 | let old_flags = self.flags(); |
624 | 0 | let mut new_flags = Flags::from_ast(ast_flags); |
625 | 0 | new_flags.merge(&old_flags); |
626 | 0 | self.trans().flags.set(new_flags); |
627 | 0 | old_flags |
628 | 0 | } |
629 | | |
630 | 0 | fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> { |
631 | 0 | let ch = match self.literal_to_char(lit)? { |
632 | 0 | byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)), |
633 | 0 | hir::Literal::Unicode(ch) => ch, |
634 | | }; |
635 | 0 | if self.flags().case_insensitive() { |
636 | 0 | self.hir_from_char_case_insensitive(lit.span, ch) |
637 | | } else { |
638 | 0 | self.hir_from_char(lit.span, ch) |
639 | | } |
640 | 0 | } |
641 | | |
642 | | /// Convert an Ast literal to its scalar representation. |
643 | | /// |
644 | | /// When Unicode mode is enabled, then this always succeeds and returns a |
645 | | /// `char` (Unicode scalar value). |
646 | | /// |
647 | | /// When Unicode mode is disabled, then a raw byte is returned. If that |
648 | | /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns |
649 | | /// an error. |
650 | 0 | fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> { |
651 | 0 | if self.flags().unicode() { |
652 | 0 | return Ok(hir::Literal::Unicode(lit.c)); |
653 | 0 | } |
654 | 0 | let byte = match lit.byte() { |
655 | 0 | None => return Ok(hir::Literal::Unicode(lit.c)), |
656 | 0 | Some(byte) => byte, |
657 | | }; |
658 | 0 | if byte <= 0x7F { |
659 | 0 | return Ok(hir::Literal::Unicode(byte as char)); |
660 | 0 | } |
661 | 0 | if !self.trans().allow_invalid_utf8 { |
662 | 0 | return Err(self.error(lit.span, ErrorKind::InvalidUtf8)); |
663 | 0 | } |
664 | 0 | Ok(hir::Literal::Byte(byte)) |
665 | 0 | } |
666 | | |
667 | 0 | fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> { |
668 | 0 | if !self.flags().unicode() && c.len_utf8() > 1 { |
669 | 0 | return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); |
670 | 0 | } |
671 | 0 | Ok(Hir::literal(hir::Literal::Unicode(c))) |
672 | 0 | } |
673 | | |
674 | 0 | fn hir_from_char_case_insensitive( |
675 | 0 | &self, |
676 | 0 | span: Span, |
677 | 0 | c: char, |
678 | 0 | ) -> Result<Hir> { |
679 | 0 | if self.flags().unicode() { |
680 | | // If case folding won't do anything, then don't bother trying. |
681 | 0 | let map = |
682 | 0 | unicode::contains_simple_case_mapping(c, c).map_err(|_| { |
683 | 0 | self.error(span, ErrorKind::UnicodeCaseUnavailable) |
684 | 0 | })?; |
685 | 0 | if !map { |
686 | 0 | return self.hir_from_char(span, c); |
687 | 0 | } |
688 | 0 | let mut cls = |
689 | 0 | hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new( |
690 | 0 | c, c, |
691 | | )]); |
692 | 0 | cls.try_case_fold_simple().map_err(|_| { |
693 | 0 | self.error(span, ErrorKind::UnicodeCaseUnavailable) |
694 | 0 | })?; |
695 | 0 | Ok(Hir::class(hir::Class::Unicode(cls))) |
696 | | } else { |
697 | 0 | if c.len_utf8() > 1 { |
698 | 0 | return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); |
699 | 0 | } |
700 | | // If case folding won't do anything, then don't bother trying. |
701 | 0 | match c { |
702 | 0 | 'A'..='Z' | 'a'..='z' => {} |
703 | 0 | _ => return self.hir_from_char(span, c), |
704 | | } |
705 | 0 | let mut cls = |
706 | 0 | hir::ClassBytes::new(vec![hir::ClassBytesRange::new( |
707 | 0 | c as u8, c as u8, |
708 | | )]); |
709 | 0 | cls.case_fold_simple(); |
710 | 0 | Ok(Hir::class(hir::Class::Bytes(cls))) |
711 | | } |
712 | 0 | } |
713 | | |
714 | 0 | fn hir_dot(&self, span: Span) -> Result<Hir> { |
715 | 0 | let unicode = self.flags().unicode(); |
716 | 0 | if !unicode && !self.trans().allow_invalid_utf8 { |
717 | 0 | return Err(self.error(span, ErrorKind::InvalidUtf8)); |
718 | 0 | } |
719 | 0 | Ok(if self.flags().dot_matches_new_line() { |
720 | 0 | Hir::any(!unicode) |
721 | | } else { |
722 | 0 | Hir::dot(!unicode) |
723 | | }) |
724 | 0 | } |
725 | | |
726 | 0 | fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> { |
727 | 0 | let unicode = self.flags().unicode(); |
728 | 0 | let multi_line = self.flags().multi_line(); |
729 | 0 | Ok(match asst.kind { |
730 | 0 | ast::AssertionKind::StartLine => Hir::anchor(if multi_line { |
731 | 0 | hir::Anchor::StartLine |
732 | | } else { |
733 | 0 | hir::Anchor::StartText |
734 | | }), |
735 | 0 | ast::AssertionKind::EndLine => Hir::anchor(if multi_line { |
736 | 0 | hir::Anchor::EndLine |
737 | | } else { |
738 | 0 | hir::Anchor::EndText |
739 | | }), |
740 | | ast::AssertionKind::StartText => { |
741 | 0 | Hir::anchor(hir::Anchor::StartText) |
742 | | } |
743 | 0 | ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText), |
744 | | ast::AssertionKind::WordBoundary => { |
745 | 0 | Hir::word_boundary(if unicode { |
746 | 0 | hir::WordBoundary::Unicode |
747 | | } else { |
748 | 0 | hir::WordBoundary::Ascii |
749 | | }) |
750 | | } |
751 | | ast::AssertionKind::NotWordBoundary => { |
752 | 0 | Hir::word_boundary(if unicode { |
753 | 0 | hir::WordBoundary::UnicodeNegate |
754 | | } else { |
755 | | // It is possible for negated ASCII word boundaries to |
756 | | // match at invalid UTF-8 boundaries, even when searching |
757 | | // valid UTF-8. |
758 | 0 | if !self.trans().allow_invalid_utf8 { |
759 | 0 | return Err( |
760 | 0 | self.error(asst.span, ErrorKind::InvalidUtf8) |
761 | 0 | ); |
762 | 0 | } |
763 | 0 | hir::WordBoundary::AsciiNegate |
764 | | }) |
765 | | } |
766 | | }) |
767 | 0 | } |
768 | | |
769 | 0 | fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { |
770 | 0 | let kind = match group.kind { |
771 | 0 | ast::GroupKind::CaptureIndex(idx) => { |
772 | 0 | hir::GroupKind::CaptureIndex(idx) |
773 | | } |
774 | 0 | ast::GroupKind::CaptureName(ref capname) => { |
775 | 0 | hir::GroupKind::CaptureName { |
776 | 0 | name: capname.name.clone(), |
777 | 0 | index: capname.index, |
778 | 0 | } |
779 | | } |
780 | 0 | ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, |
781 | | }; |
782 | 0 | Hir::group(hir::Group { kind, hir: Box::new(expr) }) |
783 | 0 | } |
784 | | |
785 | 0 | fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { |
786 | 0 | let kind = match rep.op.kind { |
787 | 0 | ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne, |
788 | 0 | ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore, |
789 | 0 | ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore, |
790 | 0 | ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => { |
791 | 0 | hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m)) |
792 | | } |
793 | 0 | ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => { |
794 | 0 | hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m)) |
795 | | } |
796 | | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded( |
797 | 0 | m, |
798 | 0 | n, |
799 | | )) => { |
800 | 0 | hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n)) |
801 | | } |
802 | | }; |
803 | 0 | let greedy = |
804 | 0 | if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; |
805 | 0 | Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) }) |
806 | 0 | } |
807 | | |
808 | 0 | fn hir_unicode_class( |
809 | 0 | &self, |
810 | 0 | ast_class: &ast::ClassUnicode, |
811 | 0 | ) -> Result<hir::ClassUnicode> { |
812 | | use crate::ast::ClassUnicodeKind::*; |
813 | | |
814 | 0 | if !self.flags().unicode() { |
815 | 0 | return Err( |
816 | 0 | self.error(ast_class.span, ErrorKind::UnicodeNotAllowed) |
817 | 0 | ); |
818 | 0 | } |
819 | 0 | let query = match ast_class.kind { |
820 | 0 | OneLetter(name) => ClassQuery::OneLetter(name), |
821 | 0 | Named(ref name) => ClassQuery::Binary(name), |
822 | 0 | NamedValue { ref name, ref value, .. } => ClassQuery::ByValue { |
823 | 0 | property_name: name, |
824 | 0 | property_value: value, |
825 | 0 | }, |
826 | | }; |
827 | 0 | let mut result = self.convert_unicode_class_error( |
828 | 0 | &ast_class.span, |
829 | 0 | unicode::class(query), |
830 | | ); |
831 | 0 | if let Ok(ref mut class) = result { |
832 | 0 | self.unicode_fold_and_negate( |
833 | 0 | &ast_class.span, |
834 | 0 | ast_class.negated, |
835 | 0 | class, |
836 | 0 | )?; |
837 | 0 | if class.ranges().is_empty() { |
838 | 0 | let err = self |
839 | 0 | .error(ast_class.span, ErrorKind::EmptyClassNotAllowed); |
840 | 0 | return Err(err); |
841 | 0 | } |
842 | 0 | } |
843 | 0 | result |
844 | 0 | } |
845 | | |
846 | 0 | fn hir_ascii_unicode_class( |
847 | 0 | &self, |
848 | 0 | ast: &ast::ClassAscii, |
849 | 0 | ) -> Result<hir::ClassUnicode> { |
850 | 0 | let mut cls = hir::ClassUnicode::new( |
851 | 0 | ascii_class(&ast.kind) |
852 | 0 | .iter() |
853 | 0 | .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)), |
854 | | ); |
855 | 0 | self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; |
856 | 0 | Ok(cls) |
857 | 0 | } |
858 | | |
859 | 0 | fn hir_ascii_byte_class( |
860 | 0 | &self, |
861 | 0 | ast: &ast::ClassAscii, |
862 | 0 | ) -> Result<hir::ClassBytes> { |
863 | 0 | let mut cls = hir::ClassBytes::new( |
864 | 0 | ascii_class(&ast.kind) |
865 | 0 | .iter() |
866 | 0 | .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)), |
867 | | ); |
868 | 0 | self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; |
869 | 0 | Ok(cls) |
870 | 0 | } |
871 | | |
872 | 0 | fn hir_perl_unicode_class( |
873 | 0 | &self, |
874 | 0 | ast_class: &ast::ClassPerl, |
875 | 0 | ) -> Result<hir::ClassUnicode> { |
876 | | use crate::ast::ClassPerlKind::*; |
877 | | |
878 | 0 | assert!(self.flags().unicode()); |
879 | 0 | let result = match ast_class.kind { |
880 | 0 | Digit => unicode::perl_digit(), |
881 | 0 | Space => unicode::perl_space(), |
882 | 0 | Word => unicode::perl_word(), |
883 | | }; |
884 | 0 | let mut class = |
885 | 0 | self.convert_unicode_class_error(&ast_class.span, result)?; |
886 | | // We needn't apply case folding here because the Perl Unicode classes |
887 | | // are already closed under Unicode simple case folding. |
888 | 0 | if ast_class.negated { |
889 | 0 | class.negate(); |
890 | 0 | } |
891 | 0 | Ok(class) |
892 | 0 | } |
893 | | |
894 | 0 | fn hir_perl_byte_class( |
895 | 0 | &self, |
896 | 0 | ast_class: &ast::ClassPerl, |
897 | 0 | ) -> hir::ClassBytes { |
898 | | use crate::ast::ClassPerlKind::*; |
899 | | |
900 | 0 | assert!(!self.flags().unicode()); |
901 | 0 | let mut class = match ast_class.kind { |
902 | 0 | Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit), |
903 | 0 | Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space), |
904 | 0 | Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word), |
905 | | }; |
906 | | // We needn't apply case folding here because the Perl ASCII classes |
907 | | // are already closed (under ASCII case folding). |
908 | 0 | if ast_class.negated { |
909 | 0 | class.negate(); |
910 | 0 | } |
911 | 0 | class |
912 | 0 | } |
913 | | |
914 | | /// Converts the given Unicode specific error to an HIR translation error. |
915 | | /// |
916 | | /// The span given should approximate the position at which an error would |
917 | | /// occur. |
918 | 0 | fn convert_unicode_class_error( |
919 | 0 | &self, |
920 | 0 | span: &Span, |
921 | 0 | result: unicode::Result<hir::ClassUnicode>, |
922 | 0 | ) -> Result<hir::ClassUnicode> { |
923 | 0 | result.map_err(|err| { |
924 | 0 | let sp = span.clone(); |
925 | 0 | match err { |
926 | | unicode::Error::PropertyNotFound => { |
927 | 0 | self.error(sp, ErrorKind::UnicodePropertyNotFound) |
928 | | } |
929 | | unicode::Error::PropertyValueNotFound => { |
930 | 0 | self.error(sp, ErrorKind::UnicodePropertyValueNotFound) |
931 | | } |
932 | | unicode::Error::PerlClassNotFound => { |
933 | 0 | self.error(sp, ErrorKind::UnicodePerlClassNotFound) |
934 | | } |
935 | | } |
936 | 0 | }) |
937 | 0 | } |
938 | | |
939 | 0 | fn unicode_fold_and_negate( |
940 | 0 | &self, |
941 | 0 | span: &Span, |
942 | 0 | negated: bool, |
943 | 0 | class: &mut hir::ClassUnicode, |
944 | 0 | ) -> Result<()> { |
945 | | // Note that we must apply case folding before negation! |
946 | | // Consider `(?i)[^x]`. If we applied negation first, then |
947 | | // the result would be the character class that matched any |
948 | | // Unicode scalar value. |
949 | 0 | if self.flags().case_insensitive() { |
950 | 0 | class.try_case_fold_simple().map_err(|_| { |
951 | 0 | self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable) |
952 | 0 | })?; |
953 | 0 | } |
954 | 0 | if negated { |
955 | 0 | class.negate(); |
956 | 0 | } |
957 | 0 | Ok(()) |
958 | 0 | } |
959 | | |
960 | 0 | fn bytes_fold_and_negate( |
961 | 0 | &self, |
962 | 0 | span: &Span, |
963 | 0 | negated: bool, |
964 | 0 | class: &mut hir::ClassBytes, |
965 | 0 | ) -> Result<()> { |
966 | | // Note that we must apply case folding before negation! |
967 | | // Consider `(?i)[^x]`. If we applied negation first, then |
968 | | // the result would be the character class that matched any |
969 | | // byte. |
970 | 0 | if self.flags().case_insensitive() { |
971 | 0 | class.case_fold_simple(); |
972 | 0 | } |
973 | 0 | if negated { |
974 | 0 | class.negate(); |
975 | 0 | } |
976 | 0 | if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() { |
977 | 0 | return Err(self.error(span.clone(), ErrorKind::InvalidUtf8)); |
978 | 0 | } |
979 | 0 | Ok(()) |
980 | 0 | } |
981 | | |
982 | | /// Return a scalar byte value suitable for use as a literal in a byte |
983 | | /// character class. |
984 | 0 | fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> { |
985 | 0 | match self.literal_to_char(ast)? { |
986 | 0 | hir::Literal::Byte(byte) => Ok(byte), |
987 | 0 | hir::Literal::Unicode(ch) => { |
988 | 0 | if ch <= 0x7F as char { |
989 | 0 | Ok(ch as u8) |
990 | | } else { |
991 | | // We can't feasibly support Unicode in |
992 | | // byte oriented classes. Byte classes don't |
993 | | // do Unicode case folding. |
994 | 0 | Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed)) |
995 | | } |
996 | | } |
997 | | } |
998 | 0 | } |
999 | | } |
1000 | | |
1001 | | /// A translator's representation of a regular expression's flags at any given |
1002 | | /// moment in time. |
1003 | | /// |
1004 | | /// Each flag can be in one of three states: absent, present but disabled or |
1005 | | /// present but enabled. |
1006 | | #[derive(Clone, Copy, Debug, Default)] |
1007 | | struct Flags { |
1008 | | case_insensitive: Option<bool>, |
1009 | | multi_line: Option<bool>, |
1010 | | dot_matches_new_line: Option<bool>, |
1011 | | swap_greed: Option<bool>, |
1012 | | unicode: Option<bool>, |
1013 | | // Note that `ignore_whitespace` is omitted here because it is handled |
1014 | | // entirely in the parser. |
1015 | | } |
1016 | | |
1017 | | impl Flags { |
1018 | 0 | fn from_ast(ast: &ast::Flags) -> Flags { |
1019 | 0 | let mut flags = Flags::default(); |
1020 | 0 | let mut enable = true; |
1021 | 0 | for item in &ast.items { |
1022 | 0 | match item.kind { |
1023 | 0 | ast::FlagsItemKind::Negation => { |
1024 | 0 | enable = false; |
1025 | 0 | } |
1026 | 0 | ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => { |
1027 | 0 | flags.case_insensitive = Some(enable); |
1028 | 0 | } |
1029 | 0 | ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => { |
1030 | 0 | flags.multi_line = Some(enable); |
1031 | 0 | } |
1032 | 0 | ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => { |
1033 | 0 | flags.dot_matches_new_line = Some(enable); |
1034 | 0 | } |
1035 | 0 | ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => { |
1036 | 0 | flags.swap_greed = Some(enable); |
1037 | 0 | } |
1038 | 0 | ast::FlagsItemKind::Flag(ast::Flag::Unicode) => { |
1039 | 0 | flags.unicode = Some(enable); |
1040 | 0 | } |
1041 | 0 | ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {} |
1042 | | } |
1043 | | } |
1044 | 0 | flags |
1045 | 0 | } |
1046 | | |
1047 | 0 | fn merge(&mut self, previous: &Flags) { |
1048 | 0 | if self.case_insensitive.is_none() { |
1049 | 0 | self.case_insensitive = previous.case_insensitive; |
1050 | 0 | } |
1051 | 0 | if self.multi_line.is_none() { |
1052 | 0 | self.multi_line = previous.multi_line; |
1053 | 0 | } |
1054 | 0 | if self.dot_matches_new_line.is_none() { |
1055 | 0 | self.dot_matches_new_line = previous.dot_matches_new_line; |
1056 | 0 | } |
1057 | 0 | if self.swap_greed.is_none() { |
1058 | 0 | self.swap_greed = previous.swap_greed; |
1059 | 0 | } |
1060 | 0 | if self.unicode.is_none() { |
1061 | 0 | self.unicode = previous.unicode; |
1062 | 0 | } |
1063 | 0 | } |
1064 | | |
1065 | 0 | fn case_insensitive(&self) -> bool { |
1066 | 0 | self.case_insensitive.unwrap_or(false) |
1067 | 0 | } |
1068 | | |
1069 | 0 | fn multi_line(&self) -> bool { |
1070 | 0 | self.multi_line.unwrap_or(false) |
1071 | 0 | } |
1072 | | |
1073 | 0 | fn dot_matches_new_line(&self) -> bool { |
1074 | 0 | self.dot_matches_new_line.unwrap_or(false) |
1075 | 0 | } |
1076 | | |
1077 | 0 | fn swap_greed(&self) -> bool { |
1078 | 0 | self.swap_greed.unwrap_or(false) |
1079 | 0 | } |
1080 | | |
1081 | 0 | fn unicode(&self) -> bool { |
1082 | 0 | self.unicode.unwrap_or(true) |
1083 | 0 | } |
1084 | | } |
1085 | | |
1086 | 0 | fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { |
1087 | 0 | let ranges: Vec<_> = ascii_class(kind) |
1088 | 0 | .iter() |
1089 | 0 | .cloned() |
1090 | 0 | .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)) |
1091 | 0 | .collect(); |
1092 | 0 | hir::ClassBytes::new(ranges) |
1093 | 0 | } |
1094 | | |
1095 | 0 | fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { |
1096 | | use crate::ast::ClassAsciiKind::*; |
1097 | 0 | match *kind { |
1098 | 0 | Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')], |
1099 | 0 | Alpha => &[('A', 'Z'), ('a', 'z')], |
1100 | 0 | Ascii => &[('\x00', '\x7F')], |
1101 | 0 | Blank => &[('\t', '\t'), (' ', ' ')], |
1102 | 0 | Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')], |
1103 | 0 | Digit => &[('0', '9')], |
1104 | 0 | Graph => &[('!', '~')], |
1105 | 0 | Lower => &[('a', 'z')], |
1106 | 0 | Print => &[(' ', '~')], |
1107 | 0 | Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')], |
1108 | 0 | Space => &[ |
1109 | 0 | ('\t', '\t'), |
1110 | 0 | ('\n', '\n'), |
1111 | 0 | ('\x0B', '\x0B'), |
1112 | 0 | ('\x0C', '\x0C'), |
1113 | 0 | ('\r', '\r'), |
1114 | 0 | (' ', ' '), |
1115 | 0 | ], |
1116 | 0 | Upper => &[('A', 'Z')], |
1117 | 0 | Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')], |
1118 | 0 | Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')], |
1119 | | } |
1120 | 0 | } |
1121 | | |
1122 | | #[cfg(test)] |
1123 | | mod tests { |
1124 | | use crate::ast::parse::ParserBuilder; |
1125 | | use crate::ast::{self, Ast, Position, Span}; |
1126 | | use crate::hir::{self, Hir, HirKind}; |
1127 | | use crate::unicode::{self, ClassQuery}; |
1128 | | |
1129 | | use super::{ascii_class, TranslatorBuilder}; |
1130 | | |
1131 | | // We create these errors to compare with real hir::Errors in the tests. |
1132 | | // We define equality between TestError and hir::Error to disregard the |
1133 | | // pattern string in hir::Error, which is annoying to provide in tests. |
1134 | | #[derive(Clone, Debug)] |
1135 | | struct TestError { |
1136 | | span: Span, |
1137 | | kind: hir::ErrorKind, |
1138 | | } |
1139 | | |
1140 | | impl PartialEq<hir::Error> for TestError { |
1141 | | fn eq(&self, other: &hir::Error) -> bool { |
1142 | | self.span == other.span && self.kind == other.kind |
1143 | | } |
1144 | | } |
1145 | | |
1146 | | impl PartialEq<TestError> for hir::Error { |
1147 | | fn eq(&self, other: &TestError) -> bool { |
1148 | | self.span == other.span && self.kind == other.kind |
1149 | | } |
1150 | | } |
1151 | | |
1152 | | fn parse(pattern: &str) -> Ast { |
1153 | | ParserBuilder::new().octal(true).build().parse(pattern).unwrap() |
1154 | | } |
1155 | | |
1156 | | fn t(pattern: &str) -> Hir { |
1157 | | TranslatorBuilder::new() |
1158 | | .allow_invalid_utf8(false) |
1159 | | .build() |
1160 | | .translate(pattern, &parse(pattern)) |
1161 | | .unwrap() |
1162 | | } |
1163 | | |
1164 | | fn t_err(pattern: &str) -> hir::Error { |
1165 | | TranslatorBuilder::new() |
1166 | | .allow_invalid_utf8(false) |
1167 | | .build() |
1168 | | .translate(pattern, &parse(pattern)) |
1169 | | .unwrap_err() |
1170 | | } |
1171 | | |
1172 | | fn t_bytes(pattern: &str) -> Hir { |
1173 | | TranslatorBuilder::new() |
1174 | | .allow_invalid_utf8(true) |
1175 | | .build() |
1176 | | .translate(pattern, &parse(pattern)) |
1177 | | .unwrap() |
1178 | | } |
1179 | | |
1180 | | fn hir_lit(s: &str) -> Hir { |
1181 | | match s.len() { |
1182 | | 0 => Hir::empty(), |
1183 | | _ => { |
1184 | | let lits = s |
1185 | | .chars() |
1186 | | .map(hir::Literal::Unicode) |
1187 | | .map(Hir::literal) |
1188 | | .collect(); |
1189 | | Hir::concat(lits) |
1190 | | } |
1191 | | } |
1192 | | } |
1193 | | |
1194 | | fn hir_blit(s: &[u8]) -> Hir { |
1195 | | match s.len() { |
1196 | | 0 => Hir::empty(), |
1197 | | 1 => Hir::literal(hir::Literal::Byte(s[0])), |
1198 | | _ => { |
1199 | | let lits = s |
1200 | | .iter() |
1201 | | .cloned() |
1202 | | .map(hir::Literal::Byte) |
1203 | | .map(Hir::literal) |
1204 | | .collect(); |
1205 | | Hir::concat(lits) |
1206 | | } |
1207 | | } |
1208 | | } |
1209 | | |
1210 | | fn hir_group(i: u32, expr: Hir) -> Hir { |
1211 | | Hir::group(hir::Group { |
1212 | | kind: hir::GroupKind::CaptureIndex(i), |
1213 | | hir: Box::new(expr), |
1214 | | }) |
1215 | | } |
1216 | | |
1217 | | fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir { |
1218 | | Hir::group(hir::Group { |
1219 | | kind: hir::GroupKind::CaptureName { |
1220 | | name: name.to_string(), |
1221 | | index: i, |
1222 | | }, |
1223 | | hir: Box::new(expr), |
1224 | | }) |
1225 | | } |
1226 | | |
1227 | | fn hir_group_nocap(expr: Hir) -> Hir { |
1228 | | Hir::group(hir::Group { |
1229 | | kind: hir::GroupKind::NonCapturing, |
1230 | | hir: Box::new(expr), |
1231 | | }) |
1232 | | } |
1233 | | |
1234 | | fn hir_quest(greedy: bool, expr: Hir) -> Hir { |
1235 | | Hir::repetition(hir::Repetition { |
1236 | | kind: hir::RepetitionKind::ZeroOrOne, |
1237 | | greedy, |
1238 | | hir: Box::new(expr), |
1239 | | }) |
1240 | | } |
1241 | | |
1242 | | fn hir_star(greedy: bool, expr: Hir) -> Hir { |
1243 | | Hir::repetition(hir::Repetition { |
1244 | | kind: hir::RepetitionKind::ZeroOrMore, |
1245 | | greedy, |
1246 | | hir: Box::new(expr), |
1247 | | }) |
1248 | | } |
1249 | | |
1250 | | fn hir_plus(greedy: bool, expr: Hir) -> Hir { |
1251 | | Hir::repetition(hir::Repetition { |
1252 | | kind: hir::RepetitionKind::OneOrMore, |
1253 | | greedy, |
1254 | | hir: Box::new(expr), |
1255 | | }) |
1256 | | } |
1257 | | |
1258 | | fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { |
1259 | | Hir::repetition(hir::Repetition { |
1260 | | kind: hir::RepetitionKind::Range(range), |
1261 | | greedy, |
1262 | | hir: Box::new(expr), |
1263 | | }) |
1264 | | } |
1265 | | |
1266 | | fn hir_alt(alts: Vec<Hir>) -> Hir { |
1267 | | Hir::alternation(alts) |
1268 | | } |
1269 | | |
1270 | | fn hir_cat(exprs: Vec<Hir>) -> Hir { |
1271 | | Hir::concat(exprs) |
1272 | | } |
1273 | | |
1274 | | #[allow(dead_code)] |
1275 | | fn hir_uclass_query(query: ClassQuery<'_>) -> Hir { |
1276 | | Hir::class(hir::Class::Unicode(unicode::class(query).unwrap())) |
1277 | | } |
1278 | | |
1279 | | #[allow(dead_code)] |
1280 | | fn hir_uclass_perl_word() -> Hir { |
1281 | | Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap())) |
1282 | | } |
1283 | | |
1284 | | fn hir_uclass(ranges: &[(char, char)]) -> Hir { |
1285 | | let ranges: Vec<hir::ClassUnicodeRange> = ranges |
1286 | | .iter() |
1287 | | .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) |
1288 | | .collect(); |
1289 | | Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges))) |
1290 | | } |
1291 | | |
1292 | | fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { |
1293 | | let ranges: Vec<hir::ClassBytesRange> = ranges |
1294 | | .iter() |
1295 | | .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) |
1296 | | .collect(); |
1297 | | Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) |
1298 | | } |
1299 | | |
1300 | | fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir { |
1301 | | let ranges: Vec<hir::ClassBytesRange> = ranges |
1302 | | .iter() |
1303 | | .map(|&(s, e)| { |
1304 | | assert!(s as u32 <= 0x7F); |
1305 | | assert!(e as u32 <= 0x7F); |
1306 | | hir::ClassBytesRange::new(s as u8, e as u8) |
1307 | | }) |
1308 | | .collect(); |
1309 | | Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) |
1310 | | } |
1311 | | |
1312 | | fn hir_case_fold(expr: Hir) -> Hir { |
1313 | | match expr.into_kind() { |
1314 | | HirKind::Class(mut cls) => { |
1315 | | cls.case_fold_simple(); |
1316 | | Hir::class(cls) |
1317 | | } |
1318 | | _ => panic!("cannot case fold non-class Hir expr"), |
1319 | | } |
1320 | | } |
1321 | | |
1322 | | fn hir_negate(expr: Hir) -> Hir { |
1323 | | match expr.into_kind() { |
1324 | | HirKind::Class(mut cls) => { |
1325 | | cls.negate(); |
1326 | | Hir::class(cls) |
1327 | | } |
1328 | | _ => panic!("cannot negate non-class Hir expr"), |
1329 | | } |
1330 | | } |
1331 | | |
1332 | | #[allow(dead_code)] |
1333 | | fn hir_union(expr1: Hir, expr2: Hir) -> Hir { |
1334 | | use crate::hir::Class::{Bytes, Unicode}; |
1335 | | |
1336 | | match (expr1.into_kind(), expr2.into_kind()) { |
1337 | | (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { |
1338 | | c1.union(&c2); |
1339 | | Hir::class(hir::Class::Unicode(c1)) |
1340 | | } |
1341 | | (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { |
1342 | | c1.union(&c2); |
1343 | | Hir::class(hir::Class::Bytes(c1)) |
1344 | | } |
1345 | | _ => panic!("cannot union non-class Hir exprs"), |
1346 | | } |
1347 | | } |
1348 | | |
1349 | | #[allow(dead_code)] |
1350 | | fn hir_difference(expr1: Hir, expr2: Hir) -> Hir { |
1351 | | use crate::hir::Class::{Bytes, Unicode}; |
1352 | | |
1353 | | match (expr1.into_kind(), expr2.into_kind()) { |
1354 | | (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { |
1355 | | c1.difference(&c2); |
1356 | | Hir::class(hir::Class::Unicode(c1)) |
1357 | | } |
1358 | | (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { |
1359 | | c1.difference(&c2); |
1360 | | Hir::class(hir::Class::Bytes(c1)) |
1361 | | } |
1362 | | _ => panic!("cannot difference non-class Hir exprs"), |
1363 | | } |
1364 | | } |
1365 | | |
1366 | | fn hir_anchor(anchor: hir::Anchor) -> Hir { |
1367 | | Hir::anchor(anchor) |
1368 | | } |
1369 | | |
1370 | | fn hir_word(wb: hir::WordBoundary) -> Hir { |
1371 | | Hir::word_boundary(wb) |
1372 | | } |
1373 | | |
1374 | | #[test] |
1375 | | fn empty() { |
1376 | | assert_eq!(t(""), Hir::empty()); |
1377 | | assert_eq!(t("(?i)"), Hir::empty()); |
1378 | | assert_eq!(t("()"), hir_group(1, Hir::empty())); |
1379 | | assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty())); |
1380 | | assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty())); |
1381 | | assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()])); |
1382 | | assert_eq!( |
1383 | | t("()|()"), |
1384 | | hir_alt(vec![ |
1385 | | hir_group(1, Hir::empty()), |
1386 | | hir_group(2, Hir::empty()), |
1387 | | ]) |
1388 | | ); |
1389 | | assert_eq!( |
1390 | | t("(|b)"), |
1391 | | hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),])) |
1392 | | ); |
1393 | | assert_eq!( |
1394 | | t("(a|)"), |
1395 | | hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),])) |
1396 | | ); |
1397 | | assert_eq!( |
1398 | | t("(a||c)"), |
1399 | | hir_group( |
1400 | | 1, |
1401 | | hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),]) |
1402 | | ) |
1403 | | ); |
1404 | | assert_eq!( |
1405 | | t("(||)"), |
1406 | | hir_group( |
1407 | | 1, |
1408 | | hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),]) |
1409 | | ) |
1410 | | ); |
1411 | | } |
1412 | | |
1413 | | #[test] |
1414 | | fn literal() { |
1415 | | assert_eq!(t("a"), hir_lit("a")); |
1416 | | assert_eq!(t("(?-u)a"), hir_lit("a")); |
1417 | | assert_eq!(t("☃"), hir_lit("☃")); |
1418 | | assert_eq!(t("abcd"), hir_lit("abcd")); |
1419 | | |
1420 | | assert_eq!(t_bytes("(?-u)a"), hir_lit("a")); |
1421 | | assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a")); |
1422 | | assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a")); |
1423 | | assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF")); |
1424 | | |
1425 | | assert_eq!( |
1426 | | t_err("(?-u)☃"), |
1427 | | TestError { |
1428 | | kind: hir::ErrorKind::UnicodeNotAllowed, |
1429 | | span: Span::new( |
1430 | | Position::new(5, 1, 6), |
1431 | | Position::new(8, 1, 7) |
1432 | | ), |
1433 | | } |
1434 | | ); |
1435 | | assert_eq!( |
1436 | | t_err(r"(?-u)\xFF"), |
1437 | | TestError { |
1438 | | kind: hir::ErrorKind::InvalidUtf8, |
1439 | | span: Span::new( |
1440 | | Position::new(5, 1, 6), |
1441 | | Position::new(9, 1, 10) |
1442 | | ), |
1443 | | } |
1444 | | ); |
1445 | | } |
1446 | | |
1447 | | #[test] |
1448 | | fn literal_case_insensitive() { |
1449 | | #[cfg(feature = "unicode-case")] |
1450 | | assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),])); |
1451 | | #[cfg(feature = "unicode-case")] |
1452 | | assert_eq!( |
1453 | | t("(?i:a)"), |
1454 | | hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],)) |
1455 | | ); |
1456 | | #[cfg(feature = "unicode-case")] |
1457 | | assert_eq!( |
1458 | | t("a(?i)a(?-i)a"), |
1459 | | hir_cat(vec![ |
1460 | | hir_lit("a"), |
1461 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1462 | | hir_lit("a"), |
1463 | | ]) |
1464 | | ); |
1465 | | #[cfg(feature = "unicode-case")] |
1466 | | assert_eq!( |
1467 | | t("(?i)ab@c"), |
1468 | | hir_cat(vec![ |
1469 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1470 | | hir_uclass(&[('B', 'B'), ('b', 'b')]), |
1471 | | hir_lit("@"), |
1472 | | hir_uclass(&[('C', 'C'), ('c', 'c')]), |
1473 | | ]) |
1474 | | ); |
1475 | | #[cfg(feature = "unicode-case")] |
1476 | | assert_eq!( |
1477 | | t("(?i)β"), |
1478 | | hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),]) |
1479 | | ); |
1480 | | |
1481 | | assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])); |
1482 | | #[cfg(feature = "unicode-case")] |
1483 | | assert_eq!( |
1484 | | t("(?-u)a(?i)a(?-i)a"), |
1485 | | hir_cat(vec![ |
1486 | | hir_lit("a"), |
1487 | | hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), |
1488 | | hir_lit("a"), |
1489 | | ]) |
1490 | | ); |
1491 | | assert_eq!( |
1492 | | t("(?i-u)ab@c"), |
1493 | | hir_cat(vec![ |
1494 | | hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), |
1495 | | hir_bclass(&[(b'B', b'B'), (b'b', b'b')]), |
1496 | | hir_lit("@"), |
1497 | | hir_bclass(&[(b'C', b'C'), (b'c', b'c')]), |
1498 | | ]) |
1499 | | ); |
1500 | | |
1501 | | assert_eq!( |
1502 | | t_bytes("(?i-u)a"), |
1503 | | hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]) |
1504 | | ); |
1505 | | assert_eq!( |
1506 | | t_bytes("(?i-u)\x61"), |
1507 | | hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]) |
1508 | | ); |
1509 | | assert_eq!( |
1510 | | t_bytes(r"(?i-u)\x61"), |
1511 | | hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]) |
1512 | | ); |
1513 | | assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF")); |
1514 | | |
1515 | | assert_eq!( |
1516 | | t_err("(?i-u)β"), |
1517 | | TestError { |
1518 | | kind: hir::ErrorKind::UnicodeNotAllowed, |
1519 | | span: Span::new( |
1520 | | Position::new(6, 1, 7), |
1521 | | Position::new(8, 1, 8), |
1522 | | ), |
1523 | | } |
1524 | | ); |
1525 | | } |
1526 | | |
1527 | | #[test] |
1528 | | fn dot() { |
1529 | | assert_eq!( |
1530 | | t("."), |
1531 | | hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),]) |
1532 | | ); |
1533 | | assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),])); |
1534 | | assert_eq!( |
1535 | | t_bytes("(?-u)."), |
1536 | | hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),]) |
1537 | | ); |
1538 | | assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),])); |
1539 | | |
1540 | | // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed. |
1541 | | assert_eq!( |
1542 | | t_err("(?-u)."), |
1543 | | TestError { |
1544 | | kind: hir::ErrorKind::InvalidUtf8, |
1545 | | span: Span::new( |
1546 | | Position::new(5, 1, 6), |
1547 | | Position::new(6, 1, 7) |
1548 | | ), |
1549 | | } |
1550 | | ); |
1551 | | assert_eq!( |
1552 | | t_err("(?s-u)."), |
1553 | | TestError { |
1554 | | kind: hir::ErrorKind::InvalidUtf8, |
1555 | | span: Span::new( |
1556 | | Position::new(6, 1, 7), |
1557 | | Position::new(7, 1, 8) |
1558 | | ), |
1559 | | } |
1560 | | ); |
1561 | | } |
1562 | | |
1563 | | #[test] |
1564 | | fn assertions() { |
1565 | | assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText)); |
1566 | | assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText)); |
1567 | | assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText)); |
1568 | | assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText)); |
1569 | | assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine)); |
1570 | | assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine)); |
1571 | | assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText)); |
1572 | | assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText)); |
1573 | | |
1574 | | assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode)); |
1575 | | assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate)); |
1576 | | assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii)); |
1577 | | assert_eq!( |
1578 | | t_bytes(r"(?-u)\B"), |
1579 | | hir_word(hir::WordBoundary::AsciiNegate) |
1580 | | ); |
1581 | | |
1582 | | assert_eq!( |
1583 | | t_err(r"(?-u)\B"), |
1584 | | TestError { |
1585 | | kind: hir::ErrorKind::InvalidUtf8, |
1586 | | span: Span::new( |
1587 | | Position::new(5, 1, 6), |
1588 | | Position::new(7, 1, 8) |
1589 | | ), |
1590 | | } |
1591 | | ); |
1592 | | } |
1593 | | |
1594 | | #[test] |
1595 | | fn group() { |
1596 | | assert_eq!(t("(a)"), hir_group(1, hir_lit("a"))); |
1597 | | assert_eq!( |
1598 | | t("(a)(b)"), |
1599 | | hir_cat(vec![ |
1600 | | hir_group(1, hir_lit("a")), |
1601 | | hir_group(2, hir_lit("b")), |
1602 | | ]) |
1603 | | ); |
1604 | | assert_eq!( |
1605 | | t("(a)|(b)"), |
1606 | | hir_alt(vec![ |
1607 | | hir_group(1, hir_lit("a")), |
1608 | | hir_group(2, hir_lit("b")), |
1609 | | ]) |
1610 | | ); |
1611 | | assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty())); |
1612 | | assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a"))); |
1613 | | assert_eq!( |
1614 | | t("(?P<foo>a)(?P<bar>b)"), |
1615 | | hir_cat(vec![ |
1616 | | hir_group_name(1, "foo", hir_lit("a")), |
1617 | | hir_group_name(2, "bar", hir_lit("b")), |
1618 | | ]) |
1619 | | ); |
1620 | | assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty())); |
1621 | | assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a"))); |
1622 | | assert_eq!( |
1623 | | t("(?:a)(b)"), |
1624 | | hir_cat(vec![ |
1625 | | hir_group_nocap(hir_lit("a")), |
1626 | | hir_group(1, hir_lit("b")), |
1627 | | ]) |
1628 | | ); |
1629 | | assert_eq!( |
1630 | | t("(a)(?:b)(c)"), |
1631 | | hir_cat(vec![ |
1632 | | hir_group(1, hir_lit("a")), |
1633 | | hir_group_nocap(hir_lit("b")), |
1634 | | hir_group(2, hir_lit("c")), |
1635 | | ]) |
1636 | | ); |
1637 | | assert_eq!( |
1638 | | t("(a)(?P<foo>b)(c)"), |
1639 | | hir_cat(vec![ |
1640 | | hir_group(1, hir_lit("a")), |
1641 | | hir_group_name(2, "foo", hir_lit("b")), |
1642 | | hir_group(3, hir_lit("c")), |
1643 | | ]) |
1644 | | ); |
1645 | | assert_eq!(t("()"), hir_group(1, Hir::empty())); |
1646 | | assert_eq!(t("((?i))"), hir_group(1, Hir::empty())); |
1647 | | assert_eq!(t("((?x))"), hir_group(1, Hir::empty())); |
1648 | | assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty()))); |
1649 | | } |
1650 | | |
1651 | | #[test] |
1652 | | fn flags() { |
1653 | | #[cfg(feature = "unicode-case")] |
1654 | | assert_eq!( |
1655 | | t("(?i:a)a"), |
1656 | | hir_cat(vec![ |
1657 | | hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])), |
1658 | | hir_lit("a"), |
1659 | | ]) |
1660 | | ); |
1661 | | assert_eq!( |
1662 | | t("(?i-u:a)β"), |
1663 | | hir_cat(vec![ |
1664 | | hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), |
1665 | | hir_lit("β"), |
1666 | | ]) |
1667 | | ); |
1668 | | assert_eq!( |
1669 | | t("(?:(?i-u)a)b"), |
1670 | | hir_cat(vec![ |
1671 | | hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), |
1672 | | hir_lit("b"), |
1673 | | ]) |
1674 | | ); |
1675 | | assert_eq!( |
1676 | | t("((?i-u)a)b"), |
1677 | | hir_cat(vec![ |
1678 | | hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), |
1679 | | hir_lit("b"), |
1680 | | ]) |
1681 | | ); |
1682 | | #[cfg(feature = "unicode-case")] |
1683 | | assert_eq!( |
1684 | | t("(?i)(?-i:a)a"), |
1685 | | hir_cat(vec![ |
1686 | | hir_group_nocap(hir_lit("a")), |
1687 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1688 | | ]) |
1689 | | ); |
1690 | | #[cfg(feature = "unicode-case")] |
1691 | | assert_eq!( |
1692 | | t("(?im)a^"), |
1693 | | hir_cat(vec![ |
1694 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1695 | | hir_anchor(hir::Anchor::StartLine), |
1696 | | ]) |
1697 | | ); |
1698 | | #[cfg(feature = "unicode-case")] |
1699 | | assert_eq!( |
1700 | | t("(?im)a^(?i-m)a^"), |
1701 | | hir_cat(vec![ |
1702 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1703 | | hir_anchor(hir::Anchor::StartLine), |
1704 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1705 | | hir_anchor(hir::Anchor::StartText), |
1706 | | ]) |
1707 | | ); |
1708 | | assert_eq!( |
1709 | | t("(?U)a*a*?(?-U)a*a*?"), |
1710 | | hir_cat(vec![ |
1711 | | hir_star(false, hir_lit("a")), |
1712 | | hir_star(true, hir_lit("a")), |
1713 | | hir_star(true, hir_lit("a")), |
1714 | | hir_star(false, hir_lit("a")), |
1715 | | ]) |
1716 | | ); |
1717 | | #[cfg(feature = "unicode-case")] |
1718 | | assert_eq!( |
1719 | | t("(?:a(?i)a)a"), |
1720 | | hir_cat(vec![ |
1721 | | hir_group_nocap(hir_cat(vec![ |
1722 | | hir_lit("a"), |
1723 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1724 | | ])), |
1725 | | hir_lit("a"), |
1726 | | ]) |
1727 | | ); |
1728 | | #[cfg(feature = "unicode-case")] |
1729 | | assert_eq!( |
1730 | | t("(?i)(?:a(?-i)a)a"), |
1731 | | hir_cat(vec![ |
1732 | | hir_group_nocap(hir_cat(vec![ |
1733 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1734 | | hir_lit("a"), |
1735 | | ])), |
1736 | | hir_uclass(&[('A', 'A'), ('a', 'a')]), |
1737 | | ]) |
1738 | | ); |
1739 | | } |
1740 | | |
1741 | | #[test] |
1742 | | fn escape() { |
1743 | | assert_eq!( |
1744 | | t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), |
1745 | | hir_lit(r"\.+*?()|[]{}^$#") |
1746 | | ); |
1747 | | } |
1748 | | |
// Exercises the operators `?`, `*`, `+` and the counted forms `{n}`, `{n,}`
// and `{n,m}`. In every expectation the first argument of the `hir_*` helper
// is `true` for the bare operator and `false` when the operator is followed
// by `?` (presumably the greedy flag; the helpers are defined outside this
// excerpt -- TODO confirm).
1749 | | #[test] |
1750 | | fn repetition() { |
1751 | | assert_eq!(t("a?"), hir_quest(true, hir_lit("a"))); |
1752 | | assert_eq!(t("a*"), hir_star(true, hir_lit("a"))); |
1753 | | assert_eq!(t("a+"), hir_plus(true, hir_lit("a"))); |
1754 | | assert_eq!(t("a??"), hir_quest(false, hir_lit("a"))); |
1755 | | assert_eq!(t("a*?"), hir_star(false, hir_lit("a"))); |
1756 | | assert_eq!(t("a+?"), hir_plus(false, hir_lit("a"))); |
1757 | | |
// Counted repetitions map onto `RepetitionRange::Exactly`, `AtLeast` and
// `Bounded` respectively.
1758 | | assert_eq!( |
1759 | | t("a{1}"), |
1760 | | hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),) |
1761 | | ); |
1762 | | assert_eq!( |
1763 | | t("a{1,}"), |
1764 | | hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),) |
1765 | | ); |
1766 | | assert_eq!( |
1767 | | t("a{1,2}"), |
1768 | | hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),) |
1769 | | ); |
1770 | | assert_eq!( |
1771 | | t("a{1}?"), |
1772 | | hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),) |
1773 | | ); |
1774 | | assert_eq!( |
1775 | | t("a{1,}?"), |
1776 | | hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),) |
1777 | | ); |
1778 | | assert_eq!( |
1779 | | t("a{1,2}?"), |
1780 | | hir_range( |
1781 | | false, |
1782 | | hir::RepetitionRange::Bounded(1, 2), |
1783 | | hir_lit("a"), |
1784 | | ) |
1785 | | ); |
1786 | | |
// Scope of the operator: it is expected to apply to `b` alone in `ab?` and
// `a|b?`, and to the whole capture group in `(ab)?`.
1787 | | assert_eq!( |
1788 | | t("ab?"), |
1789 | | hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) |
1790 | | ); |
1791 | | assert_eq!( |
1792 | | t("(ab)?"), |
1793 | | hir_quest( |
1794 | | true, |
1795 | | hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),])) |
1796 | | ) |
1797 | | ); |
1798 | | assert_eq!( |
1799 | | t("a|b?"), |
1800 | | hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) |
1801 | | ); |
1802 | | } |
1803 | | |
// Concatenation inside a capture group and alternation with and without
// capture groups. In the last case the nested groups are expected to carry
// the indices 1, 2, 3 from the outermost group inwards.
1804 | | #[test] |
1805 | | fn cat_alt() { |
1806 | | assert_eq!( |
1807 | | t("(ab)"), |
1808 | | hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),])) |
1809 | | ); |
1810 | | assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),])); |
1811 | | assert_eq!( |
1812 | | t("a|b|c"), |
1813 | | hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),]) |
1814 | | ); |
1815 | | assert_eq!( |
1816 | | t("ab|bc|cd"), |
1817 | | hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),]) |
1818 | | ); |
1819 | | assert_eq!( |
1820 | | t("(a|b)"), |
1821 | | hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),])) |
1822 | | ); |
1823 | | assert_eq!( |
1824 | | t("(a|b|c)"), |
1825 | | hir_group( |
1826 | | 1, |
1827 | | hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),]) |
1828 | | ) |
1829 | | ); |
1830 | | assert_eq!( |
1831 | | t("(ab|bc|cd)"), |
1832 | | hir_group( |
1833 | | 1, |
1834 | | hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),]) |
1835 | | ) |
1836 | | ); |
1837 | | assert_eq!( |
1838 | | t("(ab|(bc|(cd)))"), |
1839 | | hir_group( |
1840 | | 1, |
1841 | | hir_alt(vec![ |
1842 | | hir_lit("ab"), |
1843 | | hir_group( |
1844 | | 2, |
1845 | | hir_alt(vec![ |
1846 | | hir_lit("bc"), |
1847 | | hir_group(3, hir_lit("cd")), |
1848 | | ]) |
1849 | | ), |
1850 | | ]) |
1851 | | ) |
1852 | | ); |
1853 | | } |
1854 | | |
// Each `[[:name:]]` class is compared against the table that `ascii_class`
// returns for the corresponding `ast::ClassAsciiKind` variant (`ascii_class`
// and the `hir_*` helpers are defined outside this excerpt).
1855 | | #[test] |
1856 | | fn class_ascii() { |
1857 | | assert_eq!( |
1858 | | t("[[:alnum:]]"), |
1859 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)) |
1860 | | ); |
1861 | | assert_eq!( |
1862 | | t("[[:alpha:]]"), |
1863 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha)) |
1864 | | ); |
1865 | | assert_eq!( |
1866 | | t("[[:ascii:]]"), |
1867 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii)) |
1868 | | ); |
1869 | | assert_eq!( |
1870 | | t("[[:blank:]]"), |
1871 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank)) |
1872 | | ); |
1873 | | assert_eq!( |
1874 | | t("[[:cntrl:]]"), |
1875 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl)) |
1876 | | ); |
1877 | | assert_eq!( |
1878 | | t("[[:digit:]]"), |
1879 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit)) |
1880 | | ); |
1881 | | assert_eq!( |
1882 | | t("[[:graph:]]"), |
1883 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph)) |
1884 | | ); |
1885 | | assert_eq!( |
1886 | | t("[[:lower:]]"), |
1887 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)) |
1888 | | ); |
1889 | | assert_eq!( |
1890 | | t("[[:print:]]"), |
1891 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Print)) |
1892 | | ); |
1893 | | assert_eq!( |
1894 | | t("[[:punct:]]"), |
1895 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct)) |
1896 | | ); |
1897 | | assert_eq!( |
1898 | | t("[[:space:]]"), |
1899 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Space)) |
1900 | | ); |
1901 | | assert_eq!( |
1902 | | t("[[:upper:]]"), |
1903 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper)) |
1904 | | ); |
1905 | | assert_eq!( |
1906 | | t("[[:word:]]"), |
1907 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Word)) |
1908 | | ); |
1909 | | assert_eq!( |
1910 | | t("[[:xdigit:]]"), |
1911 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit)) |
1912 | | ); |
1913 | | |
// Negated form, then the case-insensitive form. The latter is expected to
// contain U+017F and U+212A in addition to the ASCII letters.
1914 | | assert_eq!( |
1915 | | t("[[:^lower:]]"), |
1916 | | hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))) |
1917 | | ); |
1918 | | #[cfg(feature = "unicode-case")] |
1919 | | assert_eq!( |
1920 | | t("(?i)[[:lower:]]"), |
1921 | | hir_uclass(&[ |
1922 | | ('A', 'Z'), |
1923 | | ('a', 'z'), |
1924 | | ('\u{17F}', '\u{17F}'), |
1925 | | ('\u{212A}', '\u{212A}'), |
1926 | | ]) |
1927 | | ); |
1928 | | |
// With `(?-u)` the expectation is built with `hir_bclass_from_char` instead
// of `hir_uclass`.
1929 | | assert_eq!( |
1930 | | t("(?-u)[[:lower:]]"), |
1931 | | hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower)) |
1932 | | ); |
1933 | | assert_eq!( |
1934 | | t("(?i-u)[[:lower:]]"), |
1935 | | hir_case_fold(hir_bclass_from_char(ascii_class( |
1936 | | &ast::ClassAsciiKind::Lower |
1937 | | ))) |
1938 | | ); |
1939 | | |
// A negated ASCII class under `(?-u)` is expected to be reported by `t_err`
// as `InvalidUtf8`, with the span given below.
1940 | | assert_eq!( |
1941 | | t_err("(?-u)[[:^lower:]]"), |
1942 | | TestError { |
1943 | | kind: hir::ErrorKind::InvalidUtf8, |
1944 | | span: Span::new( |
1945 | | Position::new(6, 1, 7), |
1946 | | Position::new(16, 1, 17) |
1947 | | ), |
1948 | | } |
1949 | | ); |
1950 | | assert_eq!( |
1951 | | t_err("(?i-u)[[:^lower:]]"), |
1952 | | TestError { |
1953 | | kind: hir::ErrorKind::InvalidUtf8, |
1954 | | span: Span::new( |
1955 | | Position::new(7, 1, 8), |
1956 | | Position::new(17, 1, 18) |
1957 | | ), |
1958 | | } |
1959 | | ); |
1960 | | } |
1961 | | |
// A bracket holding both `[:alnum:]` and `[:^ascii:]` is expected to be the
// union of the two classes, for `t` in the default mode as well as for
// `t_bytes` under `(?-u)`.
1962 | | #[test] |
1963 | | fn class_ascii_multiple() { |
1964 | | // See: https://github.com/rust-lang/regex/issues/680 |
1965 | | assert_eq!( |
1966 | | t("[[:alnum:][:^ascii:]]"), |
1967 | | hir_union( |
1968 | | hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)), |
1969 | | hir_uclass(&[('\u{80}', '\u{10FFFF}')]), |
1970 | | ), |
1971 | | ); |
1972 | | assert_eq!( |
1973 | | t_bytes("(?-u)[[:alnum:][:^ascii:]]"), |
1974 | | hir_union( |
1975 | | hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)), |
1976 | | hir_bclass(&[(0x80, 0xFF)]), |
1977 | | ), |
1978 | | ); |
1979 | | } |
1980 | | |
// Perl classes `\d`, `\s`, `\w` and their upper-case negations; compiled only
// with the `unicode-perl` feature. In each of the four sections below the
// expectations with the `i` flag are the same as the ones without it.
1981 | | #[test] |
1982 | | #[cfg(feature = "unicode-perl")] |
1983 | | fn class_perl() { |
1984 | | // Unicode |
1985 | | assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit"))); |
1986 | | assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space"))); |
1987 | | assert_eq!(t(r"\w"), hir_uclass_perl_word()); |
1988 | | #[cfg(feature = "unicode-case")] |
1989 | | assert_eq!( |
1990 | | t(r"(?i)\d"), |
1991 | | hir_uclass_query(ClassQuery::Binary("digit")) |
1992 | | ); |
1993 | | #[cfg(feature = "unicode-case")] |
1994 | | assert_eq!( |
1995 | | t(r"(?i)\s"), |
1996 | | hir_uclass_query(ClassQuery::Binary("space")) |
1997 | | ); |
1998 | | #[cfg(feature = "unicode-case")] |
1999 | | assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word()); |
2000 | | |
2001 | | // Unicode, negated |
2002 | | assert_eq!( |
2003 | | t(r"\D"), |
2004 | | hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) |
2005 | | ); |
2006 | | assert_eq!( |
2007 | | t(r"\S"), |
2008 | | hir_negate(hir_uclass_query(ClassQuery::Binary("space"))) |
2009 | | ); |
2010 | | assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word())); |
2011 | | #[cfg(feature = "unicode-case")] |
2012 | | assert_eq!( |
2013 | | t(r"(?i)\D"), |
2014 | | hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) |
2015 | | ); |
2016 | | #[cfg(feature = "unicode-case")] |
2017 | | assert_eq!( |
2018 | | t(r"(?i)\S"), |
2019 | | hir_negate(hir_uclass_query(ClassQuery::Binary("space"))) |
2020 | | ); |
2021 | | #[cfg(feature = "unicode-case")] |
2022 | | assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word())); |
2023 | | |
2024 | | // ASCII only |
2025 | | assert_eq!( |
2026 | | t(r"(?-u)\d"), |
2027 | | hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) |
2028 | | ); |
2029 | | assert_eq!( |
2030 | | t(r"(?-u)\s"), |
2031 | | hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space)) |
2032 | | ); |
2033 | | assert_eq!( |
2034 | | t(r"(?-u)\w"), |
2035 | | hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word)) |
2036 | | ); |
2037 | | assert_eq!( |
2038 | | t(r"(?i-u)\d"), |
2039 | | hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)) |
2040 | | ); |
2041 | | assert_eq!( |
2042 | | t(r"(?i-u)\s"), |
2043 | | hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space)) |
2044 | | ); |
2045 | | assert_eq!( |
2046 | | t(r"(?i-u)\w"), |
2047 | | hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word)) |
2048 | | ); |
2049 | | |
2050 | | // ASCII only, negated |
2051 | | assert_eq!( |
2052 | | t(r"(?-u)\D"), |
2053 | | hir_negate(hir_bclass_from_char(ascii_class( |
2054 | | &ast::ClassAsciiKind::Digit |
2055 | | ))) |
2056 | | ); |
2057 | | assert_eq!( |
2058 | | t(r"(?-u)\S"), |
2059 | | hir_negate(hir_bclass_from_char(ascii_class( |
2060 | | &ast::ClassAsciiKind::Space |
2061 | | ))) |
2062 | | ); |
2063 | | assert_eq!( |
2064 | | t(r"(?-u)\W"), |
2065 | | hir_negate(hir_bclass_from_char(ascii_class( |
2066 | | &ast::ClassAsciiKind::Word |
2067 | | ))) |
2068 | | ); |
2069 | | assert_eq!( |
2070 | | t(r"(?i-u)\D"), |
2071 | | hir_negate(hir_bclass_from_char(ascii_class( |
2072 | | &ast::ClassAsciiKind::Digit |
2073 | | ))) |
2074 | | ); |
2075 | | assert_eq!( |
2076 | | t(r"(?i-u)\S"), |
2077 | | hir_negate(hir_bclass_from_char(ascii_class( |
2078 | | &ast::ClassAsciiKind::Space |
2079 | | ))) |
2080 | | ); |
2081 | | assert_eq!( |
2082 | | t(r"(?i-u)\W"), |
2083 | | hir_negate(hir_bclass_from_char(ascii_class( |
2084 | | &ast::ClassAsciiKind::Word |
2085 | | ))) |
2086 | | ); |
2087 | | } |
2088 | | |
// Compiled only when the `unicode-perl` feature is off: `\w` is then expected
// to be reported by `t_err` as `UnicodePerlClassNotFound`.
2089 | | #[test] |
2090 | | #[cfg(not(feature = "unicode-perl"))] |
2091 | | fn class_perl_word_disabled() { |
2092 | | assert_eq!( |
2093 | | t_err(r"\w"), |
2094 | | TestError { |
2095 | | kind: hir::ErrorKind::UnicodePerlClassNotFound, |
2096 | | span: Span::new( |
2097 | | Position::new(0, 1, 1), |
2098 | | Position::new(2, 1, 3) |
2099 | | ), |
2100 | | } |
2101 | | ); |
2102 | | } |
2103 | | |
// Compiled only when both `unicode-perl` and `unicode-bool` are off: `\s` is
// then expected to be reported by `t_err` as `UnicodePerlClassNotFound`.
2104 | | #[test] |
2105 | | #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))] |
2106 | | fn class_perl_space_disabled() { |
2107 | | assert_eq!( |
2108 | | t_err(r"\s"), |
2109 | | TestError { |
2110 | | kind: hir::ErrorKind::UnicodePerlClassNotFound, |
2111 | | span: Span::new( |
2112 | | Position::new(0, 1, 1), |
2113 | | Position::new(2, 1, 3) |
2114 | | ), |
2115 | | } |
2116 | | ); |
2117 | | } |
2118 | | |
// Compiled only when both `unicode-perl` and `unicode-gencat` are off: `\d`
// is then expected to be reported by `t_err` as `UnicodePerlClassNotFound`.
2119 | | #[test] |
2120 | | #[cfg(all( |
2121 | | not(feature = "unicode-perl"), |
2122 | | not(feature = "unicode-gencat") |
2123 | | ))] |
2124 | | fn class_perl_digit_disabled() { |
2125 | | assert_eq!( |
2126 | | t_err(r"\d"), |
2127 | | TestError { |
2128 | | kind: hir::ErrorKind::UnicodePerlClassNotFound, |
2129 | | span: Span::new( |
2130 | | Position::new(0, 1, 1), |
2131 | | Position::new(2, 1, 3) |
2132 | | ), |
2133 | | } |
2134 | | ); |
2135 | | } |
2136 | | |
// General-category queries through `\p` / `\P`; compiled only with the
// `unicode-gencat` feature. The spellings `Z`, `z`, `Separator`,
// `se PaRa ToR`, `gc:Separator` and `gc=Separator` are all expected to give
// the same class as the query `Z`.
2137 | | #[test] |
2138 | | #[cfg(feature = "unicode-gencat")] |
2139 | | fn class_unicode_gencat() { |
2140 | | assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z"))); |
2141 | | assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z"))); |
2142 | | assert_eq!( |
2143 | | t(r"\p{Separator}"), |
2144 | | hir_uclass_query(ClassQuery::Binary("Z")) |
2145 | | ); |
2146 | | assert_eq!( |
2147 | | t(r"\p{se PaRa ToR}"), |
2148 | | hir_uclass_query(ClassQuery::Binary("Z")) |
2149 | | ); |
2150 | | assert_eq!( |
2151 | | t(r"\p{gc:Separator}"), |
2152 | | hir_uclass_query(ClassQuery::Binary("Z")) |
2153 | | ); |
2154 | | assert_eq!( |
2155 | | t(r"\p{gc=Separator}"), |
2156 | | hir_uclass_query(ClassQuery::Binary("Z")) |
2157 | | ); |
2158 | | assert_eq!( |
2159 | | t(r"\p{Other}"), |
2160 | | hir_uclass_query(ClassQuery::Binary("Other")) |
2161 | | ); |
2162 | | assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other"))); |
2163 | | |
// Negated spellings. Note that `\P{gc!=separator}` is expected to equal
// `\P{separator}` here.
2164 | | assert_eq!( |
2165 | | t(r"\PZ"), |
2166 | | hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))) |
2167 | | ); |
2168 | | assert_eq!( |
2169 | | t(r"\P{separator}"), |
2170 | | hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))) |
2171 | | ); |
2172 | | assert_eq!( |
2173 | | t(r"\P{gc!=separator}"), |
2174 | | hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))) |
2175 | | ); |
2176 | | |
// The names `any`, `assigned` and `ascii`, with and without the `gc:` prefix.
2177 | | assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any"))); |
2178 | | assert_eq!( |
2179 | | t(r"\p{assigned}"), |
2180 | | hir_uclass_query(ClassQuery::Binary("Assigned")) |
2181 | | ); |
2182 | | assert_eq!( |
2183 | | t(r"\p{ascii}"), |
2184 | | hir_uclass_query(ClassQuery::Binary("ASCII")) |
2185 | | ); |
2186 | | assert_eq!( |
2187 | | t(r"\p{gc:any}"), |
2188 | | hir_uclass_query(ClassQuery::Binary("Any")) |
2189 | | ); |
2190 | | assert_eq!( |
2191 | | t(r"\p{gc:assigned}"), |
2192 | | hir_uclass_query(ClassQuery::Binary("Assigned")) |
2193 | | ); |
2194 | | assert_eq!( |
2195 | | t(r"\p{gc:ascii}"), |
2196 | | hir_uclass_query(ClassQuery::Binary("ASCII")) |
2197 | | ); |
2198 | | |
// Failure cases: `\p` under `(?-u)` (`UnicodeNotAllowed`), an unknown
// property name (`UnicodePropertyNotFound`), and an unknown value for `gc:`
// (`UnicodePropertyValueNotFound`).
2199 | | assert_eq!( |
2200 | | t_err(r"(?-u)\pZ"), |
2201 | | TestError { |
2202 | | kind: hir::ErrorKind::UnicodeNotAllowed, |
2203 | | span: Span::new( |
2204 | | Position::new(5, 1, 6), |
2205 | | Position::new(8, 1, 9) |
2206 | | ), |
2207 | | } |
2208 | | ); |
2209 | | assert_eq!( |
2210 | | t_err(r"(?-u)\p{Separator}"), |
2211 | | TestError { |
2212 | | kind: hir::ErrorKind::UnicodeNotAllowed, |
2213 | | span: Span::new( |
2214 | | Position::new(5, 1, 6), |
2215 | | Position::new(18, 1, 19) |
2216 | | ), |
2217 | | } |
2218 | | ); |
2219 | | assert_eq!( |
2220 | | t_err(r"\pE"), |
2221 | | TestError { |
2222 | | kind: hir::ErrorKind::UnicodePropertyNotFound, |
2223 | | span: Span::new( |
2224 | | Position::new(0, 1, 1), |
2225 | | Position::new(3, 1, 4) |
2226 | | ), |
2227 | | } |
2228 | | ); |
2229 | | assert_eq!( |
2230 | | t_err(r"\p{Foo}"), |
2231 | | TestError { |
2232 | | kind: hir::ErrorKind::UnicodePropertyNotFound, |
2233 | | span: Span::new( |
2234 | | Position::new(0, 1, 1), |
2235 | | Position::new(7, 1, 8) |
2236 | | ), |
2237 | | } |
2238 | | ); |
2239 | | assert_eq!( |
2240 | | t_err(r"\p{gc:Foo}"), |
2241 | | TestError { |
2242 | | kind: hir::ErrorKind::UnicodePropertyValueNotFound, |
2243 | | span: Span::new( |
2244 | | Position::new(0, 1, 1), |
2245 | | Position::new(10, 1, 11) |
2246 | | ), |
2247 | | } |
2248 | | ); |
2249 | | } |
2250 | | |
// Compiled only when the `unicode-gencat` feature is off: both
// `\p{Separator}` and `\p{Any}` are then expected to be reported by `t_err`
// as `UnicodePropertyNotFound`.
2251 | | #[test] |
2252 | | #[cfg(not(feature = "unicode-gencat"))] |
2253 | | fn class_unicode_gencat_disabled() { |
2254 | | assert_eq!( |
2255 | | t_err(r"\p{Separator}"), |
2256 | | TestError { |
2257 | | kind: hir::ErrorKind::UnicodePropertyNotFound, |
2258 | | span: Span::new( |
2259 | | Position::new(0, 1, 1), |
2260 | | Position::new(13, 1, 14) |
2261 | | ), |
2262 | | } |
2263 | | ); |
2264 | | |
2265 | | assert_eq!( |
2266 | | t_err(r"\p{Any}"), |
2267 | | TestError { |
2268 | | kind: hir::ErrorKind::UnicodePropertyNotFound, |
2269 | | span: Span::new( |
2270 | | Position::new(0, 1, 1), |
2271 | | Position::new(7, 1, 8) |
2272 | | ), |
2273 | | } |
2274 | | ); |
2275 | | } |
2276 | | |
// Script lookups through `\p{Greek}`; compiled only with the `unicode-script`
// feature. Under `(?i)` the expected class is wrapped in `hir_case_fold`, and
// for `\P` the negation is applied on top of the case-folded class.
2277 | | #[test] |
2278 | | #[cfg(feature = "unicode-script")] |
2279 | | fn class_unicode_script() { |
2280 | | assert_eq!( |
2281 | | t(r"\p{Greek}"), |
2282 | | hir_uclass_query(ClassQuery::Binary("Greek")) |
2283 | | ); |
2284 | | #[cfg(feature = "unicode-case")] |
2285 | | assert_eq!( |
2286 | | t(r"(?i)\p{Greek}"), |
2287 | | hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek"))) |
2288 | | ); |
2289 | | #[cfg(feature = "unicode-case")] |
2290 | | assert_eq!( |
2291 | | t(r"(?i)\P{Greek}"), |
2292 | | hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary( |
2293 | | "Greek" |
2294 | | )))) |
2295 | | ); |
2296 | | |
// An unknown value for `sc:` or `scx:` is expected to be reported as
// `UnicodePropertyValueNotFound`.
2297 | | assert_eq!( |
2298 | | t_err(r"\p{sc:Foo}"), |
2299 | | TestError { |
2300 | | kind: hir::ErrorKind::UnicodePropertyValueNotFound, |
2301 | | span: Span::new( |
2302 | | Position::new(0, 1, 1), |
2303 | | Position::new(10, 1, 11) |
2304 | | ), |
2305 | | } |
2306 | | ); |
2307 | | assert_eq!( |
2308 | | t_err(r"\p{scx:Foo}"), |
2309 | | TestError { |
2310 | | kind: hir::ErrorKind::UnicodePropertyValueNotFound, |
2311 | | span: Span::new( |
2312 | | Position::new(0, 1, 1), |
2313 | | Position::new(11, 1, 12) |
2314 | | ), |
2315 | | } |
2316 | | ); |
2317 | | } |
2318 | | |
// Compiled only when the `unicode-script` feature is off: `\p{Greek}` and
// `\p{scx:Greek}` are then both expected to be reported by `t_err` as
// `UnicodePropertyNotFound`.
2319 | | #[test] |
2320 | | #[cfg(not(feature = "unicode-script"))] |
2321 | | fn class_unicode_script_disabled() { |
2322 | | assert_eq!( |
2323 | | t_err(r"\p{Greek}"), |
2324 | | TestError { |
2325 | | kind: hir::ErrorKind::UnicodePropertyNotFound, |
2326 | | span: Span::new( |
2327 | | Position::new(0, 1, 1), |
2328 | | Position::new(9, 1, 10) |
2329 | | ), |
2330 | | } |
2331 | | ); |
2332 | | |
2333 | | assert_eq!( |
2334 | | t_err(r"\p{scx:Greek}"), |
2335 | | TestError { |
2336 | | kind: hir::ErrorKind::UnicodePropertyNotFound, |
2337 | | span: Span::new( |
2338 | | Position::new(0, 1, 1), |
2339 | | Position::new(13, 1, 14) |
2340 | | ), |
2341 | | } |
2342 | | ); |
2343 | | } |
2344 | | |
// Compiled only with the `unicode-age` feature: an unknown value for `age:`
// is expected to be reported by `t_err` as `UnicodePropertyValueNotFound`.
2345 | | #[test] |
2346 | | #[cfg(feature = "unicode-age")] |
2347 | | fn class_unicode_age() { |
2348 | | assert_eq!( |
2349 | | t_err(r"\p{age:Foo}"), |
2350 | | TestError { |
2351 | | kind: hir::ErrorKind::UnicodePropertyValueNotFound, |
2352 | | span: Span::new( |
2353 | | Position::new(0, 1, 1), |
2354 | | Position::new(11, 1, 12) |
2355 | | ), |
2356 | | } |
2357 | | ); |
2358 | | } |
2359 | | |
// Compiled only with the `unicode-gencat` feature: the negated class
// `\P{any}` is expected to be reported by `t_err` as `EmptyClassNotAllowed`.
2360 | | #[test] |
2361 | | #[cfg(feature = "unicode-gencat")] |
2362 | | fn class_unicode_any_empty() { |
2363 | | assert_eq!( |
2364 | | t_err(r"\P{any}"), |
2365 | | TestError { |
2366 | | kind: hir::ErrorKind::EmptyClassNotAllowed, |
2367 | | span: Span::new( |
2368 | | Position::new(0, 1, 1), |
2369 | | Position::new(7, 1, 8) |
2370 | | ), |
2371 | | } |
2372 | | ); |
2373 | | } |
2374 | | |
// Compiled only when the `unicode-age` feature is off: `\p{age:3.0}` is then
// expected to be reported by `t_err` as `UnicodePropertyNotFound`.
2375 | | #[test] |
2376 | | #[cfg(not(feature = "unicode-age"))] |
2377 | | fn class_unicode_age_disabled() { |
2378 | | assert_eq!( |
2379 | | t_err(r"\p{age:3.0}"), |
2380 | | TestError { |
2381 | | kind: hir::ErrorKind::UnicodePropertyNotFound, |
2382 | | span: Span::new( |
2383 | | Position::new(0, 1, 1), |
2384 | | Position::new(11, 1, 12) |
2385 | | ), |
2386 | | } |
2387 | | ); |
2388 | | } |
2389 | | |
// Bracketed classes built from single characters, ranges, escapes and
// embedded `\d` / `\p` classes. Overlapping (`a-fd-h`) and adjacent
// (`a-fg-m`) ranges are expected as one merged range, and a doubly negated
// class such as `[^\D]` is expected to equal the plain class.
2390 | | #[test] |
2391 | | fn class_bracketed() { |
2392 | | assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')])); |
2393 | | assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')]))); |
2394 | | assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')])); |
2395 | | assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')])); |
2396 | | assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')])); |
2397 | | assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')])); |
2398 | | assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')])); |
2399 | | assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')])); |
2400 | | #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] |
2401 | | assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit"))); |
2402 | | #[cfg(feature = "unicode-gencat")] |
2403 | | assert_eq!( |
2404 | | t(r"[\pZ]"), |
2405 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2406 | | ); |
2407 | | #[cfg(feature = "unicode-gencat")] |
2408 | | assert_eq!( |
2409 | | t(r"[\p{separator}]"), |
2410 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2411 | | ); |
2412 | | #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] |
2413 | | assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit"))); |
2414 | | #[cfg(feature = "unicode-gencat")] |
2415 | | assert_eq!( |
2416 | | t(r"[^\PZ]"), |
2417 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2418 | | ); |
2419 | | #[cfg(feature = "unicode-gencat")] |
2420 | | assert_eq!( |
2421 | | t(r"[^\P{separator}]"), |
2422 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2423 | | ); |
2424 | | #[cfg(all( |
2425 | | feature = "unicode-case", |
2426 | | any(feature = "unicode-perl", feature = "unicode-gencat") |
2427 | | ))] |
2428 | | assert_eq!( |
2429 | | t(r"(?i)[^\D]"), |
2430 | | hir_uclass_query(ClassQuery::Binary("digit")) |
2431 | | ); |
2432 | | #[cfg(all(feature = "unicode-case", feature = "unicode-script"))] |
2433 | | assert_eq!( |
2434 | | t(r"(?i)[^\P{greek}]"), |
2435 | | hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek"))) |
2436 | | ); |
2437 | | |
// Byte-oriented expectations under `(?-u)`; the `\xFF` case goes through
// `t_bytes` rather than `t`.
2438 | | assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')])); |
2439 | | assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')])); |
2440 | | assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')])); |
2441 | | |
// Case-insensitive classes: `k` is expected to also pull in U+212A with
// `(?i)`, but not with `(?i-u)`.
2442 | | #[cfg(feature = "unicode-case")] |
2443 | | assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')])); |
2444 | | #[cfg(feature = "unicode-case")] |
2445 | | assert_eq!( |
2446 | | t("(?i)[k]"), |
2447 | | hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),]) |
2448 | | ); |
2449 | | #[cfg(feature = "unicode-case")] |
2450 | | assert_eq!( |
2451 | | t("(?i)[β]"), |
2452 | | hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),]) |
2453 | | ); |
2454 | | assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),])); |
2455 | | |
// Negated brackets.
2456 | | assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')]))); |
2457 | | assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')]))); |
2458 | | assert_eq!( |
2459 | | t_bytes("(?-u)[^a]"), |
2460 | | hir_negate(hir_bclass(&[(b'a', b'a')])) |
2461 | | ); |
2462 | | #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] |
2463 | | assert_eq!( |
2464 | | t(r"[^\d]"), |
2465 | | hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) |
2466 | | ); |
2467 | | #[cfg(feature = "unicode-gencat")] |
2468 | | assert_eq!( |
2469 | | t(r"[^\pZ]"), |
2470 | | hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))) |
2471 | | ); |
2472 | | #[cfg(feature = "unicode-gencat")] |
2473 | | assert_eq!( |
2474 | | t(r"[^\p{separator}]"), |
2475 | | hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))) |
2476 | | ); |
2477 | | #[cfg(all(feature = "unicode-case", feature = "unicode-script"))] |
2478 | | assert_eq!( |
2479 | | t(r"(?i)[^\p{greek}]"), |
2480 | | hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary( |
2481 | | "greek" |
2482 | | )))) |
2483 | | ); |
2484 | | #[cfg(all(feature = "unicode-case", feature = "unicode-script"))] |
2485 | | assert_eq!( |
2486 | | t(r"(?i)[\P{greek}]"), |
2487 | | hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary( |
2488 | | "greek" |
2489 | | )))) |
2490 | | ); |
2491 | | |
2492 | | // Test some weird cases. |
2493 | | assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')])); |
2494 | | |
2495 | | assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')])); |
2496 | | assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')])); |
2497 | | assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')])); |
2498 | | assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')])); |
2499 | | assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')])); |
2500 | | |
2501 | | assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')])); |
2502 | | assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')])); |
2503 | | assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')])); |
2504 | | assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')])); |
2505 | | assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')])); |
2506 | | |
2507 | | assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')])); |
2508 | | assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')])); |
2509 | | assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')])); |
2510 | | assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')])); |
2511 | | assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')])); |
2512 | | |
// Rejected classes: `(?-u)[^a]` is expected to be `InvalidUtf8` when going
// through `t_err` (the same pattern is accepted via `t_bytes` above), and
// `[^\s\S]` is expected to be `EmptyClassNotAllowed` with and without `(?-u)`.
2513 | | assert_eq!( |
2514 | | t_err("(?-u)[^a]"), |
2515 | | TestError { |
2516 | | kind: hir::ErrorKind::InvalidUtf8, |
2517 | | span: Span::new( |
2518 | | Position::new(5, 1, 6), |
2519 | | Position::new(9, 1, 10) |
2520 | | ), |
2521 | | } |
2522 | | ); |
2523 | | #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] |
2524 | | assert_eq!( |
2525 | | t_err(r"[^\s\S]"), |
2526 | | TestError { |
2527 | | kind: hir::ErrorKind::EmptyClassNotAllowed, |
2528 | | span: Span::new( |
2529 | | Position::new(0, 1, 1), |
2530 | | Position::new(7, 1, 8) |
2531 | | ), |
2532 | | } |
2533 | | ); |
2534 | | #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] |
2535 | | assert_eq!( |
2536 | | t_err(r"(?-u)[^\s\S]"), |
2537 | | TestError { |
2538 | | kind: hir::ErrorKind::EmptyClassNotAllowed, |
2539 | | span: Span::new( |
2540 | | Position::new(5, 1, 6), |
2541 | | Position::new(12, 1, 13) |
2542 | | ), |
2543 | | } |
2544 | | ); |
2545 | | } |
2546 | | |
// Several items inside one bracket are expected to give the union of the
// items, built here with `hir_union`. Most cases are gated on the Unicode
// table features they need (`unicode-gencat`, `unicode-script`,
// `unicode-age`, `unicode-case`).
2547 | | #[test] |
2548 | | fn class_bracketed_union() { |
2549 | | assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')])); |
2550 | | #[cfg(feature = "unicode-gencat")] |
2551 | | assert_eq!( |
2552 | | t(r"[a\pZb]"), |
2553 | | hir_union( |
2554 | | hir_uclass(&[('a', 'b')]), |
2555 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2556 | | ) |
2557 | | ); |
2558 | | #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))] |
2559 | | assert_eq!( |
2560 | | t(r"[\pZ\p{Greek}]"), |
2561 | | hir_union( |
2562 | | hir_uclass_query(ClassQuery::Binary("greek")), |
2563 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2564 | | ) |
2565 | | ); |
2566 | | #[cfg(all( |
2567 | | feature = "unicode-age", |
2568 | | feature = "unicode-gencat", |
2569 | | feature = "unicode-script" |
2570 | | ))] |
2571 | | assert_eq!( |
2572 | | t(r"[\p{age:3.0}\pZ\p{Greek}]"), |
2573 | | hir_union( |
2574 | | hir_uclass_query(ClassQuery::ByValue { |
2575 | | property_name: "age", |
2576 | | property_value: "3.0", |
2577 | | }), |
2578 | | hir_union( |
2579 | | hir_uclass_query(ClassQuery::Binary("greek")), |
2580 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2581 | | ) |
2582 | | ) |
2583 | | ); |
// Nested brackets are expected to give the union of everything they contain.
2584 | | #[cfg(all( |
2585 | | feature = "unicode-age", |
2586 | | feature = "unicode-gencat", |
2587 | | feature = "unicode-script" |
2588 | | ))] |
2589 | | assert_eq!( |
2590 | | t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"), |
2591 | | hir_union( |
2592 | | hir_uclass_query(ClassQuery::ByValue { |
2593 | | property_name: "age", |
2594 | | property_value: "3.0", |
2595 | | }), |
2596 | | hir_union( |
2597 | | hir_uclass_query(ClassQuery::Binary("cyrillic")), |
2598 | | hir_union( |
2599 | | hir_uclass_query(ClassQuery::Binary("greek")), |
2600 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2601 | | ) |
2602 | | ) |
2603 | | ) |
2604 | | ); |
2605 | | |
// The same three-way union combined with `(?i)`, with negation, and with
// both; in the last case the negation wraps the case-folded union.
2606 | | #[cfg(all( |
2607 | | feature = "unicode-age", |
2608 | | feature = "unicode-case", |
2609 | | feature = "unicode-gencat", |
2610 | | feature = "unicode-script" |
2611 | | ))] |
2612 | | assert_eq!( |
2613 | | t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"), |
2614 | | hir_case_fold(hir_union( |
2615 | | hir_uclass_query(ClassQuery::ByValue { |
2616 | | property_name: "age", |
2617 | | property_value: "3.0", |
2618 | | }), |
2619 | | hir_union( |
2620 | | hir_uclass_query(ClassQuery::Binary("greek")), |
2621 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2622 | | ) |
2623 | | )) |
2624 | | ); |
2625 | | #[cfg(all( |
2626 | | feature = "unicode-age", |
2627 | | feature = "unicode-gencat", |
2628 | | feature = "unicode-script" |
2629 | | ))] |
2630 | | assert_eq!( |
2631 | | t(r"[^\p{age:3.0}\pZ\p{Greek}]"), |
2632 | | hir_negate(hir_union( |
2633 | | hir_uclass_query(ClassQuery::ByValue { |
2634 | | property_name: "age", |
2635 | | property_value: "3.0", |
2636 | | }), |
2637 | | hir_union( |
2638 | | hir_uclass_query(ClassQuery::Binary("greek")), |
2639 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2640 | | ) |
2641 | | )) |
2642 | | ); |
2643 | | #[cfg(all( |
2644 | | feature = "unicode-age", |
2645 | | feature = "unicode-case", |
2646 | | feature = "unicode-gencat", |
2647 | | feature = "unicode-script" |
2648 | | ))] |
2649 | | assert_eq!( |
2650 | | t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"), |
2651 | | hir_negate(hir_case_fold(hir_union( |
2652 | | hir_uclass_query(ClassQuery::ByValue { |
2653 | | property_name: "age", |
2654 | | property_value: "3.0", |
2655 | | }), |
2656 | | hir_union( |
2657 | | hir_uclass_query(ClassQuery::Binary("greek")), |
2658 | | hir_uclass_query(ClassQuery::Binary("separator")) |
2659 | | ) |
2660 | | ))) |
2661 | | ); |
2662 | | } |
2663 | | |
// A negated bracket nested inside another bracket. `[a[^c]]` is expected to
// equal the negation of `c`, and `[^a[^c]]` to equal the class `c` itself.
2664 | | #[test] |
2665 | | fn class_bracketed_nested() { |
2666 | | assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')]))); |
2667 | | assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')]))); |
2668 | | assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[]))); |
2669 | | |
2670 | | assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')])); |
2671 | | assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')])); |
2672 | | |
// The same patterns with `(?i)`; only compiled with `unicode-case`.
2673 | | #[cfg(feature = "unicode-case")] |
2674 | | assert_eq!( |
2675 | | t(r"(?i)[a[^c]]"), |
2676 | | hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))) |
2677 | | ); |
2678 | | #[cfg(feature = "unicode-case")] |
2679 | | assert_eq!( |
2680 | | t(r"(?i)[a-b[^c]]"), |
2681 | | hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))) |
2682 | | ); |
2683 | | |
2684 | | #[cfg(feature = "unicode-case")] |
2685 | | assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')])); |
2686 | | #[cfg(feature = "unicode-case")] |
2687 | | assert_eq!( |
2688 | | t(r"(?i)[^a-b[^c]]"), |
2689 | | hir_uclass(&[('C', 'C'), ('c', 'c')]) |
2690 | | ); |
2691 | | |
// `[^a-c[^c]]` is expected to be reported as `EmptyClassNotAllowed`, with
// and without `(?i)`.
2692 | | assert_eq!( |
2693 | | t_err(r"[^a-c[^c]]"), |
2694 | | TestError { |
2695 | | kind: hir::ErrorKind::EmptyClassNotAllowed, |
2696 | | span: Span::new( |
2697 | | Position::new(0, 1, 1), |
2698 | | Position::new(10, 1, 11) |
2699 | | ), |
2700 | | } |
2701 | | ); |
2702 | | #[cfg(feature = "unicode-case")] |
2703 | | assert_eq!( |
2704 | | t_err(r"(?i)[^a-c[^c]]"), |
2705 | | TestError { |
2706 | | kind: hir::ErrorKind::EmptyClassNotAllowed, |
2707 | | span: Span::new( |
2708 | | Position::new(4, 1, 5), |
2709 | | Position::new(14, 1, 15) |
2710 | | ), |
2711 | | } |
2712 | | ); |
2713 | | } |
2714 | | |
// The `&&` operator inside brackets. The first group of assertions uses the
// default mode, followed by the same patterns under `(?-u)`, `(?i)` and
// `(?i-u)`; the case-insensitive expectations wrap the plain ones in
// `hir_case_fold`.
2715 | | #[test] |
2716 | | fn class_bracketed_intersect() { |
2717 | | assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')])); |
2718 | | assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')])); |
2719 | | assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')])); |
2720 | | assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')])); |
2721 | | assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')])); |
2722 | | assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')])); |
2723 | | assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')])); |
2724 | | assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')])); |
2725 | | assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')]))); |
2726 | | |
2727 | | assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')])); |
2728 | | assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')])); |
2729 | | assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')])); |
2730 | | assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')])); |
2731 | | assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')])); |
2732 | | assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')])); |
2733 | | |
2734 | | #[cfg(feature = "unicode-case")] |
2735 | | assert_eq!( |
2736 | | t("(?i)[abc&&b-c]"), |
2737 | | hir_case_fold(hir_uclass(&[('b', 'c')])) |
2738 | | ); |
2739 | | #[cfg(feature = "unicode-case")] |
2740 | | assert_eq!( |
2741 | | t("(?i)[abc&&[b-c]]"), |
2742 | | hir_case_fold(hir_uclass(&[('b', 'c')])) |
2743 | | ); |
2744 | | #[cfg(feature = "unicode-case")] |
2745 | | assert_eq!( |
2746 | | t("(?i)[[abc]&&[b-c]]"), |
2747 | | hir_case_fold(hir_uclass(&[('b', 'c')])) |
2748 | | ); |
2749 | | #[cfg(feature = "unicode-case")] |
2750 | | assert_eq!( |
2751 | | t("(?i)[a-z&&b-y&&c-x]"), |
2752 | | hir_case_fold(hir_uclass(&[('c', 'x')])) |
2753 | | ); |
2754 | | #[cfg(feature = "unicode-case")] |
2755 | | assert_eq!( |
2756 | | t("(?i)[c-da-b&&a-d]"), |
2757 | | hir_case_fold(hir_uclass(&[('a', 'd')])) |
2758 | | ); |
2759 | | #[cfg(feature = "unicode-case")] |
2760 | | assert_eq!( |
2761 | | t("(?i)[a-d&&c-da-b]"), |
2762 | | hir_case_fold(hir_uclass(&[('a', 'd')])) |
2763 | | ); |
2764 | | |
2765 | | assert_eq!( |
2766 | | t("(?i-u)[abc&&b-c]"), |
2767 | | hir_case_fold(hir_bclass(&[(b'b', b'c')])) |
2768 | | ); |
2769 | | assert_eq!( |
2770 | | t("(?i-u)[abc&&[b-c]]"), |
2771 | | hir_case_fold(hir_bclass(&[(b'b', b'c')])) |
2772 | | ); |
2773 | | assert_eq!( |
2774 | | t("(?i-u)[[abc]&&[b-c]]"), |
2775 | | hir_case_fold(hir_bclass(&[(b'b', b'c')])) |
2776 | | ); |
2777 | | assert_eq!( |
2778 | | t("(?i-u)[a-z&&b-y&&c-x]"), |
2779 | | hir_case_fold(hir_bclass(&[(b'c', b'x')])) |
2780 | | ); |
2781 | | assert_eq!( |
2782 | | t("(?i-u)[c-da-b&&a-d]"), |
2783 | | hir_case_fold(hir_bclass(&[(b'a', b'd')])) |
2784 | | ); |
2785 | | assert_eq!( |
2786 | | t("(?i-u)[a-d&&c-da-b]"), |
2787 | | hir_case_fold(hir_bclass(&[(b'a', b'd')])) |
2788 | | ); |
2789 | | |
2790 | | // In `[a^]`, `^` does not need to be escaped, so it makes sense that |
2791 | | // `^` is also allowed to be unescaped after `&&`. |
2792 | | assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')])); |
2793 | | // `]` needs to be escaped after `&&` since it's not at start of class. |
2794 | | assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')])); |
2795 | | assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')])); |
2796 | | assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')])); |
2797 | | assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')])); |
2798 | | // Test precedence. |
2799 | | assert_eq!( |
2800 | | t(r"[a-w&&[^c-g]z]"), |
2801 | | hir_uclass(&[('a', 'b'), ('h', 'w')]) |
2802 | | ); |
2803 | | } |
2804 | | |
/// Checks how negation (`^`) combines with intersection (`&&`) inside
/// bracketed classes, first for patterns without `(?-u)` (translated with
/// `t`) and then for `(?-u)` patterns (translated with `t_bytes`).
#[test]
fn class_bracketed_intersect_negate() {
    // Expected byte-class HIR derived from the ranges of an ASCII class.
    let ascii_bytes =
        |kind: &ast::ClassAsciiKind| hir_bclass_from_char(ascii_class(kind));

    // Patterns without `(?-u)`: the expectations are Unicode classes.
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
    #[cfg(feature = "unicode-perl")]
    {
        let digit = || hir_uclass_query(ClassQuery::Binary("digit"));

        assert_eq!(t(r"[^\w&&\d]"), hir_negate(digit()));
        assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(digit()));
        // Negating twice is expected to yield the plain digit class.
        assert_eq!(t(r"[^[^\w&&\d]]"), digit());
        assert_eq!(
            t(r"[[[^\w]&&[^\d]]]"),
            hir_negate(hir_uclass_perl_word())
        );
    }

    // Patterns with `(?-u)`: the expectations are byte classes.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t_bytes(r"(?-u)[^\w&&\d]"),
        hir_negate(ascii_bytes(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[\w&&\d]]"),
        hir_negate(ascii_bytes(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[^\w&&\d]]"),
        ascii_bytes(&ast::ClassAsciiKind::Digit)
    );
    assert_eq!(
        t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
        hir_negate(ascii_bytes(&ast::ClassAsciiKind::Word))
    );
}
2854 | | |
/// Checks translation of the class difference operator (`--`) for a
/// Unicode class and for a `(?-u)` byte class.
#[test]
fn class_bracketed_difference() {
    #[cfg(feature = "unicode-gencat")]
    {
        let letters = hir_uclass_query(ClassQuery::Binary("letter"));
        let ascii = hir_uclass(&[('\0', '\x7F')]);
        assert_eq!(t(r"[\pL--[:ascii:]]"), hir_difference(letters, ascii));
    }

    // Removing `[:lower:]` from `[:alpha:]` is expected to leave `A-Z`.
    let uppercase = hir_bclass(&[(b'A', b'Z')]);
    assert_eq!(t(r"(?-u)[[:alpha:]--[:lower:]]"), uppercase);
}
2871 | | |
/// Checks translation of the symmetric difference operator (`~~`) for
/// Unicode classes and for a `(?-u)` byte class.
#[test]
fn class_bracketed_symmetric_difference() {
    #[cfg(feature = "unicode-script")]
    {
        let greek_only_in_one = hir_uclass(&[
            ('\u{0342}', '\u{0342}'),
            ('\u{0345}', '\u{0345}'),
            ('\u{1DC0}', '\u{1DC1}'),
        ]);
        assert_eq!(t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), greek_only_in_one);
    }

    // `a-g` and `c-j` overlap on `c-g`; only the non-shared ends remain.
    let unicode_ends = hir_uclass(&[('a', 'b'), ('h', 'j')]);
    assert_eq!(t(r"[a-g~~c-j]"), unicode_ends);

    let byte_ends = hir_bclass(&[(b'a', b'b'), (b'h', b'j')]);
    assert_eq!(t(r"(?-u)[a-g~~c-j]"), byte_ends);
}
2890 | | |
/// Checks that `(?x)` patterns containing whitespace and `#` comments in
/// the middle of escapes, class names and counted repetitions translate to
/// the expected HIR.
#[test]
fn ignore_whitespace() {
    // Most of the hex-escape cases below expect the single literal `S`.
    let lit_s = || hir_lit("S");

    assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));

    // Braced hex escape, written inline and then spread over several lines.
    let hex_braced_inline = r"(?x)\x { 53 }";
    assert_eq!(t(hex_braced_inline), lit_s());
    let hex_braced_commented = r"(?x)\x # comment
{ # comment
53 # comment
} #comment";
    assert_eq!(t(hex_braced_commented), lit_s());

    // Unbraced hex escape with the digits separated from `\x`.
    let hex_plain_inline = r"(?x)\x 53";
    assert_eq!(t(hex_plain_inline), lit_s());
    let hex_plain_commented = r"(?x)\x # comment
53 # comment";
    assert_eq!(t(hex_plain_commented), lit_s());
    assert_eq!(t(r"(?x)\x5 3"), lit_s());

    #[cfg(feature = "unicode-gencat")]
    {
        let separator_class = r"(?x)\p # comment
{ # comment
Separator # comment
} # comment";
        assert_eq!(
            t(separator_class),
            hir_uclass_query(ClassQuery::Binary("separator"))
        );
    }

    // Counted repetition with every token on its own commented line.
    let counted_repetition = r"(?x)a # comment
{ # comment
5 # comment
, # comment
10 # comment
} # comment";
    let five_to_ten_a =
        hir_range(true, hir::RepetitionRange::Bounded(5, 10), hir_lit("a"));
    assert_eq!(t(counted_repetition), five_to_ten_a);

    // An escaped space is expected to survive as a literal space.
    assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
}
2936 | | |
/// Checks `is_always_utf8` on patterns translated with `t_bytes`.
#[test]
fn analysis_is_always_utf8() {
    // Positive examples.
    let always_utf8 = [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
    ];
    for &pattern in always_utf8.iter() {
        assert!(
            t_bytes(pattern).is_always_utf8(),
            "expected is_always_utf8() for {:?}",
            pattern
        );
    }

    // Negative examples.
    let not_always_utf8 = [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
        r"(?-u)\B",
    ];
    for &pattern in not_always_utf8.iter() {
        assert!(
            !t_bytes(pattern).is_always_utf8(),
            "expected !is_always_utf8() for {:?}",
            pattern
        );
    }
}
2959 | | |
/// Checks `is_all_assertions` on patterns built only from anchors and word
/// boundaries, plus one pattern that also contains a literal.
#[test]
fn analysis_is_all_assertions() {
    // Positive examples.
    let only_assertions = [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ];
    for &pattern in only_assertions.iter() {
        assert!(
            t(pattern).is_all_assertions(),
            "expected is_all_assertions() for {:?}",
            pattern
        );
    }

    // Negative examples.
    let with_literal = r"^a";
    assert!(!t(with_literal).is_all_assertions());
}
2977 | | |
/// Checks the four anchoring predicates on translated patterns:
/// `is_anchored_start`, `is_anchored_end`, `is_line_anchored_start` and
/// `is_line_anchored_end`.
///
/// Each group of positive examples asserts the start/end predicates and
/// their line-oriented counterparts for the same family of patterns.
#[test]
fn analysis_is_anchored() {
    // Positive examples.
    assert!(t(r"^").is_anchored_start());
    assert!(t(r"$").is_anchored_end());
    assert!(t(r"^").is_line_anchored_start());
    assert!(t(r"$").is_line_anchored_end());

    assert!(t(r"^^").is_anchored_start());
    assert!(t(r"$$").is_anchored_end());
    assert!(t(r"^^").is_line_anchored_start());
    assert!(t(r"$$").is_line_anchored_end());

    assert!(t(r"^$").is_anchored_start());
    assert!(t(r"^$").is_anchored_end());
    assert!(t(r"^$").is_line_anchored_start());
    assert!(t(r"^$").is_line_anchored_end());

    assert!(t(r"^foo").is_anchored_start());
    assert!(t(r"foo$").is_anchored_end());
    assert!(t(r"^foo").is_line_anchored_start());
    assert!(t(r"foo$").is_line_anchored_end());

    assert!(t(r"^foo|^bar").is_anchored_start());
    assert!(t(r"foo$|bar$").is_anchored_end());
    assert!(t(r"^foo|^bar").is_line_anchored_start());
    assert!(t(r"foo$|bar$").is_line_anchored_end());

    assert!(t(r"^(foo|bar)").is_anchored_start());
    assert!(t(r"(foo|bar)$").is_anchored_end());
    assert!(t(r"^(foo|bar)").is_line_anchored_start());
    assert!(t(r"(foo|bar)$").is_line_anchored_end());

    // `+` repeats at least once, so the anchor is still required.
    assert!(t(r"^+").is_anchored_start());
    assert!(t(r"$+").is_anchored_end());
    assert!(t(r"^+").is_line_anchored_start());
    assert!(t(r"$+").is_line_anchored_end());
    assert!(t(r"^++").is_anchored_start());
    assert!(t(r"$++").is_anchored_end());
    assert!(t(r"^++").is_line_anchored_start());
    assert!(t(r"$++").is_line_anchored_end());
    assert!(t(r"(^)+").is_anchored_start());
    assert!(t(r"($)+").is_anchored_end());
    assert!(t(r"(^)+").is_line_anchored_start());
    assert!(t(r"($)+").is_line_anchored_end());

    // Anchors in reversed order: like `^$` above, each of the four
    // predicates is checked exactly once.
    assert!(t(r"$^").is_anchored_start());
    assert!(t(r"$^").is_anchored_end());
    assert!(t(r"$^").is_line_anchored_start());
    assert!(t(r"$^").is_line_anchored_end());
    assert!(t(r"$^|^$").is_anchored_start());
    assert!(t(r"$^|^$").is_anchored_end());
    assert!(t(r"$^|^$").is_line_anchored_start());
    assert!(t(r"$^|^$").is_line_anchored_end());

    // Anchors next to other zero-width assertions.
    assert!(t(r"\b^").is_anchored_start());
    assert!(t(r"$\b").is_anchored_end());
    assert!(t(r"\b^").is_line_anchored_start());
    assert!(t(r"$\b").is_line_anchored_end());
    assert!(t(r"^(?m:^)").is_anchored_start());
    assert!(t(r"(?m:$)$").is_anchored_end());
    assert!(t(r"^(?m:^)").is_line_anchored_start());
    assert!(t(r"(?m:$)$").is_line_anchored_end());
    assert!(t(r"(?m:^)^").is_anchored_start());
    assert!(t(r"$(?m:$)").is_anchored_end());
    assert!(t(r"(?m:^)^").is_line_anchored_start());
    assert!(t(r"$(?m:$)").is_line_anchored_end());

    // Negative examples.
    assert!(!t(r"(?m)^").is_anchored_start());
    assert!(!t(r"(?m)$").is_anchored_end());
    assert!(!t(r"(?m:^$)|$^").is_anchored_start());
    assert!(!t(r"(?m:^$)|$^").is_anchored_end());
    assert!(!t(r"$^|(?m:^$)").is_anchored_start());
    assert!(!t(r"$^|(?m:^$)").is_anchored_end());

    assert!(!t(r"a^").is_anchored_start());
    assert!(!t(r"$a").is_anchored_start());
    assert!(!t(r"a^").is_line_anchored_start());
    assert!(!t(r"$a").is_line_anchored_start());

    assert!(!t(r"a^").is_anchored_end());
    assert!(!t(r"$a").is_anchored_end());
    assert!(!t(r"a^").is_line_anchored_end());
    assert!(!t(r"$a").is_line_anchored_end());

    assert!(!t(r"^foo|bar").is_anchored_start());
    assert!(!t(r"foo|bar$").is_anchored_end());
    assert!(!t(r"^foo|bar").is_line_anchored_start());
    assert!(!t(r"foo|bar$").is_line_anchored_end());

    // `*` permits zero repetitions, so the anchor is not required.
    assert!(!t(r"^*").is_anchored_start());
    assert!(!t(r"$*").is_anchored_end());
    assert!(!t(r"^*").is_line_anchored_start());
    assert!(!t(r"$*").is_line_anchored_end());
    assert!(!t(r"^*+").is_anchored_start());
    assert!(!t(r"$*+").is_anchored_end());
    assert!(!t(r"^*+").is_line_anchored_start());
    assert!(!t(r"$*+").is_line_anchored_end());
    assert!(!t(r"^+*").is_anchored_start());
    assert!(!t(r"$+*").is_anchored_end());
    assert!(!t(r"^+*").is_line_anchored_start());
    assert!(!t(r"$+*").is_line_anchored_end());
    assert!(!t(r"(^)*").is_anchored_start());
    assert!(!t(r"($)*").is_anchored_end());
    assert!(!t(r"(^)*").is_line_anchored_start());
    assert!(!t(r"($)*").is_line_anchored_end());
}
3086 | | |
/// Checks `is_line_anchored_start` and `is_line_anchored_end` on patterns
/// that use multi-line (`m`) anchors.
#[test]
fn analysis_is_line_anchored() {
    // Each entry pairs a pattern checked for a line-anchored start with a
    // pattern checked for a line-anchored end.
    let cases = [
        (r"(?m)^(foo|bar)", r"(?m)(foo|bar)$"),
        (r"(?m)^foo|^bar", r"(?m)foo$|bar$"),
        (r"(?m)^", r"(?m)$"),
        (r"(?m:^$)|$^", r"(?m:^$)|$^"),
        (r"$^|(?m:^$)", r"$^|(?m:^$)"),
    ];
    for &(start_pattern, end_pattern) in cases.iter() {
        assert!(
            t(start_pattern).is_line_anchored_start(),
            "expected is_line_anchored_start() for {:?}",
            start_pattern
        );
        assert!(
            t(end_pattern).is_line_anchored_end(),
            "expected is_line_anchored_end() for {:?}",
            end_pattern
        );
    }
}
3104 | | |
/// Checks `is_any_anchored_start` and `is_any_anchored_end` against a
/// table of patterns and the expected answer for each.
#[test]
fn analysis_is_any_anchored() {
    // (pattern, expected `is_any_anchored_start`)
    let start_cases = [
        // Positive examples.
        (r"^", true),
        (r"\A", true),
        // Negative examples.
        (r"(?m)^", false),
        (r"$", false),
    ];
    for &(pattern, expected) in start_cases.iter() {
        assert_eq!(
            t(pattern).is_any_anchored_start(),
            expected,
            "is_any_anchored_start() for {:?}",
            pattern
        );
    }

    // (pattern, expected `is_any_anchored_end`)
    let end_cases = [
        // Positive examples.
        (r"$", true),
        (r"\z", true),
        // Negative examples.
        (r"(?m)$", false),
        (r"^", false),
    ];
    for &(pattern, expected) in end_cases.iter() {
        assert_eq!(
            t(pattern).is_any_anchored_end(),
            expected,
            "is_any_anchored_end() for {:?}",
            pattern
        );
    }
}
3119 | | |
/// Checks `is_match_empty` on patterns that are expected to accept the
/// empty string and on patterns that are expected to require input.
#[test]
fn analysis_is_match_empty() {
    // Asserts `is_match_empty()` for every pattern in the slice, in order.
    let assert_all_match_empty = |patterns: &[&str]| {
        for &pattern in patterns {
            assert!(
                t(pattern).is_match_empty(),
                "expected is_match_empty() for {:?}",
                pattern
            );
        }
    };

    // Positive examples.
    assert_all_match_empty(&[
        r"",
        r"()",
        r"()*",
        r"()+",
        r"()?",
        r"a*",
        r"a?",
        r"a{0}",
        r"a{0,}",
        r"a{0,1}",
        r"a{0,10}",
    ]);
    #[cfg(feature = "unicode-gencat")]
    assert_all_match_empty(&[r"\pL*"]);
    assert_all_match_empty(&[
        r"a*|b",
        r"b|a*",
        r"a|",
        r"|a",
        r"a||b",
        r"a*a?(abcd)*",
        r"^",
        r"$",
        r"(?m)^",
        r"(?m)$",
        r"\A",
        r"\z",
        r"\B",
    ]);
    // This one pattern goes through `t_bytes` rather than `t`.
    assert!(t_bytes(r"(?-u)\B").is_match_empty());
    assert_all_match_empty(&[r"\b", r"(?-u)\b"]);

    // Negative examples.
    let requires_input = [
        r"a+",
        r"a{1}",
        r"a{1,}",
        r"a{1,2}",
        r"a{1,10}",
        r"b|a",
        r"a*a+(abcd)*",
    ];
    for &pattern in requires_input.iter() {
        assert!(
            !t(pattern).is_match_empty(),
            "expected !is_match_empty() for {:?}",
            pattern
        );
    }
}
3162 | | |
/// Checks `is_literal` on plain literal patterns and on patterns that
/// contain anything else (empty, anchors, alternation, groups, repetition,
/// classes).
#[test]
fn analysis_is_literal() {
    // Positive examples.
    let literals = [r"a", r"ab", r"abc", r"(?m)abc"];
    for &pattern in literals.iter() {
        assert!(
            t(pattern).is_literal(),
            "expected is_literal() for {:?}",
            pattern
        );
    }

    // Negative examples.
    let non_literals =
        [r"", r"^", r"a|b", r"(a)", r"a+", r"foo(a)", r"(a)foo", r"[a]"];
    for &pattern in non_literals.iter() {
        assert!(
            !t(pattern).is_literal(),
            "expected !is_literal() for {:?}",
            pattern
        );
    }
}
3181 | | |
/// Checks `is_alternation_literal` on literals and alternations of
/// literals, and on patterns that contain groups, classes, repetition,
/// anchors or nothing at all.
#[test]
fn analysis_is_alternation_literal() {
    // Positive examples.
    let alternation_literals = [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"a|b",
        r"a|b|c",
        r"foo|bar",
        r"foo|bar|baz",
    ];
    for &pattern in alternation_literals.iter() {
        assert!(
            t(pattern).is_alternation_literal(),
            "expected is_alternation_literal() for {:?}",
            pattern
        );
    }

    // Negative examples.
    let others = [
        r"",
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
        r"[a]|b",
        r"a|[b]",
        r"(a)|b",
        r"a|(b)",
    ];
    for &pattern in others.iter() {
        assert!(
            !t(pattern).is_alternation_literal(),
            "expected !is_alternation_literal() for {:?}",
            pattern
        );
    }
}
3207 | | } |