Coverage Report

Created: 2025-10-29 07:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-syntax-0.6.29/src/hir/translate.rs
Line
Count
Source
1
/*!
2
Defines a translator that converts an `Ast` to an `Hir`.
3
*/
4
5
use std::cell::{Cell, RefCell};
6
use std::result;
7
8
use crate::ast::{self, Ast, Span, Visitor};
9
use crate::hir::{self, Error, ErrorKind, Hir};
10
use crate::unicode::{self, ClassQuery};
11
12
type Result<T> = result::Result<T, Error>;
13
14
/// A builder for constructing an AST->HIR translator.
15
#[derive(Clone, Debug)]
16
pub struct TranslatorBuilder {
17
    allow_invalid_utf8: bool,
18
    flags: Flags,
19
}
20
21
impl Default for TranslatorBuilder {
22
0
    fn default() -> TranslatorBuilder {
23
0
        TranslatorBuilder::new()
24
0
    }
25
}
26
27
impl TranslatorBuilder {
28
    /// Create a new translator builder with a default configuration.
29
0
    pub fn new() -> TranslatorBuilder {
30
0
        TranslatorBuilder {
31
0
            allow_invalid_utf8: false,
32
0
            flags: Flags::default(),
33
0
        }
34
0
    }
35
36
    /// Build a translator using the current configuration.
37
0
    pub fn build(&self) -> Translator {
38
0
        Translator {
39
0
            stack: RefCell::new(vec![]),
40
0
            flags: Cell::new(self.flags),
41
0
            allow_invalid_utf8: self.allow_invalid_utf8,
42
0
        }
43
0
    }
44
45
    /// When enabled, translation will permit the construction of a regular
46
    /// expression that may match invalid UTF-8.
47
    ///
48
    /// When disabled (the default), the translator is guaranteed to produce
49
    /// an expression that will only ever match valid UTF-8 (otherwise, the
50
    /// translator will return an error).
51
    ///
52
    /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53
    /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54
    /// the translator to return an error. Namely, a negated ASCII word boundary
55
    /// can result in matching positions that aren't valid UTF-8 boundaries.
56
0
    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57
0
        self.allow_invalid_utf8 = yes;
58
0
        self
59
0
    }
60
61
    /// Enable or disable the case insensitive flag (`i`) by default.
62
0
    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63
0
        self.flags.case_insensitive = if yes { Some(true) } else { None };
64
0
        self
65
0
    }
66
67
    /// Enable or disable the multi-line matching flag (`m`) by default.
68
0
    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69
0
        self.flags.multi_line = if yes { Some(true) } else { None };
70
0
        self
71
0
    }
72
73
    /// Enable or disable the "dot matches any character" flag (`s`) by
74
    /// default.
75
0
    pub fn dot_matches_new_line(
76
0
        &mut self,
77
0
        yes: bool,
78
0
    ) -> &mut TranslatorBuilder {
79
0
        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80
0
        self
81
0
    }
82
83
    /// Enable or disable the "swap greed" flag (`U`) by default.
84
0
    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85
0
        self.flags.swap_greed = if yes { Some(true) } else { None };
86
0
        self
87
0
    }
88
89
    /// Enable or disable the Unicode flag (`u`) by default.
90
0
    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91
0
        self.flags.unicode = if yes { None } else { Some(false) };
92
0
        self
93
0
    }
94
}
95
96
/// A translator maps abstract syntax to a high level intermediate
97
/// representation.
98
///
99
/// A translator may benefit from reuse. That is, a translator can translate
100
/// many abstract syntax trees.
101
///
102
/// A `Translator` can be configured in more detail via a
103
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
104
#[derive(Clone, Debug)]
105
pub struct Translator {
106
    /// Our call stack, but on the heap.
107
    stack: RefCell<Vec<HirFrame>>,
108
    /// The current flag settings.
109
    flags: Cell<Flags>,
110
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
111
    allow_invalid_utf8: bool,
112
}
113
114
impl Translator {
115
    /// Create a new translator using the default configuration.
116
0
    pub fn new() -> Translator {
117
0
        TranslatorBuilder::new().build()
118
0
    }
119
120
    /// Translate the given abstract syntax tree (AST) into a high level
121
    /// intermediate representation (HIR).
122
    ///
123
    /// If there was a problem doing the translation, then an HIR-specific
124
    /// error is returned.
125
    ///
126
    /// The original pattern string used to produce the `Ast` *must* also be
127
    /// provided. The translator does not use the pattern string during any
128
    /// correct translation, but it is used for error reporting.
129
0
    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130
0
        ast::visit(ast, TranslatorI::new(self, pattern))
131
0
    }
132
}
133
134
/// An HirFrame is a single stack frame, represented explicitly, which is
135
/// created for each item in the Ast that we traverse.
136
///
137
/// Note that technically, this type doesn't represent our entire stack
138
/// frame. In particular, the Ast visitor represents any state associated with
139
/// traversing the Ast itself.
140
#[derive(Clone, Debug)]
141
enum HirFrame {
142
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
143
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
144
    /// step is complete.
145
    Expr(Hir),
146
    /// A Unicode character class. This frame is mutated as we descend into
147
    /// the Ast of a character class (which is itself its own mini recursive
148
    /// structure).
149
    ClassUnicode(hir::ClassUnicode),
150
    /// A byte-oriented character class. This frame is mutated as we descend
151
    /// into the Ast of a character class (which is itself its own mini
152
    /// recursive structure).
153
    ///
154
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
155
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
156
    /// character is only permitted to match ASCII text.
157
    ClassBytes(hir::ClassBytes),
158
    /// This is pushed on to the stack upon first seeing any kind of group,
159
    /// indicated by parentheses (including non-capturing groups). It is popped
160
    /// upon leaving a group.
161
    Group {
162
        /// The old active flags when this group was opened.
163
        ///
164
        /// If this group sets flags, then the new active flags are set to the
165
        /// result of merging the old flags with the flags introduced by this
166
        /// group. If the group doesn't set any flags, then this is simply
167
        /// equivalent to whatever flags were set when the group was opened.
168
        ///
169
        /// When this group is popped, the active flags should be restored to
170
        /// the flags set here.
171
        ///
172
        /// The "active" flags correspond to whatever flags are set in the
173
        /// Translator.
174
        old_flags: Flags,
175
    },
176
    /// This is pushed whenever a concatenation is observed. After visiting
177
    /// every sub-expression in the concatenation, the translator's stack is
178
    /// popped until it sees a Concat frame.
179
    Concat,
180
    /// This is pushed whenever an alternation is observed. After visiting
181
    /// every sub-expression in the alternation, the translator's stack is
182
    /// popped until it sees an Alternation frame.
183
    Alternation,
184
}
185
186
impl HirFrame {
187
    /// Assert that the current stack frame is an Hir expression and return it.
188
0
    fn unwrap_expr(self) -> Hir {
189
0
        match self {
190
0
            HirFrame::Expr(expr) => expr,
191
0
            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
192
        }
193
0
    }
194
195
    /// Assert that the current stack frame is a Unicode class expression and
196
    /// return it.
197
0
    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
198
0
        match self {
199
0
            HirFrame::ClassUnicode(cls) => cls,
200
0
            _ => panic!(
201
0
                "tried to unwrap Unicode class \
202
0
                 from HirFrame, got: {:?}",
203
                self
204
            ),
205
        }
206
0
    }
207
208
    /// Assert that the current stack frame is a byte class expression and
209
    /// return it.
210
0
    fn unwrap_class_bytes(self) -> hir::ClassBytes {
211
0
        match self {
212
0
            HirFrame::ClassBytes(cls) => cls,
213
0
            _ => panic!(
214
0
                "tried to unwrap byte class \
215
0
                 from HirFrame, got: {:?}",
216
                self
217
            ),
218
        }
219
0
    }
220
221
    /// Assert that the current stack frame is a group indicator and return
222
    /// its corresponding flags (the flags that were active at the time the
223
    /// group was entered).
224
0
    fn unwrap_group(self) -> Flags {
225
0
        match self {
226
0
            HirFrame::Group { old_flags } => old_flags,
227
            _ => {
228
0
                panic!("tried to unwrap group from HirFrame, got: {:?}", self)
229
            }
230
        }
231
0
    }
232
}
233
234
impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
235
    type Output = Hir;
236
    type Err = Error;
237
238
0
    fn finish(self) -> Result<Hir> {
239
        // At this point, we should have exactly one HIR on the stack.
240
0
        assert_eq!(self.trans().stack.borrow().len(), 1);
241
0
        Ok(self.pop().unwrap().unwrap_expr())
242
0
    }
243
244
0
    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
245
0
        match *ast {
246
            Ast::Class(ast::Class::Bracketed(_)) => {
247
0
                if self.flags().unicode() {
248
0
                    let cls = hir::ClassUnicode::empty();
249
0
                    self.push(HirFrame::ClassUnicode(cls));
250
0
                } else {
251
0
                    let cls = hir::ClassBytes::empty();
252
0
                    self.push(HirFrame::ClassBytes(cls));
253
0
                }
254
            }
255
0
            Ast::Group(ref x) => {
256
0
                let old_flags = x
257
0
                    .flags()
258
0
                    .map(|ast| self.set_flags(ast))
259
0
                    .unwrap_or_else(|| self.flags());
260
0
                self.push(HirFrame::Group { old_flags });
261
            }
262
0
            Ast::Concat(ref x) if x.asts.is_empty() => {}
263
0
            Ast::Concat(_) => {
264
0
                self.push(HirFrame::Concat);
265
0
            }
266
0
            Ast::Alternation(ref x) if x.asts.is_empty() => {}
267
0
            Ast::Alternation(_) => {
268
0
                self.push(HirFrame::Alternation);
269
0
            }
270
0
            _ => {}
271
        }
272
0
        Ok(())
273
0
    }
274
275
0
    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
276
0
        match *ast {
277
0
            Ast::Empty(_) => {
278
0
                self.push(HirFrame::Expr(Hir::empty()));
279
0
            }
280
0
            Ast::Flags(ref x) => {
281
0
                self.set_flags(&x.flags);
282
0
                // Flags in the AST are generally considered directives and
283
0
                // not actual sub-expressions. However, they can be used in
284
0
                // the concrete syntax like `((?i))`, and we need some kind of
285
0
                // indication of an expression there, and Empty is the correct
286
0
                // choice.
287
0
                //
288
0
                // There can also be things like `(?i)+`, but we rule those out
289
0
                // in the parser. In the future, we might allow them for
290
0
                // consistency's sake.
291
0
                self.push(HirFrame::Expr(Hir::empty()));
292
0
            }
293
0
            Ast::Literal(ref x) => {
294
0
                self.push(HirFrame::Expr(self.hir_literal(x)?));
295
            }
296
0
            Ast::Dot(span) => {
297
0
                self.push(HirFrame::Expr(self.hir_dot(span)?));
298
            }
299
0
            Ast::Assertion(ref x) => {
300
0
                self.push(HirFrame::Expr(self.hir_assertion(x)?));
301
            }
302
0
            Ast::Class(ast::Class::Perl(ref x)) => {
303
0
                if self.flags().unicode() {
304
0
                    let cls = self.hir_perl_unicode_class(x)?;
305
0
                    let hcls = hir::Class::Unicode(cls);
306
0
                    self.push(HirFrame::Expr(Hir::class(hcls)));
307
0
                } else {
308
0
                    let cls = self.hir_perl_byte_class(x);
309
0
                    let hcls = hir::Class::Bytes(cls);
310
0
                    self.push(HirFrame::Expr(Hir::class(hcls)));
311
0
                }
312
            }
313
0
            Ast::Class(ast::Class::Unicode(ref x)) => {
314
0
                let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
315
0
                self.push(HirFrame::Expr(Hir::class(cls)));
316
            }
317
0
            Ast::Class(ast::Class::Bracketed(ref ast)) => {
318
0
                if self.flags().unicode() {
319
0
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
320
0
                    self.unicode_fold_and_negate(
321
0
                        &ast.span,
322
0
                        ast.negated,
323
0
                        &mut cls,
324
0
                    )?;
325
0
                    if cls.ranges().is_empty() {
326
0
                        return Err(self.error(
327
0
                            ast.span,
328
0
                            ErrorKind::EmptyClassNotAllowed,
329
0
                        ));
330
0
                    }
331
0
                    let expr = Hir::class(hir::Class::Unicode(cls));
332
0
                    self.push(HirFrame::Expr(expr));
333
                } else {
334
0
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
335
0
                    self.bytes_fold_and_negate(
336
0
                        &ast.span,
337
0
                        ast.negated,
338
0
                        &mut cls,
339
0
                    )?;
340
0
                    if cls.ranges().is_empty() {
341
0
                        return Err(self.error(
342
0
                            ast.span,
343
0
                            ErrorKind::EmptyClassNotAllowed,
344
0
                        ));
345
0
                    }
346
347
0
                    let expr = Hir::class(hir::Class::Bytes(cls));
348
0
                    self.push(HirFrame::Expr(expr));
349
                }
350
            }
351
0
            Ast::Repetition(ref x) => {
352
0
                let expr = self.pop().unwrap().unwrap_expr();
353
0
                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
354
0
            }
355
0
            Ast::Group(ref x) => {
356
0
                let expr = self.pop().unwrap().unwrap_expr();
357
0
                let old_flags = self.pop().unwrap().unwrap_group();
358
0
                self.trans().flags.set(old_flags);
359
0
                self.push(HirFrame::Expr(self.hir_group(x, expr)));
360
0
            }
361
            Ast::Concat(_) => {
362
0
                let mut exprs = vec![];
363
0
                while let Some(HirFrame::Expr(expr)) = self.pop() {
364
0
                    if !expr.kind().is_empty() {
365
0
                        exprs.push(expr);
366
0
                    }
367
                }
368
0
                exprs.reverse();
369
0
                self.push(HirFrame::Expr(Hir::concat(exprs)));
370
            }
371
            Ast::Alternation(_) => {
372
0
                let mut exprs = vec![];
373
0
                while let Some(HirFrame::Expr(expr)) = self.pop() {
374
0
                    exprs.push(expr);
375
0
                }
376
0
                exprs.reverse();
377
0
                self.push(HirFrame::Expr(Hir::alternation(exprs)));
378
            }
379
        }
380
0
        Ok(())
381
0
    }
382
383
0
    fn visit_class_set_item_pre(
384
0
        &mut self,
385
0
        ast: &ast::ClassSetItem,
386
0
    ) -> Result<()> {
387
0
        match *ast {
388
            ast::ClassSetItem::Bracketed(_) => {
389
0
                if self.flags().unicode() {
390
0
                    let cls = hir::ClassUnicode::empty();
391
0
                    self.push(HirFrame::ClassUnicode(cls));
392
0
                } else {
393
0
                    let cls = hir::ClassBytes::empty();
394
0
                    self.push(HirFrame::ClassBytes(cls));
395
0
                }
396
            }
397
            // We needn't handle the Union case here since the visitor will
398
            // do it for us.
399
0
            _ => {}
400
        }
401
0
        Ok(())
402
0
    }
403
404
0
    fn visit_class_set_item_post(
405
0
        &mut self,
406
0
        ast: &ast::ClassSetItem,
407
0
    ) -> Result<()> {
408
0
        match *ast {
409
0
            ast::ClassSetItem::Empty(_) => {}
410
0
            ast::ClassSetItem::Literal(ref x) => {
411
0
                if self.flags().unicode() {
412
0
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
413
0
                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
414
0
                    self.push(HirFrame::ClassUnicode(cls));
415
0
                } else {
416
0
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
417
0
                    let byte = self.class_literal_byte(x)?;
418
0
                    cls.push(hir::ClassBytesRange::new(byte, byte));
419
0
                    self.push(HirFrame::ClassBytes(cls));
420
                }
421
            }
422
0
            ast::ClassSetItem::Range(ref x) => {
423
0
                if self.flags().unicode() {
424
0
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
425
0
                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
426
0
                    self.push(HirFrame::ClassUnicode(cls));
427
0
                } else {
428
0
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
429
0
                    let start = self.class_literal_byte(&x.start)?;
430
0
                    let end = self.class_literal_byte(&x.end)?;
431
0
                    cls.push(hir::ClassBytesRange::new(start, end));
432
0
                    self.push(HirFrame::ClassBytes(cls));
433
                }
434
            }
435
0
            ast::ClassSetItem::Ascii(ref x) => {
436
0
                if self.flags().unicode() {
437
0
                    let xcls = self.hir_ascii_unicode_class(x)?;
438
0
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
439
0
                    cls.union(&xcls);
440
0
                    self.push(HirFrame::ClassUnicode(cls));
441
                } else {
442
0
                    let xcls = self.hir_ascii_byte_class(x)?;
443
0
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
444
0
                    cls.union(&xcls);
445
0
                    self.push(HirFrame::ClassBytes(cls));
446
                }
447
            }
448
0
            ast::ClassSetItem::Unicode(ref x) => {
449
0
                let xcls = self.hir_unicode_class(x)?;
450
0
                let mut cls = self.pop().unwrap().unwrap_class_unicode();
451
0
                cls.union(&xcls);
452
0
                self.push(HirFrame::ClassUnicode(cls));
453
            }
454
0
            ast::ClassSetItem::Perl(ref x) => {
455
0
                if self.flags().unicode() {
456
0
                    let xcls = self.hir_perl_unicode_class(x)?;
457
0
                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
458
0
                    cls.union(&xcls);
459
0
                    self.push(HirFrame::ClassUnicode(cls));
460
0
                } else {
461
0
                    let xcls = self.hir_perl_byte_class(x);
462
0
                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
463
0
                    cls.union(&xcls);
464
0
                    self.push(HirFrame::ClassBytes(cls));
465
0
                }
466
            }
467
0
            ast::ClassSetItem::Bracketed(ref ast) => {
468
0
                if self.flags().unicode() {
469
0
                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
470
0
                    self.unicode_fold_and_negate(
471
0
                        &ast.span,
472
0
                        ast.negated,
473
0
                        &mut cls1,
474
0
                    )?;
475
476
0
                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
477
0
                    cls2.union(&cls1);
478
0
                    self.push(HirFrame::ClassUnicode(cls2));
479
                } else {
480
0
                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
481
0
                    self.bytes_fold_and_negate(
482
0
                        &ast.span,
483
0
                        ast.negated,
484
0
                        &mut cls1,
485
0
                    )?;
486
487
0
                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
488
0
                    cls2.union(&cls1);
489
0
                    self.push(HirFrame::ClassBytes(cls2));
490
                }
491
            }
492
            // This is handled automatically by the visitor.
493
0
            ast::ClassSetItem::Union(_) => {}
494
        }
495
0
        Ok(())
496
0
    }
497
498
0
    fn visit_class_set_binary_op_pre(
499
0
        &mut self,
500
0
        _op: &ast::ClassSetBinaryOp,
501
0
    ) -> Result<()> {
502
0
        if self.flags().unicode() {
503
0
            let cls = hir::ClassUnicode::empty();
504
0
            self.push(HirFrame::ClassUnicode(cls));
505
0
        } else {
506
0
            let cls = hir::ClassBytes::empty();
507
0
            self.push(HirFrame::ClassBytes(cls));
508
0
        }
509
0
        Ok(())
510
0
    }
511
512
0
    fn visit_class_set_binary_op_in(
513
0
        &mut self,
514
0
        _op: &ast::ClassSetBinaryOp,
515
0
    ) -> Result<()> {
516
0
        if self.flags().unicode() {
517
0
            let cls = hir::ClassUnicode::empty();
518
0
            self.push(HirFrame::ClassUnicode(cls));
519
0
        } else {
520
0
            let cls = hir::ClassBytes::empty();
521
0
            self.push(HirFrame::ClassBytes(cls));
522
0
        }
523
0
        Ok(())
524
0
    }
525
526
0
    fn visit_class_set_binary_op_post(
527
0
        &mut self,
528
0
        op: &ast::ClassSetBinaryOp,
529
0
    ) -> Result<()> {
530
        use crate::ast::ClassSetBinaryOpKind::*;
531
532
0
        if self.flags().unicode() {
533
0
            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
534
0
            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
535
0
            let mut cls = self.pop().unwrap().unwrap_class_unicode();
536
0
            if self.flags().case_insensitive() {
537
0
                rhs.try_case_fold_simple().map_err(|_| {
538
0
                    self.error(
539
0
                        op.rhs.span().clone(),
540
0
                        ErrorKind::UnicodeCaseUnavailable,
541
                    )
542
0
                })?;
543
0
                lhs.try_case_fold_simple().map_err(|_| {
544
0
                    self.error(
545
0
                        op.lhs.span().clone(),
546
0
                        ErrorKind::UnicodeCaseUnavailable,
547
                    )
548
0
                })?;
549
0
            }
550
0
            match op.kind {
551
0
                Intersection => lhs.intersect(&rhs),
552
0
                Difference => lhs.difference(&rhs),
553
0
                SymmetricDifference => lhs.symmetric_difference(&rhs),
554
            }
555
0
            cls.union(&lhs);
556
0
            self.push(HirFrame::ClassUnicode(cls));
557
        } else {
558
0
            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
559
0
            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
560
0
            let mut cls = self.pop().unwrap().unwrap_class_bytes();
561
0
            if self.flags().case_insensitive() {
562
0
                rhs.case_fold_simple();
563
0
                lhs.case_fold_simple();
564
0
            }
565
0
            match op.kind {
566
0
                Intersection => lhs.intersect(&rhs),
567
0
                Difference => lhs.difference(&rhs),
568
0
                SymmetricDifference => lhs.symmetric_difference(&rhs),
569
            }
570
0
            cls.union(&lhs);
571
0
            self.push(HirFrame::ClassBytes(cls));
572
        }
573
0
        Ok(())
574
0
    }
575
}
576
577
/// The internal implementation of a translator.
578
///
579
/// This type is responsible for carrying around the original pattern string,
580
/// which is not tied to the internal state of a translator.
581
///
582
/// A TranslatorI exists for the time it takes to translate a single Ast.
583
#[derive(Clone, Debug)]
584
struct TranslatorI<'t, 'p> {
585
    trans: &'t Translator,
586
    pattern: &'p str,
587
}
588
589
impl<'t, 'p> TranslatorI<'t, 'p> {
590
    /// Build a new internal translator.
591
0
    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
592
0
        TranslatorI { trans, pattern }
593
0
    }
594
595
    /// Return a reference to the underlying translator.
596
0
    fn trans(&self) -> &Translator {
597
0
        &self.trans
598
0
    }
599
600
    /// Push the given frame on to the call stack.
601
0
    fn push(&self, frame: HirFrame) {
602
0
        self.trans().stack.borrow_mut().push(frame);
603
0
    }
604
605
    /// Pop the top of the call stack. If the call stack is empty, return None.
606
0
    fn pop(&self) -> Option<HirFrame> {
607
0
        self.trans().stack.borrow_mut().pop()
608
0
    }
609
610
    /// Create a new error with the given span and error type.
611
0
    fn error(&self, span: Span, kind: ErrorKind) -> Error {
612
0
        Error { kind, pattern: self.pattern.to_string(), span }
613
0
    }
614
615
    /// Return a copy of the active flags.
616
0
    fn flags(&self) -> Flags {
617
0
        self.trans().flags.get()
618
0
    }
619
620
    /// Set the flags of this translator from the flags set in the given AST.
621
    /// Then, return the old flags.
622
0
    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
623
0
        let old_flags = self.flags();
624
0
        let mut new_flags = Flags::from_ast(ast_flags);
625
0
        new_flags.merge(&old_flags);
626
0
        self.trans().flags.set(new_flags);
627
0
        old_flags
628
0
    }
629
630
0
    fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
631
0
        let ch = match self.literal_to_char(lit)? {
632
0
            byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
633
0
            hir::Literal::Unicode(ch) => ch,
634
        };
635
0
        if self.flags().case_insensitive() {
636
0
            self.hir_from_char_case_insensitive(lit.span, ch)
637
        } else {
638
0
            self.hir_from_char(lit.span, ch)
639
        }
640
0
    }
641
642
    /// Convert an Ast literal to its scalar representation.
643
    ///
644
    /// When Unicode mode is enabled, then this always succeeds and returns a
645
    /// `char` (Unicode scalar value).
646
    ///
647
    /// When Unicode mode is disabled, then a raw byte is returned. If that
648
    /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
649
    /// an error.
650
0
    fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
651
0
        if self.flags().unicode() {
652
0
            return Ok(hir::Literal::Unicode(lit.c));
653
0
        }
654
0
        let byte = match lit.byte() {
655
0
            None => return Ok(hir::Literal::Unicode(lit.c)),
656
0
            Some(byte) => byte,
657
        };
658
0
        if byte <= 0x7F {
659
0
            return Ok(hir::Literal::Unicode(byte as char));
660
0
        }
661
0
        if !self.trans().allow_invalid_utf8 {
662
0
            return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
663
0
        }
664
0
        Ok(hir::Literal::Byte(byte))
665
0
    }
666
667
0
    fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
668
0
        if !self.flags().unicode() && c.len_utf8() > 1 {
669
0
            return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
670
0
        }
671
0
        Ok(Hir::literal(hir::Literal::Unicode(c)))
672
0
    }
673
674
0
    fn hir_from_char_case_insensitive(
675
0
        &self,
676
0
        span: Span,
677
0
        c: char,
678
0
    ) -> Result<Hir> {
679
0
        if self.flags().unicode() {
680
            // If case folding won't do anything, then don't bother trying.
681
0
            let map =
682
0
                unicode::contains_simple_case_mapping(c, c).map_err(|_| {
683
0
                    self.error(span, ErrorKind::UnicodeCaseUnavailable)
684
0
                })?;
685
0
            if !map {
686
0
                return self.hir_from_char(span, c);
687
0
            }
688
0
            let mut cls =
689
0
                hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
690
0
                    c, c,
691
                )]);
692
0
            cls.try_case_fold_simple().map_err(|_| {
693
0
                self.error(span, ErrorKind::UnicodeCaseUnavailable)
694
0
            })?;
695
0
            Ok(Hir::class(hir::Class::Unicode(cls)))
696
        } else {
697
0
            if c.len_utf8() > 1 {
698
0
                return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
699
0
            }
700
            // If case folding won't do anything, then don't bother trying.
701
0
            match c {
702
0
                'A'..='Z' | 'a'..='z' => {}
703
0
                _ => return self.hir_from_char(span, c),
704
            }
705
0
            let mut cls =
706
0
                hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
707
0
                    c as u8, c as u8,
708
                )]);
709
0
            cls.case_fold_simple();
710
0
            Ok(Hir::class(hir::Class::Bytes(cls)))
711
        }
712
0
    }
713
714
0
    fn hir_dot(&self, span: Span) -> Result<Hir> {
715
0
        let unicode = self.flags().unicode();
716
0
        if !unicode && !self.trans().allow_invalid_utf8 {
717
0
            return Err(self.error(span, ErrorKind::InvalidUtf8));
718
0
        }
719
0
        Ok(if self.flags().dot_matches_new_line() {
720
0
            Hir::any(!unicode)
721
        } else {
722
0
            Hir::dot(!unicode)
723
        })
724
0
    }
725
726
0
    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
727
0
        let unicode = self.flags().unicode();
728
0
        let multi_line = self.flags().multi_line();
729
0
        Ok(match asst.kind {
730
0
            ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
731
0
                hir::Anchor::StartLine
732
            } else {
733
0
                hir::Anchor::StartText
734
            }),
735
0
            ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
736
0
                hir::Anchor::EndLine
737
            } else {
738
0
                hir::Anchor::EndText
739
            }),
740
            ast::AssertionKind::StartText => {
741
0
                Hir::anchor(hir::Anchor::StartText)
742
            }
743
0
            ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
744
            ast::AssertionKind::WordBoundary => {
745
0
                Hir::word_boundary(if unicode {
746
0
                    hir::WordBoundary::Unicode
747
                } else {
748
0
                    hir::WordBoundary::Ascii
749
                })
750
            }
751
            ast::AssertionKind::NotWordBoundary => {
752
0
                Hir::word_boundary(if unicode {
753
0
                    hir::WordBoundary::UnicodeNegate
754
                } else {
755
                    // It is possible for negated ASCII word boundaries to
756
                    // match at invalid UTF-8 boundaries, even when searching
757
                    // valid UTF-8.
758
0
                    if !self.trans().allow_invalid_utf8 {
759
0
                        return Err(
760
0
                            self.error(asst.span, ErrorKind::InvalidUtf8)
761
0
                        );
762
0
                    }
763
0
                    hir::WordBoundary::AsciiNegate
764
                })
765
            }
766
        })
767
0
    }
768
769
0
    fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
770
0
        let kind = match group.kind {
771
0
            ast::GroupKind::CaptureIndex(idx) => {
772
0
                hir::GroupKind::CaptureIndex(idx)
773
            }
774
0
            ast::GroupKind::CaptureName(ref capname) => {
775
0
                hir::GroupKind::CaptureName {
776
0
                    name: capname.name.clone(),
777
0
                    index: capname.index,
778
0
                }
779
            }
780
0
            ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
781
        };
782
0
        Hir::group(hir::Group { kind, hir: Box::new(expr) })
783
0
    }
784
785
0
    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
786
0
        let kind = match rep.op.kind {
787
0
            ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
788
0
            ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
789
0
            ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
790
0
            ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
791
0
                hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
792
            }
793
0
            ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
794
0
                hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
795
            }
796
            ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
797
0
                m,
798
0
                n,
799
            )) => {
800
0
                hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
801
            }
802
        };
803
0
        let greedy =
804
0
            if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
805
0
        Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) })
806
0
    }
807
808
0
    fn hir_unicode_class(
809
0
        &self,
810
0
        ast_class: &ast::ClassUnicode,
811
0
    ) -> Result<hir::ClassUnicode> {
812
        use crate::ast::ClassUnicodeKind::*;
813
814
0
        if !self.flags().unicode() {
815
0
            return Err(
816
0
                self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
817
0
            );
818
0
        }
819
0
        let query = match ast_class.kind {
820
0
            OneLetter(name) => ClassQuery::OneLetter(name),
821
0
            Named(ref name) => ClassQuery::Binary(name),
822
0
            NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
823
0
                property_name: name,
824
0
                property_value: value,
825
0
            },
826
        };
827
0
        let mut result = self.convert_unicode_class_error(
828
0
            &ast_class.span,
829
0
            unicode::class(query),
830
        );
831
0
        if let Ok(ref mut class) = result {
832
0
            self.unicode_fold_and_negate(
833
0
                &ast_class.span,
834
0
                ast_class.negated,
835
0
                class,
836
0
            )?;
837
0
            if class.ranges().is_empty() {
838
0
                let err = self
839
0
                    .error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
840
0
                return Err(err);
841
0
            }
842
0
        }
843
0
        result
844
0
    }
845
846
0
    fn hir_ascii_unicode_class(
847
0
        &self,
848
0
        ast: &ast::ClassAscii,
849
0
    ) -> Result<hir::ClassUnicode> {
850
0
        let mut cls = hir::ClassUnicode::new(
851
0
            ascii_class(&ast.kind)
852
0
                .iter()
853
0
                .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)),
854
        );
855
0
        self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
856
0
        Ok(cls)
857
0
    }
858
859
0
    fn hir_ascii_byte_class(
860
0
        &self,
861
0
        ast: &ast::ClassAscii,
862
0
    ) -> Result<hir::ClassBytes> {
863
0
        let mut cls = hir::ClassBytes::new(
864
0
            ascii_class(&ast.kind)
865
0
                .iter()
866
0
                .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)),
867
        );
868
0
        self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
869
0
        Ok(cls)
870
0
    }
871
872
0
    fn hir_perl_unicode_class(
873
0
        &self,
874
0
        ast_class: &ast::ClassPerl,
875
0
    ) -> Result<hir::ClassUnicode> {
876
        use crate::ast::ClassPerlKind::*;
877
878
0
        assert!(self.flags().unicode());
879
0
        let result = match ast_class.kind {
880
0
            Digit => unicode::perl_digit(),
881
0
            Space => unicode::perl_space(),
882
0
            Word => unicode::perl_word(),
883
        };
884
0
        let mut class =
885
0
            self.convert_unicode_class_error(&ast_class.span, result)?;
886
        // We needn't apply case folding here because the Perl Unicode classes
887
        // are already closed under Unicode simple case folding.
888
0
        if ast_class.negated {
889
0
            class.negate();
890
0
        }
891
0
        Ok(class)
892
0
    }
893
894
0
    fn hir_perl_byte_class(
895
0
        &self,
896
0
        ast_class: &ast::ClassPerl,
897
0
    ) -> hir::ClassBytes {
898
        use crate::ast::ClassPerlKind::*;
899
900
0
        assert!(!self.flags().unicode());
901
0
        let mut class = match ast_class.kind {
902
0
            Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
903
0
            Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
904
0
            Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
905
        };
906
        // We needn't apply case folding here because the Perl ASCII classes
907
        // are already closed (under ASCII case folding).
908
0
        if ast_class.negated {
909
0
            class.negate();
910
0
        }
911
0
        class
912
0
    }
913
914
    /// Converts the given Unicode specific error to an HIR translation error.
915
    ///
916
    /// The span given should approximate the position at which an error would
917
    /// occur.
918
0
    fn convert_unicode_class_error(
919
0
        &self,
920
0
        span: &Span,
921
0
        result: unicode::Result<hir::ClassUnicode>,
922
0
    ) -> Result<hir::ClassUnicode> {
923
0
        result.map_err(|err| {
924
0
            let sp = span.clone();
925
0
            match err {
926
                unicode::Error::PropertyNotFound => {
927
0
                    self.error(sp, ErrorKind::UnicodePropertyNotFound)
928
                }
929
                unicode::Error::PropertyValueNotFound => {
930
0
                    self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
931
                }
932
                unicode::Error::PerlClassNotFound => {
933
0
                    self.error(sp, ErrorKind::UnicodePerlClassNotFound)
934
                }
935
            }
936
0
        })
937
0
    }
938
939
0
    fn unicode_fold_and_negate(
940
0
        &self,
941
0
        span: &Span,
942
0
        negated: bool,
943
0
        class: &mut hir::ClassUnicode,
944
0
    ) -> Result<()> {
945
        // Note that we must apply case folding before negation!
946
        // Consider `(?i)[^x]`. If we applied negation field, then
947
        // the result would be the character class that matched any
948
        // Unicode scalar value.
949
0
        if self.flags().case_insensitive() {
950
0
            class.try_case_fold_simple().map_err(|_| {
951
0
                self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
952
0
            })?;
953
0
        }
954
0
        if negated {
955
0
            class.negate();
956
0
        }
957
0
        Ok(())
958
0
    }
959
960
0
    fn bytes_fold_and_negate(
961
0
        &self,
962
0
        span: &Span,
963
0
        negated: bool,
964
0
        class: &mut hir::ClassBytes,
965
0
    ) -> Result<()> {
966
        // Note that we must apply case folding before negation!
967
        // Consider `(?i)[^x]`. If we applied negation first, then
968
        // the result would be the character class that matched any
969
        // Unicode scalar value.
970
0
        if self.flags().case_insensitive() {
971
0
            class.case_fold_simple();
972
0
        }
973
0
        if negated {
974
0
            class.negate();
975
0
        }
976
0
        if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
977
0
            return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
978
0
        }
979
0
        Ok(())
980
0
    }
981
982
    /// Return a scalar byte value suitable for use as a literal in a byte
983
    /// character class.
984
0
    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
985
0
        match self.literal_to_char(ast)? {
986
0
            hir::Literal::Byte(byte) => Ok(byte),
987
0
            hir::Literal::Unicode(ch) => {
988
0
                if ch <= 0x7F as char {
989
0
                    Ok(ch as u8)
990
                } else {
991
                    // We can't feasibly support Unicode in
992
                    // byte oriented classes. Byte classes don't
993
                    // do Unicode case folding.
994
0
                    Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
995
                }
996
            }
997
        }
998
0
    }
999
}
1000
1001
/// A translator's representation of a regular expression's flags at any given
1002
/// moment in time.
1003
///
1004
/// Each flag can be in one of three states: absent, present but disabled or
1005
/// present but enabled.
1006
#[derive(Clone, Copy, Debug, Default)]
1007
struct Flags {
1008
    case_insensitive: Option<bool>,
1009
    multi_line: Option<bool>,
1010
    dot_matches_new_line: Option<bool>,
1011
    swap_greed: Option<bool>,
1012
    unicode: Option<bool>,
1013
    // Note that `ignore_whitespace` is omitted here because it is handled
1014
    // entirely in the parser.
1015
}
1016
1017
impl Flags {
1018
0
    fn from_ast(ast: &ast::Flags) -> Flags {
1019
0
        let mut flags = Flags::default();
1020
0
        let mut enable = true;
1021
0
        for item in &ast.items {
1022
0
            match item.kind {
1023
0
                ast::FlagsItemKind::Negation => {
1024
0
                    enable = false;
1025
0
                }
1026
0
                ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1027
0
                    flags.case_insensitive = Some(enable);
1028
0
                }
1029
0
                ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1030
0
                    flags.multi_line = Some(enable);
1031
0
                }
1032
0
                ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1033
0
                    flags.dot_matches_new_line = Some(enable);
1034
0
                }
1035
0
                ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1036
0
                    flags.swap_greed = Some(enable);
1037
0
                }
1038
0
                ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1039
0
                    flags.unicode = Some(enable);
1040
0
                }
1041
0
                ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1042
            }
1043
        }
1044
0
        flags
1045
0
    }
1046
1047
0
    fn merge(&mut self, previous: &Flags) {
1048
0
        if self.case_insensitive.is_none() {
1049
0
            self.case_insensitive = previous.case_insensitive;
1050
0
        }
1051
0
        if self.multi_line.is_none() {
1052
0
            self.multi_line = previous.multi_line;
1053
0
        }
1054
0
        if self.dot_matches_new_line.is_none() {
1055
0
            self.dot_matches_new_line = previous.dot_matches_new_line;
1056
0
        }
1057
0
        if self.swap_greed.is_none() {
1058
0
            self.swap_greed = previous.swap_greed;
1059
0
        }
1060
0
        if self.unicode.is_none() {
1061
0
            self.unicode = previous.unicode;
1062
0
        }
1063
0
    }
1064
1065
0
    fn case_insensitive(&self) -> bool {
1066
0
        self.case_insensitive.unwrap_or(false)
1067
0
    }
1068
1069
0
    fn multi_line(&self) -> bool {
1070
0
        self.multi_line.unwrap_or(false)
1071
0
    }
1072
1073
0
    fn dot_matches_new_line(&self) -> bool {
1074
0
        self.dot_matches_new_line.unwrap_or(false)
1075
0
    }
1076
1077
0
    fn swap_greed(&self) -> bool {
1078
0
        self.swap_greed.unwrap_or(false)
1079
0
    }
1080
1081
0
    fn unicode(&self) -> bool {
1082
0
        self.unicode.unwrap_or(true)
1083
0
    }
1084
}
1085
1086
0
/// Builds the byte class containing exactly the ranges of the given ASCII
/// class, with no case folding or negation applied.
fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
    hir::ClassBytes::new(
        ascii_class(kind)
            .iter()
            .map(|&(start, end)| {
                hir::ClassBytesRange::new(start as u8, end as u8)
            }),
    )
}
1094
1095
0
fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1096
    use crate::ast::ClassAsciiKind::*;
1097
0
    match *kind {
1098
0
        Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1099
0
        Alpha => &[('A', 'Z'), ('a', 'z')],
1100
0
        Ascii => &[('\x00', '\x7F')],
1101
0
        Blank => &[('\t', '\t'), (' ', ' ')],
1102
0
        Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
1103
0
        Digit => &[('0', '9')],
1104
0
        Graph => &[('!', '~')],
1105
0
        Lower => &[('a', 'z')],
1106
0
        Print => &[(' ', '~')],
1107
0
        Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1108
0
        Space => &[
1109
0
            ('\t', '\t'),
1110
0
            ('\n', '\n'),
1111
0
            ('\x0B', '\x0B'),
1112
0
            ('\x0C', '\x0C'),
1113
0
            ('\r', '\r'),
1114
0
            (' ', ' '),
1115
0
        ],
1116
0
        Upper => &[('A', 'Z')],
1117
0
        Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1118
0
        Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1119
    }
1120
0
}
1121
1122
#[cfg(test)]
1123
mod tests {
1124
    use crate::ast::parse::ParserBuilder;
1125
    use crate::ast::{self, Ast, Position, Span};
1126
    use crate::hir::{self, Hir, HirKind};
1127
    use crate::unicode::{self, ClassQuery};
1128
1129
    use super::{ascii_class, TranslatorBuilder};
1130
1131
    // We create these errors to compare with real hir::Errors in the tests.
1132
    // We define equality between TestError and hir::Error to disregard the
1133
    // pattern string in hir::Error, which is annoying to provide in tests.
1134
    #[derive(Clone, Debug)]
1135
    struct TestError {
1136
        span: Span,
1137
        kind: hir::ErrorKind,
1138
    }
1139
1140
    impl PartialEq<hir::Error> for TestError {
1141
        fn eq(&self, other: &hir::Error) -> bool {
1142
            self.span == other.span && self.kind == other.kind
1143
        }
1144
    }
1145
1146
    impl PartialEq<TestError> for hir::Error {
1147
        fn eq(&self, other: &TestError) -> bool {
1148
            self.span == other.span && self.kind == other.kind
1149
        }
1150
    }
1151
1152
    fn parse(pattern: &str) -> Ast {
1153
        ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1154
    }
1155
1156
    fn t(pattern: &str) -> Hir {
1157
        TranslatorBuilder::new()
1158
            .allow_invalid_utf8(false)
1159
            .build()
1160
            .translate(pattern, &parse(pattern))
1161
            .unwrap()
1162
    }
1163
1164
    fn t_err(pattern: &str) -> hir::Error {
1165
        TranslatorBuilder::new()
1166
            .allow_invalid_utf8(false)
1167
            .build()
1168
            .translate(pattern, &parse(pattern))
1169
            .unwrap_err()
1170
    }
1171
1172
    fn t_bytes(pattern: &str) -> Hir {
1173
        TranslatorBuilder::new()
1174
            .allow_invalid_utf8(true)
1175
            .build()
1176
            .translate(pattern, &parse(pattern))
1177
            .unwrap()
1178
    }
1179
1180
    fn hir_lit(s: &str) -> Hir {
1181
        match s.len() {
1182
            0 => Hir::empty(),
1183
            _ => {
1184
                let lits = s
1185
                    .chars()
1186
                    .map(hir::Literal::Unicode)
1187
                    .map(Hir::literal)
1188
                    .collect();
1189
                Hir::concat(lits)
1190
            }
1191
        }
1192
    }
1193
1194
    fn hir_blit(s: &[u8]) -> Hir {
1195
        match s.len() {
1196
            0 => Hir::empty(),
1197
            1 => Hir::literal(hir::Literal::Byte(s[0])),
1198
            _ => {
1199
                let lits = s
1200
                    .iter()
1201
                    .cloned()
1202
                    .map(hir::Literal::Byte)
1203
                    .map(Hir::literal)
1204
                    .collect();
1205
                Hir::concat(lits)
1206
            }
1207
        }
1208
    }
1209
1210
    fn hir_group(i: u32, expr: Hir) -> Hir {
1211
        Hir::group(hir::Group {
1212
            kind: hir::GroupKind::CaptureIndex(i),
1213
            hir: Box::new(expr),
1214
        })
1215
    }
1216
1217
    fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1218
        Hir::group(hir::Group {
1219
            kind: hir::GroupKind::CaptureName {
1220
                name: name.to_string(),
1221
                index: i,
1222
            },
1223
            hir: Box::new(expr),
1224
        })
1225
    }
1226
1227
    fn hir_group_nocap(expr: Hir) -> Hir {
1228
        Hir::group(hir::Group {
1229
            kind: hir::GroupKind::NonCapturing,
1230
            hir: Box::new(expr),
1231
        })
1232
    }
1233
1234
    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1235
        Hir::repetition(hir::Repetition {
1236
            kind: hir::RepetitionKind::ZeroOrOne,
1237
            greedy,
1238
            hir: Box::new(expr),
1239
        })
1240
    }
1241
1242
    fn hir_star(greedy: bool, expr: Hir) -> Hir {
1243
        Hir::repetition(hir::Repetition {
1244
            kind: hir::RepetitionKind::ZeroOrMore,
1245
            greedy,
1246
            hir: Box::new(expr),
1247
        })
1248
    }
1249
1250
    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1251
        Hir::repetition(hir::Repetition {
1252
            kind: hir::RepetitionKind::OneOrMore,
1253
            greedy,
1254
            hir: Box::new(expr),
1255
        })
1256
    }
1257
1258
    fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1259
        Hir::repetition(hir::Repetition {
1260
            kind: hir::RepetitionKind::Range(range),
1261
            greedy,
1262
            hir: Box::new(expr),
1263
        })
1264
    }
1265
1266
    /// Expected HIR for an alternation of `alts`; a thin wrapper around
    /// `Hir::alternation`.
    fn hir_alt(alts: Vec<Hir>) -> Hir {
1267
        Hir::alternation(alts)
1268
    }
1269
1270
    /// Expected HIR for a concatenation of `exprs`; a thin wrapper around
    /// `Hir::concat`.
    fn hir_cat(exprs: Vec<Hir>) -> Hir {
1271
        Hir::concat(exprs)
1272
    }
1273
1274
    /// Expected HIR for the Unicode class selected by `query`. Panics if
    /// the class lookup fails.
    // NOTE(review): `dead_code` is presumably allowed because the callers
    // are compiled only with certain Unicode features — confirm.
    #[allow(dead_code)]
1275
    fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1276
        Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1277
    }
1278
1279
    /// Expected HIR for the Unicode Perl word class. Panics if the class
    /// lookup fails.
    // NOTE(review): `dead_code` is presumably allowed because the callers
    // are compiled only with certain Unicode features — confirm.
    #[allow(dead_code)]
1280
    fn hir_uclass_perl_word() -> Hir {
1281
        Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1282
    }
1283
1284
    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1285
        let ranges: Vec<hir::ClassUnicodeRange> = ranges
1286
            .iter()
1287
            .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1288
            .collect();
1289
        Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1290
    }
1291
1292
    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1293
        let ranges: Vec<hir::ClassBytesRange> = ranges
1294
            .iter()
1295
            .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1296
            .collect();
1297
        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1298
    }
1299
1300
    fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1301
        let ranges: Vec<hir::ClassBytesRange> = ranges
1302
            .iter()
1303
            .map(|&(s, e)| {
1304
                assert!(s as u32 <= 0x7F);
1305
                assert!(e as u32 <= 0x7F);
1306
                hir::ClassBytesRange::new(s as u8, e as u8)
1307
            })
1308
            .collect();
1309
        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1310
    }
1311
1312
    fn hir_case_fold(expr: Hir) -> Hir {
1313
        match expr.into_kind() {
1314
            HirKind::Class(mut cls) => {
1315
                cls.case_fold_simple();
1316
                Hir::class(cls)
1317
            }
1318
            _ => panic!("cannot case fold non-class Hir expr"),
1319
        }
1320
    }
1321
1322
    fn hir_negate(expr: Hir) -> Hir {
1323
        match expr.into_kind() {
1324
            HirKind::Class(mut cls) => {
1325
                cls.negate();
1326
                Hir::class(cls)
1327
            }
1328
            _ => panic!("cannot negate non-class Hir expr"),
1329
        }
1330
    }
1331
1332
    #[allow(dead_code)]
1333
    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1334
        use crate::hir::Class::{Bytes, Unicode};
1335
1336
        match (expr1.into_kind(), expr2.into_kind()) {
1337
            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1338
                c1.union(&c2);
1339
                Hir::class(hir::Class::Unicode(c1))
1340
            }
1341
            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1342
                c1.union(&c2);
1343
                Hir::class(hir::Class::Bytes(c1))
1344
            }
1345
            _ => panic!("cannot union non-class Hir exprs"),
1346
        }
1347
    }
1348
1349
    #[allow(dead_code)]
1350
    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1351
        use crate::hir::Class::{Bytes, Unicode};
1352
1353
        match (expr1.into_kind(), expr2.into_kind()) {
1354
            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1355
                c1.difference(&c2);
1356
                Hir::class(hir::Class::Unicode(c1))
1357
            }
1358
            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1359
                c1.difference(&c2);
1360
                Hir::class(hir::Class::Bytes(c1))
1361
            }
1362
            _ => panic!("cannot difference non-class Hir exprs"),
1363
        }
1364
    }
1365
1366
    /// Expected HIR for the given anchor; a thin wrapper around
    /// `Hir::anchor`.
    fn hir_anchor(anchor: hir::Anchor) -> Hir {
1367
        Hir::anchor(anchor)
1368
    }
1369
1370
    /// Expected HIR for the given word boundary assertion; a thin wrapper
    /// around `Hir::word_boundary`.
    fn hir_word(wb: hir::WordBoundary) -> Hir {
1371
        Hir::word_boundary(wb)
1372
    }
1373
1374
    // Patterns that contain no literals at all: the empty pattern, a lone
    // flag directive, empty groups and alternations with empty branches.
    #[test]
1375
    fn empty() {
1376
        assert_eq!(t(""), Hir::empty());
1377
        assert_eq!(t("(?i)"), Hir::empty());
1378
        assert_eq!(t("()"), hir_group(1, Hir::empty()));
1379
        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1380
        assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
1381
        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
1382
        assert_eq!(
1383
            t("()|()"),
1384
            hir_alt(vec![
1385
                hir_group(1, Hir::empty()),
1386
                hir_group(2, Hir::empty()),
1387
            ])
1388
        );
1389
        // Empty branches are kept in whatever position they appear.
        assert_eq!(
1390
            t("(|b)"),
1391
            hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
1392
        );
1393
        assert_eq!(
1394
            t("(a|)"),
1395
            hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
1396
        );
1397
        assert_eq!(
1398
            t("(a||c)"),
1399
            hir_group(
1400
                1,
1401
                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
1402
            )
1403
        );
1404
        assert_eq!(
1405
            t("(||)"),
1406
            hir_group(
1407
                1,
1408
                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
1409
            )
1410
        );
1411
    }
1412
1413
    // Case sensitive literals, in Unicode mode and in byte mode, plus the
    // errors raised for literals that byte mode cannot represent.
    #[test]
1414
    fn literal() {
1415
        assert_eq!(t("a"), hir_lit("a"));
1416
        assert_eq!(t("(?-u)a"), hir_lit("a"));
1417
        assert_eq!(t("☃"), hir_lit("☃"));
1418
        assert_eq!(t("abcd"), hir_lit("abcd"));
1419
1420
        // ASCII stays a Unicode literal even with Unicode mode disabled;
        // only a non-ASCII byte becomes a byte literal.
        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
1421
        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
1422
        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
1423
        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
1424
1425
        // A non-ASCII character is rejected when Unicode mode is disabled.
        assert_eq!(
1426
            t_err("(?-u)☃"),
1427
            TestError {
1428
                kind: hir::ErrorKind::UnicodeNotAllowed,
1429
                span: Span::new(
1430
                    Position::new(5, 1, 6),
1431
                    Position::new(8, 1, 7)
1432
                ),
1433
            }
1434
        );
1435
        // A non-ASCII byte is rejected when invalid UTF-8 is not allowed.
        assert_eq!(
1436
            t_err(r"(?-u)\xFF"),
1437
            TestError {
1438
                kind: hir::ErrorKind::InvalidUtf8,
1439
                span: Span::new(
1440
                    Position::new(5, 1, 6),
1441
                    Position::new(9, 1, 10)
1442
                ),
1443
            }
1444
        );
1445
    }
1446
1447
    // Case insensitive literals: a letter becomes a class holding both of
    // its cases, while a character without case variants stays a literal.
    #[test]
1448
    fn literal_case_insensitive() {
1449
        // Unicode mode; these need the `unicode-case` feature.
        #[cfg(feature = "unicode-case")]
1450
        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
1451
        #[cfg(feature = "unicode-case")]
1452
        assert_eq!(
1453
            t("(?i:a)"),
1454
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
1455
        );
1456
        #[cfg(feature = "unicode-case")]
1457
        assert_eq!(
1458
            t("a(?i)a(?-i)a"),
1459
            hir_cat(vec![
1460
                hir_lit("a"),
1461
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
1462
                hir_lit("a"),
1463
            ])
1464
        );
1465
        #[cfg(feature = "unicode-case")]
1466
        assert_eq!(
1467
            t("(?i)ab@c"),
1468
            hir_cat(vec![
1469
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
1470
                hir_uclass(&[('B', 'B'), ('b', 'b')]),
1471
                hir_lit("@"),
1472
                hir_uclass(&[('C', 'C'), ('c', 'c')]),
1473
            ])
1474
        );
1475
        #[cfg(feature = "unicode-case")]
1476
        assert_eq!(
1477
            t("(?i)β"),
1478
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
1479
        );
1480
1481
        // Unicode mode disabled: folding yields byte classes.
        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
1482
        #[cfg(feature = "unicode-case")]
1483
        assert_eq!(
1484
            t("(?-u)a(?i)a(?-i)a"),
1485
            hir_cat(vec![
1486
                hir_lit("a"),
1487
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1488
                hir_lit("a"),
1489
            ])
1490
        );
1491
        assert_eq!(
1492
            t("(?i-u)ab@c"),
1493
            hir_cat(vec![
1494
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1495
                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
1496
                hir_lit("@"),
1497
                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
1498
            ])
1499
        );
1500
1501
        // Same byte-class results when invalid UTF-8 is allowed.
        assert_eq!(
1502
            t_bytes("(?i-u)a"),
1503
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1504
        );
1505
        assert_eq!(
1506
            t_bytes("(?i-u)\x61"),
1507
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1508
        );
1509
        assert_eq!(
1510
            t_bytes(r"(?i-u)\x61"),
1511
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1512
        );
1513
        // A non-ASCII byte has no case variants and stays a byte literal.
        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
1514
1515
        // A non-ASCII character is rejected when Unicode mode is disabled.
        assert_eq!(
1516
            t_err("(?i-u)β"),
1517
            TestError {
1518
                kind: hir::ErrorKind::UnicodeNotAllowed,
1519
                span: Span::new(
1520
                    Position::new(6, 1, 7),
1521
                    Position::new(8, 1, 8),
1522
                ),
1523
            }
1524
        );
1525
    }
1526
1527
    // Translation of `.`: everything except `\n` by default, everything
    // with the `s` flag, and a byte class when Unicode mode is disabled.
    #[test]
1528
    fn dot() {
1529
        assert_eq!(
1530
            t("."),
1531
            hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
1532
        );
1533
        assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
1534
        assert_eq!(
1535
            t_bytes("(?-u)."),
1536
            hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
1537
        );
1538
        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
1539
1540
        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
1541
        assert_eq!(
1542
            t_err("(?-u)."),
1543
            TestError {
1544
                kind: hir::ErrorKind::InvalidUtf8,
1545
                span: Span::new(
1546
                    Position::new(5, 1, 6),
1547
                    Position::new(6, 1, 7)
1548
                ),
1549
            }
1550
        );
1551
        assert_eq!(
1552
            t_err("(?s-u)."),
1553
            TestError {
1554
                kind: hir::ErrorKind::InvalidUtf8,
1555
                span: Span::new(
1556
                    Position::new(6, 1, 7),
1557
                    Position::new(7, 1, 8)
1558
                ),
1559
            }
1560
        );
1561
    }
1562
1563
    // Anchors and word boundaries under the multi-line and Unicode flags.
    #[test]
1564
    fn assertions() {
1565
        // Without `m`, `^` and `$` are text anchors.
        assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
1566
        assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
1567
        assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
1568
        assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
1569
        // With `m`, `^` and `$` become line anchors; `\A` and `\z` do not.
        assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
1570
        assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
1571
        assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
1572
        assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));
1573
1574
        assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
1575
        assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
1576
        assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
1577
        assert_eq!(
1578
            t_bytes(r"(?-u)\B"),
1579
            hir_word(hir::WordBoundary::AsciiNegate)
1580
        );
1581
1582
        // A negated ASCII word boundary is rejected when invalid UTF-8 is
        // not allowed.
        assert_eq!(
1583
            t_err(r"(?-u)\B"),
1584
            TestError {
1585
                kind: hir::ErrorKind::InvalidUtf8,
1586
                span: Span::new(
1587
                    Position::new(5, 1, 6),
1588
                    Position::new(7, 1, 8)
1589
                ),
1590
            }
1591
        );
1592
    }
1593
1594
    // Capturing, named and non-capturing groups, including how capture
    // indices are assigned when the kinds are mixed.
    #[test]
1595
    fn group() {
1596
        assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
1597
        assert_eq!(
1598
            t("(a)(b)"),
1599
            hir_cat(vec![
1600
                hir_group(1, hir_lit("a")),
1601
                hir_group(2, hir_lit("b")),
1602
            ])
1603
        );
1604
        assert_eq!(
1605
            t("(a)|(b)"),
1606
            hir_alt(vec![
1607
                hir_group(1, hir_lit("a")),
1608
                hir_group(2, hir_lit("b")),
1609
            ])
1610
        );
1611
        assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
1612
        assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
1613
        assert_eq!(
1614
            t("(?P<foo>a)(?P<bar>b)"),
1615
            hir_cat(vec![
1616
                hir_group_name(1, "foo", hir_lit("a")),
1617
                hir_group_name(2, "bar", hir_lit("b")),
1618
            ])
1619
        );
1620
        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1621
        assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
1622
        // Non-capturing groups do not consume a capture index.
        assert_eq!(
1623
            t("(?:a)(b)"),
1624
            hir_cat(vec![
1625
                hir_group_nocap(hir_lit("a")),
1626
                hir_group(1, hir_lit("b")),
1627
            ])
1628
        );
1629
        assert_eq!(
1630
            t("(a)(?:b)(c)"),
1631
            hir_cat(vec![
1632
                hir_group(1, hir_lit("a")),
1633
                hir_group_nocap(hir_lit("b")),
1634
                hir_group(2, hir_lit("c")),
1635
            ])
1636
        );
1637
        // Named groups share the index sequence with unnamed groups.
        assert_eq!(
1638
            t("(a)(?P<foo>b)(c)"),
1639
            hir_cat(vec![
1640
                hir_group(1, hir_lit("a")),
1641
                hir_group_name(2, "foo", hir_lit("b")),
1642
                hir_group(3, hir_lit("c")),
1643
            ])
1644
        );
1645
        // A group holding only a flag directive has an empty body.
        assert_eq!(t("()"), hir_group(1, Hir::empty()));
1646
        assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
1647
        assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
1648
        assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
1649
    }
1650
1651
    #[test]
    fn flags() {
        // Expected translation of a case-insensitive `a`: a Unicode class in
        // the default mode, a byte class under `-u`.
        #[cfg(feature = "unicode-case")]
        let unicode_a = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
        let byte_a = || hir_bclass(&[(b'A', b'A'), (b'a', b'a')]);

        // Flags scoped to a group apply inside that group only.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i:a)a"),
            hir_cat(vec![hir_group_nocap(unicode_a()), hir_lit("a")])
        );
        assert_eq!(
            t("(?i-u:a)β"),
            hir_cat(vec![hir_group_nocap(byte_a()), hir_lit("β")])
        );
        assert_eq!(
            t("(?:(?i-u)a)b"),
            hir_cat(vec![hir_group_nocap(byte_a()), hir_lit("b")])
        );
        assert_eq!(
            t("((?i-u)a)b"),
            hir_cat(vec![hir_group(1, byte_a()), hir_lit("b")])
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)(?-i:a)a"),
            hir_cat(vec![hir_group_nocap(hir_lit("a")), unicode_a()])
        );

        // Several flags at once, then a later directive that changes them.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?im)a^"),
            hir_cat(vec![unicode_a(), hir_anchor(hir::Anchor::StartLine)])
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?im)a^(?i-m)a^"),
            hir_cat(vec![
                unicode_a(),
                hir_anchor(hir::Anchor::StartLine),
                unicode_a(),
                hir_anchor(hir::Anchor::StartText),
            ])
        );

        // Under `U` the boolean handed to `hir_star` is inverted for both
        // `a*` and `a*?`; `-U` restores the usual assignment.
        let star_a = |flag| hir_star(flag, hir_lit("a"));
        assert_eq!(
            t("(?U)a*a*?(?-U)a*a*?"),
            hir_cat(vec![
                star_a(false),
                star_a(true),
                star_a(true),
                star_a(false),
            ])
        );

        // A directive in the middle of a group affects the rest of that
        // group and nothing after the group closes.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?:a(?i)a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_cat(vec![hir_lit("a"), unicode_a()])),
                hir_lit("a"),
            ])
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)(?:a(?-i)a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_cat(vec![unicode_a(), hir_lit("a")])),
                unicode_a(),
            ])
        );
    }
1740
1741
    #[test]
    fn escape() {
        // A run of backslash-escaped meta characters must translate to the
        // literal made of the unescaped characters.
        let escaped = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
        let unescaped = r"\.+*?()|[]{}^$#";
        assert_eq!(t(escaped), hir_lit(unescaped));
    }
1748
1749
    #[test]
    fn repetition() {
        let a = || hir_lit("a");

        // `?`, `*` and `+`. The boolean passed to the builders (presumably
        // the greedy flag) is `true` for the bare operator and `false` for
        // its `?`-suffixed form.
        let operators = vec![
            ("a?", hir_quest(true, a())),
            ("a*", hir_star(true, a())),
            ("a+", hir_plus(true, a())),
            ("a??", hir_quest(false, a())),
            ("a*?", hir_star(false, a())),
            ("a+?", hir_plus(false, a())),
        ];
        for (pattern, expected) in operators {
            assert_eq!(t(pattern), expected);
        }

        // Counted repetitions: `{n}`, `{n,}` and `{n,m}`, with and without
        // the trailing `?`.
        let counted = vec![
            ("a{1}", true, hir::RepetitionRange::Exactly(1)),
            ("a{1,}", true, hir::RepetitionRange::AtLeast(1)),
            ("a{1,2}", true, hir::RepetitionRange::Bounded(1, 2)),
            ("a{1}?", false, hir::RepetitionRange::Exactly(1)),
            ("a{1,}?", false, hir::RepetitionRange::AtLeast(1)),
            ("a{1,2}?", false, hir::RepetitionRange::Bounded(1, 2)),
        ];
        for (pattern, flag, range) in counted {
            assert_eq!(t(pattern), hir_range(flag, range, a()));
        }

        // Binding: the operator applies to the single preceding element ...
        let b_opt = || hir_quest(true, hir_lit("b"));
        assert_eq!(t("ab?"), hir_cat(vec![a(), b_opt()]));
        assert_eq!(t("a|b?"), hir_alt(vec![a(), b_opt()]));

        // ... which is the whole group when that element is a group.
        let ab_group = hir_group(1, hir_cat(vec![a(), hir_lit("b")]));
        assert_eq!(t("(ab)?"), hir_quest(true, ab_group));
    }
1803
1804
    #[test]
    fn cat_alt() {
        // Builds one `hir_lit` per string, in order.
        let lits = |items: &[&str]| {
            items.iter().map(|&s| hir_lit(s)).collect::<Vec<_>>()
        };

        // Concatenation inside a group.
        assert_eq!(t("(ab)"), hir_group(1, hir_cat(lits(&["a", "b"]))));

        // Top-level alternations.
        assert_eq!(t("a|b"), hir_alt(lits(&["a", "b"])));
        assert_eq!(t("a|b|c"), hir_alt(lits(&["a", "b", "c"])));
        assert_eq!(t("ab|bc|cd"), hir_alt(lits(&["ab", "bc", "cd"])));

        // The same alternations wrapped in a capturing group.
        assert_eq!(t("(a|b)"), hir_group(1, hir_alt(lits(&["a", "b"]))));
        assert_eq!(
            t("(a|b|c)"),
            hir_group(1, hir_alt(lits(&["a", "b", "c"])))
        );
        assert_eq!(
            t("(ab|bc|cd)"),
            hir_group(1, hir_alt(lits(&["ab", "bc", "cd"])))
        );

        // Nested groups, each holding an alternation; built inside-out.
        let innermost = hir_group(3, hir_lit("cd"));
        let middle = hir_group(2, hir_alt(vec![hir_lit("bc"), innermost]));
        let outer = hir_group(1, hir_alt(vec![hir_lit("ab"), middle]));
        assert_eq!(t("(ab|(bc|(cd)))"), outer);
    }
1854
1855
    #[test]
    fn class_ascii() {
        // In the default (Unicode) mode every `[[:name:]]` class translates
        // to a Unicode class built from the matching `ascii_class` table.
        let kinds = vec![
            ("[[:alnum:]]", ast::ClassAsciiKind::Alnum),
            ("[[:alpha:]]", ast::ClassAsciiKind::Alpha),
            ("[[:ascii:]]", ast::ClassAsciiKind::Ascii),
            ("[[:blank:]]", ast::ClassAsciiKind::Blank),
            ("[[:cntrl:]]", ast::ClassAsciiKind::Cntrl),
            ("[[:digit:]]", ast::ClassAsciiKind::Digit),
            ("[[:graph:]]", ast::ClassAsciiKind::Graph),
            ("[[:lower:]]", ast::ClassAsciiKind::Lower),
            ("[[:print:]]", ast::ClassAsciiKind::Print),
            ("[[:punct:]]", ast::ClassAsciiKind::Punct),
            ("[[:space:]]", ast::ClassAsciiKind::Space),
            ("[[:upper:]]", ast::ClassAsciiKind::Upper),
            ("[[:word:]]", ast::ClassAsciiKind::Word),
            ("[[:xdigit:]]", ast::ClassAsciiKind::Xdigit),
        ];
        for (pattern, kind) in kinds {
            assert_eq!(t(pattern), hir_uclass(ascii_class(&kind)));
        }

        let lower = || ascii_class(&ast::ClassAsciiKind::Lower);

        // Negation and case folding in Unicode mode. The folded class also
        // contains U+017F and U+212A.
        assert_eq!(t("[[:^lower:]]"), hir_negate(hir_uclass(lower())));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[[:lower:]]"),
            hir_uclass(&[
                ('A', 'Z'),
                ('a', 'z'),
                ('\u{17F}', '\u{17F}'),
                ('\u{212A}', '\u{212A}'),
            ])
        );

        // With `-u` the result is a byte class instead.
        assert_eq!(t("(?-u)[[:lower:]]"), hir_bclass_from_char(lower()));
        assert_eq!(
            t("(?i-u)[[:lower:]]"),
            hir_case_fold(hir_bclass_from_char(lower()))
        );

        // Negating the class under `-u` is reported by `t_err` as
        // `InvalidUtf8`, with a span over the inner `[:^lower:]` item.
        let negated_errors = vec![
            (
                "(?-u)[[:^lower:]]",
                Span::new(Position::new(6, 1, 7), Position::new(16, 1, 17)),
            ),
            (
                "(?i-u)[[:^lower:]]",
                Span::new(Position::new(7, 1, 8), Position::new(17, 1, 18)),
            ),
        ];
        for (pattern, span) in negated_errors {
            let expected =
                TestError { kind: hir::ErrorKind::InvalidUtf8, span };
            assert_eq!(t_err(pattern), expected);
        }
    }
1961
1962
    #[test]
    fn class_ascii_multiple() {
        // Regression test for two ASCII classes in one bracketed class.
        // See: https://github.com/rust-lang/regex/issues/680
        let alnum = || ascii_class(&ast::ClassAsciiKind::Alnum);

        // Unicode mode: alnum plus everything from U+0080 upwards.
        let unicode_expected = hir_union(
            hir_uclass(alnum()),
            hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
        );
        assert_eq!(t("[[:alnum:][:^ascii:]]"), unicode_expected);

        // Byte mode: alnum plus the bytes 0x80..=0xFF.
        let bytes_expected = hir_union(
            hir_bclass_from_char(alnum()),
            hir_bclass(&[(0x80, 0xFF)]),
        );
        assert_eq!(t_bytes("(?-u)[[:alnum:][:^ascii:]]"), bytes_expected);
    }
1980
1981
    #[test]
    #[cfg(feature = "unicode-perl")]
    fn class_perl() {
        // Expected Unicode translations of `\d`, `\s` and `\w`.
        let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
        let space = || hir_uclass_query(ClassQuery::Binary("space"));
        let word = || hir_uclass_perl_word();

        // Unicode
        assert_eq!(t(r"\d"), digit());
        assert_eq!(t(r"\s"), space());
        assert_eq!(t(r"\w"), word());
        // The case-insensitive forms expect exactly the same classes.
        #[cfg(feature = "unicode-case")]
        {
            assert_eq!(t(r"(?i)\d"), digit());
            assert_eq!(t(r"(?i)\s"), space());
            assert_eq!(t(r"(?i)\w"), word());
        }

        // Unicode, negated
        assert_eq!(t(r"\D"), hir_negate(digit()));
        assert_eq!(t(r"\S"), hir_negate(space()));
        assert_eq!(t(r"\W"), hir_negate(word()));
        #[cfg(feature = "unicode-case")]
        {
            assert_eq!(t(r"(?i)\D"), hir_negate(digit()));
            assert_eq!(t(r"(?i)\S"), hir_negate(space()));
            assert_eq!(t(r"(?i)\W"), hir_negate(word()));
        }

        // ASCII only: byte classes built from the `ascii_class` tables,
        // with or without the `i` flag.
        let ascii_only = vec![
            (r"(?-u)\d", ast::ClassAsciiKind::Digit),
            (r"(?-u)\s", ast::ClassAsciiKind::Space),
            (r"(?-u)\w", ast::ClassAsciiKind::Word),
            (r"(?i-u)\d", ast::ClassAsciiKind::Digit),
            (r"(?i-u)\s", ast::ClassAsciiKind::Space),
            (r"(?i-u)\w", ast::ClassAsciiKind::Word),
        ];
        for (pattern, kind) in ascii_only {
            assert_eq!(t(pattern), hir_bclass_from_char(ascii_class(&kind)));
        }

        // ASCII only, negated
        let ascii_only_negated = vec![
            (r"(?-u)\D", ast::ClassAsciiKind::Digit),
            (r"(?-u)\S", ast::ClassAsciiKind::Space),
            (r"(?-u)\W", ast::ClassAsciiKind::Word),
            (r"(?i-u)\D", ast::ClassAsciiKind::Digit),
            (r"(?i-u)\S", ast::ClassAsciiKind::Space),
            (r"(?i-u)\W", ast::ClassAsciiKind::Word),
        ];
        for (pattern, kind) in ascii_only_negated {
            assert_eq!(
                t(pattern),
                hir_negate(hir_bclass_from_char(ascii_class(&kind)))
            );
        }
    }
2088
2089
    #[test]
    #[cfg(not(feature = "unicode-perl"))]
    fn class_perl_word_disabled() {
        // Without `unicode-perl`, `\w` must fail with an error spanning the
        // whole two-character escape.
        let whole_escape =
            Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
        let expected = TestError {
            span: whole_escape,
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
        };
        assert_eq!(t_err(r"\w"), expected);
    }
2103
2104
    #[test]
    #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
    fn class_perl_space_disabled() {
        // With both `unicode-perl` and `unicode-bool` off, `\s` must fail
        // with an error spanning the whole two-character escape.
        let whole_escape =
            Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
        let expected = TestError {
            span: whole_escape,
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
        };
        assert_eq!(t_err(r"\s"), expected);
    }
2118
2119
    #[test]
    #[cfg(all(
        not(feature = "unicode-perl"),
        not(feature = "unicode-gencat")
    ))]
    fn class_perl_digit_disabled() {
        // With both `unicode-perl` and `unicode-gencat` off, `\d` must fail
        // with an error spanning the whole two-character escape.
        let whole_escape =
            Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
        let expected = TestError {
            span: whole_escape,
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
        };
        assert_eq!(t_err(r"\d"), expected);
    }
2136
2137
    #[test]
    #[cfg(feature = "unicode-gencat")]
    fn class_unicode_gencat() {
        // Spellings of a general category, paired with the name handed to
        // `ClassQuery::Binary` to build the expected class.
        let positive = vec![
            (r"\pZ", "Z"),
            (r"\pz", "Z"),
            (r"\p{Separator}", "Z"),
            (r"\p{se      PaRa ToR}", "Z"),
            (r"\p{gc:Separator}", "Z"),
            (r"\p{gc=Separator}", "Z"),
            (r"\p{Other}", "Other"),
            (r"\pC", "Other"),
        ];
        for (pattern, name) in positive {
            assert_eq!(t(pattern), hir_uclass_query(ClassQuery::Binary(name)));
        }

        // Negated spellings, including the `!=` form.
        let negated = vec![
            (r"\PZ", "Z"),
            (r"\P{separator}", "Z"),
            (r"\P{gc!=separator}", "Z"),
        ];
        for (pattern, name) in negated {
            assert_eq!(
                t(pattern),
                hir_negate(hir_uclass_query(ClassQuery::Binary(name)))
            );
        }

        // `any`, `assigned` and `ascii`, with and without the `gc:` prefix.
        let special = vec![
            (r"\p{any}", "Any"),
            (r"\p{assigned}", "Assigned"),
            (r"\p{ascii}", "ASCII"),
            (r"\p{gc:any}", "Any"),
            (r"\p{gc:assigned}", "Assigned"),
            (r"\p{gc:ascii}", "ASCII"),
        ];
        for (pattern, name) in special {
            assert_eq!(t(pattern), hir_uclass_query(ClassQuery::Binary(name)));
        }

        // Failures: Unicode classes under `-u`, unknown properties, and an
        // unknown value for a known property.
        let failures = vec![
            (
                r"(?-u)\pZ",
                hir::ErrorKind::UnicodeNotAllowed,
                Span::new(Position::new(5, 1, 6), Position::new(8, 1, 9)),
            ),
            (
                r"(?-u)\p{Separator}",
                hir::ErrorKind::UnicodeNotAllowed,
                Span::new(Position::new(5, 1, 6), Position::new(18, 1, 19)),
            ),
            (
                r"\pE",
                hir::ErrorKind::UnicodePropertyNotFound,
                Span::new(Position::new(0, 1, 1), Position::new(3, 1, 4)),
            ),
            (
                r"\p{Foo}",
                hir::ErrorKind::UnicodePropertyNotFound,
                Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8)),
            ),
            (
                r"\p{gc:Foo}",
                hir::ErrorKind::UnicodePropertyValueNotFound,
                Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
            ),
        ];
        for (pattern, kind, span) in failures {
            assert_eq!(t_err(pattern), TestError { kind, span });
        }
    }
2250
2251
    #[test]
    #[cfg(not(feature = "unicode-gencat"))]
    fn class_unicode_gencat_disabled() {
        // Without `unicode-gencat`, both lookups must fail with
        // `UnicodePropertyNotFound`, spanning the whole `\p{...}` item.
        let separator_err = TestError {
            span: Span::new(Position::new(0, 1, 1), Position::new(13, 1, 14)),
            kind: hir::ErrorKind::UnicodePropertyNotFound,
        };
        assert_eq!(t_err(r"\p{Separator}"), separator_err);

        let any_err = TestError {
            span: Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8)),
            kind: hir::ErrorKind::UnicodePropertyNotFound,
        };
        assert_eq!(t_err(r"\p{Any}"), any_err);
    }
2276
2277
    #[test]
    #[cfg(feature = "unicode-script")]
    fn class_unicode_script() {
        let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));

        assert_eq!(t(r"\p{Greek}"), greek());
        // With `i`, the expected class is case folded; for `\P` the
        // negation is applied on top of the folded class.
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(greek()));
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\P{Greek}"), hir_negate(hir_case_fold(greek())));

        // Unknown values for `sc` and `scx` are both reported as
        // `UnicodePropertyValueNotFound`, spanning the whole item.
        let unknown_values = vec![
            (
                r"\p{sc:Foo}",
                Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
            ),
            (
                r"\p{scx:Foo}",
                Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12)),
            ),
        ];
        for (pattern, span) in unknown_values {
            let expected = TestError {
                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
                span,
            };
            assert_eq!(t_err(pattern), expected);
        }
    }
2318
2319
    #[test]
    #[cfg(not(feature = "unicode-script"))]
    fn class_unicode_script_disabled() {
        // Without `unicode-script`, both the bare script name and the
        // `scx:` form must fail with `UnicodePropertyNotFound`.
        let bare_err = TestError {
            span: Span::new(Position::new(0, 1, 1), Position::new(9, 1, 10)),
            kind: hir::ErrorKind::UnicodePropertyNotFound,
        };
        assert_eq!(t_err(r"\p{Greek}"), bare_err);

        let scx_err = TestError {
            span: Span::new(Position::new(0, 1, 1), Position::new(13, 1, 14)),
            kind: hir::ErrorKind::UnicodePropertyNotFound,
        };
        assert_eq!(t_err(r"\p{scx:Greek}"), scx_err);
    }
2344
2345
    #[test]
    #[cfg(feature = "unicode-age")]
    fn class_unicode_age() {
        // An unknown `age` value must be reported as
        // `UnicodePropertyValueNotFound`, spanning the whole item.
        let whole_item =
            Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
        let expected = TestError {
            span: whole_item,
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        };
        assert_eq!(t_err(r"\p{age:Foo}"), expected);
    }
2359
2360
    #[test]
    #[cfg(feature = "unicode-gencat")]
    fn class_unicode_any_empty() {
        // `\P{any}` must be rejected with `EmptyClassNotAllowed`, spanning
        // the whole item.
        let whole_item =
            Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8));
        let expected = TestError {
            span: whole_item,
            kind: hir::ErrorKind::EmptyClassNotAllowed,
        };
        assert_eq!(t_err(r"\P{any}"), expected);
    }
2374
2375
    #[test]
    #[cfg(not(feature = "unicode-age"))]
    fn class_unicode_age_disabled() {
        // Without `unicode-age`, an `age` query must fail with
        // `UnicodePropertyNotFound`, spanning the whole item.
        let whole_item =
            Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
        let expected = TestError {
            span: whole_item,
            kind: hir::ErrorKind::UnicodePropertyNotFound,
        };
        assert_eq!(t_err(r"\p{age:3.0}"), expected);
    }
2389
2390
    #[test]
2391
    fn class_bracketed() {
2392
        assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
2393
        assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
2394
        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2395
        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2396
        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2397
        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2398
        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2399
        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
2400
        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2401
        assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2402
        #[cfg(feature = "unicode-gencat")]
2403
        assert_eq!(
2404
            t(r"[\pZ]"),
2405
            hir_uclass_query(ClassQuery::Binary("separator"))
2406
        );
2407
        #[cfg(feature = "unicode-gencat")]
2408
        assert_eq!(
2409
            t(r"[\p{separator}]"),
2410
            hir_uclass_query(ClassQuery::Binary("separator"))
2411
        );
2412
        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2413
        assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2414
        #[cfg(feature = "unicode-gencat")]
2415
        assert_eq!(
2416
            t(r"[^\PZ]"),
2417
            hir_uclass_query(ClassQuery::Binary("separator"))
2418
        );
2419
        #[cfg(feature = "unicode-gencat")]
2420
        assert_eq!(
2421
            t(r"[^\P{separator}]"),
2422
            hir_uclass_query(ClassQuery::Binary("separator"))
2423
        );
2424
        #[cfg(all(
2425
            feature = "unicode-case",
2426
            any(feature = "unicode-perl", feature = "unicode-gencat")
2427
        ))]
2428
        assert_eq!(
2429
            t(r"(?i)[^\D]"),
2430
            hir_uclass_query(ClassQuery::Binary("digit"))
2431
        );
2432
        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2433
        assert_eq!(
2434
            t(r"(?i)[^\P{greek}]"),
2435
            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2436
        );
2437
2438
        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2439
        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2440
        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2441
2442
        #[cfg(feature = "unicode-case")]
2443
        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2444
        #[cfg(feature = "unicode-case")]
2445
        assert_eq!(
2446
            t("(?i)[k]"),
2447
            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2448
        );
2449
        #[cfg(feature = "unicode-case")]
2450
        assert_eq!(
2451
            t("(?i)[β]"),
2452
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2453
        );
2454
        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2455
2456
        assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
2457
        assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
2458
        assert_eq!(
2459
            t_bytes("(?-u)[^a]"),
2460
            hir_negate(hir_bclass(&[(b'a', b'a')]))
2461
        );
2462
        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2463
        assert_eq!(
2464
            t(r"[^\d]"),
2465
            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2466
        );
2467
        #[cfg(feature = "unicode-gencat")]
2468
        assert_eq!(
2469
            t(r"[^\pZ]"),
2470
            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2471
        );
2472
        #[cfg(feature = "unicode-gencat")]
2473
        assert_eq!(
2474
            t(r"[^\p{separator}]"),
2475
            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2476
        );
2477
        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2478
        assert_eq!(
2479
            t(r"(?i)[^\p{greek}]"),
2480
            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2481
                "greek"
2482
            ))))
2483
        );
2484
        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2485
        assert_eq!(
2486
            t(r"(?i)[\P{greek}]"),
2487
            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2488
                "greek"
2489
            ))))
2490
        );
2491
2492
        // Test some weird cases.
2493
        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2494
2495
        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2496
        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2497
        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2498
        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2499
        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2500
2501
        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2502
        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2503
        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2504
        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2505
        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2506
2507
        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2508
        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2509
        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2510
        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2511
        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2512
2513
        assert_eq!(
2514
            t_err("(?-u)[^a]"),
2515
            TestError {
2516
                kind: hir::ErrorKind::InvalidUtf8,
2517
                span: Span::new(
2518
                    Position::new(5, 1, 6),
2519
                    Position::new(9, 1, 10)
2520
                ),
2521
            }
2522
        );
2523
        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2524
        assert_eq!(
2525
            t_err(r"[^\s\S]"),
2526
            TestError {
2527
                kind: hir::ErrorKind::EmptyClassNotAllowed,
2528
                span: Span::new(
2529
                    Position::new(0, 1, 1),
2530
                    Position::new(7, 1, 8)
2531
                ),
2532
            }
2533
        );
2534
        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2535
        assert_eq!(
2536
            t_err(r"(?-u)[^\s\S]"),
2537
            TestError {
2538
                kind: hir::ErrorKind::EmptyClassNotAllowed,
2539
                span: Span::new(
2540
                    Position::new(5, 1, 6),
2541
                    Position::new(12, 1, 13)
2542
                ),
2543
            }
2544
        );
2545
    }
2546
2547
    // Checks that a bracketed class containing several items is translated
    // to the union of those items. The expected values are built with the
    // `hir_union`, `hir_uclass`, `hir_uclass_query`, `hir_case_fold` and
    // `hir_negate` helpers, which (like `t`) are defined elsewhere in this
    // test module. Most cases are gated on the Unicode table features they
    // query.
    #[test]
2548
    fn class_bracketed_union() {
2549
        assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2550
        // Literals mixed with a Unicode class: `a` and `b` are expected as
        // the range a-b, unioned with the `separator` query.
        #[cfg(feature = "unicode-gencat")]
2551
        assert_eq!(
2552
            t(r"[a\pZb]"),
2553
            hir_union(
2554
                hir_uclass(&[('a', 'b')]),
2555
                hir_uclass_query(ClassQuery::Binary("separator"))
2556
            )
2557
        );
2558
        #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2559
        assert_eq!(
2560
            t(r"[\pZ\p{Greek}]"),
2561
            hir_union(
2562
                hir_uclass_query(ClassQuery::Binary("greek")),
2563
                hir_uclass_query(ClassQuery::Binary("separator"))
2564
            )
2565
        );
2566
        #[cfg(all(
2567
            feature = "unicode-age",
2568
            feature = "unicode-gencat",
2569
            feature = "unicode-script"
2570
        ))]
2571
        assert_eq!(
2572
            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2573
            hir_union(
2574
                hir_uclass_query(ClassQuery::ByValue {
2575
                    property_name: "age",
2576
                    property_value: "3.0",
2577
                }),
2578
                hir_union(
2579
                    hir_uclass_query(ClassQuery::Binary("greek")),
2580
                    hir_uclass_query(ClassQuery::Binary("separator"))
2581
                )
2582
            )
2583
        );
2584
        // Same union, but written with nested bracketed classes.
        #[cfg(all(
2585
            feature = "unicode-age",
2586
            feature = "unicode-gencat",
2587
            feature = "unicode-script"
2588
        ))]
2589
        assert_eq!(
2590
            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2591
            hir_union(
2592
                hir_uclass_query(ClassQuery::ByValue {
2593
                    property_name: "age",
2594
                    property_value: "3.0",
2595
                }),
2596
                hir_union(
2597
                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
2598
                    hir_union(
2599
                        hir_uclass_query(ClassQuery::Binary("greek")),
2600
                        hir_uclass_query(ClassQuery::Binary("separator"))
2601
                    )
2602
                )
2603
            )
2604
        );
2605

2606
        // With `(?i)`, the whole union is expected to be case folded.
        #[cfg(all(
2607
            feature = "unicode-age",
2608
            feature = "unicode-case",
2609
            feature = "unicode-gencat",
2610
            feature = "unicode-script"
2611
        ))]
2612
        assert_eq!(
2613
            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2614
            hir_case_fold(hir_union(
2615
                hir_uclass_query(ClassQuery::ByValue {
2616
                    property_name: "age",
2617
                    property_value: "3.0",
2618
                }),
2619
                hir_union(
2620
                    hir_uclass_query(ClassQuery::Binary("greek")),
2621
                    hir_uclass_query(ClassQuery::Binary("separator"))
2622
                )
2623
            ))
2624
        );
2625
        // With a leading `^`, the whole union is expected to be negated.
        #[cfg(all(
2626
            feature = "unicode-age",
2627
            feature = "unicode-gencat",
2628
            feature = "unicode-script"
2629
        ))]
2630
        assert_eq!(
2631
            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2632
            hir_negate(hir_union(
2633
                hir_uclass_query(ClassQuery::ByValue {
2634
                    property_name: "age",
2635
                    property_value: "3.0",
2636
                }),
2637
                hir_union(
2638
                    hir_uclass_query(ClassQuery::Binary("greek")),
2639
                    hir_uclass_query(ClassQuery::Binary("separator"))
2640
                )
2641
            ))
2642
        );
2643
        // Both together: the expected value folds case first, then negates.
        #[cfg(all(
2644
            feature = "unicode-age",
2645
            feature = "unicode-case",
2646
            feature = "unicode-gencat",
2647
            feature = "unicode-script"
2648
        ))]
2649
        assert_eq!(
2650
            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2651
            hir_negate(hir_case_fold(hir_union(
2652
                hir_uclass_query(ClassQuery::ByValue {
2653
                    property_name: "age",
2654
                    property_value: "3.0",
2655
                }),
2656
                hir_union(
2657
                    hir_uclass_query(ClassQuery::Binary("greek")),
2658
                    hir_uclass_query(ClassQuery::Binary("separator"))
2659
                )
2660
            )))
2661
        );
2662
    }
2663
2664
    // Checks translation of a bracketed class that contains a nested negated
    // class such as `[a[^c]]`, with and without an outer `^` and `(?i)`.
    // Also checks that an outer negation which leaves nothing to match is
    // reported as `EmptyClassNotAllowed`, with the span of the outer class.
    #[test]
2665
    fn class_bracketed_nested() {
2666
        assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2667
        assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2668
        // `a-c` together with "everything but c" covers everything, written
        // here as the negation of an empty class.
        assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2669

2670
        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2671
        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2672

2673
        #[cfg(feature = "unicode-case")]
2674
        assert_eq!(
2675
            t(r"(?i)[a[^c]]"),
2676
            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2677
        );
2678
        #[cfg(feature = "unicode-case")]
2679
        assert_eq!(
2680
            t(r"(?i)[a-b[^c]]"),
2681
            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2682
        );
2683

2684
        #[cfg(feature = "unicode-case")]
2685
        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2686
        #[cfg(feature = "unicode-case")]
2687
        assert_eq!(
2688
            t(r"(?i)[^a-b[^c]]"),
2689
            hir_uclass(&[('C', 'C'), ('c', 'c')])
2690
        );
2691

2692
        // Negating a class that already covers everything must be an error.
        assert_eq!(
2693
            t_err(r"[^a-c[^c]]"),
2694
            TestError {
2695
                kind: hir::ErrorKind::EmptyClassNotAllowed,
2696
                span: Span::new(
2697
                    Position::new(0, 1, 1),
2698
                    Position::new(10, 1, 11)
2699
                ),
2700
            }
2701
        );
2702
        // Same error with `(?i)`; the expected span starts after the flag
        // group (offset 4).
        #[cfg(feature = "unicode-case")]
2703
        assert_eq!(
2704
            t_err(r"(?i)[^a-c[^c]]"),
2705
            TestError {
2706
                kind: hir::ErrorKind::EmptyClassNotAllowed,
2707
                span: Span::new(
2708
                    Position::new(4, 1, 5),
2709
                    Position::new(14, 1, 15)
2710
                ),
2711
            }
2712
        );
2713
    }
2714
2715
    // Checks translation of the `&&` (intersection) operator inside bracketed
    // classes. The same six patterns are exercised in four modes: Unicode,
    // bytes (`(?-u)`), Unicode case-insensitive (`(?i)`) and byte
    // case-insensitive (`(?i-u)`). The tail covers how `^`, `]`, `-` and `&`
    // behave next to `&&`, and operator precedence.
    #[test]
2716
    fn class_bracketed_intersect() {
2717
        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2718
        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2719
        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2720
        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2721
        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2722
        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2723
        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2724
        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
2725
        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2726

2727
        // Byte-oriented classes.
        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
2728
        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2729
        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2730
        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
2731
        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
2732
        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
2733

2734
        // Unicode case-insensitive: expected value is the case-folded
        // intersection.
        #[cfg(feature = "unicode-case")]
2735
        assert_eq!(
2736
            t("(?i)[abc&&b-c]"),
2737
            hir_case_fold(hir_uclass(&[('b', 'c')]))
2738
        );
2739
        #[cfg(feature = "unicode-case")]
2740
        assert_eq!(
2741
            t("(?i)[abc&&[b-c]]"),
2742
            hir_case_fold(hir_uclass(&[('b', 'c')]))
2743
        );
2744
        #[cfg(feature = "unicode-case")]
2745
        assert_eq!(
2746
            t("(?i)[[abc]&&[b-c]]"),
2747
            hir_case_fold(hir_uclass(&[('b', 'c')]))
2748
        );
2749
        #[cfg(feature = "unicode-case")]
2750
        assert_eq!(
2751
            t("(?i)[a-z&&b-y&&c-x]"),
2752
            hir_case_fold(hir_uclass(&[('c', 'x')]))
2753
        );
2754
        #[cfg(feature = "unicode-case")]
2755
        assert_eq!(
2756
            t("(?i)[c-da-b&&a-d]"),
2757
            hir_case_fold(hir_uclass(&[('a', 'd')]))
2758
        );
2759
        #[cfg(feature = "unicode-case")]
2760
        assert_eq!(
2761
            t("(?i)[a-d&&c-da-b]"),
2762
            hir_case_fold(hir_uclass(&[('a', 'd')]))
2763
        );
2764

2765
        // Byte case-insensitive; these are not feature gated.
        assert_eq!(
2766
            t("(?i-u)[abc&&b-c]"),
2767
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2768
        );
2769
        assert_eq!(
2770
            t("(?i-u)[abc&&[b-c]]"),
2771
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2772
        );
2773
        assert_eq!(
2774
            t("(?i-u)[[abc]&&[b-c]]"),
2775
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2776
        );
2777
        assert_eq!(
2778
            t("(?i-u)[a-z&&b-y&&c-x]"),
2779
            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
2780
        );
2781
        assert_eq!(
2782
            t("(?i-u)[c-da-b&&a-d]"),
2783
            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2784
        );
2785
        assert_eq!(
2786
            t("(?i-u)[a-d&&c-da-b]"),
2787
            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2788
        );
2789

2790
        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
2791
        // `^` is also allowed to be unescaped after `&&`.
2792
        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
2793
        // `]` needs to be escaped after `&&` since it's not at start of class.
2794
        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
2795
        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
2796
        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
2797
        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
2798
        // Test precedence.
2799
        assert_eq!(
2800
            t(r"[a-w&&[^c-g]z]"),
2801
            hir_uclass(&[('a', 'b'), ('h', 'w')])
2802
        );
2803
    }
2804
2805
    // Checks how `&&` interacts with `^` negation, both on the outer class
    // and on nested classes. The first half uses Unicode classes via `t`; the
    // second half repeats the patterns under `(?-u)` via `t_bytes` and
    // expects byte classes built from the ASCII `Digit` / `Word` classes.
    #[test]
2806
    fn class_bracketed_intersect_negate() {
2807
        #[cfg(feature = "unicode-perl")]
2808
        assert_eq!(
2809
            t(r"[^\w&&\d]"),
2810
            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2811
        );
2812
        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2813
        #[cfg(feature = "unicode-perl")]
2814
        assert_eq!(
2815
            t(r"[^[\w&&\d]]"),
2816
            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2817
        );
2818
        // Two negations are expected to cancel out.
        #[cfg(feature = "unicode-perl")]
2819
        assert_eq!(
2820
            t(r"[^[^\w&&\d]]"),
2821
            hir_uclass_query(ClassQuery::Binary("digit"))
2822
        );
2823
        #[cfg(feature = "unicode-perl")]
2824
        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2825

2826
        // Byte-oriented variants of the cases above.
        #[cfg(feature = "unicode-perl")]
2827
        assert_eq!(
2828
            t_bytes(r"(?-u)[^\w&&\d]"),
2829
            hir_negate(hir_bclass_from_char(ascii_class(
2830
                &ast::ClassAsciiKind::Digit
2831
            )))
2832
        );
2833
        assert_eq!(
2834
            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
2835
            hir_negate(hir_bclass(&[(b'a', b'c')]))
2836
        );
2837
        assert_eq!(
2838
            t_bytes(r"(?-u)[^[\w&&\d]]"),
2839
            hir_negate(hir_bclass_from_char(ascii_class(
2840
                &ast::ClassAsciiKind::Digit
2841
            )))
2842
        );
2843
        assert_eq!(
2844
            t_bytes(r"(?-u)[^[^\w&&\d]]"),
2845
            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2846
        );
2847
        assert_eq!(
2848
            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
2849
            hir_negate(hir_bclass_from_char(ascii_class(
2850
                &ast::ClassAsciiKind::Word
2851
            )))
2852
        );
2853
    }
2854
2855
    // Checks translation of the `--` (difference) operator inside bracketed
    // classes, once for a Unicode class and once for a byte class.
    #[test]
2856
    fn class_bracketed_difference() {
2857
        // Letters minus `[:ascii:]`; the ASCII side is expected as the
        // range \0-\x7F.
        #[cfg(feature = "unicode-gencat")]
2858
        assert_eq!(
2859
            t(r"[\pL--[:ascii:]]"),
2860
            hir_difference(
2861
                hir_uclass_query(ClassQuery::Binary("letter")),
2862
                hir_uclass(&[('\0', '\x7F')])
2863
            )
2864
        );
2865

2866
        // `[:alpha:]` minus `[:lower:]` in byte mode is expected to leave
        // only A-Z.
        assert_eq!(
2867
            t(r"(?-u)[[:alpha:]--[:lower:]]"),
2868
            hir_bclass(&[(b'A', b'Z')])
2869
        );
2870
    }
2871
2872
    // Checks translation of the `~~` (symmetric difference) operator inside
    // bracketed classes, for Unicode scripts, plain Unicode ranges and byte
    // ranges.
    #[test]
2873
    fn class_bracketed_symmetric_difference() {
2874
        // Code points that are in exactly one of `sc:Greek` and `scx:Greek`.
        #[cfg(feature = "unicode-script")]
2875
        assert_eq!(
2876
            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
2877
            hir_uclass(&[
2878
                ('\u{0342}', '\u{0342}'),
2879
                ('\u{0345}', '\u{0345}'),
2880
                ('\u{1DC0}', '\u{1DC1}'),
2881
            ])
2882
        );
2883
        // The overlap c-g is dropped, leaving a-b and h-j.
        assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
2884

2885
        assert_eq!(
2886
            t(r"(?-u)[a-g~~c-j]"),
2887
            hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
2888
        );
2889
    }
2890
2891
    // Checks translation of patterns written with the `(?x)` flag: the
    // expected values show that whitespace and `# ...` comments placed
    // inside escapes, class names and repetition counts do not change the
    // result. Several patterns are multi-line raw strings, so their
    // continuation lines are part of the pattern text and must not be
    // re-indented.
    #[test]
2892
    fn ignore_whitespace() {
2893
        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
2894
        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
2895
        assert_eq!(
2896
            t(r"(?x)\x # comment
2897
{ # comment
2898
    53 # comment
2899
} #comment"),
2900
            hir_lit("S")
2901
        );
2902

2903
        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
2904
        assert_eq!(
2905
            t(r"(?x)\x # comment
2906
        53 # comment"),
2907
            hir_lit("S")
2908
        );
2909
        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
2910

2911
        #[cfg(feature = "unicode-gencat")]
2912
        assert_eq!(
2913
            t(r"(?x)\p # comment
2914
{ # comment
2915
    Separator # comment
2916
} # comment"),
2917
            hir_uclass_query(ClassQuery::Binary("separator"))
2918
        );
2919

2920
        assert_eq!(
2921
            t(r"(?x)a # comment
2922
{ # comment
2923
    5 # comment
2924
    , # comment
2925
    10 # comment
2926
} # comment"),
2927
            hir_range(
2928
                true,
2929
                hir::RepetitionRange::Bounded(5, 10),
2930
                hir_lit("a")
2931
            )
2932
        );
2933

2934
        // An escaped space is expected to stay a literal space; the
        // unescaped space and the trailing comment are dropped.
        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
2935
    }
2936
2937
    // Checks the `is_always_utf8` property of translated expressions. All
    // patterns go through `t_bytes` (defined elsewhere in this test module).
    // Every negative example uses the `(?-u)` flag.
    #[test]
2938
    fn analysis_is_always_utf8() {
2939
        // Positive examples.
2940
        assert!(t_bytes(r"a").is_always_utf8());
2941
        assert!(t_bytes(r"ab").is_always_utf8());
2942
        assert!(t_bytes(r"(?-u)a").is_always_utf8());
2943
        assert!(t_bytes(r"(?-u)ab").is_always_utf8());
2944
        assert!(t_bytes(r"\xFF").is_always_utf8());
2945
        assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
2946
        assert!(t_bytes(r"[^a]").is_always_utf8());
2947
        assert!(t_bytes(r"[^a][^a]").is_always_utf8());
2948
        assert!(t_bytes(r"\b").is_always_utf8());
2949
        assert!(t_bytes(r"\B").is_always_utf8());
2950
        assert!(t_bytes(r"(?-u)\b").is_always_utf8());
2951

2952
        // Negative examples.
2953
        assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
2954
        assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
2955
        assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
2956
        assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
2957
        assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
2958
    }
2959
2960
    // Checks the `is_all_assertions` property: it is expected to hold for
    // anchors and word boundaries, alone or combined by concatenation,
    // alternation, grouping and repetition, and to fail as soon as a literal
    // is involved.
    #[test]
2961
    fn analysis_is_all_assertions() {
2962
        // Positive examples.
2963
        assert!(t(r"\b").is_all_assertions());
2964
        assert!(t(r"\B").is_all_assertions());
2965
        assert!(t(r"^").is_all_assertions());
2966
        assert!(t(r"$").is_all_assertions());
2967
        assert!(t(r"\A").is_all_assertions());
2968
        assert!(t(r"\z").is_all_assertions());
2969
        assert!(t(r"$^\z\A\b\B").is_all_assertions());
2970
        assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
2971
        assert!(t(r"^$|$^").is_all_assertions());
2972
        assert!(t(r"((\b)+())*^").is_all_assertions());
2973

2974
        // Negative examples.
2975
        assert!(!t(r"^a").is_all_assertions());
2976
    }
2977
2978
    // Checks the four anchoring properties of translated expressions:
    // `is_anchored_start`, `is_anchored_end`, `is_line_anchored_start` and
    // `is_line_anchored_end`. Each group of four assertions probes one
    // pattern shape (or a start/end mirrored pair) against all four
    // properties, first for shapes expected to be anchored and then for
    // shapes expected not to be.
    #[test]
2979
    fn analysis_is_anchored() {
2980
        // Positive examples.
2981
        assert!(t(r"^").is_anchored_start());
2982
        assert!(t(r"$").is_anchored_end());
2983
        assert!(t(r"^").is_line_anchored_start());
2984
        assert!(t(r"$").is_line_anchored_end());
2985

2986
        assert!(t(r"^^").is_anchored_start());
2987
        assert!(t(r"$$").is_anchored_end());
2988
        assert!(t(r"^^").is_line_anchored_start());
2989
        assert!(t(r"$$").is_line_anchored_end());
2990

2991
        assert!(t(r"^$").is_anchored_start());
2992
        assert!(t(r"^$").is_anchored_end());
2993
        assert!(t(r"^$").is_line_anchored_start());
2994
        assert!(t(r"^$").is_line_anchored_end());
2995

2996
        assert!(t(r"^foo").is_anchored_start());
2997
        assert!(t(r"foo$").is_anchored_end());
2998
        assert!(t(r"^foo").is_line_anchored_start());
2999
        assert!(t(r"foo$").is_line_anchored_end());
3000

3001
        assert!(t(r"^foo|^bar").is_anchored_start());
3002
        assert!(t(r"foo$|bar$").is_anchored_end());
3003
        assert!(t(r"^foo|^bar").is_line_anchored_start());
3004
        assert!(t(r"foo$|bar$").is_line_anchored_end());
3005

3006
        assert!(t(r"^(foo|bar)").is_anchored_start());
3007
        assert!(t(r"(foo|bar)$").is_anchored_end());
3008
        assert!(t(r"^(foo|bar)").is_line_anchored_start());
3009
        assert!(t(r"(foo|bar)$").is_line_anchored_end());
3010

3011
        assert!(t(r"^+").is_anchored_start());
3012
        assert!(t(r"$+").is_anchored_end());
3013
        assert!(t(r"^+").is_line_anchored_start());
3014
        assert!(t(r"$+").is_line_anchored_end());
3015
        assert!(t(r"^++").is_anchored_start());
3016
        assert!(t(r"$++").is_anchored_end());
3017
        assert!(t(r"^++").is_line_anchored_start());
3018
        assert!(t(r"$++").is_line_anchored_end());
3019
        assert!(t(r"(^)+").is_anchored_start());
3020
        assert!(t(r"($)+").is_anchored_end());
3021
        assert!(t(r"(^)+").is_line_anchored_start());
3022
        assert!(t(r"($)+").is_line_anchored_end());
3023

3024
        // `$^` must satisfy all four properties. This group previously
        // asserted `is_anchored_start` and `is_line_anchored_end` twice each,
        // leaving `is_anchored_end` and `is_line_anchored_start` unchecked.
        assert!(t(r"$^").is_anchored_start());
3025
        assert!(t(r"$^").is_anchored_end());
3026
        assert!(t(r"$^").is_line_anchored_start());
3027
        assert!(t(r"$^").is_line_anchored_end());
3028
        assert!(t(r"$^|^$").is_anchored_start());
3029
        assert!(t(r"$^|^$").is_anchored_end());
3030
        assert!(t(r"$^|^$").is_line_anchored_start());
3031
        assert!(t(r"$^|^$").is_line_anchored_end());
3032

3033
        assert!(t(r"\b^").is_anchored_start());
3034
        assert!(t(r"$\b").is_anchored_end());
3035
        assert!(t(r"\b^").is_line_anchored_start());
3036
        assert!(t(r"$\b").is_line_anchored_end());
3037
        assert!(t(r"^(?m:^)").is_anchored_start());
3038
        assert!(t(r"(?m:$)$").is_anchored_end());
3039
        assert!(t(r"^(?m:^)").is_line_anchored_start());
3040
        assert!(t(r"(?m:$)$").is_line_anchored_end());
3041
        assert!(t(r"(?m:^)^").is_anchored_start());
3042
        assert!(t(r"$(?m:$)").is_anchored_end());
3043
        assert!(t(r"(?m:^)^").is_line_anchored_start());
3044
        assert!(t(r"$(?m:$)").is_line_anchored_end());
3045

3046
        // Negative examples.
3047
        assert!(!t(r"(?m)^").is_anchored_start());
3048
        assert!(!t(r"(?m)$").is_anchored_end());
3049
        assert!(!t(r"(?m:^$)|$^").is_anchored_start());
3050
        assert!(!t(r"(?m:^$)|$^").is_anchored_end());
3051
        assert!(!t(r"$^|(?m:^$)").is_anchored_start());
3052
        assert!(!t(r"$^|(?m:^$)").is_anchored_end());
3053

3054
        assert!(!t(r"a^").is_anchored_start());
3055
        assert!(!t(r"$a").is_anchored_start());
3056
        assert!(!t(r"a^").is_line_anchored_start());
3057
        assert!(!t(r"$a").is_line_anchored_start());
3058

3059
        assert!(!t(r"a^").is_anchored_end());
3060
        assert!(!t(r"$a").is_anchored_end());
3061
        assert!(!t(r"a^").is_line_anchored_end());
3062
        assert!(!t(r"$a").is_line_anchored_end());
3063

3064
        assert!(!t(r"^foo|bar").is_anchored_start());
3065
        assert!(!t(r"foo|bar$").is_anchored_end());
3066
        assert!(!t(r"^foo|bar").is_line_anchored_start());
3067
        assert!(!t(r"foo|bar$").is_line_anchored_end());
3068

3069
        // A repetition that may match zero times does not count as anchored.
        assert!(!t(r"^*").is_anchored_start());
3070
        assert!(!t(r"$*").is_anchored_end());
3071
        assert!(!t(r"^*").is_line_anchored_start());
3072
        assert!(!t(r"$*").is_line_anchored_end());
3073
        assert!(!t(r"^*+").is_anchored_start());
3074
        assert!(!t(r"$*+").is_anchored_end());
3075
        assert!(!t(r"^*+").is_line_anchored_start());
3076
        assert!(!t(r"$*+").is_line_anchored_end());
3077
        assert!(!t(r"^+*").is_anchored_start());
3078
        assert!(!t(r"$+*").is_anchored_end());
3079
        assert!(!t(r"^+*").is_line_anchored_start());
3080
        assert!(!t(r"$+*").is_line_anchored_end());
3081
        assert!(!t(r"(^)*").is_anchored_start());
3082
        assert!(!t(r"($)*").is_anchored_end());
3083
        assert!(!t(r"(^)*").is_line_anchored_start());
3084
        assert!(!t(r"($)*").is_line_anchored_end());
3085
    }
3086
3087
    // Checks that `is_line_anchored_start` / `is_line_anchored_end` hold for
    // patterns whose anchors are written in multi-line mode (`(?m)` or
    // `(?m:...)`), including alternations that mix multi-line and plain
    // anchors.
    #[test]
3088
    fn analysis_is_line_anchored() {
3089
        assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
3090
        assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
3091

3092
        assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
3093
        assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
3094

3095
        assert!(t(r"(?m)^").is_line_anchored_start());
3096
        assert!(t(r"(?m)$").is_line_anchored_end());
3097

3098
        assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
3099
        assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
3100

3101
        assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
3102
        assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
3103
    }
3104
3105
    // Checks the `is_any_anchored_start` / `is_any_anchored_end` properties:
    // expected for `^`, `$`, `\A` and `\z` on their matching side, and not
    // expected for multi-line anchors or for an anchor of the opposite side.
    #[test]
3106
    fn analysis_is_any_anchored() {
3107
        // Positive examples.
3108
        assert!(t(r"^").is_any_anchored_start());
3109
        assert!(t(r"$").is_any_anchored_end());
3110
        assert!(t(r"\A").is_any_anchored_start());
3111
        assert!(t(r"\z").is_any_anchored_end());
3112

3113
        // Negative examples.
3114
        assert!(!t(r"(?m)^").is_any_anchored_start());
3115
        assert!(!t(r"(?m)$").is_any_anchored_end());
3116
        assert!(!t(r"$").is_any_anchored_start());
3117
        assert!(!t(r"^").is_any_anchored_end());
3118
    }
3119
3120
    // Checks the `is_match_empty` property: expected for the empty pattern,
    // empty groups, repetitions with a zero lower bound, alternations with
    // at least one such branch, and all assertions; not expected when every
    // path through the pattern needs at least one character.
    #[test]
3121
    fn analysis_is_match_empty() {
3122
        // Positive examples.
3123
        assert!(t(r"").is_match_empty());
3124
        assert!(t(r"()").is_match_empty());
3125
        assert!(t(r"()*").is_match_empty());
3126
        assert!(t(r"()+").is_match_empty());
3127
        assert!(t(r"()?").is_match_empty());
3128
        assert!(t(r"a*").is_match_empty());
3129
        assert!(t(r"a?").is_match_empty());
3130
        assert!(t(r"a{0}").is_match_empty());
3131
        assert!(t(r"a{0,}").is_match_empty());
3132
        assert!(t(r"a{0,1}").is_match_empty());
3133
        assert!(t(r"a{0,10}").is_match_empty());
3134
        #[cfg(feature = "unicode-gencat")]
3135
        assert!(t(r"\pL*").is_match_empty());
3136
        assert!(t(r"a*|b").is_match_empty());
3137
        assert!(t(r"b|a*").is_match_empty());
3138
        assert!(t(r"a|").is_match_empty());
3139
        assert!(t(r"|a").is_match_empty());
3140
        assert!(t(r"a||b").is_match_empty());
3141
        assert!(t(r"a*a?(abcd)*").is_match_empty());
3142
        assert!(t(r"^").is_match_empty());
3143
        assert!(t(r"$").is_match_empty());
3144
        assert!(t(r"(?m)^").is_match_empty());
3145
        assert!(t(r"(?m)$").is_match_empty());
3146
        assert!(t(r"\A").is_match_empty());
3147
        assert!(t(r"\z").is_match_empty());
3148
        assert!(t(r"\B").is_match_empty());
3149
        // This case goes through `t_bytes` rather than `t`.
        assert!(t_bytes(r"(?-u)\B").is_match_empty());
3150
        assert!(t(r"\b").is_match_empty());
3151
        assert!(t(r"(?-u)\b").is_match_empty());
3152

3153
        // Negative examples.
3154
        assert!(!t(r"a+").is_match_empty());
3155
        assert!(!t(r"a{1}").is_match_empty());
3156
        assert!(!t(r"a{1,}").is_match_empty());
3157
        assert!(!t(r"a{1,2}").is_match_empty());
3158
        assert!(!t(r"a{1,10}").is_match_empty());
3159
        assert!(!t(r"b|a").is_match_empty());
3160
        assert!(!t(r"a*a+(abcd)*").is_match_empty());
3161
    }
3162
3163
    // Checks the `is_literal` property: expected for plain literal strings
    // (a leading flag group does not matter), and not expected for the empty
    // pattern, anchors, alternations, groups, repetitions or classes.
    #[test]
3164
    fn analysis_is_literal() {
3165
        // Positive examples.
3166
        assert!(t(r"a").is_literal());
3167
        assert!(t(r"ab").is_literal());
3168
        assert!(t(r"abc").is_literal());
3169
        assert!(t(r"(?m)abc").is_literal());
3170

3171
        // Negative examples.
3172
        assert!(!t(r"").is_literal());
3173
        assert!(!t(r"^").is_literal());
3174
        assert!(!t(r"a|b").is_literal());
3175
        assert!(!t(r"(a)").is_literal());
3176
        assert!(!t(r"a+").is_literal());
3177
        assert!(!t(r"foo(a)").is_literal());
3178
        assert!(!t(r"(a)foo").is_literal());
3179
        assert!(!t(r"[a]").is_literal());
3180
    }
3181
3182
    // Checks the `is_alternation_literal` property: expected for a plain
    // literal and for an alternation whose branches are all plain literals;
    // not expected once any branch is a group or a class, or for the empty
    // pattern, an anchor or a repetition.
    #[test]
3183
    fn analysis_is_alternation_literal() {
3184
        // Positive examples.
3185
        assert!(t(r"a").is_alternation_literal());
3186
        assert!(t(r"ab").is_alternation_literal());
3187
        assert!(t(r"abc").is_alternation_literal());
3188
        assert!(t(r"(?m)abc").is_alternation_literal());
3189
        assert!(t(r"a|b").is_alternation_literal());
3190
        assert!(t(r"a|b|c").is_alternation_literal());
3191
        assert!(t(r"foo|bar").is_alternation_literal());
3192
        assert!(t(r"foo|bar|baz").is_alternation_literal());
3193

3194
        // Negative examples.
3195
        assert!(!t(r"").is_alternation_literal());
3196
        assert!(!t(r"^").is_alternation_literal());
3197
        assert!(!t(r"(a)").is_alternation_literal());
3198
        assert!(!t(r"a+").is_alternation_literal());
3199
        assert!(!t(r"foo(a)").is_alternation_literal());
3200
        assert!(!t(r"(a)foo").is_alternation_literal());
3201
        assert!(!t(r"[a]").is_alternation_literal());
3202
        assert!(!t(r"[a]|b").is_alternation_literal());
3203
        assert!(!t(r"a|[b]").is_alternation_literal());
3204
        assert!(!t(r"(a)|b").is_alternation_literal());
3205
        assert!(!t(r"a|(b)").is_alternation_literal());
3206
    }
3207
}