Coverage Report

Created: 2021-11-03 07:11

/rust/registry/src/github.com-1ecc6299db9ec823/regex-syntax-0.6.25/src/ast/parse.rs
Line
Count
Source (jump to first uncovered line)
1
/*!
2
This module provides a regular expression parser.
3
*/
4
5
use std::borrow::Borrow;
6
use std::cell::{Cell, RefCell};
7
use std::mem;
8
use std::result;
9
10
use crate::ast::{self, Ast, Position, Span};
11
use crate::either::Either;
12
13
use crate::is_meta_character;
14
15
type Result<T> = result::Result<T, ast::Error>;
16
17
/// A primitive is an expression with no sub-expressions. This includes
18
/// literals, assertions and non-set character classes. This representation
19
/// is used as intermediate state in the parser.
20
///
21
/// This does not include ASCII character classes, since they can only appear
22
/// within a set character class.
23
0
#[derive(Clone, Debug, Eq, PartialEq)]
Unexecuted instantiation: <regex_syntax::ast::parse::Primitive as core::cmp::PartialEq>::eq
Unexecuted instantiation: <regex_syntax::ast::parse::Primitive as core::cmp::PartialEq>::ne
24
enum Primitive {
25
    Literal(ast::Literal),
26
    Assertion(ast::Assertion),
27
    Dot(Span),
28
    Perl(ast::ClassPerl),
29
    Unicode(ast::ClassUnicode),
30
}
31
32
impl Primitive {
33
    /// Return the span of this primitive.
34
8
    fn span(&self) -> &Span {
35
8
        match *self {
36
8
            Primitive::Literal(ref x) => &x.span,
37
0
            Primitive::Assertion(ref x) => &x.span,
38
0
            Primitive::Dot(ref span) => span,
39
0
            Primitive::Perl(ref x) => &x.span,
40
0
            Primitive::Unicode(ref x) => &x.span,
41
        }
42
8
    }
43
44
    /// Convert this primitive into a proper AST.
45
200
    fn into_ast(self) -> Ast {
46
200
        match self {
47
186
            Primitive::Literal(lit) => Ast::Literal(lit),
48
12
            Primitive::Assertion(assert) => Ast::Assertion(assert),
49
2
            Primitive::Dot(span) => Ast::Dot(span),
50
0
            Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51
0
            Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52
        }
53
200
    }
54
55
    /// Convert this primitive into an item in a character class.
56
    ///
57
    /// If this primitive is not a legal item (i.e., an assertion or a dot),
58
    /// then return an error.
59
12
    fn into_class_set_item<P: Borrow<Parser>>(
60
12
        self,
61
12
        p: &ParserI<'_, P>,
62
12
    ) -> Result<ast::ClassSetItem> {
63
12
        use self::Primitive::*;
64
12
        use crate::ast::ClassSetItem;
65
12
66
12
        match self {
67
12
            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68
0
            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69
0
            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70
0
            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71
        }
72
12
    }
73
74
    /// Convert this primitive into a literal in a character class. In
75
    /// particular, literals are the only valid items that can appear in
76
    /// ranges.
77
    ///
78
    /// If this primitive is not a legal item (i.e., a class, assertion or a
79
    /// dot), then return an error.
80
8
    fn into_class_literal<P: Borrow<Parser>>(
81
8
        self,
82
8
        p: &ParserI<'_, P>,
83
8
    ) -> Result<ast::Literal> {
84
8
        use self::Primitive::*;
85
8
86
8
        match self {
87
8
            Literal(lit) => Ok(lit),
88
0
            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89
        }
90
8
    }
91
}
92
93
/// Returns true if the given character is a hexadecimal digit.
94
0
fn is_hex(c: char) -> bool {
95
0
    ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
96
0
}
97
98
/// Returns true if the given character is valid in a capture group name.
99
///
100
/// If `first` is true, then `c` is treated as the first character in the
101
/// group name (which must be alphabetic or underscore).
102
0
fn is_capture_char(c: char, first: bool) -> bool {
103
0
    c == '_'
104
0
        || (!first
105
0
            && (('0' <= c && c <= '9') || c == '.' || c == '[' || c == ']'))
106
0
        || ('A' <= c && c <= 'Z')
107
0
        || ('a' <= c && c <= 'z')
108
0
}
109
110
/// A builder for a regular expression parser.
111
///
112
/// This builder permits modifying configuration options for the parser.
113
0
#[derive(Clone, Debug)]
114
pub struct ParserBuilder {
115
    ignore_whitespace: bool,
116
    nest_limit: u32,
117
    octal: bool,
118
}
119
120
impl Default for ParserBuilder {
121
6
    fn default() -> ParserBuilder {
122
6
        ParserBuilder::new()
123
6
    }
124
}
125
126
impl ParserBuilder {
127
    /// Create a new parser builder with a default configuration.
128
6
    pub fn new() -> ParserBuilder {
129
6
        ParserBuilder {
130
6
            ignore_whitespace: false,
131
6
            nest_limit: 250,
132
6
            octal: false,
133
6
        }
134
6
    }
135
136
    /// Build a parser from this configuration (the pattern is given to `parse`).
137
6
    pub fn build(&self) -> Parser {
138
6
        Parser {
139
6
            pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
140
6
            capture_index: Cell::new(0),
141
6
            nest_limit: self.nest_limit,
142
6
            octal: self.octal,
143
6
            initial_ignore_whitespace: self.ignore_whitespace,
144
6
            ignore_whitespace: Cell::new(self.ignore_whitespace),
145
6
            comments: RefCell::new(vec![]),
146
6
            stack_group: RefCell::new(vec![]),
147
6
            stack_class: RefCell::new(vec![]),
148
6
            capture_names: RefCell::new(vec![]),
149
6
            scratch: RefCell::new(String::new()),
150
6
        }
151
6
    }
152
153
    /// Set the nesting limit for this parser.
154
    ///
155
    /// The nesting limit controls how deep the abstract syntax tree is allowed
156
    /// to be. If the AST exceeds the given limit (e.g., with too many nested
157
    /// groups), then an error is returned by the parser.
158
    ///
159
    /// The purpose of this limit is to act as a heuristic to prevent stack
160
    /// overflow for consumers that do structural induction on an `Ast` using
161
    /// explicit recursion. While this crate never does this (instead using
162
    /// constant stack space and moving the call stack to the heap), other
163
    /// crates may.
164
    ///
165
    /// This limit is not checked until the entire Ast is parsed. Therefore,
166
    /// if callers want to put a limit on the amount of heap space used, then
167
    /// they should impose a limit on the length, in bytes, of the concrete
168
    /// pattern string. In particular, this is viable since this parser
169
    /// implementation will limit itself to heap space proportional to the
170
    /// length of the pattern string.
171
    ///
172
    /// Note that a nest limit of `0` will return a nest limit error for most
173
    /// patterns but not all. For example, a nest limit of `0` permits `a` but
174
    /// not `ab`, since `ab` requires a concatenation, which results in a nest
175
    /// depth of `1`. In general, a nest limit is not something that manifests
176
    /// in an obvious way in the concrete syntax, therefore, it should not be
177
    /// used in a granular way.
178
6
    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
179
6
        self.nest_limit = limit;
180
6
        self
181
6
    }
182
183
    /// Whether to support octal syntax or not.
184
    ///
185
    /// Octal syntax is a little-known way of uttering Unicode codepoints in
186
    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
187
    /// `\141` are all equivalent regular expressions, where the last example
188
    /// shows octal syntax.
189
    ///
190
    /// While supporting octal syntax isn't in and of itself a problem, it does
191
    /// make good error messages harder. That is, in PCRE based regex engines,
192
    /// syntax like `\0` invokes a backreference, which is explicitly
193
    /// unsupported in Rust's regex engine. However, many users expect it to
194
    /// be supported. Therefore, when octal support is disabled, the error
195
    /// message will explicitly mention that backreferences aren't supported.
196
    ///
197
    /// Octal syntax is disabled by default.
198
6
    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
199
6
        self.octal = yes;
200
6
        self
201
6
    }
202
203
    /// Enable verbose mode in the regular expression.
204
    ///
205
    /// When enabled, verbose mode permits insignificant whitespace in many
206
    /// places in the regular expression, as well as comments. Comments are
207
    /// started using `#` and continue until the end of the line.
208
    ///
209
    /// By default, this is disabled. It may be selectively enabled in the
210
    /// regular expression by using the `x` flag regardless of this setting.
211
6
    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
212
6
        self.ignore_whitespace = yes;
213
6
        self
214
6
    }
215
}
216
217
/// A regular expression parser.
218
///
219
/// This parses a string representation of a regular expression into an
220
/// abstract syntax tree. The size of the tree is proportional to the length
221
/// of the regular expression pattern.
222
///
223
/// A `Parser` can be configured in more detail via a
224
/// [`ParserBuilder`](struct.ParserBuilder.html).
225
0
#[derive(Clone, Debug)]
226
pub struct Parser {
227
    /// The current position of the parser.
228
    pos: Cell<Position>,
229
    /// The current capture index.
230
    capture_index: Cell<u32>,
231
    /// The maximum number of open parens/brackets allowed. If the parser
232
    /// exceeds this number, then an error is returned.
233
    nest_limit: u32,
234
    /// Whether to support octal syntax or not. When `false`, the parser will
235
    /// return an error helpfully pointing out that backreferences are not
236
    /// supported.
237
    octal: bool,
238
    /// The initial setting for `ignore_whitespace` as provided by
239
    /// `ParserBuilder`. This is used when resetting the parser's state.
240
    initial_ignore_whitespace: bool,
241
    /// Whether whitespace should be ignored. When enabled, comments are
242
    /// also permitted.
243
    ignore_whitespace: Cell<bool>,
244
    /// A list of comments, in order of appearance.
245
    comments: RefCell<Vec<ast::Comment>>,
246
    /// A stack of grouped sub-expressions, including alternations.
247
    stack_group: RefCell<Vec<GroupState>>,
248
    /// A stack of nested character classes. This is only non-empty when
249
    /// parsing a class.
250
    stack_class: RefCell<Vec<ClassState>>,
251
    /// A sorted sequence of capture names. This is used to detect duplicate
252
    /// capture names and report an error if one is detected.
253
    capture_names: RefCell<Vec<ast::CaptureName>>,
254
    /// A scratch buffer used in various places. Mostly this is used to
255
    /// accumulate relevant characters from parts of a pattern.
256
    scratch: RefCell<String>,
257
}
258
259
/// ParserI is the internal parser implementation.
260
///
261
/// We use this separate type so that we can carry the provided pattern string
262
/// along with us. In particular, a `Parser` internal state is not tied to any
263
/// one pattern, but `ParserI` is.
264
///
265
/// This type also lets us use `ParserI<&Parser>` in production code while
266
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
267
/// work against the internal interface of the parser.
268
0
#[derive(Clone, Debug)]
269
struct ParserI<'s, P> {
270
    /// The parser state/configuration.
271
    parser: P,
272
    /// The full regular expression provided by the user.
273
    pattern: &'s str,
274
}
275
276
/// GroupState represents a single stack frame while parsing nested groups
277
/// and alternations. Each frame records the state up to an opening parenthesis
278
/// or an alternation operator `|`.
279
0
#[derive(Clone, Debug)]
280
enum GroupState {
281
    /// This state is pushed whenever an opening group is found.
282
    Group {
283
        /// The concatenation immediately preceding the opening group.
284
        concat: ast::Concat,
285
        /// The group that has been opened. Its sub-AST is always empty.
286
        group: ast::Group,
287
        /// Whether this group has the `x` flag enabled or not.
288
        ignore_whitespace: bool,
289
    },
290
    /// This state is pushed whenever a new alternation branch is found. If
291
    /// an alternation branch is found and this state is at the top of the
292
    /// stack, then this state should be modified to include the new
293
    /// alternation.
294
    Alternation(ast::Alternation),
295
}
296
297
/// ClassState represents a single stack frame while parsing character classes.
298
/// Each frame records the state up to an intersection, difference, symmetric
299
/// difference or nested class.
300
///
301
/// Note that a parser's character class stack is only non-empty when parsing
302
/// a character class. In all other cases, it is empty.
303
0
#[derive(Clone, Debug)]
304
enum ClassState {
305
    /// This state is pushed whenever an opening bracket is found.
306
    Open {
307
        /// The union of class items immediately preceding this class.
308
        union: ast::ClassSetUnion,
309
        /// The class that has been opened. Typically this just corresponds
310
        /// to the `[`, but it can also include `[^` since `^` indicates
311
        /// negation of the class.
312
        set: ast::ClassBracketed,
313
    },
314
    /// This state is pushed when an operator is seen. When popped, the stored
315
    /// set becomes the left hand side of the operator.
316
    Op {
317
        /// The type of the operation, i.e., &&, -- or ~~.
318
        kind: ast::ClassSetBinaryOpKind,
319
        /// The left-hand side of the operator.
320
        lhs: ast::ClassSet,
321
    },
322
}
323
324
impl Parser {
325
    /// Create a new parser with a default configuration.
326
    ///
327
    /// The parser can be run with either the `parse` or `parse_with_comments`
328
    /// methods. The parse methods return an abstract syntax tree.
329
    ///
330
    /// To set configuration options on the parser, use
331
    /// [`ParserBuilder`](struct.ParserBuilder.html).
332
0
    pub fn new() -> Parser {
333
0
        ParserBuilder::new().build()
334
0
    }
335
336
    /// Parse the regular expression into an abstract syntax tree.
337
6
    pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
338
6
        ParserI::new(self, pattern).parse()
339
6
    }
340
341
    /// Parse the regular expression and return an abstract syntax tree with
342
    /// all of the comments found in the pattern.
343
0
    pub fn parse_with_comments(
344
0
        &mut self,
345
0
        pattern: &str,
346
0
    ) -> Result<ast::WithComments> {
347
0
        ParserI::new(self, pattern).parse_with_comments()
348
0
    }
349
350
    /// Reset the internal state of a parser.
351
    ///
352
    /// This is called at the beginning of every parse. This prevents the
353
    /// parser from running with inconsistent state (say, if a previous
354
    /// invocation returned an error and the parser is reused).
355
6
    fn reset(&self) {
356
6
        // These settings should be in line with the construction
357
6
        // in `ParserBuilder::build`.
358
6
        self.pos.set(Position { offset: 0, line: 1, column: 1 });
359
6
        self.ignore_whitespace.set(self.initial_ignore_whitespace);
360
6
        self.comments.borrow_mut().clear();
361
6
        self.stack_group.borrow_mut().clear();
362
6
        self.stack_class.borrow_mut().clear();
363
6
    }
364
}
365
366
impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
367
    /// Build an internal parser from a parser configuration and a pattern.
368
6
    fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
369
6
        ParserI { parser: parser, pattern: pattern }
370
6
    }
371
372
    /// Return a reference to the parser state.
373
14.4k
    fn parser(&self) -> &Parser {
374
14.4k
        self.parser.borrow()
375
14.4k
    }
376
377
    /// Return a reference to the pattern being parsed.
378
8.49k
    fn pattern(&self) -> &str {
379
8.49k
        self.pattern.borrow()
380
8.49k
    }
381
382
    /// Create a new error with the given span and error type.
383
0
    fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
384
0
        ast::Error {
385
0
            kind: kind,
386
0
            pattern: self.pattern().to_string(),
387
0
            span: span,
388
0
        }
389
0
    }
390
391
    /// Return the current offset of the parser.
392
    ///
393
    /// The offset starts at `0` from the beginning of the regular expression
394
    /// pattern string.
395
8.81k
    fn offset(&self) -> usize {
396
8.81k
        self.parser().pos.get().offset
397
8.81k
    }
398
399
    /// Return the current line number of the parser.
400
    ///
401
    /// The line number starts at `1`.
402
256
    fn line(&self) -> usize {
403
256
        self.parser().pos.get().line
404
256
    }
405
406
    /// Return the current column of the parser.
407
    ///
408
    /// The column number starts at `1` and is reset whenever a `\n` is seen.
409
256
    fn column(&self) -> usize {
410
256
        self.parser().pos.get().column
411
256
    }
412
413
    /// Return the next capturing index. Each subsequent call increments the
414
    /// internal index.
415
    ///
416
    /// The span given should correspond to the location of the opening
417
    /// parenthesis.
418
    ///
419
    /// If the capture limit is exceeded, then an error is returned.
420
22
    fn next_capture_index(&self, span: Span) -> Result<u32> {
421
22
        let current = self.parser().capture_index.get();
422
22
        let i = current.checked_add(1).ok_or_else(|| {
423
0
            self.error(span, ast::ErrorKind::CaptureLimitExceeded)
424
22
        })?;
425
22
        self.parser().capture_index.set(i);
426
22
        Ok(i)
427
22
    }
428
429
    /// Adds the given capture name to this parser. If this capture name has
430
    /// already been used, then an error is returned.
431
0
    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
432
0
        let mut names = self.parser().capture_names.borrow_mut();
433
0
        match names
434
0
            .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
435
        {
436
0
            Err(i) => {
437
0
                names.insert(i, cap.clone());
438
0
                Ok(())
439
            }
440
0
            Ok(i) => Err(self.error(
441
0
                cap.span,
442
0
                ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
443
0
            )),
444
        }
445
0
    }
446
447
    /// Return whether the parser should ignore whitespace or not.
448
834
    fn ignore_whitespace(&self) -> bool {
449
834
        self.parser().ignore_whitespace.get()
450
834
    }
451
452
    /// Return the character at the current position of the parser.
453
    ///
454
    /// This panics if the current position does not point to a valid char.
455
5.10k
    fn char(&self) -> char {
456
5.10k
        self.char_at(self.offset())
457
5.10k
    }
458
459
    /// Return the character at the given position.
460
    ///
461
    /// This panics if the given position does not point to a valid char.
462
5.10k
    fn char_at(&self, i: usize) -> char {
463
5.10k
        self.pattern()[i..]
464
5.10k
            .chars()
465
5.10k
            .next()
466
5.10k
            .unwrap_or_else(|| panic!("expected char at offset {}", i))
467
5.10k
    }
468
469
    /// Bump the parser to the next Unicode scalar value.
470
    ///
471
    /// If the end of the input has been reached, then `false` is returned.
472
1.10k
    fn bump(&self) -> bool {
473
1.10k
        if self.is_eof() {
474
0
            return false;
475
1.10k
        }
476
1.10k
        let Position { mut offset, mut line, mut column } = self.pos();
477
1.10k
        if self.char() == '\n' {
478
0
            line = line.checked_add(1).unwrap();
479
0
            column = 1;
480
1.10k
        } else {
481
1.10k
            column = column.checked_add(1).unwrap();
482
1.10k
        }
483
1.10k
        offset += self.char().len_utf8();
484
1.10k
        self.parser().pos.set(Position {
485
1.10k
            offset: offset,
486
1.10k
            line: line,
487
1.10k
            column: column,
488
1.10k
        });
489
1.10k
        self.pattern()[self.offset()..].chars().next().is_some()
490
1.10k
    }
491
492
    /// If the substring starting at the current position of the parser has
493
    /// the given prefix, then bump the parser to the character immediately
494
    /// following the prefix and return true. Otherwise, don't bump the parser
495
    /// and return false.
496
276
    fn bump_if(&self, prefix: &str) -> bool {
497
276
        if self.pattern()[self.offset()..].starts_with(prefix) {
498
134
            for _ in 0..prefix.chars().count() {
499
134
                self.bump();
500
134
            }
501
74
            true
502
        } else {
503
202
            false
504
        }
505
276
    }
506
507
    /// Returns true if and only if the parser is positioned at a look-around
508
    /// prefix. The conditions under which this returns true must always
509
    /// correspond to a regular expression that would otherwise be considered
510
    /// invalid.
511
    ///
512
    /// This should only be called immediately after parsing the opening of
513
    /// a group or a set of flags.
514
36
    fn is_lookaround_prefix(&self) -> bool {
515
36
        self.bump_if("?=")
516
36
            || self.bump_if("?!")
517
36
            || self.bump_if("?<=")
518
36
            || self.bump_if("?<!")
519
36
    }
520
521
    /// Bump the parser, and if the `x` flag is enabled, bump through any
522
    /// subsequent spaces. Return true if and only if the parser is not at
523
    /// EOF.
524
72
    fn bump_and_bump_space(&self) -> bool {
525
72
        if !self.bump() {
526
0
            return false;
527
72
        }
528
72
        self.bump_space();
529
72
        !self.is_eof()
530
72
    }
531
532
    /// If the `x` flag is enabled (i.e., whitespace insensitivity with
533
    /// comments), then this will advance the parser through all whitespace
534
    /// and comments to the next non-whitespace non-comment byte.
535
    ///
536
    /// If the `x` flag is disabled, then this is a no-op.
537
    ///
538
    /// This should be used selectively throughout the parser where
539
    /// arbitrary whitespace is permitted when the `x` flag is enabled. For
540
    /// example, `{   5  , 6}` is equivalent to `{5,6}`.
541
790
    fn bump_space(&self) {
542
790
        if !self.ignore_whitespace() {
543
790
            return;
544
0
        }
545
0
        while !self.is_eof() {
546
0
            if self.char().is_whitespace() {
547
0
                self.bump();
548
0
            } else if self.char() == '#' {
549
0
                let start = self.pos();
550
0
                let mut comment_text = String::new();
551
0
                self.bump();
552
0
                while !self.is_eof() {
553
0
                    let c = self.char();
554
0
                    self.bump();
555
0
                    if c == '\n' {
556
0
                        break;
557
0
                    }
558
0
                    comment_text.push(c);
559
                }
560
0
                let comment = ast::Comment {
561
0
                    span: Span::new(start, self.pos()),
562
0
                    comment: comment_text,
563
0
                };
564
0
                self.parser().comments.borrow_mut().push(comment);
565
            } else {
566
0
                break;
567
            }
568
        }
569
790
    }
570
571
    /// Peek at the next character in the input without advancing the parser.
572
    ///
573
    /// If the input has been exhausted, then this returns `None`.
574
8
    fn peek(&self) -> Option<char> {
575
8
        if self.is_eof() {
576
0
            return None;
577
8
        }
578
8
        self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
579
8
    }
580
581
    /// Like peek, but will ignore spaces when the parser is in whitespace
582
    /// insensitive mode.
583
8
    fn peek_space(&self) -> Option<char> {
584
8
        if !self.ignore_whitespace() {
585
8
            return self.peek();
586
0
        }
587
0
        if self.is_eof() {
588
0
            return None;
589
0
        }
590
0
        let mut start = self.offset() + self.char().len_utf8();
591
0
        let mut in_comment = false;
592
0
        for (i, c) in self.pattern()[start..].char_indices() {
593
0
            if c.is_whitespace() {
594
0
                continue;
595
0
            } else if !in_comment && c == '#' {
596
0
                in_comment = true;
597
0
            } else if in_comment && c == '\n' {
598
0
                in_comment = false;
599
0
            } else {
600
0
                start += i;
601
0
                break;
602
            }
603
        }
604
0
        self.pattern()[start..].chars().next()
605
8
    }
606
607
    /// Returns true if the next call to `bump` would return false.
608
1.94k
    fn is_eof(&self) -> bool {
609
1.94k
        self.offset() == self.pattern().len()
610
1.94k
    }
611
612
    /// Return the current position of the parser, which includes the offset,
613
    /// line and column.
614
2.36k
    fn pos(&self) -> Position {
615
2.36k
        self.parser().pos.get()
616
2.36k
    }
617
618
    /// Create a span at the current position of the parser. Both the start
619
    /// and end of the span are set.
620
272
    fn span(&self) -> Span {
621
272
        Span::splat(self.pos())
622
272
    }
623
624
    /// Create a span that covers the current character.
625
256
    fn span_char(&self) -> Span {
626
256
        let mut next = Position {
627
256
            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
628
256
            line: self.line(),
629
256
            column: self.column().checked_add(1).unwrap(),
630
256
        };
631
256
        if self.char() == '\n' {
632
0
            next.line += 1;
633
0
            next.column = 1;
634
256
        }
635
256
        Span::new(self.pos(), next)
636
256
    }
637
638
    /// Parse and push a single alternation on to the parser's internal stack.
639
    /// If the top of the stack already has an alternation, then add to that
640
    /// instead of pushing a new one.
641
    ///
642
    /// The concatenation given corresponds to a single alternation branch.
643
    /// The concatenation returned starts the next branch and is empty.
644
    ///
645
    /// This assumes the parser is currently positioned at `|` and will advance
646
    /// the parser to the character following `|`.
647
    #[inline(never)]
648
8
    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
649
8
        assert_eq!(self.char(), '|');
650
8
        concat.span.end = self.pos();
651
8
        self.push_or_add_alternation(concat);
652
8
        self.bump();
653
8
        Ok(ast::Concat { span: self.span(), asts: vec![] })
654
8
    }
655
656
    /// Pushes or adds the given branch of an alternation to the parser's
657
    /// internal stack of state.
658
8
    fn push_or_add_alternation(&self, concat: ast::Concat) {
659
8
        use self::GroupState::*;
660
8
661
8
        let mut stack = self.parser().stack_group.borrow_mut();
662
8
        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
663
4
            alts.asts.push(concat.into_ast());
664
4
            return;
665
4
        }
666
4
        stack.push(Alternation(ast::Alternation {
667
4
            span: Span::new(concat.span.start, self.pos()),
668
4
            asts: vec![concat.into_ast()],
669
4
        }));
670
8
    }
671
672
    /// Parse and push a group AST (and its parent concatenation) on to the
673
    /// parser's internal stack. Return a fresh concatenation corresponding
674
    /// to the group's sub-AST.
675
    ///
676
    /// If a set of flags was found (with no group), then the concatenation
677
    /// is returned with that set of flags added.
678
    ///
679
    /// This assumes that the parser is currently positioned on the opening
680
    /// parenthesis. It advances the parser to the character at the start
681
    /// of the sub-expression (or adjoining expression).
682
    ///
683
    /// If there was a problem parsing the start of the group, then an error
684
    /// is returned.
685
    #[inline(never)]
686
36
    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
687
36
        assert_eq!(self.char(), '(');
688
36
        match self.parse_group()? {
689
0
            Either::Left(set) => {
690
0
                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
691
0
                if let Some(v) = ignore {
692
0
                    self.parser().ignore_whitespace.set(v);
693
0
                }
694
695
0
                concat.asts.push(Ast::Flags(set));
696
0
                Ok(concat)
697
            }
698
36
            Either::Right(group) => {
699
36
                let old_ignore_whitespace = self.ignore_whitespace();
700
36
                let new_ignore_whitespace = group
701
36
                    .flags()
702
36
                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
703
36
                    .unwrap_or(old_ignore_whitespace);
704
36
                self.parser().stack_group.borrow_mut().push(
705
36
                    GroupState::Group {
706
36
                        concat: concat,
707
36
                        group: group,
708
36
                        ignore_whitespace: old_ignore_whitespace,
709
36
                    },
710
36
                );
711
36
                self.parser().ignore_whitespace.set(new_ignore_whitespace);
712
36
                Ok(ast::Concat { span: self.span(), asts: vec![] })
713
            }
714
        }
715
36
    }
716
717
    /// Pop a group AST from the parser's internal stack and set the group's
718
    /// AST to the given concatenation. Return the concatenation containing
719
    /// the group.
720
    ///
721
    /// This assumes that the parser is currently positioned on the closing
722
    /// parenthesis and advances the parser to the character following the `)`.
723
    ///
724
    /// If no such group could be popped, then an unopened group error is
725
    /// returned.
726
    #[inline(never)]
727
36
    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
728
36
        use self::GroupState::*;
729
36
730
36
        assert_eq!(self.char(), ')');
731
36
        let mut stack = self.parser().stack_group.borrow_mut();
732
36
        let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
733
36
            .pop()
734
        {
735
32
            Some(Group { concat, group, ignore_whitespace }) => {
736
32
                (concat, group, ignore_whitespace, None)
737
            }
738
4
            Some(Alternation(alt)) => match stack.pop() {
739
4
                Some(Group { concat, group, ignore_whitespace }) => {
740
4
                    (concat, group, ignore_whitespace, Some(alt))
741
                }
742
                None | Some(Alternation(_)) => {
743
0
                    return Err(self.error(
744
0
                        self.span_char(),
745
0
                        ast::ErrorKind::GroupUnopened,
746
0
                    ));
747
                }
748
            },
749
            None => {
750
0
                return Err(self
751
0
                    .error(self.span_char(), ast::ErrorKind::GroupUnopened));
752
            }
753
        };
754
36
        self.parser().ignore_whitespace.set(ignore_whitespace);
755
36
        group_concat.span.end = self.pos();
756
36
        self.bump();
757
36
        group.span.end = self.pos();
758
36
        match alt {
759
4
            Some(mut alt) => {
760
4
                alt.span.end = group_concat.span.end;
761
4
                alt.asts.push(group_concat.into_ast());
762
4
                group.ast = Box::new(alt.into_ast());
763
4
            }
764
32
            None => {
765
32
                group.ast = Box::new(group_concat.into_ast());
766
32
            }
767
        }
768
36
        prior_concat.asts.push(Ast::Group(group));
769
36
        Ok(prior_concat)
770
36
    }
771
772
    /// Pop the last state from the parser's internal stack, if it exists, and
773
    /// add the given concatenation to it. There either must be no state or a
774
    /// single alternation item on the stack. Any other scenario produces an
775
    /// error.
776
    ///
777
    /// This assumes that the parser has advanced to the end.
778
    #[inline(never)]
779
6
    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
780
6
        concat.span.end = self.pos();
781
6
        let mut stack = self.parser().stack_group.borrow_mut();
782
6
        let ast = match stack.pop() {
783
6
            None => Ok(concat.into_ast()),
784
0
            Some(GroupState::Alternation(mut alt)) => {
785
0
                alt.span.end = self.pos();
786
0
                alt.asts.push(concat.into_ast());
787
0
                Ok(Ast::Alternation(alt))
788
            }
789
0
            Some(GroupState::Group { group, .. }) => {
790
0
                return Err(
791
0
                    self.error(group.span, ast::ErrorKind::GroupUnclosed)
792
0
                );
793
            }
794
        };
795
        // If we try to pop again, there should be nothing.
796
6
        match stack.pop() {
797
6
            None => ast,
798
            Some(GroupState::Alternation(_)) => {
799
                // This unreachable is unfortunate. This case can't happen
800
                // because the only way we can be here is if there were two
801
                // `GroupState::Alternation`s adjacent in the parser's stack,
802
                // which we guarantee to never happen because we never push a
803
                // `GroupState::Alternation` if one is already at the top of
804
                // the stack.
805
0
                unreachable!()
806
            }
807
0
            Some(GroupState::Group { group, .. }) => {
808
0
                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
809
            }
810
        }
811
6
    }
812
813
    /// Parse the opening of a character class and push the current class
814
    /// parsing context onto the parser's stack. This assumes that the parser
815
    /// is positioned at an opening `[`. The given union should correspond to
816
    /// the union of set items built up before seeing the `[`.
817
    ///
818
    /// If there was a problem parsing the opening of the class, then an error
819
    /// is returned. Otherwise, a new union of set items for the class is
820
    /// returned (which may be populated with either a `]` or a `-`).
821
    #[inline(never)]
822
68
    fn push_class_open(
823
68
        &self,
824
68
        parent_union: ast::ClassSetUnion,
825
68
    ) -> Result<ast::ClassSetUnion> {
826
68
        assert_eq!(self.char(), '[');
827
828
68
        let (nested_set, nested_union) = self.parse_set_class_open()?;
829
68
        self.parser()
830
68
            .stack_class
831
68
            .borrow_mut()
832
68
            .push(ClassState::Open { union: parent_union, set: nested_set });
833
68
        Ok(nested_union)
834
68
    }
835
836
    /// Parse the end of a character class set and pop the character class
837
    /// parser stack. The union given corresponds to the last union built
838
    /// before seeing the closing `]`. The union returned corresponds to the
839
    /// parent character class set with the nested class added to it.
840
    ///
841
    /// This assumes that the parser is positioned at a `]` and will advance
842
    /// the parser to the byte immediately following the `]`.
843
    ///
844
    /// If the stack is empty after popping, then this returns the final
845
    /// "top-level" character class AST (where a "top-level" character class
846
    /// is one that is not nested inside any other character class).
847
    ///
848
    /// If there is no corresponding opening bracket on the parser's stack,
849
    /// then an error is returned.
850
    #[inline(never)]
851
68
    fn pop_class(
852
68
        &self,
853
68
        nested_union: ast::ClassSetUnion,
854
68
    ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
855
68
        assert_eq!(self.char(), ']');
856
857
68
        let item = ast::ClassSet::Item(nested_union.into_item());
858
68
        let prevset = self.pop_class_op(item);
859
68
        let mut stack = self.parser().stack_class.borrow_mut();
860
68
        match stack.pop() {
861
            None => {
862
                // We can never observe an empty stack:
863
                //
864
                // 1) We are guaranteed to start with a non-empty stack since
865
                //    the character class parser is only initiated when it sees
866
                //    a `[`.
867
                // 2) If we ever observe an empty stack while popping after
868
                //    seeing a `]`, then we signal the character class parser
869
                //    to terminate.
870
0
                panic!("unexpected empty character class stack")
871
            }
872
            Some(ClassState::Op { .. }) => {
873
                // This panic is unfortunate, but this case is impossible
874
                // since we already popped the Op state if one exists above.
875
                // Namely, every push to the class parser stack is guarded by
876
                // whether an existing Op is already on the top of the stack.
877
                // If it is, the existing Op is modified. That is, the stack
878
                // can never have consecutive Op states.
879
0
                panic!("unexpected ClassState::Op")
880
            }
881
68
            Some(ClassState::Open { mut union, mut set }) => {
882
68
                self.bump();
883
68
                set.span.end = self.pos();
884
68
                set.kind = prevset;
885
68
                if stack.is_empty() {
886
68
                    Ok(Either::Right(ast::Class::Bracketed(set)))
887
                } else {
888
0
                    union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
889
0
                    Ok(Either::Left(union))
890
                }
891
            }
892
        }
893
68
    }
894
895
    /// Return an "unclosed class" error whose span points to the most
896
    /// recently opened class.
897
    ///
898
    /// This should only be called while parsing a character class.
899
    #[inline(never)]
900
0
    fn unclosed_class_error(&self) -> ast::Error {
901
0
        for state in self.parser().stack_class.borrow().iter().rev() {
902
0
            match *state {
903
0
                ClassState::Open { ref set, .. } => {
904
0
                    return self
905
0
                        .error(set.span, ast::ErrorKind::ClassUnclosed);
906
                }
907
0
                _ => {}
908
            }
909
        }
910
        // We are guaranteed to have a non-empty stack with at least
911
        // one open bracket, so we should never get here.
912
0
        panic!("no open character class found")
913
0
    }
914
915
    /// Push the current set of class items on to the class parser's stack as
916
    /// the left hand side of the given operator.
917
    ///
918
    /// A fresh set union is returned, which should be used to build the right
919
    /// hand side of this operator.
920
    #[inline(never)]
921
0
    fn push_class_op(
922
0
        &self,
923
0
        next_kind: ast::ClassSetBinaryOpKind,
924
0
        next_union: ast::ClassSetUnion,
925
0
    ) -> ast::ClassSetUnion {
926
0
        let item = ast::ClassSet::Item(next_union.into_item());
927
0
        let new_lhs = self.pop_class_op(item);
928
0
        self.parser()
929
0
            .stack_class
930
0
            .borrow_mut()
931
0
            .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
932
0
        ast::ClassSetUnion { span: self.span(), items: vec![] }
933
0
    }
934
935
    /// Pop a character class set from the character class parser stack. If the
936
    /// top of the stack is just an item (not an operation), then return the
937
    /// given set unchanged. If the top of the stack is an operation, then the
938
    /// given set will be used as the rhs of the operation on the top of the
939
    /// stack. In that case, the binary operation is returned as a set.
940
    #[inline(never)]
941
68
    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
942
68
        let mut stack = self.parser().stack_class.borrow_mut();
943
68
        let (kind, lhs) = match stack.pop() {
944
0
            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
945
68
            Some(state @ ClassState::Open { .. }) => {
946
68
                stack.push(state);
947
68
                return rhs;
948
            }
949
0
            None => unreachable!(),
950
        };
951
0
        let span = Span::new(lhs.span().start, rhs.span().end);
952
0
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
953
0
            span: span,
954
0
            kind: kind,
955
0
            lhs: Box::new(lhs),
956
0
            rhs: Box::new(rhs),
957
0
        })
958
68
    }
959
}
960
961
impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
962
    /// Parse the regular expression into an abstract syntax tree.
963
6
    fn parse(&self) -> Result<Ast> {
964
6
        self.parse_with_comments().map(|astc| astc.ast)
965
6
    }
966
967
    /// Parse the regular expression and return an abstract syntax tree with
968
    /// all of the comments found in the pattern.
969
6
    fn parse_with_comments(&self) -> Result<ast::WithComments> {
970
6
        assert_eq!(self.offset(), 0, "parser can only be used once");
971
6
        self.parser().reset();
972
6
        let mut concat = ast::Concat { span: self.span(), asts: vec![] };
973
454
        loop {
974
454
            self.bump_space();
975
454
            if self.is_eof() {
976
6
                break;
977
448
            }
978
448
            match self.char() {
979
36
                '(' => concat = self.push_group(concat)?,
980
36
                ')' => concat = self.pop_group(concat)?,
981
8
                '|' => concat = self.push_alternate(concat)?,
982
68
                '[' => {
983
68
                    let class = self.parse_set_class()?;
984
68
                    concat.asts.push(Ast::Class(class));
985
                }
986
                '?' => {
987
36
                    concat = self.parse_uncounted_repetition(
988
36
                        concat,
989
36
                        ast::RepetitionKind::ZeroOrOne,
990
36
                    )?;
991
                }
992
                '*' => {
993
40
                    concat = self.parse_uncounted_repetition(
994
40
                        concat,
995
40
                        ast::RepetitionKind::ZeroOrMore,
996
40
                    )?;
997
                }
998
                '+' => {
999
24
                    concat = self.parse_uncounted_repetition(
1000
24
                        concat,
1001
24
                        ast::RepetitionKind::OneOrMore,
1002
24
                    )?;
1003
                }
1004
                '{' => {
1005
0
                    concat = self.parse_counted_repetition(concat)?;
1006
                }
1007
200
                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1008
            }
1009
        }
1010
6
        let ast = self.pop_group_end(concat)?;
1011
6
        NestLimiter::new(self).check(&ast)?;
1012
6
        Ok(ast::WithComments {
1013
6
            ast: ast,
1014
6
            comments: mem::replace(
1015
6
                &mut *self.parser().comments.borrow_mut(),
1016
6
                vec![],
1017
6
            ),
1018
6
        })
1019
6
    }
1020
1021
    /// Parses an uncounted repetition operation. An uncounted repetition
1022
    /// operator includes ?, * and +, but does not include the {m,n} syntax.
1023
    /// The given `kind` should correspond to the operator observed by the
1024
    /// caller.
1025
    ///
1026
    /// This assumes that the paser is currently positioned at the repetition
1027
    /// operator and advances the parser to the first character after the
1028
    /// operator. (Note that the operator may include a single additional `?`,
1029
    /// which makes the operator ungreedy.)
1030
    ///
1031
    /// The caller should include the concatenation that is being built. The
1032
    /// concatenation returned includes the repetition operator applied to the
1033
    /// last expression in the given concatenation.
1034
    #[inline(never)]
1035
100
    fn parse_uncounted_repetition(
1036
100
        &self,
1037
100
        mut concat: ast::Concat,
1038
100
        kind: ast::RepetitionKind,
1039
100
    ) -> Result<ast::Concat> {
1040
100
        assert!(
1041
100
            self.char() == '?' || self.char() == '*' || self.char() == '+'
1042
        );
1043
100
        let op_start = self.pos();
1044
100
        let ast = match concat.asts.pop() {
1045
100
            Some(ast) => ast,
1046
            None => {
1047
0
                return Err(
1048
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1049
0
                )
1050
            }
1051
        };
1052
100
        match ast {
1053
            Ast::Empty(_) | Ast::Flags(_) => {
1054
0
                return Err(
1055
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1056
0
                )
1057
            }
1058
100
            _ => {}
1059
100
        }
1060
100
        let mut greedy = true;
1061
100
        if self.bump() && self.char() == '?' {
1062
0
            greedy = false;
1063
0
            self.bump();
1064
100
        }
1065
100
        concat.asts.push(Ast::Repetition(ast::Repetition {
1066
100
            span: ast.span().with_end(self.pos()),
1067
100
            op: ast::RepetitionOp {
1068
100
                span: Span::new(op_start, self.pos()),
1069
100
                kind: kind,
1070
100
            },
1071
100
            greedy: greedy,
1072
100
            ast: Box::new(ast),
1073
100
        }));
1074
100
        Ok(concat)
1075
100
    }
1076
1077
    /// Parses a counted repetition operation. A counted repetition operator
1078
    /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1079
    /// operators.
1080
    ///
1081
    /// This assumes that the paser is currently positioned at the opening `{`
1082
    /// and advances the parser to the first character after the operator.
1083
    /// (Note that the operator may include a single additional `?`, which
1084
    /// makes the operator ungreedy.)
1085
    ///
1086
    /// The caller should include the concatenation that is being built. The
1087
    /// concatenation returned includes the repetition operator applied to the
1088
    /// last expression in the given concatenation.
1089
    #[inline(never)]
1090
0
    fn parse_counted_repetition(
1091
0
        &self,
1092
0
        mut concat: ast::Concat,
1093
0
    ) -> Result<ast::Concat> {
1094
0
        assert!(self.char() == '{');
1095
0
        let start = self.pos();
1096
0
        let ast = match concat.asts.pop() {
1097
0
            Some(ast) => ast,
1098
            None => {
1099
0
                return Err(
1100
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1101
0
                )
1102
            }
1103
        };
1104
0
        match ast {
1105
            Ast::Empty(_) | Ast::Flags(_) => {
1106
0
                return Err(
1107
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1108
0
                )
1109
            }
1110
0
            _ => {}
1111
0
        }
1112
0
        if !self.bump_and_bump_space() {
1113
0
            return Err(self.error(
1114
0
                Span::new(start, self.pos()),
1115
0
                ast::ErrorKind::RepetitionCountUnclosed,
1116
0
            ));
1117
0
        }
1118
0
        let count_start = specialize_err(
1119
0
            self.parse_decimal(),
1120
0
            ast::ErrorKind::DecimalEmpty,
1121
0
            ast::ErrorKind::RepetitionCountDecimalEmpty,
1122
0
        )?;
1123
0
        let mut range = ast::RepetitionRange::Exactly(count_start);
1124
0
        if self.is_eof() {
1125
0
            return Err(self.error(
1126
0
                Span::new(start, self.pos()),
1127
0
                ast::ErrorKind::RepetitionCountUnclosed,
1128
0
            ));
1129
0
        }
1130
0
        if self.char() == ',' {
1131
0
            if !self.bump_and_bump_space() {
1132
0
                return Err(self.error(
1133
0
                    Span::new(start, self.pos()),
1134
0
                    ast::ErrorKind::RepetitionCountUnclosed,
1135
0
                ));
1136
0
            }
1137
0
            if self.char() != '}' {
1138
0
                let count_end = specialize_err(
1139
0
                    self.parse_decimal(),
1140
0
                    ast::ErrorKind::DecimalEmpty,
1141
0
                    ast::ErrorKind::RepetitionCountDecimalEmpty,
1142
0
                )?;
1143
0
                range = ast::RepetitionRange::Bounded(count_start, count_end);
1144
0
            } else {
1145
0
                range = ast::RepetitionRange::AtLeast(count_start);
1146
0
            }
1147
0
        }
1148
0
        if self.is_eof() || self.char() != '}' {
1149
0
            return Err(self.error(
1150
0
                Span::new(start, self.pos()),
1151
0
                ast::ErrorKind::RepetitionCountUnclosed,
1152
0
            ));
1153
0
        }
1154
0
1155
0
        let mut greedy = true;
1156
0
        if self.bump_and_bump_space() && self.char() == '?' {
1157
0
            greedy = false;
1158
0
            self.bump();
1159
0
        }
1160
1161
0
        let op_span = Span::new(start, self.pos());
1162
0
        if !range.is_valid() {
1163
0
            return Err(
1164
0
                self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1165
0
            );
1166
0
        }
1167
0
        concat.asts.push(Ast::Repetition(ast::Repetition {
1168
0
            span: ast.span().with_end(self.pos()),
1169
0
            op: ast::RepetitionOp {
1170
0
                span: op_span,
1171
0
                kind: ast::RepetitionKind::Range(range),
1172
0
            },
1173
0
            greedy: greedy,
1174
0
            ast: Box::new(ast),
1175
0
        }));
1176
0
        Ok(concat)
1177
0
    }
1178
1179
    /// Parse a group (which contains a sub-expression) or a set of flags.
1180
    ///
1181
    /// If a group was found, then it is returned with an empty AST. If a set
1182
    /// of flags is found, then that set is returned.
1183
    ///
1184
    /// The parser should be positioned at the opening parenthesis.
1185
    ///
1186
    /// This advances the parser to the character before the start of the
1187
    /// sub-expression (in the case of a group) or to the closing parenthesis
1188
    /// immediately following the set of flags.
1189
    ///
1190
    /// # Errors
1191
    ///
1192
    /// If flags are given and incorrectly specified, then a corresponding
1193
    /// error is returned.
1194
    ///
1195
    /// If a capture name is given and it is incorrectly specified, then a
1196
    /// corresponding error is returned.
1197
    #[inline(never)]
1198
36
    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1199
36
        assert_eq!(self.char(), '(');
1200
36
        let open_span = self.span_char();
1201
36
        self.bump();
1202
36
        self.bump_space();
1203
36
        if self.is_lookaround_prefix() {
1204
0
            return Err(self.error(
1205
0
                Span::new(open_span.start, self.span().end),
1206
0
                ast::ErrorKind::UnsupportedLookAround,
1207
0
            ));
1208
36
        }
1209
36
        let inner_span = self.span();
1210
36
        if self.bump_if("?P<") {
1211
0
            let capture_index = self.next_capture_index(open_span)?;
1212
0
            let cap = self.parse_capture_name(capture_index)?;
1213
0
            Ok(Either::Right(ast::Group {
1214
0
                span: open_span,
1215
0
                kind: ast::GroupKind::CaptureName(cap),
1216
0
                ast: Box::new(Ast::Empty(self.span())),
1217
0
            }))
1218
36
        } else if self.bump_if("?") {
1219
14
            if self.is_eof() {
1220
0
                return Err(
1221
0
                    self.error(open_span, ast::ErrorKind::GroupUnclosed)
1222
0
                );
1223
14
            }
1224
14
            let flags = self.parse_flags()?;
1225
14
            let char_end = self.char();
1226
14
            self.bump();
1227
14
            if char_end == ')' {
1228
                // We don't allow empty flags, e.g., `(?)`. We instead
1229
                // interpret it as a repetition operator missing its argument.
1230
0
                if flags.items.is_empty() {
1231
0
                    return Err(self.error(
1232
0
                        inner_span,
1233
0
                        ast::ErrorKind::RepetitionMissing,
1234
0
                    ));
1235
0
                }
1236
0
                Ok(Either::Left(ast::SetFlags {
1237
0
                    span: Span { end: self.pos(), ..open_span },
1238
0
                    flags: flags,
1239
0
                }))
1240
            } else {
1241
14
                assert_eq!(char_end, ':');
1242
14
                Ok(Either::Right(ast::Group {
1243
14
                    span: open_span,
1244
14
                    kind: ast::GroupKind::NonCapturing(flags),
1245
14
                    ast: Box::new(Ast::Empty(self.span())),
1246
14
                }))
1247
            }
1248
        } else {
1249
22
            let capture_index = self.next_capture_index(open_span)?;
1250
22
            Ok(Either::Right(ast::Group {
1251
22
                span: open_span,
1252
22
                kind: ast::GroupKind::CaptureIndex(capture_index),
1253
22
                ast: Box::new(Ast::Empty(self.span())),
1254
22
            }))
1255
        }
1256
36
    }
1257
1258
    /// Parses a capture group name. Assumes that the parser is positioned at
1259
    /// the first character in the name following the opening `<` (and may
1260
    /// possibly be EOF). This advances the parser to the first character
1261
    /// following the closing `>`.
1262
    ///
1263
    /// The caller must provide the capture index of the group for this name.
1264
    #[inline(never)]
1265
0
    fn parse_capture_name(
1266
0
        &self,
1267
0
        capture_index: u32,
1268
0
    ) -> Result<ast::CaptureName> {
1269
0
        if self.is_eof() {
1270
0
            return Err(self
1271
0
                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1272
0
        }
1273
0
        let start = self.pos();
1274
0
        loop {
1275
0
            if self.char() == '>' {
1276
0
                break;
1277
0
            }
1278
0
            if !is_capture_char(self.char(), self.pos() == start) {
1279
0
                return Err(self.error(
1280
0
                    self.span_char(),
1281
0
                    ast::ErrorKind::GroupNameInvalid,
1282
0
                ));
1283
0
            }
1284
0
            if !self.bump() {
1285
0
                break;
1286
0
            }
1287
        }
1288
0
        let end = self.pos();
1289
0
        if self.is_eof() {
1290
0
            return Err(self
1291
0
                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1292
0
        }
1293
0
        assert_eq!(self.char(), '>');
1294
0
        self.bump();
1295
0
        let name = &self.pattern()[start.offset..end.offset];
1296
0
        if name.is_empty() {
1297
0
            return Err(self.error(
1298
0
                Span::new(start, start),
1299
0
                ast::ErrorKind::GroupNameEmpty,
1300
0
            ));
1301
0
        }
1302
0
        let capname = ast::CaptureName {
1303
0
            span: Span::new(start, end),
1304
0
            name: name.to_string(),
1305
0
            index: capture_index,
1306
0
        };
1307
0
        self.add_capture_name(&capname)?;
1308
0
        Ok(capname)
1309
0
    }
1310
1311
    /// Parse a sequence of flags starting at the current character.
1312
    ///
1313
    /// This advances the parser to the character immediately following the
1314
    /// flags, which is guaranteed to be either `:` or `)`.
1315
    ///
1316
    /// # Errors
1317
    ///
1318
    /// If any flags are duplicated, then an error is returned.
1319
    ///
1320
    /// If the negation operator is used more than once, then an error is
1321
    /// returned.
1322
    ///
1323
    /// If no flags could be found or if the negation operation is not followed
1324
    /// by any flags, then an error is returned.
1325
    #[inline(never)]
1326
14
    fn parse_flags(&self) -> Result<ast::Flags> {
1327
14
        let mut flags = ast::Flags { span: self.span(), items: vec![] };
1328
14
        let mut last_was_negation = None;
1329
14
        while self.char() != ':' && self.char() != ')' {
1330
0
            if self.char() == '-' {
1331
0
                last_was_negation = Some(self.span_char());
1332
0
                let item = ast::FlagsItem {
1333
0
                    span: self.span_char(),
1334
0
                    kind: ast::FlagsItemKind::Negation,
1335
0
                };
1336
0
                if let Some(i) = flags.add_item(item) {
1337
0
                    return Err(self.error(
1338
0
                        self.span_char(),
1339
0
                        ast::ErrorKind::FlagRepeatedNegation {
1340
0
                            original: flags.items[i].span,
1341
0
                        },
1342
0
                    ));
1343
0
                }
1344
            } else {
1345
0
                last_was_negation = None;
1346
0
                let item = ast::FlagsItem {
1347
0
                    span: self.span_char(),
1348
0
                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1349
                };
1350
0
                if let Some(i) = flags.add_item(item) {
1351
0
                    return Err(self.error(
1352
0
                        self.span_char(),
1353
0
                        ast::ErrorKind::FlagDuplicate {
1354
0
                            original: flags.items[i].span,
1355
0
                        },
1356
0
                    ));
1357
0
                }
1358
            }
1359
0
            if !self.bump() {
1360
0
                return Err(
1361
0
                    self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1362
0
                );
1363
0
            }
1364
        }
1365
14
        if let Some(span) = last_was_negation {
1366
0
            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1367
14
        }
1368
14
        flags.span.end = self.pos();
1369
14
        Ok(flags)
1370
14
    }
1371
1372
    /// Parse the current character as a flag. Do not advance the parser.
1373
    ///
1374
    /// # Errors
1375
    ///
1376
    /// If the flag is not recognized, then an error is returned.
1377
    #[inline(never)]
1378
0
    fn parse_flag(&self) -> Result<ast::Flag> {
1379
0
        match self.char() {
1380
0
            'i' => Ok(ast::Flag::CaseInsensitive),
1381
0
            'm' => Ok(ast::Flag::MultiLine),
1382
0
            's' => Ok(ast::Flag::DotMatchesNewLine),
1383
0
            'U' => Ok(ast::Flag::SwapGreed),
1384
0
            'u' => Ok(ast::Flag::Unicode),
1385
0
            'x' => Ok(ast::Flag::IgnoreWhitespace),
1386
            _ => {
1387
0
                Err(self
1388
0
                    .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1389
            }
1390
        }
1391
0
    }
1392
1393
    /// Parse a primitive AST. e.g., A literal, non-set character class or
1394
    /// assertion.
1395
    ///
1396
    /// This assumes that the parser expects a primitive at the current
1397
    /// location. i.e., All other non-primitive cases have been handled.
1398
    /// For example, if the parser's position is at `|`, then `|` will be
1399
    /// treated as a literal (e.g., inside a character class).
1400
    ///
1401
    /// This advances the parser to the first character immediately following
1402
    /// the primitive.
1403
200
    fn parse_primitive(&self) -> Result<Primitive> {
1404
200
        match self.char() {
1405
0
            '\\' => self.parse_escape(),
1406
            '.' => {
1407
2
                let ast = Primitive::Dot(self.span_char());
1408
2
                self.bump();
1409
2
                Ok(ast)
1410
            }
1411
            '^' => {
1412
6
                let ast = Primitive::Assertion(ast::Assertion {
1413
6
                    span: self.span_char(),
1414
6
                    kind: ast::AssertionKind::StartLine,
1415
6
                });
1416
6
                self.bump();
1417
6
                Ok(ast)
1418
            }
1419
            '$' => {
1420
6
                let ast = Primitive::Assertion(ast::Assertion {
1421
6
                    span: self.span_char(),
1422
6
                    kind: ast::AssertionKind::EndLine,
1423
6
                });
1424
6
                self.bump();
1425
6
                Ok(ast)
1426
            }
1427
186
            c => {
1428
186
                let ast = Primitive::Literal(ast::Literal {
1429
186
                    span: self.span_char(),
1430
186
                    kind: ast::LiteralKind::Verbatim,
1431
186
                    c: c,
1432
186
                });
1433
186
                self.bump();
1434
186
                Ok(ast)
1435
            }
1436
        }
1437
200
    }
1438
1439
    /// Parse an escape sequence as a primitive AST.
1440
    ///
1441
    /// This assumes the parser is positioned at the start of the escape
1442
    /// sequence, i.e., `\`. It advances the parser to the first position
1443
    /// immediately following the escape sequence.
1444
    #[inline(never)]
1445
0
    fn parse_escape(&self) -> Result<Primitive> {
1446
0
        assert_eq!(self.char(), '\\');
1447
0
        let start = self.pos();
1448
0
        if !self.bump() {
1449
0
            return Err(self.error(
1450
0
                Span::new(start, self.pos()),
1451
0
                ast::ErrorKind::EscapeUnexpectedEof,
1452
0
            ));
1453
0
        }
1454
0
        let c = self.char();
1455
        // Put some of the more complicated routines into helpers.
1456
0
        match c {
1457
0
            '0'..='7' => {
1458
0
                if !self.parser().octal {
1459
0
                    return Err(self.error(
1460
0
                        Span::new(start, self.span_char().end),
1461
0
                        ast::ErrorKind::UnsupportedBackreference,
1462
0
                    ));
1463
0
                }
1464
0
                let mut lit = self.parse_octal();
1465
0
                lit.span.start = start;
1466
0
                return Ok(Primitive::Literal(lit));
1467
            }
1468
0
            '8'..='9' if !self.parser().octal => {
1469
0
                return Err(self.error(
1470
0
                    Span::new(start, self.span_char().end),
1471
0
                    ast::ErrorKind::UnsupportedBackreference,
1472
0
                ));
1473
            }
1474
            'x' | 'u' | 'U' => {
1475
0
                let mut lit = self.parse_hex()?;
1476
0
                lit.span.start = start;
1477
0
                return Ok(Primitive::Literal(lit));
1478
            }
1479
            'p' | 'P' => {
1480
0
                let mut cls = self.parse_unicode_class()?;
1481
0
                cls.span.start = start;
1482
0
                return Ok(Primitive::Unicode(cls));
1483
            }
1484
            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1485
0
                let mut cls = self.parse_perl_class();
1486
0
                cls.span.start = start;
1487
0
                return Ok(Primitive::Perl(cls));
1488
            }
1489
0
            _ => {}
1490
0
        }
1491
0
1492
0
        // Handle all of the one letter sequences inline.
1493
0
        self.bump();
1494
0
        let span = Span::new(start, self.pos());
1495
0
        if is_meta_character(c) {
1496
0
            return Ok(Primitive::Literal(ast::Literal {
1497
0
                span: span,
1498
0
                kind: ast::LiteralKind::Punctuation,
1499
0
                c: c,
1500
0
            }));
1501
0
        }
1502
0
        let special = |kind, c| {
1503
0
            Ok(Primitive::Literal(ast::Literal {
1504
0
                span: span,
1505
0
                kind: ast::LiteralKind::Special(kind),
1506
0
                c: c,
1507
0
            }))
1508
0
        };
1509
0
        match c {
1510
0
            'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1511
0
            'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1512
0
            't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1513
0
            'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1514
0
            'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1515
0
            'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1516
0
            ' ' if self.ignore_whitespace() => {
1517
0
                special(ast::SpecialLiteralKind::Space, ' ')
1518
            }
1519
0
            'A' => Ok(Primitive::Assertion(ast::Assertion {
1520
0
                span: span,
1521
0
                kind: ast::AssertionKind::StartText,
1522
0
            })),
1523
0
            'z' => Ok(Primitive::Assertion(ast::Assertion {
1524
0
                span: span,
1525
0
                kind: ast::AssertionKind::EndText,
1526
0
            })),
1527
0
            'b' => Ok(Primitive::Assertion(ast::Assertion {
1528
0
                span: span,
1529
0
                kind: ast::AssertionKind::WordBoundary,
1530
0
            })),
1531
0
            'B' => Ok(Primitive::Assertion(ast::Assertion {
1532
0
                span: span,
1533
0
                kind: ast::AssertionKind::NotWordBoundary,
1534
0
            })),
1535
0
            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1536
        }
1537
0
    }
1538
1539
    /// Parse an octal representation of a Unicode codepoint up to 3 digits
1540
    /// long. This expects the parser to be positioned at the first octal
1541
    /// digit and advances the parser to the first character immediately
1542
    /// following the octal number. This also assumes that parsing octal
1543
    /// escapes is enabled.
1544
    ///
1545
    /// Assuming the preconditions are met, this routine can never fail.
1546
    #[inline(never)]
1547
0
    fn parse_octal(&self) -> ast::Literal {
1548
0
        use std::char;
1549
0
        use std::u32;
1550
0
1551
0
        assert!(self.parser().octal);
1552
0
        assert!('0' <= self.char() && self.char() <= '7');
1553
0
        let start = self.pos();
1554
        // Parse up to two more digits.
1555
0
        while self.bump()
1556
0
            && '0' <= self.char()
1557
0
            && self.char() <= '7'
1558
0
            && self.pos().offset - start.offset <= 2
1559
0
        {}
1560
0
        let end = self.pos();
1561
0
        let octal = &self.pattern()[start.offset..end.offset];
1562
0
        // Parsing the octal should never fail since the above guarantees a
1563
0
        // valid number.
1564
0
        let codepoint =
1565
0
            u32::from_str_radix(octal, 8).expect("valid octal number");
1566
0
        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1567
0
        // invalid Unicode scalar values.
1568
0
        let c = char::from_u32(codepoint).expect("Unicode scalar value");
1569
0
        ast::Literal {
1570
0
            span: Span::new(start, end),
1571
0
            kind: ast::LiteralKind::Octal,
1572
0
            c: c,
1573
0
        }
1574
0
    }
1575
1576
    /// Parse a hex representation of a Unicode codepoint. This handles both
1577
    /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1578
    /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1579
    /// the first character immediately following the hexadecimal literal.
1580
    #[inline(never)]
1581
0
    fn parse_hex(&self) -> Result<ast::Literal> {
1582
0
        assert!(
1583
0
            self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1584
        );
1585
1586
0
        let hex_kind = match self.char() {
1587
0
            'x' => ast::HexLiteralKind::X,
1588
0
            'u' => ast::HexLiteralKind::UnicodeShort,
1589
0
            _ => ast::HexLiteralKind::UnicodeLong,
1590
        };
1591
0
        if !self.bump_and_bump_space() {
1592
0
            return Err(
1593
0
                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1594
0
            );
1595
0
        }
1596
0
        if self.char() == '{' {
1597
0
            self.parse_hex_brace(hex_kind)
1598
        } else {
1599
0
            self.parse_hex_digits(hex_kind)
1600
        }
1601
0
    }
1602
1603
    /// Parse an N-digit hex representation of a Unicode codepoint. This
1604
    /// expects the parser to be positioned at the first digit and will advance
1605
    /// the parser to the first character immediately following the escape
1606
    /// sequence.
1607
    ///
1608
    /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1609
    /// or 8 (for `\UNNNNNNNN`).
1610
    #[inline(never)]
1611
0
    fn parse_hex_digits(
1612
0
        &self,
1613
0
        kind: ast::HexLiteralKind,
1614
0
    ) -> Result<ast::Literal> {
1615
0
        use std::char;
1616
0
        use std::u32;
1617
0
1618
0
        let mut scratch = self.parser().scratch.borrow_mut();
1619
0
        scratch.clear();
1620
0
1621
0
        let start = self.pos();
1622
0
        for i in 0..kind.digits() {
1623
0
            if i > 0 && !self.bump_and_bump_space() {
1624
0
                return Err(self
1625
0
                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1626
0
            }
1627
0
            if !is_hex(self.char()) {
1628
0
                return Err(self.error(
1629
0
                    self.span_char(),
1630
0
                    ast::ErrorKind::EscapeHexInvalidDigit,
1631
0
                ));
1632
0
            }
1633
0
            scratch.push(self.char());
1634
        }
1635
        // The final bump just moves the parser past the literal, which may
1636
        // be EOF.
1637
0
        self.bump_and_bump_space();
1638
0
        let end = self.pos();
1639
0
        let hex = scratch.as_str();
1640
0
        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1641
0
            None => Err(self.error(
1642
0
                Span::new(start, end),
1643
0
                ast::ErrorKind::EscapeHexInvalid,
1644
0
            )),
1645
0
            Some(c) => Ok(ast::Literal {
1646
0
                span: Span::new(start, end),
1647
0
                kind: ast::LiteralKind::HexFixed(kind),
1648
0
                c: c,
1649
0
            }),
1650
        }
1651
0
    }
1652
1653
    /// Parse a hex representation of any Unicode scalar value. This expects
1654
    /// the parser to be positioned at the opening brace `{` and will advance
1655
    /// the parser to the first character following the closing brace `}`.
1656
    #[inline(never)]
1657
0
    fn parse_hex_brace(
1658
0
        &self,
1659
0
        kind: ast::HexLiteralKind,
1660
0
    ) -> Result<ast::Literal> {
1661
0
        use std::char;
1662
0
        use std::u32;
1663
0
1664
0
        let mut scratch = self.parser().scratch.borrow_mut();
1665
0
        scratch.clear();
1666
0
1667
0
        let brace_pos = self.pos();
1668
0
        let start = self.span_char().end;
1669
0
        while self.bump_and_bump_space() && self.char() != '}' {
1670
0
            if !is_hex(self.char()) {
1671
0
                return Err(self.error(
1672
0
                    self.span_char(),
1673
0
                    ast::ErrorKind::EscapeHexInvalidDigit,
1674
0
                ));
1675
0
            }
1676
0
            scratch.push(self.char());
1677
        }
1678
0
        if self.is_eof() {
1679
0
            return Err(self.error(
1680
0
                Span::new(brace_pos, self.pos()),
1681
0
                ast::ErrorKind::EscapeUnexpectedEof,
1682
0
            ));
1683
0
        }
1684
0
        let end = self.pos();
1685
0
        let hex = scratch.as_str();
1686
0
        assert_eq!(self.char(), '}');
1687
0
        self.bump_and_bump_space();
1688
0
1689
0
        if hex.is_empty() {
1690
0
            return Err(self.error(
1691
0
                Span::new(brace_pos, self.pos()),
1692
0
                ast::ErrorKind::EscapeHexEmpty,
1693
0
            ));
1694
0
        }
1695
0
        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1696
0
            None => Err(self.error(
1697
0
                Span::new(start, end),
1698
0
                ast::ErrorKind::EscapeHexInvalid,
1699
0
            )),
1700
0
            Some(c) => Ok(ast::Literal {
1701
0
                span: Span::new(start, self.pos()),
1702
0
                kind: ast::LiteralKind::HexBrace(kind),
1703
0
                c: c,
1704
0
            }),
1705
        }
1706
0
    }
1707
1708
    /// Parse a decimal number into a u32 while trimming leading and trailing
1709
    /// whitespace.
1710
    ///
1711
    /// This expects the parser to be positioned at the first position where
1712
    /// a decimal digit could occur. This will advance the parser to the byte
1713
    /// immediately following the last contiguous decimal digit.
1714
    ///
1715
    /// If no decimal digit could be found or if there was a problem parsing
1716
    /// the complete set of digits into a u32, then an error is returned.
1717
0
    fn parse_decimal(&self) -> Result<u32> {
1718
0
        let mut scratch = self.parser().scratch.borrow_mut();
1719
0
        scratch.clear();
1720
1721
0
        while !self.is_eof() && self.char().is_whitespace() {
1722
0
            self.bump();
1723
0
        }
1724
0
        let start = self.pos();
1725
0
        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1726
0
            scratch.push(self.char());
1727
0
            self.bump_and_bump_space();
1728
0
        }
1729
0
        let span = Span::new(start, self.pos());
1730
0
        while !self.is_eof() && self.char().is_whitespace() {
1731
0
            self.bump_and_bump_space();
1732
0
        }
1733
0
        let digits = scratch.as_str();
1734
0
        if digits.is_empty() {
1735
0
            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1736
0
        }
1737
0
        match u32::from_str_radix(digits, 10).ok() {
1738
0
            Some(n) => Ok(n),
1739
0
            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1740
        }
1741
0
    }
1742
1743
    /// Parse a standard character class consisting primarily of characters or
1744
    /// character ranges, but can also contain nested character classes of
1745
    /// any type (sans `.`).
1746
    ///
1747
    /// This assumes the parser is positioned at the opening `[`. If parsing
1748
    /// is successful, then the parser is advanced to the position immediately
1749
    /// following the closing `]`.
1750
    #[inline(never)]
1751
68
    fn parse_set_class(&self) -> Result<ast::Class> {
1752
68
        assert_eq!(self.char(), '[');
1753
1754
68
        let mut union =
1755
68
            ast::ClassSetUnion { span: self.span(), items: vec![] };
1756
212
        loop {
1757
212
            self.bump_space();
1758
212
            if self.is_eof() {
1759
0
                return Err(self.unclosed_class_error());
1760
212
            }
1761
212
            match self.char() {
1762
                '[' => {
1763
                    // If we've already parsed the opening bracket, then
1764
                    // attempt to treat this as the beginning of an ASCII
1765
                    // class. If ASCII class parsing fails, then the parser
1766
                    // backs up to `[`.
1767
128
                    if !self.parser().stack_class.borrow().is_empty() {
1768
60
                        if let Some(cls) = self.maybe_parse_ascii_class() {
1769
60
                            union.push(ast::ClassSetItem::Ascii(cls));
1770
60
                            continue;
1771
0
                        }
1772
68
                    }
1773
68
                    union = self.push_class_open(union)?;
1774
                }
1775
68
                ']' => match self.pop_class(union)? {
1776
0
                    Either::Left(nested_union) => {
1777
0
                        union = nested_union;
1778
0
                    }
1779
68
                    Either::Right(class) => return Ok(class),
1780
                },
1781
0
                '&' if self.peek() == Some('&') => {
1782
0
                    assert!(self.bump_if("&&"));
1783
0
                    union = self.push_class_op(
1784
0
                        ast::ClassSetBinaryOpKind::Intersection,
1785
0
                        union,
1786
0
                    );
1787
                }
1788
0
                '-' if self.peek() == Some('-') => {
1789
0
                    assert!(self.bump_if("--"));
1790
0
                    union = self.push_class_op(
1791
0
                        ast::ClassSetBinaryOpKind::Difference,
1792
0
                        union,
1793
0
                    );
1794
                }
1795
0
                '~' if self.peek() == Some('~') => {
1796
0
                    assert!(self.bump_if("~~"));
1797
0
                    union = self.push_class_op(
1798
0
                        ast::ClassSetBinaryOpKind::SymmetricDifference,
1799
0
                        union,
1800
0
                    );
1801
                }
1802
                _ => {
1803
16
                    union.push(self.parse_set_class_range()?);
1804
                }
1805
            }
1806
        }
1807
68
    }
1808
1809
    /// Parse a single primitive item in a character class set. The item to
1810
    /// be parsed can either be one of a simple literal character, a range
1811
    /// between two simple literal characters or a "primitive" character
1812
    /// class like \w or \p{Greek}.
1813
    ///
1814
    /// If an invalid escape is found, or if a character class is found where
1815
    /// a simple literal is expected (e.g., in a range), then an error is
1816
    /// returned.
1817
    #[inline(never)]
1818
16
    fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1819
16
        let prim1 = self.parse_set_class_item()?;
1820
16
        self.bump_space();
1821
16
        if self.is_eof() {
1822
0
            return Err(self.unclosed_class_error());
1823
16
        }
1824
16
        // If the next char isn't a `-`, then we don't have a range.
1825
16
        // There are two exceptions. If the char after a `-` is a `]`, then
1826
16
        // `-` is interpreted as a literal `-`. Alternatively, if the char
1827
16
        // after a `-` is a `-`, then `--` corresponds to a "difference"
1828
16
        // operation.
1829
16
        if self.char() != '-'
1830
4
            || self.peek_space() == Some(']')
1831
4
            || self.peek_space() == Some('-')
1832
        {
1833
12
            return prim1.into_class_set_item(self);
1834
4
        }
1835
4
        // OK, now we're parsing a range, so bump past the `-` and parse the
1836
4
        // second half of the range.
1837
4
        if !self.bump_and_bump_space() {
1838
0
            return Err(self.unclosed_class_error());
1839
4
        }
1840
4
        let prim2 = self.parse_set_class_item()?;
1841
4
        let range = ast::ClassSetRange {
1842
4
            span: Span::new(prim1.span().start, prim2.span().end),
1843
4
            start: prim1.into_class_literal(self)?,
1844
4
            end: prim2.into_class_literal(self)?,
1845
        };
1846
4
        if !range.is_valid() {
1847
0
            return Err(
1848
0
                self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1849
0
            );
1850
4
        }
1851
4
        Ok(ast::ClassSetItem::Range(range))
1852
16
    }
1853
1854
    /// Parse a single item in a character class as a primitive, where the
1855
    /// primitive either consists of a verbatim literal or a single escape
1856
    /// sequence.
1857
    ///
1858
    /// This assumes the parser is positioned at the beginning of a primitive,
1859
    /// and advances the parser to the first position after the primitive if
1860
    /// successful.
1861
    ///
1862
    /// Note that it is the caller's responsibility to report an error if an
1863
    /// illegal primitive was parsed.
1864
    #[inline(never)]
1865
20
    fn parse_set_class_item(&self) -> Result<Primitive> {
1866
20
        if self.char() == '\\' {
1867
0
            self.parse_escape()
1868
        } else {
1869
20
            let x = Primitive::Literal(ast::Literal {
1870
20
                span: self.span_char(),
1871
20
                kind: ast::LiteralKind::Verbatim,
1872
20
                c: self.char(),
1873
20
            });
1874
20
            self.bump();
1875
20
            Ok(x)
1876
        }
1877
20
    }
1878
1879
    /// Parses the opening of a character class set. This includes the opening
1880
    /// bracket along with `^` if present to indicate negation. This also
1881
    /// starts parsing the opening set of unioned items if applicable, since
1882
    /// there are special rules applied to certain characters in the opening
1883
    /// of a character class. For example, `[^]]` is the class of all
1884
    /// characters not equal to `]`. (`]` would need to be escaped in any other
1885
    /// position.) Similarly for `-`.
1886
    ///
1887
    /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1888
    /// empty union. This empty union should be replaced with the actual item
1889
    /// when it is popped from the parser's stack.
1890
    ///
1891
    /// This assumes the parser is positioned at the opening `[` and advances
1892
    /// the parser to the first non-special byte of the character class.
1893
    ///
1894
    /// An error is returned if EOF is found.
1895
    #[inline(never)]
1896
68
    fn parse_set_class_open(
1897
68
        &self,
1898
68
    ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1899
68
        assert_eq!(self.char(), '[');
1900
68
        let start = self.pos();
1901
68
        if !self.bump_and_bump_space() {
1902
0
            return Err(self.error(
1903
0
                Span::new(start, self.pos()),
1904
0
                ast::ErrorKind::ClassUnclosed,
1905
0
            ));
1906
68
        }
1907
1908
68
        let negated = if self.char() != '^' {
1909
68
            false
1910
        } else {
1911
0
            if !self.bump_and_bump_space() {
1912
0
                return Err(self.error(
1913
0
                    Span::new(start, self.pos()),
1914
0
                    ast::ErrorKind::ClassUnclosed,
1915
0
                ));
1916
0
            }
1917
0
            true
1918
        };
1919
        // Accept any number of `-` as literal `-`.
1920
68
        let mut union =
1921
68
            ast::ClassSetUnion { span: self.span(), items: vec![] };
1922
68
        while self.char() == '-' {
1923
0
            union.push(ast::ClassSetItem::Literal(ast::Literal {
1924
0
                span: self.span_char(),
1925
0
                kind: ast::LiteralKind::Verbatim,
1926
0
                c: '-',
1927
0
            }));
1928
0
            if !self.bump_and_bump_space() {
1929
0
                return Err(self.error(
1930
0
                    Span::new(start, self.pos()),
1931
0
                    ast::ErrorKind::ClassUnclosed,
1932
0
                ));
1933
0
            }
1934
        }
1935
        // If `]` is the *first* char in a set, then interpret it as a literal
1936
        // `]`. That is, an empty class is impossible to write.
1937
68
        if union.items.is_empty() && self.char() == ']' {
1938
0
            union.push(ast::ClassSetItem::Literal(ast::Literal {
1939
0
                span: self.span_char(),
1940
0
                kind: ast::LiteralKind::Verbatim,
1941
0
                c: ']',
1942
0
            }));
1943
0
            if !self.bump_and_bump_space() {
1944
0
                return Err(self.error(
1945
0
                    Span::new(start, self.pos()),
1946
0
                    ast::ErrorKind::ClassUnclosed,
1947
0
                ));
1948
0
            }
1949
68
        }
1950
68
        let set = ast::ClassBracketed {
1951
68
            span: Span::new(start, self.pos()),
1952
68
            negated: negated,
1953
68
            kind: ast::ClassSet::union(ast::ClassSetUnion {
1954
68
                span: Span::new(union.span.start, union.span.start),
1955
68
                items: vec![],
1956
68
            }),
1957
68
        };
1958
68
        Ok((set, union))
1959
68
    }
1960
1961
    /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1962
    ///
1963
    /// This assumes the parser is positioned at the opening `[`.
1964
    ///
1965
    /// If no valid ASCII character class could be found, then this does not
1966
    /// advance the parser and `None` is returned. Otherwise, the parser is
1967
    /// advanced to the first byte following the closing `]` and the
1968
    /// corresponding ASCII class is returned.
1969
    #[inline(never)]
1970
60
    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1971
60
        // ASCII character classes are interesting from a parsing perspective
1972
60
        // because parsing cannot fail with any interesting error. For example,
1973
60
        // in order to use an ASCII character class, it must be enclosed in
1974
60
        // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1975
60
        // of it as "ASCII character characters have the syntax `[:NAME:]`
1976
60
        // which can only appear within character brackets." This means that
1977
60
        // things like `[[:lower:]A]` are legal constructs.
1978
60
        //
1979
60
        // However, if one types an incorrect ASCII character class, e.g.,
1980
60
        // `[[:loower:]]`, then we treat that as a normal nested character
1981
60
        // class containing the characters `:elorw`. One might argue that we
1982
60
        // should return an error instead since the repeated colons give away
1983
60
        // the intent to write an ASCII class. But what if the user typed
1984
60
        // `[[:lower]]` instead? How can we tell that was intended to be an
1985
60
        // ASCII class and not just a normal nested class?
1986
60
        //
1987
60
        // Reasonable people can probably disagree over this, but for better
1988
60
        // or worse, we implement semantics that never fails at the expense
1989
60
        // of better failure modes.
1990
60
        assert_eq!(self.char(), '[');
1991
        // If parsing fails, then we back up the parser to this starting point.
1992
60
        let start = self.pos();
1993
60
        let mut negated = false;
1994
60
        if !self.bump() || self.char() != ':' {
1995
0
            self.parser().pos.set(start);
1996
0
            return None;
1997
60
        }
1998
60
        if !self.bump() {
1999
0
            self.parser().pos.set(start);
2000
0
            return None;
2001
60
        }
2002
60
        if self.char() == '^' {
2003
0
            negated = true;
2004
0
            if !self.bump() {
2005
0
                self.parser().pos.set(start);
2006
0
                return None;
2007
0
            }
2008
60
        }
2009
60
        let name_start = self.offset();
2010
360
        while self.char() != ':' && self.bump() {}
2011
60
        if self.is_eof() {
2012
0
            self.parser().pos.set(start);
2013
0
            return None;
2014
60
        }
2015
60
        let name = &self.pattern()[name_start..self.offset()];
2016
60
        if !self.bump_if(":]") {
2017
0
            self.parser().pos.set(start);
2018
0
            return None;
2019
60
        }
2020
60
        let kind = match ast::ClassAsciiKind::from_name(name) {
2021
60
            Some(kind) => kind,
2022
            None => {
2023
0
                self.parser().pos.set(start);
2024
0
                return None;
2025
            }
2026
        };
2027
60
        Some(ast::ClassAscii {
2028
60
            span: Span::new(start, self.pos()),
2029
60
            kind: kind,
2030
60
            negated: negated,
2031
60
        })
2032
60
    }
2033
2034
    /// Parse a Unicode class in either the single character notation, `\pN`
2035
    /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2036
    /// the parser is positioned at the `p` (or `P` for negation) and will
2037
    /// advance the parser to the character immediately following the class.
2038
    ///
2039
    /// Note that this does not check whether the class name is valid or not.
2040
    #[inline(never)]
2041
0
    fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2042
0
        assert!(self.char() == 'p' || self.char() == 'P');
2043
2044
0
        let mut scratch = self.parser().scratch.borrow_mut();
2045
0
        scratch.clear();
2046
0
2047
0
        let negated = self.char() == 'P';
2048
0
        if !self.bump_and_bump_space() {
2049
0
            return Err(
2050
0
                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2051
0
            );
2052
0
        }
2053
0
        let (start, kind) = if self.char() == '{' {
2054
0
            let start = self.span_char().end;
2055
0
            while self.bump_and_bump_space() && self.char() != '}' {
2056
0
                scratch.push(self.char());
2057
0
            }
2058
0
            if self.is_eof() {
2059
0
                return Err(self
2060
0
                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2061
0
            }
2062
0
            assert_eq!(self.char(), '}');
2063
0
            self.bump();
2064
0
2065
0
            let name = scratch.as_str();
2066
0
            if let Some(i) = name.find("!=") {
2067
0
                (
2068
0
                    start,
2069
0
                    ast::ClassUnicodeKind::NamedValue {
2070
0
                        op: ast::ClassUnicodeOpKind::NotEqual,
2071
0
                        name: name[..i].to_string(),
2072
0
                        value: name[i + 2..].to_string(),
2073
0
                    },
2074
0
                )
2075
0
            } else if let Some(i) = name.find(':') {
2076
0
                (
2077
0
                    start,
2078
0
                    ast::ClassUnicodeKind::NamedValue {
2079
0
                        op: ast::ClassUnicodeOpKind::Colon,
2080
0
                        name: name[..i].to_string(),
2081
0
                        value: name[i + 1..].to_string(),
2082
0
                    },
2083
0
                )
2084
0
            } else if let Some(i) = name.find('=') {
2085
0
                (
2086
0
                    start,
2087
0
                    ast::ClassUnicodeKind::NamedValue {
2088
0
                        op: ast::ClassUnicodeOpKind::Equal,
2089
0
                        name: name[..i].to_string(),
2090
0
                        value: name[i + 1..].to_string(),
2091
0
                    },
2092
0
                )
2093
            } else {
2094
0
                (start, ast::ClassUnicodeKind::Named(name.to_string()))
2095
            }
2096
        } else {
2097
0
            let start = self.pos();
2098
0
            let c = self.char();
2099
0
            if c == '\\' {
2100
0
                return Err(self.error(
2101
0
                    self.span_char(),
2102
0
                    ast::ErrorKind::UnicodeClassInvalid,
2103
0
                ));
2104
0
            }
2105
0
            self.bump_and_bump_space();
2106
0
            let kind = ast::ClassUnicodeKind::OneLetter(c);
2107
0
            (start, kind)
2108
        };
2109
0
        Ok(ast::ClassUnicode {
2110
0
            span: Span::new(start, self.pos()),
2111
0
            negated: negated,
2112
0
            kind: kind,
2113
0
        })
2114
0
    }
2115
2116
    /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2117
    /// parser is currently at a valid character class name and will be
2118
    /// advanced to the character immediately following the class.
2119
    #[inline(never)]
2120
0
    fn parse_perl_class(&self) -> ast::ClassPerl {
2121
0
        let c = self.char();
2122
0
        let span = self.span_char();
2123
0
        self.bump();
2124
0
        let (negated, kind) = match c {
2125
0
            'd' => (false, ast::ClassPerlKind::Digit),
2126
0
            'D' => (true, ast::ClassPerlKind::Digit),
2127
0
            's' => (false, ast::ClassPerlKind::Space),
2128
0
            'S' => (true, ast::ClassPerlKind::Space),
2129
0
            'w' => (false, ast::ClassPerlKind::Word),
2130
0
            'W' => (true, ast::ClassPerlKind::Word),
2131
0
            c => panic!("expected valid Perl class but got '{}'", c),
2132
        };
2133
0
        ast::ClassPerl { span: span, kind: kind, negated: negated }
2134
0
    }
2135
}
2136
2137
/// A type that traverses a fully parsed Ast and checks whether its depth
2138
/// exceeds the specified nesting limit. If it does, then an error is returned.
///
/// The limit itself is not stored here; it is read from the parser's
/// `nest_limit` setting each time the depth is incremented.
2139
0
#[derive(Debug)]
2140
struct NestLimiter<'p, 's, P> {
2141
    /// The parser that is checking the nest limit.
    ///
    /// It supplies the configured `nest_limit` and is used to build the
    /// error that is returned when that limit is exceeded.
2142
    p: &'p ParserI<'s, P>,
2143
    /// The current depth while walking an Ast.
    ///
    /// Starts at zero, is incremented when the visitor enters a nested node
    /// and is decremented again when the visitor leaves that node.
2144
    depth: u32,
2145
}
2146
2147
impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2148
6
    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2149
6
        NestLimiter { p: p, depth: 0 }
2150
6
    }
2151
2152
    #[inline(never)]
2153
6
    fn check(self, ast: &Ast) -> Result<()> {
2154
6
        ast::visit(ast, self)
2155
6
    }
2156
2157
256
    fn increment_depth(&mut self, span: &Span) -> Result<()> {
2158
256
        let new = self.depth.checked_add(1).ok_or_else(|| {
2159
0
            self.p.error(
2160
0
                span.clone(),
2161
0
                ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2162
0
            )
2163
256
        })?;
2164
256
        let limit = self.p.parser().nest_limit;
2165
256
        if new > limit {
2166
0
            return Err(self.p.error(
2167
0
                span.clone(),
2168
0
                ast::ErrorKind::NestLimitExceeded(limit),
2169
0
            ));
2170
256
        }
2171
256
        self.depth = new;
2172
256
        Ok(())
2173
256
    }
2174
2175
256
    fn decrement_depth(&mut self) {
2176
256
        // Assuming the correctness of the visitor, this should never drop
2177
256
        // below 0.
2178
256
        self.depth = self.depth.checked_sub(1).unwrap();
2179
256
    }
2180
}
2181
2182
impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2183
    type Output = ();
2184
    type Err = ast::Error;
2185
2186
6
    fn finish(self) -> Result<()> {
2187
6
        Ok(())
2188
6
    }
2189
2190
448
    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2191
248
        let span = match *ast {
2192
            Ast::Empty(_)
2193
            | Ast::Flags(_)
2194
            | Ast::Literal(_)
2195
            | Ast::Dot(_)
2196
            | Ast::Assertion(_)
2197
            | Ast::Class(ast::Class::Unicode(_))
2198
            | Ast::Class(ast::Class::Perl(_)) => {
2199
                // These are all base cases, so we don't increment depth.
2200
200
                return Ok(());
2201
            }
2202
68
            Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2203
100
            Ast::Repetition(ref x) => &x.span,
2204
36
            Ast::Group(ref x) => &x.span,
2205
4
            Ast::Alternation(ref x) => &x.span,
2206
40
            Ast::Concat(ref x) => &x.span,
2207
        };
2208
248
        self.increment_depth(span)
2209
448
    }
2210
2211
    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2212
68
        match *ast {
2213
            Ast::Empty(_)
2214
            | Ast::Flags(_)
2215
            | Ast::Literal(_)
2216
            | Ast::Dot(_)
2217
            | Ast::Assertion(_)
2218
            | Ast::Class(ast::Class::Unicode(_))
2219
            | Ast::Class(ast::Class::Perl(_)) => {
2220
                // These are all base cases, so we don't decrement depth.
2221
200
                Ok(())
2222
            }
2223
            Ast::Class(ast::Class::Bracketed(_))
2224
            | Ast::Repetition(_)
2225
            | Ast::Group(_)
2226
            | Ast::Alternation(_)
2227
            | Ast::Concat(_) => {
2228
248
                self.decrement_depth();
2229
248
                Ok(())
2230
            }
2231
        }
2232
448
    }
2233
2234
84
    fn visit_class_set_item_pre(
2235
84
        &mut self,
2236
84
        ast: &ast::ClassSetItem,
2237
84
    ) -> Result<()> {
2238
84
        let span = match *ast {
2239
            ast::ClassSetItem::Empty(_)
2240
            | ast::ClassSetItem::Literal(_)
2241
            | ast::ClassSetItem::Range(_)
2242
            | ast::ClassSetItem::Ascii(_)
2243
            | ast::ClassSetItem::Unicode(_)
2244
            | ast::ClassSetItem::Perl(_) => {
2245
                // These are all base cases, so we don't increment depth.
2246
76
                return Ok(());
2247
            }
2248
0
            ast::ClassSetItem::Bracketed(ref x) => &x.span,
2249
8
            ast::ClassSetItem::Union(ref x) => &x.span,
2250
        };
2251
8
        self.increment_depth(span)
2252
84
    }
2253
2254
84
    fn visit_class_set_item_post(
2255
84
        &mut self,
2256
84
        ast: &ast::ClassSetItem,
2257
84
    ) -> Result<()> {
2258
84
        match *ast {
2259
            ast::ClassSetItem::Empty(_)
2260
            | ast::ClassSetItem::Literal(_)
2261
            | ast::ClassSetItem::Range(_)
2262
            | ast::ClassSetItem::Ascii(_)
2263
            | ast::ClassSetItem::Unicode(_)
2264
            | ast::ClassSetItem::Perl(_) => {
2265
                // These are all base cases, so we don't decrement depth.
2266
76
                Ok(())
2267
            }
2268
            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2269
8
                self.decrement_depth();
2270
8
                Ok(())
2271
            }
2272
        }
2273
84
    }
2274
2275
0
    fn visit_class_set_binary_op_pre(
2276
0
        &mut self,
2277
0
        ast: &ast::ClassSetBinaryOp,
2278
0
    ) -> Result<()> {
2279
0
        self.increment_depth(&ast.span)
2280
0
    }
2281
2282
0
    fn visit_class_set_binary_op_post(
2283
0
        &mut self,
2284
0
        _ast: &ast::ClassSetBinaryOp,
2285
0
    ) -> Result<()> {
2286
0
        self.decrement_depth();
2287
0
        Ok(())
2288
0
    }
2289
}
2290
2291
/// When the result is an error, transforms the ast::ErrorKind from the source
2292
/// Result into another one. This function is used to return clearer error
2293
/// messages when possible.
2294
fn specialize_err<T>(
2295
    result: Result<T>,
2296
    from: ast::ErrorKind,
2297
    to: ast::ErrorKind,
2298
) -> Result<T> {
2299
0
    if let Err(e) = result {
2300
0
        if e.kind == from {
2301
0
            Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2302
        } else {
2303
0
            Err(e)
2304
        }
2305
    } else {
2306
0
        result
2307
    }
2308
0
}
2309
2310
#[cfg(test)]
2311
mod tests {
2312
    use std::ops::Range;
2313
2314
    use super::{Parser, ParserBuilder, ParserI, Primitive};
2315
    use crate::ast::{self, Ast, Position, Span};
2316
2317
    // A local stand-in for the standard `assert_eq!`. On failure it prints
    // each operand on its own line, which is somewhat easier to read for
    // large Ast values (though still far from pretty).
    macro_rules! assert_eq {
        ($left:expr, $right:expr) => {{
            // Borrow both operands through a `match` so that temporaries
            // created by the operand expressions live for the comparison.
            match (&$left, &$right) {
                (lhs, rhs) if *lhs == *rhs => {}
                (lhs, rhs) => panic!(
                    "assertion failed: `(left == right)`\n\n\
                     left:  `{:?}`\nright: `{:?}`\n\n",
                    lhs, rhs
                ),
            }
        }};
    }
2334
2335
    // We create these errors to compare with real ast::Errors in the tests.
2336
    // We define equality between TestError and ast::Error to disregard the
2337
    // pattern string in ast::Error, which is annoying to provide in tests.
2338
    #[derive(Clone, Debug)]
2339
    struct TestError {
        // The span at which the error is expected to be reported.
2340
        span: Span,
        // The kind of error that is expected.
2341
        kind: ast::ErrorKind,
2342
    }
2343
2344
    impl PartialEq<ast::Error> for TestError {
2345
        fn eq(&self, other: &ast::Error) -> bool {
2346
            self.span == other.span && self.kind == other.kind
2347
        }
2348
    }
2349
2350
    // The same comparison with the operands swapped, so that either value
    // may appear on the left of `==`. Only span and kind are compared.
    impl PartialEq<TestError> for ast::Error {
        fn eq(&self, other: &TestError) -> bool {
            let same_span = self.span == other.span;
            same_span && self.kind == other.kind
        }
    }
2355
2356
    /// Short alias for turning a string slice into an owned `String`.
    fn s(str: &str) -> String {
        String::from(str)
    }
2359
2360
    fn parser(pattern: &str) -> ParserI<'_, Parser> {
2361
        ParserI::new(Parser::new(), pattern)
2362
    }
2363
2364
    fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2365
        let parser = ParserBuilder::new().octal(true).build();
2366
        ParserI::new(parser, pattern)
2367
    }
2368
2369
    fn parser_nest_limit(
2370
        pattern: &str,
2371
        nest_limit: u32,
2372
    ) -> ParserI<'_, Parser> {
2373
        let p = ParserBuilder::new().nest_limit(nest_limit).build();
2374
        ParserI::new(p, pattern)
2375
    }
2376
2377
    fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2378
        let p = ParserBuilder::new().ignore_whitespace(true).build();
2379
        ParserI::new(p, pattern)
2380
    }
2381
2382
    /// Short alias for creating a new span.
    ///
    /// Equivalent to `Span::new(start, end)`; it exists only to keep the
    /// expected values written out in the tests compact.
2383
    fn nspan(start: Position, end: Position) -> Span {
2384
        Span::new(start, end)
2385
    }
2386
2387
    /// Short alias for creating a new position.
    ///
    /// Equivalent to `Position::new(offset, line, column)`; the arguments
    /// are passed through unchanged and in the same order.
2388
    fn npos(offset: usize, line: usize, column: usize) -> Position {
2389
        Position::new(offset, line, column)
2390
    }
2391
2392
    /// Create a new span from the given offset range. This assumes a single
2393
    /// line and sets the columns based on the offsets. i.e., This only works
2394
    /// out of the box for ASCII, which is fine for most tests.
2395
    fn span(range: Range<usize>) -> Span {
2396
        let start = Position::new(range.start, 1, range.start + 1);
2397
        let end = Position::new(range.end, 1, range.end + 1);
2398
        Span::new(start, end)
2399
    }
2400
2401
    /// Create a new span for the corresponding byte range in the given string.
2402
    fn span_range(subject: &str, range: Range<usize>) -> Span {
2403
        let start = Position {
2404
            offset: range.start,
2405
            line: 1 + subject[..range.start].matches('\n').count(),
2406
            column: 1 + subject[..range.start]
2407
                .chars()
2408
                .rev()
2409
                .position(|c| c == '\n')
2410
                .unwrap_or(subject[..range.start].chars().count()),
2411
        };
2412
        let end = Position {
2413
            offset: range.end,
2414
            line: 1 + subject[..range.end].matches('\n').count(),
2415
            column: 1 + subject[..range.end]
2416
                .chars()
2417
                .rev()
2418
                .position(|c| c == '\n')
2419
                .unwrap_or(subject[..range.end].chars().count()),
2420
        };
2421
        Span::new(start, end)
2422
    }
2423
2424
    /// Create a verbatim literal starting at the given position.
2425
    fn lit(c: char, start: usize) -> Ast {
2426
        lit_with(c, span(start..start + c.len_utf8()))
2427
    }
2428
2429
    /// Create a punctuation literal starting at the given position.
2430
    fn punct_lit(c: char, span: Span) -> Ast {
2431
        Ast::Literal(ast::Literal {
2432
            span: span,
2433
            kind: ast::LiteralKind::Punctuation,
2434
            c: c,
2435
        })
2436
    }
2437
2438
    /// Create a verbatim literal with the given span.
2439
    fn lit_with(c: char, span: Span) -> Ast {
2440
        Ast::Literal(ast::Literal {
2441
            span: span,
2442
            kind: ast::LiteralKind::Verbatim,
2443
            c: c,
2444
        })
2445
    }
2446
2447
    /// Create a concatenation with the given range.
2448
    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2449
        concat_with(span(range), asts)
2450
    }
2451
2452
    /// Create a concatenation with the given span.
2453
    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2454
        Ast::Concat(ast::Concat { span: span, asts: asts })
2455
    }
2456
2457
    /// Create an alternation with the given span.
2458
    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2459
        Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
2460
    }
2461
2462
    /// Create a capturing group with the given span.
2463
    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2464
        Ast::Group(ast::Group {
2465
            span: span(range),
2466
            kind: ast::GroupKind::CaptureIndex(index),
2467
            ast: Box::new(ast),
2468
        })
2469
    }
2470
2471
    /// Create an ast::SetFlags.
2472
    ///
2473
    /// The given pattern should be the full pattern string. The range given
2474
    /// should correspond to the byte offsets where the flag set occurs.
2475
    ///
2476
    /// If negated is true, then the set is interpreted as beginning with a
2477
    /// negation.
2478
    fn flag_set(
2479
        pat: &str,
2480
        range: Range<usize>,
2481
        flag: ast::Flag,
2482
        negated: bool,
2483
    ) -> Ast {
2484
        let mut items = vec![ast::FlagsItem {
2485
            span: span_range(pat, (range.end - 2)..(range.end - 1)),
2486
            kind: ast::FlagsItemKind::Flag(flag),
2487
        }];
2488
        if negated {
2489
            items.insert(
2490
                0,
2491
                ast::FlagsItem {
2492
                    span: span_range(pat, (range.start + 2)..(range.end - 2)),
2493
                    kind: ast::FlagsItemKind::Negation,
2494
                },
2495
            );
2496
        }
2497
        Ast::Flags(ast::SetFlags {
2498
            span: span_range(pat, range.clone()),
2499
            flags: ast::Flags {
2500
                span: span_range(pat, (range.start + 2)..(range.end - 1)),
2501
                items: items,
2502
            },
2503
        })
2504
    }
2505
2506
    #[test]
    fn parse_nest_limit() {
        // Parse `pattern` with the given nest limit.
        let parse = |pattern: &str, limit: u32| {
            parser_nest_limit(pattern, limit).parse()
        };
        // The error expected when `limit` is exceeded at the given offsets.
        let exceeded = |range: Range<usize>, limit: u32| TestError {
            span: span(range),
            kind: ast::ErrorKind::NestLimitExceeded(limit),
        };
        // A greedy repetition of `inner`; `op` is the operator's range.
        let repeat = |range: Range<usize>,
                      op: Range<usize>,
                      kind: ast::RepetitionKind,
                      inner: Ast| {
            Ast::Repetition(ast::Repetition {
                span: span(range),
                op: ast::RepetitionOp { span: span(op), kind },
                greedy: true,
                ast: Box::new(inner),
            })
        };

        // A nest limit of 0 still allows some types of regexes.
        assert_eq!(parse("", 0), Ok(Ast::Empty(span(0..0))));
        assert_eq!(parse("a", 0), Ok(lit('a', 0)));

        // Test repetition operations, which require one level of nesting.
        assert_eq!(parse("a+", 0).unwrap_err(), exceeded(0..2, 0));
        assert_eq!(
            parse("a+", 1),
            Ok(repeat(
                0..2,
                1..2,
                ast::RepetitionKind::OneOrMore,
                lit('a', 0)
            ))
        );
        assert_eq!(parse("(a)+", 1).unwrap_err(), exceeded(0..3, 1));
        assert_eq!(parse("a+*", 1).unwrap_err(), exceeded(0..2, 1));
        let one_or_more =
            repeat(0..2, 1..2, ast::RepetitionKind::OneOrMore, lit('a', 0));
        assert_eq!(
            parse("a+*", 2),
            Ok(repeat(
                0..3,
                2..3,
                ast::RepetitionKind::ZeroOrMore,
                one_or_more
            ))
        );

        // Test concatenations. A concatenation requires one level of nesting.
        assert_eq!(parse("ab", 0).unwrap_err(), exceeded(0..2, 0));
        assert_eq!(
            parse("ab", 1),
            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
        );
        assert_eq!(
            parse("abc", 1),
            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
        );

        // Test alternations. An alternation requires one level of nesting.
        assert_eq!(parse("a|b", 0).unwrap_err(), exceeded(0..3, 0));
        assert_eq!(
            parse("a|b", 1),
            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
        );
        assert_eq!(
            parse("a|b|c", 1),
            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
        );

        // Test character classes. Classes form their own mini-recursive
        // syntax!
        assert_eq!(parse("[a]", 0).unwrap_err(), exceeded(0..3, 0));
        let single_item = ast::ClassSetItem::Literal(ast::Literal {
            span: span(1..2),
            kind: ast::LiteralKind::Verbatim,
            c: 'a',
        });
        assert_eq!(
            parse("[a]", 1),
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
                span: span(0..3),
                negated: false,
                kind: ast::ClassSet::Item(single_item),
            })))
        );
        assert_eq!(parse("[ab]", 1).unwrap_err(), exceeded(1..3, 1));
        assert_eq!(parse("[ab[cd]]", 2).unwrap_err(), exceeded(3..7, 2));
        assert_eq!(parse("[ab[cd]]", 3).unwrap_err(), exceeded(4..6, 3));
        assert_eq!(parse("[a--b]", 1).unwrap_err(), exceeded(1..5, 1));
        assert_eq!(parse("[a--bc]", 2).unwrap_err(), exceeded(4..6, 2));
    }
2663
2664
    #[test]
    fn parse_comments() {
        let pat = "(?x)
# This is comment 1.
foo # This is comment 2.
  # This is comment 3.
bar
# This is comment 4.";
        let astc = parser(pat).parse_with_comments().unwrap();

        // The Ast holds only the flag directive and the six literals.
        let lit_in = |c: char, range: Range<usize>| {
            lit_with(c, span_range(pat, range))
        };
        let expected_ast = concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_in('f', 26..27),
                lit_in('o', 27..28),
                lit_in('o', 28..29),
                lit_in('b', 74..75),
                lit_in('a', 75..76),
                lit_in('r', 76..77),
            ],
        );
        assert_eq!(astc.ast, expected_ast);

        // The comments are reported separately, in order of appearance.
        let comment = |range: Range<usize>, text: &str| ast::Comment {
            span: span_range(pat, range),
            comment: s(text),
        };
        let expected_comments = vec![
            comment(5..26, " This is comment 1."),
            comment(30..51, " This is comment 2."),
            comment(53..74, " This is comment 3."),
            comment(78..98, " This is comment 4."),
        ];
        assert_eq!(astc.comments, expected_comments);
    }
2710
2711
    #[test]
    fn parse_holistic() {
        assert_eq!(parser("]").parse(), Ok(lit(']', 0)));

        // Each escaped punctuation character takes up two bytes of the
        // pattern (the backslash and the character itself), so the i-th
        // literal spans 2*i..2*i+2.
        let escaped = [
            '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}',
            '^', '$', '#', '&', '-', '~',
        ];
        let expected: Vec<Ast> = escaped
            .iter()
            .enumerate()
            .map(|(i, &c)| punct_lit(c, span(2 * i..2 * i + 2)))
            .collect();
        assert_eq!(
            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
            Ok(concat(0..36, expected))
        );
    }
2741
2742
    #[test]
    fn parse_ignore_whitespace() {
        // The leading `(?x)` directive, which occupies bytes 0..4 of every
        // pattern below that starts with it.
        let x_flag = |pat: &str| {
            flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false)
        };
        // A verbatim one-byte literal on line 1, where the column is always
        // the offset plus one.
        let ascii = |c: char, at: usize| {
            lit_with(c, nspan(npos(at, 1, at + 1), npos(at + 1, 1, at + 2)))
        };
        // A verbatim literal whose span is computed from the pattern text.
        let lit_in = |pat: &str, c: char, range: Range<usize>| {
            lit_with(c, span_range(pat, range))
        };
        // A group whose span is computed from the pattern text.
        let group_in = |pat: &str,
                        range: Range<usize>,
                        kind: ast::GroupKind,
                        inner: Ast| {
            Ast::Group(ast::Group {
                span: span_range(pat, range),
                kind,
                ast: Box::new(inner),
            })
        };

        // Test that basic whitespace insensitivity works.
        let pat = "(?x)a b";
        let expected = concat_with(
            nspan(npos(0, 1, 1), npos(7, 1, 8)),
            vec![x_flag(pat), ascii('a', 4), ascii('b', 6)],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        // Test that we can toggle whitespace insensitivity.
        let pat = "(?x)a b(?-x)a b";
        let expected = concat_with(
            nspan(npos(0, 1, 1), npos(15, 1, 16)),
            vec![
                x_flag(pat),
                ascii('a', 4),
                ascii('b', 6),
                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                ascii('a', 12),
                ascii(' ', 13),
                ascii('b', 14),
            ],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        // Test that nesting whitespace insensitive flags works.
        let pat = "a (?x:a )a ";
        let x_only = ast::Flags {
            span: span_range(pat, 4..5),
            items: vec![ast::FlagsItem {
                span: span_range(pat, 4..5),
                kind: ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace),
            }],
        };
        let expected = concat_with(
            span_range(pat, 0..11),
            vec![
                lit_in(pat, 'a', 0..1),
                lit_in(pat, ' ', 1..2),
                group_in(
                    pat,
                    2..9,
                    ast::GroupKind::NonCapturing(x_only),
                    lit_in(pat, 'a', 6..7),
                ),
                lit_in(pat, 'a', 9..10),
                lit_in(pat, ' ', 10..11),
            ],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        // Test that whitespace after an opening paren is insignificant.
        let pat = "(?x)( ?P<foo> a )";
        let named = ast::GroupKind::CaptureName(ast::CaptureName {
            span: span_range(pat, 9..12),
            name: s("foo"),
            index: 1,
        });
        let expected = concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                group_in(pat, 4..pat.len(), named, lit_in(pat, 'a', 14..15)),
            ],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        let pat = "(?x)(  a )";
        let expected = concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                group_in(
                    pat,
                    4..pat.len(),
                    ast::GroupKind::CaptureIndex(1),
                    lit_in(pat, 'a', 7..8),
                ),
            ],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        let pat = "(?x)(  ?:  a )";
        let no_flags =
            ast::Flags { span: span_range(pat, 8..8), items: vec![] };
        let expected = concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                group_in(
                    pat,
                    4..pat.len(),
                    ast::GroupKind::NonCapturing(no_flags),
                    lit_in(pat, 'a', 11..12),
                ),
            ],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        let pat = r"(?x)\x { 53 }";
        let expected = concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Literal(ast::Literal {
                    span: span(4..13),
                    kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
                    c: 'S',
                }),
            ],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        // Test that whitespace after an escape is OK.
        let pat = r"(?x)\ ";
        let expected = concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Literal(ast::Literal {
                    span: span_range(pat, 4..6),
                    kind: ast::LiteralKind::Special(
                        ast::SpecialLiteralKind::Space,
                    ),
                    c: ' ',
                }),
            ],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        // ... but only when `x` mode is enabled.
        let pat = r"\ ";
        let expected = TestError {
            span: span_range(pat, 0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        };
        assert_eq!(parser(pat).parse().unwrap_err(), expected);
    }
2903
2904
    #[test]
    fn parse_newlines() {
        let pat = ".\n.";
        let expected = concat_with(
            span_range(pat, 0..3),
            vec![
                Ast::Dot(span_range(pat, 0..1)),
                lit_with('\n', span_range(pat, 1..2)),
                Ast::Dot(span_range(pat, 2..3)),
            ],
        );
        assert_eq!(parser(pat).parse(), Ok(expected));

        // Each row is a character followed by the (offset, line, column) of
        // its start and of its end. A literal newline ends at column 1 of
        // the following line.
        let pat = "foobar\nbaz\nquux\n";
        let rows = [
            ('f', (0, 1, 1), (1, 1, 2)),
            ('o', (1, 1, 2), (2, 1, 3)),
            ('o', (2, 1, 3), (3, 1, 4)),
            ('b', (3, 1, 4), (4, 1, 5)),
            ('a', (4, 1, 5), (5, 1, 6)),
            ('r', (5, 1, 6), (6, 1, 7)),
            ('\n', (6, 1, 7), (7, 2, 1)),
            ('b', (7, 2, 1), (8, 2, 2)),
            ('a', (8, 2, 2), (9, 2, 3)),
            ('z', (9, 2, 3), (10, 2, 4)),
            ('\n', (10, 2, 4), (11, 3, 1)),
            ('q', (11, 3, 1), (12, 3, 2)),
            ('u', (12, 3, 2), (13, 3, 3)),
            ('u', (13, 3, 3), (14, 3, 4)),
            ('x', (14, 3, 4), (15, 3, 5)),
            ('\n', (15, 3, 5), (16, 4, 1)),
        ];
        let literals: Vec<Ast> = rows
            .iter()
            .map(|&(c, (so, sl, sc), (eo, el, ec))| {
                lit_with(c, nspan(npos(so, sl, sc), npos(eo, el, ec)))
            })
            .collect();
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(span_range(pat, 0..pat.len()), literals))
        );
    }
2945
2946
    #[test]
    fn parse_uncounted_repetition() {
        // Expected AST for a repetition. `whole` spans the operand plus the
        // operator; `op` spans the operator alone (including a lazy `?`).
        let rep = |whole, op, kind, greedy, inner| {
            Ast::Repetition(ast::Repetition {
                span: span(whole),
                op: ast::RepetitionOp { span: span(op), kind },
                greedy,
                ast: Box::new(inner),
            })
        };
        // Shorthand for the `?` operator, which most cases below exercise.
        let opt = |whole, op, greedy, inner| {
            rep(whole, op, ast::RepetitionKind::ZeroOrOne, greedy, inner)
        };

        // Patterns that must parse, paired with the tree they must produce.
        let accepted = vec![
            (
                r"a*",
                rep(
                    0..2,
                    1..2,
                    ast::RepetitionKind::ZeroOrMore,
                    true,
                    lit('a', 0),
                ),
            ),
            (
                r"a+",
                rep(
                    0..2,
                    1..2,
                    ast::RepetitionKind::OneOrMore,
                    true,
                    lit('a', 0),
                ),
            ),
            (r"a?", opt(0..2, 1..2, true, lit('a', 0))),
            (r"a??", opt(0..3, 1..3, false, lit('a', 0))),
            // NOTE(review): this entry repeats the `a?` case above verbatim.
            (r"a?", opt(0..2, 1..2, true, lit('a', 0))),
            (
                r"a?b",
                concat(
                    0..3,
                    vec![opt(0..2, 1..2, true, lit('a', 0)), lit('b', 2)],
                ),
            ),
            (
                r"a??b",
                concat(
                    0..4,
                    vec![opt(0..3, 1..3, false, lit('a', 0)), lit('b', 3)],
                ),
            ),
            (
                r"ab?",
                concat(
                    0..3,
                    vec![lit('a', 0), opt(1..3, 2..3, true, lit('b', 1))],
                ),
            ),
            (
                r"(ab)?",
                opt(
                    0..5,
                    4..5,
                    true,
                    group(
                        0..4,
                        1,
                        concat(1..3, vec![lit('a', 1), lit('b', 2)]),
                    ),
                ),
            ),
            (
                r"|a?",
                alt(
                    0..3,
                    vec![
                        Ast::Empty(span(0..0)),
                        opt(1..3, 2..3, true, lit('a', 1)),
                    ],
                ),
            ),
        ];
        for (pattern, expected) in accepted {
            assert_eq!(
                parser(pattern).parse(),
                Ok(expected),
                "pattern: {:?}",
                pattern
            );
        }

        // An operator with nothing to repeat is rejected; the error carries
        // an empty span located at the operator's offset.
        let rejected = vec![
            (r"*", 0),
            (r"(?i)*", 4),
            (r"(*)", 1),
            (r"(?:?)", 3),
            (r"+", 0),
            (r"?", 0),
            (r"(?)", 1),
            (r"|*", 1),
            (r"|+", 1),
            (r"|?", 1),
        ];
        for (pattern, at) in rejected {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError {
                    span: span(at..at),
                    kind: ast::ErrorKind::RepetitionMissing,
                },
                "pattern: {:?}",
                pattern
            );
        }
    }
3169
3170
    #[test]
    fn parse_counted_repetition() {
        // Expected AST for a `{..}` repetition. `whole` spans the operand
        // plus the operator; `op` spans the operator alone.
        let counted = |whole, op, range, greedy, inner| {
            Ast::Repetition(ast::Repetition {
                span: span(whole),
                op: ast::RepetitionOp {
                    span: span(op),
                    kind: ast::RepetitionKind::Range(range),
                },
                greedy,
                ast: Box::new(inner),
            })
        };
        // Short names for the three range shapes.
        let exactly = |n| ast::RepetitionRange::Exactly(n);
        let at_least = |n| ast::RepetitionRange::AtLeast(n);
        let bounded = |lo, hi| ast::RepetitionRange::Bounded(lo, hi);

        // Patterns that must parse, paired with the tree they must produce.
        let accepted = vec![
            (r"a{5}", counted(0..4, 1..4, exactly(5), true, lit('a', 0))),
            (r"a{5,}", counted(0..5, 1..5, at_least(5), true, lit('a', 0))),
            (
                r"a{5,9}",
                counted(0..6, 1..6, bounded(5, 9), true, lit('a', 0)),
            ),
            (r"a{5}?", counted(0..5, 1..5, exactly(5), false, lit('a', 0))),
            (
                r"ab{5}",
                concat(
                    0..5,
                    vec![
                        lit('a', 0),
                        counted(1..5, 2..5, exactly(5), true, lit('b', 1)),
                    ],
                ),
            ),
            (
                r"ab{5}c",
                concat(
                    0..6,
                    vec![
                        lit('a', 0),
                        counted(1..5, 2..5, exactly(5), true, lit('b', 1)),
                        lit('c', 5),
                    ],
                ),
            ),
            // Spaces inside the braces are accepted by the default parser.
            (r"a{ 5 }", counted(0..6, 1..6, exactly(5), true, lit('a', 0))),
            (
                r"a{ 5 , 9 }",
                counted(0..10, 1..10, bounded(5, 9), true, lit('a', 0)),
            ),
        ];
        for (pattern, expected) in accepted {
            assert_eq!(
                parser(pattern).parse(),
                Ok(expected),
                "pattern: {:?}",
                pattern
            );
        }

        // With the whitespace-ignoring parser, a space may separate the
        // counted operator from its lazy `?` suffix; the operator span then
        // covers the space as well.
        assert_eq!(
            parser_ignore_whitespace(r"a{5,9} ?").parse(),
            Ok(counted(0..8, 1..8, bounded(5, 9), false, lit('a', 0)))
        );

        // Rejected patterns with the span and kind of the reported error.
        let rejected = vec![
            (r"(?i){0}", 4..4, ast::ErrorKind::RepetitionMissing),
            (r"(?m){1,1}", 4..4, ast::ErrorKind::RepetitionMissing),
            (r"a{]}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{1,]}", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{", 1..2, ast::ErrorKind::RepetitionCountUnclosed),
            (r"a{}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{a", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{9999999999}", 2..12, ast::ErrorKind::DecimalInvalid),
            (r"a{9", 1..3, ast::ErrorKind::RepetitionCountUnclosed),
            (r"a{9,a", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{9,9999999999}", 4..14, ast::ErrorKind::DecimalInvalid),
            (r"a{9,", 1..4, ast::ErrorKind::RepetitionCountUnclosed),
            (r"a{9,11", 1..6, ast::ErrorKind::RepetitionCountUnclosed),
            (r"a{2,1}", 1..6, ast::ErrorKind::RepetitionCountInvalid),
            (r"{5}", 0..0, ast::ErrorKind::RepetitionMissing),
            (r"|{5}", 1..1, ast::ErrorKind::RepetitionMissing),
        ];
        for (pattern, at, kind) in rejected {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError { span: span(at), kind },
                "pattern: {:?}",
                pattern
            );
        }
    }
3426
3427
    #[test]
    fn parse_alternate() {
        // Builds an alternation node directly from its struct definition
        // rather than through the `alt` test helper.
        let alternation = |range, asts| {
            Ast::Alternation(ast::Alternation { span: span(range), asts })
        };
        // An empty branch: a zero-width `Ast::Empty` at offset `at`.
        let empty = |at| Ast::Empty(span(at..at));

        // Patterns that must parse, paired with the tree they must produce.
        let accepted = vec![
            (r"a|b", alternation(0..3, vec![lit('a', 0), lit('b', 2)])),
            (
                r"(a|b)",
                group(
                    0..5,
                    1,
                    alternation(1..4, vec![lit('a', 1), lit('b', 3)]),
                ),
            ),
            (
                r"a|b|c",
                alternation(
                    0..5,
                    vec![lit('a', 0), lit('b', 2), lit('c', 4)],
                ),
            ),
            (
                r"ax|by|cz",
                alternation(
                    0..8,
                    vec![
                        concat(0..2, vec![lit('a', 0), lit('x', 1)]),
                        concat(3..5, vec![lit('b', 3), lit('y', 4)]),
                        concat(6..8, vec![lit('c', 6), lit('z', 7)]),
                    ],
                ),
            ),
            (
                r"(ax|by|cz)",
                group(
                    0..10,
                    1,
                    alternation(
                        1..9,
                        vec![
                            concat(1..3, vec![lit('a', 1), lit('x', 2)]),
                            concat(4..6, vec![lit('b', 4), lit('y', 5)]),
                            concat(7..9, vec![lit('c', 7), lit('z', 8)]),
                        ],
                    ),
                ),
            ),
            // Alternations nested inside capture groups three levels deep.
            (
                r"(ax|(by|(cz)))",
                group(
                    0..14,
                    1,
                    alt(
                        1..13,
                        vec![
                            concat(1..3, vec![lit('a', 1), lit('x', 2)]),
                            group(
                                4..13,
                                2,
                                alt(
                                    5..12,
                                    vec![
                                        concat(
                                            5..7,
                                            vec![lit('b', 5), lit('y', 6)],
                                        ),
                                        group(
                                            8..12,
                                            3,
                                            concat(
                                                9..11,
                                                vec![
                                                    lit('c', 9),
                                                    lit('z', 10),
                                                ],
                                            ),
                                        ),
                                    ],
                                ),
                            ),
                        ],
                    ),
                ),
            ),
            // Branches may be empty on either side of a `|`.
            (r"|", alt(0..1, vec![empty(0), empty(1)])),
            (r"||", alt(0..2, vec![empty(0), empty(1), empty(2)])),
            (r"a|", alt(0..2, vec![lit('a', 0), empty(2)])),
            (r"|a", alt(0..2, vec![empty(0), lit('a', 1)])),
            (
                r"(|)",
                group(0..3, 1, alt(1..2, vec![empty(1), empty(2)])),
            ),
            (
                r"(a|)",
                group(0..4, 1, alt(1..3, vec![lit('a', 1), empty(3)])),
            ),
            (
                r"(|a)",
                group(0..4, 1, alt(1..3, vec![empty(1), lit('a', 2)])),
            ),
        ];
        for (pattern, expected) in accepted {
            assert_eq!(
                parser(pattern).parse(),
                Ok(expected),
                "pattern: {:?}",
                pattern
            );
        }

        // Unbalanced parentheses around an alternation; the error span
        // points at the offending parenthesis.
        let rejected = vec![
            (r"a|b)", 3..4, ast::ErrorKind::GroupUnopened),
            (r"(a|b", 0..1, ast::ErrorKind::GroupUnclosed),
        ];
        for (pattern, at, kind) in rejected {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError { span: span(at), kind },
                "pattern: {:?}",
                pattern
            );
        }
    }
3586
3587
    #[test]
    fn parse_unsupported_lookaround() {
        // Each look-around opener is rejected with an error spanning just
        // the opener: three bytes for `(?=` / `(?!`, four for `(?<=` / `(?<!`.
        let rejected = vec![
            (r"(?=a)", 0..3),
            (r"(?!a)", 0..3),
            (r"(?<=a)", 0..4),
            (r"(?<!a)", 0..4),
        ];
        for (pattern, at) in rejected {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError {
                    span: span(at),
                    kind: ast::ErrorKind::UnsupportedLookAround,
                },
                "pattern: {:?}",
                pattern
            );
        }
    }
3618
3619
    #[test]
    fn parse_group() {
        // A single flag letter occupying `range`.
        let on = |range, flag| ast::FlagsItem {
            span: span(range),
            kind: ast::FlagsItemKind::Flag(flag),
        };
        // The `-` separator occupying `range`.
        let negate = |range| ast::FlagsItem {
            span: span(range),
            kind: ast::FlagsItemKind::Negation,
        };
        // A sequence of flag items; `range` covers the items only.
        let flag_set =
            |range, items| ast::Flags { span: span(range), items };
        // A bare flag directive such as `(?i)`.
        let directive = |range, set| {
            Ast::Flags(ast::SetFlags { span: span(range), flags: set })
        };
        // A capturing group with the given capture index.
        let capture = |range, index, inner| {
            Ast::Group(ast::Group {
                span: span(range),
                kind: ast::GroupKind::CaptureIndex(index),
                ast: Box::new(inner),
            })
        };
        // A non-capturing group carrying a (possibly empty) flag set.
        let non_capture = |range, set, inner| {
            Ast::Group(ast::Group {
                span: span(range),
                kind: ast::GroupKind::NonCapturing(set),
                ast: Box::new(inner),
            })
        };

        // Patterns that must parse, paired with the tree they must produce.
        let accepted = vec![
            (
                "(?i)",
                directive(
                    0..4,
                    flag_set(
                        2..3,
                        vec![on(2..3, ast::Flag::CaseInsensitive)],
                    ),
                ),
            ),
            (
                "(?iU)",
                directive(
                    0..5,
                    flag_set(
                        2..4,
                        vec![
                            on(2..3, ast::Flag::CaseInsensitive),
                            on(3..4, ast::Flag::SwapGreed),
                        ],
                    ),
                ),
            ),
            (
                "(?i-U)",
                directive(
                    0..6,
                    flag_set(
                        2..5,
                        vec![
                            on(2..3, ast::Flag::CaseInsensitive),
                            negate(3..4),
                            on(4..5, ast::Flag::SwapGreed),
                        ],
                    ),
                ),
            ),
            ("()", capture(0..2, 1, Ast::Empty(span(1..1)))),
            ("(a)", capture(0..3, 1, lit('a', 1))),
            (
                "(())",
                capture(0..4, 1, capture(1..3, 2, Ast::Empty(span(2..2)))),
            ),
            // `(?:` carries an empty flag set located just after the `?`.
            (
                "(?:a)",
                non_capture(0..5, flag_set(2..2, vec![]), lit('a', 3)),
            ),
            (
                "(?i:a)",
                non_capture(
                    0..6,
                    flag_set(
                        2..3,
                        vec![on(2..3, ast::Flag::CaseInsensitive)],
                    ),
                    lit('a', 4),
                ),
            ),
            (
                "(?i-U:a)",
                non_capture(
                    0..8,
                    flag_set(
                        2..5,
                        vec![
                            on(2..3, ast::Flag::CaseInsensitive),
                            negate(3..4),
                            on(4..5, ast::Flag::SwapGreed),
                        ],
                    ),
                    lit('a', 6),
                ),
            ),
        ];
        for (pattern, expected) in accepted {
            assert_eq!(
                parser(pattern).parse(),
                Ok(expected),
                "pattern: {:?}",
                pattern
            );
        }

        // Rejected patterns with the span and kind of the reported error.
        let rejected = vec![
            ("(", 0..1, ast::ErrorKind::GroupUnclosed),
            ("(?", 0..1, ast::ErrorKind::GroupUnclosed),
            ("(?P", 2..3, ast::ErrorKind::FlagUnrecognized),
            ("(?P<", 4..4, ast::ErrorKind::GroupNameUnexpectedEof),
            ("(a", 0..1, ast::ErrorKind::GroupUnclosed),
            ("(()", 0..1, ast::ErrorKind::GroupUnclosed),
            (")", 0..1, ast::ErrorKind::GroupUnopened),
            ("a)", 1..2, ast::ErrorKind::GroupUnopened),
        ];
        for (pattern, at, kind) in rejected {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError { span: span(at), kind },
                "pattern: {:?}",
                pattern
            );
        }
    }
3830
3831
    /// Exercises named capture groups (`(?P<name>...)`): well-formed names
    /// yield a `CaptureName` group with index 1, while truncated, empty,
    /// invalid or duplicated names yield the asserted errors.
    #[test]
    fn parse_capture_name() {
        // Expected AST for `(?P<NAME>z)`. In every case below the name
        // starts at offset 4, right after the `(?P<` prefix.
        let named_z =
            |group_end: usize, name_end: usize, name: &str, z_at: usize| {
                Ast::Group(ast::Group {
                    span: span(0..group_end),
                    kind: ast::GroupKind::CaptureName(ast::CaptureName {
                        span: span(4..name_end),
                        name: s(name),
                        index: 1,
                    }),
                    ast: Box::new(lit('z', z_at)),
                })
            };
        // Expected error covering `start..end` with the given kind.
        let failure = |start: usize, end: usize, kind: ast::ErrorKind| {
            TestError { span: span(start..end), kind }
        };

        assert_eq!(parser("(?P<a>z)").parse(), Ok(named_z(8, 5, "a", 6)));
        assert_eq!(
            parser("(?P<abc>z)").parse(),
            Ok(named_z(10, 7, "abc", 8))
        );

        // Underscores, dots and brackets are all accepted inside a name.
        assert_eq!(
            parser("(?P<a_1>z)").parse(),
            Ok(named_z(10, 7, "a_1", 8))
        );
        assert_eq!(
            parser("(?P<a.1>z)").parse(),
            Ok(named_z(10, 7, "a.1", 8))
        );
        assert_eq!(
            parser("(?P<a[1]>z)").parse(),
            Ok(named_z(11, 8, "a[1]", 9))
        );

        // Input that ends inside the name, and an empty name.
        assert_eq!(
            parser("(?P<").parse().unwrap_err(),
            failure(4, 4, ast::ErrorKind::GroupNameUnexpectedEof)
        );
        assert_eq!(
            parser("(?P<>z)").parse().unwrap_err(),
            failure(4, 4, ast::ErrorKind::GroupNameEmpty)
        );
        assert_eq!(
            parser("(?P<a").parse().unwrap_err(),
            failure(5, 5, ast::ErrorKind::GroupNameUnexpectedEof)
        );
        assert_eq!(
            parser("(?P<ab").parse().unwrap_err(),
            failure(6, 6, ast::ErrorKind::GroupNameUnexpectedEof)
        );

        // Invalid characters are reported at the offending character.
        assert_eq!(
            parser("(?P<0a").parse().unwrap_err(),
            failure(4, 5, ast::ErrorKind::GroupNameInvalid)
        );
        assert_eq!(
            parser("(?P<~").parse().unwrap_err(),
            failure(4, 5, ast::ErrorKind::GroupNameInvalid)
        );
        assert_eq!(
            parser("(?P<abc~").parse().unwrap_err(),
            failure(7, 8, ast::ErrorKind::GroupNameInvalid)
        );

        // A duplicate name points back at the first definition.
        assert_eq!(
            parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
            failure(
                12,
                13,
                ast::ErrorKind::GroupNameDuplicate { original: span(4..5) }
            )
        );
    }
3956
3957
    /// Exercises `parse_flags` on flag lists terminated by `:` or `)`,
    /// with and without negation, plus the error cases for truncated
    /// input, unknown or duplicate flags and repeated or dangling negation.
    #[test]
    fn parse_flags() {
        // Every item in these cases occupies the single byte at `at`.
        let flag = |at: usize, which: ast::Flag| ast::FlagsItem {
            span: span(at..at + 1),
            kind: ast::FlagsItemKind::Flag(which),
        };
        let negation = |at: usize| ast::FlagsItem {
            span: span(at..at + 1),
            kind: ast::FlagsItemKind::Negation,
        };
        // The flag list starts at offset 0; its span excludes the
        // terminating `:` or `)`.
        let flags = |end: usize, items: Vec<ast::FlagsItem>| ast::Flags {
            span: span(0..end),
            items,
        };
        // Expected error covering `start..end` with the given kind.
        let failure = |start: usize, end: usize, kind: ast::ErrorKind| {
            TestError { span: span(start..end), kind }
        };

        assert_eq!(
            parser("i:").parse_flags(),
            Ok(flags(1, vec![flag(0, ast::Flag::CaseInsensitive)]))
        );
        assert_eq!(
            parser("i)").parse_flags(),
            Ok(flags(1, vec![flag(0, ast::Flag::CaseInsensitive)]))
        );

        assert_eq!(
            parser("isU:").parse_flags(),
            Ok(flags(
                3,
                vec![
                    flag(0, ast::Flag::CaseInsensitive),
                    flag(1, ast::Flag::DotMatchesNewLine),
                    flag(2, ast::Flag::SwapGreed),
                ]
            ))
        );

        assert_eq!(
            parser("-isU:").parse_flags(),
            Ok(flags(
                4,
                vec![
                    negation(0),
                    flag(1, ast::Flag::CaseInsensitive),
                    flag(2, ast::Flag::DotMatchesNewLine),
                    flag(3, ast::Flag::SwapGreed),
                ]
            ))
        );
        assert_eq!(
            parser("i-sU:").parse_flags(),
            Ok(flags(
                4,
                vec![
                    flag(0, ast::Flag::CaseInsensitive),
                    negation(1),
                    flag(2, ast::Flag::DotMatchesNewLine),
                    flag(3, ast::Flag::SwapGreed),
                ]
            ))
        );

        assert_eq!(
            parser("isU").parse_flags().unwrap_err(),
            failure(3, 3, ast::ErrorKind::FlagUnexpectedEof)
        );
        assert_eq!(
            parser("isUa:").parse_flags().unwrap_err(),
            failure(3, 4, ast::ErrorKind::FlagUnrecognized)
        );
        assert_eq!(
            parser("isUi:").parse_flags().unwrap_err(),
            failure(
                3,
                4,
                ast::ErrorKind::FlagDuplicate { original: span(0..1) }
            )
        );
        assert_eq!(
            parser("i-sU-i:").parse_flags().unwrap_err(),
            failure(
                4,
                5,
                ast::ErrorKind::FlagRepeatedNegation { original: span(1..2) }
            )
        );

        // A `-` with no flag after it is reported at the `-` itself.
        assert_eq!(
            parser("-)").parse_flags().unwrap_err(),
            failure(0, 1, ast::ErrorKind::FlagDanglingNegation)
        );
        assert_eq!(
            parser("i-)").parse_flags().unwrap_err(),
            failure(1, 2, ast::ErrorKind::FlagDanglingNegation)
        );
        assert_eq!(
            parser("iU-)").parse_flags().unwrap_err(),
            failure(2, 3, ast::ErrorKind::FlagDanglingNegation)
        );
    }
4114
4115
    /// Exercises `parse_flag`: each recognized flag letter maps to its
    /// `ast::Flag` variant, and any other character is rejected with
    /// `FlagUnrecognized`.
    #[test]
    fn parse_flag() {
        let recognized = vec![
            ("i", ast::Flag::CaseInsensitive),
            ("m", ast::Flag::MultiLine),
            ("s", ast::Flag::DotMatchesNewLine),
            ("U", ast::Flag::SwapGreed),
            ("u", ast::Flag::Unicode),
            ("x", ast::Flag::IgnoreWhitespace),
        ];
        for (pat, expected) in recognized {
            assert_eq!(parser(pat).parse_flag(), Ok(expected));
        }

        let unrecognized_a = TestError {
            span: span(0..1),
            kind: ast::ErrorKind::FlagUnrecognized,
        };
        assert_eq!(parser("a").parse_flag().unwrap_err(), unrecognized_a);

        // The snowman is three bytes long, so the error span is 0..3.
        let unrecognized_snowman = TestError {
            span: span_range("☃", 0..3),
            kind: ast::ErrorKind::FlagUnrecognized,
        };
        assert_eq!(
            parser("☃").parse_flag().unwrap_err(),
            unrecognized_snowman
        );
    }
4139
4140
    /// Exercises `parse_primitive` on single unescaped characters: the
    /// dot, the line anchors, and verbatim literals (including a
    /// multi-byte one).
    #[test]
    fn parse_primitive_non_escape() {
        // A one-byte assertion at the start of the pattern.
        let assertion = |kind: ast::AssertionKind| {
            Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
        };
        // A verbatim literal `c` covering `at`.
        let verbatim = |at: Span, c: char| {
            Primitive::Literal(ast::Literal {
                span: at,
                kind: ast::LiteralKind::Verbatim,
                c,
            })
        };

        assert_eq!(
            parser(r".").parse_primitive(),
            Ok(Primitive::Dot(span(0..1)))
        );
        assert_eq!(
            parser(r"^").parse_primitive(),
            Ok(assertion(ast::AssertionKind::StartLine))
        );
        assert_eq!(
            parser(r"$").parse_primitive(),
            Ok(assertion(ast::AssertionKind::EndLine))
        );

        assert_eq!(
            parser(r"a").parse_primitive(),
            Ok(verbatim(span(0..1), 'a'))
        );
        // `parse_primitive` returns `|` as a plain literal.
        assert_eq!(
            parser(r"|").parse_primitive(),
            Ok(verbatim(span(0..1), '|'))
        );
        assert_eq!(
            parser(r"☃").parse_primitive(),
            Ok(verbatim(span_range("☃", 0..3), '☃'))
        );
    }
4186
4187
    /// Exercises two-character escapes: escaped punctuation, the special
    /// literals (`\a`, `\f`, `\t`, `\n`, `\r`, `\v`), the escape-style
    /// assertions, and the errors for a lone backslash and an unknown
    /// escape.
    #[test]
    fn parse_escape() {
        // Every successful escape below spans the two bytes 0..2.
        let escaped = |kind: ast::LiteralKind, c: char| {
            Primitive::Literal(ast::Literal { span: span(0..2), kind, c })
        };

        assert_eq!(
            parser(r"\|").parse_primitive(),
            Ok(escaped(ast::LiteralKind::Punctuation, '|'))
        );

        let special_literals = vec![
            (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
            (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
            (r"\t", '\t', ast::SpecialLiteralKind::Tab),
            (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
            (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
            (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
        ];
        for (pat, c, special) in special_literals {
            assert_eq!(
                parser(pat).parse_primitive(),
                Ok(escaped(ast::LiteralKind::Special(special), c))
            );
        }

        let assertions = vec![
            (r"\A", ast::AssertionKind::StartText),
            (r"\z", ast::AssertionKind::EndText),
            (r"\b", ast::AssertionKind::WordBoundary),
            (r"\B", ast::AssertionKind::NotWordBoundary),
        ];
        for (pat, kind) in assertions {
            assert_eq!(
                parser(pat).parse_primitive(),
                Ok(Primitive::Assertion(ast::Assertion {
                    span: span(0..2),
                    kind,
                }))
            );
        }

        // A trailing backslash has nothing to escape.
        let lone_backslash = TestError {
            span: span(0..1),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        };
        assert_eq!(parser(r"\").parse_escape().unwrap_err(), lone_backslash);

        let unknown_escape = TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        };
        assert_eq!(parser(r"\y").parse_escape().unwrap_err(), unknown_escape);
    }
4259
4260
    /// With the default test parser, a backslash followed by a decimal
    /// digit is rejected as an unsupported backreference spanning both
    /// characters.
    #[test]
    fn parse_unsupported_backreference() {
        for &pat in [r"\0", r"\9"].iter() {
            let expected = TestError {
                span: span(0..2),
                kind: ast::ErrorKind::UnsupportedBackreference,
            };
            assert_eq!(parser(pat).parse_escape().unwrap_err(), expected);
        }
    }
4277
4278
    /// Exercises octal escapes using the parser built by `parser_octal`:
    /// every value in `0..511`, the point where digit consumption stops,
    /// and the rejection of a non-octal digit.
    #[test]
    fn parse_octal() {
        // An octal literal `c` spanning `0..end`.
        let octal = |end: usize, c: char| ast::Literal {
            span: span(0..end),
            kind: ast::LiteralKind::Octal,
            c,
        };
        // A verbatim one-byte literal `c` at offset `at`.
        let verbatim = |at: usize, c: char| ast::Literal {
            span: span(at..at + 1),
            kind: ast::LiteralKind::Verbatim,
            c,
        };

        for code in 0..511u32 {
            let pat = format!(r"\{:o}", code);
            let expected =
                octal(pat.len(), std::char::from_u32(code).unwrap());
            assert_eq!(
                parser_octal(&pat).parse_escape(),
                Ok(Primitive::Literal(expected))
            );
        }

        // `8` is not an octal digit, so only `\77` (`?`) is consumed.
        assert_eq!(
            parser_octal(r"\778").parse_escape(),
            Ok(Primitive::Literal(octal(3, '?')))
        );
        // No more than three octal digits are consumed.
        assert_eq!(
            parser_octal(r"\7777").parse_escape(),
            Ok(Primitive::Literal(octal(4, '\u{01FF}')))
        );

        // In a full parse the leftover character becomes its own literal.
        assert_eq!(
            parser_octal(r"\778").parse(),
            Ok(Ast::Concat(ast::Concat {
                span: span(0..4),
                asts: vec![
                    Ast::Literal(octal(3, '?')),
                    Ast::Literal(verbatim(3, '8')),
                ],
            }))
        );
        assert_eq!(
            parser_octal(r"\7777").parse(),
            Ok(Ast::Concat(ast::Concat {
                span: span(0..5),
                asts: vec![
                    Ast::Literal(octal(4, '\u{01FF}')),
                    Ast::Literal(verbatim(4, '7')),
                ],
            }))
        );

        let not_octal = TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        };
        assert_eq!(
            parser_octal(r"\8").parse_escape().unwrap_err(),
            not_octal
        );
    }
4352
4353
    /// Exercises fixed-width `\xNN` escapes: all 256 two-digit values,
    /// then truncated input and non-hex digits.
    #[test]
    fn parse_hex_two() {
        // Expected error covering `start..end` with the given kind.
        let failure = |start: usize, end: usize, kind: ast::ErrorKind| {
            TestError { span: span(start..end), kind }
        };

        for code in 0..256u32 {
            let pat = format!(r"\x{:02x}", code);
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
                c: std::char::from_u32(code).unwrap(),
            });
            assert_eq!(parser(&pat).parse_escape(), Ok(expected));
        }

        // Only one of the two required digits is present.
        assert_eq!(
            parser(r"\xF").parse_escape().unwrap_err(),
            failure(3, 3, ast::ErrorKind::EscapeUnexpectedEof)
        );
        // A non-hex digit is reported at its own position.
        assert_eq!(
            parser(r"\xG").parse_escape().unwrap_err(),
            failure(2, 3, ast::ErrorKind::EscapeHexInvalidDigit)
        );
        assert_eq!(
            parser(r"\xFG").parse_escape().unwrap_err(),
            failure(3, 4, ast::ErrorKind::EscapeHexInvalidDigit)
        );
    }
4389
4390
    /// Exercises fixed-width `\uNNNN` escapes: every valid scalar value
    /// below 65536, then truncated input, non-hex digits, and a value
    /// that is not a valid `char`.
    #[test]
    fn parse_hex_four() {
        // Expected error covering `start..end` with the given kind.
        let failure = |start: usize, end: usize, kind: ast::ErrorKind| {
            TestError { span: span(start..end), kind }
        };

        // Values that `from_u32` rejects are skipped.
        let scalars = (0..65536u32)
            .filter_map(|code| std::char::from_u32(code).map(|c| (code, c)));
        for (code, c) in scalars {
            let pat = format!(r"\u{:04x}", code);
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeShort,
                ),
                c,
            });
            assert_eq!(parser(&pat).parse_escape(), Ok(expected));
        }

        assert_eq!(
            parser(r"\uF").parse_escape().unwrap_err(),
            failure(3, 3, ast::ErrorKind::EscapeUnexpectedEof)
        );

        // `\uG`, `\uFG`, `\uFFG`, `\uFFFG`: the `G` is reported at its own
        // offset, which is two past the number of valid digits before it.
        for valid in 0..4 {
            let pat = format!(r"\u{}G", "F".repeat(valid));
            assert_eq!(
                parser(&pat).parse_escape().unwrap_err(),
                failure(
                    valid + 2,
                    valid + 3,
                    ast::ErrorKind::EscapeHexInvalidDigit
                )
            );
        }

        // Four well-formed digits whose value is rejected as invalid.
        assert_eq!(
            parser(r"\uD800").parse_escape().unwrap_err(),
            failure(2, 6, ast::ErrorKind::EscapeHexInvalid)
        );
    }
4453
4454
    /// Exercises fixed-width `\UNNNNNNNN` escapes: every valid scalar
    /// value below 65536 written with eight digits, then truncated input
    /// and non-hex digits at each of the eight positions.
    #[test]
    fn parse_hex_eight() {
        // Expected error covering `start..end` with the given kind.
        let failure = |start: usize, end: usize, kind: ast::ErrorKind| {
            TestError { span: span(start..end), kind }
        };

        // Values that `from_u32` rejects are skipped.
        let scalars = (0..65536u32)
            .filter_map(|code| std::char::from_u32(code).map(|c| (code, c)));
        for (code, c) in scalars {
            let pat = format!(r"\U{:08x}", code);
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeLong,
                ),
                c,
            });
            assert_eq!(parser(&pat).parse_escape(), Ok(expected));
        }

        assert_eq!(
            parser(r"\UF").parse_escape().unwrap_err(),
            failure(3, 3, ast::ErrorKind::EscapeUnexpectedEof)
        );

        // `\UG` through `\UFFFFFFFG`: the `G` is reported at its own
        // offset, which is two past the number of valid digits before it.
        for valid in 0..8 {
            let pat = format!(r"\U{}G", "F".repeat(valid));
            assert_eq!(
                parser(&pat).parse_escape().unwrap_err(),
                failure(
                    valid + 2,
                    valid + 3,
                    ast::ErrorKind::EscapeHexInvalidDigit
                )
            );
        }
    }
4538
4539
    /// Exercises brace-delimited hex escapes (`\x{...}`, `\u{...}`,
    /// `\U{...}`): mixed-case digits and the largest accepted value, then
    /// the errors for unterminated, empty, malformed and invalid input.
    #[test]
    fn parse_hex_brace() {
        // A braced hex literal `c` spanning `0..end`.
        let braced = |end: usize, kind: ast::HexLiteralKind, c: char| {
            Primitive::Literal(ast::Literal {
                span: span(0..end),
                kind: ast::LiteralKind::HexBrace(kind),
                c,
            })
        };
        // Expected error covering `start..end` with the given kind.
        let failure = |start: usize, end: usize, kind: ast::ErrorKind| {
            TestError { span: span(start..end), kind }
        };

        assert_eq!(
            parser(r"\u{26c4}").parse_escape(),
            Ok(braced(8, ast::HexLiteralKind::UnicodeShort, '⛄'))
        );
        assert_eq!(
            parser(r"\U{26c4}").parse_escape(),
            Ok(braced(8, ast::HexLiteralKind::UnicodeLong, '⛄'))
        );
        assert_eq!(
            parser(r"\x{26c4}").parse_escape(),
            Ok(braced(8, ast::HexLiteralKind::X, '⛄'))
        );
        // Hex digits are accepted in either case.
        assert_eq!(
            parser(r"\x{26C4}").parse_escape(),
            Ok(braced(8, ast::HexLiteralKind::X, '⛄'))
        );
        assert_eq!(
            parser(r"\x{10fFfF}").parse_escape(),
            Ok(braced(10, ast::HexLiteralKind::X, '\u{10FFFF}'))
        );

        // Input that ends before the closing brace.
        assert_eq!(
            parser(r"\x").parse_escape().unwrap_err(),
            failure(2, 2, ast::ErrorKind::EscapeUnexpectedEof)
        );
        assert_eq!(
            parser(r"\x{").parse_escape().unwrap_err(),
            failure(2, 3, ast::ErrorKind::EscapeUnexpectedEof)
        );
        assert_eq!(
            parser(r"\x{FF").parse_escape().unwrap_err(),
            failure(2, 5, ast::ErrorKind::EscapeUnexpectedEof)
        );

        assert_eq!(
            parser(r"\x{}").parse_escape().unwrap_err(),
            failure(2, 4, ast::ErrorKind::EscapeHexEmpty)
        );
        assert_eq!(
            parser(r"\x{FGF}").parse_escape().unwrap_err(),
            failure(4, 5, ast::ErrorKind::EscapeHexInvalidDigit)
        );

        // Well-formed digits whose value is rejected; the span covers
        // only the digits between the braces.
        assert_eq!(
            parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
            failure(3, 9, ast::ErrorKind::EscapeHexInvalid)
        );
        assert_eq!(
            parser(r"\x{D800}").parse_escape().unwrap_err(),
            failure(3, 7, ast::ErrorKind::EscapeHexInvalid)
        );
        assert_eq!(
            parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
            failure(3, 12, ast::ErrorKind::EscapeHexInvalid)
        );
    }
4643
4644
    /// Exercises `parse_decimal`: plain and zero-prefixed numbers parse
    /// to their value, while a leading `-`, empty input and a ten-digit
    /// run of nines are rejected.
    #[test]
    fn parse_decimal() {
        for &(pat, value) in [("123", 123), ("0", 0), ("01", 1)].iter() {
            assert_eq!(parser(pat).parse_decimal(), Ok(value));
        }

        // Neither input starts with a digit, so both errors are the
        // empty span at offset 0.
        for &pat in ["-1", ""].iter() {
            let expected = TestError {
                span: span(0..0),
                kind: ast::ErrorKind::DecimalEmpty,
            };
            assert_eq!(parser(pat).parse_decimal().unwrap_err(), expected);
        }

        // The error span covers all ten digits.
        let invalid = TestError {
            span: span(0..10),
            kind: ast::ErrorKind::DecimalInvalid,
        };
        assert_eq!(
            parser("9999999999").parse_decimal().unwrap_err(),
            invalid
        );
    }
4666
4667
    #[test]
4668
    fn parse_set_class() {
4669
        fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4670
            ast::ClassSet::union(ast::ClassSetUnion {
4671
                span: span,
4672
                items: items,
4673
            })
4674
        }
4675
4676
        fn intersection(
4677
            span: Span,
4678
            lhs: ast::ClassSet,
4679
            rhs: ast::ClassSet,
4680
        ) -> ast::ClassSet {
4681
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4682
                span: span,
4683
                kind: ast::ClassSetBinaryOpKind::Intersection,
4684
                lhs: Box::new(lhs),
4685
                rhs: Box::new(rhs),
4686
            })
4687
        }
4688
4689
        /// Test helper: builds a `ClassSet::BinaryOp` of kind `Difference`
        /// covering `span`, with `lhs` and `rhs` boxed as its operands.
        fn difference(
4690
            span: Span,
4691
            lhs: ast::ClassSet,
4692
            rhs: ast::ClassSet,
4693
        ) -> ast::ClassSet {
4694
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4695
                span: span,
4696
                kind: ast::ClassSetBinaryOpKind::Difference,
4697
                lhs: Box::new(lhs),
4698
                rhs: Box::new(rhs),
4699
            })
4700
        }
4701
4702
        /// Test helper: builds a `ClassSet::BinaryOp` of kind
        /// `SymmetricDifference` covering `span`, with `lhs` and `rhs` boxed
        /// as its operands.
        fn symdifference(
4703
            span: Span,
4704
            lhs: ast::ClassSet,
4705
            rhs: ast::ClassSet,
4706
        ) -> ast::ClassSet {
4707
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4708
                span: span,
4709
                kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4710
                lhs: Box::new(lhs),
4711
                rhs: Box::new(rhs),
4712
            })
4713
        }
4714
4715
        /// Test helper: wraps a single set item as `ClassSet::Item`.
        fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4716
            ast::ClassSet::Item(item)
4717
        }
4718
4719
        /// Test helper: wraps an ASCII class as `ClassSetItem::Ascii`.
        fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4720
            ast::ClassSetItem::Ascii(cls)
4721
        }
4722
4723
        /// Test helper: wraps a Unicode class as `ClassSetItem::Unicode`.
        fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4724
            ast::ClassSetItem::Unicode(cls)
4725
        }
4726
4727
        /// Test helper: wraps a Perl class as `ClassSetItem::Perl`.
        fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4728
            ast::ClassSetItem::Perl(cls)
4729
        }
4730
4731
        /// Test helper: boxes a bracketed class and wraps it as
        /// `ClassSetItem::Bracketed`.
        fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4732
            ast::ClassSetItem::Bracketed(Box::new(cls))
4733
        }
4734
4735
        /// Test helper: builds a `ClassSetItem::Literal` for character `c` at
        /// `span`, always with kind `LiteralKind::Verbatim`.
        fn lit(span: Span, c: char) -> ast::ClassSetItem {
4736
            ast::ClassSetItem::Literal(ast::Literal {
4737
                span: span,
4738
                kind: ast::LiteralKind::Verbatim,
4739
                c: c,
4740
            })
4741
        }
4742
4743
        /// Test helper: builds a `ClassSetItem::Empty` at `span`.
        fn empty(span: Span) -> ast::ClassSetItem {
4744
            ast::ClassSetItem::Empty(span)
4745
        }
4746
4747
        /// Test helper: builds a `ClassSetItem::Range` covering `span` whose
        /// endpoints are verbatim literals `start` and `end`.
        ///
        /// The endpoint spans are derived from `span`: the start literal runs
        /// from `span.start` to `pos1`, and the end literal runs from `pos2`
        /// to `span.end`.
        fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4748
            // Position just past the start character: advance the offset by
            // its UTF-8 length and the column by one; remaining fields are
            // copied from `span.start`.
            let pos1 = Position {
4749
                offset: span.start.offset + start.len_utf8(),
4750
                column: span.start.column + 1,
4751
                ..span.start
4752
            };
4753
            // Position just before the end character: move the offset back by
            // its UTF-8 length and the column back by one; remaining fields
            // are copied from `span.end`.
            let pos2 = Position {
4754
                offset: span.end.offset - end.len_utf8(),
4755
                column: span.end.column - 1,
4756
                ..span.end
4757
            };
4758
            ast::ClassSetItem::Range(ast::ClassSetRange {
4759
                span: span,
4760
                start: ast::Literal {
4761
                    span: Span { end: pos1, ..span },
4762
                    kind: ast::LiteralKind::Verbatim,
4763
                    c: start,
4764
                },
4765
                end: ast::Literal {
4766
                    span: Span { start: pos2, ..span },
4767
                    kind: ast::LiteralKind::Verbatim,
4768
                    c: end,
4769
                },
4770
            })
4771
        }
4772
4773
        /// Test helper: builds a `ClassAscii` of kind `Alnum` at `span` with
        /// the given `negated` flag.
        fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4774
            ast::ClassAscii {
4775
                span: span,
4776
                kind: ast::ClassAsciiKind::Alnum,
4777
                negated: negated,
4778
            }
4779
        }
4780
4781
        /// Test helper: builds a `ClassAscii` of kind `Lower` at `span` with
        /// the given `negated` flag.
        fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4782
            ast::ClassAscii {
4783
                span: span,
4784
                kind: ast::ClassAsciiKind::Lower,
4785
                negated: negated,
4786
            }
4787
        }
4788
4789
        // ASCII classes such as `[:alnum:]` inside brackets: on their own,
        // nested inside another bracketed class, and as the operands of the
        // `&&`, `--` and `~~` set operators.
        assert_eq!(
4790
            parser("[[:alnum:]]").parse(),
4791
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4792
                span: span(0..11),
4793
                negated: false,
4794
                kind: itemset(item_ascii(alnum(span(1..10), false))),
4795
            })))
4796
        );
4797
        // An extra pair of brackets is expected to yield a nested bracketed
        // class item whose own kind is the ASCII class.
        assert_eq!(
4798
            parser("[[[:alnum:]]]").parse(),
4799
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4800
                span: span(0..13),
4801
                negated: false,
4802
                kind: itemset(item_bracket(ast::ClassBracketed {
4803
                    span: span(1..12),
4804
                    negated: false,
4805
                    kind: itemset(item_ascii(alnum(span(2..11), false))),
4806
                })),
4807
            })))
4808
        );
4809
        // `&&` -> intersection.
        assert_eq!(
4810
            parser("[[:alnum:]&&[:lower:]]").parse(),
4811
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4812
                span: span(0..22),
4813
                negated: false,
4814
                kind: intersection(
4815
                    span(1..21),
4816
                    itemset(item_ascii(alnum(span(1..10), false))),
4817
                    itemset(item_ascii(lower(span(12..21), false))),
4818
                ),
4819
            })))
4820
        );
4821
        // `--` -> difference.
        assert_eq!(
4822
            parser("[[:alnum:]--[:lower:]]").parse(),
4823
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4824
                span: span(0..22),
4825
                negated: false,
4826
                kind: difference(
4827
                    span(1..21),
4828
                    itemset(item_ascii(alnum(span(1..10), false))),
4829
                    itemset(item_ascii(lower(span(12..21), false))),
4830
                ),
4831
            })))
4832
        );
4833
        // `~~` -> symmetric difference.
        assert_eq!(
4834
            parser("[[:alnum:]~~[:lower:]]").parse(),
4835
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4836
                span: span(0..22),
4837
                negated: false,
4838
                kind: symdifference(
4839
                    span(1..21),
4840
                    itemset(item_ascii(alnum(span(1..10), false))),
4841
                    itemset(item_ascii(lower(span(12..21), false))),
4842
                ),
4843
            })))
4844
        );
4845
4846
        // Literals and unions: a single literal, escaped punctuation (`\]`,
        // `\-`), multi-item unions, a `-` at either edge of the class taken as
        // a literal, and `\pL` / `\w` classes used as set items.
        assert_eq!(
4847
            parser("[a]").parse(),
4848
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4849
                span: span(0..3),
4850
                negated: false,
4851
                kind: itemset(lit(span(1..2), 'a')),
4852
            })))
4853
        );
4854
        // An escaped `]` is expected as a `Punctuation` literal spanning both
        // the backslash and the bracket.
        assert_eq!(
4855
            parser(r"[a\]]").parse(),
4856
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4857
                span: span(0..5),
4858
                negated: false,
4859
                kind: union(
4860
                    span(1..4),
4861
                    vec![
4862
                        lit(span(1..2), 'a'),
4863
                        ast::ClassSetItem::Literal(ast::Literal {
4864
                            span: span(2..4),
4865
                            kind: ast::LiteralKind::Punctuation,
4866
                            c: ']',
4867
                        }),
4868
                    ]
4869
                ),
4870
            })))
4871
        );
4872
        // An escaped `-` is expected to be a literal item, not a range
        // operator: the result is a three-item union.
        assert_eq!(
4873
            parser(r"[a\-z]").parse(),
4874
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4875
                span: span(0..6),
4876
                negated: false,
4877
                kind: union(
4878
                    span(1..5),
4879
                    vec![
4880
                        lit(span(1..2), 'a'),
4881
                        ast::ClassSetItem::Literal(ast::Literal {
4882
                            span: span(2..4),
4883
                            kind: ast::LiteralKind::Punctuation,
4884
                            c: '-',
4885
                        }),
4886
                        lit(span(4..5), 'z'),
4887
                    ]
4888
                ),
4889
            })))
4890
        );
4891
        assert_eq!(
4892
            parser("[ab]").parse(),
4893
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4894
                span: span(0..4),
4895
                negated: false,
4896
                kind: union(
4897
                    span(1..3),
4898
                    vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4899
                ),
4900
            })))
4901
        );
4902
        // Trailing `-` is expected to be a verbatim literal.
        assert_eq!(
4903
            parser("[a-]").parse(),
4904
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4905
                span: span(0..4),
4906
                negated: false,
4907
                kind: union(
4908
                    span(1..3),
4909
                    vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4910
                ),
4911
            })))
4912
        );
4913
        // Leading `-` is expected to be a verbatim literal.
        assert_eq!(
4914
            parser("[-a]").parse(),
4915
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4916
                span: span(0..4),
4917
                negated: false,
4918
                kind: union(
4919
                    span(1..3),
4920
                    vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4921
                ),
4922
            })))
4923
        );
4924
        // One-letter Unicode class as the only set item.
        assert_eq!(
4925
            parser(r"[\pL]").parse(),
4926
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4927
                span: span(0..5),
4928
                negated: false,
4929
                kind: itemset(item_unicode(ast::ClassUnicode {
4930
                    span: span(1..4),
4931
                    negated: false,
4932
                    kind: ast::ClassUnicodeKind::OneLetter('L'),
4933
                })),
4934
            })))
4935
        );
4936
        // Perl class as the only set item.
        assert_eq!(
4937
            parser(r"[\w]").parse(),
4938
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4939
                span: span(0..4),
4940
                negated: false,
4941
                kind: itemset(item_perl(ast::ClassPerl {
4942
                    span: span(1..3),
4943
                    kind: ast::ClassPerlKind::Word,
4944
                    negated: false,
4945
                })),
4946
            })))
4947
        );
4948
        // Perl class mixed with literals in one union.
        assert_eq!(
4949
            parser(r"[a\wz]").parse(),
4950
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4951
                span: span(0..6),
4952
                negated: false,
4953
                kind: union(
4954
                    span(1..5),
4955
                    vec![
4956
                        lit(span(1..2), 'a'),
4957
                        item_perl(ast::ClassPerl {
4958
                            span: span(2..4),
4959
                            kind: ast::ClassPerlKind::Word,
4960
                            negated: false,
4961
                        }),
4962
                        lit(span(4..5), 'z'),
4963
                    ]
4964
                ),
4965
            })))
4966
        );
4967
4968
        // Ranges, unions of ranges, and binary set operators whose operands
        // are unions or single items.
        assert_eq!(
4969
            parser("[a-z]").parse(),
4970
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4971
                span: span(0..5),
4972
                negated: false,
4973
                kind: itemset(range(span(1..4), 'a', 'z')),
4974
            })))
4975
        );
4976
        assert_eq!(
4977
            parser("[a-cx-z]").parse(),
4978
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4979
                span: span(0..8),
4980
                negated: false,
4981
                kind: union(
4982
                    span(1..7),
4983
                    vec![
4984
                        range(span(1..4), 'a', 'c'),
4985
                        range(span(4..7), 'x', 'z'),
4986
                    ]
4987
                ),
4988
            })))
4989
        );
4990
        // Intersection with a union of ranges on the right-hand side.
        assert_eq!(
4991
            parser(r"[\w&&a-cx-z]").parse(),
4992
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4993
                span: span(0..12),
4994
                negated: false,
4995
                kind: intersection(
4996
                    span(1..11),
4997
                    itemset(item_perl(ast::ClassPerl {
4998
                        span: span(1..3),
4999
                        kind: ast::ClassPerlKind::Word,
5000
                        negated: false,
5001
                    })),
5002
                    union(
5003
                        span(5..11),
5004
                        vec![
5005
                            range(span(5..8), 'a', 'c'),
5006
                            range(span(8..11), 'x', 'z'),
5007
                        ]
5008
                    ),
5009
                ),
5010
            })))
5011
        );
5012
        // Same operands with the union on the left-hand side.
        assert_eq!(
5013
            parser(r"[a-cx-z&&\w]").parse(),
5014
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5015
                span: span(0..12),
5016
                negated: false,
5017
                kind: intersection(
5018
                    span(1..11),
5019
                    union(
5020
                        span(1..7),
5021
                        vec![
5022
                            range(span(1..4), 'a', 'c'),
5023
                            range(span(4..7), 'x', 'z'),
5024
                        ]
5025
                    ),
5026
                    itemset(item_perl(ast::ClassPerl {
5027
                        span: span(9..11),
5028
                        kind: ast::ClassPerlKind::Word,
5029
                        negated: false,
5030
                    })),
5031
                ),
5032
            })))
5033
        );
5034
        // Chained `--` is expected to nest to the left: (a -- b) -- c.
        assert_eq!(
5035
            parser(r"[a--b--c]").parse(),
5036
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5037
                span: span(0..9),
5038
                negated: false,
5039
                kind: difference(
5040
                    span(1..8),
5041
                    difference(
5042
                        span(1..5),
5043
                        itemset(lit(span(1..2), 'a')),
5044
                        itemset(lit(span(4..5), 'b')),
5045
                    ),
5046
                    itemset(lit(span(7..8), 'c')),
5047
                ),
5048
            })))
5049
        );
5050
        // Chained `~~` is expected to nest to the left as well.
        assert_eq!(
5051
            parser(r"[a~~b~~c]").parse(),
5052
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5053
                span: span(0..9),
5054
                negated: false,
5055
                kind: symdifference(
5056
                    span(1..8),
5057
                    symdifference(
5058
                        span(1..5),
5059
                        itemset(lit(span(1..2), 'a')),
5060
                        itemset(lit(span(4..5), 'b')),
5061
                    ),
5062
                    itemset(lit(span(7..8), 'c')),
5063
                ),
5064
            })))
5065
        );
5066
        // An escaped `^` first and an unescaped `^` after `&&` are both
        // expected to be literals; the class is not negated.
        assert_eq!(
5067
            parser(r"[\^&&^]").parse(),
5068
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5069
                span: span(0..7),
5070
                negated: false,
5071
                kind: intersection(
5072
                    span(1..6),
5073
                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5074
                        span: span(1..3),
5075
                        kind: ast::LiteralKind::Punctuation,
5076
                        c: '^',
5077
                    })),
5078
                    itemset(lit(span(5..6), '^')),
5079
                ),
5080
            })))
5081
        );
5082
        // `\&` followed by `&&&`: the expected reading is literal `&`, the
        // `&&` operator, then a verbatim `&`.
        assert_eq!(
5083
            parser(r"[\&&&&]").parse(),
5084
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5085
                span: span(0..7),
5086
                negated: false,
5087
                kind: intersection(
5088
                    span(1..6),
5089
                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5090
                        span: span(1..3),
5091
                        kind: ast::LiteralKind::Punctuation,
5092
                        c: '&',
5093
                    })),
5094
                    itemset(lit(span(5..6), '&')),
5095
                ),
5096
            })))
5097
        );
5098
        // Two consecutive `&&` operators with nothing around them: every
        // operand is expected to be an empty item with a zero-width span.
        assert_eq!(
5099
            parser(r"[&&&&]").parse(),
5100
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5101
                span: span(0..6),
5102
                negated: false,
5103
                kind: intersection(
5104
                    span(1..5),
5105
                    intersection(
5106
                        span(1..3),
5107
                        itemset(empty(span(1..1))),
5108
                        itemset(empty(span(3..3))),
5109
                    ),
5110
                    itemset(empty(span(5..5))),
5111
                ),
5112
            })))
5113
        );
5114
5115
        // Range whose endpoints are multi-byte characters. Each endpoint is
        // three bytes in UTF-8, so the 5-character pattern covers 0..9 and the
        // literal spans are 1..4 and 5..8. Spans are built with `span_range`
        // from the pattern text instead of `span`.
        // NOTE(review): `span_range` is defined outside this chunk;
        // presumably it derives line/column from the pattern -- confirm.
        let pat = "[☃-⛄]";
5116
        assert_eq!(
5117
            parser(pat).parse(),
5118
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5119
                span: span_range(pat, 0..9),
5120
                negated: false,
5121
                kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5122
                    span: span_range(pat, 1..8),
5123
                    start: ast::Literal {
5124
                        span: span_range(pat, 1..4),
5125
                        kind: ast::LiteralKind::Verbatim,
5126
                        c: '☃',
5127
                    },
5128
                    end: ast::Literal {
5129
                        span: span_range(pat, 5..8),
5130
                        kind: ast::LiteralKind::Verbatim,
5131
                        c: '⛄',
5132
                    },
5133
                })),
5134
            })))
5135
        );
5136
5137
        // A `]` placed directly after the opening `[` is expected to be a
        // verbatim literal rather than the end of the class.
        assert_eq!(
5138
            parser(r"[]]").parse(),
5139
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5140
                span: span(0..3),
5141
                negated: false,
5142
                kind: itemset(lit(span(1..2), ']')),
5143
            })))
5144
        );
5145
        assert_eq!(
5146
            parser(r"[]\[]").parse(),
5147
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5148
                span: span(0..5),
5149
                negated: false,
5150
                kind: union(
5151
                    span(1..4),
5152
                    vec![
5153
                        lit(span(1..2), ']'),
5154
                        ast::ClassSetItem::Literal(ast::Literal {
5155
                            span: span(2..4),
5156
                            kind: ast::LiteralKind::Punctuation,
5157
                            c: '[',
5158
                        }),
5159
                    ]
5160
                ),
5161
            })))
5162
        );
5163
        // Here the class is expected to close at the first `]` (span 0..4),
        // leaving the final `]` as a separate verbatim literal in a concat.
        assert_eq!(
5164
            parser(r"[\[]]").parse(),
5165
            Ok(concat(
5166
                0..5,
5167
                vec![
5168
                    Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5169
                        span: span(0..4),
5170
                        negated: false,
5171
                        kind: itemset(ast::ClassSetItem::Literal(
5172
                            ast::Literal {
5173
                                span: span(1..3),
5174
                                kind: ast::LiteralKind::Punctuation,
5175
                                c: '[',
5176
                            }
5177
                        )),
5178
                    })),
5179
                    Ast::Literal(ast::Literal {
5180
                        span: span(4..5),
5181
                        kind: ast::LiteralKind::Verbatim,
5182
                        c: ']',
5183
                    }),
5184
                ]
5185
            ))
5186
        );
5187
5188
        // Error cases. Unclosed classes are expected to report
        // `ClassUnclosed` at an opening bracket: the outer one for "[" and
        // "[[-]", the inner one for "[[" and "[[[:alnum:]".
        assert_eq!(
5189
            parser("[").parse().unwrap_err(),
5190
            TestError {
5191
                span: span(0..1),
5192
                kind: ast::ErrorKind::ClassUnclosed,
5193
            }
5194
        );
5195
        assert_eq!(
5196
            parser("[[").parse().unwrap_err(),
5197
            TestError {
5198
                span: span(1..2),
5199
                kind: ast::ErrorKind::ClassUnclosed,
5200
            }
5201
        );
5202
        assert_eq!(
5203
            parser("[[-]").parse().unwrap_err(),
5204
            TestError {
5205
                span: span(0..1),
5206
                kind: ast::ErrorKind::ClassUnclosed,
5207
            }
5208
        );
5209
        assert_eq!(
5210
            parser("[[[:alnum:]").parse().unwrap_err(),
5211
            TestError {
5212
                span: span(1..2),
5213
                kind: ast::ErrorKind::ClassUnclosed,
5214
            }
5215
        );
5216
        // `\b` inside a class is expected to be rejected as an invalid class
        // escape, with the error spanning the escape itself.
        assert_eq!(
5217
            parser(r"[\b]").parse().unwrap_err(),
5218
            TestError {
5219
                span: span(1..3),
5220
                kind: ast::ErrorKind::ClassEscapeInvalid,
5221
            }
5222
        );
5223
        // A Perl class on either side of a range is expected to fail with
        // `ClassRangeLiteral`, pointing at the `\w`.
        assert_eq!(
5224
            parser(r"[\w-a]").parse().unwrap_err(),
5225
            TestError {
5226
                span: span(1..3),
5227
                kind: ast::ErrorKind::ClassRangeLiteral,
5228
            }
5229
        );
5230
        assert_eq!(
5231
            parser(r"[a-\w]").parse().unwrap_err(),
5232
            TestError {
5233
                span: span(3..5),
5234
                kind: ast::ErrorKind::ClassRangeLiteral,
5235
            }
5236
        );
5237
        // A reversed range is expected to fail with `ClassRangeInvalid`
        // spanning the whole range.
        assert_eq!(
5238
            parser(r"[z-a]").parse().unwrap_err(),
5239
            TestError {
5240
                span: span(1..4),
5241
                kind: ast::ErrorKind::ClassRangeInvalid,
5242
            }
5243
        );
5244
5245
        // Unclosed classes that end in whitespace, parsed through
        // `parser_ignore_whitespace`: the error is still expected at the
        // opening bracket.
        // NOTE(review): `parser_ignore_whitespace` is defined outside this
        // chunk; presumably it enables whitespace-insensitive mode -- confirm.
        assert_eq!(
5246
            parser_ignore_whitespace("[a ").parse().unwrap_err(),
5247
            TestError {
5248
                span: span(0..1),
5249
                kind: ast::ErrorKind::ClassUnclosed,
5250
            }
5251
        );
5252
        assert_eq!(
5253
            parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5254
            TestError {
5255
                span: span(0..1),
5256
                kind: ast::ErrorKind::ClassUnclosed,
5257
            }
5258
        );
5259
    }
5260
5261
    /// Exercises `parse_set_class_open` directly.
    ///
    /// Each success case compares the result against `Ok((set, union))`,
    /// where `set` is the opened `ClassBracketed` (its `kind` is always built
    /// from an empty union here) and `union` is the initial `ClassSetUnion`,
    /// which may already hold leading `-` or `]` literals. The error cases
    /// check that input ending before any further class content yields
    /// `ClassUnclosed`.
    #[test]
5262
    fn parse_set_class_open() {
5263
        // Plain opening bracket: span 0..1, not negated, empty union at 1..1.
        assert_eq!(parser("[a]").parse_set_class_open(), {
5264
            let set = ast::ClassBracketed {
5265
                span: span(0..1),
5266
                negated: false,
5267
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5268
                    span: span(1..1),
5269
                    items: vec![],
5270
                }),
5271
            };
5272
            let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5273
            Ok((set, union))
5274
        });
5275
        // With `parser_ignore_whitespace`, the whitespace after `[` is
        // expected to be part of the opening span (0..4).
        assert_eq!(
5276
            parser_ignore_whitespace("[   a]").parse_set_class_open(),
5277
            {
5278
                let set = ast::ClassBracketed {
5279
                    span: span(0..4),
5280
                    negated: false,
5281
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5282
                        span: span(4..4),
5283
                        items: vec![],
5284
                    }),
5285
                };
5286
                let union =
5287
                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5288
                Ok((set, union))
5289
            }
5290
        );
5291
        // A leading `^` sets `negated` and is included in the opening span.
        assert_eq!(parser("[^a]").parse_set_class_open(), {
5292
            let set = ast::ClassBracketed {
5293
                span: span(0..2),
5294
                negated: true,
5295
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5296
                    span: span(2..2),
5297
                    items: vec![],
5298
                }),
5299
            };
5300
            let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5301
            Ok((set, union))
5302
        });
5303
        assert_eq!(
5304
            parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5305
            {
5306
                let set = ast::ClassBracketed {
5307
                    span: span(0..4),
5308
                    negated: true,
5309
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5310
                        span: span(4..4),
5311
                        items: vec![],
5312
                    }),
5313
                };
5314
                let union =
5315
                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5316
                Ok((set, union))
5317
            }
5318
        );
5319
        // A leading `-` is expected to come back as a verbatim literal
        // already placed in the returned union.
        assert_eq!(parser("[-a]").parse_set_class_open(), {
5320
            let set = ast::ClassBracketed {
5321
                span: span(0..2),
5322
                negated: false,
5323
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5324
                    span: span(1..1),
5325
                    items: vec![],
5326
                }),
5327
            };
5328
            let union = ast::ClassSetUnion {
5329
                span: span(1..2),
5330
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5331
                    span: span(1..2),
5332
                    kind: ast::LiteralKind::Verbatim,
5333
                    c: '-',
5334
                })],
5335
            };
5336
            Ok((set, union))
5337
        });
5338
        assert_eq!(
5339
            parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5340
            {
5341
                let set = ast::ClassBracketed {
5342
                    span: span(0..4),
5343
                    negated: false,
5344
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5345
                        span: span(2..2),
5346
                        items: vec![],
5347
                    }),
5348
                };
5349
                let union = ast::ClassSetUnion {
5350
                    span: span(2..3),
5351
                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5352
                        span: span(2..3),
5353
                        kind: ast::LiteralKind::Verbatim,
5354
                        c: '-',
5355
                    })],
5356
                };
5357
                Ok((set, union))
5358
            }
5359
        );
5360
        // Negation followed by a leading `-`.
        assert_eq!(parser("[^-a]").parse_set_class_open(), {
5361
            let set = ast::ClassBracketed {
5362
                span: span(0..3),
5363
                negated: true,
5364
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5365
                    span: span(2..2),
5366
                    items: vec![],
5367
                }),
5368
            };
5369
            let union = ast::ClassSetUnion {
5370
                span: span(2..3),
5371
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5372
                    span: span(2..3),
5373
                    kind: ast::LiteralKind::Verbatim,
5374
                    c: '-',
5375
                })],
5376
            };
5377
            Ok((set, union))
5378
        });
5379
        // Two leading hyphens are both expected as literals in the union.
        assert_eq!(parser("[--a]").parse_set_class_open(), {
5380
            let set = ast::ClassBracketed {
5381
                span: span(0..3),
5382
                negated: false,
5383
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5384
                    span: span(1..1),
5385
                    items: vec![],
5386
                }),
5387
            };
5388
            let union = ast::ClassSetUnion {
5389
                span: span(1..3),
5390
                items: vec![
5391
                    ast::ClassSetItem::Literal(ast::Literal {
5392
                        span: span(1..2),
5393
                        kind: ast::LiteralKind::Verbatim,
5394
                        c: '-',
5395
                    }),
5396
                    ast::ClassSetItem::Literal(ast::Literal {
5397
                        span: span(2..3),
5398
                        kind: ast::LiteralKind::Verbatim,
5399
                        c: '-',
5400
                    }),
5401
                ],
5402
            };
5403
            Ok((set, union))
5404
        });
5405
        // A `]` directly after the opening is expected as a verbatim literal
        // in the union rather than closing the class.
        assert_eq!(parser("[]a]").parse_set_class_open(), {
5406
            let set = ast::ClassBracketed {
5407
                span: span(0..2),
5408
                negated: false,
5409
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5410
                    span: span(1..1),
5411
                    items: vec![],
5412
                }),
5413
            };
5414
            let union = ast::ClassSetUnion {
5415
                span: span(1..2),
5416
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5417
                    span: span(1..2),
5418
                    kind: ast::LiteralKind::Verbatim,
5419
                    c: ']',
5420
                })],
5421
            };
5422
            Ok((set, union))
5423
        });
5424
        assert_eq!(
5425
            parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5426
            {
5427
                let set = ast::ClassBracketed {
5428
                    span: span(0..4),
5429
                    negated: false,
5430
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5431
                        span: span(2..2),
5432
                        items: vec![],
5433
                    }),
5434
                };
5435
                let union = ast::ClassSetUnion {
5436
                    span: span(2..3),
5437
                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5438
                        span: span(2..3),
5439
                        kind: ast::LiteralKind::Verbatim,
5440
                        c: ']',
5441
                    })],
5442
                };
5443
                Ok((set, union))
5444
            }
5445
        );
5446
        // Negation followed by a leading `]`.
        assert_eq!(parser("[^]a]").parse_set_class_open(), {
5447
            let set = ast::ClassBracketed {
5448
                span: span(0..3),
5449
                negated: true,
5450
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5451
                    span: span(2..2),
5452
                    items: vec![],
5453
                }),
5454
            };
5455
            let union = ast::ClassSetUnion {
5456
                span: span(2..3),
5457
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5458
                    span: span(2..3),
5459
                    kind: ast::LiteralKind::Verbatim,
5460
                    c: ']',
5461
                })],
5462
            };
5463
            Ok((set, union))
5464
        });
5465
        // `-` followed by `]`: only the `-` is expected in the returned
        // union; the `]` is not taken as a leading literal here.
        assert_eq!(parser("[-]a]").parse_set_class_open(), {
5466
            let set = ast::ClassBracketed {
5467
                span: span(0..2),
5468
                negated: false,
5469
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5470
                    span: span(1..1),
5471
                    items: vec![],
5472
                }),
5473
            };
5474
            let union = ast::ClassSetUnion {
5475
                span: span(1..2),
5476
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5477
                    span: span(1..2),
5478
                    kind: ast::LiteralKind::Verbatim,
5479
                    c: '-',
5480
                })],
5481
            };
5482
            Ok((set, union))
5483
        });
5484
5485
        // Error cases: the input ends right after the opening (plus any
        // negation, whitespace, or leading `-` / `]`). Each is expected to
        // report `ClassUnclosed`; except for the bare "[" case the span
        // covers the entire input.
        assert_eq!(
5486
            parser("[").parse_set_class_open().unwrap_err(),
5487
            TestError {
5488
                span: span(0..1),
5489
                kind: ast::ErrorKind::ClassUnclosed,
5490
            }
5491
        );
5492
        assert_eq!(
5493
            parser_ignore_whitespace("[    ")
5494
                .parse_set_class_open()
5495
                .unwrap_err(),
5496
            TestError {
5497
                span: span(0..5),
5498
                kind: ast::ErrorKind::ClassUnclosed,
5499
            }
5500
        );
5501
        assert_eq!(
5502
            parser("[^").parse_set_class_open().unwrap_err(),
5503
            TestError {
5504
                span: span(0..2),
5505
                kind: ast::ErrorKind::ClassUnclosed,
5506
            }
5507
        );
5508
        assert_eq!(
5509
            parser("[]").parse_set_class_open().unwrap_err(),
5510
            TestError {
5511
                span: span(0..2),
5512
                kind: ast::ErrorKind::ClassUnclosed,
5513
            }
5514
        );
5515
        assert_eq!(
5516
            parser("[-").parse_set_class_open().unwrap_err(),
5517
            TestError {
5518
                span: span(0..2),
5519
                kind: ast::ErrorKind::ClassUnclosed,
5520
            }
5521
        );
5522
        assert_eq!(
5523
            parser("[--").parse_set_class_open().unwrap_err(),
5524
            TestError {
5525
                span: span(0..3),
5526
                kind: ast::ErrorKind::ClassUnclosed,
5527
            }
5528
        );
5529
    }
5530
5531
    // Checks `maybe_parse_ascii_class` against well-formed `[:name:]` inputs
    // and against malformed inputs, which must produce `None` while leaving
    // the parser offset at 0.
    #[test]
    fn maybe_parse_ascii_class() {
        // (pattern, end offset of the class span, negated)
        let recognized = [
            (r"[:alnum:]", 9, false),
            // Input following the class is not included in its span.
            (r"[:alnum:]A", 9, false),
            (r"[:^alnum:]", 10, true),
        ];
        for &(pattern, end, negated) in recognized.iter() {
            let want = ast::ClassAscii {
                span: span(0..end),
                kind: ast::ClassAsciiKind::Alnum,
                negated,
            };
            assert_eq!(parser(pattern).maybe_parse_ascii_class(), Some(want));
        }

        // None of these is accepted as an ASCII class, and a failed attempt
        // must not advance the parser.
        let unrecognized = [
            r"[:",
            r"[:^",
            r"[^:alnum:]",
            r"[:alnnum:]",
            r"[:alnum]",
            r"[:alnum:",
        ];
        for &pattern in unrecognized.iter() {
            let p = parser(pattern);
            assert_eq!(p.maybe_parse_ascii_class(), None);
            assert_eq!(p.offset(), 0);
        }
    }
5582
5583
    // Covers the `\p`/`\P` Unicode class escapes: one-letter and braced
    // names, `name:value` / `name=value` / `name!=value` forms (including
    // empty names and values), premature end of input, classes followed by
    // a literal, and an escaped `{` after `\p`/`\P`.
    #[test]
    fn parse_unicode_class() {
        // Builds the expected `NamedValue` kind for the `\p{name<op>value}`
        // forms.
        let named_value = |op, name: &str, value: &str| {
            ast::ClassUnicodeKind::NamedValue {
                op,
                name: s(name),
                value: s(value),
            }
        };

        // (pattern, end offset of the escape, negated, expected kind)
        let escapes = vec![
            (r"\pN", 3, false, ast::ClassUnicodeKind::OneLetter('N')),
            (r"\PN", 3, true, ast::ClassUnicodeKind::OneLetter('N')),
            (r"\p{N}", 5, false, ast::ClassUnicodeKind::Named(s("N"))),
            (r"\P{N}", 5, true, ast::ClassUnicodeKind::Named(s("N"))),
            (
                r"\p{Greek}",
                9,
                false,
                ast::ClassUnicodeKind::Named(s("Greek")),
            ),
            (
                r"\p{scx:Katakana}",
                16,
                false,
                named_value(ast::ClassUnicodeOpKind::Colon, "scx", "Katakana"),
            ),
            (
                r"\p{scx=Katakana}",
                16,
                false,
                named_value(ast::ClassUnicodeOpKind::Equal, "scx", "Katakana"),
            ),
            (
                r"\p{scx!=Katakana}",
                17,
                false,
                named_value(
                    ast::ClassUnicodeOpKind::NotEqual,
                    "scx",
                    "Katakana",
                ),
            ),
            (
                r"\p{:}",
                5,
                false,
                named_value(ast::ClassUnicodeOpKind::Colon, "", ""),
            ),
            (
                r"\p{=}",
                5,
                false,
                named_value(ast::ClassUnicodeOpKind::Equal, "", ""),
            ),
            (
                r"\p{!=}",
                6,
                false,
                named_value(ast::ClassUnicodeOpKind::NotEqual, "", ""),
            ),
        ];
        for (pattern, end, negated, kind) in escapes {
            let want = Primitive::Unicode(ast::ClassUnicode {
                span: span(0..end),
                negated,
                kind,
            });
            assert_eq!(parser(pattern).parse_escape(), Ok(want));
        }

        // Input that ends inside the escape is reported as an unexpected
        // EOF with an empty span at the end of the pattern.
        let truncated = [(r"\p", 2), (r"\p{", 3), (r"\p{N", 4), (r"\p{Greek", 8)];
        for &(pattern, at) in truncated.iter() {
            assert_eq!(
                parser(pattern).parse_escape().unwrap_err(),
                TestError {
                    span: span(at..at),
                    kind: ast::ErrorKind::EscapeUnexpectedEof,
                }
            );
        }

        // A class directly followed by `z` parses as a two-element
        // concatenation: the class, then the literal.
        // (pattern, end offset of the class, expected kind)
        let followed_by_z = vec![
            (r"\pNz", 3, ast::ClassUnicodeKind::OneLetter('N')),
            (r"\p{Greek}z", 9, ast::ClassUnicodeKind::Named(s("Greek"))),
        ];
        for (pattern, class_end, kind) in followed_by_z {
            let class = Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
                span: span(0..class_end),
                negated: false,
                kind,
            }));
            let literal = Ast::Literal(ast::Literal {
                span: span(class_end..class_end + 1),
                kind: ast::LiteralKind::Verbatim,
                c: 'z',
            });
            assert_eq!(
                parser(pattern).parse(),
                Ok(Ast::Concat(ast::Concat {
                    span: span(0..class_end + 1),
                    asts: vec![class, literal],
                }))
            );
        }

        // A backslash immediately after `\p`/`\P` is an invalid class name.
        for &pattern in [r"\p\{", r"\P\{"].iter() {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError {
                    span: span(2..3),
                    kind: ast::ErrorKind::UnicodeClassInvalid,
                }
            );
        }
    }
5780
5781
    // Covers the Perl class escapes `\d`, `\s`, `\w` and their negated
    // upper-case forms, both through `parse_escape` and through a full
    // `parse` (alone and followed by a literal).
    #[test]
    fn parse_perl_class() {
        // Every Perl class escape checked here is two characters long.
        let perl = |kind, negated| ast::ClassPerl {
            span: span(0..2),
            kind,
            negated,
        };

        // (pattern, expected kind, negated)
        let escapes = vec![
            (r"\d", ast::ClassPerlKind::Digit, false),
            (r"\D", ast::ClassPerlKind::Digit, true),
            (r"\s", ast::ClassPerlKind::Space, false),
            (r"\S", ast::ClassPerlKind::Space, true),
            (r"\w", ast::ClassPerlKind::Word, false),
            (r"\W", ast::ClassPerlKind::Word, true),
        ];
        for (pattern, kind, negated) in escapes {
            assert_eq!(
                parser(pattern).parse_escape(),
                Ok(Primitive::Perl(perl(kind, negated)))
            );
        }

        // The AST node expected for a leading, non-negated `\d`.
        let digit_class = || {
            Ast::Class(ast::Class::Perl(perl(
                ast::ClassPerlKind::Digit,
                false,
            )))
        };

        assert_eq!(parser(r"\d").parse(), Ok(digit_class()));

        let trailing_z = Ast::Literal(ast::Literal {
            span: span(2..3),
            kind: ast::LiteralKind::Verbatim,
            c: 'z',
        });
        assert_eq!(
            parser(r"\dz").parse(),
            Ok(Ast::Concat(ast::Concat {
                span: span(0..3),
                asts: vec![digit_class(), trailing_z],
            }))
        );
    }
5859
5860
    // Regression test for issue 454: the nest limit checker failed to
    // decrement its depth during post-traversal, so long (but shallow)
    // regexes tripped the limit far too eagerly. The pattern below must
    // parse successfully with a nest limit of 50.
    #[test]
    fn regression_454_nest_too_big() {
        let long_but_shallow = r#"
        2(?:
          [45]\d{3}|
          7(?:
            1[0-267]|
            2[0-289]|
            3[0-29]|
            4[01]|
            5[1-3]|
            6[013]|
            7[0178]|
            91
          )|
          8(?:
            0[125]|
            [139][1-6]|
            2[0157-9]|
            41|
            6[1-35]|
            7[1-5]|
            8[1-8]|
            90
          )|
          9(?:
            0[0-2]|
            1[0-4]|
            2[568]|
            3[3-6]|
            5[5-7]|
            6[0167]|
            7[15]|
            8[0146-9]
          )
        )\d{4}
        "#;
        let outcome = parser_nest_limit(long_but_shallow, 50).parse();
        assert!(outcome.is_ok());
    }
5902
5903
    // Regression test for issue 455: with whitespace-insensitive mode on
    // (`(?x)`), a trailing `-` in a character class is still a literal `-`
    // even when whitespace (or a comment) follows it. Classes that are never
    // closed must continue to be rejected.
    #[test]
    fn regression_455_trailing_dash_ignore_whitespace() {
        // Closed classes ending in `-` followed by whitespace: must parse.
        let accepted = [
            "(?x)[ / - ]",
            "(?x)[ a - ]",
            "(?x)[
            a
            - ]
        ",
            "(?x)[
            a # wat
            - ]
        ",
        ];
        for &pattern in accepted.iter() {
            assert!(parser(pattern).parse().is_ok());
        }

        // The same shapes without a closing `]`: must fail to parse.
        let rejected = [
            "(?x)[ / -",
            "(?x)[ / - ",
            "(?x)[
            / -
        ",
            "(?x)[
            / - # wat
        ",
        ];
        for &pattern in rejected.iter() {
            assert!(parser(pattern).parse().is_err());
        }
    }
5944
}