Coverage Report

Created: 2025-11-16 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-syntax-0.6.29/src/ast/parse.rs
Line
Count
Source
1
/*!
2
This module provides a regular expression parser.
3
*/
4
5
use std::borrow::Borrow;
6
use std::cell::{Cell, RefCell};
7
use std::mem;
8
use std::result;
9
10
use crate::ast::{self, Ast, Position, Span};
11
use crate::either::Either;
12
13
use crate::is_meta_character;
14
15
/// Result alias used by this module; the error type is always `ast::Error`.
type Result<T> = result::Result<T, ast::Error>;
16
17
/// A primitive is an expression with no sub-expressions. This includes
18
/// literals, assertions and non-set character classes. This representation
19
/// is used as intermediate state in the parser.
20
///
21
/// This does not include ASCII character classes, since they can only appear
22
/// within a set character class.
23
#[derive(Clone, Debug, Eq, PartialEq)]
24
enum Primitive {
25
    Literal(ast::Literal),
26
    Assertion(ast::Assertion),
27
    Dot(Span),
28
    Perl(ast::ClassPerl),
29
    Unicode(ast::ClassUnicode),
30
}
31
32
impl Primitive {
33
    /// Return the span of this primitive.
34
0
    fn span(&self) -> &Span {
35
0
        match *self {
36
0
            Primitive::Literal(ref x) => &x.span,
37
0
            Primitive::Assertion(ref x) => &x.span,
38
0
            Primitive::Dot(ref span) => span,
39
0
            Primitive::Perl(ref x) => &x.span,
40
0
            Primitive::Unicode(ref x) => &x.span,
41
        }
42
0
    }
43
44
    /// Convert this primitive into a proper AST.
45
0
    fn into_ast(self) -> Ast {
46
0
        match self {
47
0
            Primitive::Literal(lit) => Ast::Literal(lit),
48
0
            Primitive::Assertion(assert) => Ast::Assertion(assert),
49
0
            Primitive::Dot(span) => Ast::Dot(span),
50
0
            Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51
0
            Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52
        }
53
0
    }
54
55
    /// Convert this primitive into an item in a character class.
56
    ///
57
    /// If this primitive is not a legal item (i.e., an assertion or a dot),
58
    /// then return an error.
59
0
    fn into_class_set_item<P: Borrow<Parser>>(
60
0
        self,
61
0
        p: &ParserI<'_, P>,
62
0
    ) -> Result<ast::ClassSetItem> {
63
        use self::Primitive::*;
64
        use crate::ast::ClassSetItem;
65
66
0
        match self {
67
0
            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68
0
            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69
0
            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70
0
            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71
        }
72
0
    }
73
74
    /// Convert this primitive into a literal in a character class. In
75
    /// particular, literals are the only valid items that can appear in
76
    /// ranges.
77
    ///
78
    /// If this primitive is not a legal item (i.e., a class, assertion or a
79
    /// dot), then return an error.
80
0
    fn into_class_literal<P: Borrow<Parser>>(
81
0
        self,
82
0
        p: &ParserI<'_, P>,
83
0
    ) -> Result<ast::Literal> {
84
        use self::Primitive::*;
85
86
0
        match self {
87
0
            Literal(lit) => Ok(lit),
88
0
            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89
        }
90
0
    }
91
}
92
93
/// Returns true if the given character is an ASCII hexadecimal digit, i.e.
/// one of `0-9`, `a-f` or `A-F`.
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
97
98
/// Returns true if the given character is valid in a capture group name.
///
/// `_` and the ASCII letters `A-Z`/`a-z` are accepted in any position. The
/// ASCII digits `0-9` and the characters `.`, `[` and `]` are accepted only
/// when `first` is false, i.e. when `c` is not the first character of the
/// group name.
fn is_capture_char(c: char, first: bool) -> bool {
    if c == '_' || c.is_ascii_alphabetic() {
        return true;
    }
    !first && (c.is_ascii_digit() || c == '.' || c == '[' || c == ']')
}
109
110
/// A builder for a regular expression parser.
///
/// The builder collects configuration options; `build` copies them into a
/// new `Parser`.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether built parsers start out in whitespace-insensitive mode.
    ignore_whitespace: bool,
    /// The nesting limit copied into built parsers.
    nest_limit: u32,
    /// Whether built parsers support octal syntax.
    octal: bool,
}
119
120
impl Default for ParserBuilder {
121
0
    fn default() -> ParserBuilder {
122
0
        ParserBuilder::new()
123
0
    }
124
}
125
126
impl ParserBuilder {
127
    /// Create a new parser builder with a default configuration.
128
0
    pub fn new() -> ParserBuilder {
129
0
        ParserBuilder {
130
0
            ignore_whitespace: false,
131
0
            nest_limit: 250,
132
0
            octal: false,
133
0
        }
134
0
    }
135
136
    /// Build a parser from this configuration with the given pattern.
137
0
    pub fn build(&self) -> Parser {
138
0
        Parser {
139
0
            pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
140
0
            capture_index: Cell::new(0),
141
0
            nest_limit: self.nest_limit,
142
0
            octal: self.octal,
143
0
            initial_ignore_whitespace: self.ignore_whitespace,
144
0
            ignore_whitespace: Cell::new(self.ignore_whitespace),
145
0
            comments: RefCell::new(vec![]),
146
0
            stack_group: RefCell::new(vec![]),
147
0
            stack_class: RefCell::new(vec![]),
148
0
            capture_names: RefCell::new(vec![]),
149
0
            scratch: RefCell::new(String::new()),
150
0
        }
151
0
    }
152
153
    /// Set the nesting limit for this parser.
154
    ///
155
    /// The nesting limit controls how deep the abstract syntax tree is allowed
156
    /// to be. If the AST exceeds the given limit (e.g., with too many nested
157
    /// groups), then an error is returned by the parser.
158
    ///
159
    /// The purpose of this limit is to act as a heuristic to prevent stack
160
    /// overflow for consumers that do structural induction on an `Ast` using
161
    /// explicit recursion. While this crate never does this (instead using
162
    /// constant stack space and moving the call stack to the heap), other
163
    /// crates may.
164
    ///
165
    /// This limit is not checked until the entire Ast is parsed. Therefore,
166
    /// if callers want to put a limit on the amount of heap space used, then
167
    /// they should impose a limit on the length, in bytes, of the concrete
168
    /// pattern string. In particular, this is viable since this parser
169
    /// implementation will limit itself to heap space proportional to the
170
    /// length of the pattern string.
171
    ///
172
    /// Note that a nest limit of `0` will return a nest limit error for most
173
    /// patterns but not all. For example, a nest limit of `0` permits `a` but
174
    /// not `ab`, since `ab` requires a concatenation, which results in a nest
175
    /// depth of `1`. In general, a nest limit is not something that manifests
176
    /// in an obvious way in the concrete syntax, therefore, it should not be
177
    /// used in a granular way.
178
0
    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
179
0
        self.nest_limit = limit;
180
0
        self
181
0
    }
182
183
    /// Whether to support octal syntax or not.
184
    ///
185
    /// Octal syntax is a little-known way of uttering Unicode codepoints in
186
    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
187
    /// `\141` are all equivalent regular expressions, where the last example
188
    /// shows octal syntax.
189
    ///
190
    /// While supporting octal syntax isn't in and of itself a problem, it does
191
    /// make good error messages harder. That is, in PCRE based regex engines,
192
    /// syntax like `\0` invokes a backreference, which is explicitly
193
    /// unsupported in Rust's regex engine. However, many users expect it to
194
    /// be supported. Therefore, when octal support is disabled, the error
195
    /// message will explicitly mention that backreferences aren't supported.
196
    ///
197
    /// Octal syntax is disabled by default.
198
0
    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
199
0
        self.octal = yes;
200
0
        self
201
0
    }
202
203
    /// Enable verbose mode in the regular expression.
204
    ///
205
    /// When enabled, verbose mode permits insignificant whitespace in many
206
    /// places in the regular expression, as well as comments. Comments are
207
    /// started using `#` and continue until the end of the line.
208
    ///
209
    /// By default, this is disabled. It may be selectively enabled in the
210
    /// regular expression by using the `x` flag regardless of this setting.
211
0
    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
212
0
        self.ignore_whitespace = yes;
213
0
        self
214
0
    }
215
}
216
217
/// A regular expression parser.
218
///
219
/// This parses a string representation of a regular expression into an
220
/// abstract syntax tree. The size of the tree is proportional to the length
221
/// of the regular expression pattern.
222
///
223
/// A `Parser` can be configured in more detail via a
224
/// [`ParserBuilder`](struct.ParserBuilder.html).
225
#[derive(Clone, Debug)]
226
pub struct Parser {
227
    /// The current position of the parser.
228
    pos: Cell<Position>,
229
    /// The current capture index.
230
    capture_index: Cell<u32>,
231
    /// The maximum number of open parens/brackets allowed. If the parser
232
    /// exceeds this number, then an error is returned.
233
    nest_limit: u32,
234
    /// Whether to support octal syntax or not. When `false`, the parser will
235
    /// return an error helpfully pointing out that backreferences are not
236
    /// supported.
237
    octal: bool,
238
    /// The initial setting for `ignore_whitespace` as provided by
239
    /// `ParserBuilder`. It is used when resetting the parser's state.
240
    initial_ignore_whitespace: bool,
241
    /// Whether whitespace should be ignored. When enabled, comments are
242
    /// also permitted.
243
    ignore_whitespace: Cell<bool>,
244
    /// A list of comments, in order of appearance.
245
    comments: RefCell<Vec<ast::Comment>>,
246
    /// A stack of grouped sub-expressions, including alternations.
247
    stack_group: RefCell<Vec<GroupState>>,
248
    /// A stack of nested character classes. This is only non-empty when
249
    /// parsing a class.
250
    stack_class: RefCell<Vec<ClassState>>,
251
    /// A sorted sequence of capture names. This is used to detect duplicate
252
    /// capture names and report an error if one is detected.
253
    capture_names: RefCell<Vec<ast::CaptureName>>,
254
    /// A scratch buffer used in various places. Mostly this is used to
255
    /// accumulate relevant characters from parts of a pattern.
256
    scratch: RefCell<String>,
257
}
258
259
/// The internal parser implementation.
///
/// A `Parser`'s internal state is not tied to any single pattern, whereas
/// a `ParserI` is: this separate type exists so that the provided pattern
/// string can be carried along with the parser state.
///
/// Being generic over `P` also allows `ParserI<&Parser>` in production
/// code while keeping the convenience of `ParserI<Parser>` for tests, which
/// sometimes work against the parser's internal interface.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state and configuration.
    parser: P,
    /// The complete regular expression supplied by the user.
    pattern: &'s str,
}
275
276
/// GroupState represents a single stack frame while parsing nested groups
277
/// and alternations. Each frame records the state up to an opening parenthesis
278
/// or a alternating bracket `|`.
279
#[derive(Clone, Debug)]
280
enum GroupState {
281
    /// This state is pushed whenever an opening group is found.
282
    Group {
283
        /// The concatenation immediately preceding the opening group.
284
        concat: ast::Concat,
285
        /// The group that has been opened. Its sub-AST is always empty.
286
        group: ast::Group,
287
        /// Whether this group has the `x` flag enabled or not.
288
        ignore_whitespace: bool,
289
    },
290
    /// This state is pushed whenever a new alternation branch is found. If
291
    /// an alternation branch is found and this state is at the top of the
292
    /// stack, then this state should be modified to include the new
293
    /// alternation.
294
    Alternation(ast::Alternation),
295
}
296
297
/// ClassState represents a single stack frame while parsing character classes.
298
/// Each frame records the state up to an intersection, difference, symmetric
299
/// difference or nested class.
300
///
301
/// Note that a parser's character class stack is only non-empty when parsing
302
/// a character class. In all other cases, it is empty.
303
#[derive(Clone, Debug)]
304
enum ClassState {
305
    /// This state is pushed whenever an opening bracket is found.
306
    Open {
307
        /// The union of class items immediately preceding this class.
308
        union: ast::ClassSetUnion,
309
        /// The class that has been opened. Typically this just corresponds
310
        /// to the `[`, but it can also include `[^` since `^` indicates
311
        /// negation of the class.
312
        set: ast::ClassBracketed,
313
    },
314
    /// This state is pushed when a operator is seen. When popped, the stored
315
    /// set becomes the left hand side of the operator.
316
    Op {
317
        /// The type of the operation, i.e., &&, -- or ~~.
318
        kind: ast::ClassSetBinaryOpKind,
319
        /// The left-hand side of the operator.
320
        lhs: ast::ClassSet,
321
    },
322
}
323
324
impl Parser {
325
    /// Create a new parser with a default configuration.
326
    ///
327
    /// The parser can be run with either the `parse` or `parse_with_comments`
328
    /// methods. The parse methods return an abstract syntax tree.
329
    ///
330
    /// To set configuration options on the parser, use
331
    /// [`ParserBuilder`](struct.ParserBuilder.html).
332
0
    pub fn new() -> Parser {
333
0
        ParserBuilder::new().build()
334
0
    }
335
336
    /// Parse the regular expression into an abstract syntax tree.
337
0
    pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
338
0
        ParserI::new(self, pattern).parse()
339
0
    }
340
341
    /// Parse the regular expression and return an abstract syntax tree with
342
    /// all of the comments found in the pattern.
343
0
    pub fn parse_with_comments(
344
0
        &mut self,
345
0
        pattern: &str,
346
0
    ) -> Result<ast::WithComments> {
347
0
        ParserI::new(self, pattern).parse_with_comments()
348
0
    }
349
350
    /// Reset the internal state of a parser.
351
    ///
352
    /// This is called at the beginning of every parse. This prevents the
353
    /// parser from running with inconsistent state (say, if a previous
354
    /// invocation returned an error and the parser is reused).
355
0
    fn reset(&self) {
356
        // These settings should be in line with the construction
357
        // in `ParserBuilder::build`.
358
0
        self.pos.set(Position { offset: 0, line: 1, column: 1 });
359
0
        self.ignore_whitespace.set(self.initial_ignore_whitespace);
360
0
        self.comments.borrow_mut().clear();
361
0
        self.stack_group.borrow_mut().clear();
362
0
        self.stack_class.borrow_mut().clear();
363
0
    }
364
}
365
366
impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
367
    /// Build an internal parser from a parser configuration and a pattern.
368
0
    fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
369
0
        ParserI { parser, pattern }
370
0
    }
371
372
    /// Return a reference to the parser state.
373
0
    fn parser(&self) -> &Parser {
374
0
        self.parser.borrow()
375
0
    }
376
377
    /// Return a reference to the pattern being parsed.
378
0
    fn pattern(&self) -> &str {
379
0
        self.pattern.borrow()
380
0
    }
381
382
    /// Create a new error with the given span and error type.
383
0
    fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
384
0
        ast::Error { kind, pattern: self.pattern().to_string(), span }
385
0
    }
386
387
    /// Return the current offset of the parser.
388
    ///
389
    /// The offset starts at `0` from the beginning of the regular expression
390
    /// pattern string.
391
0
    fn offset(&self) -> usize {
392
0
        self.parser().pos.get().offset
393
0
    }
394
395
    /// Return the current line number of the parser.
396
    ///
397
    /// The line number starts at `1`.
398
0
    fn line(&self) -> usize {
399
0
        self.parser().pos.get().line
400
0
    }
401
402
    /// Return the current column of the parser.
403
    ///
404
    /// The column number starts at `1` and is reset whenever a `\n` is seen.
405
0
    fn column(&self) -> usize {
406
0
        self.parser().pos.get().column
407
0
    }
408
409
    /// Return the next capturing index. Each subsequent call increments the
410
    /// internal index.
411
    ///
412
    /// The span given should correspond to the location of the opening
413
    /// parenthesis.
414
    ///
415
    /// If the capture limit is exceeded, then an error is returned.
416
0
    fn next_capture_index(&self, span: Span) -> Result<u32> {
417
0
        let current = self.parser().capture_index.get();
418
0
        let i = current.checked_add(1).ok_or_else(|| {
419
0
            self.error(span, ast::ErrorKind::CaptureLimitExceeded)
420
0
        })?;
421
0
        self.parser().capture_index.set(i);
422
0
        Ok(i)
423
0
    }
424
425
    /// Adds the given capture name to this parser. If this capture name has
426
    /// already been used, then an error is returned.
427
0
    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
428
0
        let mut names = self.parser().capture_names.borrow_mut();
429
0
        match names
430
0
            .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
431
        {
432
0
            Err(i) => {
433
0
                names.insert(i, cap.clone());
434
0
                Ok(())
435
            }
436
0
            Ok(i) => Err(self.error(
437
0
                cap.span,
438
0
                ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
439
0
            )),
440
        }
441
0
    }
442
443
    /// Return whether the parser should ignore whitespace or not.
444
0
    fn ignore_whitespace(&self) -> bool {
445
0
        self.parser().ignore_whitespace.get()
446
0
    }
447
448
    /// Return the character at the current position of the parser.
449
    ///
450
    /// This panics if the current position does not point to a valid char.
451
0
    fn char(&self) -> char {
452
0
        self.char_at(self.offset())
453
0
    }
454
455
    /// Return the character at the given position.
456
    ///
457
    /// This panics if the given position does not point to a valid char.
458
0
    fn char_at(&self, i: usize) -> char {
459
0
        self.pattern()[i..]
460
0
            .chars()
461
0
            .next()
462
0
            .unwrap_or_else(|| panic!("expected char at offset {}", i))
463
0
    }
464
465
    /// Bump the parser to the next Unicode scalar value.
466
    ///
467
    /// If the end of the input has been reached, then `false` is returned.
468
0
    fn bump(&self) -> bool {
469
0
        if self.is_eof() {
470
0
            return false;
471
0
        }
472
0
        let Position { mut offset, mut line, mut column } = self.pos();
473
0
        if self.char() == '\n' {
474
0
            line = line.checked_add(1).unwrap();
475
0
            column = 1;
476
0
        } else {
477
0
            column = column.checked_add(1).unwrap();
478
0
        }
479
0
        offset += self.char().len_utf8();
480
0
        self.parser().pos.set(Position { offset, line, column });
481
0
        self.pattern()[self.offset()..].chars().next().is_some()
482
0
    }
483
484
    /// If the substring starting at the current position of the parser has
485
    /// the given prefix, then bump the parser to the character immediately
486
    /// following the prefix and return true. Otherwise, don't bump the parser
487
    /// and return false.
488
0
    fn bump_if(&self, prefix: &str) -> bool {
489
0
        if self.pattern()[self.offset()..].starts_with(prefix) {
490
0
            for _ in 0..prefix.chars().count() {
491
0
                self.bump();
492
0
            }
493
0
            true
494
        } else {
495
0
            false
496
        }
497
0
    }
498
499
    /// Returns true if and only if the parser is positioned at a look-around
500
    /// prefix. The conditions under which this returns true must always
501
    /// correspond to a regular expression that would otherwise be consider
502
    /// invalid.
503
    ///
504
    /// This should only be called immediately after parsing the opening of
505
    /// a group or a set of flags.
506
0
    fn is_lookaround_prefix(&self) -> bool {
507
0
        self.bump_if("?=")
508
0
            || self.bump_if("?!")
509
0
            || self.bump_if("?<=")
510
0
            || self.bump_if("?<!")
511
0
    }
512
513
    /// Bump the parser, and if the `x` flag is enabled, bump through any
514
    /// subsequent spaces. Return true if and only if the parser is not at
515
    /// EOF.
516
0
    fn bump_and_bump_space(&self) -> bool {
517
0
        if !self.bump() {
518
0
            return false;
519
0
        }
520
0
        self.bump_space();
521
0
        !self.is_eof()
522
0
    }
523
524
    /// If the `x` flag is enabled (i.e., whitespace insensitivity with
525
    /// comments), then this will advance the parser through all whitespace
526
    /// and comments to the next non-whitespace non-comment byte.
527
    ///
528
    /// If the `x` flag is disabled, then this is a no-op.
529
    ///
530
    /// This should be used selectively throughout the parser where
531
    /// arbitrary whitespace is permitted when the `x` flag is enabled. For
532
    /// example, `{   5  , 6}` is equivalent to `{5,6}`.
533
0
    fn bump_space(&self) {
534
0
        if !self.ignore_whitespace() {
535
0
            return;
536
0
        }
537
0
        while !self.is_eof() {
538
0
            if self.char().is_whitespace() {
539
0
                self.bump();
540
0
            } else if self.char() == '#' {
541
0
                let start = self.pos();
542
0
                let mut comment_text = String::new();
543
0
                self.bump();
544
0
                while !self.is_eof() {
545
0
                    let c = self.char();
546
0
                    self.bump();
547
0
                    if c == '\n' {
548
0
                        break;
549
0
                    }
550
0
                    comment_text.push(c);
551
                }
552
0
                let comment = ast::Comment {
553
0
                    span: Span::new(start, self.pos()),
554
0
                    comment: comment_text,
555
0
                };
556
0
                self.parser().comments.borrow_mut().push(comment);
557
            } else {
558
0
                break;
559
            }
560
        }
561
0
    }
562
563
    /// Peek at the next character in the input without advancing the parser.
564
    ///
565
    /// If the input has been exhausted, then this returns `None`.
566
0
    fn peek(&self) -> Option<char> {
567
0
        if self.is_eof() {
568
0
            return None;
569
0
        }
570
0
        self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
571
0
    }
572
573
    /// Like peek, but will ignore spaces when the parser is in whitespace
574
    /// insensitive mode.
575
0
    fn peek_space(&self) -> Option<char> {
576
0
        if !self.ignore_whitespace() {
577
0
            return self.peek();
578
0
        }
579
0
        if self.is_eof() {
580
0
            return None;
581
0
        }
582
0
        let mut start = self.offset() + self.char().len_utf8();
583
0
        let mut in_comment = false;
584
0
        for (i, c) in self.pattern()[start..].char_indices() {
585
0
            if c.is_whitespace() {
586
0
                continue;
587
0
            } else if !in_comment && c == '#' {
588
0
                in_comment = true;
589
0
            } else if in_comment && c == '\n' {
590
0
                in_comment = false;
591
0
            } else {
592
0
                start += i;
593
0
                break;
594
            }
595
        }
596
0
        self.pattern()[start..].chars().next()
597
0
    }
598
599
    /// Returns true if the next call to `bump` would return false.
600
0
    fn is_eof(&self) -> bool {
601
0
        self.offset() == self.pattern().len()
602
0
    }
603
604
    /// Return the current position of the parser, which includes the offset,
605
    /// line and column.
606
0
    fn pos(&self) -> Position {
607
0
        self.parser().pos.get()
608
0
    }
609
610
    /// Create a span at the current position of the parser. Both the start
611
    /// and end of the span are set.
612
0
    fn span(&self) -> Span {
613
0
        Span::splat(self.pos())
614
0
    }
615
616
    /// Create a span that covers the current character.
617
0
    fn span_char(&self) -> Span {
618
0
        let mut next = Position {
619
0
            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
620
0
            line: self.line(),
621
0
            column: self.column().checked_add(1).unwrap(),
622
0
        };
623
0
        if self.char() == '\n' {
624
0
            next.line += 1;
625
0
            next.column = 1;
626
0
        }
627
0
        Span::new(self.pos(), next)
628
0
    }
629
630
    /// Parse and push a single alternation on to the parser's internal stack.
631
    /// If the top of the stack already has an alternation, then add to that
632
    /// instead of pushing a new one.
633
    ///
634
    /// The concatenation given corresponds to a single alternation branch.
635
    /// The concatenation returned starts the next branch and is empty.
636
    ///
637
    /// This assumes the parser is currently positioned at `|` and will advance
638
    /// the parser to the character following `|`.
639
    #[inline(never)]
640
0
    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
641
0
        assert_eq!(self.char(), '|');
642
0
        concat.span.end = self.pos();
643
0
        self.push_or_add_alternation(concat);
644
0
        self.bump();
645
0
        Ok(ast::Concat { span: self.span(), asts: vec![] })
646
0
    }
647
648
    /// Pushes or adds the given branch of an alternation to the parser's
649
    /// internal stack of state.
650
0
    fn push_or_add_alternation(&self, concat: ast::Concat) {
651
        use self::GroupState::*;
652
653
0
        let mut stack = self.parser().stack_group.borrow_mut();
654
0
        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
655
0
            alts.asts.push(concat.into_ast());
656
0
            return;
657
0
        }
658
0
        stack.push(Alternation(ast::Alternation {
659
0
            span: Span::new(concat.span.start, self.pos()),
660
0
            asts: vec![concat.into_ast()],
661
0
        }));
662
0
    }
663
664
    /// Parse and push a group AST (and its parent concatenation) on to the
665
    /// parser's internal stack. Return a fresh concatenation corresponding
666
    /// to the group's sub-AST.
667
    ///
668
    /// If a set of flags was found (with no group), then the concatenation
669
    /// is returned with that set of flags added.
670
    ///
671
    /// This assumes that the parser is currently positioned on the opening
672
    /// parenthesis. It advances the parser to the character at the start
673
    /// of the sub-expression (or adjoining expression).
674
    ///
675
    /// If there was a problem parsing the start of the group, then an error
676
    /// is returned.
677
    #[inline(never)]
678
0
    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
679
0
        assert_eq!(self.char(), '(');
680
0
        match self.parse_group()? {
681
0
            Either::Left(set) => {
682
0
                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
683
0
                if let Some(v) = ignore {
684
0
                    self.parser().ignore_whitespace.set(v);
685
0
                }
686
687
0
                concat.asts.push(Ast::Flags(set));
688
0
                Ok(concat)
689
            }
690
0
            Either::Right(group) => {
691
0
                let old_ignore_whitespace = self.ignore_whitespace();
692
0
                let new_ignore_whitespace = group
693
0
                    .flags()
694
0
                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
695
0
                    .unwrap_or(old_ignore_whitespace);
696
0
                self.parser().stack_group.borrow_mut().push(
697
0
                    GroupState::Group {
698
0
                        concat,
699
0
                        group,
700
0
                        ignore_whitespace: old_ignore_whitespace,
701
0
                    },
702
0
                );
703
0
                self.parser().ignore_whitespace.set(new_ignore_whitespace);
704
0
                Ok(ast::Concat { span: self.span(), asts: vec![] })
705
            }
706
        }
707
0
    }
708
709
    /// Pop a group AST from the parser's internal stack and set the group's
710
    /// AST to the given concatenation. Return the concatenation containing
711
    /// the group.
712
    ///
713
    /// This assumes that the parser is currently positioned on the closing
714
    /// parenthesis and advances the parser to the character following the `)`.
715
    ///
716
    /// If no such group could be popped, then an unopened group error is
717
    /// returned.
718
    #[inline(never)]
719
0
    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
720
        use self::GroupState::*;
721
722
0
        assert_eq!(self.char(), ')');
723
0
        let mut stack = self.parser().stack_group.borrow_mut();
724
0
        let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
725
0
            .pop()
726
        {
727
0
            Some(Group { concat, group, ignore_whitespace }) => {
728
0
                (concat, group, ignore_whitespace, None)
729
            }
730
0
            Some(Alternation(alt)) => match stack.pop() {
731
0
                Some(Group { concat, group, ignore_whitespace }) => {
732
0
                    (concat, group, ignore_whitespace, Some(alt))
733
                }
734
                None | Some(Alternation(_)) => {
735
0
                    return Err(self.error(
736
0
                        self.span_char(),
737
0
                        ast::ErrorKind::GroupUnopened,
738
0
                    ));
739
                }
740
            },
741
            None => {
742
0
                return Err(self
743
0
                    .error(self.span_char(), ast::ErrorKind::GroupUnopened));
744
            }
745
        };
746
0
        self.parser().ignore_whitespace.set(ignore_whitespace);
747
0
        group_concat.span.end = self.pos();
748
0
        self.bump();
749
0
        group.span.end = self.pos();
750
0
        match alt {
751
0
            Some(mut alt) => {
752
0
                alt.span.end = group_concat.span.end;
753
0
                alt.asts.push(group_concat.into_ast());
754
0
                group.ast = Box::new(alt.into_ast());
755
0
            }
756
0
            None => {
757
0
                group.ast = Box::new(group_concat.into_ast());
758
0
            }
759
        }
760
0
        prior_concat.asts.push(Ast::Group(group));
761
0
        Ok(prior_concat)
762
0
    }
763
764
    /// Pop the last state from the parser's internal stack, if it exists, and
765
    /// add the given concatenation to it. There either must be no state or a
766
    /// single alternation item on the stack. Any other scenario produces an
767
    /// error.
768
    ///
769
    /// This assumes that the parser has advanced to the end.
770
    #[inline(never)]
771
0
    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
772
0
        concat.span.end = self.pos();
773
0
        let mut stack = self.parser().stack_group.borrow_mut();
774
0
        let ast = match stack.pop() {
775
0
            None => Ok(concat.into_ast()),
776
0
            Some(GroupState::Alternation(mut alt)) => {
777
0
                alt.span.end = self.pos();
778
0
                alt.asts.push(concat.into_ast());
779
0
                Ok(Ast::Alternation(alt))
780
            }
781
0
            Some(GroupState::Group { group, .. }) => {
782
0
                return Err(
783
0
                    self.error(group.span, ast::ErrorKind::GroupUnclosed)
784
0
                );
785
            }
786
        };
787
        // If we try to pop again, there should be nothing.
788
0
        match stack.pop() {
789
0
            None => ast,
790
            Some(GroupState::Alternation(_)) => {
791
                // This unreachable is unfortunate. This case can't happen
792
                // because the only way we can be here is if there were two
793
                // `GroupState::Alternation`s adjacent in the parser's stack,
794
                // which we guarantee to never happen because we never push a
795
                // `GroupState::Alternation` if one is already at the top of
796
                // the stack.
797
0
                unreachable!()
798
            }
799
0
            Some(GroupState::Group { group, .. }) => {
800
0
                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
801
            }
802
        }
803
0
    }
804
805
    /// Parse the opening of a character class and push the current class
806
    /// parsing context onto the parser's stack. This assumes that the parser
807
    /// is positioned at an opening `[`. The given union should correspond to
808
    /// the union of set items built up before seeing the `[`.
809
    ///
810
    /// If there was a problem parsing the opening of the class, then an error
811
    /// is returned. Otherwise, a new union of set items for the class is
812
    /// returned (which may be populated with either a `]` or a `-`).
813
    #[inline(never)]
814
0
    fn push_class_open(
815
0
        &self,
816
0
        parent_union: ast::ClassSetUnion,
817
0
    ) -> Result<ast::ClassSetUnion> {
818
0
        assert_eq!(self.char(), '[');
819
820
0
        let (nested_set, nested_union) = self.parse_set_class_open()?;
821
0
        self.parser()
822
0
            .stack_class
823
0
            .borrow_mut()
824
0
            .push(ClassState::Open { union: parent_union, set: nested_set });
825
0
        Ok(nested_union)
826
0
    }
827
828
    /// Parse the end of a character class set and pop the character class
829
    /// parser stack. The union given corresponds to the last union built
830
    /// before seeing the closing `]`. The union returned corresponds to the
831
    /// parent character class set with the nested class added to it.
832
    ///
833
    /// This assumes that the parser is positioned at a `]` and will advance
834
    /// the parser to the byte immediately following the `]`.
835
    ///
836
    /// If the stack is empty after popping, then this returns the final
837
    /// "top-level" character class AST (where a "top-level" character class
838
    /// is one that is not nested inside any other character class).
839
    ///
840
    /// If there is no corresponding opening bracket on the parser's stack,
841
    /// then an error is returned.
842
    #[inline(never)]
843
0
    fn pop_class(
844
0
        &self,
845
0
        nested_union: ast::ClassSetUnion,
846
0
    ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
847
0
        assert_eq!(self.char(), ']');
848
849
0
        let item = ast::ClassSet::Item(nested_union.into_item());
850
0
        let prevset = self.pop_class_op(item);
851
0
        let mut stack = self.parser().stack_class.borrow_mut();
852
0
        match stack.pop() {
853
            None => {
854
                // We can never observe an empty stack:
855
                //
856
                // 1) We are guaranteed to start with a non-empty stack since
857
                //    the character class parser is only initiated when it sees
858
                //    a `[`.
859
                // 2) If we ever observe an empty stack while popping after
860
                //    seeing a `]`, then we signal the character class parser
861
                //    to terminate.
862
0
                panic!("unexpected empty character class stack")
863
            }
864
            Some(ClassState::Op { .. }) => {
865
                // This panic is unfortunate, but this case is impossible
866
                // since we already popped the Op state if one exists above.
867
                // Namely, every push to the class parser stack is guarded by
868
                // whether an existing Op is already on the top of the stack.
869
                // If it is, the existing Op is modified. That is, the stack
870
                // can never have consecutive Op states.
871
0
                panic!("unexpected ClassState::Op")
872
            }
873
0
            Some(ClassState::Open { mut union, mut set }) => {
874
0
                self.bump();
875
0
                set.span.end = self.pos();
876
0
                set.kind = prevset;
877
0
                if stack.is_empty() {
878
0
                    Ok(Either::Right(ast::Class::Bracketed(set)))
879
                } else {
880
0
                    union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
881
0
                    Ok(Either::Left(union))
882
                }
883
            }
884
        }
885
0
    }
886
887
    /// Return an "unclosed class" error whose span points to the most
888
    /// recently opened class.
889
    ///
890
    /// This should only be called while parsing a character class.
891
    #[inline(never)]
892
0
    fn unclosed_class_error(&self) -> ast::Error {
893
0
        for state in self.parser().stack_class.borrow().iter().rev() {
894
0
            if let ClassState::Open { ref set, .. } = *state {
895
0
                return self.error(set.span, ast::ErrorKind::ClassUnclosed);
896
0
            }
897
        }
898
        // We are guaranteed to have a non-empty stack with at least
899
        // one open bracket, so we should never get here.
900
0
        panic!("no open character class found")
901
0
    }
902
903
    /// Push the current set of class items on to the class parser's stack as
904
    /// the left hand side of the given operator.
905
    ///
906
    /// A fresh set union is returned, which should be used to build the right
907
    /// hand side of this operator.
908
    #[inline(never)]
909
0
    fn push_class_op(
910
0
        &self,
911
0
        next_kind: ast::ClassSetBinaryOpKind,
912
0
        next_union: ast::ClassSetUnion,
913
0
    ) -> ast::ClassSetUnion {
914
0
        let item = ast::ClassSet::Item(next_union.into_item());
915
0
        let new_lhs = self.pop_class_op(item);
916
0
        self.parser()
917
0
            .stack_class
918
0
            .borrow_mut()
919
0
            .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
920
0
        ast::ClassSetUnion { span: self.span(), items: vec![] }
921
0
    }
922
923
    /// Pop a character class set from the character class parser stack. If the
924
    /// top of the stack is just an item (not an operation), then return the
925
    /// given set unchanged. If the top of the stack is an operation, then the
926
    /// given set will be used as the rhs of the operation on the top of the
927
    /// stack. In that case, the binary operation is returned as a set.
928
    #[inline(never)]
929
0
    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
930
0
        let mut stack = self.parser().stack_class.borrow_mut();
931
0
        let (kind, lhs) = match stack.pop() {
932
0
            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
933
0
            Some(state @ ClassState::Open { .. }) => {
934
0
                stack.push(state);
935
0
                return rhs;
936
            }
937
0
            None => unreachable!(),
938
        };
939
0
        let span = Span::new(lhs.span().start, rhs.span().end);
940
0
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
941
0
            span,
942
0
            kind,
943
0
            lhs: Box::new(lhs),
944
0
            rhs: Box::new(rhs),
945
0
        })
946
0
    }
947
}
948
949
impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
950
    /// Parse the regular expression into an abstract syntax tree.
951
0
    fn parse(&self) -> Result<Ast> {
952
0
        self.parse_with_comments().map(|astc| astc.ast)
953
0
    }
954
955
    /// Parse the regular expression and return an abstract syntax tree with
956
    /// all of the comments found in the pattern.
957
0
    fn parse_with_comments(&self) -> Result<ast::WithComments> {
958
0
        assert_eq!(self.offset(), 0, "parser can only be used once");
959
0
        self.parser().reset();
960
0
        let mut concat = ast::Concat { span: self.span(), asts: vec![] };
961
        loop {
962
0
            self.bump_space();
963
0
            if self.is_eof() {
964
0
                break;
965
0
            }
966
0
            match self.char() {
967
0
                '(' => concat = self.push_group(concat)?,
968
0
                ')' => concat = self.pop_group(concat)?,
969
0
                '|' => concat = self.push_alternate(concat)?,
970
                '[' => {
971
0
                    let class = self.parse_set_class()?;
972
0
                    concat.asts.push(Ast::Class(class));
973
                }
974
                '?' => {
975
0
                    concat = self.parse_uncounted_repetition(
976
0
                        concat,
977
0
                        ast::RepetitionKind::ZeroOrOne,
978
0
                    )?;
979
                }
980
                '*' => {
981
0
                    concat = self.parse_uncounted_repetition(
982
0
                        concat,
983
0
                        ast::RepetitionKind::ZeroOrMore,
984
0
                    )?;
985
                }
986
                '+' => {
987
0
                    concat = self.parse_uncounted_repetition(
988
0
                        concat,
989
0
                        ast::RepetitionKind::OneOrMore,
990
0
                    )?;
991
                }
992
                '{' => {
993
0
                    concat = self.parse_counted_repetition(concat)?;
994
                }
995
0
                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
996
            }
997
        }
998
0
        let ast = self.pop_group_end(concat)?;
999
0
        NestLimiter::new(self).check(&ast)?;
1000
0
        Ok(ast::WithComments {
1001
0
            ast,
1002
0
            comments: mem::replace(
1003
0
                &mut *self.parser().comments.borrow_mut(),
1004
0
                vec![],
1005
0
            ),
1006
0
        })
1007
0
    }
1008
1009
    /// Parses an uncounted repetition operation. An uncounted repetition
1010
    /// operator includes ?, * and +, but does not include the {m,n} syntax.
1011
    /// The given `kind` should correspond to the operator observed by the
1012
    /// caller.
1013
    ///
1014
    /// This assumes that the parser is currently positioned at the repetition
1015
    /// operator and advances the parser to the first character after the
1016
    /// operator. (Note that the operator may include a single additional `?`,
1017
    /// which makes the operator ungreedy.)
1018
    ///
1019
    /// The caller should include the concatenation that is being built. The
1020
    /// concatenation returned includes the repetition operator applied to the
1021
    /// last expression in the given concatenation.
1022
    #[inline(never)]
1023
0
    fn parse_uncounted_repetition(
1024
0
        &self,
1025
0
        mut concat: ast::Concat,
1026
0
        kind: ast::RepetitionKind,
1027
0
    ) -> Result<ast::Concat> {
1028
0
        assert!(
1029
0
            self.char() == '?' || self.char() == '*' || self.char() == '+'
1030
        );
1031
0
        let op_start = self.pos();
1032
0
        let ast = match concat.asts.pop() {
1033
0
            Some(ast) => ast,
1034
            None => {
1035
0
                return Err(
1036
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1037
0
                )
1038
            }
1039
        };
1040
0
        match ast {
1041
            Ast::Empty(_) | Ast::Flags(_) => {
1042
0
                return Err(
1043
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1044
0
                )
1045
            }
1046
0
            _ => {}
1047
        }
1048
0
        let mut greedy = true;
1049
0
        if self.bump() && self.char() == '?' {
1050
0
            greedy = false;
1051
0
            self.bump();
1052
0
        }
1053
0
        concat.asts.push(Ast::Repetition(ast::Repetition {
1054
0
            span: ast.span().with_end(self.pos()),
1055
0
            op: ast::RepetitionOp {
1056
0
                span: Span::new(op_start, self.pos()),
1057
0
                kind,
1058
0
            },
1059
0
            greedy,
1060
0
            ast: Box::new(ast),
1061
0
        }));
1062
0
        Ok(concat)
1063
0
    }
1064
1065
    /// Parses a counted repetition operation. A counted repetition operator
1066
    /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1067
    /// operators.
1068
    ///
1069
    /// This assumes that the parser is currently positioned at the opening `{`
1070
    /// and advances the parser to the first character after the operator.
1071
    /// (Note that the operator may include a single additional `?`, which
1072
    /// makes the operator ungreedy.)
1073
    ///
1074
    /// The caller should include the concatenation that is being built. The
1075
    /// concatenation returned includes the repetition operator applied to the
1076
    /// last expression in the given concatenation.
1077
    #[inline(never)]
1078
0
    fn parse_counted_repetition(
1079
0
        &self,
1080
0
        mut concat: ast::Concat,
1081
0
    ) -> Result<ast::Concat> {
1082
0
        assert!(self.char() == '{');
1083
0
        let start = self.pos();
1084
0
        let ast = match concat.asts.pop() {
1085
0
            Some(ast) => ast,
1086
            None => {
1087
0
                return Err(
1088
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1089
0
                )
1090
            }
1091
        };
1092
0
        match ast {
1093
            Ast::Empty(_) | Ast::Flags(_) => {
1094
0
                return Err(
1095
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1096
0
                )
1097
            }
1098
0
            _ => {}
1099
        }
1100
0
        if !self.bump_and_bump_space() {
1101
0
            return Err(self.error(
1102
0
                Span::new(start, self.pos()),
1103
0
                ast::ErrorKind::RepetitionCountUnclosed,
1104
0
            ));
1105
0
        }
1106
0
        let count_start = specialize_err(
1107
0
            self.parse_decimal(),
1108
0
            ast::ErrorKind::DecimalEmpty,
1109
0
            ast::ErrorKind::RepetitionCountDecimalEmpty,
1110
0
        )?;
1111
0
        let mut range = ast::RepetitionRange::Exactly(count_start);
1112
0
        if self.is_eof() {
1113
0
            return Err(self.error(
1114
0
                Span::new(start, self.pos()),
1115
0
                ast::ErrorKind::RepetitionCountUnclosed,
1116
0
            ));
1117
0
        }
1118
0
        if self.char() == ',' {
1119
0
            if !self.bump_and_bump_space() {
1120
0
                return Err(self.error(
1121
0
                    Span::new(start, self.pos()),
1122
0
                    ast::ErrorKind::RepetitionCountUnclosed,
1123
0
                ));
1124
0
            }
1125
0
            if self.char() != '}' {
1126
0
                let count_end = specialize_err(
1127
0
                    self.parse_decimal(),
1128
0
                    ast::ErrorKind::DecimalEmpty,
1129
0
                    ast::ErrorKind::RepetitionCountDecimalEmpty,
1130
0
                )?;
1131
0
                range = ast::RepetitionRange::Bounded(count_start, count_end);
1132
0
            } else {
1133
0
                range = ast::RepetitionRange::AtLeast(count_start);
1134
0
            }
1135
0
        }
1136
0
        if self.is_eof() || self.char() != '}' {
1137
0
            return Err(self.error(
1138
0
                Span::new(start, self.pos()),
1139
0
                ast::ErrorKind::RepetitionCountUnclosed,
1140
0
            ));
1141
0
        }
1142
1143
0
        let mut greedy = true;
1144
0
        if self.bump_and_bump_space() && self.char() == '?' {
1145
0
            greedy = false;
1146
0
            self.bump();
1147
0
        }
1148
1149
0
        let op_span = Span::new(start, self.pos());
1150
0
        if !range.is_valid() {
1151
0
            return Err(
1152
0
                self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1153
0
            );
1154
0
        }
1155
0
        concat.asts.push(Ast::Repetition(ast::Repetition {
1156
0
            span: ast.span().with_end(self.pos()),
1157
0
            op: ast::RepetitionOp {
1158
0
                span: op_span,
1159
0
                kind: ast::RepetitionKind::Range(range),
1160
0
            },
1161
0
            greedy,
1162
0
            ast: Box::new(ast),
1163
0
        }));
1164
0
        Ok(concat)
1165
0
    }
1166
1167
    /// Parse a group (which contains a sub-expression) or a set of flags.
1168
    ///
1169
    /// If a group was found, then it is returned with an empty AST. If a set
1170
    /// of flags is found, then that set is returned.
1171
    ///
1172
    /// The parser should be positioned at the opening parenthesis.
1173
    ///
1174
    /// This advances the parser to the character before the start of the
1175
    /// sub-expression (in the case of a group) or to the closing parenthesis
1176
    /// immediately following the set of flags.
1177
    ///
1178
    /// # Errors
1179
    ///
1180
    /// If flags are given and incorrectly specified, then a corresponding
1181
    /// error is returned.
1182
    ///
1183
    /// If a capture name is given and it is incorrectly specified, then a
1184
    /// corresponding error is returned.
1185
    #[inline(never)]
1186
0
    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1187
0
        assert_eq!(self.char(), '(');
1188
0
        let open_span = self.span_char();
1189
0
        self.bump();
1190
0
        self.bump_space();
1191
0
        if self.is_lookaround_prefix() {
1192
0
            return Err(self.error(
1193
0
                Span::new(open_span.start, self.span().end),
1194
0
                ast::ErrorKind::UnsupportedLookAround,
1195
0
            ));
1196
0
        }
1197
0
        let inner_span = self.span();
1198
0
        if self.bump_if("?P<") {
1199
0
            let capture_index = self.next_capture_index(open_span)?;
1200
0
            let cap = self.parse_capture_name(capture_index)?;
1201
0
            Ok(Either::Right(ast::Group {
1202
0
                span: open_span,
1203
0
                kind: ast::GroupKind::CaptureName(cap),
1204
0
                ast: Box::new(Ast::Empty(self.span())),
1205
0
            }))
1206
0
        } else if self.bump_if("?") {
1207
0
            if self.is_eof() {
1208
0
                return Err(
1209
0
                    self.error(open_span, ast::ErrorKind::GroupUnclosed)
1210
0
                );
1211
0
            }
1212
0
            let flags = self.parse_flags()?;
1213
0
            let char_end = self.char();
1214
0
            self.bump();
1215
0
            if char_end == ')' {
1216
                // We don't allow empty flags, e.g., `(?)`. We instead
1217
                // interpret it as a repetition operator missing its argument.
1218
0
                if flags.items.is_empty() {
1219
0
                    return Err(self.error(
1220
0
                        inner_span,
1221
0
                        ast::ErrorKind::RepetitionMissing,
1222
0
                    ));
1223
0
                }
1224
0
                Ok(Either::Left(ast::SetFlags {
1225
0
                    span: Span { end: self.pos(), ..open_span },
1226
0
                    flags,
1227
0
                }))
1228
            } else {
1229
0
                assert_eq!(char_end, ':');
1230
0
                Ok(Either::Right(ast::Group {
1231
0
                    span: open_span,
1232
0
                    kind: ast::GroupKind::NonCapturing(flags),
1233
0
                    ast: Box::new(Ast::Empty(self.span())),
1234
0
                }))
1235
            }
1236
        } else {
1237
0
            let capture_index = self.next_capture_index(open_span)?;
1238
0
            Ok(Either::Right(ast::Group {
1239
0
                span: open_span,
1240
0
                kind: ast::GroupKind::CaptureIndex(capture_index),
1241
0
                ast: Box::new(Ast::Empty(self.span())),
1242
0
            }))
1243
        }
1244
0
    }
1245
1246
    /// Parses a capture group name. Assumes that the parser is positioned at
1247
    /// the first character in the name following the opening `<` (and may
1248
    /// possibly be EOF). This advances the parser to the first character
1249
    /// following the closing `>`.
1250
    ///
1251
    /// The caller must provide the capture index of the group for this name.
1252
    #[inline(never)]
1253
0
    fn parse_capture_name(
1254
0
        &self,
1255
0
        capture_index: u32,
1256
0
    ) -> Result<ast::CaptureName> {
1257
0
        if self.is_eof() {
1258
0
            return Err(self
1259
0
                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1260
0
        }
1261
0
        let start = self.pos();
1262
        loop {
1263
0
            if self.char() == '>' {
1264
0
                break;
1265
0
            }
1266
0
            if !is_capture_char(self.char(), self.pos() == start) {
1267
0
                return Err(self.error(
1268
0
                    self.span_char(),
1269
0
                    ast::ErrorKind::GroupNameInvalid,
1270
0
                ));
1271
0
            }
1272
0
            if !self.bump() {
1273
0
                break;
1274
0
            }
1275
        }
1276
0
        let end = self.pos();
1277
0
        if self.is_eof() {
1278
0
            return Err(self
1279
0
                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1280
0
        }
1281
0
        assert_eq!(self.char(), '>');
1282
0
        self.bump();
1283
0
        let name = &self.pattern()[start.offset..end.offset];
1284
0
        if name.is_empty() {
1285
0
            return Err(self.error(
1286
0
                Span::new(start, start),
1287
0
                ast::ErrorKind::GroupNameEmpty,
1288
0
            ));
1289
0
        }
1290
0
        let capname = ast::CaptureName {
1291
0
            span: Span::new(start, end),
1292
0
            name: name.to_string(),
1293
0
            index: capture_index,
1294
0
        };
1295
0
        self.add_capture_name(&capname)?;
1296
0
        Ok(capname)
1297
0
    }
1298
1299
    /// Parse a sequence of flags starting at the current character.
1300
    ///
1301
    /// This advances the parser to the character immediately following the
1302
    /// flags, which is guaranteed to be either `:` or `)`.
1303
    ///
1304
    /// # Errors
1305
    ///
1306
    /// If any flags are duplicated, then an error is returned.
1307
    ///
1308
    /// If the negation operator is used more than once, then an error is
1309
    /// returned.
1310
    ///
1311
    /// If no flags could be found or if the negation operation is not followed
1312
    /// by any flags, then an error is returned.
1313
    #[inline(never)]
1314
0
    fn parse_flags(&self) -> Result<ast::Flags> {
1315
0
        let mut flags = ast::Flags { span: self.span(), items: vec![] };
1316
0
        let mut last_was_negation = None;
1317
0
        while self.char() != ':' && self.char() != ')' {
1318
0
            if self.char() == '-' {
1319
0
                last_was_negation = Some(self.span_char());
1320
0
                let item = ast::FlagsItem {
1321
0
                    span: self.span_char(),
1322
0
                    kind: ast::FlagsItemKind::Negation,
1323
0
                };
1324
0
                if let Some(i) = flags.add_item(item) {
1325
0
                    return Err(self.error(
1326
0
                        self.span_char(),
1327
0
                        ast::ErrorKind::FlagRepeatedNegation {
1328
0
                            original: flags.items[i].span,
1329
0
                        },
1330
0
                    ));
1331
0
                }
1332
            } else {
1333
0
                last_was_negation = None;
1334
0
                let item = ast::FlagsItem {
1335
0
                    span: self.span_char(),
1336
0
                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1337
                };
1338
0
                if let Some(i) = flags.add_item(item) {
1339
0
                    return Err(self.error(
1340
0
                        self.span_char(),
1341
0
                        ast::ErrorKind::FlagDuplicate {
1342
0
                            original: flags.items[i].span,
1343
0
                        },
1344
0
                    ));
1345
0
                }
1346
            }
1347
0
            if !self.bump() {
1348
0
                return Err(
1349
0
                    self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1350
0
                );
1351
0
            }
1352
        }
1353
0
        if let Some(span) = last_was_negation {
1354
0
            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1355
0
        }
1356
0
        flags.span.end = self.pos();
1357
0
        Ok(flags)
1358
0
    }
1359
1360
    /// Parse the current character as a flag. Do not advance the parser.
1361
    ///
1362
    /// # Errors
1363
    ///
1364
    /// If the flag is not recognized, then an error is returned.
1365
    #[inline(never)]
1366
0
    fn parse_flag(&self) -> Result<ast::Flag> {
1367
0
        match self.char() {
1368
0
            'i' => Ok(ast::Flag::CaseInsensitive),
1369
0
            'm' => Ok(ast::Flag::MultiLine),
1370
0
            's' => Ok(ast::Flag::DotMatchesNewLine),
1371
0
            'U' => Ok(ast::Flag::SwapGreed),
1372
0
            'u' => Ok(ast::Flag::Unicode),
1373
0
            'x' => Ok(ast::Flag::IgnoreWhitespace),
1374
            _ => {
1375
0
                Err(self
1376
0
                    .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1377
            }
1378
        }
1379
0
    }
1380
1381
    /// Parse a primitive AST. e.g., A literal, non-set character class or
1382
    /// assertion.
1383
    ///
1384
    /// This assumes that the parser expects a primitive at the current
1385
    /// location. i.e., All other non-primitive cases have been handled.
1386
    /// For example, if the parser's position is at `|`, then `|` will be
1387
    /// treated as a literal (e.g., inside a character class).
1388
    ///
1389
    /// This advances the parser to the first character immediately following
1390
    /// the primitive.
1391
0
    fn parse_primitive(&self) -> Result<Primitive> {
1392
0
        match self.char() {
1393
0
            '\\' => self.parse_escape(),
1394
            '.' => {
1395
0
                let ast = Primitive::Dot(self.span_char());
1396
0
                self.bump();
1397
0
                Ok(ast)
1398
            }
1399
            '^' => {
1400
0
                let ast = Primitive::Assertion(ast::Assertion {
1401
0
                    span: self.span_char(),
1402
0
                    kind: ast::AssertionKind::StartLine,
1403
0
                });
1404
0
                self.bump();
1405
0
                Ok(ast)
1406
            }
1407
            '$' => {
1408
0
                let ast = Primitive::Assertion(ast::Assertion {
1409
0
                    span: self.span_char(),
1410
0
                    kind: ast::AssertionKind::EndLine,
1411
0
                });
1412
0
                self.bump();
1413
0
                Ok(ast)
1414
            }
1415
0
            c => {
1416
0
                let ast = Primitive::Literal(ast::Literal {
1417
0
                    span: self.span_char(),
1418
0
                    kind: ast::LiteralKind::Verbatim,
1419
0
                    c,
1420
0
                });
1421
0
                self.bump();
1422
0
                Ok(ast)
1423
            }
1424
        }
1425
0
    }
1426
1427
    /// Parse an escape sequence as a primitive AST.
1428
    ///
1429
    /// This assumes the parser is positioned at the start of the escape
1430
    /// sequence, i.e., `\`. It advances the parser to the first position
1431
    /// immediately following the escape sequence.
1432
    #[inline(never)]
1433
0
    fn parse_escape(&self) -> Result<Primitive> {
1434
0
        assert_eq!(self.char(), '\\');
1435
0
        let start = self.pos();
1436
0
        if !self.bump() {
1437
0
            return Err(self.error(
1438
0
                Span::new(start, self.pos()),
1439
0
                ast::ErrorKind::EscapeUnexpectedEof,
1440
0
            ));
1441
0
        }
1442
0
        let c = self.char();
1443
        // Put some of the more complicated routines into helpers.
1444
0
        match c {
1445
0
            '0'..='7' => {
1446
0
                if !self.parser().octal {
1447
0
                    return Err(self.error(
1448
0
                        Span::new(start, self.span_char().end),
1449
0
                        ast::ErrorKind::UnsupportedBackreference,
1450
0
                    ));
1451
0
                }
1452
0
                let mut lit = self.parse_octal();
1453
0
                lit.span.start = start;
1454
0
                return Ok(Primitive::Literal(lit));
1455
            }
1456
0
            '8'..='9' if !self.parser().octal => {
1457
0
                return Err(self.error(
1458
0
                    Span::new(start, self.span_char().end),
1459
0
                    ast::ErrorKind::UnsupportedBackreference,
1460
0
                ));
1461
            }
1462
            'x' | 'u' | 'U' => {
1463
0
                let mut lit = self.parse_hex()?;
1464
0
                lit.span.start = start;
1465
0
                return Ok(Primitive::Literal(lit));
1466
            }
1467
            'p' | 'P' => {
1468
0
                let mut cls = self.parse_unicode_class()?;
1469
0
                cls.span.start = start;
1470
0
                return Ok(Primitive::Unicode(cls));
1471
            }
1472
            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1473
0
                let mut cls = self.parse_perl_class();
1474
0
                cls.span.start = start;
1475
0
                return Ok(Primitive::Perl(cls));
1476
            }
1477
0
            _ => {}
1478
        }
1479
1480
        // Handle all of the one letter sequences inline.
1481
0
        self.bump();
1482
0
        let span = Span::new(start, self.pos());
1483
0
        if is_meta_character(c) {
1484
0
            return Ok(Primitive::Literal(ast::Literal {
1485
0
                span,
1486
0
                kind: ast::LiteralKind::Punctuation,
1487
0
                c,
1488
0
            }));
1489
0
        }
1490
0
        let special = |kind, c| {
1491
0
            Ok(Primitive::Literal(ast::Literal {
1492
0
                span,
1493
0
                kind: ast::LiteralKind::Special(kind),
1494
0
                c,
1495
0
            }))
1496
0
        };
1497
0
        match c {
1498
0
            'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1499
0
            'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1500
0
            't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1501
0
            'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1502
0
            'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1503
0
            'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1504
0
            ' ' if self.ignore_whitespace() => {
1505
0
                special(ast::SpecialLiteralKind::Space, ' ')
1506
            }
1507
0
            'A' => Ok(Primitive::Assertion(ast::Assertion {
1508
0
                span,
1509
0
                kind: ast::AssertionKind::StartText,
1510
0
            })),
1511
0
            'z' => Ok(Primitive::Assertion(ast::Assertion {
1512
0
                span,
1513
0
                kind: ast::AssertionKind::EndText,
1514
0
            })),
1515
0
            'b' => Ok(Primitive::Assertion(ast::Assertion {
1516
0
                span,
1517
0
                kind: ast::AssertionKind::WordBoundary,
1518
0
            })),
1519
0
            'B' => Ok(Primitive::Assertion(ast::Assertion {
1520
0
                span,
1521
0
                kind: ast::AssertionKind::NotWordBoundary,
1522
0
            })),
1523
0
            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1524
        }
1525
0
    }
1526
1527
    /// Parse an octal representation of a Unicode codepoint up to 3 digits
1528
    /// long. This expects the parser to be positioned at the first octal
1529
    /// digit and advances the parser to the first character immediately
1530
    /// following the octal number. This also assumes that parsing octal
1531
    /// escapes is enabled.
1532
    ///
1533
    /// Assuming the preconditions are met, this routine can never fail.
1534
    #[inline(never)]
1535
0
    fn parse_octal(&self) -> ast::Literal {
1536
        use std::char;
1537
        use std::u32;
1538
1539
0
        assert!(self.parser().octal);
1540
0
        assert!('0' <= self.char() && self.char() <= '7');
1541
0
        let start = self.pos();
1542
        // Parse up to two more digits.
1543
0
        while self.bump()
1544
0
            && '0' <= self.char()
1545
0
            && self.char() <= '7'
1546
0
            && self.pos().offset - start.offset <= 2
1547
0
        {}
1548
0
        let end = self.pos();
1549
0
        let octal = &self.pattern()[start.offset..end.offset];
1550
        // Parsing the octal should never fail since the above guarantees a
1551
        // valid number.
1552
0
        let codepoint =
1553
0
            u32::from_str_radix(octal, 8).expect("valid octal number");
1554
        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1555
        // invalid Unicode scalar values.
1556
0
        let c = char::from_u32(codepoint).expect("Unicode scalar value");
1557
0
        ast::Literal {
1558
0
            span: Span::new(start, end),
1559
0
            kind: ast::LiteralKind::Octal,
1560
0
            c,
1561
0
        }
1562
0
    }
1563
1564
    /// Parse a hex representation of a Unicode codepoint. This handles both
1565
    /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1566
    /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1567
    /// the first character immediately following the hexadecimal literal.
1568
    #[inline(never)]
1569
0
    fn parse_hex(&self) -> Result<ast::Literal> {
1570
0
        assert!(
1571
0
            self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1572
        );
1573
1574
0
        let hex_kind = match self.char() {
1575
0
            'x' => ast::HexLiteralKind::X,
1576
0
            'u' => ast::HexLiteralKind::UnicodeShort,
1577
0
            _ => ast::HexLiteralKind::UnicodeLong,
1578
        };
1579
0
        if !self.bump_and_bump_space() {
1580
0
            return Err(
1581
0
                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1582
0
            );
1583
0
        }
1584
0
        if self.char() == '{' {
1585
0
            self.parse_hex_brace(hex_kind)
1586
        } else {
1587
0
            self.parse_hex_digits(hex_kind)
1588
        }
1589
0
    }
1590
1591
    /// Parse an N-digit hex representation of a Unicode codepoint. This
1592
    /// expects the parser to be positioned at the first digit and will advance
1593
    /// the parser to the first character immediately following the escape
1594
    /// sequence.
1595
    ///
1596
    /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1597
    /// or 8 (for `\UNNNNNNNN`).
1598
    #[inline(never)]
1599
0
    fn parse_hex_digits(
1600
0
        &self,
1601
0
        kind: ast::HexLiteralKind,
1602
0
    ) -> Result<ast::Literal> {
1603
        use std::char;
1604
        use std::u32;
1605
1606
0
        let mut scratch = self.parser().scratch.borrow_mut();
1607
0
        scratch.clear();
1608
1609
0
        let start = self.pos();
1610
0
        for i in 0..kind.digits() {
1611
0
            if i > 0 && !self.bump_and_bump_space() {
1612
0
                return Err(self
1613
0
                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1614
0
            }
1615
0
            if !is_hex(self.char()) {
1616
0
                return Err(self.error(
1617
0
                    self.span_char(),
1618
0
                    ast::ErrorKind::EscapeHexInvalidDigit,
1619
0
                ));
1620
0
            }
1621
0
            scratch.push(self.char());
1622
        }
1623
        // The final bump just moves the parser past the literal, which may
1624
        // be EOF.
1625
0
        self.bump_and_bump_space();
1626
0
        let end = self.pos();
1627
0
        let hex = scratch.as_str();
1628
0
        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1629
0
            None => Err(self.error(
1630
0
                Span::new(start, end),
1631
0
                ast::ErrorKind::EscapeHexInvalid,
1632
0
            )),
1633
0
            Some(c) => Ok(ast::Literal {
1634
0
                span: Span::new(start, end),
1635
0
                kind: ast::LiteralKind::HexFixed(kind),
1636
0
                c,
1637
0
            }),
1638
        }
1639
0
    }
1640
1641
    /// Parse a hex representation of any Unicode scalar value. This expects
1642
    /// the parser to be positioned at the opening brace `{` and will advance
1643
    /// the parser to the first character following the closing brace `}`.
1644
    #[inline(never)]
1645
0
    fn parse_hex_brace(
1646
0
        &self,
1647
0
        kind: ast::HexLiteralKind,
1648
0
    ) -> Result<ast::Literal> {
1649
        use std::char;
1650
        use std::u32;
1651
1652
0
        let mut scratch = self.parser().scratch.borrow_mut();
1653
0
        scratch.clear();
1654
1655
0
        let brace_pos = self.pos();
1656
0
        let start = self.span_char().end;
1657
0
        while self.bump_and_bump_space() && self.char() != '}' {
1658
0
            if !is_hex(self.char()) {
1659
0
                return Err(self.error(
1660
0
                    self.span_char(),
1661
0
                    ast::ErrorKind::EscapeHexInvalidDigit,
1662
0
                ));
1663
0
            }
1664
0
            scratch.push(self.char());
1665
        }
1666
0
        if self.is_eof() {
1667
0
            return Err(self.error(
1668
0
                Span::new(brace_pos, self.pos()),
1669
0
                ast::ErrorKind::EscapeUnexpectedEof,
1670
0
            ));
1671
0
        }
1672
0
        let end = self.pos();
1673
0
        let hex = scratch.as_str();
1674
0
        assert_eq!(self.char(), '}');
1675
0
        self.bump_and_bump_space();
1676
1677
0
        if hex.is_empty() {
1678
0
            return Err(self.error(
1679
0
                Span::new(brace_pos, self.pos()),
1680
0
                ast::ErrorKind::EscapeHexEmpty,
1681
0
            ));
1682
0
        }
1683
0
        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1684
0
            None => Err(self.error(
1685
0
                Span::new(start, end),
1686
0
                ast::ErrorKind::EscapeHexInvalid,
1687
0
            )),
1688
0
            Some(c) => Ok(ast::Literal {
1689
0
                span: Span::new(start, self.pos()),
1690
0
                kind: ast::LiteralKind::HexBrace(kind),
1691
0
                c,
1692
0
            }),
1693
        }
1694
0
    }
1695
1696
    /// Parse a decimal number into a u32 while trimming leading and trailing
1697
    /// whitespace.
1698
    ///
1699
    /// This expects the parser to be positioned at the first position where
1700
    /// a decimal digit could occur. This will advance the parser to the byte
1701
    /// immediately following the last contiguous decimal digit.
1702
    ///
1703
    /// If no decimal digit could be found or if there was a problem parsing
1704
    /// the complete set of digits into a u32, then an error is returned.
1705
0
    fn parse_decimal(&self) -> Result<u32> {
1706
0
        let mut scratch = self.parser().scratch.borrow_mut();
1707
0
        scratch.clear();
1708
1709
0
        while !self.is_eof() && self.char().is_whitespace() {
1710
0
            self.bump();
1711
0
        }
1712
0
        let start = self.pos();
1713
0
        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1714
0
            scratch.push(self.char());
1715
0
            self.bump_and_bump_space();
1716
0
        }
1717
0
        let span = Span::new(start, self.pos());
1718
0
        while !self.is_eof() && self.char().is_whitespace() {
1719
0
            self.bump_and_bump_space();
1720
0
        }
1721
0
        let digits = scratch.as_str();
1722
0
        if digits.is_empty() {
1723
0
            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1724
0
        }
1725
0
        match u32::from_str_radix(digits, 10).ok() {
1726
0
            Some(n) => Ok(n),
1727
0
            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1728
        }
1729
0
    }
1730
1731
    /// Parse a standard character class consisting primarily of characters or
1732
    /// character ranges, but can also contain nested character classes of
1733
    /// any type (sans `.`).
1734
    ///
1735
    /// This assumes the parser is positioned at the opening `[`. If parsing
1736
    /// is successful, then the parser is advanced to the position immediately
1737
    /// following the closing `]`.
1738
    #[inline(never)]
1739
0
    fn parse_set_class(&self) -> Result<ast::Class> {
1740
0
        assert_eq!(self.char(), '[');
1741
1742
0
        let mut union =
1743
0
            ast::ClassSetUnion { span: self.span(), items: vec![] };
1744
        loop {
1745
0
            self.bump_space();
1746
0
            if self.is_eof() {
1747
0
                return Err(self.unclosed_class_error());
1748
0
            }
1749
0
            match self.char() {
1750
                '[' => {
1751
                    // If we've already parsed the opening bracket, then
1752
                    // attempt to treat this as the beginning of an ASCII
1753
                    // class. If ASCII class parsing fails, then the parser
1754
                    // backs up to `[`.
1755
0
                    if !self.parser().stack_class.borrow().is_empty() {
1756
0
                        if let Some(cls) = self.maybe_parse_ascii_class() {
1757
0
                            union.push(ast::ClassSetItem::Ascii(cls));
1758
0
                            continue;
1759
0
                        }
1760
0
                    }
1761
0
                    union = self.push_class_open(union)?;
1762
                }
1763
0
                ']' => match self.pop_class(union)? {
1764
0
                    Either::Left(nested_union) => {
1765
0
                        union = nested_union;
1766
0
                    }
1767
0
                    Either::Right(class) => return Ok(class),
1768
                },
1769
0
                '&' if self.peek() == Some('&') => {
1770
0
                    assert!(self.bump_if("&&"));
1771
0
                    union = self.push_class_op(
1772
0
                        ast::ClassSetBinaryOpKind::Intersection,
1773
0
                        union,
1774
                    );
1775
                }
1776
0
                '-' if self.peek() == Some('-') => {
1777
0
                    assert!(self.bump_if("--"));
1778
0
                    union = self.push_class_op(
1779
0
                        ast::ClassSetBinaryOpKind::Difference,
1780
0
                        union,
1781
                    );
1782
                }
1783
0
                '~' if self.peek() == Some('~') => {
1784
0
                    assert!(self.bump_if("~~"));
1785
0
                    union = self.push_class_op(
1786
0
                        ast::ClassSetBinaryOpKind::SymmetricDifference,
1787
0
                        union,
1788
                    );
1789
                }
1790
                _ => {
1791
0
                    union.push(self.parse_set_class_range()?);
1792
                }
1793
            }
1794
        }
1795
0
    }
1796
1797
    /// Parse a single primitive item in a character class set. The item to
1798
    /// be parsed can either be one of a simple literal character, a range
1799
    /// between two simple literal characters or a "primitive" character
1800
    /// class like \w or \p{Greek}.
1801
    ///
1802
    /// If an invalid escape is found, or if a character class is found where
1803
    /// a simple literal is expected (e.g., in a range), then an error is
1804
    /// returned.
1805
    #[inline(never)]
1806
0
    fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1807
0
        let prim1 = self.parse_set_class_item()?;
1808
0
        self.bump_space();
1809
0
        if self.is_eof() {
1810
0
            return Err(self.unclosed_class_error());
1811
0
        }
1812
        // If the next char isn't a `-`, then we don't have a range.
1813
        // There are two exceptions. If the char after a `-` is a `]`, then
1814
        // `-` is interpreted as a literal `-`. Alternatively, if the char
1815
        // after a `-` is a `-`, then `--` corresponds to a "difference"
1816
        // operation.
1817
0
        if self.char() != '-'
1818
0
            || self.peek_space() == Some(']')
1819
0
            || self.peek_space() == Some('-')
1820
        {
1821
0
            return prim1.into_class_set_item(self);
1822
0
        }
1823
        // OK, now we're parsing a range, so bump past the `-` and parse the
1824
        // second half of the range.
1825
0
        if !self.bump_and_bump_space() {
1826
0
            return Err(self.unclosed_class_error());
1827
0
        }
1828
0
        let prim2 = self.parse_set_class_item()?;
1829
0
        let range = ast::ClassSetRange {
1830
0
            span: Span::new(prim1.span().start, prim2.span().end),
1831
0
            start: prim1.into_class_literal(self)?,
1832
0
            end: prim2.into_class_literal(self)?,
1833
        };
1834
0
        if !range.is_valid() {
1835
0
            return Err(
1836
0
                self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1837
0
            );
1838
0
        }
1839
0
        Ok(ast::ClassSetItem::Range(range))
1840
0
    }
1841
1842
    /// Parse a single item in a character class as a primitive, where the
1843
    /// primitive either consists of a verbatim literal or a single escape
1844
    /// sequence.
1845
    ///
1846
    /// This assumes the parser is positioned at the beginning of a primitive,
1847
    /// and advances the parser to the first position after the primitive if
1848
    /// successful.
1849
    ///
1850
    /// Note that it is the caller's responsibility to report an error if an
1851
    /// illegal primitive was parsed.
1852
    #[inline(never)]
1853
0
    fn parse_set_class_item(&self) -> Result<Primitive> {
1854
0
        if self.char() == '\\' {
1855
0
            self.parse_escape()
1856
        } else {
1857
0
            let x = Primitive::Literal(ast::Literal {
1858
0
                span: self.span_char(),
1859
0
                kind: ast::LiteralKind::Verbatim,
1860
0
                c: self.char(),
1861
0
            });
1862
0
            self.bump();
1863
0
            Ok(x)
1864
        }
1865
0
    }
1866
1867
    /// Parses the opening of a character class set. This includes the opening
1868
    /// bracket along with `^` if present to indicate negation. This also
1869
    /// starts parsing the opening set of unioned items if applicable, since
1870
    /// there are special rules applied to certain characters in the opening
1871
    /// of a character class. For example, `[^]]` is the class of all
1872
    /// characters not equal to `]`. (`]` would need to be escaped in any other
1873
    /// position.) Similarly for `-`.
1874
    ///
1875
    /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1876
    /// empty union. This empty union should be replaced with the actual item
1877
    /// when it is popped from the parser's stack.
1878
    ///
1879
    /// This assumes the parser is positioned at the opening `[` and advances
1880
    /// the parser to the first non-special byte of the character class.
1881
    ///
1882
    /// An error is returned if EOF is found.
1883
    #[inline(never)]
1884
0
    fn parse_set_class_open(
1885
0
        &self,
1886
0
    ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1887
0
        assert_eq!(self.char(), '[');
1888
0
        let start = self.pos();
1889
0
        if !self.bump_and_bump_space() {
1890
0
            return Err(self.error(
1891
0
                Span::new(start, self.pos()),
1892
0
                ast::ErrorKind::ClassUnclosed,
1893
0
            ));
1894
0
        }
1895
1896
0
        let negated = if self.char() != '^' {
1897
0
            false
1898
        } else {
1899
0
            if !self.bump_and_bump_space() {
1900
0
                return Err(self.error(
1901
0
                    Span::new(start, self.pos()),
1902
0
                    ast::ErrorKind::ClassUnclosed,
1903
0
                ));
1904
0
            }
1905
0
            true
1906
        };
1907
        // Accept any number of `-` as literal `-`.
1908
0
        let mut union =
1909
0
            ast::ClassSetUnion { span: self.span(), items: vec![] };
1910
0
        while self.char() == '-' {
1911
0
            union.push(ast::ClassSetItem::Literal(ast::Literal {
1912
0
                span: self.span_char(),
1913
0
                kind: ast::LiteralKind::Verbatim,
1914
0
                c: '-',
1915
0
            }));
1916
0
            if !self.bump_and_bump_space() {
1917
0
                return Err(self.error(
1918
0
                    Span::new(start, start),
1919
0
                    ast::ErrorKind::ClassUnclosed,
1920
0
                ));
1921
0
            }
1922
        }
1923
        // If `]` is the *first* char in a set, then interpret it as a literal
1924
        // `]`. That is, an empty class is impossible to write.
1925
0
        if union.items.is_empty() && self.char() == ']' {
1926
0
            union.push(ast::ClassSetItem::Literal(ast::Literal {
1927
0
                span: self.span_char(),
1928
0
                kind: ast::LiteralKind::Verbatim,
1929
0
                c: ']',
1930
0
            }));
1931
0
            if !self.bump_and_bump_space() {
1932
0
                return Err(self.error(
1933
0
                    Span::new(start, self.pos()),
1934
0
                    ast::ErrorKind::ClassUnclosed,
1935
0
                ));
1936
0
            }
1937
0
        }
1938
0
        let set = ast::ClassBracketed {
1939
0
            span: Span::new(start, self.pos()),
1940
0
            negated,
1941
0
            kind: ast::ClassSet::union(ast::ClassSetUnion {
1942
0
                span: Span::new(union.span.start, union.span.start),
1943
0
                items: vec![],
1944
0
            }),
1945
0
        };
1946
0
        Ok((set, union))
1947
0
    }
1948
1949
    /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1950
    ///
1951
    /// This assumes the parser is positioned at the opening `[`.
1952
    ///
1953
    /// If no valid ASCII character class could be found, then this does not
1954
    /// advance the parser and `None` is returned. Otherwise, the parser is
1955
    /// advanced to the first byte following the closing `]` and the
1956
    /// corresponding ASCII class is returned.
1957
    #[inline(never)]
1958
0
    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1959
        // ASCII character classes are interesting from a parsing perspective
1960
        // because parsing cannot fail with any interesting error. For example,
1961
        // in order to use an ASCII character class, it must be enclosed in
1962
        // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1963
        // of it as "ASCII character characters have the syntax `[:NAME:]`
1964
        // which can only appear within character brackets." This means that
1965
        // things like `[[:lower:]A]` are legal constructs.
1966
        //
1967
        // However, if one types an incorrect ASCII character class, e.g.,
1968
        // `[[:loower:]]`, then we treat that as a normal nested character
1969
        // class containing the characters `:elorw`. One might argue that we
1970
        // should return an error instead since the repeated colons give away
1971
        // the intent to write an ASCII class. But what if the user typed
1972
        // `[[:lower]]` instead? How can we tell that was intended to be an
1973
        // ASCII class and not just a normal nested class?
1974
        //
1975
        // Reasonable people can probably disagree over this, but for better
1976
        // or worse, we implement semantics that never fails at the expense
1977
        // of better failure modes.
1978
0
        assert_eq!(self.char(), '[');
1979
        // If parsing fails, then we back up the parser to this starting point.
1980
0
        let start = self.pos();
1981
0
        let mut negated = false;
1982
0
        if !self.bump() || self.char() != ':' {
1983
0
            self.parser().pos.set(start);
1984
0
            return None;
1985
0
        }
1986
0
        if !self.bump() {
1987
0
            self.parser().pos.set(start);
1988
0
            return None;
1989
0
        }
1990
0
        if self.char() == '^' {
1991
0
            negated = true;
1992
0
            if !self.bump() {
1993
0
                self.parser().pos.set(start);
1994
0
                return None;
1995
0
            }
1996
0
        }
1997
0
        let name_start = self.offset();
1998
0
        while self.char() != ':' && self.bump() {}
1999
0
        if self.is_eof() {
2000
0
            self.parser().pos.set(start);
2001
0
            return None;
2002
0
        }
2003
0
        let name = &self.pattern()[name_start..self.offset()];
2004
0
        if !self.bump_if(":]") {
2005
0
            self.parser().pos.set(start);
2006
0
            return None;
2007
0
        }
2008
0
        let kind = match ast::ClassAsciiKind::from_name(name) {
2009
0
            Some(kind) => kind,
2010
            None => {
2011
0
                self.parser().pos.set(start);
2012
0
                return None;
2013
            }
2014
        };
2015
0
        Some(ast::ClassAscii {
2016
0
            span: Span::new(start, self.pos()),
2017
0
            kind,
2018
0
            negated,
2019
0
        })
2020
0
    }
2021
2022
    /// Parse a Unicode class in either the single character notation, `\pN`
2023
    /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2024
    /// the parser is positioned at the `p` (or `P` for negation) and will
2025
    /// advance the parser to the character immediately following the class.
2026
    ///
2027
    /// Note that this does not check whether the class name is valid or not.
2028
    #[inline(never)]
2029
0
    fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2030
0
        assert!(self.char() == 'p' || self.char() == 'P');
2031
2032
0
        let mut scratch = self.parser().scratch.borrow_mut();
2033
0
        scratch.clear();
2034
2035
0
        let negated = self.char() == 'P';
2036
0
        if !self.bump_and_bump_space() {
2037
0
            return Err(
2038
0
                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2039
0
            );
2040
0
        }
2041
0
        let (start, kind) = if self.char() == '{' {
2042
0
            let start = self.span_char().end;
2043
0
            while self.bump_and_bump_space() && self.char() != '}' {
2044
0
                scratch.push(self.char());
2045
0
            }
2046
0
            if self.is_eof() {
2047
0
                return Err(self
2048
0
                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2049
0
            }
2050
0
            assert_eq!(self.char(), '}');
2051
0
            self.bump();
2052
2053
0
            let name = scratch.as_str();
2054
0
            if let Some(i) = name.find("!=") {
2055
0
                (
2056
0
                    start,
2057
0
                    ast::ClassUnicodeKind::NamedValue {
2058
0
                        op: ast::ClassUnicodeOpKind::NotEqual,
2059
0
                        name: name[..i].to_string(),
2060
0
                        value: name[i + 2..].to_string(),
2061
0
                    },
2062
0
                )
2063
0
            } else if let Some(i) = name.find(':') {
2064
0
                (
2065
0
                    start,
2066
0
                    ast::ClassUnicodeKind::NamedValue {
2067
0
                        op: ast::ClassUnicodeOpKind::Colon,
2068
0
                        name: name[..i].to_string(),
2069
0
                        value: name[i + 1..].to_string(),
2070
0
                    },
2071
0
                )
2072
0
            } else if let Some(i) = name.find('=') {
2073
0
                (
2074
0
                    start,
2075
0
                    ast::ClassUnicodeKind::NamedValue {
2076
0
                        op: ast::ClassUnicodeOpKind::Equal,
2077
0
                        name: name[..i].to_string(),
2078
0
                        value: name[i + 1..].to_string(),
2079
0
                    },
2080
0
                )
2081
            } else {
2082
0
                (start, ast::ClassUnicodeKind::Named(name.to_string()))
2083
            }
2084
        } else {
2085
0
            let start = self.pos();
2086
0
            let c = self.char();
2087
0
            if c == '\\' {
2088
0
                return Err(self.error(
2089
0
                    self.span_char(),
2090
0
                    ast::ErrorKind::UnicodeClassInvalid,
2091
0
                ));
2092
0
            }
2093
0
            self.bump_and_bump_space();
2094
0
            let kind = ast::ClassUnicodeKind::OneLetter(c);
2095
0
            (start, kind)
2096
        };
2097
0
        Ok(ast::ClassUnicode {
2098
0
            span: Span::new(start, self.pos()),
2099
0
            negated,
2100
0
            kind,
2101
0
        })
2102
0
    }
2103
2104
    /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2105
    /// parser is currently at a valid character class name and will be
2106
    /// advanced to the character immediately following the class.
2107
    #[inline(never)]
2108
0
    fn parse_perl_class(&self) -> ast::ClassPerl {
2109
0
        let c = self.char();
2110
0
        let span = self.span_char();
2111
0
        self.bump();
2112
0
        let (negated, kind) = match c {
2113
0
            'd' => (false, ast::ClassPerlKind::Digit),
2114
0
            'D' => (true, ast::ClassPerlKind::Digit),
2115
0
            's' => (false, ast::ClassPerlKind::Space),
2116
0
            'S' => (true, ast::ClassPerlKind::Space),
2117
0
            'w' => (false, ast::ClassPerlKind::Word),
2118
0
            'W' => (true, ast::ClassPerlKind::Word),
2119
0
            c => panic!("expected valid Perl class but got '{}'", c),
2120
        };
2121
0
        ast::ClassPerl { span, kind, negated }
2122
0
    }
2123
}
2124
2125
/// A type that traverses a fully parsed Ast and checks whether its depth
2126
/// exceeds the specified nesting limit. If it does, then an error is returned.
2127
#[derive(Debug)]
2128
struct NestLimiter<'p, 's, P> {
2129
    /// The parser that is checking the nest limit.
2130
    p: &'p ParserI<'s, P>,
2131
    /// The current depth while walking an Ast.
2132
    depth: u32,
2133
}
2134
2135
impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2136
0
    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2137
0
        NestLimiter { p, depth: 0 }
2138
0
    }
2139
2140
    #[inline(never)]
2141
0
    fn check(self, ast: &Ast) -> Result<()> {
2142
0
        ast::visit(ast, self)
2143
0
    }
2144
2145
0
    fn increment_depth(&mut self, span: &Span) -> Result<()> {
2146
0
        let new = self.depth.checked_add(1).ok_or_else(|| {
2147
0
            self.p.error(
2148
0
                span.clone(),
2149
0
                ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2150
            )
2151
0
        })?;
2152
0
        let limit = self.p.parser().nest_limit;
2153
0
        if new > limit {
2154
0
            return Err(self.p.error(
2155
0
                span.clone(),
2156
0
                ast::ErrorKind::NestLimitExceeded(limit),
2157
0
            ));
2158
0
        }
2159
0
        self.depth = new;
2160
0
        Ok(())
2161
0
    }
2162
2163
0
    fn decrement_depth(&mut self) {
2164
        // Assuming the correctness of the visitor, this should never drop
2165
        // below 0.
2166
0
        self.depth = self.depth.checked_sub(1).unwrap();
2167
0
    }
2168
}
2169
2170
impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2171
    type Output = ();
2172
    type Err = ast::Error;
2173
2174
0
    fn finish(self) -> Result<()> {
2175
0
        Ok(())
2176
0
    }
2177
2178
0
    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2179
0
        let span = match *ast {
2180
            Ast::Empty(_)
2181
            | Ast::Flags(_)
2182
            | Ast::Literal(_)
2183
            | Ast::Dot(_)
2184
            | Ast::Assertion(_)
2185
            | Ast::Class(ast::Class::Unicode(_))
2186
            | Ast::Class(ast::Class::Perl(_)) => {
2187
                // These are all base cases, so we don't increment depth.
2188
0
                return Ok(());
2189
            }
2190
0
            Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2191
0
            Ast::Repetition(ref x) => &x.span,
2192
0
            Ast::Group(ref x) => &x.span,
2193
0
            Ast::Alternation(ref x) => &x.span,
2194
0
            Ast::Concat(ref x) => &x.span,
2195
        };
2196
0
        self.increment_depth(span)
2197
0
    }
2198
2199
0
    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2200
0
        match *ast {
2201
            Ast::Empty(_)
2202
            | Ast::Flags(_)
2203
            | Ast::Literal(_)
2204
            | Ast::Dot(_)
2205
            | Ast::Assertion(_)
2206
            | Ast::Class(ast::Class::Unicode(_))
2207
            | Ast::Class(ast::Class::Perl(_)) => {
2208
                // These are all base cases, so we don't decrement depth.
2209
0
                Ok(())
2210
            }
2211
            Ast::Class(ast::Class::Bracketed(_))
2212
            | Ast::Repetition(_)
2213
            | Ast::Group(_)
2214
            | Ast::Alternation(_)
2215
            | Ast::Concat(_) => {
2216
0
                self.decrement_depth();
2217
0
                Ok(())
2218
            }
2219
        }
2220
0
    }
2221
2222
0
    fn visit_class_set_item_pre(
2223
0
        &mut self,
2224
0
        ast: &ast::ClassSetItem,
2225
0
    ) -> Result<()> {
2226
0
        let span = match *ast {
2227
            ast::ClassSetItem::Empty(_)
2228
            | ast::ClassSetItem::Literal(_)
2229
            | ast::ClassSetItem::Range(_)
2230
            | ast::ClassSetItem::Ascii(_)
2231
            | ast::ClassSetItem::Unicode(_)
2232
            | ast::ClassSetItem::Perl(_) => {
2233
                // These are all base cases, so we don't increment depth.
2234
0
                return Ok(());
2235
            }
2236
0
            ast::ClassSetItem::Bracketed(ref x) => &x.span,
2237
0
            ast::ClassSetItem::Union(ref x) => &x.span,
2238
        };
2239
0
        self.increment_depth(span)
2240
0
    }
2241
2242
0
    fn visit_class_set_item_post(
2243
0
        &mut self,
2244
0
        ast: &ast::ClassSetItem,
2245
0
    ) -> Result<()> {
2246
0
        match *ast {
2247
            ast::ClassSetItem::Empty(_)
2248
            | ast::ClassSetItem::Literal(_)
2249
            | ast::ClassSetItem::Range(_)
2250
            | ast::ClassSetItem::Ascii(_)
2251
            | ast::ClassSetItem::Unicode(_)
2252
            | ast::ClassSetItem::Perl(_) => {
2253
                // These are all base cases, so we don't decrement depth.
2254
0
                Ok(())
2255
            }
2256
            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2257
0
                self.decrement_depth();
2258
0
                Ok(())
2259
            }
2260
        }
2261
0
    }
2262
2263
0
    fn visit_class_set_binary_op_pre(
2264
0
        &mut self,
2265
0
        ast: &ast::ClassSetBinaryOp,
2266
0
    ) -> Result<()> {
2267
0
        self.increment_depth(&ast.span)
2268
0
    }
2269
2270
0
    fn visit_class_set_binary_op_post(
2271
0
        &mut self,
2272
0
        _ast: &ast::ClassSetBinaryOp,
2273
0
    ) -> Result<()> {
2274
0
        self.decrement_depth();
2275
0
        Ok(())
2276
0
    }
2277
}
2278
2279
/// When the result is an error, transforms the ast::ErrorKind from the source
2280
/// Result into another one. This function is used to return clearer error
2281
/// messages when possible.
2282
0
fn specialize_err<T>(
2283
0
    result: Result<T>,
2284
0
    from: ast::ErrorKind,
2285
0
    to: ast::ErrorKind,
2286
0
) -> Result<T> {
2287
0
    if let Err(e) = result {
2288
0
        if e.kind == from {
2289
0
            Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2290
        } else {
2291
0
            Err(e)
2292
        }
2293
    } else {
2294
0
        result
2295
    }
2296
0
}
2297
2298
#[cfg(test)]
2299
mod tests {
2300
    use std::ops::Range;
2301
2302
    use super::{Parser, ParserBuilder, ParserI, Primitive};
2303
    use crate::ast::{self, Ast, Position, Span};
2304
2305
    // A local replacement for the standard `assert_eq!` that prints each side
    // of a failed comparison on its own line. The output is still not pretty,
    // but it is a little easier to read for large values.
    macro_rules! assert_eq {
        ($left:expr, $right:expr) => {{
            // Going through `match` keeps temporaries in either operand alive
            // for the whole comparison.
            match (&$left, &$right) {
                (lhs, rhs) if *lhs == *rhs => {}
                (lhs, rhs) => panic!(
                    "assertion failed: `(left == right)`\n\n\
                     left:  `{:?}`\nright: `{:?}`\n\n",
                    lhs, rhs
                ),
            }
        }};
    }
2322
2323
    // We create these errors to compare with real ast::Errors in the tests.
2324
    // We define equality between TestError and ast::Error to disregard the
2325
    // pattern string in ast::Error, which is annoying to provide in tests.
2326
    #[derive(Clone, Debug)]
2327
    struct TestError {
2328
        span: Span,
2329
        kind: ast::ErrorKind,
2330
    }
2331
2332
    impl PartialEq<ast::Error> for TestError {
2333
        fn eq(&self, other: &ast::Error) -> bool {
2334
            self.span == other.span && self.kind == other.kind
2335
        }
2336
    }
2337
2338
    impl PartialEq<TestError> for ast::Error {
        /// Compares span and kind only, so that a real error can appear on
        /// the left-hand side of a comparison with a `TestError`.
        fn eq(&self, other: &TestError) -> bool {
            (&self.span, &self.kind) == (&other.span, &other.kind)
        }
    }
2343
2344
    /// Short alias for turning a string slice into an owned `String`.
    fn s(text: &str) -> String {
        String::from(text)
    }
2347
2348
    fn parser(pattern: &str) -> ParserI<'_, Parser> {
2349
        ParserI::new(Parser::new(), pattern)
2350
    }
2351
2352
    fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2353
        let parser = ParserBuilder::new().octal(true).build();
2354
        ParserI::new(parser, pattern)
2355
    }
2356
2357
    fn parser_nest_limit(
2358
        pattern: &str,
2359
        nest_limit: u32,
2360
    ) -> ParserI<'_, Parser> {
2361
        let p = ParserBuilder::new().nest_limit(nest_limit).build();
2362
        ParserI::new(p, pattern)
2363
    }
2364
2365
    fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2366
        let p = ParserBuilder::new().ignore_whitespace(true).build();
2367
        ParserI::new(p, pattern)
2368
    }
2369
2370
    /// Short alias for creating a new span.
2371
    fn nspan(start: Position, end: Position) -> Span {
2372
        Span::new(start, end)
2373
    }
2374
2375
    /// Short alias for creating a new position.
2376
    fn npos(offset: usize, line: usize, column: usize) -> Position {
2377
        Position::new(offset, line, column)
2378
    }
2379
2380
    /// Create a new span from the given offset range. This assumes a single
2381
    /// line and sets the columns based on the offsets. i.e., This only works
2382
    /// out of the box for ASCII, which is fine for most tests.
2383
    fn span(range: Range<usize>) -> Span {
2384
        let start = Position::new(range.start, 1, range.start + 1);
2385
        let end = Position::new(range.end, 1, range.end + 1);
2386
        Span::new(start, end)
2387
    }
2388
2389
    /// Create a new span for the corresponding byte range in the given string.
2390
    fn span_range(subject: &str, range: Range<usize>) -> Span {
2391
        let start = Position {
2392
            offset: range.start,
2393
            line: 1 + subject[..range.start].matches('\n').count(),
2394
            column: 1 + subject[..range.start]
2395
                .chars()
2396
                .rev()
2397
                .position(|c| c == '\n')
2398
                .unwrap_or(subject[..range.start].chars().count()),
2399
        };
2400
        let end = Position {
2401
            offset: range.end,
2402
            line: 1 + subject[..range.end].matches('\n').count(),
2403
            column: 1 + subject[..range.end]
2404
                .chars()
2405
                .rev()
2406
                .position(|c| c == '\n')
2407
                .unwrap_or(subject[..range.end].chars().count()),
2408
        };
2409
        Span::new(start, end)
2410
    }
2411
2412
    /// Create a verbatim literal starting at the given position.
2413
    fn lit(c: char, start: usize) -> Ast {
2414
        lit_with(c, span(start..start + c.len_utf8()))
2415
    }
2416
2417
    /// Create a punctuation literal starting at the given position.
2418
    fn punct_lit(c: char, span: Span) -> Ast {
2419
        Ast::Literal(ast::Literal {
2420
            span,
2421
            kind: ast::LiteralKind::Punctuation,
2422
            c,
2423
        })
2424
    }
2425
2426
    /// Create a verbatim literal with the given span.
2427
    fn lit_with(c: char, span: Span) -> Ast {
2428
        Ast::Literal(ast::Literal {
2429
            span,
2430
            kind: ast::LiteralKind::Verbatim,
2431
            c,
2432
        })
2433
    }
2434
2435
    /// Create a concatenation with the given range.
2436
    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2437
        concat_with(span(range), asts)
2438
    }
2439
2440
    /// Create a concatenation with the given span.
2441
    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2442
        Ast::Concat(ast::Concat { span, asts })
2443
    }
2444
2445
    /// Create an alternation with the given span.
2446
    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2447
        Ast::Alternation(ast::Alternation { span: span(range), asts })
2448
    }
2449
2450
    /// Create a capturing group with the given span.
2451
    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2452
        Ast::Group(ast::Group {
2453
            span: span(range),
2454
            kind: ast::GroupKind::CaptureIndex(index),
2455
            ast: Box::new(ast),
2456
        })
2457
    }
2458
2459
    /// Create an ast::SetFlags.
2460
    ///
2461
    /// The given pattern should be the full pattern string. The range given
2462
    /// should correspond to the byte offsets where the flag set occurs.
2463
    ///
2464
    /// If negated is true, then the set is interpreted as beginning with a
2465
    /// negation.
2466
    fn flag_set(
2467
        pat: &str,
2468
        range: Range<usize>,
2469
        flag: ast::Flag,
2470
        negated: bool,
2471
    ) -> Ast {
2472
        let mut items = vec![ast::FlagsItem {
2473
            span: span_range(pat, (range.end - 2)..(range.end - 1)),
2474
            kind: ast::FlagsItemKind::Flag(flag),
2475
        }];
2476
        if negated {
2477
            items.insert(
2478
                0,
2479
                ast::FlagsItem {
2480
                    span: span_range(pat, (range.start + 2)..(range.end - 2)),
2481
                    kind: ast::FlagsItemKind::Negation,
2482
                },
2483
            );
2484
        }
2485
        Ast::Flags(ast::SetFlags {
2486
            span: span_range(pat, range.clone()),
2487
            flags: ast::Flags {
2488
                span: span_range(pat, (range.start + 2)..(range.end - 1)),
2489
                items,
2490
            },
2491
        })
2492
    }
2493
2494
    #[test]
    fn parse_nest_limit() {
        // The error expected when the node at `range` exceeds `limit`.
        fn too_deep(range: Range<usize>, limit: u32) -> TestError {
            TestError {
                span: span(range),
                kind: ast::ErrorKind::NestLimitExceeded(limit),
            }
        }

        // A greedy repetition of `inner` spanning `whole`, whose operator
        // occupies `op`.
        fn greedy_rep(
            whole: Range<usize>,
            op: Range<usize>,
            kind: ast::RepetitionKind,
            inner: Ast,
        ) -> Ast {
            Ast::Repetition(ast::Repetition {
                span: span(whole),
                op: ast::RepetitionOp { span: span(op), kind },
                greedy: true,
                ast: Box::new(inner),
            })
        }

        // Parse `pattern` under `limit` and return the error it must yield.
        fn reject(pattern: &str, limit: u32) -> ast::Error {
            parser_nest_limit(pattern, limit).parse().unwrap_err()
        }

        // A nest limit of 0 still allows some types of regexes.
        assert_eq!(
            parser_nest_limit("", 0).parse(),
            Ok(Ast::Empty(span(0..0)))
        );
        assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));

        // Test repetition operations, which require one level of nesting.
        assert_eq!(reject("a+", 0), too_deep(0..2, 0));
        assert_eq!(
            parser_nest_limit("a+", 1).parse(),
            Ok(greedy_rep(
                0..2,
                1..2,
                ast::RepetitionKind::OneOrMore,
                lit('a', 0)
            ))
        );
        assert_eq!(reject("(a)+", 1), too_deep(0..3, 1));
        assert_eq!(reject("a+*", 1), too_deep(0..2, 1));
        assert_eq!(
            parser_nest_limit("a+*", 2).parse(),
            Ok(greedy_rep(
                0..3,
                2..3,
                ast::RepetitionKind::ZeroOrMore,
                greedy_rep(
                    0..2,
                    1..2,
                    ast::RepetitionKind::OneOrMore,
                    lit('a', 0)
                )
            ))
        );

        // Test concatenations. A concatenation requires one level of nesting.
        assert_eq!(reject("ab", 0), too_deep(0..2, 0));
        assert_eq!(
            parser_nest_limit("ab", 1).parse(),
            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
        );
        assert_eq!(
            parser_nest_limit("abc", 1).parse(),
            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
        );

        // Test alternations. An alternation requires one level of nesting.
        assert_eq!(reject("a|b", 0), too_deep(0..3, 0));
        assert_eq!(
            parser_nest_limit("a|b", 1).parse(),
            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
        );
        assert_eq!(
            parser_nest_limit("a|b|c", 1).parse(),
            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
        );

        // Test character classes. Classes form their own mini-recursive
        // syntax!
        assert_eq!(reject("[a]", 0), too_deep(0..3, 0));
        let class_a = Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
                ast::Literal {
                    span: span(1..2),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'a',
                },
            )),
        }));
        assert_eq!(parser_nest_limit("[a]", 1).parse(), Ok(class_a));
        assert_eq!(reject("[ab]", 1), too_deep(1..3, 1));
        assert_eq!(reject("[ab[cd]]", 2), too_deep(3..7, 2));
        assert_eq!(reject("[ab[cd]]", 3), too_deep(4..6, 3));
        assert_eq!(reject("[a--b]", 1), too_deep(1..5, 1));
        assert_eq!(reject("[a--bc]", 2), too_deep(4..6, 2));
    }
2651
2652
    #[test]
    fn parse_comments() {
        // The pattern is assembled line by line so that the leading
        // whitespace on each line, which the offsets below depend on, is
        // explicit.
        let pat = [
            "(?x)",
            "# This is comment 1.",
            "foo # This is comment 2.",
            "  # This is comment 3.",
            "bar",
            "# This is comment 4.",
        ]
        .join("\n");
        let pat = pat.as_str();

        let at = |range: Range<usize>| span_range(pat, range);
        // One verbatim literal per character of `word`, starting at `start`.
        let letters = |word: &str, start: usize| -> Vec<Ast> {
            word.char_indices()
                .map(|(i, c)| lit_with(c, at(start + i..start + i + 1)))
                .collect()
        };
        let comment = |range: Range<usize>, text: &str| ast::Comment {
            span: at(range),
            comment: s(text),
        };

        let astc = parser(pat).parse_with_comments().unwrap();

        let mut asts =
            vec![flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false)];
        asts.extend(letters("foo", 26));
        asts.extend(letters("bar", 74));
        assert_eq!(astc.ast, concat_with(at(0..pat.len()), asts));

        assert_eq!(
            astc.comments,
            vec![
                comment(5..26, " This is comment 1."),
                comment(30..51, " This is comment 2."),
                comment(53..74, " This is comment 3."),
                comment(78..98, " This is comment 4."),
            ]
        );
    }
2698
2699
    #[test]
    fn parse_holistic() {
        assert_eq!(parser("]").parse(), Ok(lit(']', 0)));

        // Every escaped punctuation character takes two bytes in the pattern
        // (the backslash and the character itself), so the i-th literal
        // covers bytes 2i..2i+2.
        let escaped = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~";
        let expected: Vec<Ast> = r"\.+*?()|[]{}^$#&-~"
            .chars()
            .enumerate()
            .map(|(i, c)| punct_lit(c, span(2 * i..2 * i + 2)))
            .collect();
        assert_eq!(parser(escaped).parse(), Ok(concat(0..36, expected)));
    }
2729
2730
    #[test]
    fn parse_ignore_whitespace() {
        // The `(?x)` / `(?-x)` directive located at `range` in `pat`.
        fn x_flag(pat: &str, range: Range<usize>, negated: bool) -> Ast {
            flag_set(pat, range, ast::Flag::IgnoreWhitespace, negated)
        }

        // Expected parse of a pattern made of a leading `(?x)` followed by
        // the nodes in `rest`. All patterns here are single-line ASCII.
        fn with_x_prefix(pat: &str, rest: Vec<Ast>) -> Ast {
            let mut asts = vec![x_flag(pat, 0..4, false)];
            asts.extend(rest);
            concat_with(span(0..pat.len()), asts)
        }

        // A group covering `range` with the given kind and sub-expression.
        fn group_of(
            range: Range<usize>,
            kind: ast::GroupKind,
            inner: Ast,
        ) -> Ast {
            Ast::Group(ast::Group {
                span: span(range),
                kind,
                ast: Box::new(inner),
            })
        }

        // A verbatim literal covering `range`.
        fn ch(c: char, range: Range<usize>) -> Ast {
            lit_with(c, span(range))
        }

        // Test that basic whitespace insensitivity works.
        let pat = "(?x)a b";
        assert_eq!(
            parser(pat).parse(),
            Ok(with_x_prefix(pat, vec![ch('a', 4..5), ch('b', 6..7)]))
        );

        // Test that we can toggle whitespace insensitivity.
        let pat = "(?x)a b(?-x)a b";
        assert_eq!(
            parser(pat).parse(),
            Ok(with_x_prefix(
                pat,
                vec![
                    ch('a', 4..5),
                    ch('b', 6..7),
                    x_flag(pat, 7..12, true),
                    ch('a', 12..13),
                    ch(' ', 13..14),
                    ch('b', 14..15),
                ]
            ))
        );

        // Test that nesting whitespace insensitive flags works.
        let pat = "a (?x:a )a ";
        let x_only = ast::Flags {
            span: span(4..5),
            items: vec![ast::FlagsItem {
                span: span(4..5),
                kind: ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace),
            }],
        };
        assert_eq!(
            parser(pat).parse(),
            Ok(concat(
                0..11,
                vec![
                    ch('a', 0..1),
                    ch(' ', 1..2),
                    group_of(
                        2..9,
                        ast::GroupKind::NonCapturing(x_only),
                        ch('a', 6..7)
                    ),
                    ch('a', 9..10),
                    ch(' ', 10..11),
                ]
            ))
        );

        // Test that whitespace after an opening paren is insignificant.
        let pat = "(?x)( ?P<foo> a )";
        let named = ast::GroupKind::CaptureName(ast::CaptureName {
            span: span(9..12),
            name: s("foo"),
            index: 1,
        });
        assert_eq!(
            parser(pat).parse(),
            Ok(with_x_prefix(
                pat,
                vec![group_of(4..pat.len(), named, ch('a', 14..15))]
            ))
        );

        let pat = "(?x)(  a )";
        let indexed = ast::GroupKind::CaptureIndex(1);
        assert_eq!(
            parser(pat).parse(),
            Ok(with_x_prefix(
                pat,
                vec![group_of(4..pat.len(), indexed, ch('a', 7..8))]
            ))
        );

        let pat = "(?x)(  ?:  a )";
        let no_flags = ast::GroupKind::NonCapturing(ast::Flags {
            span: span(8..8),
            items: vec![],
        });
        assert_eq!(
            parser(pat).parse(),
            Ok(with_x_prefix(
                pat,
                vec![group_of(4..pat.len(), no_flags, ch('a', 11..12))]
            ))
        );

        let pat = r"(?x)\x { 53 }";
        let hex = Ast::Literal(ast::Literal {
            span: span(4..13),
            kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
            c: 'S',
        });
        assert_eq!(parser(pat).parse(), Ok(with_x_prefix(pat, vec![hex])));

        // Test that whitespace after an escape is OK.
        let pat = r"(?x)\ ";
        let escaped_space = Ast::Literal(ast::Literal {
            span: span(4..6),
            kind: ast::LiteralKind::Special(ast::SpecialLiteralKind::Space),
            c: ' ',
        });
        assert_eq!(
            parser(pat).parse(),
            Ok(with_x_prefix(pat, vec![escaped_space]))
        );

        // ... but only when `x` mode is enabled.
        let pat = r"\ ";
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::EscapeUnrecognized,
            }
        );
    }
2891
2892
    #[test]
    fn parse_newlines() {
        let pat = ".\n.";
        let at = |range: Range<usize>| span_range(pat, range);
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                at(0..3),
                vec![
                    Ast::Dot(at(0..1)),
                    lit_with('\n', at(1..2)),
                    Ast::Dot(at(2..3)),
                ]
            ))
        );

        // Every character parses to its own literal. The expected positions
        // are tracked by hand here rather than through `span_range`: a
        // newline ends on column 1 of the following line, and any other
        // character advances the column by one.
        let pat = "foobar\nbaz\nquux\n";
        let mut expected = Vec::with_capacity(pat.len());
        let (mut line, mut column) = (1, 1);
        for (offset, c) in pat.char_indices() {
            let start = npos(offset, line, column);
            if c == '\n' {
                line += 1;
                column = 1;
            } else {
                column += 1;
            }
            let end = npos(offset + c.len_utf8(), line, column);
            expected.push(lit_with(c, nspan(start, end)));
        }
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(span_range(pat, 0..pat.len()), expected))
        );
    }
2933
2934
    #[test]
2935
    fn parse_uncounted_repetition() {
2936
        assert_eq!(
2937
            parser(r"a*").parse(),
2938
            Ok(Ast::Repetition(ast::Repetition {
2939
                span: span(0..2),
2940
                op: ast::RepetitionOp {
2941
                    span: span(1..2),
2942
                    kind: ast::RepetitionKind::ZeroOrMore,
2943
                },
2944
                greedy: true,
2945
                ast: Box::new(lit('a', 0)),
2946
            }))
2947
        );
2948
        assert_eq!(
2949
            parser(r"a+").parse(),
2950
            Ok(Ast::Repetition(ast::Repetition {
2951
                span: span(0..2),
2952
                op: ast::RepetitionOp {
2953
                    span: span(1..2),
2954
                    kind: ast::RepetitionKind::OneOrMore,
2955
                },
2956
                greedy: true,
2957
                ast: Box::new(lit('a', 0)),
2958
            }))
2959
        );
2960
2961
        assert_eq!(
2962
            parser(r"a?").parse(),
2963
            Ok(Ast::Repetition(ast::Repetition {
2964
                span: span(0..2),
2965
                op: ast::RepetitionOp {
2966
                    span: span(1..2),
2967
                    kind: ast::RepetitionKind::ZeroOrOne,
2968
                },
2969
                greedy: true,
2970
                ast: Box::new(lit('a', 0)),
2971
            }))
2972
        );
2973
        assert_eq!(
2974
            parser(r"a??").parse(),
2975
            Ok(Ast::Repetition(ast::Repetition {
2976
                span: span(0..3),
2977
                op: ast::RepetitionOp {
2978
                    span: span(1..3),
2979
                    kind: ast::RepetitionKind::ZeroOrOne,
2980
                },
2981
                greedy: false,
2982
                ast: Box::new(lit('a', 0)),
2983
            }))
2984
        );
2985
        assert_eq!(
2986
            parser(r"a?").parse(),
2987
            Ok(Ast::Repetition(ast::Repetition {
2988
                span: span(0..2),
2989
                op: ast::RepetitionOp {
2990
                    span: span(1..2),
2991
                    kind: ast::RepetitionKind::ZeroOrOne,
2992
                },
2993
                greedy: true,
2994
                ast: Box::new(lit('a', 0)),
2995
            }))
2996
        );
2997
        assert_eq!(
2998
            parser(r"a?b").parse(),
2999
            Ok(concat(
3000
                0..3,
3001
                vec![
3002
                    Ast::Repetition(ast::Repetition {
3003
                        span: span(0..2),
3004
                        op: ast::RepetitionOp {
3005
                            span: span(1..2),
3006
                            kind: ast::RepetitionKind::ZeroOrOne,
3007
                        },
3008
                        greedy: true,
3009
                        ast: Box::new(lit('a', 0)),
3010
                    }),
3011
                    lit('b', 2),
3012
                ]
3013
            ))
3014
        );
3015
        assert_eq!(
3016
            parser(r"a??b").parse(),
3017
            Ok(concat(
3018
                0..4,
3019
                vec![
3020
                    Ast::Repetition(ast::Repetition {
3021
                        span: span(0..3),
3022
                        op: ast::RepetitionOp {
3023
                            span: span(1..3),
3024
                            kind: ast::RepetitionKind::ZeroOrOne,
3025
                        },
3026
                        greedy: false,
3027
                        ast: Box::new(lit('a', 0)),
3028
                    }),
3029
                    lit('b', 3),
3030
                ]
3031
            ))
3032
        );
3033
        assert_eq!(
3034
            parser(r"ab?").parse(),
3035
            Ok(concat(
3036
                0..3,
3037
                vec![
3038
                    lit('a', 0),
3039
                    Ast::Repetition(ast::Repetition {
3040
                        span: span(1..3),
3041
                        op: ast::RepetitionOp {
3042
                            span: span(2..3),
3043
                            kind: ast::RepetitionKind::ZeroOrOne,
3044
                        },
3045
                        greedy: true,
3046
                        ast: Box::new(lit('b', 1)),
3047
                    }),
3048
                ]
3049
            ))
3050
        );
3051
        assert_eq!(
3052
            parser(r"(ab)?").parse(),
3053
            Ok(Ast::Repetition(ast::Repetition {
3054
                span: span(0..5),
3055
                op: ast::RepetitionOp {
3056
                    span: span(4..5),
3057
                    kind: ast::RepetitionKind::ZeroOrOne,
3058
                },
3059
                greedy: true,
3060
                ast: Box::new(group(
3061
                    0..4,
3062
                    1,
3063
                    concat(1..3, vec![lit('a', 1), lit('b', 2),])
3064
                )),
3065
            }))
3066
        );
3067
        assert_eq!(
3068
            parser(r"|a?").parse(),
3069
            Ok(alt(
3070
                0..3,
3071
                vec![
3072
                    Ast::Empty(span(0..0)),
3073
                    Ast::Repetition(ast::Repetition {
3074
                        span: span(1..3),
3075
                        op: ast::RepetitionOp {
3076
                            span: span(2..3),
3077
                            kind: ast::RepetitionKind::ZeroOrOne,
3078
                        },
3079
                        greedy: true,
3080
                        ast: Box::new(lit('a', 1)),
3081
                    }),
3082
                ]
3083
            ))
3084
        );
3085
3086
        assert_eq!(
3087
            parser(r"*").parse().unwrap_err(),
3088
            TestError {
3089
                span: span(0..0),
3090
                kind: ast::ErrorKind::RepetitionMissing,
3091
            }
3092
        );
3093
        assert_eq!(
3094
            parser(r"(?i)*").parse().unwrap_err(),
3095
            TestError {
3096
                span: span(4..4),
3097
                kind: ast::ErrorKind::RepetitionMissing,
3098
            }
3099
        );
3100
        assert_eq!(
3101
            parser(r"(*)").parse().unwrap_err(),
3102
            TestError {
3103
                span: span(1..1),
3104
                kind: ast::ErrorKind::RepetitionMissing,
3105
            }
3106
        );
3107
        assert_eq!(
3108
            parser(r"(?:?)").parse().unwrap_err(),
3109
            TestError {
3110
                span: span(3..3),
3111
                kind: ast::ErrorKind::RepetitionMissing,
3112
            }
3113
        );
3114
        assert_eq!(
3115
            parser(r"+").parse().unwrap_err(),
3116
            TestError {
3117
                span: span(0..0),
3118
                kind: ast::ErrorKind::RepetitionMissing,
3119
            }
3120
        );
3121
        assert_eq!(
3122
            parser(r"?").parse().unwrap_err(),
3123
            TestError {
3124
                span: span(0..0),
3125
                kind: ast::ErrorKind::RepetitionMissing,
3126
            }
3127
        );
3128
        assert_eq!(
3129
            parser(r"(?)").parse().unwrap_err(),
3130
            TestError {
3131
                span: span(1..1),
3132
                kind: ast::ErrorKind::RepetitionMissing,
3133
            }
3134
        );
3135
        assert_eq!(
3136
            parser(r"|*").parse().unwrap_err(),
3137
            TestError {
3138
                span: span(1..1),
3139
                kind: ast::ErrorKind::RepetitionMissing,
3140
            }
3141
        );
3142
        assert_eq!(
3143
            parser(r"|+").parse().unwrap_err(),
3144
            TestError {
3145
                span: span(1..1),
3146
                kind: ast::ErrorKind::RepetitionMissing,
3147
            }
3148
        );
3149
        assert_eq!(
3150
            parser(r"|?").parse().unwrap_err(),
3151
            TestError {
3152
                span: span(1..1),
3153
                kind: ast::ErrorKind::RepetitionMissing,
3154
            }
3155
        );
3156
    }
3157
3158
    #[test]
    fn parse_counted_repetition() {
        // Builds the `Ast::Repetition` expected for a counted repetition:
        // `whole` spans the entire repetition expression, `op` spans just
        // the `{..}` operator (including a trailing `?` when present).
        fn counted(
            whole: std::ops::Range<usize>,
            op: std::ops::Range<usize>,
            range: ast::RepetitionRange,
            greedy: bool,
            inner: Ast,
        ) -> Ast {
            Ast::Repetition(ast::Repetition {
                span: span(whole),
                op: ast::RepetitionOp {
                    span: span(op),
                    kind: ast::RepetitionKind::Range(range),
                },
                greedy,
                ast: Box::new(inner),
            })
        }

        // Well-formed counted repetitions applied to a single literal.
        assert_eq!(
            parser(r"a{5}").parse(),
            Ok(counted(
                0..4,
                1..4,
                ast::RepetitionRange::Exactly(5),
                true,
                lit('a', 0)
            ))
        );
        assert_eq!(
            parser(r"a{5,}").parse(),
            Ok(counted(
                0..5,
                1..5,
                ast::RepetitionRange::AtLeast(5),
                true,
                lit('a', 0)
            ))
        );
        assert_eq!(
            parser(r"a{5,9}").parse(),
            Ok(counted(
                0..6,
                1..6,
                ast::RepetitionRange::Bounded(5, 9),
                true,
                lit('a', 0)
            ))
        );
        // A trailing `?` flips greediness and belongs to the operator span.
        assert_eq!(
            parser(r"a{5}?").parse(),
            Ok(counted(
                0..5,
                1..5,
                ast::RepetitionRange::Exactly(5),
                false,
                lit('a', 0)
            ))
        );

        // The repetition binds only to the literal immediately before it.
        assert_eq!(
            parser(r"ab{5}").parse(),
            Ok(concat(
                0..5,
                vec![
                    lit('a', 0),
                    counted(
                        1..5,
                        2..5,
                        ast::RepetitionRange::Exactly(5),
                        true,
                        lit('b', 1)
                    ),
                ]
            ))
        );
        assert_eq!(
            parser(r"ab{5}c").parse(),
            Ok(concat(
                0..6,
                vec![
                    lit('a', 0),
                    counted(
                        1..5,
                        2..5,
                        ast::RepetitionRange::Exactly(5),
                        true,
                        lit('b', 1)
                    ),
                    lit('c', 5),
                ]
            ))
        );

        // Whitespace inside the braces is accepted.
        assert_eq!(
            parser(r"a{ 5 }").parse(),
            Ok(counted(
                0..6,
                1..6,
                ast::RepetitionRange::Exactly(5),
                true,
                lit('a', 0)
            ))
        );
        assert_eq!(
            parser(r"a{ 5 , 9 }").parse(),
            Ok(counted(
                0..10,
                1..10,
                ast::RepetitionRange::Bounded(5, 9),
                true,
                lit('a', 0)
            ))
        );
        // With whitespace ignored, the `?` after a space still applies.
        assert_eq!(
            parser_ignore_whitespace(r"a{5,9} ?").parse(),
            Ok(counted(
                0..8,
                1..8,
                ast::RepetitionRange::Bounded(5, 9),
                false,
                lit('a', 0)
            ))
        );

        // Malformed inputs, checked in order: (pattern, error span, kind).
        let failures = vec![
            (r"(?i){0}", 4..4, ast::ErrorKind::RepetitionMissing),
            (r"(?m){1,1}", 4..4, ast::ErrorKind::RepetitionMissing),
            (r"a{]}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{1,]}", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{", 1..2, ast::ErrorKind::RepetitionCountUnclosed),
            (r"a{}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{a", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{9999999999}", 2..12, ast::ErrorKind::DecimalInvalid),
            (r"a{9", 1..3, ast::ErrorKind::RepetitionCountUnclosed),
            (r"a{9,a", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
            (r"a{9,9999999999}", 4..14, ast::ErrorKind::DecimalInvalid),
            (r"a{9,", 1..4, ast::ErrorKind::RepetitionCountUnclosed),
            (r"a{9,11", 1..6, ast::ErrorKind::RepetitionCountUnclosed),
            (r"a{2,1}", 1..6, ast::ErrorKind::RepetitionCountInvalid),
            (r"{5}", 0..0, ast::ErrorKind::RepetitionMissing),
            (r"|{5}", 1..1, ast::ErrorKind::RepetitionMissing),
        ];
        for (pattern, at, kind) in failures {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError { span: span(at), kind }
            );
        }
    }
3414
3415
    #[test]
    fn parse_alternate() {
        // Expected alternation node covering `range`, built explicitly.
        fn alternation(range: std::ops::Range<usize>, asts: Vec<Ast>) -> Ast {
            Ast::Alternation(ast::Alternation { span: span(range), asts })
        }

        // Expected concatenation of two adjacent literals starting at
        // offset `start`.
        fn pair(start: usize, first: char, second: char) -> Ast {
            concat(
                start..start + 2,
                vec![lit(first, start), lit(second, start + 1)],
            )
        }

        // Expected empty expression located at offset `at`.
        fn empty(at: usize) -> Ast {
            Ast::Empty(span(at..at))
        }

        // Two single-literal branches, bare and inside a capture group.
        assert_eq!(
            parser(r"a|b").parse(),
            Ok(alternation(0..3, vec![lit('a', 0), lit('b', 2)]))
        );
        assert_eq!(
            parser(r"(a|b)").parse(),
            Ok(group(
                0..5,
                1,
                alternation(1..4, vec![lit('a', 1), lit('b', 3)])
            ))
        );

        // Three branches; multi-literal branches become concatenations.
        assert_eq!(
            parser(r"a|b|c").parse(),
            Ok(alternation(
                0..5,
                vec![lit('a', 0), lit('b', 2), lit('c', 4)]
            ))
        );
        assert_eq!(
            parser(r"ax|by|cz").parse(),
            Ok(alternation(
                0..8,
                vec![pair(0, 'a', 'x'), pair(3, 'b', 'y'), pair(6, 'c', 'z')]
            ))
        );
        assert_eq!(
            parser(r"(ax|by|cz)").parse(),
            Ok(group(
                0..10,
                1,
                alternation(
                    1..9,
                    vec![
                        pair(1, 'a', 'x'),
                        pair(4, 'b', 'y'),
                        pair(7, 'c', 'z'),
                    ]
                )
            ))
        );
        // Nested groups, each holding the remaining branches.
        assert_eq!(
            parser(r"(ax|(by|(cz)))").parse(),
            Ok(group(
                0..14,
                1,
                alt(
                    1..13,
                    vec![
                        pair(1, 'a', 'x'),
                        group(
                            4..13,
                            2,
                            alt(
                                5..12,
                                vec![
                                    pair(5, 'b', 'y'),
                                    group(8..12, 3, pair(9, 'c', 'z')),
                                ]
                            )
                        ),
                    ]
                )
            ))
        );

        // Empty branches are represented by `Ast::Empty` nodes.
        assert_eq!(
            parser(r"|").parse(),
            Ok(alt(0..1, vec![empty(0), empty(1)]))
        );
        assert_eq!(
            parser(r"||").parse(),
            Ok(alt(0..2, vec![empty(0), empty(1), empty(2)]))
        );
        assert_eq!(
            parser(r"a|").parse(),
            Ok(alt(0..2, vec![lit('a', 0), empty(2)]))
        );
        assert_eq!(
            parser(r"|a").parse(),
            Ok(alt(0..2, vec![empty(0), lit('a', 1)]))
        );

        // The same empty-branch cases inside a capture group.
        assert_eq!(
            parser(r"(|)").parse(),
            Ok(group(0..3, 1, alt(1..2, vec![empty(1), empty(2)])))
        );
        assert_eq!(
            parser(r"(a|)").parse(),
            Ok(group(0..4, 1, alt(1..3, vec![lit('a', 1), empty(3)])))
        );
        assert_eq!(
            parser(r"(|a)").parse(),
            Ok(group(0..4, 1, alt(1..3, vec![empty(1), lit('a', 2)])))
        );

        // Unbalanced parentheses around an alternation are errors.
        let failures = vec![
            (r"a|b)", 3..4, ast::ErrorKind::GroupUnopened),
            (r"(a|b", 0..1, ast::ErrorKind::GroupUnclosed),
        ];
        for (pattern, at, kind) in failures {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError { span: span(at), kind }
            );
        }
    }
3574
3575
    #[test]
    fn parse_unsupported_lookaround() {
        // Every look-around opener is rejected with an error whose span
        // starts at offset 0 and ends at the listed offset.
        let openers = vec![
            (r"(?=a)", 3),
            (r"(?!a)", 3),
            (r"(?<=a)", 4),
            (r"(?<!a)", 4),
        ];
        for (pattern, end) in openers {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError {
                    span: span(0..end),
                    kind: ast::ErrorKind::UnsupportedLookAround,
                }
            );
        }
    }
3606
3607
    #[test]
    fn parse_group() {
        // A single-character flag item at offset `at`.
        fn flag_at(at: usize, flag: ast::Flag) -> ast::FlagsItem {
            ast::FlagsItem {
                span: span(at..at + 1),
                kind: ast::FlagsItemKind::Flag(flag),
            }
        }

        // A single-character negation (`-`) item at offset `at`.
        fn negation_at(at: usize) -> ast::FlagsItem {
            ast::FlagsItem {
                span: span(at..at + 1),
                kind: ast::FlagsItemKind::Negation,
            }
        }

        // A sequence of flag items covering `range`.
        fn flag_set(
            range: std::ops::Range<usize>,
            items: Vec<ast::FlagsItem>,
        ) -> ast::Flags {
            ast::Flags { span: span(range), items }
        }

        // The AST for a flag directive such as `(?i)`.
        fn set_flags(range: std::ops::Range<usize>, flags: ast::Flags) -> Ast {
            Ast::Flags(ast::SetFlags { span: span(range), flags })
        }

        // The AST for a group of the given kind wrapping `inner`.
        fn grouped(
            range: std::ops::Range<usize>,
            kind: ast::GroupKind,
            inner: Ast,
        ) -> Ast {
            Ast::Group(ast::Group {
                span: span(range),
                kind,
                ast: Box::new(inner),
            })
        }

        // Flag directives without a sub-expression.
        assert_eq!(
            parser("(?i)").parse(),
            Ok(set_flags(
                0..4,
                flag_set(2..3, vec![flag_at(2, ast::Flag::CaseInsensitive)])
            ))
        );
        assert_eq!(
            parser("(?iU)").parse(),
            Ok(set_flags(
                0..5,
                flag_set(
                    2..4,
                    vec![
                        flag_at(2, ast::Flag::CaseInsensitive),
                        flag_at(3, ast::Flag::SwapGreed),
                    ]
                )
            ))
        );
        assert_eq!(
            parser("(?i-U)").parse(),
            Ok(set_flags(
                0..6,
                flag_set(
                    2..5,
                    vec![
                        flag_at(2, ast::Flag::CaseInsensitive),
                        negation_at(3),
                        flag_at(4, ast::Flag::SwapGreed),
                    ]
                )
            ))
        );

        // Capturing groups are numbered in order of their opening paren.
        assert_eq!(
            parser("()").parse(),
            Ok(grouped(
                0..2,
                ast::GroupKind::CaptureIndex(1),
                Ast::Empty(span(1..1))
            ))
        );
        assert_eq!(
            parser("(a)").parse(),
            Ok(grouped(0..3, ast::GroupKind::CaptureIndex(1), lit('a', 1)))
        );
        assert_eq!(
            parser("(())").parse(),
            Ok(grouped(
                0..4,
                ast::GroupKind::CaptureIndex(1),
                grouped(
                    1..3,
                    ast::GroupKind::CaptureIndex(2),
                    Ast::Empty(span(2..2))
                )
            ))
        );

        // Non-capturing groups carry their (possibly empty) flags.
        assert_eq!(
            parser("(?:a)").parse(),
            Ok(grouped(
                0..5,
                ast::GroupKind::NonCapturing(flag_set(2..2, vec![])),
                lit('a', 3)
            ))
        );
        assert_eq!(
            parser("(?i:a)").parse(),
            Ok(grouped(
                0..6,
                ast::GroupKind::NonCapturing(flag_set(
                    2..3,
                    vec![flag_at(2, ast::Flag::CaseInsensitive)]
                )),
                lit('a', 4)
            ))
        );
        assert_eq!(
            parser("(?i-U:a)").parse(),
            Ok(grouped(
                0..8,
                ast::GroupKind::NonCapturing(flag_set(
                    2..5,
                    vec![
                        flag_at(2, ast::Flag::CaseInsensitive),
                        negation_at(3),
                        flag_at(4, ast::Flag::SwapGreed),
                    ]
                )),
                lit('a', 6)
            ))
        );

        // Malformed groups, checked in order: (pattern, error span, kind).
        let failures = vec![
            ("(", 0..1, ast::ErrorKind::GroupUnclosed),
            ("(?", 0..1, ast::ErrorKind::GroupUnclosed),
            ("(?P", 2..3, ast::ErrorKind::FlagUnrecognized),
            ("(?P<", 4..4, ast::ErrorKind::GroupNameUnexpectedEof),
            ("(a", 0..1, ast::ErrorKind::GroupUnclosed),
            ("(()", 0..1, ast::ErrorKind::GroupUnclosed),
            (")", 0..1, ast::ErrorKind::GroupUnopened),
            ("a)", 1..2, ast::ErrorKind::GroupUnopened),
        ];
        for (pattern, at, kind) in failures {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError { span: span(at), kind }
            );
        }
    }
3818
3819
    #[test]
    fn parse_capture_name() {
        // Valid named groups of the form `(?P<NAME>z)`, paired with the
        // name the parser is expected to record.
        let named = vec![
            ("(?P<a>z)", "a"),
            ("(?P<abc>z)", "abc"),
            ("(?P<a_1>z)", "a_1"),
            ("(?P<a.1>z)", "a.1"),
            ("(?P<a[1]>z)", "a[1]"),
        ];
        for (pattern, name) in named {
            // The name begins at offset 4, immediately after `(?P<`; the
            // `>`, `z` and `)` that follow account for the remaining
            // three bytes of the pattern.
            let name_end = 4 + name.len();
            assert_eq!(
                parser(pattern).parse(),
                Ok(Ast::Group(ast::Group {
                    span: span(0..name_end + 3),
                    kind: ast::GroupKind::CaptureName(ast::CaptureName {
                        span: span(4..name_end),
                        name: s(name),
                        index: 1,
                    }),
                    ast: Box::new(lit('z', name_end + 1)),
                }))
            );
        }

        // Malformed names, checked in order: (pattern, error span, kind).
        let failures = vec![
            ("(?P<", 4..4, ast::ErrorKind::GroupNameUnexpectedEof),
            ("(?P<>z)", 4..4, ast::ErrorKind::GroupNameEmpty),
            ("(?P<a", 5..5, ast::ErrorKind::GroupNameUnexpectedEof),
            ("(?P<ab", 6..6, ast::ErrorKind::GroupNameUnexpectedEof),
            ("(?P<0a", 4..5, ast::ErrorKind::GroupNameInvalid),
            ("(?P<~", 4..5, ast::ErrorKind::GroupNameInvalid),
            ("(?P<abc~", 7..8, ast::ErrorKind::GroupNameInvalid),
            // A reused name reports where it was first defined.
            (
                "(?P<a>y)(?P<a>z)",
                12..13,
                ast::ErrorKind::GroupNameDuplicate { original: span(4..5) },
            ),
        ];
        for (pattern, at, kind) in failures {
            assert_eq!(
                parser(pattern).parse().unwrap_err(),
                TestError { span: span(at), kind }
            );
        }
    }
3944
3945
    /// Checks `parse_flags` against well-formed flag runs (terminated by
    /// either `:` or `)`) and against each malformed run it must reject.
    #[test]
    fn parse_flags() {
        // Expected item for a single flag letter occupying `start..end`.
        let on = |start: usize, end: usize, flag: ast::Flag| ast::FlagsItem {
            span: span(start..end),
            kind: ast::FlagsItemKind::Flag(flag),
        };
        // Expected item for a `-` occupying `start..end`.
        let negate = |start: usize, end: usize| ast::FlagsItem {
            span: span(start..end),
            kind: ast::FlagsItemKind::Negation,
        };

        // (pattern, end offset of the flag run, expected items). The run
        // always starts at offset 0 and excludes the `:`/`)` terminator.
        let accepted = vec![
            ("i:", 1, vec![on(0, 1, ast::Flag::CaseInsensitive)]),
            ("i)", 1, vec![on(0, 1, ast::Flag::CaseInsensitive)]),
            (
                "isU:",
                3,
                vec![
                    on(0, 1, ast::Flag::CaseInsensitive),
                    on(1, 2, ast::Flag::DotMatchesNewLine),
                    on(2, 3, ast::Flag::SwapGreed),
                ],
            ),
            (
                "-isU:",
                4,
                vec![
                    negate(0, 1),
                    on(1, 2, ast::Flag::CaseInsensitive),
                    on(2, 3, ast::Flag::DotMatchesNewLine),
                    on(3, 4, ast::Flag::SwapGreed),
                ],
            ),
            (
                "i-sU:",
                4,
                vec![
                    on(0, 1, ast::Flag::CaseInsensitive),
                    negate(1, 2),
                    on(2, 3, ast::Flag::DotMatchesNewLine),
                    on(3, 4, ast::Flag::SwapGreed),
                ],
            ),
        ];
        for (pattern, end, items) in accepted {
            assert_eq!(
                parser(pattern).parse_flags(),
                Ok(ast::Flags { span: span(0..end), items })
            );
        }

        // (pattern, error span start, error span end, expected error kind).
        let rejected = vec![
            ("isU", 3, 3, ast::ErrorKind::FlagUnexpectedEof),
            ("isUa:", 3, 4, ast::ErrorKind::FlagUnrecognized),
            (
                "isUi:",
                3,
                4,
                ast::ErrorKind::FlagDuplicate { original: span(0..1) },
            ),
            (
                "i-sU-i:",
                4,
                5,
                ast::ErrorKind::FlagRepeatedNegation { original: span(1..2) },
            ),
            ("-)", 0, 1, ast::ErrorKind::FlagDanglingNegation),
            ("i-)", 1, 2, ast::ErrorKind::FlagDanglingNegation),
            ("iU-)", 2, 3, ast::ErrorKind::FlagDanglingNegation),
        ];
        for (pattern, start, end, kind) in rejected {
            assert_eq!(
                parser(pattern).parse_flags().unwrap_err(),
                TestError { span: span(start..end), kind }
            );
        }
    }
4102
4103
    /// Checks that `parse_flag` maps each recognized flag letter to its
    /// `ast::Flag` and reports `FlagUnrecognized` for anything else.
    #[test]
    fn parse_flag() {
        let recognized = vec![
            ("i", ast::Flag::CaseInsensitive),
            ("m", ast::Flag::MultiLine),
            ("s", ast::Flag::DotMatchesNewLine),
            ("U", ast::Flag::SwapGreed),
            ("u", ast::Flag::Unicode),
            ("x", ast::Flag::IgnoreWhitespace),
        ];
        for (letter, flag) in recognized {
            assert_eq!(parser(letter).parse_flag(), Ok(flag));
        }

        // The error span covers the whole offending character, which for
        // the snowman is three bytes wide.
        let unrecognized =
            vec![("a", span(0..1)), ("☃", span_range("☃", 0..3))];
        for (letter, at) in unrecognized {
            assert_eq!(
                parser(letter).parse_flag().unwrap_err(),
                TestError {
                    span: at,
                    kind: ast::ErrorKind::FlagUnrecognized,
                }
            );
        }
    }
4127
4128
    /// Checks `parse_primitive` on single characters that are not escape
    /// sequences: the dot, the line anchors and verbatim literals.
    #[test]
    fn parse_primitive_non_escape() {
        // A one-byte assertion at the start of the pattern.
        let anchor = |kind: ast::AssertionKind| {
            Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
        };
        // A verbatim literal `c` located at `at`.
        let verbatim = |at: Span, c: char| {
            Primitive::Literal(ast::Literal {
                span: at,
                kind: ast::LiteralKind::Verbatim,
                c,
            })
        };

        let cases = vec![
            (r".", Primitive::Dot(span(0..1))),
            (r"^", anchor(ast::AssertionKind::StartLine)),
            (r"$", anchor(ast::AssertionKind::EndLine)),
            (r"a", verbatim(span(0..1), 'a')),
            (r"|", verbatim(span(0..1), '|')),
            (r"☃", verbatim(span_range("☃", 0..3), '☃')),
        ];
        for (pattern, expected) in cases {
            assert_eq!(parser(pattern).parse_primitive(), Ok(expected));
        }
    }
4174
4175
    /// Checks two-byte escape sequences: escaped punctuation, the special
    /// literals (`\a`, `\f`, ...), the assertions (`\A`, `\z`, `\b`, `\B`)
    /// and the errors for a truncated or unrecognized escape.
    #[test]
    fn parse_escape() {
        assert_eq!(
            parser(r"\|").parse_primitive(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..2),
                kind: ast::LiteralKind::Punctuation,
                c: '|',
            }))
        );

        // (pattern, character it denotes, special literal kind).
        let specials = vec![
            (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
            (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
            (r"\t", '\t', ast::SpecialLiteralKind::Tab),
            (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
            (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
            (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
        ];
        for (pattern, c, special) in specials {
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..2),
                kind: ast::LiteralKind::Special(special),
                c,
            });
            assert_eq!(parser(pattern).parse_primitive(), Ok(expected));
        }

        // (pattern, assertion kind it denotes).
        let assertions = vec![
            (r"\A", ast::AssertionKind::StartText),
            (r"\z", ast::AssertionKind::EndText),
            (r"\b", ast::AssertionKind::WordBoundary),
            (r"\B", ast::AssertionKind::NotWordBoundary),
        ];
        for (pattern, kind) in assertions {
            let expected =
                Primitive::Assertion(ast::Assertion { span: span(0..2), kind });
            assert_eq!(parser(pattern).parse_primitive(), Ok(expected));
        }

        // (pattern, error span end, expected error kind).
        let rejected = vec![
            (r"\", 1, ast::ErrorKind::EscapeUnexpectedEof),
            (r"\y", 2, ast::ErrorKind::EscapeUnrecognized),
        ];
        for (pattern, end, kind) in rejected {
            assert_eq!(
                parser(pattern).parse_escape().unwrap_err(),
                TestError { span: span(0..end), kind }
            );
        }
    }
4247
4248
    /// Checks that a backslash followed by a decimal digit is rejected as an
    /// unsupported backreference when parsed with the default test parser.
    #[test]
    fn parse_unsupported_backreference() {
        // Only the lowest and highest digits are probed.
        for pattern in vec![r"\0", r"\9"] {
            let err = parser(pattern).parse_escape().unwrap_err();
            assert_eq!(
                err,
                TestError {
                    span: span(0..2),
                    kind: ast::ErrorKind::UnsupportedBackreference,
                }
            );
        }
    }
4265
4266
    /// Checks octal escapes using the octal-enabled test parser
    /// (`parser_octal`): every value of up to three octal digits, the
    /// behaviour when more characters follow, and the error for a
    /// non-octal digit.
    #[test]
    fn parse_octal() {
        // Expected octal literal `c` spanning `0..end`.
        let octal = |end: usize, c: char| ast::Literal {
            span: span(0..end),
            kind: ast::LiteralKind::Octal,
            c,
        };
        // Expected one-byte verbatim literal `c` starting at `start`.
        let verbatim = |start: usize, c: char| ast::Literal {
            span: span(start..start + 1),
            kind: ast::LiteralKind::Verbatim,
            c,
        };

        // The range is inclusive so that the largest three-digit escape,
        // `\777` (U+01FF), is exercised as well.
        for i in 0..=0o777 {
            let pat = format!(r"\{:o}", i);
            assert_eq!(
                parser_octal(&pat).parse_escape(),
                Ok(Primitive::Literal(octal(
                    pat.len(),
                    ::std::char::from_u32(i).unwrap()
                )))
            );
        }

        // An escape stops at the first non-octal digit ...
        assert_eq!(
            parser_octal(r"\778").parse_escape(),
            Ok(Primitive::Literal(octal(3, '?')))
        );
        // ... and after at most three octal digits.
        assert_eq!(
            parser_octal(r"\7777").parse_escape(),
            Ok(Primitive::Literal(octal(4, '\u{01FF}')))
        );

        // Whatever follows the escape is parsed as an ordinary literal.
        assert_eq!(
            parser_octal(r"\778").parse(),
            Ok(Ast::Concat(ast::Concat {
                span: span(0..4),
                asts: vec![
                    Ast::Literal(octal(3, '?')),
                    Ast::Literal(verbatim(3, '8')),
                ],
            }))
        );
        assert_eq!(
            parser_octal(r"\7777").parse(),
            Ok(Ast::Concat(ast::Concat {
                span: span(0..5),
                asts: vec![
                    Ast::Literal(octal(4, '\u{01FF}')),
                    Ast::Literal(verbatim(4, '7')),
                ],
            }))
        );

        assert_eq!(
            parser_octal(r"\8").parse_escape().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::EscapeUnrecognized,
            }
        );
    }
4340
4341
    /// Checks the fixed two-digit `\xNN` escape for every byte value, plus
    /// the errors for truncated input and non-hex digits.
    #[test]
    fn parse_hex_two() {
        for code in 0..256 {
            let pat = format!(r"\x{:02x}", code);
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
                c: ::std::char::from_u32(code).unwrap(),
            });
            assert_eq!(parser(&pat).parse_escape(), Ok(expected));
        }

        // (pattern, error span start, error span end, expected error kind).
        let rejected = vec![
            (r"\xF", 3, 3, ast::ErrorKind::EscapeUnexpectedEof),
            (r"\xG", 2, 3, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\xFG", 3, 4, ast::ErrorKind::EscapeHexInvalidDigit),
        ];
        for (pattern, start, end, kind) in rejected {
            assert_eq!(
                parser(pattern).parse_escape().unwrap_err(),
                TestError { span: span(start..end), kind }
            );
        }
    }
4377
4378
    /// Checks the fixed four-digit `\uNNNN` escape for every valid scalar
    /// value below 0x10000, plus the errors for truncated input, non-hex
    /// digits and a surrogate code point.
    #[test]
    fn parse_hex_four() {
        // `from_u32` yields `None` for surrogates, which are skipped here.
        for c in (0..65536).filter_map(::std::char::from_u32) {
            let pat = format!(r"\u{:04x}", c as u32);
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeShort,
                ),
                c,
            });
            assert_eq!(parser(&pat).parse_escape(), Ok(expected));
        }

        // (pattern, error span start, error span end, expected error kind).
        let rejected = vec![
            (r"\uF", 3, 3, ast::ErrorKind::EscapeUnexpectedEof),
            (r"\uG", 2, 3, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\uFG", 3, 4, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\uFFG", 4, 5, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\uFFFG", 5, 6, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\uD800", 2, 6, ast::ErrorKind::EscapeHexInvalid),
        ];
        for (pattern, start, end, kind) in rejected {
            assert_eq!(
                parser(pattern).parse_escape().unwrap_err(),
                TestError { span: span(start..end), kind }
            );
        }
    }
4441
4442
    /// Checks the fixed eight-digit `\UNNNNNNNN` escape for every valid
    /// scalar value below 0x10000, plus the errors for truncated input and
    /// for a non-hex digit at each digit position.
    #[test]
    fn parse_hex_eight() {
        // `from_u32` yields `None` for surrogates, which are skipped here.
        for c in (0..65536).filter_map(::std::char::from_u32) {
            let pat = format!(r"\U{:08x}", c as u32);
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeLong,
                ),
                c,
            });
            assert_eq!(parser(&pat).parse_escape(), Ok(expected));
        }

        // (pattern, error span start, error span end, expected error kind).
        let rejected = vec![
            (r"\UF", 3, 3, ast::ErrorKind::EscapeUnexpectedEof),
            (r"\UG", 2, 3, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\UFG", 3, 4, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\UFFG", 4, 5, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\UFFFG", 5, 6, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\UFFFFG", 6, 7, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\UFFFFFG", 7, 8, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\UFFFFFFG", 8, 9, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\UFFFFFFFG", 9, 10, ast::ErrorKind::EscapeHexInvalidDigit),
        ];
        for (pattern, start, end, kind) in rejected {
            assert_eq!(
                parser(pattern).parse_escape().unwrap_err(),
                TestError { span: span(start..end), kind }
            );
        }
    }
4526
4527
    /// Checks brace-delimited hex escapes (`\x{...}`, `\u{...}`, `\U{...}`),
    /// including mixed-case digits and the maximum code point, plus the
    /// errors for unterminated, empty, malformed and out-of-range input.
    #[test]
    fn parse_hex_brace() {
        // (pattern, span end, hex literal kind, character it denotes).
        let accepted = vec![
            (r"\u{26c4}", 8, ast::HexLiteralKind::UnicodeShort, '⛄'),
            (r"\U{26c4}", 8, ast::HexLiteralKind::UnicodeLong, '⛄'),
            (r"\x{26c4}", 8, ast::HexLiteralKind::X, '⛄'),
            (r"\x{26C4}", 8, ast::HexLiteralKind::X, '⛄'),
            (r"\x{10fFfF}", 10, ast::HexLiteralKind::X, '\u{10FFFF}'),
        ];
        for (pattern, end, hex, c) in accepted {
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..end),
                kind: ast::LiteralKind::HexBrace(hex),
                c,
            });
            assert_eq!(parser(pattern).parse_escape(), Ok(expected));
        }

        // (pattern, error span start, error span end, expected error kind).
        let rejected = vec![
            (r"\x", 2, 2, ast::ErrorKind::EscapeUnexpectedEof),
            (r"\x{", 2, 3, ast::ErrorKind::EscapeUnexpectedEof),
            (r"\x{FF", 2, 5, ast::ErrorKind::EscapeUnexpectedEof),
            (r"\x{}", 2, 4, ast::ErrorKind::EscapeHexEmpty),
            (r"\x{FGF}", 4, 5, ast::ErrorKind::EscapeHexInvalidDigit),
            (r"\x{FFFFFF}", 3, 9, ast::ErrorKind::EscapeHexInvalid),
            (r"\x{D800}", 3, 7, ast::ErrorKind::EscapeHexInvalid),
            (r"\x{FFFFFFFFF}", 3, 12, ast::ErrorKind::EscapeHexInvalid),
        ];
        for (pattern, start, end, kind) in rejected {
            assert_eq!(
                parser(pattern).parse_escape().unwrap_err(),
                TestError { span: span(start..end), kind }
            );
        }
    }
4631
4632
    /// Checks `parse_decimal` on plain and zero-prefixed numbers, and the
    /// errors for input with no leading digit and for a number too large
    /// to be represented.
    #[test]
    fn parse_decimal() {
        // (pattern, parsed value).
        for (pattern, value) in vec![("123", 123), ("0", 0), ("01", 1)] {
            assert_eq!(parser(pattern).parse_decimal(), Ok(value));
        }

        // (pattern, error span end, expected error kind). All error spans
        // start at offset 0.
        let rejected = vec![
            ("-1", 0, ast::ErrorKind::DecimalEmpty),
            ("", 0, ast::ErrorKind::DecimalEmpty),
            ("9999999999", 10, ast::ErrorKind::DecimalInvalid),
        ];
        for (pattern, end, kind) in rejected {
            assert_eq!(
                parser(pattern).parse_decimal().unwrap_err(),
                TestError { span: span(0..end), kind }
            );
        }
    }
4654
4655
    #[test]
4656
    fn parse_set_class() {
4657
        fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4658
            ast::ClassSet::union(ast::ClassSetUnion { span, items })
4659
        }
4660
4661
        fn intersection(
4662
            span: Span,
4663
            lhs: ast::ClassSet,
4664
            rhs: ast::ClassSet,
4665
        ) -> ast::ClassSet {
4666
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4667
                span,
4668
                kind: ast::ClassSetBinaryOpKind::Intersection,
4669
                lhs: Box::new(lhs),
4670
                rhs: Box::new(rhs),
4671
            })
4672
        }
4673
4674
        fn difference(
4675
            span: Span,
4676
            lhs: ast::ClassSet,
4677
            rhs: ast::ClassSet,
4678
        ) -> ast::ClassSet {
4679
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4680
                span,
4681
                kind: ast::ClassSetBinaryOpKind::Difference,
4682
                lhs: Box::new(lhs),
4683
                rhs: Box::new(rhs),
4684
            })
4685
        }
4686
4687
        fn symdifference(
4688
            span: Span,
4689
            lhs: ast::ClassSet,
4690
            rhs: ast::ClassSet,
4691
        ) -> ast::ClassSet {
4692
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4693
                span,
4694
                kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4695
                lhs: Box::new(lhs),
4696
                rhs: Box::new(rhs),
4697
            })
4698
        }
4699
4700
        fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4701
            ast::ClassSet::Item(item)
4702
        }
4703
4704
        fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4705
            ast::ClassSetItem::Ascii(cls)
4706
        }
4707
4708
        fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4709
            ast::ClassSetItem::Unicode(cls)
4710
        }
4711
4712
        fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4713
            ast::ClassSetItem::Perl(cls)
4714
        }
4715
4716
        fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4717
            ast::ClassSetItem::Bracketed(Box::new(cls))
4718
        }
4719
4720
        fn lit(span: Span, c: char) -> ast::ClassSetItem {
4721
            ast::ClassSetItem::Literal(ast::Literal {
4722
                span,
4723
                kind: ast::LiteralKind::Verbatim,
4724
                c,
4725
            })
4726
        }
4727
4728
        fn empty(span: Span) -> ast::ClassSetItem {
4729
            ast::ClassSetItem::Empty(span)
4730
        }
4731
4732
        fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4733
            let pos1 = Position {
4734
                offset: span.start.offset + start.len_utf8(),
4735
                column: span.start.column + 1,
4736
                ..span.start
4737
            };
4738
            let pos2 = Position {
4739
                offset: span.end.offset - end.len_utf8(),
4740
                column: span.end.column - 1,
4741
                ..span.end
4742
            };
4743
            ast::ClassSetItem::Range(ast::ClassSetRange {
4744
                span,
4745
                start: ast::Literal {
4746
                    span: Span { end: pos1, ..span },
4747
                    kind: ast::LiteralKind::Verbatim,
4748
                    c: start,
4749
                },
4750
                end: ast::Literal {
4751
                    span: Span { start: pos2, ..span },
4752
                    kind: ast::LiteralKind::Verbatim,
4753
                    c: end,
4754
                },
4755
            })
4756
        }
4757
4758
        fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4759
            ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated }
4760
        }
4761
4762
        fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4763
            ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated }
4764
        }
4765
4766
        assert_eq!(
4767
            parser("[[:alnum:]]").parse(),
4768
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4769
                span: span(0..11),
4770
                negated: false,
4771
                kind: itemset(item_ascii(alnum(span(1..10), false))),
4772
            })))
4773
        );
4774
        assert_eq!(
4775
            parser("[[[:alnum:]]]").parse(),
4776
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4777
                span: span(0..13),
4778
                negated: false,
4779
                kind: itemset(item_bracket(ast::ClassBracketed {
4780
                    span: span(1..12),
4781
                    negated: false,
4782
                    kind: itemset(item_ascii(alnum(span(2..11), false))),
4783
                })),
4784
            })))
4785
        );
4786
        assert_eq!(
4787
            parser("[[:alnum:]&&[:lower:]]").parse(),
4788
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4789
                span: span(0..22),
4790
                negated: false,
4791
                kind: intersection(
4792
                    span(1..21),
4793
                    itemset(item_ascii(alnum(span(1..10), false))),
4794
                    itemset(item_ascii(lower(span(12..21), false))),
4795
                ),
4796
            })))
4797
        );
4798
        assert_eq!(
4799
            parser("[[:alnum:]--[:lower:]]").parse(),
4800
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4801
                span: span(0..22),
4802
                negated: false,
4803
                kind: difference(
4804
                    span(1..21),
4805
                    itemset(item_ascii(alnum(span(1..10), false))),
4806
                    itemset(item_ascii(lower(span(12..21), false))),
4807
                ),
4808
            })))
4809
        );
4810
        assert_eq!(
4811
            parser("[[:alnum:]~~[:lower:]]").parse(),
4812
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4813
                span: span(0..22),
4814
                negated: false,
4815
                kind: symdifference(
4816
                    span(1..21),
4817
                    itemset(item_ascii(alnum(span(1..10), false))),
4818
                    itemset(item_ascii(lower(span(12..21), false))),
4819
                ),
4820
            })))
4821
        );
4822
4823
        assert_eq!(
4824
            parser("[a]").parse(),
4825
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4826
                span: span(0..3),
4827
                negated: false,
4828
                kind: itemset(lit(span(1..2), 'a')),
4829
            })))
4830
        );
4831
        assert_eq!(
4832
            parser(r"[a\]]").parse(),
4833
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4834
                span: span(0..5),
4835
                negated: false,
4836
                kind: union(
4837
                    span(1..4),
4838
                    vec![
4839
                        lit(span(1..2), 'a'),
4840
                        ast::ClassSetItem::Literal(ast::Literal {
4841
                            span: span(2..4),
4842
                            kind: ast::LiteralKind::Punctuation,
4843
                            c: ']',
4844
                        }),
4845
                    ]
4846
                ),
4847
            })))
4848
        );
4849
        assert_eq!(
4850
            parser(r"[a\-z]").parse(),
4851
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4852
                span: span(0..6),
4853
                negated: false,
4854
                kind: union(
4855
                    span(1..5),
4856
                    vec![
4857
                        lit(span(1..2), 'a'),
4858
                        ast::ClassSetItem::Literal(ast::Literal {
4859
                            span: span(2..4),
4860
                            kind: ast::LiteralKind::Punctuation,
4861
                            c: '-',
4862
                        }),
4863
                        lit(span(4..5), 'z'),
4864
                    ]
4865
                ),
4866
            })))
4867
        );
4868
        assert_eq!(
4869
            parser("[ab]").parse(),
4870
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4871
                span: span(0..4),
4872
                negated: false,
4873
                kind: union(
4874
                    span(1..3),
4875
                    vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4876
                ),
4877
            })))
4878
        );
4879
        assert_eq!(
4880
            parser("[a-]").parse(),
4881
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4882
                span: span(0..4),
4883
                negated: false,
4884
                kind: union(
4885
                    span(1..3),
4886
                    vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4887
                ),
4888
            })))
4889
        );
4890
        assert_eq!(
4891
            parser("[-a]").parse(),
4892
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4893
                span: span(0..4),
4894
                negated: false,
4895
                kind: union(
4896
                    span(1..3),
4897
                    vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4898
                ),
4899
            })))
4900
        );
4901
        assert_eq!(
4902
            parser(r"[\pL]").parse(),
4903
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4904
                span: span(0..5),
4905
                negated: false,
4906
                kind: itemset(item_unicode(ast::ClassUnicode {
4907
                    span: span(1..4),
4908
                    negated: false,
4909
                    kind: ast::ClassUnicodeKind::OneLetter('L'),
4910
                })),
4911
            })))
4912
        );
4913
        assert_eq!(
4914
            parser(r"[\w]").parse(),
4915
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4916
                span: span(0..4),
4917
                negated: false,
4918
                kind: itemset(item_perl(ast::ClassPerl {
4919
                    span: span(1..3),
4920
                    kind: ast::ClassPerlKind::Word,
4921
                    negated: false,
4922
                })),
4923
            })))
4924
        );
4925
        assert_eq!(
4926
            parser(r"[a\wz]").parse(),
4927
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4928
                span: span(0..6),
4929
                negated: false,
4930
                kind: union(
4931
                    span(1..5),
4932
                    vec![
4933
                        lit(span(1..2), 'a'),
4934
                        item_perl(ast::ClassPerl {
4935
                            span: span(2..4),
4936
                            kind: ast::ClassPerlKind::Word,
4937
                            negated: false,
4938
                        }),
4939
                        lit(span(4..5), 'z'),
4940
                    ]
4941
                ),
4942
            })))
4943
        );
4944
4945
        assert_eq!(
4946
            parser("[a-z]").parse(),
4947
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4948
                span: span(0..5),
4949
                negated: false,
4950
                kind: itemset(range(span(1..4), 'a', 'z')),
4951
            })))
4952
        );
4953
        assert_eq!(
4954
            parser("[a-cx-z]").parse(),
4955
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4956
                span: span(0..8),
4957
                negated: false,
4958
                kind: union(
4959
                    span(1..7),
4960
                    vec![
4961
                        range(span(1..4), 'a', 'c'),
4962
                        range(span(4..7), 'x', 'z'),
4963
                    ]
4964
                ),
4965
            })))
4966
        );
4967
        assert_eq!(
4968
            parser(r"[\w&&a-cx-z]").parse(),
4969
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4970
                span: span(0..12),
4971
                negated: false,
4972
                kind: intersection(
4973
                    span(1..11),
4974
                    itemset(item_perl(ast::ClassPerl {
4975
                        span: span(1..3),
4976
                        kind: ast::ClassPerlKind::Word,
4977
                        negated: false,
4978
                    })),
4979
                    union(
4980
                        span(5..11),
4981
                        vec![
4982
                            range(span(5..8), 'a', 'c'),
4983
                            range(span(8..11), 'x', 'z'),
4984
                        ]
4985
                    ),
4986
                ),
4987
            })))
4988
        );
4989
        assert_eq!(
4990
            parser(r"[a-cx-z&&\w]").parse(),
4991
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4992
                span: span(0..12),
4993
                negated: false,
4994
                kind: intersection(
4995
                    span(1..11),
4996
                    union(
4997
                        span(1..7),
4998
                        vec![
4999
                            range(span(1..4), 'a', 'c'),
5000
                            range(span(4..7), 'x', 'z'),
5001
                        ]
5002
                    ),
5003
                    itemset(item_perl(ast::ClassPerl {
5004
                        span: span(9..11),
5005
                        kind: ast::ClassPerlKind::Word,
5006
                        negated: false,
5007
                    })),
5008
                ),
5009
            })))
5010
        );
5011
        assert_eq!(
5012
            parser(r"[a--b--c]").parse(),
5013
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5014
                span: span(0..9),
5015
                negated: false,
5016
                kind: difference(
5017
                    span(1..8),
5018
                    difference(
5019
                        span(1..5),
5020
                        itemset(lit(span(1..2), 'a')),
5021
                        itemset(lit(span(4..5), 'b')),
5022
                    ),
5023
                    itemset(lit(span(7..8), 'c')),
5024
                ),
5025
            })))
5026
        );
5027
        assert_eq!(
5028
            parser(r"[a~~b~~c]").parse(),
5029
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5030
                span: span(0..9),
5031
                negated: false,
5032
                kind: symdifference(
5033
                    span(1..8),
5034
                    symdifference(
5035
                        span(1..5),
5036
                        itemset(lit(span(1..2), 'a')),
5037
                        itemset(lit(span(4..5), 'b')),
5038
                    ),
5039
                    itemset(lit(span(7..8), 'c')),
5040
                ),
5041
            })))
5042
        );
5043
        assert_eq!(
5044
            parser(r"[\^&&^]").parse(),
5045
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5046
                span: span(0..7),
5047
                negated: false,
5048
                kind: intersection(
5049
                    span(1..6),
5050
                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5051
                        span: span(1..3),
5052
                        kind: ast::LiteralKind::Punctuation,
5053
                        c: '^',
5054
                    })),
5055
                    itemset(lit(span(5..6), '^')),
5056
                ),
5057
            })))
5058
        );
5059
        assert_eq!(
5060
            parser(r"[\&&&&]").parse(),
5061
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5062
                span: span(0..7),
5063
                negated: false,
5064
                kind: intersection(
5065
                    span(1..6),
5066
                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5067
                        span: span(1..3),
5068
                        kind: ast::LiteralKind::Punctuation,
5069
                        c: '&',
5070
                    })),
5071
                    itemset(lit(span(5..6), '&')),
5072
                ),
5073
            })))
5074
        );
5075
        assert_eq!(
5076
            parser(r"[&&&&]").parse(),
5077
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5078
                span: span(0..6),
5079
                negated: false,
5080
                kind: intersection(
5081
                    span(1..5),
5082
                    intersection(
5083
                        span(1..3),
5084
                        itemset(empty(span(1..1))),
5085
                        itemset(empty(span(3..3))),
5086
                    ),
5087
                    itemset(empty(span(5..5))),
5088
                ),
5089
            })))
5090
        );
5091
5092
        let pat = "[☃-⛄]";
5093
        assert_eq!(
5094
            parser(pat).parse(),
5095
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5096
                span: span_range(pat, 0..9),
5097
                negated: false,
5098
                kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5099
                    span: span_range(pat, 1..8),
5100
                    start: ast::Literal {
5101
                        span: span_range(pat, 1..4),
5102
                        kind: ast::LiteralKind::Verbatim,
5103
                        c: '☃',
5104
                    },
5105
                    end: ast::Literal {
5106
                        span: span_range(pat, 5..8),
5107
                        kind: ast::LiteralKind::Verbatim,
5108
                        c: '⛄',
5109
                    },
5110
                })),
5111
            })))
5112
        );
5113
5114
        assert_eq!(
5115
            parser(r"[]]").parse(),
5116
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5117
                span: span(0..3),
5118
                negated: false,
5119
                kind: itemset(lit(span(1..2), ']')),
5120
            })))
5121
        );
5122
        assert_eq!(
5123
            parser(r"[]\[]").parse(),
5124
            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5125
                span: span(0..5),
5126
                negated: false,
5127
                kind: union(
5128
                    span(1..4),
5129
                    vec![
5130
                        lit(span(1..2), ']'),
5131
                        ast::ClassSetItem::Literal(ast::Literal {
5132
                            span: span(2..4),
5133
                            kind: ast::LiteralKind::Punctuation,
5134
                            c: '[',
5135
                        }),
5136
                    ]
5137
                ),
5138
            })))
5139
        );
5140
        assert_eq!(
5141
            parser(r"[\[]]").parse(),
5142
            Ok(concat(
5143
                0..5,
5144
                vec![
5145
                    Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5146
                        span: span(0..4),
5147
                        negated: false,
5148
                        kind: itemset(ast::ClassSetItem::Literal(
5149
                            ast::Literal {
5150
                                span: span(1..3),
5151
                                kind: ast::LiteralKind::Punctuation,
5152
                                c: '[',
5153
                            }
5154
                        )),
5155
                    })),
5156
                    Ast::Literal(ast::Literal {
5157
                        span: span(4..5),
5158
                        kind: ast::LiteralKind::Verbatim,
5159
                        c: ']',
5160
                    }),
5161
                ]
5162
            ))
5163
        );
5164
5165
        assert_eq!(
5166
            parser("[").parse().unwrap_err(),
5167
            TestError {
5168
                span: span(0..1),
5169
                kind: ast::ErrorKind::ClassUnclosed,
5170
            }
5171
        );
5172
        assert_eq!(
5173
            parser("[[").parse().unwrap_err(),
5174
            TestError {
5175
                span: span(1..2),
5176
                kind: ast::ErrorKind::ClassUnclosed,
5177
            }
5178
        );
5179
        assert_eq!(
5180
            parser("[[-]").parse().unwrap_err(),
5181
            TestError {
5182
                span: span(0..1),
5183
                kind: ast::ErrorKind::ClassUnclosed,
5184
            }
5185
        );
5186
        assert_eq!(
5187
            parser("[[[:alnum:]").parse().unwrap_err(),
5188
            TestError {
5189
                span: span(1..2),
5190
                kind: ast::ErrorKind::ClassUnclosed,
5191
            }
5192
        );
5193
        assert_eq!(
5194
            parser(r"[\b]").parse().unwrap_err(),
5195
            TestError {
5196
                span: span(1..3),
5197
                kind: ast::ErrorKind::ClassEscapeInvalid,
5198
            }
5199
        );
5200
        assert_eq!(
5201
            parser(r"[\w-a]").parse().unwrap_err(),
5202
            TestError {
5203
                span: span(1..3),
5204
                kind: ast::ErrorKind::ClassRangeLiteral,
5205
            }
5206
        );
5207
        assert_eq!(
5208
            parser(r"[a-\w]").parse().unwrap_err(),
5209
            TestError {
5210
                span: span(3..5),
5211
                kind: ast::ErrorKind::ClassRangeLiteral,
5212
            }
5213
        );
5214
        assert_eq!(
5215
            parser(r"[z-a]").parse().unwrap_err(),
5216
            TestError {
5217
                span: span(1..4),
5218
                kind: ast::ErrorKind::ClassRangeInvalid,
5219
            }
5220
        );
5221
5222
        assert_eq!(
5223
            parser_ignore_whitespace("[a ").parse().unwrap_err(),
5224
            TestError {
5225
                span: span(0..1),
5226
                kind: ast::ErrorKind::ClassUnclosed,
5227
            }
5228
        );
5229
        assert_eq!(
5230
            parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5231
            TestError {
5232
                span: span(0..1),
5233
                kind: ast::ErrorKind::ClassUnclosed,
5234
            }
5235
        );
5236
    }
5237
5238
    // Pins down the `(ClassBracketed, ClassSetUnion)` pair that
    // `parse_set_class_open` is expected to return for a range of class
    // openings, followed by the inputs for which a `ClassUnclosed` error is
    // expected.
    //
    // In every success case below the expected `set` carries an empty union as
    // its `kind`; any literal picked up while opening the class (`-` or `]`)
    // is expected only in the separately returned `union`.
    //
    // NOTE(review): `parser`, `parser_ignore_whitespace`, `span` and
    // `TestError` are helpers defined outside this view; presumably
    // `parser_ignore_whitespace` builds a parser with whitespace-insensitive
    // mode enabled -- confirm against their definitions.
    #[test]
5239
    fn parse_set_class_open() {
5240
        // Plain and negated openings with no leading literal: the expected
        // `set` span ends where the first item (`a`) starts, including any
        // skipped whitespace, and the returned union is empty.
        assert_eq!(parser("[a]").parse_set_class_open(), {
5241
            let set = ast::ClassBracketed {
5242
                span: span(0..1),
5243
                negated: false,
5244
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5245
                    span: span(1..1),
5246
                    items: vec![],
5247
                }),
5248
            };
5249
            let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5250
            Ok((set, union))
5251
        });
5252
        assert_eq!(
5253
            parser_ignore_whitespace("[   a]").parse_set_class_open(),
5254
            {
5255
                let set = ast::ClassBracketed {
5256
                    span: span(0..4),
5257
                    negated: false,
5258
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5259
                        span: span(4..4),
5260
                        items: vec![],
5261
                    }),
5262
                };
5263
                let union =
5264
                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5265
                Ok((set, union))
5266
            }
5267
        );
5268
        assert_eq!(parser("[^a]").parse_set_class_open(), {
5269
            let set = ast::ClassBracketed {
5270
                span: span(0..2),
5271
                negated: true,
5272
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5273
                    span: span(2..2),
5274
                    items: vec![],
5275
                }),
5276
            };
5277
            let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5278
            Ok((set, union))
5279
        });
5280
        assert_eq!(
5281
            parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5282
            {
5283
                let set = ast::ClassBracketed {
5284
                    span: span(0..4),
5285
                    negated: true,
5286
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5287
                        span: span(4..4),
5288
                        items: vec![],
5289
                    }),
5290
                };
5291
                let union =
5292
                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5293
                Ok((set, union))
5294
            }
5295
        );
        // Leading `-` (one or more, optionally after `^`): each one is
        // expected as a verbatim literal in the returned union, and the `set`
        // span extends past them. The empty union stored inside `set` keeps a
        // zero-width span at the position of the first `-`.
5296
        assert_eq!(parser("[-a]").parse_set_class_open(), {
5297
            let set = ast::ClassBracketed {
5298
                span: span(0..2),
5299
                negated: false,
5300
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5301
                    span: span(1..1),
5302
                    items: vec![],
5303
                }),
5304
            };
5305
            let union = ast::ClassSetUnion {
5306
                span: span(1..2),
5307
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5308
                    span: span(1..2),
5309
                    kind: ast::LiteralKind::Verbatim,
5310
                    c: '-',
5311
                })],
5312
            };
5313
            Ok((set, union))
5314
        });
5315
        assert_eq!(
5316
            parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5317
            {
5318
                let set = ast::ClassBracketed {
5319
                    span: span(0..4),
5320
                    negated: false,
5321
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5322
                        span: span(2..2),
5323
                        items: vec![],
5324
                    }),
5325
                };
5326
                let union = ast::ClassSetUnion {
5327
                    span: span(2..3),
5328
                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5329
                        span: span(2..3),
5330
                        kind: ast::LiteralKind::Verbatim,
5331
                        c: '-',
5332
                    })],
5333
                };
5334
                Ok((set, union))
5335
            }
5336
        );
5337
        assert_eq!(parser("[^-a]").parse_set_class_open(), {
5338
            let set = ast::ClassBracketed {
5339
                span: span(0..3),
5340
                negated: true,
5341
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5342
                    span: span(2..2),
5343
                    items: vec![],
5344
                }),
5345
            };
5346
            let union = ast::ClassSetUnion {
5347
                span: span(2..3),
5348
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5349
                    span: span(2..3),
5350
                    kind: ast::LiteralKind::Verbatim,
5351
                    c: '-',
5352
                })],
5353
            };
5354
            Ok((set, union))
5355
        });
5356
        assert_eq!(parser("[--a]").parse_set_class_open(), {
5357
            let set = ast::ClassBracketed {
5358
                span: span(0..3),
5359
                negated: false,
5360
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5361
                    span: span(1..1),
5362
                    items: vec![],
5363
                }),
5364
            };
5365
            let union = ast::ClassSetUnion {
5366
                span: span(1..3),
5367
                items: vec![
5368
                    ast::ClassSetItem::Literal(ast::Literal {
5369
                        span: span(1..2),
5370
                        kind: ast::LiteralKind::Verbatim,
5371
                        c: '-',
5372
                    }),
5373
                    ast::ClassSetItem::Literal(ast::Literal {
5374
                        span: span(2..3),
5375
                        kind: ast::LiteralKind::Verbatim,
5376
                        c: '-',
5377
                    }),
5378
                ],
5379
            };
5380
            Ok((set, union))
5381
        });
        // A `]` directly after the opening (optionally after `^` or skipped
        // whitespace) is expected as a verbatim literal item in the returned
        // union rather than as the end of the class.
5382
        assert_eq!(parser("[]a]").parse_set_class_open(), {
5383
            let set = ast::ClassBracketed {
5384
                span: span(0..2),
5385
                negated: false,
5386
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5387
                    span: span(1..1),
5388
                    items: vec![],
5389
                }),
5390
            };
5391
            let union = ast::ClassSetUnion {
5392
                span: span(1..2),
5393
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5394
                    span: span(1..2),
5395
                    kind: ast::LiteralKind::Verbatim,
5396
                    c: ']',
5397
                })],
5398
            };
5399
            Ok((set, union))
5400
        });
5401
        assert_eq!(
5402
            parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5403
            {
5404
                let set = ast::ClassBracketed {
5405
                    span: span(0..4),
5406
                    negated: false,
5407
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5408
                        span: span(2..2),
5409
                        items: vec![],
5410
                    }),
5411
                };
5412
                let union = ast::ClassSetUnion {
5413
                    span: span(2..3),
5414
                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5415
                        span: span(2..3),
5416
                        kind: ast::LiteralKind::Verbatim,
5417
                        c: ']',
5418
                    })],
5419
                };
5420
                Ok((set, union))
5421
            }
5422
        );
5423
        assert_eq!(parser("[^]a]").parse_set_class_open(), {
5424
            let set = ast::ClassBracketed {
5425
                span: span(0..3),
5426
                negated: true,
5427
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5428
                    span: span(2..2),
5429
                    items: vec![],
5430
                }),
5431
            };
5432
            let union = ast::ClassSetUnion {
5433
                span: span(2..3),
5434
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5435
                    span: span(2..3),
5436
                    kind: ast::LiteralKind::Verbatim,
5437
                    c: ']',
5438
                })],
5439
            };
5440
            Ok((set, union))
5441
        });
        // When a `-` comes first, the `]` that follows it is NOT expected in
        // the union: only the `-` literal is collected and the expected `set`
        // span stops at offset 2.
5442
        assert_eq!(parser("[-]a]").parse_set_class_open(), {
5443
            let set = ast::ClassBracketed {
5444
                span: span(0..2),
5445
                negated: false,
5446
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5447
                    span: span(1..1),
5448
                    items: vec![],
5449
                }),
5450
            };
5451
            let union = ast::ClassSetUnion {
5452
                span: span(1..2),
5453
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5454
                    span: span(1..2),
5455
                    kind: ast::LiteralKind::Verbatim,
5456
                    c: '-',
5457
                })],
5458
            };
5459
            Ok((set, union))
5460
        });
5461
5462
        // Inputs expected to fail with `ClassUnclosed`. Note the differing
        // expected spans: from `[` through the end of the input for the first
        // four cases, but an empty span (0..0) once a leading `-` is involved.
        assert_eq!(
5463
            parser("[").parse_set_class_open().unwrap_err(),
5464
            TestError {
5465
                span: span(0..1),
5466
                kind: ast::ErrorKind::ClassUnclosed,
5467
            }
5468
        );
5469
        assert_eq!(
5470
            parser_ignore_whitespace("[    ")
5471
                .parse_set_class_open()
5472
                .unwrap_err(),
5473
            TestError {
5474
                span: span(0..5),
5475
                kind: ast::ErrorKind::ClassUnclosed,
5476
            }
5477
        );
5478
        assert_eq!(
5479
            parser("[^").parse_set_class_open().unwrap_err(),
5480
            TestError {
5481
                span: span(0..2),
5482
                kind: ast::ErrorKind::ClassUnclosed,
5483
            }
5484
        );
5485
        assert_eq!(
5486
            parser("[]").parse_set_class_open().unwrap_err(),
5487
            TestError {
5488
                span: span(0..2),
5489
                kind: ast::ErrorKind::ClassUnclosed,
5490
            }
5491
        );
5492
        assert_eq!(
5493
            parser("[-").parse_set_class_open().unwrap_err(),
5494
            TestError {
5495
                span: span(0..0),
5496
                kind: ast::ErrorKind::ClassUnclosed,
5497
            }
5498
        );
5499
        assert_eq!(
5500
            parser("[--").parse_set_class_open().unwrap_err(),
5501
            TestError {
5502
                span: span(0..0),
5503
                kind: ast::ErrorKind::ClassUnclosed,
5504
            }
5505
        );
5506
5507
        // Regression case: this goes through the full `parse_with_comments`
        // entry point with `(?x)` set, and a `ClassUnclosed` error with an
        // empty span at offset 4 (where the `[` sits) is expected.
        // See: https://github.com/rust-lang/regex/issues/792
5508
        assert_eq!(
5509
            parser("(?x)[-#]").parse_with_comments().unwrap_err(),
5510
            TestError {
5511
                span: span(4..4),
5512
                kind: ast::ErrorKind::ClassUnclosed,
5513
            }
5514
        );
5515
    }
5516
5517
    // Checks `maybe_parse_ascii_class` on `[:name:]`-style inputs: well-formed
    // ones are expected to yield `Some(ast::ClassAscii)`, while malformed ones
    // are expected to yield `None` with the parser offset still at 0.
    #[test]
5518
    fn maybe_parse_ascii_class() {
5519
        // Well-formed classes. Trailing input (`A`) is not part of the
        // expected span, and a `^` right after `[:` is expected to set
        // `negated` and widen the span by one.
        assert_eq!(
5520
            parser(r"[:alnum:]").maybe_parse_ascii_class(),
5521
            Some(ast::ClassAscii {
5522
                span: span(0..9),
5523
                kind: ast::ClassAsciiKind::Alnum,
5524
                negated: false,
5525
            })
5526
        );
5527
        assert_eq!(
5528
            parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5529
            Some(ast::ClassAscii {
5530
                span: span(0..9),
5531
                kind: ast::ClassAsciiKind::Alnum,
5532
                negated: false,
5533
            })
5534
        );
5535
        assert_eq!(
5536
            parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5537
            Some(ast::ClassAscii {
5538
                span: span(0..10),
5539
                kind: ast::ClassAsciiKind::Alnum,
5540
                negated: true,
5541
            })
5542
        );
5543
5544
        // Truncated or malformed inputs (cut off early, `^` before the `:`,
        // misspelled name, missing `:` or `]` at the end): each is expected to
        // return `None`, and `offset()` is asserted to still be 0 afterwards.
        let p = parser(r"[:");
5545
        assert_eq!(p.maybe_parse_ascii_class(), None);
5546
        assert_eq!(p.offset(), 0);
5547
5548
        let p = parser(r"[:^");
5549
        assert_eq!(p.maybe_parse_ascii_class(), None);
5550
        assert_eq!(p.offset(), 0);
5551
5552
        let p = parser(r"[^:alnum:]");
5553
        assert_eq!(p.maybe_parse_ascii_class(), None);
5554
        assert_eq!(p.offset(), 0);
5555
5556
        let p = parser(r"[:alnnum:]");
5557
        assert_eq!(p.maybe_parse_ascii_class(), None);
5558
        assert_eq!(p.offset(), 0);
5559
5560
        let p = parser(r"[:alnum]");
5561
        assert_eq!(p.maybe_parse_ascii_class(), None);
5562
        assert_eq!(p.offset(), 0);
5563
5564
        let p = parser(r"[:alnum:");
5565
        assert_eq!(p.maybe_parse_ascii_class(), None);
5566
        assert_eq!(p.offset(), 0);
5567
    }
5568
5569
    #[test]
5570
    fn parse_unicode_class() {
5571
        assert_eq!(
5572
            parser(r"\pN").parse_escape(),
5573
            Ok(Primitive::Unicode(ast::ClassUnicode {
5574
                span: span(0..3),
5575
                negated: false,
5576
                kind: ast::ClassUnicodeKind::OneLetter('N'),
5577
            }))
5578
        );
5579
        assert_eq!(
5580
            parser(r"\PN").parse_escape(),
5581
            Ok(Primitive::Unicode(ast::ClassUnicode {
5582
                span: span(0..3),
5583
                negated: true,
5584
                kind: ast::ClassUnicodeKind::OneLetter('N'),
5585
            }))
5586
        );
5587
        assert_eq!(
5588
            parser(r"\p{N}").parse_escape(),
5589
            Ok(Primitive::Unicode(ast::ClassUnicode {
5590
                span: span(0..5),
5591
                negated: false,
5592
                kind: ast::ClassUnicodeKind::Named(s("N")),
5593
            }))
5594
        );
5595
        assert_eq!(
5596
            parser(r"\P{N}").parse_escape(),
5597
            Ok(Primitive::Unicode(ast::ClassUnicode {
5598
                span: span(0..5),
5599
                negated: true,
5600
                kind: ast::ClassUnicodeKind::Named(s("N")),
5601
            }))
5602
        );
5603
        assert_eq!(
5604
            parser(r"\p{Greek}").parse_escape(),
5605
            Ok(Primitive::Unicode(ast::ClassUnicode {
5606
                span: span(0..9),
5607
                negated: false,
5608
                kind: ast::ClassUnicodeKind::Named(s("Greek")),
5609
            }))
5610
        );
5611
5612
        assert_eq!(
5613
            parser(r"\p{scx:Katakana}").parse_escape(),
5614
            Ok(Primitive::Unicode(ast::ClassUnicode {
5615
                span: span(0..16),
5616
                negated: false,
5617
                kind: ast::ClassUnicodeKind::NamedValue {
5618
                    op: ast::ClassUnicodeOpKind::Colon,
5619
                    name: s("scx"),
5620
                    value: s("Katakana"),
5621
                },
5622
            }))
5623
        );
5624
        assert_eq!(
5625
            parser(r"\p{scx=Katakana}").parse_escape(),
5626
            Ok(Primitive::Unicode(ast::ClassUnicode {
5627
                span: span(0..16),
5628
                negated: false,
5629
                kind: ast::ClassUnicodeKind::NamedValue {
5630
                    op: ast::ClassUnicodeOpKind::Equal,
5631
                    name: s("scx"),
5632
                    value: s("Katakana"),
5633
                },
5634
            }))
5635
        );
5636
        assert_eq!(
5637
            parser(r"\p{scx!=Katakana}").parse_escape(),
5638
            Ok(Primitive::Unicode(ast::ClassUnicode {
5639
                span: span(0..17),
5640
                negated: false,
5641
                kind: ast::ClassUnicodeKind::NamedValue {
5642
                    op: ast::ClassUnicodeOpKind::NotEqual,
5643
                    name: s("scx"),
5644
                    value: s("Katakana"),
5645
                },
5646
            }))
5647
        );
5648
5649
        assert_eq!(
5650
            parser(r"\p{:}").parse_escape(),
5651
            Ok(Primitive::Unicode(ast::ClassUnicode {
5652
                span: span(0..5),
5653
                negated: false,
5654
                kind: ast::ClassUnicodeKind::NamedValue {
5655
                    op: ast::ClassUnicodeOpKind::Colon,
5656
                    name: s(""),
5657
                    value: s(""),
5658
                },
5659
            }))
5660
        );
5661
        assert_eq!(
5662
            parser(r"\p{=}").parse_escape(),
5663
            Ok(Primitive::Unicode(ast::ClassUnicode {
5664
                span: span(0..5),
5665
                negated: false,
5666
                kind: ast::ClassUnicodeKind::NamedValue {
5667
                    op: ast::ClassUnicodeOpKind::Equal,
5668
                    name: s(""),
5669
                    value: s(""),
5670
                },
5671
            }))
5672
        );
5673
        assert_eq!(
5674
            parser(r"\p{!=}").parse_escape(),
5675
            Ok(Primitive::Unicode(ast::ClassUnicode {
5676
                span: span(0..6),
5677
                negated: false,
5678
                kind: ast::ClassUnicodeKind::NamedValue {
5679
                    op: ast::ClassUnicodeOpKind::NotEqual,
5680
                    name: s(""),
5681
                    value: s(""),
5682
                },
5683
            }))
5684
        );
5685
5686
        assert_eq!(
5687
            parser(r"\p").parse_escape().unwrap_err(),
5688
            TestError {
5689
                span: span(2..2),
5690
                kind: ast::ErrorKind::EscapeUnexpectedEof,
5691
            }
5692
        );
5693
        assert_eq!(
5694
            parser(r"\p{").parse_escape().unwrap_err(),
5695
            TestError {
5696
                span: span(3..3),
5697
                kind: ast::ErrorKind::EscapeUnexpectedEof,
5698
            }
5699
        );
5700
        assert_eq!(
5701
            parser(r"\p{N").parse_escape().unwrap_err(),
5702
            TestError {
5703
                span: span(4..4),
5704
                kind: ast::ErrorKind::EscapeUnexpectedEof,
5705
            }
5706
        );
5707
        assert_eq!(
5708
            parser(r"\p{Greek").parse_escape().unwrap_err(),
5709
            TestError {
5710
                span: span(8..8),
5711
                kind: ast::ErrorKind::EscapeUnexpectedEof,
5712
            }
5713
        );
5714
5715
        assert_eq!(
5716
            parser(r"\pNz").parse(),
5717
            Ok(Ast::Concat(ast::Concat {
5718
                span: span(0..4),
5719
                asts: vec![
5720
                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5721
                        span: span(0..3),
5722
                        negated: false,
5723
                        kind: ast::ClassUnicodeKind::OneLetter('N'),
5724
                    })),
5725
                    Ast::Literal(ast::Literal {
5726
                        span: span(3..4),
5727
                        kind: ast::LiteralKind::Verbatim,
5728
                        c: 'z',
5729
                    }),
5730
                ],
5731
            }))
5732
        );
5733
        assert_eq!(
5734
            parser(r"\p{Greek}z").parse(),
5735
            Ok(Ast::Concat(ast::Concat {
5736
                span: span(0..10),
5737
                asts: vec![
5738
                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5739
                        span: span(0..9),
5740
                        negated: false,
5741
                        kind: ast::ClassUnicodeKind::Named(s("Greek")),
5742
                    })),
5743
                    Ast::Literal(ast::Literal {
5744
                        span: span(9..10),
5745
                        kind: ast::LiteralKind::Verbatim,
5746
                        c: 'z',
5747
                    }),
5748
                ],
5749
            }))
5750
        );
5751
        assert_eq!(
5752
            parser(r"\p\{").parse().unwrap_err(),
5753
            TestError {
5754
                span: span(2..3),
5755
                kind: ast::ErrorKind::UnicodeClassInvalid,
5756
            }
5757
        );
5758
        assert_eq!(
5759
            parser(r"\P\{").parse().unwrap_err(),
5760
            TestError {
5761
                span: span(2..3),
5762
                kind: ast::ErrorKind::UnicodeClassInvalid,
5763
            }
5764
        );
5765
    }
5766
5767
    #[test]
    fn parse_perl_class() {
        // Builds the expected class node for a Perl escape that sits at the
        // very start of the pattern; every such escape checked below is two
        // bytes long, hence the fixed `0..2` span.
        let perl = |kind: ast::ClassPerlKind, negated: bool| ast::ClassPerl {
            span: span(0..2),
            kind,
            negated,
        };

        // The lowercase escape is the plain class and the uppercase escape is
        // its negation.
        assert_eq!(
            parser(r"\d").parse_escape(),
            Ok(Primitive::Perl(perl(ast::ClassPerlKind::Digit, false)))
        );
        assert_eq!(
            parser(r"\D").parse_escape(),
            Ok(Primitive::Perl(perl(ast::ClassPerlKind::Digit, true)))
        );
        assert_eq!(
            parser(r"\s").parse_escape(),
            Ok(Primitive::Perl(perl(ast::ClassPerlKind::Space, false)))
        );
        assert_eq!(
            parser(r"\S").parse_escape(),
            Ok(Primitive::Perl(perl(ast::ClassPerlKind::Space, true)))
        );
        assert_eq!(
            parser(r"\w").parse_escape(),
            Ok(Primitive::Perl(perl(ast::ClassPerlKind::Word, false)))
        );
        assert_eq!(
            parser(r"\W").parse_escape(),
            Ok(Primitive::Perl(perl(ast::ClassPerlKind::Word, true)))
        );

        // A full parse wraps the same class in an `Ast::Class`, on its own or
        // as the first element of a concatenation.
        let digit = perl(ast::ClassPerlKind::Digit, false);
        assert_eq!(
            parser(r"\d").parse(),
            Ok(Ast::Class(ast::Class::Perl(digit.clone())))
        );

        let trailing_z = Ast::Literal(ast::Literal {
            span: span(2..3),
            kind: ast::LiteralKind::Verbatim,
            c: 'z',
        });
        assert_eq!(
            parser(r"\dz").parse(),
            Ok(Ast::Concat(ast::Concat {
                span: span(0..3),
                asts: vec![Ast::Class(ast::Class::Perl(digit)), trailing_z],
            }))
        );
    }
5845
5846
    // Regression test for a bug in which the nest limit checker did not
    // decrement its depth during post-traversal, so long regexes tripped
    // the default limit too aggressively.
    #[test]
    fn regression_454_nest_too_big() {
        let regex_text = r#"
        2(?:
          [45]\d{3}|
          7(?:
            1[0-267]|
            2[0-289]|
            3[0-29]|
            4[01]|
            5[1-3]|
            6[013]|
            7[0178]|
            91
          )|
          8(?:
            0[125]|
            [139][1-6]|
            2[0157-9]|
            41|
            6[1-35]|
            7[1-5]|
            8[1-8]|
            90
          )|
          9(?:
            0[0-2]|
            1[0-4]|
            2[568]|
            3[3-6]|
            5[5-7]|
            6[0167]|
            7[15]|
            8[0146-9]
          )
        )\d{4}
        "#;
        // The pattern must parse successfully under this nest limit.
        let nest_limit = 50;
        let outcome = parser_nest_limit(regex_text, nest_limit).parse();
        assert!(outcome.is_ok());
    }
5888
5889
    // Regression test: a trailing `-` inside a character class is a literal
    // `-`, even when whitespace mode is enabled and whitespace follows the
    // trailing `-`.
    #[test]
    fn regression_455_trailing_dash_ignore_whitespace() {
        // True when the pattern parses without error.
        let parses = |pattern: &str| parser(pattern).parse().is_ok();

        // Closed classes: the dangling `-` is accepted.
        assert!(parses("(?x)[ / - ]"));
        assert!(parses("(?x)[ a - ]"));
        assert!(parses(
            "(?x)[
            a
            - ]
        "
        ));
        assert!(parses(
            "(?x)[
            a # wat
            - ]
        "
        ));

        // Classes that are never closed must still be rejected.
        assert!(!parses("(?x)[ / -"));
        assert!(!parses("(?x)[ / - "));
        assert!(!parses(
            "(?x)[
            / -
        "
        ));
        assert!(!parses(
            "(?x)[
            / - # wat
        "
        ));
    }
5930
}