Coverage Report

Created: 2025-07-11 07:02

/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-syntax-0.8.5/src/ast/parse.rs
Line
Count
Source (jump to first uncovered line)
1
/*!
2
This module provides a regular expression parser.
3
*/
4
5
use core::{
6
    borrow::Borrow,
7
    cell::{Cell, RefCell},
8
    mem,
9
};
10
11
use alloc::{
12
    boxed::Box,
13
    string::{String, ToString},
14
    vec,
15
    vec::Vec,
16
};
17
18
use crate::{
19
    ast::{self, Ast, Position, Span},
20
    either::Either,
21
    is_escapeable_character, is_meta_character,
22
};
23
24
type Result<T> = core::result::Result<T, ast::Error>;
25
26
/// A primitive is an expression with no sub-expressions. This includes
27
/// literals, assertions and non-set character classes. This representation
28
/// is used as intermediate state in the parser.
29
///
30
/// This does not include ASCII character classes, since they can only appear
31
/// within a set character class.
32
#[derive(Clone, Debug, Eq, PartialEq)]
33
enum Primitive {
34
    Literal(ast::Literal),
35
    Assertion(ast::Assertion),
36
    Dot(Span),
37
    Perl(ast::ClassPerl),
38
    Unicode(ast::ClassUnicode),
39
}
40
41
impl Primitive {
42
    /// Return the span of this primitive.
43
0
    fn span(&self) -> &Span {
44
0
        match *self {
45
0
            Primitive::Literal(ref x) => &x.span,
46
0
            Primitive::Assertion(ref x) => &x.span,
47
0
            Primitive::Dot(ref span) => span,
48
0
            Primitive::Perl(ref x) => &x.span,
49
0
            Primitive::Unicode(ref x) => &x.span,
50
        }
51
0
    }
52
53
    /// Convert this primitive into a proper AST.
54
0
    fn into_ast(self) -> Ast {
55
0
        match self {
56
0
            Primitive::Literal(lit) => Ast::literal(lit),
57
0
            Primitive::Assertion(assert) => Ast::assertion(assert),
58
0
            Primitive::Dot(span) => Ast::dot(span),
59
0
            Primitive::Perl(cls) => Ast::class_perl(cls),
60
0
            Primitive::Unicode(cls) => Ast::class_unicode(cls),
61
        }
62
0
    }
63
64
    /// Convert this primitive into an item in a character class.
65
    ///
66
    /// If this primitive is not a legal item (i.e., an assertion or a dot),
67
    /// then return an error.
68
0
    fn into_class_set_item<P: Borrow<Parser>>(
69
0
        self,
70
0
        p: &ParserI<'_, P>,
71
0
    ) -> Result<ast::ClassSetItem> {
72
        use self::Primitive::*;
73
        use crate::ast::ClassSetItem;
74
75
0
        match self {
76
0
            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
77
0
            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
78
0
            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
79
0
            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
80
        }
81
0
    }
82
83
    /// Convert this primitive into a literal in a character class. In
84
    /// particular, literals are the only valid items that can appear in
85
    /// ranges.
86
    ///
87
    /// If this primitive is not a legal item (i.e., a class, assertion or a
88
    /// dot), then return an error.
89
0
    fn into_class_literal<P: Borrow<Parser>>(
90
0
        self,
91
0
        p: &ParserI<'_, P>,
92
0
    ) -> Result<ast::Literal> {
93
        use self::Primitive::*;
94
95
0
        match self {
96
0
            Literal(lit) => Ok(lit),
97
0
            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
98
        }
99
0
    }
100
}
101
102
/// Returns true if the given character is a hexadecimal digit.
103
0
fn is_hex(c: char) -> bool {
104
0
    ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
105
0
}
106
107
/// Returns true if the given character is a valid in a capture group name.
108
///
109
/// If `first` is true, then `c` is treated as the first character in the
110
/// group name (which must be alphabetic or underscore).
111
0
fn is_capture_char(c: char, first: bool) -> bool {
112
0
    if first {
113
0
        c == '_' || c.is_alphabetic()
114
    } else {
115
0
        c == '_' || c == '.' || c == '[' || c == ']' || c.is_alphanumeric()
116
    }
117
0
}
118
119
/// A builder for a regular expression parser.
120
///
121
/// This builder permits modifying configuration options for the parser.
122
#[derive(Clone, Debug)]
123
pub struct ParserBuilder {
124
    ignore_whitespace: bool,
125
    nest_limit: u32,
126
    octal: bool,
127
    empty_min_range: bool,
128
}
129
130
impl Default for ParserBuilder {
131
0
    fn default() -> ParserBuilder {
132
0
        ParserBuilder::new()
133
0
    }
134
}
135
136
impl ParserBuilder {
137
    /// Create a new parser builder with a default configuration.
138
0
    pub fn new() -> ParserBuilder {
139
0
        ParserBuilder {
140
0
            ignore_whitespace: false,
141
0
            nest_limit: 250,
142
0
            octal: false,
143
0
            empty_min_range: false,
144
0
        }
145
0
    }
146
147
    /// Build a parser from this configuration with the given pattern.
148
0
    pub fn build(&self) -> Parser {
149
0
        Parser {
150
0
            pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
151
0
            capture_index: Cell::new(0),
152
0
            nest_limit: self.nest_limit,
153
0
            octal: self.octal,
154
0
            empty_min_range: self.empty_min_range,
155
0
            initial_ignore_whitespace: self.ignore_whitespace,
156
0
            ignore_whitespace: Cell::new(self.ignore_whitespace),
157
0
            comments: RefCell::new(vec![]),
158
0
            stack_group: RefCell::new(vec![]),
159
0
            stack_class: RefCell::new(vec![]),
160
0
            capture_names: RefCell::new(vec![]),
161
0
            scratch: RefCell::new(String::new()),
162
0
        }
163
0
    }
164
165
    /// Set the nesting limit for this parser.
166
    ///
167
    /// The nesting limit controls how deep the abstract syntax tree is allowed
168
    /// to be. If the AST exceeds the given limit (e.g., with too many nested
169
    /// groups), then an error is returned by the parser.
170
    ///
171
    /// The purpose of this limit is to act as a heuristic to prevent stack
172
    /// overflow for consumers that do structural induction on an `Ast` using
173
    /// explicit recursion. While this crate never does this (instead using
174
    /// constant stack space and moving the call stack to the heap), other
175
    /// crates may.
176
    ///
177
    /// This limit is not checked until the entire AST is parsed. Therefore,
178
    /// if callers want to put a limit on the amount of heap space used, then
179
    /// they should impose a limit on the length, in bytes, of the concrete
180
    /// pattern string. In particular, this is viable since this parser
181
    /// implementation will limit itself to heap space proportional to the
182
    /// length of the pattern string.
183
    ///
184
    /// Note that a nest limit of `0` will return a nest limit error for most
185
    /// patterns but not all. For example, a nest limit of `0` permits `a` but
186
    /// not `ab`, since `ab` requires a concatenation, which results in a nest
187
    /// depth of `1`. In general, a nest limit is not something that manifests
188
    /// in an obvious way in the concrete syntax, therefore, it should not be
189
    /// used in a granular way.
190
0
    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
191
0
        self.nest_limit = limit;
192
0
        self
193
0
    }
194
195
    /// Whether to support octal syntax or not.
196
    ///
197
    /// Octal syntax is a little-known way of uttering Unicode codepoints in
198
    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
199
    /// `\141` are all equivalent regular expressions, where the last example
200
    /// shows octal syntax.
201
    ///
202
    /// While supporting octal syntax isn't in and of itself a problem, it does
203
    /// make good error messages harder. That is, in PCRE based regex engines,
204
    /// syntax like `\0` invokes a backreference, which is explicitly
205
    /// unsupported in Rust's regex engine. However, many users expect it to
206
    /// be supported. Therefore, when octal support is disabled, the error
207
    /// message will explicitly mention that backreferences aren't supported.
208
    ///
209
    /// Octal syntax is disabled by default.
210
0
    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
211
0
        self.octal = yes;
212
0
        self
213
0
    }
214
215
    /// Enable verbose mode in the regular expression.
216
    ///
217
    /// When enabled, verbose mode permits insignificant whitespace in many
218
    /// places in the regular expression, as well as comments. Comments are
219
    /// started using `#` and continue until the end of the line.
220
    ///
221
    /// By default, this is disabled. It may be selectively enabled in the
222
    /// regular expression by using the `x` flag regardless of this setting.
223
0
    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
224
0
        self.ignore_whitespace = yes;
225
0
        self
226
0
    }
227
228
    /// Allow using `{,n}` as an equivalent to `{0,n}`.
229
    ///
230
    /// When enabled, the parser accepts `{,n}` as valid syntax for `{0,n}`.
231
    /// Most regular expression engines don't support the `{,n}` syntax, but
232
    /// some others do it, namely Python's `re` library.
233
    ///
234
    /// This is disabled by default.
235
0
    pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder {
236
0
        self.empty_min_range = yes;
237
0
        self
238
0
    }
239
}
240
241
/// A regular expression parser.
242
///
243
/// This parses a string representation of a regular expression into an
244
/// abstract syntax tree. The size of the tree is proportional to the length
245
/// of the regular expression pattern.
246
///
247
/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
248
#[derive(Clone, Debug)]
249
pub struct Parser {
250
    /// The current position of the parser.
251
    pos: Cell<Position>,
252
    /// The current capture index.
253
    capture_index: Cell<u32>,
254
    /// The maximum number of open parens/brackets allowed. If the parser
255
    /// exceeds this number, then an error is returned.
256
    nest_limit: u32,
257
    /// Whether to support octal syntax or not. When `false`, the parser will
258
    /// return an error helpfully pointing out that backreferences are not
259
    /// supported.
260
    octal: bool,
261
    /// The initial setting for `ignore_whitespace` as provided by
262
    /// `ParserBuilder`. It is used when resetting the parser's state.
263
    initial_ignore_whitespace: bool,
264
    /// Whether the parser supports `{,n}` repetitions as an equivalent to
265
    /// `{0,n}.`
266
    empty_min_range: bool,
267
    /// Whether whitespace should be ignored. When enabled, comments are
268
    /// also permitted.
269
    ignore_whitespace: Cell<bool>,
270
    /// A list of comments, in order of appearance.
271
    comments: RefCell<Vec<ast::Comment>>,
272
    /// A stack of grouped sub-expressions, including alternations.
273
    stack_group: RefCell<Vec<GroupState>>,
274
    /// A stack of nested character classes. This is only non-empty when
275
    /// parsing a class.
276
    stack_class: RefCell<Vec<ClassState>>,
277
    /// A sorted sequence of capture names. This is used to detect duplicate
278
    /// capture names and report an error if one is detected.
279
    capture_names: RefCell<Vec<ast::CaptureName>>,
280
    /// A scratch buffer used in various places. Mostly this is used to
281
    /// accumulate relevant characters from parts of a pattern.
282
    scratch: RefCell<String>,
283
}
284
285
/// ParserI is the internal parser implementation.
286
///
287
/// We use this separate type so that we can carry the provided pattern string
288
/// along with us. In particular, a `Parser` internal state is not tied to any
289
/// one pattern, but `ParserI` is.
290
///
291
/// This type also lets us use `ParserI<&Parser>` in production code while
292
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
293
/// work against the internal interface of the parser.
294
#[derive(Clone, Debug)]
295
struct ParserI<'s, P> {
296
    /// The parser state/configuration.
297
    parser: P,
298
    /// The full regular expression provided by the user.
299
    pattern: &'s str,
300
}
301
302
/// GroupState represents a single stack frame while parsing nested groups
303
/// and alternations. Each frame records the state up to an opening parenthesis
304
/// or a alternating bracket `|`.
305
#[derive(Clone, Debug)]
306
enum GroupState {
307
    /// This state is pushed whenever an opening group is found.
308
    Group {
309
        /// The concatenation immediately preceding the opening group.
310
        concat: ast::Concat,
311
        /// The group that has been opened. Its sub-AST is always empty.
312
        group: ast::Group,
313
        /// Whether this group has the `x` flag enabled or not.
314
        ignore_whitespace: bool,
315
    },
316
    /// This state is pushed whenever a new alternation branch is found. If
317
    /// an alternation branch is found and this state is at the top of the
318
    /// stack, then this state should be modified to include the new
319
    /// alternation.
320
    Alternation(ast::Alternation),
321
}
322
323
/// ClassState represents a single stack frame while parsing character classes.
324
/// Each frame records the state up to an intersection, difference, symmetric
325
/// difference or nested class.
326
///
327
/// Note that a parser's character class stack is only non-empty when parsing
328
/// a character class. In all other cases, it is empty.
329
#[derive(Clone, Debug)]
330
enum ClassState {
331
    /// This state is pushed whenever an opening bracket is found.
332
    Open {
333
        /// The union of class items immediately preceding this class.
334
        union: ast::ClassSetUnion,
335
        /// The class that has been opened. Typically this just corresponds
336
        /// to the `[`, but it can also include `[^` since `^` indicates
337
        /// negation of the class.
338
        set: ast::ClassBracketed,
339
    },
340
    /// This state is pushed when a operator is seen. When popped, the stored
341
    /// set becomes the left hand side of the operator.
342
    Op {
343
        /// The type of the operation, i.e., &&, -- or ~~.
344
        kind: ast::ClassSetBinaryOpKind,
345
        /// The left-hand side of the operator.
346
        lhs: ast::ClassSet,
347
    },
348
}
349
350
impl Parser {
351
    /// Create a new parser with a default configuration.
352
    ///
353
    /// The parser can be run with either the `parse` or `parse_with_comments`
354
    /// methods. The parse methods return an abstract syntax tree.
355
    ///
356
    /// To set configuration options on the parser, use [`ParserBuilder`].
357
0
    pub fn new() -> Parser {
358
0
        ParserBuilder::new().build()
359
0
    }
360
361
    /// Parse the regular expression into an abstract syntax tree.
362
0
    pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
363
0
        ParserI::new(self, pattern).parse()
364
0
    }
365
366
    /// Parse the regular expression and return an abstract syntax tree with
367
    /// all of the comments found in the pattern.
368
0
    pub fn parse_with_comments(
369
0
        &mut self,
370
0
        pattern: &str,
371
0
    ) -> Result<ast::WithComments> {
372
0
        ParserI::new(self, pattern).parse_with_comments()
373
0
    }
374
375
    /// Reset the internal state of a parser.
376
    ///
377
    /// This is called at the beginning of every parse. This prevents the
378
    /// parser from running with inconsistent state (say, if a previous
379
    /// invocation returned an error and the parser is reused).
380
0
    fn reset(&self) {
381
0
        // These settings should be in line with the construction
382
0
        // in `ParserBuilder::build`.
383
0
        self.pos.set(Position { offset: 0, line: 1, column: 1 });
384
0
        self.ignore_whitespace.set(self.initial_ignore_whitespace);
385
0
        self.comments.borrow_mut().clear();
386
0
        self.stack_group.borrow_mut().clear();
387
0
        self.stack_class.borrow_mut().clear();
388
0
    }
389
}
390
391
impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
392
    /// Build an internal parser from a parser configuration and a pattern.
393
0
    fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
394
0
        ParserI { parser, pattern }
395
0
    }
396
397
    /// Return a reference to the parser state.
398
0
    fn parser(&self) -> &Parser {
399
0
        self.parser.borrow()
400
0
    }
401
402
    /// Return a reference to the pattern being parsed.
403
0
    fn pattern(&self) -> &str {
404
0
        self.pattern
405
0
    }
406
407
    /// Create a new error with the given span and error type.
408
0
    fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
409
0
        ast::Error { kind, pattern: self.pattern().to_string(), span }
410
0
    }
411
412
    /// Return the current offset of the parser.
413
    ///
414
    /// The offset starts at `0` from the beginning of the regular expression
415
    /// pattern string.
416
0
    fn offset(&self) -> usize {
417
0
        self.parser().pos.get().offset
418
0
    }
419
420
    /// Return the current line number of the parser.
421
    ///
422
    /// The line number starts at `1`.
423
0
    fn line(&self) -> usize {
424
0
        self.parser().pos.get().line
425
0
    }
426
427
    /// Return the current column of the parser.
428
    ///
429
    /// The column number starts at `1` and is reset whenever a `\n` is seen.
430
0
    fn column(&self) -> usize {
431
0
        self.parser().pos.get().column
432
0
    }
433
434
    /// Return the next capturing index. Each subsequent call increments the
435
    /// internal index.
436
    ///
437
    /// The span given should correspond to the location of the opening
438
    /// parenthesis.
439
    ///
440
    /// If the capture limit is exceeded, then an error is returned.
441
0
    fn next_capture_index(&self, span: Span) -> Result<u32> {
442
0
        let current = self.parser().capture_index.get();
443
0
        let i = current.checked_add(1).ok_or_else(|| {
444
0
            self.error(span, ast::ErrorKind::CaptureLimitExceeded)
445
0
        })?;
446
0
        self.parser().capture_index.set(i);
447
0
        Ok(i)
448
0
    }
449
450
    /// Adds the given capture name to this parser. If this capture name has
451
    /// already been used, then an error is returned.
452
0
    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
453
0
        let mut names = self.parser().capture_names.borrow_mut();
454
0
        match names
455
0
            .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
456
        {
457
0
            Err(i) => {
458
0
                names.insert(i, cap.clone());
459
0
                Ok(())
460
            }
461
0
            Ok(i) => Err(self.error(
462
0
                cap.span,
463
0
                ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
464
0
            )),
465
        }
466
0
    }
467
468
    /// Return whether the parser should ignore whitespace or not.
469
0
    fn ignore_whitespace(&self) -> bool {
470
0
        self.parser().ignore_whitespace.get()
471
0
    }
472
473
    /// Return the character at the current position of the parser.
474
    ///
475
    /// This panics if the current position does not point to a valid char.
476
0
    fn char(&self) -> char {
477
0
        self.char_at(self.offset())
478
0
    }
479
480
    /// Return the character at the given position.
481
    ///
482
    /// This panics if the given position does not point to a valid char.
483
0
    fn char_at(&self, i: usize) -> char {
484
0
        self.pattern()[i..]
485
0
            .chars()
486
0
            .next()
487
0
            .unwrap_or_else(|| panic!("expected char at offset {}", i))
488
0
    }
489
490
    /// Bump the parser to the next Unicode scalar value.
491
    ///
492
    /// If the end of the input has been reached, then `false` is returned.
493
0
    fn bump(&self) -> bool {
494
0
        if self.is_eof() {
495
0
            return false;
496
0
        }
497
0
        let Position { mut offset, mut line, mut column } = self.pos();
498
0
        if self.char() == '\n' {
499
0
            line = line.checked_add(1).unwrap();
500
0
            column = 1;
501
0
        } else {
502
0
            column = column.checked_add(1).unwrap();
503
0
        }
504
0
        offset += self.char().len_utf8();
505
0
        self.parser().pos.set(Position { offset, line, column });
506
0
        self.pattern()[self.offset()..].chars().next().is_some()
507
0
    }
508
509
    /// If the substring starting at the current position of the parser has
510
    /// the given prefix, then bump the parser to the character immediately
511
    /// following the prefix and return true. Otherwise, don't bump the parser
512
    /// and return false.
513
0
    fn bump_if(&self, prefix: &str) -> bool {
514
0
        if self.pattern()[self.offset()..].starts_with(prefix) {
515
0
            for _ in 0..prefix.chars().count() {
516
0
                self.bump();
517
0
            }
518
0
            true
519
        } else {
520
0
            false
521
        }
522
0
    }
523
524
    /// Returns true if and only if the parser is positioned at a look-around
525
    /// prefix. The conditions under which this returns true must always
526
    /// correspond to a regular expression that would otherwise be consider
527
    /// invalid.
528
    ///
529
    /// This should only be called immediately after parsing the opening of
530
    /// a group or a set of flags.
531
0
    fn is_lookaround_prefix(&self) -> bool {
532
0
        self.bump_if("?=")
533
0
            || self.bump_if("?!")
534
0
            || self.bump_if("?<=")
535
0
            || self.bump_if("?<!")
536
0
    }
537
538
    /// Bump the parser, and if the `x` flag is enabled, bump through any
539
    /// subsequent spaces. Return true if and only if the parser is not at
540
    /// EOF.
541
0
    fn bump_and_bump_space(&self) -> bool {
542
0
        if !self.bump() {
543
0
            return false;
544
0
        }
545
0
        self.bump_space();
546
0
        !self.is_eof()
547
0
    }
548
549
    /// If the `x` flag is enabled (i.e., whitespace insensitivity with
550
    /// comments), then this will advance the parser through all whitespace
551
    /// and comments to the next non-whitespace non-comment byte.
552
    ///
553
    /// If the `x` flag is disabled, then this is a no-op.
554
    ///
555
    /// This should be used selectively throughout the parser where
556
    /// arbitrary whitespace is permitted when the `x` flag is enabled. For
557
    /// example, `{   5  , 6}` is equivalent to `{5,6}`.
558
0
    fn bump_space(&self) {
559
0
        if !self.ignore_whitespace() {
560
0
            return;
561
0
        }
562
0
        while !self.is_eof() {
563
0
            if self.char().is_whitespace() {
564
0
                self.bump();
565
0
            } else if self.char() == '#' {
566
0
                let start = self.pos();
567
0
                let mut comment_text = String::new();
568
0
                self.bump();
569
0
                while !self.is_eof() {
570
0
                    let c = self.char();
571
0
                    self.bump();
572
0
                    if c == '\n' {
573
0
                        break;
574
0
                    }
575
0
                    comment_text.push(c);
576
                }
577
0
                let comment = ast::Comment {
578
0
                    span: Span::new(start, self.pos()),
579
0
                    comment: comment_text,
580
0
                };
581
0
                self.parser().comments.borrow_mut().push(comment);
582
            } else {
583
0
                break;
584
            }
585
        }
586
0
    }
587
588
    /// Peek at the next character in the input without advancing the parser.
589
    ///
590
    /// If the input has been exhausted, then this returns `None`.
591
0
    fn peek(&self) -> Option<char> {
592
0
        if self.is_eof() {
593
0
            return None;
594
0
        }
595
0
        self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
596
0
    }
597
598
    /// Like peek, but will ignore spaces when the parser is in whitespace
599
    /// insensitive mode.
600
0
    fn peek_space(&self) -> Option<char> {
601
0
        if !self.ignore_whitespace() {
602
0
            return self.peek();
603
0
        }
604
0
        if self.is_eof() {
605
0
            return None;
606
0
        }
607
0
        let mut start = self.offset() + self.char().len_utf8();
608
0
        let mut in_comment = false;
609
0
        for (i, c) in self.pattern()[start..].char_indices() {
610
0
            if c.is_whitespace() {
611
0
                continue;
612
0
            } else if !in_comment && c == '#' {
613
0
                in_comment = true;
614
0
            } else if in_comment && c == '\n' {
615
0
                in_comment = false;
616
0
            } else {
617
0
                start += i;
618
0
                break;
619
            }
620
        }
621
0
        self.pattern()[start..].chars().next()
622
0
    }
623
624
    /// Returns true if the next call to `bump` would return false.
625
0
    fn is_eof(&self) -> bool {
626
0
        self.offset() == self.pattern().len()
627
0
    }
628
629
    /// Return the current position of the parser, which includes the offset,
630
    /// line and column.
631
0
    fn pos(&self) -> Position {
632
0
        self.parser().pos.get()
633
0
    }
634
635
    /// Create a span at the current position of the parser. Both the start
636
    /// and end of the span are set.
637
0
    fn span(&self) -> Span {
638
0
        Span::splat(self.pos())
639
0
    }
640
641
    /// Create a span that covers the current character.
642
0
    fn span_char(&self) -> Span {
643
0
        let mut next = Position {
644
0
            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
645
0
            line: self.line(),
646
0
            column: self.column().checked_add(1).unwrap(),
647
0
        };
648
0
        if self.char() == '\n' {
649
0
            next.line += 1;
650
0
            next.column = 1;
651
0
        }
652
0
        Span::new(self.pos(), next)
653
0
    }
654
655
    /// Parse and push a single alternation on to the parser's internal stack.
656
    /// If the top of the stack already has an alternation, then add to that
657
    /// instead of pushing a new one.
658
    ///
659
    /// The concatenation given corresponds to a single alternation branch.
660
    /// The concatenation returned starts the next branch and is empty.
661
    ///
662
    /// This assumes the parser is currently positioned at `|` and will advance
663
    /// the parser to the character following `|`.
664
    #[inline(never)]
665
0
    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
666
0
        assert_eq!(self.char(), '|');
667
0
        concat.span.end = self.pos();
668
0
        self.push_or_add_alternation(concat);
669
0
        self.bump();
670
0
        Ok(ast::Concat { span: self.span(), asts: vec![] })
671
0
    }
672
673
    /// Pushes or adds the given branch of an alternation to the parser's
674
    /// internal stack of state.
675
0
    fn push_or_add_alternation(&self, concat: ast::Concat) {
676
        use self::GroupState::*;
677
678
0
        let mut stack = self.parser().stack_group.borrow_mut();
679
0
        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
680
0
            alts.asts.push(concat.into_ast());
681
0
            return;
682
0
        }
683
0
        stack.push(Alternation(ast::Alternation {
684
0
            span: Span::new(concat.span.start, self.pos()),
685
0
            asts: vec![concat.into_ast()],
686
0
        }));
687
0
    }
688
689
    /// Parse and push a group AST (and its parent concatenation) on to the
690
    /// parser's internal stack. Return a fresh concatenation corresponding
691
    /// to the group's sub-AST.
692
    ///
693
    /// If a set of flags was found (with no group), then the concatenation
694
    /// is returned with that set of flags added.
695
    ///
696
    /// This assumes that the parser is currently positioned on the opening
697
    /// parenthesis. It advances the parser to the character at the start
698
    /// of the sub-expression (or adjoining expression).
699
    ///
700
    /// If there was a problem parsing the start of the group, then an error
701
    /// is returned.
702
    #[inline(never)]
703
0
    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
704
0
        assert_eq!(self.char(), '(');
705
0
        match self.parse_group()? {
706
0
            Either::Left(set) => {
707
0
                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
708
0
                if let Some(v) = ignore {
709
0
                    self.parser().ignore_whitespace.set(v);
710
0
                }
711
712
0
                concat.asts.push(Ast::flags(set));
713
0
                Ok(concat)
714
            }
715
0
            Either::Right(group) => {
716
0
                let old_ignore_whitespace = self.ignore_whitespace();
717
0
                let new_ignore_whitespace = group
718
0
                    .flags()
719
0
                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
720
0
                    .unwrap_or(old_ignore_whitespace);
721
0
                self.parser().stack_group.borrow_mut().push(
722
0
                    GroupState::Group {
723
0
                        concat,
724
0
                        group,
725
0
                        ignore_whitespace: old_ignore_whitespace,
726
0
                    },
727
0
                );
728
0
                self.parser().ignore_whitespace.set(new_ignore_whitespace);
729
0
                Ok(ast::Concat { span: self.span(), asts: vec![] })
730
            }
731
        }
732
0
    }
733
734
    /// Pop a group AST from the parser's internal stack and set the group's
735
    /// AST to the given concatenation. Return the concatenation containing
736
    /// the group.
737
    ///
738
    /// This assumes that the parser is currently positioned on the closing
739
    /// parenthesis and advances the parser to the character following the `)`.
740
    ///
741
    /// If no such group could be popped, then an unopened group error is
742
    /// returned.
743
    #[inline(never)]
744
0
    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
745
        use self::GroupState::*;
746
747
0
        assert_eq!(self.char(), ')');
748
0
        let mut stack = self.parser().stack_group.borrow_mut();
749
0
        let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
750
0
            .pop()
751
        {
752
0
            Some(Group { concat, group, ignore_whitespace }) => {
753
0
                (concat, group, ignore_whitespace, None)
754
            }
755
0
            Some(Alternation(alt)) => match stack.pop() {
756
0
                Some(Group { concat, group, ignore_whitespace }) => {
757
0
                    (concat, group, ignore_whitespace, Some(alt))
758
                }
759
                None | Some(Alternation(_)) => {
760
0
                    return Err(self.error(
761
0
                        self.span_char(),
762
0
                        ast::ErrorKind::GroupUnopened,
763
0
                    ));
764
                }
765
            },
766
            None => {
767
0
                return Err(self
768
0
                    .error(self.span_char(), ast::ErrorKind::GroupUnopened));
769
            }
770
        };
771
0
        self.parser().ignore_whitespace.set(ignore_whitespace);
772
0
        group_concat.span.end = self.pos();
773
0
        self.bump();
774
0
        group.span.end = self.pos();
775
0
        match alt {
776
0
            Some(mut alt) => {
777
0
                alt.span.end = group_concat.span.end;
778
0
                alt.asts.push(group_concat.into_ast());
779
0
                group.ast = Box::new(alt.into_ast());
780
0
            }
781
0
            None => {
782
0
                group.ast = Box::new(group_concat.into_ast());
783
0
            }
784
        }
785
0
        prior_concat.asts.push(Ast::group(group));
786
0
        Ok(prior_concat)
787
0
    }
788
789
    /// Pop the last state from the parser's internal stack, if it exists, and
790
    /// add the given concatenation to it. There either must be no state or a
791
    /// single alternation item on the stack. Any other scenario produces an
792
    /// error.
793
    ///
794
    /// This assumes that the parser has advanced to the end.
795
    #[inline(never)]
796
0
    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
797
0
        concat.span.end = self.pos();
798
0
        let mut stack = self.parser().stack_group.borrow_mut();
799
0
        let ast = match stack.pop() {
800
0
            None => Ok(concat.into_ast()),
801
0
            Some(GroupState::Alternation(mut alt)) => {
802
0
                alt.span.end = self.pos();
803
0
                alt.asts.push(concat.into_ast());
804
0
                Ok(Ast::alternation(alt))
805
            }
806
0
            Some(GroupState::Group { group, .. }) => {
807
0
                return Err(
808
0
                    self.error(group.span, ast::ErrorKind::GroupUnclosed)
809
0
                );
810
            }
811
        };
812
        // If we try to pop again, there should be nothing.
813
0
        match stack.pop() {
814
0
            None => ast,
815
            Some(GroupState::Alternation(_)) => {
816
                // This unreachable is unfortunate. This case can't happen
817
                // because the only way we can be here is if there were two
818
                // `GroupState::Alternation`s adjacent in the parser's stack,
819
                // which we guarantee to never happen because we never push a
820
                // `GroupState::Alternation` if one is already at the top of
821
                // the stack.
822
0
                unreachable!()
823
            }
824
0
            Some(GroupState::Group { group, .. }) => {
825
0
                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
826
            }
827
        }
828
0
    }
829
830
    /// Parse the opening of a character class and push the current class
831
    /// parsing context onto the parser's stack. This assumes that the parser
832
    /// is positioned at an opening `[`. The given union should correspond to
833
    /// the union of set items built up before seeing the `[`.
834
    ///
835
    /// If there was a problem parsing the opening of the class, then an error
836
    /// is returned. Otherwise, a new union of set items for the class is
837
    /// returned (which may be populated with either a `]` or a `-`).
838
    #[inline(never)]
839
0
    fn push_class_open(
840
0
        &self,
841
0
        parent_union: ast::ClassSetUnion,
842
0
    ) -> Result<ast::ClassSetUnion> {
843
0
        assert_eq!(self.char(), '[');
844
845
0
        let (nested_set, nested_union) = self.parse_set_class_open()?;
846
0
        self.parser()
847
0
            .stack_class
848
0
            .borrow_mut()
849
0
            .push(ClassState::Open { union: parent_union, set: nested_set });
850
0
        Ok(nested_union)
851
0
    }
852
853
    /// Parse the end of a character class set and pop the character class
854
    /// parser stack. The union given corresponds to the last union built
855
    /// before seeing the closing `]`. The union returned corresponds to the
856
    /// parent character class set with the nested class added to it.
857
    ///
858
    /// This assumes that the parser is positioned at a `]` and will advance
859
    /// the parser to the byte immediately following the `]`.
860
    ///
861
    /// If the stack is empty after popping, then this returns the final
862
    /// "top-level" character class AST (where a "top-level" character class
863
    /// is one that is not nested inside any other character class).
864
    ///
865
    /// If there is no corresponding opening bracket on the parser's stack,
866
    /// then an error is returned.
867
    #[inline(never)]
868
0
    fn pop_class(
869
0
        &self,
870
0
        nested_union: ast::ClassSetUnion,
871
0
    ) -> Result<Either<ast::ClassSetUnion, ast::ClassBracketed>> {
872
0
        assert_eq!(self.char(), ']');
873
874
0
        let item = ast::ClassSet::Item(nested_union.into_item());
875
0
        let prevset = self.pop_class_op(item);
876
0
        let mut stack = self.parser().stack_class.borrow_mut();
877
0
        match stack.pop() {
878
            None => {
879
                // We can never observe an empty stack:
880
                //
881
                // 1) We are guaranteed to start with a non-empty stack since
882
                //    the character class parser is only initiated when it sees
883
                //    a `[`.
884
                // 2) If we ever observe an empty stack while popping after
885
                //    seeing a `]`, then we signal the character class parser
886
                //    to terminate.
887
0
                panic!("unexpected empty character class stack")
888
            }
889
            Some(ClassState::Op { .. }) => {
890
                // This panic is unfortunate, but this case is impossible
891
                // since we already popped the Op state if one exists above.
892
                // Namely, every push to the class parser stack is guarded by
893
                // whether an existing Op is already on the top of the stack.
894
                // If it is, the existing Op is modified. That is, the stack
895
                // can never have consecutive Op states.
896
0
                panic!("unexpected ClassState::Op")
897
            }
898
0
            Some(ClassState::Open { mut union, mut set }) => {
899
0
                self.bump();
900
0
                set.span.end = self.pos();
901
0
                set.kind = prevset;
902
0
                if stack.is_empty() {
903
0
                    Ok(Either::Right(set))
904
                } else {
905
0
                    union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
906
0
                    Ok(Either::Left(union))
907
                }
908
            }
909
        }
910
0
    }
911
912
    /// Return an "unclosed class" error whose span points to the most
913
    /// recently opened class.
914
    ///
915
    /// This should only be called while parsing a character class.
916
    #[inline(never)]
917
0
    fn unclosed_class_error(&self) -> ast::Error {
918
0
        for state in self.parser().stack_class.borrow().iter().rev() {
919
0
            if let ClassState::Open { ref set, .. } = *state {
920
0
                return self.error(set.span, ast::ErrorKind::ClassUnclosed);
921
0
            }
922
        }
923
        // We are guaranteed to have a non-empty stack with at least
924
        // one open bracket, so we should never get here.
925
0
        panic!("no open character class found")
926
0
    }
927
928
    /// Push the current set of class items on to the class parser's stack as
929
    /// the left hand side of the given operator.
930
    ///
931
    /// A fresh set union is returned, which should be used to build the right
932
    /// hand side of this operator.
933
    #[inline(never)]
934
0
    fn push_class_op(
935
0
        &self,
936
0
        next_kind: ast::ClassSetBinaryOpKind,
937
0
        next_union: ast::ClassSetUnion,
938
0
    ) -> ast::ClassSetUnion {
939
0
        let item = ast::ClassSet::Item(next_union.into_item());
940
0
        let new_lhs = self.pop_class_op(item);
941
0
        self.parser()
942
0
            .stack_class
943
0
            .borrow_mut()
944
0
            .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
945
0
        ast::ClassSetUnion { span: self.span(), items: vec![] }
946
0
    }
947
948
    /// Pop a character class set from the character class parser stack. If the
949
    /// top of the stack is just an item (not an operation), then return the
950
    /// given set unchanged. If the top of the stack is an operation, then the
951
    /// given set will be used as the rhs of the operation on the top of the
952
    /// stack. In that case, the binary operation is returned as a set.
953
    #[inline(never)]
954
0
    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
955
0
        let mut stack = self.parser().stack_class.borrow_mut();
956
0
        let (kind, lhs) = match stack.pop() {
957
0
            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
958
0
            Some(state @ ClassState::Open { .. }) => {
959
0
                stack.push(state);
960
0
                return rhs;
961
            }
962
0
            None => unreachable!(),
963
        };
964
0
        let span = Span::new(lhs.span().start, rhs.span().end);
965
0
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
966
0
            span,
967
0
            kind,
968
0
            lhs: Box::new(lhs),
969
0
            rhs: Box::new(rhs),
970
0
        })
971
0
    }
972
}
973
974
impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
975
    /// Parse the regular expression into an abstract syntax tree.
976
0
    fn parse(&self) -> Result<Ast> {
977
0
        self.parse_with_comments().map(|astc| astc.ast)
978
0
    }
979
980
    /// Parse the regular expression and return an abstract syntax tree with
981
    /// all of the comments found in the pattern.
982
0
    fn parse_with_comments(&self) -> Result<ast::WithComments> {
983
0
        assert_eq!(self.offset(), 0, "parser can only be used once");
984
0
        self.parser().reset();
985
0
        let mut concat = ast::Concat { span: self.span(), asts: vec![] };
986
        loop {
987
0
            self.bump_space();
988
0
            if self.is_eof() {
989
0
                break;
990
0
            }
991
0
            match self.char() {
992
0
                '(' => concat = self.push_group(concat)?,
993
0
                ')' => concat = self.pop_group(concat)?,
994
0
                '|' => concat = self.push_alternate(concat)?,
995
                '[' => {
996
0
                    let class = self.parse_set_class()?;
997
0
                    concat.asts.push(Ast::class_bracketed(class));
998
                }
999
                '?' => {
1000
0
                    concat = self.parse_uncounted_repetition(
1001
0
                        concat,
1002
0
                        ast::RepetitionKind::ZeroOrOne,
1003
0
                    )?;
1004
                }
1005
                '*' => {
1006
0
                    concat = self.parse_uncounted_repetition(
1007
0
                        concat,
1008
0
                        ast::RepetitionKind::ZeroOrMore,
1009
0
                    )?;
1010
                }
1011
                '+' => {
1012
0
                    concat = self.parse_uncounted_repetition(
1013
0
                        concat,
1014
0
                        ast::RepetitionKind::OneOrMore,
1015
0
                    )?;
1016
                }
1017
                '{' => {
1018
0
                    concat = self.parse_counted_repetition(concat)?;
1019
                }
1020
0
                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1021
            }
1022
        }
1023
0
        let ast = self.pop_group_end(concat)?;
1024
0
        NestLimiter::new(self).check(&ast)?;
1025
0
        Ok(ast::WithComments {
1026
0
            ast,
1027
0
            comments: mem::replace(
1028
0
                &mut *self.parser().comments.borrow_mut(),
1029
0
                vec![],
1030
0
            ),
1031
0
        })
1032
0
    }
1033
1034
    /// Parses an uncounted repetition operation. An uncounted repetition
1035
    /// operator includes ?, * and +, but does not include the {m,n} syntax.
1036
    /// The given `kind` should correspond to the operator observed by the
1037
    /// caller.
1038
    ///
1039
    /// This assumes that the parser is currently positioned at the repetition
1040
    /// operator and advances the parser to the first character after the
1041
    /// operator. (Note that the operator may include a single additional `?`,
1042
    /// which makes the operator ungreedy.)
1043
    ///
1044
    /// The caller should include the concatenation that is being built. The
1045
    /// concatenation returned includes the repetition operator applied to the
1046
    /// last expression in the given concatenation.
1047
    #[inline(never)]
1048
0
    fn parse_uncounted_repetition(
1049
0
        &self,
1050
0
        mut concat: ast::Concat,
1051
0
        kind: ast::RepetitionKind,
1052
0
    ) -> Result<ast::Concat> {
1053
0
        assert!(
1054
0
            self.char() == '?' || self.char() == '*' || self.char() == '+'
1055
        );
1056
0
        let op_start = self.pos();
1057
0
        let ast = match concat.asts.pop() {
1058
0
            Some(ast) => ast,
1059
            None => {
1060
0
                return Err(
1061
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1062
0
                )
1063
            }
1064
        };
1065
0
        match ast {
1066
            Ast::Empty(_) | Ast::Flags(_) => {
1067
0
                return Err(
1068
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1069
0
                )
1070
            }
1071
0
            _ => {}
1072
0
        }
1073
0
        let mut greedy = true;
1074
0
        if self.bump() && self.char() == '?' {
1075
0
            greedy = false;
1076
0
            self.bump();
1077
0
        }
1078
0
        concat.asts.push(Ast::repetition(ast::Repetition {
1079
0
            span: ast.span().with_end(self.pos()),
1080
0
            op: ast::RepetitionOp {
1081
0
                span: Span::new(op_start, self.pos()),
1082
0
                kind,
1083
0
            },
1084
0
            greedy,
1085
0
            ast: Box::new(ast),
1086
0
        }));
1087
0
        Ok(concat)
1088
0
    }
1089
1090
    /// Parses a counted repetition operation. A counted repetition operator
1091
    /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1092
    /// operators.
1093
    ///
1094
    /// This assumes that the parser is currently positioned at the opening `{`
1095
    /// and advances the parser to the first character after the operator.
1096
    /// (Note that the operator may include a single additional `?`, which
1097
    /// makes the operator ungreedy.)
1098
    ///
1099
    /// The caller should include the concatenation that is being built. The
1100
    /// concatenation returned includes the repetition operator applied to the
1101
    /// last expression in the given concatenation.
1102
    #[inline(never)]
1103
0
    fn parse_counted_repetition(
1104
0
        &self,
1105
0
        mut concat: ast::Concat,
1106
0
    ) -> Result<ast::Concat> {
1107
0
        assert!(self.char() == '{');
1108
0
        let start = self.pos();
1109
0
        let ast = match concat.asts.pop() {
1110
0
            Some(ast) => ast,
1111
            None => {
1112
0
                return Err(
1113
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1114
0
                )
1115
            }
1116
        };
1117
0
        match ast {
1118
            Ast::Empty(_) | Ast::Flags(_) => {
1119
0
                return Err(
1120
0
                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1121
0
                )
1122
            }
1123
0
            _ => {}
1124
0
        }
1125
0
        if !self.bump_and_bump_space() {
1126
0
            return Err(self.error(
1127
0
                Span::new(start, self.pos()),
1128
0
                ast::ErrorKind::RepetitionCountUnclosed,
1129
0
            ));
1130
0
        }
1131
0
        let count_start = specialize_err(
1132
0
            self.parse_decimal(),
1133
0
            ast::ErrorKind::DecimalEmpty,
1134
0
            ast::ErrorKind::RepetitionCountDecimalEmpty,
1135
0
        );
1136
0
        if self.is_eof() {
1137
0
            return Err(self.error(
1138
0
                Span::new(start, self.pos()),
1139
0
                ast::ErrorKind::RepetitionCountUnclosed,
1140
0
            ));
1141
0
        }
1142
0
        let range = if self.char() == ',' {
1143
0
            if !self.bump_and_bump_space() {
1144
0
                return Err(self.error(
1145
0
                    Span::new(start, self.pos()),
1146
0
                    ast::ErrorKind::RepetitionCountUnclosed,
1147
0
                ));
1148
0
            }
1149
0
            if self.char() != '}' {
1150
0
                let count_start = match count_start {
1151
0
                    Ok(c) => c,
1152
0
                    Err(err)
1153
0
                        if err.kind
1154
0
                            == ast::ErrorKind::RepetitionCountDecimalEmpty =>
1155
0
                    {
1156
0
                        if self.parser().empty_min_range {
1157
0
                            0
1158
                        } else {
1159
0
                            return Err(err);
1160
                        }
1161
                    }
1162
0
                    err => err?,
1163
                };
1164
0
                let count_end = specialize_err(
1165
0
                    self.parse_decimal(),
1166
0
                    ast::ErrorKind::DecimalEmpty,
1167
0
                    ast::ErrorKind::RepetitionCountDecimalEmpty,
1168
0
                )?;
1169
0
                ast::RepetitionRange::Bounded(count_start, count_end)
1170
            } else {
1171
0
                ast::RepetitionRange::AtLeast(count_start?)
1172
            }
1173
        } else {
1174
0
            ast::RepetitionRange::Exactly(count_start?)
1175
        };
1176
1177
0
        if self.is_eof() || self.char() != '}' {
1178
0
            return Err(self.error(
1179
0
                Span::new(start, self.pos()),
1180
0
                ast::ErrorKind::RepetitionCountUnclosed,
1181
0
            ));
1182
0
        }
1183
0
1184
0
        let mut greedy = true;
1185
0
        if self.bump_and_bump_space() && self.char() == '?' {
1186
0
            greedy = false;
1187
0
            self.bump();
1188
0
        }
1189
1190
0
        let op_span = Span::new(start, self.pos());
1191
0
        if !range.is_valid() {
1192
0
            return Err(
1193
0
                self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1194
0
            );
1195
0
        }
1196
0
        concat.asts.push(Ast::repetition(ast::Repetition {
1197
0
            span: ast.span().with_end(self.pos()),
1198
0
            op: ast::RepetitionOp {
1199
0
                span: op_span,
1200
0
                kind: ast::RepetitionKind::Range(range),
1201
0
            },
1202
0
            greedy,
1203
0
            ast: Box::new(ast),
1204
0
        }));
1205
0
        Ok(concat)
1206
0
    }
1207
1208
    /// Parse a group (which contains a sub-expression) or a set of flags.
1209
    ///
1210
    /// If a group was found, then it is returned with an empty AST. If a set
1211
    /// of flags is found, then that set is returned.
1212
    ///
1213
    /// The parser should be positioned at the opening parenthesis.
1214
    ///
1215
    /// This advances the parser to the character before the start of the
1216
    /// sub-expression (in the case of a group) or to the closing parenthesis
1217
    /// immediately following the set of flags.
1218
    ///
1219
    /// # Errors
1220
    ///
1221
    /// If flags are given and incorrectly specified, then a corresponding
1222
    /// error is returned.
1223
    ///
1224
    /// If a capture name is given and it is incorrectly specified, then a
1225
    /// corresponding error is returned.
1226
    #[inline(never)]
1227
0
    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1228
0
        assert_eq!(self.char(), '(');
1229
0
        let open_span = self.span_char();
1230
0
        self.bump();
1231
0
        self.bump_space();
1232
0
        if self.is_lookaround_prefix() {
1233
0
            return Err(self.error(
1234
0
                Span::new(open_span.start, self.span().end),
1235
0
                ast::ErrorKind::UnsupportedLookAround,
1236
0
            ));
1237
0
        }
1238
0
        let inner_span = self.span();
1239
0
        let mut starts_with_p = true;
1240
0
        if self.bump_if("?P<") || {
1241
0
            starts_with_p = false;
1242
0
            self.bump_if("?<")
1243
        } {
1244
0
            let capture_index = self.next_capture_index(open_span)?;
1245
0
            let name = self.parse_capture_name(capture_index)?;
1246
0
            Ok(Either::Right(ast::Group {
1247
0
                span: open_span,
1248
0
                kind: ast::GroupKind::CaptureName { starts_with_p, name },
1249
0
                ast: Box::new(Ast::empty(self.span())),
1250
0
            }))
1251
0
        } else if self.bump_if("?") {
1252
0
            if self.is_eof() {
1253
0
                return Err(
1254
0
                    self.error(open_span, ast::ErrorKind::GroupUnclosed)
1255
0
                );
1256
0
            }
1257
0
            let flags = self.parse_flags()?;
1258
0
            let char_end = self.char();
1259
0
            self.bump();
1260
0
            if char_end == ')' {
1261
                // We don't allow empty flags, e.g., `(?)`. We instead
1262
                // interpret it as a repetition operator missing its argument.
1263
0
                if flags.items.is_empty() {
1264
0
                    return Err(self.error(
1265
0
                        inner_span,
1266
0
                        ast::ErrorKind::RepetitionMissing,
1267
0
                    ));
1268
0
                }
1269
0
                Ok(Either::Left(ast::SetFlags {
1270
0
                    span: Span { end: self.pos(), ..open_span },
1271
0
                    flags,
1272
0
                }))
1273
            } else {
1274
0
                assert_eq!(char_end, ':');
1275
0
                Ok(Either::Right(ast::Group {
1276
0
                    span: open_span,
1277
0
                    kind: ast::GroupKind::NonCapturing(flags),
1278
0
                    ast: Box::new(Ast::empty(self.span())),
1279
0
                }))
1280
            }
1281
        } else {
1282
0
            let capture_index = self.next_capture_index(open_span)?;
1283
0
            Ok(Either::Right(ast::Group {
1284
0
                span: open_span,
1285
0
                kind: ast::GroupKind::CaptureIndex(capture_index),
1286
0
                ast: Box::new(Ast::empty(self.span())),
1287
0
            }))
1288
        }
1289
0
    }
1290
1291
    /// Parses a capture group name. Assumes that the parser is positioned at
1292
    /// the first character in the name following the opening `<` (and may
1293
    /// possibly be EOF). This advances the parser to the first character
1294
    /// following the closing `>`.
1295
    ///
1296
    /// The caller must provide the capture index of the group for this name.
1297
    #[inline(never)]
1298
0
    fn parse_capture_name(
1299
0
        &self,
1300
0
        capture_index: u32,
1301
0
    ) -> Result<ast::CaptureName> {
1302
0
        if self.is_eof() {
1303
0
            return Err(self
1304
0
                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1305
0
        }
1306
0
        let start = self.pos();
1307
        loop {
1308
0
            if self.char() == '>' {
1309
0
                break;
1310
0
            }
1311
0
            if !is_capture_char(self.char(), self.pos() == start) {
1312
0
                return Err(self.error(
1313
0
                    self.span_char(),
1314
0
                    ast::ErrorKind::GroupNameInvalid,
1315
0
                ));
1316
0
            }
1317
0
            if !self.bump() {
1318
0
                break;
1319
0
            }
1320
        }
1321
0
        let end = self.pos();
1322
0
        if self.is_eof() {
1323
0
            return Err(self
1324
0
                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1325
0
        }
1326
0
        assert_eq!(self.char(), '>');
1327
0
        self.bump();
1328
0
        let name = &self.pattern()[start.offset..end.offset];
1329
0
        if name.is_empty() {
1330
0
            return Err(self.error(
1331
0
                Span::new(start, start),
1332
0
                ast::ErrorKind::GroupNameEmpty,
1333
0
            ));
1334
0
        }
1335
0
        let capname = ast::CaptureName {
1336
0
            span: Span::new(start, end),
1337
0
            name: name.to_string(),
1338
0
            index: capture_index,
1339
0
        };
1340
0
        self.add_capture_name(&capname)?;
1341
0
        Ok(capname)
1342
0
    }
1343
1344
    /// Parse a sequence of flags starting at the current character.
1345
    ///
1346
    /// This advances the parser to the character immediately following the
1347
    /// flags, which is guaranteed to be either `:` or `)`.
1348
    ///
1349
    /// # Errors
1350
    ///
1351
    /// If any flags are duplicated, then an error is returned.
1352
    ///
1353
    /// If the negation operator is used more than once, then an error is
1354
    /// returned.
1355
    ///
1356
    /// If no flags could be found or if the negation operation is not followed
1357
    /// by any flags, then an error is returned.
1358
    #[inline(never)]
1359
0
    fn parse_flags(&self) -> Result<ast::Flags> {
1360
0
        let mut flags = ast::Flags { span: self.span(), items: vec![] };
1361
0
        let mut last_was_negation = None;
1362
0
        while self.char() != ':' && self.char() != ')' {
1363
0
            if self.char() == '-' {
1364
0
                last_was_negation = Some(self.span_char());
1365
0
                let item = ast::FlagsItem {
1366
0
                    span: self.span_char(),
1367
0
                    kind: ast::FlagsItemKind::Negation,
1368
0
                };
1369
0
                if let Some(i) = flags.add_item(item) {
1370
0
                    return Err(self.error(
1371
0
                        self.span_char(),
1372
0
                        ast::ErrorKind::FlagRepeatedNegation {
1373
0
                            original: flags.items[i].span,
1374
0
                        },
1375
0
                    ));
1376
0
                }
1377
            } else {
1378
0
                last_was_negation = None;
1379
0
                let item = ast::FlagsItem {
1380
0
                    span: self.span_char(),
1381
0
                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1382
                };
1383
0
                if let Some(i) = flags.add_item(item) {
1384
0
                    return Err(self.error(
1385
0
                        self.span_char(),
1386
0
                        ast::ErrorKind::FlagDuplicate {
1387
0
                            original: flags.items[i].span,
1388
0
                        },
1389
0
                    ));
1390
0
                }
1391
            }
1392
0
            if !self.bump() {
1393
0
                return Err(
1394
0
                    self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1395
0
                );
1396
0
            }
1397
        }
1398
0
        if let Some(span) = last_was_negation {
1399
0
            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1400
0
        }
1401
0
        flags.span.end = self.pos();
1402
0
        Ok(flags)
1403
0
    }
1404
1405
    /// Parse the current character as a flag. Do not advance the parser.
1406
    ///
1407
    /// # Errors
1408
    ///
1409
    /// If the flag is not recognized, then an error is returned.
1410
    #[inline(never)]
1411
0
    fn parse_flag(&self) -> Result<ast::Flag> {
1412
0
        match self.char() {
1413
0
            'i' => Ok(ast::Flag::CaseInsensitive),
1414
0
            'm' => Ok(ast::Flag::MultiLine),
1415
0
            's' => Ok(ast::Flag::DotMatchesNewLine),
1416
0
            'U' => Ok(ast::Flag::SwapGreed),
1417
0
            'u' => Ok(ast::Flag::Unicode),
1418
0
            'R' => Ok(ast::Flag::CRLF),
1419
0
            'x' => Ok(ast::Flag::IgnoreWhitespace),
1420
            _ => {
1421
0
                Err(self
1422
0
                    .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1423
            }
1424
        }
1425
0
    }
1426
1427
    /// Parse a primitive AST. e.g., A literal, non-set character class or
1428
    /// assertion.
1429
    ///
1430
    /// This assumes that the parser expects a primitive at the current
1431
    /// location. i.e., All other non-primitive cases have been handled.
1432
    /// For example, if the parser's position is at `|`, then `|` will be
1433
    /// treated as a literal (e.g., inside a character class).
1434
    ///
1435
    /// This advances the parser to the first character immediately following
1436
    /// the primitive.
1437
0
    fn parse_primitive(&self) -> Result<Primitive> {
1438
0
        match self.char() {
1439
0
            '\\' => self.parse_escape(),
1440
            '.' => {
1441
0
                let ast = Primitive::Dot(self.span_char());
1442
0
                self.bump();
1443
0
                Ok(ast)
1444
            }
1445
            '^' => {
1446
0
                let ast = Primitive::Assertion(ast::Assertion {
1447
0
                    span: self.span_char(),
1448
0
                    kind: ast::AssertionKind::StartLine,
1449
0
                });
1450
0
                self.bump();
1451
0
                Ok(ast)
1452
            }
1453
            '$' => {
1454
0
                let ast = Primitive::Assertion(ast::Assertion {
1455
0
                    span: self.span_char(),
1456
0
                    kind: ast::AssertionKind::EndLine,
1457
0
                });
1458
0
                self.bump();
1459
0
                Ok(ast)
1460
            }
1461
0
            c => {
1462
0
                let ast = Primitive::Literal(ast::Literal {
1463
0
                    span: self.span_char(),
1464
0
                    kind: ast::LiteralKind::Verbatim,
1465
0
                    c,
1466
0
                });
1467
0
                self.bump();
1468
0
                Ok(ast)
1469
            }
1470
        }
1471
0
    }
1472
1473
    /// Parse an escape sequence as a primitive AST.
1474
    ///
1475
    /// This assumes the parser is positioned at the start of the escape
1476
    /// sequence, i.e., `\`. It advances the parser to the first position
1477
    /// immediately following the escape sequence.
1478
    #[inline(never)]
1479
0
    fn parse_escape(&self) -> Result<Primitive> {
1480
0
        assert_eq!(self.char(), '\\');
1481
0
        let start = self.pos();
1482
0
        if !self.bump() {
1483
0
            return Err(self.error(
1484
0
                Span::new(start, self.pos()),
1485
0
                ast::ErrorKind::EscapeUnexpectedEof,
1486
0
            ));
1487
0
        }
1488
0
        let c = self.char();
1489
        // Put some of the more complicated routines into helpers.
1490
0
        match c {
1491
0
            '0'..='7' => {
1492
0
                if !self.parser().octal {
1493
0
                    return Err(self.error(
1494
0
                        Span::new(start, self.span_char().end),
1495
0
                        ast::ErrorKind::UnsupportedBackreference,
1496
0
                    ));
1497
0
                }
1498
0
                let mut lit = self.parse_octal();
1499
0
                lit.span.start = start;
1500
0
                return Ok(Primitive::Literal(lit));
1501
            }
1502
0
            '8'..='9' if !self.parser().octal => {
1503
0
                return Err(self.error(
1504
0
                    Span::new(start, self.span_char().end),
1505
0
                    ast::ErrorKind::UnsupportedBackreference,
1506
0
                ));
1507
            }
1508
            'x' | 'u' | 'U' => {
1509
0
                let mut lit = self.parse_hex()?;
1510
0
                lit.span.start = start;
1511
0
                return Ok(Primitive::Literal(lit));
1512
            }
1513
            'p' | 'P' => {
1514
0
                let mut cls = self.parse_unicode_class()?;
1515
0
                cls.span.start = start;
1516
0
                return Ok(Primitive::Unicode(cls));
1517
            }
1518
            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1519
0
                let mut cls = self.parse_perl_class();
1520
0
                cls.span.start = start;
1521
0
                return Ok(Primitive::Perl(cls));
1522
            }
1523
0
            _ => {}
1524
0
        }
1525
0
1526
0
        // Handle all of the one letter sequences inline.
1527
0
        self.bump();
1528
0
        let span = Span::new(start, self.pos());
1529
0
        if is_meta_character(c) {
1530
0
            return Ok(Primitive::Literal(ast::Literal {
1531
0
                span,
1532
0
                kind: ast::LiteralKind::Meta,
1533
0
                c,
1534
0
            }));
1535
0
        }
1536
0
        if is_escapeable_character(c) {
1537
0
            return Ok(Primitive::Literal(ast::Literal {
1538
0
                span,
1539
0
                kind: ast::LiteralKind::Superfluous,
1540
0
                c,
1541
0
            }));
1542
0
        }
1543
0
        let special = |kind, c| {
1544
0
            Ok(Primitive::Literal(ast::Literal {
1545
0
                span,
1546
0
                kind: ast::LiteralKind::Special(kind),
1547
0
                c,
1548
0
            }))
1549
0
        };
1550
0
        match c {
1551
0
            'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1552
0
            'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1553
0
            't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1554
0
            'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1555
0
            'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1556
0
            'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1557
0
            'A' => Ok(Primitive::Assertion(ast::Assertion {
1558
0
                span,
1559
0
                kind: ast::AssertionKind::StartText,
1560
0
            })),
1561
0
            'z' => Ok(Primitive::Assertion(ast::Assertion {
1562
0
                span,
1563
0
                kind: ast::AssertionKind::EndText,
1564
0
            })),
1565
            'b' => {
1566
0
                let mut wb = ast::Assertion {
1567
0
                    span,
1568
0
                    kind: ast::AssertionKind::WordBoundary,
1569
0
                };
1570
0
                // After a \b, we "try" to parse things like \b{start} for
1571
0
                // special word boundary assertions.
1572
0
                if !self.is_eof() && self.char() == '{' {
1573
0
                    if let Some(kind) =
1574
0
                        self.maybe_parse_special_word_boundary(start)?
1575
0
                    {
1576
0
                        wb.kind = kind;
1577
0
                        wb.span.end = self.pos();
1578
0
                    }
1579
0
                }
1580
0
                Ok(Primitive::Assertion(wb))
1581
            }
1582
0
            'B' => Ok(Primitive::Assertion(ast::Assertion {
1583
0
                span,
1584
0
                kind: ast::AssertionKind::NotWordBoundary,
1585
0
            })),
1586
0
            '<' => Ok(Primitive::Assertion(ast::Assertion {
1587
0
                span,
1588
0
                kind: ast::AssertionKind::WordBoundaryStartAngle,
1589
0
            })),
1590
0
            '>' => Ok(Primitive::Assertion(ast::Assertion {
1591
0
                span,
1592
0
                kind: ast::AssertionKind::WordBoundaryEndAngle,
1593
0
            })),
1594
0
            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1595
        }
1596
0
    }
1597
1598
    /// Attempt to parse a specialty word boundary. That is, `\b{start}`,
1599
    /// `\b{end}`, `\b{start-half}` or `\b{end-half}`.
1600
    ///
1601
    /// This is similar to `maybe_parse_ascii_class` in that, in most cases,
1602
    /// if it fails it will just return `None` with no error. This is done
1603
    /// because `\b{5}` is a valid expression and we want to let that be parsed
1604
    /// by the existing counted repetition parsing code. (I thought about just
1605
    /// invoking the counted repetition code from here, but it seemed a little
1606
    /// ham-fisted.)
1607
    ///
1608
    /// Unlike `maybe_parse_ascii_class` though, this can return an error.
1609
    /// Namely, if we definitely know it isn't a counted repetition, then we
1610
    /// return an error specific to the specialty word boundaries.
1611
    ///
1612
    /// This assumes the parser is positioned at a `{` immediately following
1613
    /// a `\b`. When `None` is returned, the parser is returned to the position
1614
    /// at which it started: pointing at a `{`.
1615
    ///
1616
    /// The position given should correspond to the start of the `\b`.
1617
0
    fn maybe_parse_special_word_boundary(
1618
0
        &self,
1619
0
        wb_start: Position,
1620
0
    ) -> Result<Option<ast::AssertionKind>> {
1621
0
        assert_eq!(self.char(), '{');
1622
1623
0
        let is_valid_char = |c| match c {
1624
0
            'A'..='Z' | 'a'..='z' | '-' => true,
1625
0
            _ => false,
1626
0
        };
1627
0
        let start = self.pos();
1628
0
        if !self.bump_and_bump_space() {
1629
0
            return Err(self.error(
1630
0
                Span::new(wb_start, self.pos()),
1631
0
                ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
1632
0
            ));
1633
0
        }
1634
0
        let start_contents = self.pos();
1635
0
        // This is one of the critical bits: if the first non-whitespace
1636
0
        // character isn't in [-A-Za-z] (i.e., this can't be a special word
1637
0
        // boundary), then we bail and let the counted repetition parser deal
1638
0
        // with this.
1639
0
        if !is_valid_char(self.char()) {
1640
0
            self.parser().pos.set(start);
1641
0
            return Ok(None);
1642
0
        }
1643
0
1644
0
        // Now collect up our chars until we see a '}'.
1645
0
        let mut scratch = self.parser().scratch.borrow_mut();
1646
0
        scratch.clear();
1647
0
        while !self.is_eof() && is_valid_char(self.char()) {
1648
0
            scratch.push(self.char());
1649
0
            self.bump_and_bump_space();
1650
0
        }
1651
0
        if self.is_eof() || self.char() != '}' {
1652
0
            return Err(self.error(
1653
0
                Span::new(start, self.pos()),
1654
0
                ast::ErrorKind::SpecialWordBoundaryUnclosed,
1655
0
            ));
1656
0
        }
1657
0
        let end = self.pos();
1658
0
        self.bump();
1659
0
        let kind = match scratch.as_str() {
1660
0
            "start" => ast::AssertionKind::WordBoundaryStart,
1661
0
            "end" => ast::AssertionKind::WordBoundaryEnd,
1662
0
            "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
1663
0
            "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
1664
            _ => {
1665
0
                return Err(self.error(
1666
0
                    Span::new(start_contents, end),
1667
0
                    ast::ErrorKind::SpecialWordBoundaryUnrecognized,
1668
0
                ))
1669
            }
1670
        };
1671
0
        Ok(Some(kind))
1672
0
    }
1673
1674
    /// Parse an octal representation of a Unicode codepoint up to 3 digits
1675
    /// long. This expects the parser to be positioned at the first octal
1676
    /// digit and advances the parser to the first character immediately
1677
    /// following the octal number. This also assumes that parsing octal
1678
    /// escapes is enabled.
1679
    ///
1680
    /// Assuming the preconditions are met, this routine can never fail.
1681
    #[inline(never)]
1682
0
    fn parse_octal(&self) -> ast::Literal {
1683
0
        assert!(self.parser().octal);
1684
0
        assert!('0' <= self.char() && self.char() <= '7');
1685
0
        let start = self.pos();
1686
        // Parse up to two more digits.
1687
0
        while self.bump()
1688
0
            && '0' <= self.char()
1689
0
            && self.char() <= '7'
1690
0
            && self.pos().offset - start.offset <= 2
1691
0
        {}
1692
0
        let end = self.pos();
1693
0
        let octal = &self.pattern()[start.offset..end.offset];
1694
0
        // Parsing the octal should never fail since the above guarantees a
1695
0
        // valid number.
1696
0
        let codepoint =
1697
0
            u32::from_str_radix(octal, 8).expect("valid octal number");
1698
0
        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1699
0
        // invalid Unicode scalar values.
1700
0
        let c = char::from_u32(codepoint).expect("Unicode scalar value");
1701
0
        ast::Literal {
1702
0
            span: Span::new(start, end),
1703
0
            kind: ast::LiteralKind::Octal,
1704
0
            c,
1705
0
        }
1706
0
    }
1707
1708
    /// Parse a hex representation of a Unicode codepoint. This handles both
1709
    /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1710
    /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1711
    /// the first character immediately following the hexadecimal literal.
1712
    #[inline(never)]
1713
0
    fn parse_hex(&self) -> Result<ast::Literal> {
1714
0
        assert!(
1715
0
            self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1716
        );
1717
1718
0
        let hex_kind = match self.char() {
1719
0
            'x' => ast::HexLiteralKind::X,
1720
0
            'u' => ast::HexLiteralKind::UnicodeShort,
1721
0
            _ => ast::HexLiteralKind::UnicodeLong,
1722
        };
1723
0
        if !self.bump_and_bump_space() {
1724
0
            return Err(
1725
0
                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1726
0
            );
1727
0
        }
1728
0
        if self.char() == '{' {
1729
0
            self.parse_hex_brace(hex_kind)
1730
        } else {
1731
0
            self.parse_hex_digits(hex_kind)
1732
        }
1733
0
    }
1734
1735
    /// Parse an N-digit hex representation of a Unicode codepoint. This
1736
    /// expects the parser to be positioned at the first digit and will advance
1737
    /// the parser to the first character immediately following the escape
1738
    /// sequence.
1739
    ///
1740
    /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1741
    /// or 8 (for `\UNNNNNNNN`).
1742
    #[inline(never)]
1743
0
    fn parse_hex_digits(
1744
0
        &self,
1745
0
        kind: ast::HexLiteralKind,
1746
0
    ) -> Result<ast::Literal> {
1747
0
        let mut scratch = self.parser().scratch.borrow_mut();
1748
0
        scratch.clear();
1749
0
1750
0
        let start = self.pos();
1751
0
        for i in 0..kind.digits() {
1752
0
            if i > 0 && !self.bump_and_bump_space() {
1753
0
                return Err(self
1754
0
                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1755
0
            }
1756
0
            if !is_hex(self.char()) {
1757
0
                return Err(self.error(
1758
0
                    self.span_char(),
1759
0
                    ast::ErrorKind::EscapeHexInvalidDigit,
1760
0
                ));
1761
0
            }
1762
0
            scratch.push(self.char());
1763
        }
1764
        // The final bump just moves the parser past the literal, which may
1765
        // be EOF.
1766
0
        self.bump_and_bump_space();
1767
0
        let end = self.pos();
1768
0
        let hex = scratch.as_str();
1769
0
        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1770
0
            None => Err(self.error(
1771
0
                Span::new(start, end),
1772
0
                ast::ErrorKind::EscapeHexInvalid,
1773
0
            )),
1774
0
            Some(c) => Ok(ast::Literal {
1775
0
                span: Span::new(start, end),
1776
0
                kind: ast::LiteralKind::HexFixed(kind),
1777
0
                c,
1778
0
            }),
1779
        }
1780
0
    }
1781
1782
    /// Parse a hex representation of any Unicode scalar value. This expects
1783
    /// the parser to be positioned at the opening brace `{` and will advance
1784
    /// the parser to the first character following the closing brace `}`.
1785
    #[inline(never)]
1786
0
    fn parse_hex_brace(
1787
0
        &self,
1788
0
        kind: ast::HexLiteralKind,
1789
0
    ) -> Result<ast::Literal> {
1790
0
        let mut scratch = self.parser().scratch.borrow_mut();
1791
0
        scratch.clear();
1792
0
1793
0
        let brace_pos = self.pos();
1794
0
        let start = self.span_char().end;
1795
0
        while self.bump_and_bump_space() && self.char() != '}' {
1796
0
            if !is_hex(self.char()) {
1797
0
                return Err(self.error(
1798
0
                    self.span_char(),
1799
0
                    ast::ErrorKind::EscapeHexInvalidDigit,
1800
0
                ));
1801
0
            }
1802
0
            scratch.push(self.char());
1803
        }
1804
0
        if self.is_eof() {
1805
0
            return Err(self.error(
1806
0
                Span::new(brace_pos, self.pos()),
1807
0
                ast::ErrorKind::EscapeUnexpectedEof,
1808
0
            ));
1809
0
        }
1810
0
        let end = self.pos();
1811
0
        let hex = scratch.as_str();
1812
0
        assert_eq!(self.char(), '}');
1813
0
        self.bump_and_bump_space();
1814
0
1815
0
        if hex.is_empty() {
1816
0
            return Err(self.error(
1817
0
                Span::new(brace_pos, self.pos()),
1818
0
                ast::ErrorKind::EscapeHexEmpty,
1819
0
            ));
1820
0
        }
1821
0
        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1822
0
            None => Err(self.error(
1823
0
                Span::new(start, end),
1824
0
                ast::ErrorKind::EscapeHexInvalid,
1825
0
            )),
1826
0
            Some(c) => Ok(ast::Literal {
1827
0
                span: Span::new(start, self.pos()),
1828
0
                kind: ast::LiteralKind::HexBrace(kind),
1829
0
                c,
1830
0
            }),
1831
        }
1832
0
    }
1833
1834
    /// Parse a decimal number into a u32 while trimming leading and trailing
1835
    /// whitespace.
1836
    ///
1837
    /// This expects the parser to be positioned at the first position where
1838
    /// a decimal digit could occur. This will advance the parser to the byte
1839
    /// immediately following the last contiguous decimal digit.
1840
    ///
1841
    /// If no decimal digit could be found or if there was a problem parsing
1842
    /// the complete set of digits into a u32, then an error is returned.
1843
0
    fn parse_decimal(&self) -> Result<u32> {
1844
0
        let mut scratch = self.parser().scratch.borrow_mut();
1845
0
        scratch.clear();
1846
1847
0
        while !self.is_eof() && self.char().is_whitespace() {
1848
0
            self.bump();
1849
0
        }
1850
0
        let start = self.pos();
1851
0
        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1852
0
            scratch.push(self.char());
1853
0
            self.bump_and_bump_space();
1854
0
        }
1855
0
        let span = Span::new(start, self.pos());
1856
0
        while !self.is_eof() && self.char().is_whitespace() {
1857
0
            self.bump_and_bump_space();
1858
0
        }
1859
0
        let digits = scratch.as_str();
1860
0
        if digits.is_empty() {
1861
0
            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1862
0
        }
1863
0
        match u32::from_str_radix(digits, 10).ok() {
1864
0
            Some(n) => Ok(n),
1865
0
            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1866
        }
1867
0
    }
1868
1869
    /// Parse a standard character class consisting primarily of characters or
1870
    /// character ranges, but can also contain nested character classes of
1871
    /// any type (sans `.`).
1872
    ///
1873
    /// This assumes the parser is positioned at the opening `[`. If parsing
1874
    /// is successful, then the parser is advanced to the position immediately
1875
    /// following the closing `]`.
1876
    #[inline(never)]
1877
0
    fn parse_set_class(&self) -> Result<ast::ClassBracketed> {
1878
0
        assert_eq!(self.char(), '[');
1879
1880
0
        let mut union =
1881
0
            ast::ClassSetUnion { span: self.span(), items: vec![] };
1882
        loop {
1883
0
            self.bump_space();
1884
0
            if self.is_eof() {
1885
0
                return Err(self.unclosed_class_error());
1886
0
            }
1887
0
            match self.char() {
1888
                '[' => {
1889
                    // If we've already parsed the opening bracket, then
1890
                    // attempt to treat this as the beginning of an ASCII
1891
                    // class. If ASCII class parsing fails, then the parser
1892
                    // backs up to `[`.
1893
0
                    if !self.parser().stack_class.borrow().is_empty() {
1894
0
                        if let Some(cls) = self.maybe_parse_ascii_class() {
1895
0
                            union.push(ast::ClassSetItem::Ascii(cls));
1896
0
                            continue;
1897
0
                        }
1898
0
                    }
1899
0
                    union = self.push_class_open(union)?;
1900
                }
1901
0
                ']' => match self.pop_class(union)? {
1902
0
                    Either::Left(nested_union) => {
1903
0
                        union = nested_union;
1904
0
                    }
1905
0
                    Either::Right(class) => return Ok(class),
1906
                },
1907
0
                '&' if self.peek() == Some('&') => {
1908
0
                    assert!(self.bump_if("&&"));
1909
0
                    union = self.push_class_op(
1910
0
                        ast::ClassSetBinaryOpKind::Intersection,
1911
0
                        union,
1912
0
                    );
1913
                }
1914
0
                '-' if self.peek() == Some('-') => {
1915
0
                    assert!(self.bump_if("--"));
1916
0
                    union = self.push_class_op(
1917
0
                        ast::ClassSetBinaryOpKind::Difference,
1918
0
                        union,
1919
0
                    );
1920
                }
1921
0
                '~' if self.peek() == Some('~') => {
1922
0
                    assert!(self.bump_if("~~"));
1923
0
                    union = self.push_class_op(
1924
0
                        ast::ClassSetBinaryOpKind::SymmetricDifference,
1925
0
                        union,
1926
0
                    );
1927
                }
1928
                _ => {
1929
0
                    union.push(self.parse_set_class_range()?);
1930
                }
1931
            }
1932
        }
1933
0
    }
1934
1935
    /// Parse a single primitive item in a character class set. The item to
1936
    /// be parsed can either be one of a simple literal character, a range
1937
    /// between two simple literal characters or a "primitive" character
1938
    /// class like \w or \p{Greek}.
1939
    ///
1940
    /// If an invalid escape is found, or if a character class is found where
1941
    /// a simple literal is expected (e.g., in a range), then an error is
1942
    /// returned.
1943
    #[inline(never)]
1944
0
    fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1945
0
        let prim1 = self.parse_set_class_item()?;
1946
0
        self.bump_space();
1947
0
        if self.is_eof() {
1948
0
            return Err(self.unclosed_class_error());
1949
0
        }
1950
0
        // If the next char isn't a `-`, then we don't have a range.
1951
0
        // There are two exceptions. If the char after a `-` is a `]`, then
1952
0
        // `-` is interpreted as a literal `-`. Alternatively, if the char
1953
0
        // after a `-` is a `-`, then `--` corresponds to a "difference"
1954
0
        // operation.
1955
0
        if self.char() != '-'
1956
0
            || self.peek_space() == Some(']')
1957
0
            || self.peek_space() == Some('-')
1958
        {
1959
0
            return prim1.into_class_set_item(self);
1960
0
        }
1961
0
        // OK, now we're parsing a range, so bump past the `-` and parse the
1962
0
        // second half of the range.
1963
0
        if !self.bump_and_bump_space() {
1964
0
            return Err(self.unclosed_class_error());
1965
0
        }
1966
0
        let prim2 = self.parse_set_class_item()?;
1967
0
        let range = ast::ClassSetRange {
1968
0
            span: Span::new(prim1.span().start, prim2.span().end),
1969
0
            start: prim1.into_class_literal(self)?,
1970
0
            end: prim2.into_class_literal(self)?,
1971
        };
1972
0
        if !range.is_valid() {
1973
0
            return Err(
1974
0
                self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1975
0
            );
1976
0
        }
1977
0
        Ok(ast::ClassSetItem::Range(range))
1978
0
    }
1979
1980
    /// Parse a single item in a character class as a primitive, where the
1981
    /// primitive either consists of a verbatim literal or a single escape
1982
    /// sequence.
1983
    ///
1984
    /// This assumes the parser is positioned at the beginning of a primitive,
1985
    /// and advances the parser to the first position after the primitive if
1986
    /// successful.
1987
    ///
1988
    /// Note that it is the caller's responsibility to report an error if an
1989
    /// illegal primitive was parsed.
1990
    #[inline(never)]
1991
0
    fn parse_set_class_item(&self) -> Result<Primitive> {
1992
0
        if self.char() == '\\' {
1993
0
            self.parse_escape()
1994
        } else {
1995
0
            let x = Primitive::Literal(ast::Literal {
1996
0
                span: self.span_char(),
1997
0
                kind: ast::LiteralKind::Verbatim,
1998
0
                c: self.char(),
1999
0
            });
2000
0
            self.bump();
2001
0
            Ok(x)
2002
        }
2003
0
    }
2004
2005
    /// Parses the opening of a character class set. This includes the opening
2006
    /// bracket along with `^` if present to indicate negation. This also
2007
    /// starts parsing the opening set of unioned items if applicable, since
2008
    /// there are special rules applied to certain characters in the opening
2009
    /// of a character class. For example, `[^]]` is the class of all
2010
    /// characters not equal to `]`. (`]` would need to be escaped in any other
2011
    /// position.) Similarly for `-`.
2012
    ///
2013
    /// In all cases, the op inside the returned `ast::ClassBracketed` is an
2014
    /// empty union. This empty union should be replaced with the actual item
2015
    /// when it is popped from the parser's stack.
2016
    ///
2017
    /// This assumes the parser is positioned at the opening `[` and advances
2018
    /// the parser to the first non-special byte of the character class.
2019
    ///
2020
    /// An error is returned if EOF is found.
2021
    #[inline(never)]
2022
0
    fn parse_set_class_open(
2023
0
        &self,
2024
0
    ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
2025
0
        assert_eq!(self.char(), '[');
2026
0
        let start = self.pos();
2027
0
        if !self.bump_and_bump_space() {
2028
0
            return Err(self.error(
2029
0
                Span::new(start, self.pos()),
2030
0
                ast::ErrorKind::ClassUnclosed,
2031
0
            ));
2032
0
        }
2033
2034
0
        let negated = if self.char() != '^' {
2035
0
            false
2036
        } else {
2037
0
            if !self.bump_and_bump_space() {
2038
0
                return Err(self.error(
2039
0
                    Span::new(start, self.pos()),
2040
0
                    ast::ErrorKind::ClassUnclosed,
2041
0
                ));
2042
0
            }
2043
0
            true
2044
        };
2045
        // Accept any number of `-` as literal `-`.
2046
0
        let mut union =
2047
0
            ast::ClassSetUnion { span: self.span(), items: vec![] };
2048
0
        while self.char() == '-' {
2049
0
            union.push(ast::ClassSetItem::Literal(ast::Literal {
2050
0
                span: self.span_char(),
2051
0
                kind: ast::LiteralKind::Verbatim,
2052
0
                c: '-',
2053
0
            }));
2054
0
            if !self.bump_and_bump_space() {
2055
0
                return Err(self.error(
2056
0
                    Span::new(start, start),
2057
0
                    ast::ErrorKind::ClassUnclosed,
2058
0
                ));
2059
0
            }
2060
        }
2061
        // If `]` is the *first* char in a set, then interpret it as a literal
2062
        // `]`. That is, an empty class is impossible to write.
2063
0
        if union.items.is_empty() && self.char() == ']' {
2064
0
            union.push(ast::ClassSetItem::Literal(ast::Literal {
2065
0
                span: self.span_char(),
2066
0
                kind: ast::LiteralKind::Verbatim,
2067
0
                c: ']',
2068
0
            }));
2069
0
            if !self.bump_and_bump_space() {
2070
0
                return Err(self.error(
2071
0
                    Span::new(start, self.pos()),
2072
0
                    ast::ErrorKind::ClassUnclosed,
2073
0
                ));
2074
0
            }
2075
0
        }
2076
0
        let set = ast::ClassBracketed {
2077
0
            span: Span::new(start, self.pos()),
2078
0
            negated,
2079
0
            kind: ast::ClassSet::union(ast::ClassSetUnion {
2080
0
                span: Span::new(union.span.start, union.span.start),
2081
0
                items: vec![],
2082
0
            }),
2083
0
        };
2084
0
        Ok((set, union))
2085
0
    }
2086
2087
    /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
2088
    ///
2089
    /// This assumes the parser is positioned at the opening `[`.
2090
    ///
2091
    /// If no valid ASCII character class could be found, then this does not
2092
    /// advance the parser and `None` is returned. Otherwise, the parser is
2093
    /// advanced to the first byte following the closing `]` and the
2094
    /// corresponding ASCII class is returned.
2095
    #[inline(never)]
2096
0
    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
2097
0
        // ASCII character classes are interesting from a parsing perspective
2098
0
        // because parsing cannot fail with any interesting error. For example,
2099
0
        // in order to use an ASCII character class, it must be enclosed in
2100
0
        // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
2101
0
        // of it as "ASCII character classes have the syntax `[:NAME:]` which
2102
0
        // can only appear within character brackets." This means that things
2103
0
        // like `[[:lower:]A]` are legal constructs.
2104
0
        //
2105
0
        // However, if one types an incorrect ASCII character class, e.g.,
2106
0
        // `[[:loower:]]`, then we treat that as a normal nested character
2107
0
        // class containing the characters `:elorw`. One might argue that we
2108
0
        // should return an error instead since the repeated colons give away
2109
0
        // the intent to write an ASCII class. But what if the user typed
2110
0
        // `[[:lower]]` instead? How can we tell that was intended to be an
2111
0
        // ASCII class and not just a normal nested class?
2112
0
        //
2113
0
        // Reasonable people can probably disagree over this, but for better
2114
0
        // or worse, we implement semantics that never fails at the expense
2115
0
        // of better failure modes.
2116
0
        assert_eq!(self.char(), '[');
2117
        // If parsing fails, then we back up the parser to this starting point.
2118
0
        let start = self.pos();
2119
0
        let mut negated = false;
2120
0
        if !self.bump() || self.char() != ':' {
2121
0
            self.parser().pos.set(start);
2122
0
            return None;
2123
0
        }
2124
0
        if !self.bump() {
2125
0
            self.parser().pos.set(start);
2126
0
            return None;
2127
0
        }
2128
0
        if self.char() == '^' {
2129
0
            negated = true;
2130
0
            if !self.bump() {
2131
0
                self.parser().pos.set(start);
2132
0
                return None;
2133
0
            }
2134
0
        }
2135
0
        let name_start = self.offset();
2136
0
        while self.char() != ':' && self.bump() {}
2137
0
        if self.is_eof() {
2138
0
            self.parser().pos.set(start);
2139
0
            return None;
2140
0
        }
2141
0
        let name = &self.pattern()[name_start..self.offset()];
2142
0
        if !self.bump_if(":]") {
2143
0
            self.parser().pos.set(start);
2144
0
            return None;
2145
0
        }
2146
0
        let kind = match ast::ClassAsciiKind::from_name(name) {
2147
0
            Some(kind) => kind,
2148
            None => {
2149
0
                self.parser().pos.set(start);
2150
0
                return None;
2151
            }
2152
        };
2153
0
        Some(ast::ClassAscii {
2154
0
            span: Span::new(start, self.pos()),
2155
0
            kind,
2156
0
            negated,
2157
0
        })
2158
0
    }
2159
2160
    /// Parse a Unicode class in either the single character notation, `\pN`
2161
    /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2162
    /// the parser is positioned at the `p` (or `P` for negation) and will
2163
    /// advance the parser to the character immediately following the class.
2164
    ///
2165
    /// Note that this does not check whether the class name is valid or not.
2166
    #[inline(never)]
2167
0
    fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2168
0
        assert!(self.char() == 'p' || self.char() == 'P');
2169
2170
0
        let mut scratch = self.parser().scratch.borrow_mut();
2171
0
        scratch.clear();
2172
0
2173
0
        let negated = self.char() == 'P';
2174
0
        if !self.bump_and_bump_space() {
2175
0
            return Err(
2176
0
                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2177
0
            );
2178
0
        }
2179
0
        let (start, kind) = if self.char() == '{' {
2180
0
            let start = self.span_char().end;
2181
0
            while self.bump_and_bump_space() && self.char() != '}' {
2182
0
                scratch.push(self.char());
2183
0
            }
2184
0
            if self.is_eof() {
2185
0
                return Err(self
2186
0
                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2187
0
            }
2188
0
            assert_eq!(self.char(), '}');
2189
0
            self.bump();
2190
0
2191
0
            let name = scratch.as_str();
2192
0
            if let Some(i) = name.find("!=") {
2193
0
                (
2194
0
                    start,
2195
0
                    ast::ClassUnicodeKind::NamedValue {
2196
0
                        op: ast::ClassUnicodeOpKind::NotEqual,
2197
0
                        name: name[..i].to_string(),
2198
0
                        value: name[i + 2..].to_string(),
2199
0
                    },
2200
0
                )
2201
0
            } else if let Some(i) = name.find(':') {
2202
0
                (
2203
0
                    start,
2204
0
                    ast::ClassUnicodeKind::NamedValue {
2205
0
                        op: ast::ClassUnicodeOpKind::Colon,
2206
0
                        name: name[..i].to_string(),
2207
0
                        value: name[i + 1..].to_string(),
2208
0
                    },
2209
0
                )
2210
0
            } else if let Some(i) = name.find('=') {
2211
0
                (
2212
0
                    start,
2213
0
                    ast::ClassUnicodeKind::NamedValue {
2214
0
                        op: ast::ClassUnicodeOpKind::Equal,
2215
0
                        name: name[..i].to_string(),
2216
0
                        value: name[i + 1..].to_string(),
2217
0
                    },
2218
0
                )
2219
            } else {
2220
0
                (start, ast::ClassUnicodeKind::Named(name.to_string()))
2221
            }
2222
        } else {
2223
0
            let start = self.pos();
2224
0
            let c = self.char();
2225
0
            if c == '\\' {
2226
0
                return Err(self.error(
2227
0
                    self.span_char(),
2228
0
                    ast::ErrorKind::UnicodeClassInvalid,
2229
0
                ));
2230
0
            }
2231
0
            self.bump_and_bump_space();
2232
0
            let kind = ast::ClassUnicodeKind::OneLetter(c);
2233
0
            (start, kind)
2234
        };
2235
0
        Ok(ast::ClassUnicode {
2236
0
            span: Span::new(start, self.pos()),
2237
0
            negated,
2238
0
            kind,
2239
0
        })
2240
0
    }
2241
2242
    /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2243
    /// parser is currently at a valid character class name and will be
2244
    /// advanced to the character immediately following the class.
2245
    #[inline(never)]
2246
0
    fn parse_perl_class(&self) -> ast::ClassPerl {
2247
0
        let c = self.char();
2248
0
        let span = self.span_char();
2249
0
        self.bump();
2250
0
        let (negated, kind) = match c {
2251
0
            'd' => (false, ast::ClassPerlKind::Digit),
2252
0
            'D' => (true, ast::ClassPerlKind::Digit),
2253
0
            's' => (false, ast::ClassPerlKind::Space),
2254
0
            'S' => (true, ast::ClassPerlKind::Space),
2255
0
            'w' => (false, ast::ClassPerlKind::Word),
2256
0
            'W' => (true, ast::ClassPerlKind::Word),
2257
0
            c => panic!("expected valid Perl class but got '{}'", c),
2258
        };
2259
0
        ast::ClassPerl { span, kind, negated }
2260
0
    }
2261
}
2262
2263
/// A type that traverses a fully parsed Ast and checks whether its depth
2264
/// exceeds the specified nesting limit. If it does, then an error is returned.
2265
#[derive(Debug)]
2266
struct NestLimiter<'p, 's, P> {
2267
    /// The parser that is checking the nest limit.
2268
    p: &'p ParserI<'s, P>,
2269
    /// The current depth while walking an Ast.
2270
    depth: u32,
2271
}
2272
2273
impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2274
0
    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2275
0
        NestLimiter { p, depth: 0 }
2276
0
    }
2277
2278
    #[inline(never)]
2279
0
    fn check(self, ast: &Ast) -> Result<()> {
2280
0
        ast::visit(ast, self)
2281
0
    }
2282
2283
0
    fn increment_depth(&mut self, span: &Span) -> Result<()> {
2284
0
        let new = self.depth.checked_add(1).ok_or_else(|| {
2285
0
            self.p.error(
2286
0
                span.clone(),
2287
0
                ast::ErrorKind::NestLimitExceeded(u32::MAX),
2288
0
            )
2289
0
        })?;
2290
0
        let limit = self.p.parser().nest_limit;
2291
0
        if new > limit {
2292
0
            return Err(self.p.error(
2293
0
                span.clone(),
2294
0
                ast::ErrorKind::NestLimitExceeded(limit),
2295
0
            ));
2296
0
        }
2297
0
        self.depth = new;
2298
0
        Ok(())
2299
0
    }
2300
2301
0
    fn decrement_depth(&mut self) {
2302
0
        // Assuming the correctness of the visitor, this should never drop
2303
0
        // below 0.
2304
0
        self.depth = self.depth.checked_sub(1).unwrap();
2305
0
    }
2306
}
2307
2308
impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2309
    type Output = ();
2310
    type Err = ast::Error;
2311
2312
0
    fn finish(self) -> Result<()> {
2313
0
        Ok(())
2314
0
    }
2315
2316
0
    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2317
0
        let span = match *ast {
2318
            Ast::Empty(_)
2319
            | Ast::Flags(_)
2320
            | Ast::Literal(_)
2321
            | Ast::Dot(_)
2322
            | Ast::Assertion(_)
2323
            | Ast::ClassUnicode(_)
2324
            | Ast::ClassPerl(_) => {
2325
                // These are all base cases, so we don't increment depth.
2326
0
                return Ok(());
2327
            }
2328
0
            Ast::ClassBracketed(ref x) => &x.span,
2329
0
            Ast::Repetition(ref x) => &x.span,
2330
0
            Ast::Group(ref x) => &x.span,
2331
0
            Ast::Alternation(ref x) => &x.span,
2332
0
            Ast::Concat(ref x) => &x.span,
2333
        };
2334
0
        self.increment_depth(span)
2335
0
    }
2336
2337
0
    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2338
0
        match *ast {
2339
            Ast::Empty(_)
2340
            | Ast::Flags(_)
2341
            | Ast::Literal(_)
2342
            | Ast::Dot(_)
2343
            | Ast::Assertion(_)
2344
            | Ast::ClassUnicode(_)
2345
            | Ast::ClassPerl(_) => {
2346
                // These are all base cases, so we don't decrement depth.
2347
0
                Ok(())
2348
            }
2349
            Ast::ClassBracketed(_)
2350
            | Ast::Repetition(_)
2351
            | Ast::Group(_)
2352
            | Ast::Alternation(_)
2353
            | Ast::Concat(_) => {
2354
0
                self.decrement_depth();
2355
0
                Ok(())
2356
            }
2357
        }
2358
0
    }
2359
2360
0
    fn visit_class_set_item_pre(
2361
0
        &mut self,
2362
0
        ast: &ast::ClassSetItem,
2363
0
    ) -> Result<()> {
2364
0
        let span = match *ast {
2365
            ast::ClassSetItem::Empty(_)
2366
            | ast::ClassSetItem::Literal(_)
2367
            | ast::ClassSetItem::Range(_)
2368
            | ast::ClassSetItem::Ascii(_)
2369
            | ast::ClassSetItem::Unicode(_)
2370
            | ast::ClassSetItem::Perl(_) => {
2371
                // These are all base cases, so we don't increment depth.
2372
0
                return Ok(());
2373
            }
2374
0
            ast::ClassSetItem::Bracketed(ref x) => &x.span,
2375
0
            ast::ClassSetItem::Union(ref x) => &x.span,
2376
        };
2377
0
        self.increment_depth(span)
2378
0
    }
2379
2380
0
    fn visit_class_set_item_post(
2381
0
        &mut self,
2382
0
        ast: &ast::ClassSetItem,
2383
0
    ) -> Result<()> {
2384
0
        match *ast {
2385
            ast::ClassSetItem::Empty(_)
2386
            | ast::ClassSetItem::Literal(_)
2387
            | ast::ClassSetItem::Range(_)
2388
            | ast::ClassSetItem::Ascii(_)
2389
            | ast::ClassSetItem::Unicode(_)
2390
            | ast::ClassSetItem::Perl(_) => {
2391
                // These are all base cases, so we don't decrement depth.
2392
0
                Ok(())
2393
            }
2394
            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2395
0
                self.decrement_depth();
2396
0
                Ok(())
2397
            }
2398
        }
2399
0
    }
2400
2401
0
    fn visit_class_set_binary_op_pre(
2402
0
        &mut self,
2403
0
        ast: &ast::ClassSetBinaryOp,
2404
0
    ) -> Result<()> {
2405
0
        self.increment_depth(&ast.span)
2406
0
    }
2407
2408
0
    fn visit_class_set_binary_op_post(
2409
0
        &mut self,
2410
0
        _ast: &ast::ClassSetBinaryOp,
2411
0
    ) -> Result<()> {
2412
0
        self.decrement_depth();
2413
0
        Ok(())
2414
0
    }
2415
}
2416
2417
/// When the result is an error, transforms the ast::ErrorKind from the source
2418
/// Result into another one. This function is used to return clearer error
2419
/// messages when possible.
2420
0
fn specialize_err<T>(
2421
0
    result: Result<T>,
2422
0
    from: ast::ErrorKind,
2423
0
    to: ast::ErrorKind,
2424
0
) -> Result<T> {
2425
0
    if let Err(e) = result {
2426
0
        if e.kind == from {
2427
0
            Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2428
        } else {
2429
0
            Err(e)
2430
        }
2431
    } else {
2432
0
        result
2433
    }
2434
0
}
2435
2436
#[cfg(test)]
2437
mod tests {
2438
    use core::ops::Range;
2439
2440
    use alloc::format;
2441
2442
    use super::*;
2443
2444
    // Our own assert_eq, which has slightly better formatting (but honestly
2445
    // still kind of crappy).
2446
    macro_rules! assert_eq {
2447
        ($left:expr, $right:expr) => {{
2448
            match (&$left, &$right) {
2449
                (left_val, right_val) => {
2450
                    if !(*left_val == *right_val) {
2451
                        panic!(
2452
                            "assertion failed: `(left == right)`\n\n\
2453
                             left:  `{:?}`\nright: `{:?}`\n\n",
2454
                            left_val, right_val
2455
                        )
2456
                    }
2457
                }
2458
            }
2459
        }};
2460
    }
2461
2462
    // We create these errors to compare with real ast::Errors in the tests.
2463
    // We define equality between TestError and ast::Error to disregard the
2464
    // pattern string in ast::Error, which is annoying to provide in tests.
2465
    #[derive(Clone, Debug)]
2466
    struct TestError {
2467
        span: Span,
2468
        kind: ast::ErrorKind,
2469
    }
2470
2471
    impl PartialEq<ast::Error> for TestError {
2472
        fn eq(&self, other: &ast::Error) -> bool {
2473
            self.span == other.span && self.kind == other.kind
2474
        }
2475
    }
2476
2477
    impl PartialEq<TestError> for ast::Error {
2478
        fn eq(&self, other: &TestError) -> bool {
2479
            self.span == other.span && self.kind == other.kind
2480
        }
2481
    }
2482
2483
    fn s(str: &str) -> String {
2484
        str.to_string()
2485
    }
2486
2487
    fn parser(pattern: &str) -> ParserI<'_, Parser> {
2488
        ParserI::new(Parser::new(), pattern)
2489
    }
2490
2491
    fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2492
        let parser = ParserBuilder::new().octal(true).build();
2493
        ParserI::new(parser, pattern)
2494
    }
2495
2496
    fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> {
2497
        let parser = ParserBuilder::new().empty_min_range(true).build();
2498
        ParserI::new(parser, pattern)
2499
    }
2500
2501
    fn parser_nest_limit(
2502
        pattern: &str,
2503
        nest_limit: u32,
2504
    ) -> ParserI<'_, Parser> {
2505
        let p = ParserBuilder::new().nest_limit(nest_limit).build();
2506
        ParserI::new(p, pattern)
2507
    }
2508
2509
    fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2510
        let p = ParserBuilder::new().ignore_whitespace(true).build();
2511
        ParserI::new(p, pattern)
2512
    }
2513
2514
    /// Short alias for creating a new span.
2515
    fn nspan(start: Position, end: Position) -> Span {
2516
        Span::new(start, end)
2517
    }
2518
2519
    /// Short alias for creating a new position.
2520
    fn npos(offset: usize, line: usize, column: usize) -> Position {
2521
        Position::new(offset, line, column)
2522
    }
2523
2524
    /// Create a new span from the given offset range. This assumes a single
2525
    /// line and sets the columns based on the offsets. i.e., This only works
2526
    /// out of the box for ASCII, which is fine for most tests.
2527
    fn span(range: Range<usize>) -> Span {
2528
        let start = Position::new(range.start, 1, range.start + 1);
2529
        let end = Position::new(range.end, 1, range.end + 1);
2530
        Span::new(start, end)
2531
    }
2532
2533
    /// Create a new span for the corresponding byte range in the given string.
2534
    fn span_range(subject: &str, range: Range<usize>) -> Span {
2535
        let start = Position {
2536
            offset: range.start,
2537
            line: 1 + subject[..range.start].matches('\n').count(),
2538
            column: 1 + subject[..range.start]
2539
                .chars()
2540
                .rev()
2541
                .position(|c| c == '\n')
2542
                .unwrap_or(subject[..range.start].chars().count()),
2543
        };
2544
        let end = Position {
2545
            offset: range.end,
2546
            line: 1 + subject[..range.end].matches('\n').count(),
2547
            column: 1 + subject[..range.end]
2548
                .chars()
2549
                .rev()
2550
                .position(|c| c == '\n')
2551
                .unwrap_or(subject[..range.end].chars().count()),
2552
        };
2553
        Span::new(start, end)
2554
    }
2555
2556
    /// Create a verbatim literal starting at the given position.
2557
    fn lit(c: char, start: usize) -> Ast {
2558
        lit_with(c, span(start..start + c.len_utf8()))
2559
    }
2560
2561
    /// Create a meta literal starting at the given position.
2562
    fn meta_lit(c: char, span: Span) -> Ast {
2563
        Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
2564
    }
2565
2566
    /// Create a verbatim literal with the given span.
2567
    fn lit_with(c: char, span: Span) -> Ast {
2568
        Ast::literal(ast::Literal {
2569
            span,
2570
            kind: ast::LiteralKind::Verbatim,
2571
            c,
2572
        })
2573
    }
2574
2575
    /// Create a concatenation with the given range.
2576
    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2577
        concat_with(span(range), asts)
2578
    }
2579
2580
    /// Create a concatenation with the given span.
2581
    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2582
        Ast::concat(ast::Concat { span, asts })
2583
    }
2584
2585
    /// Create an alternation with the given span.
2586
    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2587
        Ast::alternation(ast::Alternation { span: span(range), asts })
2588
    }
2589
2590
    /// Create a capturing group with the given span.
2591
    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2592
        Ast::group(ast::Group {
2593
            span: span(range),
2594
            kind: ast::GroupKind::CaptureIndex(index),
2595
            ast: Box::new(ast),
2596
        })
2597
    }
2598
2599
    /// Create an ast::SetFlags.
2600
    ///
2601
    /// The given pattern should be the full pattern string. The range given
2602
    /// should correspond to the byte offsets where the flag set occurs.
2603
    ///
2604
    /// If negated is true, then the set is interpreted as beginning with a
2605
    /// negation.
2606
    fn flag_set(
2607
        pat: &str,
2608
        range: Range<usize>,
2609
        flag: ast::Flag,
2610
        negated: bool,
2611
    ) -> Ast {
2612
        let mut items = vec![ast::FlagsItem {
2613
            span: span_range(pat, (range.end - 2)..(range.end - 1)),
2614
            kind: ast::FlagsItemKind::Flag(flag),
2615
        }];
2616
        if negated {
2617
            items.insert(
2618
                0,
2619
                ast::FlagsItem {
2620
                    span: span_range(pat, (range.start + 2)..(range.end - 2)),
2621
                    kind: ast::FlagsItemKind::Negation,
2622
                },
2623
            );
2624
        }
2625
        Ast::flags(ast::SetFlags {
2626
            span: span_range(pat, range.clone()),
2627
            flags: ast::Flags {
2628
                span: span_range(pat, (range.start + 2)..(range.end - 1)),
2629
                items,
2630
            },
2631
        })
2632
    }
2633
2634
    #[test]
2635
    fn parse_nest_limit() {
2636
        // A nest limit of 0 still allows some types of regexes.
2637
        assert_eq!(
2638
            parser_nest_limit("", 0).parse(),
2639
            Ok(Ast::empty(span(0..0)))
2640
        );
2641
        assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
2642
2643
        // Test repetition operations, which require one level of nesting.
2644
        assert_eq!(
2645
            parser_nest_limit("a+", 0).parse().unwrap_err(),
2646
            TestError {
2647
                span: span(0..2),
2648
                kind: ast::ErrorKind::NestLimitExceeded(0),
2649
            }
2650
        );
2651
        assert_eq!(
2652
            parser_nest_limit("a+", 1).parse(),
2653
            Ok(Ast::repetition(ast::Repetition {
2654
                span: span(0..2),
2655
                op: ast::RepetitionOp {
2656
                    span: span(1..2),
2657
                    kind: ast::RepetitionKind::OneOrMore,
2658
                },
2659
                greedy: true,
2660
                ast: Box::new(lit('a', 0)),
2661
            }))
2662
        );
2663
        assert_eq!(
2664
            parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2665
            TestError {
2666
                span: span(0..3),
2667
                kind: ast::ErrorKind::NestLimitExceeded(1),
2668
            }
2669
        );
2670
        assert_eq!(
2671
            parser_nest_limit("a+*", 1).parse().unwrap_err(),
2672
            TestError {
2673
                span: span(0..2),
2674
                kind: ast::ErrorKind::NestLimitExceeded(1),
2675
            }
2676
        );
2677
        assert_eq!(
2678
            parser_nest_limit("a+*", 2).parse(),
2679
            Ok(Ast::repetition(ast::Repetition {
2680
                span: span(0..3),
2681
                op: ast::RepetitionOp {
2682
                    span: span(2..3),
2683
                    kind: ast::RepetitionKind::ZeroOrMore,
2684
                },
2685
                greedy: true,
2686
                ast: Box::new(Ast::repetition(ast::Repetition {
2687
                    span: span(0..2),
2688
                    op: ast::RepetitionOp {
2689
                        span: span(1..2),
2690
                        kind: ast::RepetitionKind::OneOrMore,
2691
                    },
2692
                    greedy: true,
2693
                    ast: Box::new(lit('a', 0)),
2694
                })),
2695
            }))
2696
        );
2697
2698
        // Test concatenations. A concatenation requires one level of nesting.
2699
        assert_eq!(
2700
            parser_nest_limit("ab", 0).parse().unwrap_err(),
2701
            TestError {
2702
                span: span(0..2),
2703
                kind: ast::ErrorKind::NestLimitExceeded(0),
2704
            }
2705
        );
2706
        assert_eq!(
2707
            parser_nest_limit("ab", 1).parse(),
2708
            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
2709
        );
2710
        assert_eq!(
2711
            parser_nest_limit("abc", 1).parse(),
2712
            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
2713
        );
2714
2715
        // Test alternations. An alternation requires one level of nesting.
2716
        assert_eq!(
2717
            parser_nest_limit("a|b", 0).parse().unwrap_err(),
2718
            TestError {
2719
                span: span(0..3),
2720
                kind: ast::ErrorKind::NestLimitExceeded(0),
2721
            }
2722
        );
2723
        assert_eq!(
2724
            parser_nest_limit("a|b", 1).parse(),
2725
            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
2726
        );
2727
        assert_eq!(
2728
            parser_nest_limit("a|b|c", 1).parse(),
2729
            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
2730
        );
2731
2732
        // Test character classes. Classes form their own mini-recursive
2733
        // syntax!
2734
        assert_eq!(
2735
            parser_nest_limit("[a]", 0).parse().unwrap_err(),
2736
            TestError {
2737
                span: span(0..3),
2738
                kind: ast::ErrorKind::NestLimitExceeded(0),
2739
            }
2740
        );
2741
        assert_eq!(
2742
            parser_nest_limit("[a]", 1).parse(),
2743
            Ok(Ast::class_bracketed(ast::ClassBracketed {
2744
                span: span(0..3),
2745
                negated: false,
2746
                kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
2747
                    ast::Literal {
2748
                        span: span(1..2),
2749
                        kind: ast::LiteralKind::Verbatim,
2750
                        c: 'a',
2751
                    }
2752
                )),
2753
            }))
2754
        );
2755
        assert_eq!(
2756
            parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2757
            TestError {
2758
                span: span(1..3),
2759
                kind: ast::ErrorKind::NestLimitExceeded(1),
2760
            }
2761
        );
2762
        assert_eq!(
2763
            parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2764
            TestError {
2765
                span: span(3..7),
2766
                kind: ast::ErrorKind::NestLimitExceeded(2),
2767
            }
2768
        );
2769
        assert_eq!(
2770
            parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2771
            TestError {
2772
                span: span(4..6),
2773
                kind: ast::ErrorKind::NestLimitExceeded(3),
2774
            }
2775
        );
2776
        assert_eq!(
2777
            parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2778
            TestError {
2779
                span: span(1..5),
2780
                kind: ast::ErrorKind::NestLimitExceeded(1),
2781
            }
2782
        );
2783
        assert_eq!(
2784
            parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2785
            TestError {
2786
                span: span(4..6),
2787
                kind: ast::ErrorKind::NestLimitExceeded(2),
2788
            }
2789
        );
2790
    }
2791
2792
    #[test]
2793
    fn parse_comments() {
2794
        let pat = "(?x)
2795
# This is comment 1.
2796
foo # This is comment 2.
2797
  # This is comment 3.
2798
bar
2799
# This is comment 4.";
2800
        let astc = parser(pat).parse_with_comments().unwrap();
2801
        assert_eq!(
2802
            astc.ast,
2803
            concat_with(
2804
                span_range(pat, 0..pat.len()),
2805
                vec![
2806
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2807
                    lit_with('f', span_range(pat, 26..27)),
2808
                    lit_with('o', span_range(pat, 27..28)),
2809
                    lit_with('o', span_range(pat, 28..29)),
2810
                    lit_with('b', span_range(pat, 74..75)),
2811
                    lit_with('a', span_range(pat, 75..76)),
2812
                    lit_with('r', span_range(pat, 76..77)),
2813
                ]
2814
            )
2815
        );
2816
        assert_eq!(
2817
            astc.comments,
2818
            vec![
2819
                ast::Comment {
2820
                    span: span_range(pat, 5..26),
2821
                    comment: s(" This is comment 1."),
2822
                },
2823
                ast::Comment {
2824
                    span: span_range(pat, 30..51),
2825
                    comment: s(" This is comment 2."),
2826
                },
2827
                ast::Comment {
2828
                    span: span_range(pat, 53..74),
2829
                    comment: s(" This is comment 3."),
2830
                },
2831
                ast::Comment {
2832
                    span: span_range(pat, 78..98),
2833
                    comment: s(" This is comment 4."),
2834
                },
2835
            ]
2836
        );
2837
    }
2838
2839
    #[test]
2840
    fn parse_holistic() {
2841
        assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
2842
        assert_eq!(
2843
            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2844
            Ok(concat(
2845
                0..36,
2846
                vec![
2847
                    meta_lit('\\', span(0..2)),
2848
                    meta_lit('.', span(2..4)),
2849
                    meta_lit('+', span(4..6)),
2850
                    meta_lit('*', span(6..8)),
2851
                    meta_lit('?', span(8..10)),
2852
                    meta_lit('(', span(10..12)),
2853
                    meta_lit(')', span(12..14)),
2854
                    meta_lit('|', span(14..16)),
2855
                    meta_lit('[', span(16..18)),
2856
                    meta_lit(']', span(18..20)),
2857
                    meta_lit('{', span(20..22)),
2858
                    meta_lit('}', span(22..24)),
2859
                    meta_lit('^', span(24..26)),
2860
                    meta_lit('$', span(26..28)),
2861
                    meta_lit('#', span(28..30)),
2862
                    meta_lit('&', span(30..32)),
2863
                    meta_lit('-', span(32..34)),
2864
                    meta_lit('~', span(34..36)),
2865
                ]
2866
            ))
2867
        );
2868
    }
2869
2870
    #[test]
2871
    fn parse_ignore_whitespace() {
2872
        // Test that basic whitespace insensitivity works.
2873
        let pat = "(?x)a b";
2874
        assert_eq!(
2875
            parser(pat).parse(),
2876
            Ok(concat_with(
2877
                nspan(npos(0, 1, 1), npos(7, 1, 8)),
2878
                vec![
2879
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2880
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2881
                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2882
                ]
2883
            ))
2884
        );
2885
2886
        // Test that we can toggle whitespace insensitivity.
2887
        let pat = "(?x)a b(?-x)a b";
2888
        assert_eq!(
2889
            parser(pat).parse(),
2890
            Ok(concat_with(
2891
                nspan(npos(0, 1, 1), npos(15, 1, 16)),
2892
                vec![
2893
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2894
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
2895
                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
2896
                    flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
2897
                    lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
2898
                    lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
2899
                    lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
2900
                ]
2901
            ))
2902
        );
2903
2904
        // Test that nesting whitespace insensitive flags works.
2905
        let pat = "a (?x:a )a ";
2906
        assert_eq!(
2907
            parser(pat).parse(),
2908
            Ok(concat_with(
2909
                span_range(pat, 0..11),
2910
                vec![
2911
                    lit_with('a', span_range(pat, 0..1)),
2912
                    lit_with(' ', span_range(pat, 1..2)),
2913
                    Ast::group(ast::Group {
2914
                        span: span_range(pat, 2..9),
2915
                        kind: ast::GroupKind::NonCapturing(ast::Flags {
2916
                            span: span_range(pat, 4..5),
2917
                            items: vec![ast::FlagsItem {
2918
                                span: span_range(pat, 4..5),
2919
                                kind: ast::FlagsItemKind::Flag(
2920
                                    ast::Flag::IgnoreWhitespace
2921
                                ),
2922
                            },],
2923
                        }),
2924
                        ast: Box::new(lit_with('a', span_range(pat, 6..7))),
2925
                    }),
2926
                    lit_with('a', span_range(pat, 9..10)),
2927
                    lit_with(' ', span_range(pat, 10..11)),
2928
                ]
2929
            ))
2930
        );
2931
2932
        // Test that whitespace after an opening paren is insignificant.
2933
        let pat = "(?x)( ?P<foo> a )";
2934
        assert_eq!(
2935
            parser(pat).parse(),
2936
            Ok(concat_with(
2937
                span_range(pat, 0..pat.len()),
2938
                vec![
2939
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2940
                    Ast::group(ast::Group {
2941
                        span: span_range(pat, 4..pat.len()),
2942
                        kind: ast::GroupKind::CaptureName {
2943
                            starts_with_p: true,
2944
                            name: ast::CaptureName {
2945
                                span: span_range(pat, 9..12),
2946
                                name: s("foo"),
2947
                                index: 1,
2948
                            }
2949
                        },
2950
                        ast: Box::new(lit_with('a', span_range(pat, 14..15))),
2951
                    }),
2952
                ]
2953
            ))
2954
        );
2955
        let pat = "(?x)(  a )";
2956
        assert_eq!(
2957
            parser(pat).parse(),
2958
            Ok(concat_with(
2959
                span_range(pat, 0..pat.len()),
2960
                vec![
2961
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2962
                    Ast::group(ast::Group {
2963
                        span: span_range(pat, 4..pat.len()),
2964
                        kind: ast::GroupKind::CaptureIndex(1),
2965
                        ast: Box::new(lit_with('a', span_range(pat, 7..8))),
2966
                    }),
2967
                ]
2968
            ))
2969
        );
2970
        let pat = "(?x)(  ?:  a )";
2971
        assert_eq!(
2972
            parser(pat).parse(),
2973
            Ok(concat_with(
2974
                span_range(pat, 0..pat.len()),
2975
                vec![
2976
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2977
                    Ast::group(ast::Group {
2978
                        span: span_range(pat, 4..pat.len()),
2979
                        kind: ast::GroupKind::NonCapturing(ast::Flags {
2980
                            span: span_range(pat, 8..8),
2981
                            items: vec![],
2982
                        }),
2983
                        ast: Box::new(lit_with('a', span_range(pat, 11..12))),
2984
                    }),
2985
                ]
2986
            ))
2987
        );
2988
        let pat = r"(?x)\x { 53 }";
2989
        assert_eq!(
2990
            parser(pat).parse(),
2991
            Ok(concat_with(
2992
                span_range(pat, 0..pat.len()),
2993
                vec![
2994
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
2995
                    Ast::literal(ast::Literal {
2996
                        span: span(4..13),
2997
                        kind: ast::LiteralKind::HexBrace(
2998
                            ast::HexLiteralKind::X
2999
                        ),
3000
                        c: 'S',
3001
                    }),
3002
                ]
3003
            ))
3004
        );
3005
3006
        // Test that whitespace after an escape is OK.
3007
        let pat = r"(?x)\ ";
3008
        assert_eq!(
3009
            parser(pat).parse(),
3010
            Ok(concat_with(
3011
                span_range(pat, 0..pat.len()),
3012
                vec![
3013
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
3014
                    Ast::literal(ast::Literal {
3015
                        span: span_range(pat, 4..6),
3016
                        kind: ast::LiteralKind::Superfluous,
3017
                        c: ' ',
3018
                    }),
3019
                ]
3020
            ))
3021
        );
3022
    }
3023
3024
    #[test]
3025
    fn parse_newlines() {
3026
        let pat = ".\n.";
3027
        assert_eq!(
3028
            parser(pat).parse(),
3029
            Ok(concat_with(
3030
                span_range(pat, 0..3),
3031
                vec![
3032
                    Ast::dot(span_range(pat, 0..1)),
3033
                    lit_with('\n', span_range(pat, 1..2)),
3034
                    Ast::dot(span_range(pat, 2..3)),
3035
                ]
3036
            ))
3037
        );
3038
3039
        let pat = "foobar\nbaz\nquux\n";
3040
        assert_eq!(
3041
            parser(pat).parse(),
3042
            Ok(concat_with(
3043
                span_range(pat, 0..pat.len()),
3044
                vec![
3045
                    lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
3046
                    lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
3047
                    lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
3048
                    lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
3049
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
3050
                    lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
3051
                    lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
3052
                    lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
3053
                    lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
3054
                    lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
3055
                    lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
3056
                    lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
3057
                    lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
3058
                    lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
3059
                    lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
3060
                    lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
3061
                ]
3062
            ))
3063
        );
3064
    }
3065
3066
    #[test]
3067
    fn parse_uncounted_repetition() {
3068
        assert_eq!(
3069
            parser(r"a*").parse(),
3070
            Ok(Ast::repetition(ast::Repetition {
3071
                span: span(0..2),
3072
                op: ast::RepetitionOp {
3073
                    span: span(1..2),
3074
                    kind: ast::RepetitionKind::ZeroOrMore,
3075
                },
3076
                greedy: true,
3077
                ast: Box::new(lit('a', 0)),
3078
            }))
3079
        );
3080
        assert_eq!(
3081
            parser(r"a+").parse(),
3082
            Ok(Ast::repetition(ast::Repetition {
3083
                span: span(0..2),
3084
                op: ast::RepetitionOp {
3085
                    span: span(1..2),
3086
                    kind: ast::RepetitionKind::OneOrMore,
3087
                },
3088
                greedy: true,
3089
                ast: Box::new(lit('a', 0)),
3090
            }))
3091
        );
3092
3093
        assert_eq!(
3094
            parser(r"a?").parse(),
3095
            Ok(Ast::repetition(ast::Repetition {
3096
                span: span(0..2),
3097
                op: ast::RepetitionOp {
3098
                    span: span(1..2),
3099
                    kind: ast::RepetitionKind::ZeroOrOne,
3100
                },
3101
                greedy: true,
3102
                ast: Box::new(lit('a', 0)),
3103
            }))
3104
        );
3105
        assert_eq!(
3106
            parser(r"a??").parse(),
3107
            Ok(Ast::repetition(ast::Repetition {
3108
                span: span(0..3),
3109
                op: ast::RepetitionOp {
3110
                    span: span(1..3),
3111
                    kind: ast::RepetitionKind::ZeroOrOne,
3112
                },
3113
                greedy: false,
3114
                ast: Box::new(lit('a', 0)),
3115
            }))
3116
        );
3117
        assert_eq!(
3118
            parser(r"a?").parse(),
3119
            Ok(Ast::repetition(ast::Repetition {
3120
                span: span(0..2),
3121
                op: ast::RepetitionOp {
3122
                    span: span(1..2),
3123
                    kind: ast::RepetitionKind::ZeroOrOne,
3124
                },
3125
                greedy: true,
3126
                ast: Box::new(lit('a', 0)),
3127
            }))
3128
        );
3129
        assert_eq!(
3130
            parser(r"a?b").parse(),
3131
            Ok(concat(
3132
                0..3,
3133
                vec![
3134
                    Ast::repetition(ast::Repetition {
3135
                        span: span(0..2),
3136
                        op: ast::RepetitionOp {
3137
                            span: span(1..2),
3138
                            kind: ast::RepetitionKind::ZeroOrOne,
3139
                        },
3140
                        greedy: true,
3141
                        ast: Box::new(lit('a', 0)),
3142
                    }),
3143
                    lit('b', 2),
3144
                ]
3145
            ))
3146
        );
3147
        assert_eq!(
3148
            parser(r"a??b").parse(),
3149
            Ok(concat(
3150
                0..4,
3151
                vec![
3152
                    Ast::repetition(ast::Repetition {
3153
                        span: span(0..3),
3154
                        op: ast::RepetitionOp {
3155
                            span: span(1..3),
3156
                            kind: ast::RepetitionKind::ZeroOrOne,
3157
                        },
3158
                        greedy: false,
3159
                        ast: Box::new(lit('a', 0)),
3160
                    }),
3161
                    lit('b', 3),
3162
                ]
3163
            ))
3164
        );
3165
        assert_eq!(
3166
            parser(r"ab?").parse(),
3167
            Ok(concat(
3168
                0..3,
3169
                vec![
3170
                    lit('a', 0),
3171
                    Ast::repetition(ast::Repetition {
3172
                        span: span(1..3),
3173
                        op: ast::RepetitionOp {
3174
                            span: span(2..3),
3175
                            kind: ast::RepetitionKind::ZeroOrOne,
3176
                        },
3177
                        greedy: true,
3178
                        ast: Box::new(lit('b', 1)),
3179
                    }),
3180
                ]
3181
            ))
3182
        );
3183
        assert_eq!(
3184
            parser(r"(ab)?").parse(),
3185
            Ok(Ast::repetition(ast::Repetition {
3186
                span: span(0..5),
3187
                op: ast::RepetitionOp {
3188
                    span: span(4..5),
3189
                    kind: ast::RepetitionKind::ZeroOrOne,
3190
                },
3191
                greedy: true,
3192
                ast: Box::new(group(
3193
                    0..4,
3194
                    1,
3195
                    concat(1..3, vec![lit('a', 1), lit('b', 2),])
3196
                )),
3197
            }))
3198
        );
3199
        assert_eq!(
3200
            parser(r"|a?").parse(),
3201
            Ok(alt(
3202
                0..3,
3203
                vec![
3204
                    Ast::empty(span(0..0)),
3205
                    Ast::repetition(ast::Repetition {
3206
                        span: span(1..3),
3207
                        op: ast::RepetitionOp {
3208
                            span: span(2..3),
3209
                            kind: ast::RepetitionKind::ZeroOrOne,
3210
                        },
3211
                        greedy: true,
3212
                        ast: Box::new(lit('a', 1)),
3213
                    }),
3214
                ]
3215
            ))
3216
        );
3217
3218
        assert_eq!(
3219
            parser(r"*").parse().unwrap_err(),
3220
            TestError {
3221
                span: span(0..0),
3222
                kind: ast::ErrorKind::RepetitionMissing,
3223
            }
3224
        );
3225
        assert_eq!(
3226
            parser(r"(?i)*").parse().unwrap_err(),
3227
            TestError {
3228
                span: span(4..4),
3229
                kind: ast::ErrorKind::RepetitionMissing,
3230
            }
3231
        );
3232
        assert_eq!(
3233
            parser(r"(*)").parse().unwrap_err(),
3234
            TestError {
3235
                span: span(1..1),
3236
                kind: ast::ErrorKind::RepetitionMissing,
3237
            }
3238
        );
3239
        assert_eq!(
3240
            parser(r"(?:?)").parse().unwrap_err(),
3241
            TestError {
3242
                span: span(3..3),
3243
                kind: ast::ErrorKind::RepetitionMissing,
3244
            }
3245
        );
3246
        assert_eq!(
3247
            parser(r"+").parse().unwrap_err(),
3248
            TestError {
3249
                span: span(0..0),
3250
                kind: ast::ErrorKind::RepetitionMissing,
3251
            }
3252
        );
3253
        assert_eq!(
3254
            parser(r"?").parse().unwrap_err(),
3255
            TestError {
3256
                span: span(0..0),
3257
                kind: ast::ErrorKind::RepetitionMissing,
3258
            }
3259
        );
3260
        assert_eq!(
3261
            parser(r"(?)").parse().unwrap_err(),
3262
            TestError {
3263
                span: span(1..1),
3264
                kind: ast::ErrorKind::RepetitionMissing,
3265
            }
3266
        );
3267
        assert_eq!(
3268
            parser(r"|*").parse().unwrap_err(),
3269
            TestError {
3270
                span: span(1..1),
3271
                kind: ast::ErrorKind::RepetitionMissing,
3272
            }
3273
        );
3274
        assert_eq!(
3275
            parser(r"|+").parse().unwrap_err(),
3276
            TestError {
3277
                span: span(1..1),
3278
                kind: ast::ErrorKind::RepetitionMissing,
3279
            }
3280
        );
3281
        assert_eq!(
3282
            parser(r"|?").parse().unwrap_err(),
3283
            TestError {
3284
                span: span(1..1),
3285
                kind: ast::ErrorKind::RepetitionMissing,
3286
            }
3287
        );
3288
    }
3289
3290
    #[test]
3291
    fn parse_counted_repetition() {
3292
        assert_eq!(
3293
            parser(r"a{5}").parse(),
3294
            Ok(Ast::repetition(ast::Repetition {
3295
                span: span(0..4),
3296
                op: ast::RepetitionOp {
3297
                    span: span(1..4),
3298
                    kind: ast::RepetitionKind::Range(
3299
                        ast::RepetitionRange::Exactly(5)
3300
                    ),
3301
                },
3302
                greedy: true,
3303
                ast: Box::new(lit('a', 0)),
3304
            }))
3305
        );
3306
        assert_eq!(
3307
            parser(r"a{5,}").parse(),
3308
            Ok(Ast::repetition(ast::Repetition {
3309
                span: span(0..5),
3310
                op: ast::RepetitionOp {
3311
                    span: span(1..5),
3312
                    kind: ast::RepetitionKind::Range(
3313
                        ast::RepetitionRange::AtLeast(5)
3314
                    ),
3315
                },
3316
                greedy: true,
3317
                ast: Box::new(lit('a', 0)),
3318
            }))
3319
        );
3320
        assert_eq!(
3321
            parser(r"a{5,9}").parse(),
3322
            Ok(Ast::repetition(ast::Repetition {
3323
                span: span(0..6),
3324
                op: ast::RepetitionOp {
3325
                    span: span(1..6),
3326
                    kind: ast::RepetitionKind::Range(
3327
                        ast::RepetitionRange::Bounded(5, 9)
3328
                    ),
3329
                },
3330
                greedy: true,
3331
                ast: Box::new(lit('a', 0)),
3332
            }))
3333
        );
3334
        assert_eq!(
3335
            parser(r"a{5}?").parse(),
3336
            Ok(Ast::repetition(ast::Repetition {
3337
                span: span(0..5),
3338
                op: ast::RepetitionOp {
3339
                    span: span(1..5),
3340
                    kind: ast::RepetitionKind::Range(
3341
                        ast::RepetitionRange::Exactly(5)
3342
                    ),
3343
                },
3344
                greedy: false,
3345
                ast: Box::new(lit('a', 0)),
3346
            }))
3347
        );
3348
        assert_eq!(
3349
            parser(r"ab{5}").parse(),
3350
            Ok(concat(
3351
                0..5,
3352
                vec![
3353
                    lit('a', 0),
3354
                    Ast::repetition(ast::Repetition {
3355
                        span: span(1..5),
3356
                        op: ast::RepetitionOp {
3357
                            span: span(2..5),
3358
                            kind: ast::RepetitionKind::Range(
3359
                                ast::RepetitionRange::Exactly(5)
3360
                            ),
3361
                        },
3362
                        greedy: true,
3363
                        ast: Box::new(lit('b', 1)),
3364
                    }),
3365
                ]
3366
            ))
3367
        );
3368
        assert_eq!(
3369
            parser(r"ab{5}c").parse(),
3370
            Ok(concat(
3371
                0..6,
3372
                vec![
3373
                    lit('a', 0),
3374
                    Ast::repetition(ast::Repetition {
3375
                        span: span(1..5),
3376
                        op: ast::RepetitionOp {
3377
                            span: span(2..5),
3378
                            kind: ast::RepetitionKind::Range(
3379
                                ast::RepetitionRange::Exactly(5)
3380
                            ),
3381
                        },
3382
                        greedy: true,
3383
                        ast: Box::new(lit('b', 1)),
3384
                    }),
3385
                    lit('c', 5),
3386
                ]
3387
            ))
3388
        );
3389
3390
        assert_eq!(
3391
            parser(r"a{ 5 }").parse(),
3392
            Ok(Ast::repetition(ast::Repetition {
3393
                span: span(0..6),
3394
                op: ast::RepetitionOp {
3395
                    span: span(1..6),
3396
                    kind: ast::RepetitionKind::Range(
3397
                        ast::RepetitionRange::Exactly(5)
3398
                    ),
3399
                },
3400
                greedy: true,
3401
                ast: Box::new(lit('a', 0)),
3402
            }))
3403
        );
3404
        assert_eq!(
3405
            parser(r"a{ 5 , 9 }").parse(),
3406
            Ok(Ast::repetition(ast::Repetition {
3407
                span: span(0..10),
3408
                op: ast::RepetitionOp {
3409
                    span: span(1..10),
3410
                    kind: ast::RepetitionKind::Range(
3411
                        ast::RepetitionRange::Bounded(5, 9)
3412
                    ),
3413
                },
3414
                greedy: true,
3415
                ast: Box::new(lit('a', 0)),
3416
            }))
3417
        );
3418
        assert_eq!(
3419
            parser_empty_min_range(r"a{,9}").parse(),
3420
            Ok(Ast::repetition(ast::Repetition {
3421
                span: span(0..5),
3422
                op: ast::RepetitionOp {
3423
                    span: span(1..5),
3424
                    kind: ast::RepetitionKind::Range(
3425
                        ast::RepetitionRange::Bounded(0, 9)
3426
                    ),
3427
                },
3428
                greedy: true,
3429
                ast: Box::new(lit('a', 0)),
3430
            }))
3431
        );
3432
        assert_eq!(
3433
            parser_ignore_whitespace(r"a{5,9} ?").parse(),
3434
            Ok(Ast::repetition(ast::Repetition {
3435
                span: span(0..8),
3436
                op: ast::RepetitionOp {
3437
                    span: span(1..8),
3438
                    kind: ast::RepetitionKind::Range(
3439
                        ast::RepetitionRange::Bounded(5, 9)
3440
                    ),
3441
                },
3442
                greedy: false,
3443
                ast: Box::new(lit('a', 0)),
3444
            }))
3445
        );
3446
        assert_eq!(
3447
            parser(r"\b{5,9}").parse(),
3448
            Ok(Ast::repetition(ast::Repetition {
3449
                span: span(0..7),
3450
                op: ast::RepetitionOp {
3451
                    span: span(2..7),
3452
                    kind: ast::RepetitionKind::Range(
3453
                        ast::RepetitionRange::Bounded(5, 9)
3454
                    ),
3455
                },
3456
                greedy: true,
3457
                ast: Box::new(Ast::assertion(ast::Assertion {
3458
                    span: span(0..2),
3459
                    kind: ast::AssertionKind::WordBoundary,
3460
                })),
3461
            }))
3462
        );
3463
3464
        assert_eq!(
3465
            parser(r"(?i){0}").parse().unwrap_err(),
3466
            TestError {
3467
                span: span(4..4),
3468
                kind: ast::ErrorKind::RepetitionMissing,
3469
            }
3470
        );
3471
        assert_eq!(
3472
            parser(r"(?m){1,1}").parse().unwrap_err(),
3473
            TestError {
3474
                span: span(4..4),
3475
                kind: ast::ErrorKind::RepetitionMissing,
3476
            }
3477
        );
3478
        assert_eq!(
3479
            parser(r"a{]}").parse().unwrap_err(),
3480
            TestError {
3481
                span: span(2..2),
3482
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3483
            }
3484
        );
3485
        assert_eq!(
3486
            parser(r"a{1,]}").parse().unwrap_err(),
3487
            TestError {
3488
                span: span(4..4),
3489
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3490
            }
3491
        );
3492
        assert_eq!(
3493
            parser(r"a{").parse().unwrap_err(),
3494
            TestError {
3495
                span: span(1..2),
3496
                kind: ast::ErrorKind::RepetitionCountUnclosed,
3497
            }
3498
        );
3499
        assert_eq!(
3500
            parser(r"a{}").parse().unwrap_err(),
3501
            TestError {
3502
                span: span(2..2),
3503
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3504
            }
3505
        );
3506
        assert_eq!(
3507
            parser(r"a{a").parse().unwrap_err(),
3508
            TestError {
3509
                span: span(2..2),
3510
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3511
            }
3512
        );
3513
        assert_eq!(
3514
            parser(r"a{9999999999}").parse().unwrap_err(),
3515
            TestError {
3516
                span: span(2..12),
3517
                kind: ast::ErrorKind::DecimalInvalid,
3518
            }
3519
        );
3520
        assert_eq!(
3521
            parser(r"a{9").parse().unwrap_err(),
3522
            TestError {
3523
                span: span(1..3),
3524
                kind: ast::ErrorKind::RepetitionCountUnclosed,
3525
            }
3526
        );
3527
        assert_eq!(
3528
            parser(r"a{9,a").parse().unwrap_err(),
3529
            TestError {
3530
                span: span(4..4),
3531
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3532
            }
3533
        );
3534
        assert_eq!(
3535
            parser(r"a{9,9999999999}").parse().unwrap_err(),
3536
            TestError {
3537
                span: span(4..14),
3538
                kind: ast::ErrorKind::DecimalInvalid,
3539
            }
3540
        );
3541
        assert_eq!(
3542
            parser(r"a{9,").parse().unwrap_err(),
3543
            TestError {
3544
                span: span(1..4),
3545
                kind: ast::ErrorKind::RepetitionCountUnclosed,
3546
            }
3547
        );
3548
        assert_eq!(
3549
            parser(r"a{9,11").parse().unwrap_err(),
3550
            TestError {
3551
                span: span(1..6),
3552
                kind: ast::ErrorKind::RepetitionCountUnclosed,
3553
            }
3554
        );
3555
        assert_eq!(
3556
            parser(r"a{2,1}").parse().unwrap_err(),
3557
            TestError {
3558
                span: span(1..6),
3559
                kind: ast::ErrorKind::RepetitionCountInvalid,
3560
            }
3561
        );
3562
        assert_eq!(
3563
            parser(r"{5}").parse().unwrap_err(),
3564
            TestError {
3565
                span: span(0..0),
3566
                kind: ast::ErrorKind::RepetitionMissing,
3567
            }
3568
        );
3569
        assert_eq!(
3570
            parser(r"|{5}").parse().unwrap_err(),
3571
            TestError {
3572
                span: span(1..1),
3573
                kind: ast::ErrorKind::RepetitionMissing,
3574
            }
3575
        );
3576
    }
3577
3578
    #[test]
3579
    fn parse_alternate() {
3580
        assert_eq!(
3581
            parser(r"a|b").parse(),
3582
            Ok(Ast::alternation(ast::Alternation {
3583
                span: span(0..3),
3584
                asts: vec![lit('a', 0), lit('b', 2)],
3585
            }))
3586
        );
3587
        assert_eq!(
3588
            parser(r"(a|b)").parse(),
3589
            Ok(group(
3590
                0..5,
3591
                1,
3592
                Ast::alternation(ast::Alternation {
3593
                    span: span(1..4),
3594
                    asts: vec![lit('a', 1), lit('b', 3)],
3595
                })
3596
            ))
3597
        );
3598
3599
        assert_eq!(
3600
            parser(r"a|b|c").parse(),
3601
            Ok(Ast::alternation(ast::Alternation {
3602
                span: span(0..5),
3603
                asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3604
            }))
3605
        );
3606
        assert_eq!(
3607
            parser(r"ax|by|cz").parse(),
3608
            Ok(Ast::alternation(ast::Alternation {
3609
                span: span(0..8),
3610
                asts: vec![
3611
                    concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3612
                    concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3613
                    concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3614
                ],
3615
            }))
3616
        );
3617
        assert_eq!(
3618
            parser(r"(ax|by|cz)").parse(),
3619
            Ok(group(
3620
                0..10,
3621
                1,
3622
                Ast::alternation(ast::Alternation {
3623
                    span: span(1..9),
3624
                    asts: vec![
3625
                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3626
                        concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3627
                        concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3628
                    ],
3629
                })
3630
            ))
3631
        );
3632
        assert_eq!(
3633
            parser(r"(ax|(by|(cz)))").parse(),
3634
            Ok(group(
3635
                0..14,
3636
                1,
3637
                alt(
3638
                    1..13,
3639
                    vec![
3640
                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3641
                        group(
3642
                            4..13,
3643
                            2,
3644
                            alt(
3645
                                5..12,
3646
                                vec![
3647
                                    concat(
3648
                                        5..7,
3649
                                        vec![lit('b', 5), lit('y', 6)]
3650
                                    ),
3651
                                    group(
3652
                                        8..12,
3653
                                        3,
3654
                                        concat(
3655
                                            9..11,
3656
                                            vec![lit('c', 9), lit('z', 10),]
3657
                                        )
3658
                                    ),
3659
                                ]
3660
                            )
3661
                        ),
3662
                    ]
3663
                )
3664
            ))
3665
        );
3666
3667
        assert_eq!(
3668
            parser(r"|").parse(),
3669
            Ok(alt(
3670
                0..1,
3671
                vec![Ast::empty(span(0..0)), Ast::empty(span(1..1)),]
3672
            ))
3673
        );
3674
        assert_eq!(
3675
            parser(r"||").parse(),
3676
            Ok(alt(
3677
                0..2,
3678
                vec![
3679
                    Ast::empty(span(0..0)),
3680
                    Ast::empty(span(1..1)),
3681
                    Ast::empty(span(2..2)),
3682
                ]
3683
            ))
3684
        );
3685
        assert_eq!(
3686
            parser(r"a|").parse(),
3687
            Ok(alt(0..2, vec![lit('a', 0), Ast::empty(span(2..2)),]))
3688
        );
3689
        assert_eq!(
3690
            parser(r"|a").parse(),
3691
            Ok(alt(0..2, vec![Ast::empty(span(0..0)), lit('a', 1),]))
3692
        );
3693
3694
        assert_eq!(
3695
            parser(r"(|)").parse(),
3696
            Ok(group(
3697
                0..3,
3698
                1,
3699
                alt(
3700
                    1..2,
3701
                    vec![Ast::empty(span(1..1)), Ast::empty(span(2..2)),]
3702
                )
3703
            ))
3704
        );
3705
        assert_eq!(
3706
            parser(r"(a|)").parse(),
3707
            Ok(group(
3708
                0..4,
3709
                1,
3710
                alt(1..3, vec![lit('a', 1), Ast::empty(span(3..3)),])
3711
            ))
3712
        );
3713
        assert_eq!(
3714
            parser(r"(|a)").parse(),
3715
            Ok(group(
3716
                0..4,
3717
                1,
3718
                alt(1..3, vec![Ast::empty(span(1..1)), lit('a', 2),])
3719
            ))
3720
        );
3721
3722
        assert_eq!(
3723
            parser(r"a|b)").parse().unwrap_err(),
3724
            TestError {
3725
                span: span(3..4),
3726
                kind: ast::ErrorKind::GroupUnopened,
3727
            }
3728
        );
3729
        assert_eq!(
3730
            parser(r"(a|b").parse().unwrap_err(),
3731
            TestError {
3732
                span: span(0..1),
3733
                kind: ast::ErrorKind::GroupUnclosed,
3734
            }
3735
        );
3736
    }
3737
3738
    #[test]
3739
    fn parse_unsupported_lookaround() {
3740
        assert_eq!(
3741
            parser(r"(?=a)").parse().unwrap_err(),
3742
            TestError {
3743
                span: span(0..3),
3744
                kind: ast::ErrorKind::UnsupportedLookAround,
3745
            }
3746
        );
3747
        assert_eq!(
3748
            parser(r"(?!a)").parse().unwrap_err(),
3749
            TestError {
3750
                span: span(0..3),
3751
                kind: ast::ErrorKind::UnsupportedLookAround,
3752
            }
3753
        );
3754
        assert_eq!(
3755
            parser(r"(?<=a)").parse().unwrap_err(),
3756
            TestError {
3757
                span: span(0..4),
3758
                kind: ast::ErrorKind::UnsupportedLookAround,
3759
            }
3760
        );
3761
        assert_eq!(
3762
            parser(r"(?<!a)").parse().unwrap_err(),
3763
            TestError {
3764
                span: span(0..4),
3765
                kind: ast::ErrorKind::UnsupportedLookAround,
3766
            }
3767
        );
3768
    }
3769
3770
    #[test]
3771
    fn parse_group() {
3772
        assert_eq!(
3773
            parser("(?i)").parse(),
3774
            Ok(Ast::flags(ast::SetFlags {
3775
                span: span(0..4),
3776
                flags: ast::Flags {
3777
                    span: span(2..3),
3778
                    items: vec![ast::FlagsItem {
3779
                        span: span(2..3),
3780
                        kind: ast::FlagsItemKind::Flag(
3781
                            ast::Flag::CaseInsensitive
3782
                        ),
3783
                    }],
3784
                },
3785
            }))
3786
        );
3787
        assert_eq!(
3788
            parser("(?iU)").parse(),
3789
            Ok(Ast::flags(ast::SetFlags {
3790
                span: span(0..5),
3791
                flags: ast::Flags {
3792
                    span: span(2..4),
3793
                    items: vec![
3794
                        ast::FlagsItem {
3795
                            span: span(2..3),
3796
                            kind: ast::FlagsItemKind::Flag(
3797
                                ast::Flag::CaseInsensitive
3798
                            ),
3799
                        },
3800
                        ast::FlagsItem {
3801
                            span: span(3..4),
3802
                            kind: ast::FlagsItemKind::Flag(
3803
                                ast::Flag::SwapGreed
3804
                            ),
3805
                        },
3806
                    ],
3807
                },
3808
            }))
3809
        );
3810
        assert_eq!(
3811
            parser("(?i-U)").parse(),
3812
            Ok(Ast::flags(ast::SetFlags {
3813
                span: span(0..6),
3814
                flags: ast::Flags {
3815
                    span: span(2..5),
3816
                    items: vec![
3817
                        ast::FlagsItem {
3818
                            span: span(2..3),
3819
                            kind: ast::FlagsItemKind::Flag(
3820
                                ast::Flag::CaseInsensitive
3821
                            ),
3822
                        },
3823
                        ast::FlagsItem {
3824
                            span: span(3..4),
3825
                            kind: ast::FlagsItemKind::Negation,
3826
                        },
3827
                        ast::FlagsItem {
3828
                            span: span(4..5),
3829
                            kind: ast::FlagsItemKind::Flag(
3830
                                ast::Flag::SwapGreed
3831
                            ),
3832
                        },
3833
                    ],
3834
                },
3835
            }))
3836
        );
3837
3838
        assert_eq!(
3839
            parser("()").parse(),
3840
            Ok(Ast::group(ast::Group {
3841
                span: span(0..2),
3842
                kind: ast::GroupKind::CaptureIndex(1),
3843
                ast: Box::new(Ast::empty(span(1..1))),
3844
            }))
3845
        );
3846
        assert_eq!(
3847
            parser("(a)").parse(),
3848
            Ok(Ast::group(ast::Group {
3849
                span: span(0..3),
3850
                kind: ast::GroupKind::CaptureIndex(1),
3851
                ast: Box::new(lit('a', 1)),
3852
            }))
3853
        );
3854
        assert_eq!(
3855
            parser("(())").parse(),
3856
            Ok(Ast::group(ast::Group {
3857
                span: span(0..4),
3858
                kind: ast::GroupKind::CaptureIndex(1),
3859
                ast: Box::new(Ast::group(ast::Group {
3860
                    span: span(1..3),
3861
                    kind: ast::GroupKind::CaptureIndex(2),
3862
                    ast: Box::new(Ast::empty(span(2..2))),
3863
                })),
3864
            }))
3865
        );
3866
3867
        assert_eq!(
3868
            parser("(?:a)").parse(),
3869
            Ok(Ast::group(ast::Group {
3870
                span: span(0..5),
3871
                kind: ast::GroupKind::NonCapturing(ast::Flags {
3872
                    span: span(2..2),
3873
                    items: vec![],
3874
                }),
3875
                ast: Box::new(lit('a', 3)),
3876
            }))
3877
        );
3878
3879
        assert_eq!(
3880
            parser("(?i:a)").parse(),
3881
            Ok(Ast::group(ast::Group {
3882
                span: span(0..6),
3883
                kind: ast::GroupKind::NonCapturing(ast::Flags {
3884
                    span: span(2..3),
3885
                    items: vec![ast::FlagsItem {
3886
                        span: span(2..3),
3887
                        kind: ast::FlagsItemKind::Flag(
3888
                            ast::Flag::CaseInsensitive
3889
                        ),
3890
                    },],
3891
                }),
3892
                ast: Box::new(lit('a', 4)),
3893
            }))
3894
        );
3895
        assert_eq!(
3896
            parser("(?i-U:a)").parse(),
3897
            Ok(Ast::group(ast::Group {
3898
                span: span(0..8),
3899
                kind: ast::GroupKind::NonCapturing(ast::Flags {
3900
                    span: span(2..5),
3901
                    items: vec![
3902
                        ast::FlagsItem {
3903
                            span: span(2..3),
3904
                            kind: ast::FlagsItemKind::Flag(
3905
                                ast::Flag::CaseInsensitive
3906
                            ),
3907
                        },
3908
                        ast::FlagsItem {
3909
                            span: span(3..4),
3910
                            kind: ast::FlagsItemKind::Negation,
3911
                        },
3912
                        ast::FlagsItem {
3913
                            span: span(4..5),
3914
                            kind: ast::FlagsItemKind::Flag(
3915
                                ast::Flag::SwapGreed
3916
                            ),
3917
                        },
3918
                    ],
3919
                }),
3920
                ast: Box::new(lit('a', 6)),
3921
            }))
3922
        );
3923
3924
        assert_eq!(
3925
            parser("(").parse().unwrap_err(),
3926
            TestError {
3927
                span: span(0..1),
3928
                kind: ast::ErrorKind::GroupUnclosed,
3929
            }
3930
        );
3931
        assert_eq!(
3932
            parser("(?").parse().unwrap_err(),
3933
            TestError {
3934
                span: span(0..1),
3935
                kind: ast::ErrorKind::GroupUnclosed,
3936
            }
3937
        );
3938
        assert_eq!(
3939
            parser("(?P").parse().unwrap_err(),
3940
            TestError {
3941
                span: span(2..3),
3942
                kind: ast::ErrorKind::FlagUnrecognized,
3943
            }
3944
        );
3945
        assert_eq!(
3946
            parser("(?P<").parse().unwrap_err(),
3947
            TestError {
3948
                span: span(4..4),
3949
                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3950
            }
3951
        );
3952
        assert_eq!(
3953
            parser("(a").parse().unwrap_err(),
3954
            TestError {
3955
                span: span(0..1),
3956
                kind: ast::ErrorKind::GroupUnclosed,
3957
            }
3958
        );
3959
        assert_eq!(
3960
            parser("(()").parse().unwrap_err(),
3961
            TestError {
3962
                span: span(0..1),
3963
                kind: ast::ErrorKind::GroupUnclosed,
3964
            }
3965
        );
3966
        assert_eq!(
3967
            parser(")").parse().unwrap_err(),
3968
            TestError {
3969
                span: span(0..1),
3970
                kind: ast::ErrorKind::GroupUnopened,
3971
            }
3972
        );
3973
        assert_eq!(
3974
            parser("a)").parse().unwrap_err(),
3975
            TestError {
3976
                span: span(1..2),
3977
                kind: ast::ErrorKind::GroupUnopened,
3978
            }
3979
        );
3980
    }
3981
3982
    #[test]
3983
    fn parse_capture_name() {
3984
        assert_eq!(
3985
            parser("(?<a>z)").parse(),
3986
            Ok(Ast::group(ast::Group {
3987
                span: span(0..7),
3988
                kind: ast::GroupKind::CaptureName {
3989
                    starts_with_p: false,
3990
                    name: ast::CaptureName {
3991
                        span: span(3..4),
3992
                        name: s("a"),
3993
                        index: 1,
3994
                    }
3995
                },
3996
                ast: Box::new(lit('z', 5)),
3997
            }))
3998
        );
3999
        assert_eq!(
4000
            parser("(?P<a>z)").parse(),
4001
            Ok(Ast::group(ast::Group {
4002
                span: span(0..8),
4003
                kind: ast::GroupKind::CaptureName {
4004
                    starts_with_p: true,
4005
                    name: ast::CaptureName {
4006
                        span: span(4..5),
4007
                        name: s("a"),
4008
                        index: 1,
4009
                    }
4010
                },
4011
                ast: Box::new(lit('z', 6)),
4012
            }))
4013
        );
4014
        assert_eq!(
4015
            parser("(?P<abc>z)").parse(),
4016
            Ok(Ast::group(ast::Group {
4017
                span: span(0..10),
4018
                kind: ast::GroupKind::CaptureName {
4019
                    starts_with_p: true,
4020
                    name: ast::CaptureName {
4021
                        span: span(4..7),
4022
                        name: s("abc"),
4023
                        index: 1,
4024
                    }
4025
                },
4026
                ast: Box::new(lit('z', 8)),
4027
            }))
4028
        );
4029
4030
        assert_eq!(
4031
            parser("(?P<a_1>z)").parse(),
4032
            Ok(Ast::group(ast::Group {
4033
                span: span(0..10),
4034
                kind: ast::GroupKind::CaptureName {
4035
                    starts_with_p: true,
4036
                    name: ast::CaptureName {
4037
                        span: span(4..7),
4038
                        name: s("a_1"),
4039
                        index: 1,
4040
                    }
4041
                },
4042
                ast: Box::new(lit('z', 8)),
4043
            }))
4044
        );
4045
4046
        assert_eq!(
4047
            parser("(?P<a.1>z)").parse(),
4048
            Ok(Ast::group(ast::Group {
4049
                span: span(0..10),
4050
                kind: ast::GroupKind::CaptureName {
4051
                    starts_with_p: true,
4052
                    name: ast::CaptureName {
4053
                        span: span(4..7),
4054
                        name: s("a.1"),
4055
                        index: 1,
4056
                    }
4057
                },
4058
                ast: Box::new(lit('z', 8)),
4059
            }))
4060
        );
4061
4062
        assert_eq!(
4063
            parser("(?P<a[1]>z)").parse(),
4064
            Ok(Ast::group(ast::Group {
4065
                span: span(0..11),
4066
                kind: ast::GroupKind::CaptureName {
4067
                    starts_with_p: true,
4068
                    name: ast::CaptureName {
4069
                        span: span(4..8),
4070
                        name: s("a[1]"),
4071
                        index: 1,
4072
                    }
4073
                },
4074
                ast: Box::new(lit('z', 9)),
4075
            }))
4076
        );
4077
4078
        assert_eq!(
4079
            parser("(?P<a¾>)").parse(),
4080
            Ok(Ast::group(ast::Group {
4081
                span: Span::new(
4082
                    Position::new(0, 1, 1),
4083
                    Position::new(9, 1, 9),
4084
                ),
4085
                kind: ast::GroupKind::CaptureName {
4086
                    starts_with_p: true,
4087
                    name: ast::CaptureName {
4088
                        span: Span::new(
4089
                            Position::new(4, 1, 5),
4090
                            Position::new(7, 1, 7),
4091
                        ),
4092
                        name: s("a¾"),
4093
                        index: 1,
4094
                    }
4095
                },
4096
                ast: Box::new(Ast::empty(Span::new(
4097
                    Position::new(8, 1, 8),
4098
                    Position::new(8, 1, 8),
4099
                ))),
4100
            }))
4101
        );
4102
        assert_eq!(
4103
            parser("(?P<名字>)").parse(),
4104
            Ok(Ast::group(ast::Group {
4105
                span: Span::new(
4106
                    Position::new(0, 1, 1),
4107
                    Position::new(12, 1, 9),
4108
                ),
4109
                kind: ast::GroupKind::CaptureName {
4110
                    starts_with_p: true,
4111
                    name: ast::CaptureName {
4112
                        span: Span::new(
4113
                            Position::new(4, 1, 5),
4114
                            Position::new(10, 1, 7),
4115
                        ),
4116
                        name: s("名字"),
4117
                        index: 1,
4118
                    }
4119
                },
4120
                ast: Box::new(Ast::empty(Span::new(
4121
                    Position::new(11, 1, 8),
4122
                    Position::new(11, 1, 8),
4123
                ))),
4124
            }))
4125
        );
4126
4127
        assert_eq!(
4128
            parser("(?P<").parse().unwrap_err(),
4129
            TestError {
4130
                span: span(4..4),
4131
                kind: ast::ErrorKind::GroupNameUnexpectedEof,
4132
            }
4133
        );
4134
        assert_eq!(
4135
            parser("(?P<>z)").parse().unwrap_err(),
4136
            TestError {
4137
                span: span(4..4),
4138
                kind: ast::ErrorKind::GroupNameEmpty,
4139
            }
4140
        );
4141
        assert_eq!(
4142
            parser("(?P<a").parse().unwrap_err(),
4143
            TestError {
4144
                span: span(5..5),
4145
                kind: ast::ErrorKind::GroupNameUnexpectedEof,
4146
            }
4147
        );
4148
        assert_eq!(
4149
            parser("(?P<ab").parse().unwrap_err(),
4150
            TestError {
4151
                span: span(6..6),
4152
                kind: ast::ErrorKind::GroupNameUnexpectedEof,
4153
            }
4154
        );
4155
        assert_eq!(
4156
            parser("(?P<0a").parse().unwrap_err(),
4157
            TestError {
4158
                span: span(4..5),
4159
                kind: ast::ErrorKind::GroupNameInvalid,
4160
            }
4161
        );
4162
        assert_eq!(
4163
            parser("(?P<~").parse().unwrap_err(),
4164
            TestError {
4165
                span: span(4..5),
4166
                kind: ast::ErrorKind::GroupNameInvalid,
4167
            }
4168
        );
4169
        assert_eq!(
4170
            parser("(?P<abc~").parse().unwrap_err(),
4171
            TestError {
4172
                span: span(7..8),
4173
                kind: ast::ErrorKind::GroupNameInvalid,
4174
            }
4175
        );
4176
        assert_eq!(
4177
            parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
4178
            TestError {
4179
                span: span(12..13),
4180
                kind: ast::ErrorKind::GroupNameDuplicate {
4181
                    original: span(4..5),
4182
                },
4183
            }
4184
        );
4185
        assert_eq!(
4186
            parser("(?P<5>)").parse().unwrap_err(),
4187
            TestError {
4188
                span: span(4..5),
4189
                kind: ast::ErrorKind::GroupNameInvalid,
4190
            }
4191
        );
4192
        assert_eq!(
4193
            parser("(?P<5a>)").parse().unwrap_err(),
4194
            TestError {
4195
                span: span(4..5),
4196
                kind: ast::ErrorKind::GroupNameInvalid,
4197
            }
4198
        );
4199
        assert_eq!(
4200
            parser("(?P<¾>)").parse().unwrap_err(),
4201
            TestError {
4202
                span: Span::new(
4203
                    Position::new(4, 1, 5),
4204
                    Position::new(6, 1, 6),
4205
                ),
4206
                kind: ast::ErrorKind::GroupNameInvalid,
4207
            }
4208
        );
4209
        assert_eq!(
4210
            parser("(?P<¾a>)").parse().unwrap_err(),
4211
            TestError {
4212
                span: Span::new(
4213
                    Position::new(4, 1, 5),
4214
                    Position::new(6, 1, 6),
4215
                ),
4216
                kind: ast::ErrorKind::GroupNameInvalid,
4217
            }
4218
        );
4219
        assert_eq!(
4220
            parser("(?P<☃>)").parse().unwrap_err(),
4221
            TestError {
4222
                span: Span::new(
4223
                    Position::new(4, 1, 5),
4224
                    Position::new(7, 1, 6),
4225
                ),
4226
                kind: ast::ErrorKind::GroupNameInvalid,
4227
            }
4228
        );
4229
        assert_eq!(
4230
            parser("(?P<a☃>)").parse().unwrap_err(),
4231
            TestError {
4232
                span: Span::new(
4233
                    Position::new(5, 1, 6),
4234
                    Position::new(8, 1, 7),
4235
                ),
4236
                kind: ast::ErrorKind::GroupNameInvalid,
4237
            }
4238
        );
4239
    }
4240
4241
    #[test]
4242
    fn parse_flags() {
4243
        assert_eq!(
4244
            parser("i:").parse_flags(),
4245
            Ok(ast::Flags {
4246
                span: span(0..1),
4247
                items: vec![ast::FlagsItem {
4248
                    span: span(0..1),
4249
                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
4250
                }],
4251
            })
4252
        );
4253
        assert_eq!(
4254
            parser("i)").parse_flags(),
4255
            Ok(ast::Flags {
4256
                span: span(0..1),
4257
                items: vec![ast::FlagsItem {
4258
                    span: span(0..1),
4259
                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
4260
                }],
4261
            })
4262
        );
4263
4264
        assert_eq!(
4265
            parser("isU:").parse_flags(),
4266
            Ok(ast::Flags {
4267
                span: span(0..3),
4268
                items: vec![
4269
                    ast::FlagsItem {
4270
                        span: span(0..1),
4271
                        kind: ast::FlagsItemKind::Flag(
4272
                            ast::Flag::CaseInsensitive
4273
                        ),
4274
                    },
4275
                    ast::FlagsItem {
4276
                        span: span(1..2),
4277
                        kind: ast::FlagsItemKind::Flag(
4278
                            ast::Flag::DotMatchesNewLine
4279
                        ),
4280
                    },
4281
                    ast::FlagsItem {
4282
                        span: span(2..3),
4283
                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4284
                    },
4285
                ],
4286
            })
4287
        );
4288
4289
        assert_eq!(
4290
            parser("-isU:").parse_flags(),
4291
            Ok(ast::Flags {
4292
                span: span(0..4),
4293
                items: vec![
4294
                    ast::FlagsItem {
4295
                        span: span(0..1),
4296
                        kind: ast::FlagsItemKind::Negation,
4297
                    },
4298
                    ast::FlagsItem {
4299
                        span: span(1..2),
4300
                        kind: ast::FlagsItemKind::Flag(
4301
                            ast::Flag::CaseInsensitive
4302
                        ),
4303
                    },
4304
                    ast::FlagsItem {
4305
                        span: span(2..3),
4306
                        kind: ast::FlagsItemKind::Flag(
4307
                            ast::Flag::DotMatchesNewLine
4308
                        ),
4309
                    },
4310
                    ast::FlagsItem {
4311
                        span: span(3..4),
4312
                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4313
                    },
4314
                ],
4315
            })
4316
        );
4317
        assert_eq!(
4318
            parser("i-sU:").parse_flags(),
4319
            Ok(ast::Flags {
4320
                span: span(0..4),
4321
                items: vec![
4322
                    ast::FlagsItem {
4323
                        span: span(0..1),
4324
                        kind: ast::FlagsItemKind::Flag(
4325
                            ast::Flag::CaseInsensitive
4326
                        ),
4327
                    },
4328
                    ast::FlagsItem {
4329
                        span: span(1..2),
4330
                        kind: ast::FlagsItemKind::Negation,
4331
                    },
4332
                    ast::FlagsItem {
4333
                        span: span(2..3),
4334
                        kind: ast::FlagsItemKind::Flag(
4335
                            ast::Flag::DotMatchesNewLine
4336
                        ),
4337
                    },
4338
                    ast::FlagsItem {
4339
                        span: span(3..4),
4340
                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4341
                    },
4342
                ],
4343
            })
4344
        );
4345
        assert_eq!(
4346
            parser("i-sR:").parse_flags(),
4347
            Ok(ast::Flags {
4348
                span: span(0..4),
4349
                items: vec![
4350
                    ast::FlagsItem {
4351
                        span: span(0..1),
4352
                        kind: ast::FlagsItemKind::Flag(
4353
                            ast::Flag::CaseInsensitive
4354
                        ),
4355
                    },
4356
                    ast::FlagsItem {
4357
                        span: span(1..2),
4358
                        kind: ast::FlagsItemKind::Negation,
4359
                    },
4360
                    ast::FlagsItem {
4361
                        span: span(2..3),
4362
                        kind: ast::FlagsItemKind::Flag(
4363
                            ast::Flag::DotMatchesNewLine
4364
                        ),
4365
                    },
4366
                    ast::FlagsItem {
4367
                        span: span(3..4),
4368
                        kind: ast::FlagsItemKind::Flag(ast::Flag::CRLF),
4369
                    },
4370
                ],
4371
            })
4372
        );
4373
4374
        assert_eq!(
4375
            parser("isU").parse_flags().unwrap_err(),
4376
            TestError {
4377
                span: span(3..3),
4378
                kind: ast::ErrorKind::FlagUnexpectedEof,
4379
            }
4380
        );
4381
        assert_eq!(
4382
            parser("isUa:").parse_flags().unwrap_err(),
4383
            TestError {
4384
                span: span(3..4),
4385
                kind: ast::ErrorKind::FlagUnrecognized,
4386
            }
4387
        );
4388
        assert_eq!(
4389
            parser("isUi:").parse_flags().unwrap_err(),
4390
            TestError {
4391
                span: span(3..4),
4392
                kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
4393
            }
4394
        );
4395
        assert_eq!(
4396
            parser("i-sU-i:").parse_flags().unwrap_err(),
4397
            TestError {
4398
                span: span(4..5),
4399
                kind: ast::ErrorKind::FlagRepeatedNegation {
4400
                    original: span(1..2),
4401
                },
4402
            }
4403
        );
4404
        assert_eq!(
4405
            parser("-)").parse_flags().unwrap_err(),
4406
            TestError {
4407
                span: span(0..1),
4408
                kind: ast::ErrorKind::FlagDanglingNegation,
4409
            }
4410
        );
4411
        assert_eq!(
4412
            parser("i-)").parse_flags().unwrap_err(),
4413
            TestError {
4414
                span: span(1..2),
4415
                kind: ast::ErrorKind::FlagDanglingNegation,
4416
            }
4417
        );
4418
        assert_eq!(
4419
            parser("iU-)").parse_flags().unwrap_err(),
4420
            TestError {
4421
                span: span(2..3),
4422
                kind: ast::ErrorKind::FlagDanglingNegation,
4423
            }
4424
        );
4425
    }
4426
4427
    #[test]
4428
    fn parse_flag() {
4429
        assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
4430
        assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
4431
        assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
4432
        assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
4433
        assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
4434
        assert_eq!(parser("R").parse_flag(), Ok(ast::Flag::CRLF));
4435
        assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
4436
4437
        assert_eq!(
4438
            parser("a").parse_flag().unwrap_err(),
4439
            TestError {
4440
                span: span(0..1),
4441
                kind: ast::ErrorKind::FlagUnrecognized,
4442
            }
4443
        );
4444
        assert_eq!(
4445
            parser("☃").parse_flag().unwrap_err(),
4446
            TestError {
4447
                span: span_range("☃", 0..3),
4448
                kind: ast::ErrorKind::FlagUnrecognized,
4449
            }
4450
        );
4451
    }
4452
4453
    #[test]
4454
    fn parse_primitive_non_escape() {
4455
        assert_eq!(
4456
            parser(r".").parse_primitive(),
4457
            Ok(Primitive::Dot(span(0..1)))
4458
        );
4459
        assert_eq!(
4460
            parser(r"^").parse_primitive(),
4461
            Ok(Primitive::Assertion(ast::Assertion {
4462
                span: span(0..1),
4463
                kind: ast::AssertionKind::StartLine,
4464
            }))
4465
        );
4466
        assert_eq!(
4467
            parser(r"$").parse_primitive(),
4468
            Ok(Primitive::Assertion(ast::Assertion {
4469
                span: span(0..1),
4470
                kind: ast::AssertionKind::EndLine,
4471
            }))
4472
        );
4473
4474
        assert_eq!(
4475
            parser(r"a").parse_primitive(),
4476
            Ok(Primitive::Literal(ast::Literal {
4477
                span: span(0..1),
4478
                kind: ast::LiteralKind::Verbatim,
4479
                c: 'a',
4480
            }))
4481
        );
4482
        assert_eq!(
4483
            parser(r"|").parse_primitive(),
4484
            Ok(Primitive::Literal(ast::Literal {
4485
                span: span(0..1),
4486
                kind: ast::LiteralKind::Verbatim,
4487
                c: '|',
4488
            }))
4489
        );
4490
        assert_eq!(
4491
            parser(r"☃").parse_primitive(),
4492
            Ok(Primitive::Literal(ast::Literal {
4493
                span: span_range("☃", 0..3),
4494
                kind: ast::LiteralKind::Verbatim,
4495
                c: '☃',
4496
            }))
4497
        );
4498
    }
4499
4500
    #[test]
4501
    fn parse_escape() {
4502
        assert_eq!(
4503
            parser(r"\|").parse_primitive(),
4504
            Ok(Primitive::Literal(ast::Literal {
4505
                span: span(0..2),
4506
                kind: ast::LiteralKind::Meta,
4507
                c: '|',
4508
            }))
4509
        );
4510
        let specials = &[
4511
            (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
4512
            (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
4513
            (r"\t", '\t', ast::SpecialLiteralKind::Tab),
4514
            (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
4515
            (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
4516
            (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
4517
        ];
4518
        for &(pat, c, ref kind) in specials {
4519
            assert_eq!(
4520
                parser(pat).parse_primitive(),
4521
                Ok(Primitive::Literal(ast::Literal {
4522
                    span: span(0..2),
4523
                    kind: ast::LiteralKind::Special(kind.clone()),
4524
                    c,
4525
                }))
4526
            );
4527
        }
4528
        assert_eq!(
4529
            parser(r"\A").parse_primitive(),
4530
            Ok(Primitive::Assertion(ast::Assertion {
4531
                span: span(0..2),
4532
                kind: ast::AssertionKind::StartText,
4533
            }))
4534
        );
4535
        assert_eq!(
4536
            parser(r"\z").parse_primitive(),
4537
            Ok(Primitive::Assertion(ast::Assertion {
4538
                span: span(0..2),
4539
                kind: ast::AssertionKind::EndText,
4540
            }))
4541
        );
4542
        assert_eq!(
4543
            parser(r"\b").parse_primitive(),
4544
            Ok(Primitive::Assertion(ast::Assertion {
4545
                span: span(0..2),
4546
                kind: ast::AssertionKind::WordBoundary,
4547
            }))
4548
        );
4549
        assert_eq!(
4550
            parser(r"\b{start}").parse_primitive(),
4551
            Ok(Primitive::Assertion(ast::Assertion {
4552
                span: span(0..9),
4553
                kind: ast::AssertionKind::WordBoundaryStart,
4554
            }))
4555
        );
4556
        assert_eq!(
4557
            parser(r"\b{end}").parse_primitive(),
4558
            Ok(Primitive::Assertion(ast::Assertion {
4559
                span: span(0..7),
4560
                kind: ast::AssertionKind::WordBoundaryEnd,
4561
            }))
4562
        );
4563
        assert_eq!(
4564
            parser(r"\b{start-half}").parse_primitive(),
4565
            Ok(Primitive::Assertion(ast::Assertion {
4566
                span: span(0..14),
4567
                kind: ast::AssertionKind::WordBoundaryStartHalf,
4568
            }))
4569
        );
4570
        assert_eq!(
4571
            parser(r"\b{end-half}").parse_primitive(),
4572
            Ok(Primitive::Assertion(ast::Assertion {
4573
                span: span(0..12),
4574
                kind: ast::AssertionKind::WordBoundaryEndHalf,
4575
            }))
4576
        );
4577
        assert_eq!(
4578
            parser(r"\<").parse_primitive(),
4579
            Ok(Primitive::Assertion(ast::Assertion {
4580
                span: span(0..2),
4581
                kind: ast::AssertionKind::WordBoundaryStartAngle,
4582
            }))
4583
        );
4584
        assert_eq!(
4585
            parser(r"\>").parse_primitive(),
4586
            Ok(Primitive::Assertion(ast::Assertion {
4587
                span: span(0..2),
4588
                kind: ast::AssertionKind::WordBoundaryEndAngle,
4589
            }))
4590
        );
4591
        assert_eq!(
4592
            parser(r"\B").parse_primitive(),
4593
            Ok(Primitive::Assertion(ast::Assertion {
4594
                span: span(0..2),
4595
                kind: ast::AssertionKind::NotWordBoundary,
4596
            }))
4597
        );
4598
4599
        // We also support superfluous escapes in most cases now too.
4600
        for c in ['!', '@', '%', '"', '\'', '/', ' '] {
4601
            let pat = format!(r"\{}", c);
4602
            assert_eq!(
4603
                parser(&pat).parse_primitive(),
4604
                Ok(Primitive::Literal(ast::Literal {
4605
                    span: span(0..2),
4606
                    kind: ast::LiteralKind::Superfluous,
4607
                    c,
4608
                }))
4609
            );
4610
        }
4611
4612
        // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This
4613
        // gives flexibility for future evolution.
4614
        assert_eq!(
4615
            parser(r"\e").parse_escape().unwrap_err(),
4616
            TestError {
4617
                span: span(0..2),
4618
                kind: ast::ErrorKind::EscapeUnrecognized,
4619
            }
4620
        );
4621
        assert_eq!(
4622
            parser(r"\y").parse_escape().unwrap_err(),
4623
            TestError {
4624
                span: span(0..2),
4625
                kind: ast::ErrorKind::EscapeUnrecognized,
4626
            }
4627
        );
4628
4629
        // Starting a special word boundary without any non-whitespace chars
4630
        // after the brace makes it ambiguous whether the user meant to write
4631
        // a counted repetition (probably not?) or an actual special word
4632
        // boundary assertion.
4633
        assert_eq!(
4634
            parser(r"\b{").parse_escape().unwrap_err(),
4635
            TestError {
4636
                span: span(0..3),
4637
                kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
4638
            }
4639
        );
4640
        assert_eq!(
4641
            parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(),
4642
            TestError {
4643
                span: span(0..4),
4644
                kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
4645
            }
4646
        );
4647
        // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char,
4648
        // and thus causes the parser to treat it as a counted repetition.
4649
        assert_eq!(
4650
            parser(r"\b{ ").parse().unwrap_err(),
4651
            TestError {
4652
                span: span(2..4),
4653
                kind: ast::ErrorKind::RepetitionCountUnclosed,
4654
            }
4655
        );
4656
        // In this case, we got some valid chars that makes it look like the
4657
        // user is writing one of the special word boundary assertions, but
4658
        // we forget to close the brace.
4659
        assert_eq!(
4660
            parser(r"\b{foo").parse_escape().unwrap_err(),
4661
            TestError {
4662
                span: span(2..6),
4663
                kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
4664
            }
4665
        );
4666
        // We get the same error as above, except it is provoked by seeing a
4667
        // char that we know is invalid before seeing a closing brace.
4668
        assert_eq!(
4669
            parser(r"\b{foo!}").parse_escape().unwrap_err(),
4670
            TestError {
4671
                span: span(2..6),
4672
                kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
4673
            }
4674
        );
4675
        // And this one occurs when, syntactically, everything looks okay, but
4676
        // we don't use a valid spelling of a word boundary assertion.
4677
        assert_eq!(
4678
            parser(r"\b{foo}").parse_escape().unwrap_err(),
4679
            TestError {
4680
                span: span(3..6),
4681
                kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized,
4682
            }
4683
        );
4684
4685
        // An unfinished escape is illegal.
4686
        assert_eq!(
4687
            parser(r"\").parse_escape().unwrap_err(),
4688
            TestError {
4689
                span: span(0..1),
4690
                kind: ast::ErrorKind::EscapeUnexpectedEof,
4691
            }
4692
        );
4693
    }
4694
4695
    #[test]
4696
    fn parse_unsupported_backreference() {
4697
        assert_eq!(
4698
            parser(r"\0").parse_escape().unwrap_err(),
4699
            TestError {
4700
                span: span(0..2),
4701
                kind: ast::ErrorKind::UnsupportedBackreference,
4702
            }
4703
        );
4704
        assert_eq!(
4705
            parser(r"\9").parse_escape().unwrap_err(),
4706
            TestError {
4707
                span: span(0..2),
4708
                kind: ast::ErrorKind::UnsupportedBackreference,
4709
            }
4710
        );
4711
    }
4712
4713
    #[test]
4714
    fn parse_octal() {
4715
        for i in 0..511 {
4716
            let pat = format!(r"\{:o}", i);
4717
            assert_eq!(
4718
                parser_octal(&pat).parse_escape(),
4719
                Ok(Primitive::Literal(ast::Literal {
4720
                    span: span(0..pat.len()),
4721
                    kind: ast::LiteralKind::Octal,
4722
                    c: char::from_u32(i).unwrap(),
4723
                }))
4724
            );
4725
        }
4726
        assert_eq!(
4727
            parser_octal(r"\778").parse_escape(),
4728
            Ok(Primitive::Literal(ast::Literal {
4729
                span: span(0..3),
4730
                kind: ast::LiteralKind::Octal,
4731
                c: '?',
4732
            }))
4733
        );
4734
        assert_eq!(
4735
            parser_octal(r"\7777").parse_escape(),
4736
            Ok(Primitive::Literal(ast::Literal {
4737
                span: span(0..4),
4738
                kind: ast::LiteralKind::Octal,
4739
                c: '\u{01FF}',
4740
            }))
4741
        );
4742
        assert_eq!(
4743
            parser_octal(r"\778").parse(),
4744
            Ok(Ast::concat(ast::Concat {
4745
                span: span(0..4),
4746
                asts: vec![
4747
                    Ast::literal(ast::Literal {
4748
                        span: span(0..3),
4749
                        kind: ast::LiteralKind::Octal,
4750
                        c: '?',
4751
                    }),
4752
                    Ast::literal(ast::Literal {
4753
                        span: span(3..4),
4754
                        kind: ast::LiteralKind::Verbatim,
4755
                        c: '8',
4756
                    }),
4757
                ],
4758
            }))
4759
        );
4760
        assert_eq!(
4761
            parser_octal(r"\7777").parse(),
4762
            Ok(Ast::concat(ast::Concat {
4763
                span: span(0..5),
4764
                asts: vec![
4765
                    Ast::literal(ast::Literal {
4766
                        span: span(0..4),
4767
                        kind: ast::LiteralKind::Octal,
4768
                        c: '\u{01FF}',
4769
                    }),
4770
                    Ast::literal(ast::Literal {
4771
                        span: span(4..5),
4772
                        kind: ast::LiteralKind::Verbatim,
4773
                        c: '7',
4774
                    }),
4775
                ],
4776
            }))
4777
        );
4778
4779
        assert_eq!(
4780
            parser_octal(r"\8").parse_escape().unwrap_err(),
4781
            TestError {
4782
                span: span(0..2),
4783
                kind: ast::ErrorKind::EscapeUnrecognized,
4784
            }
4785
        );
4786
    }
4787
4788
    #[test]
4789
    fn parse_hex_two() {
4790
        for i in 0..256 {
4791
            let pat = format!(r"\x{:02x}", i);
4792
            assert_eq!(
4793
                parser(&pat).parse_escape(),
4794
                Ok(Primitive::Literal(ast::Literal {
4795
                    span: span(0..pat.len()),
4796
                    kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
4797
                    c: char::from_u32(i).unwrap(),
4798
                }))
4799
            );
4800
        }
4801
4802
        assert_eq!(
4803
            parser(r"\xF").parse_escape().unwrap_err(),
4804
            TestError {
4805
                span: span(3..3),
4806
                kind: ast::ErrorKind::EscapeUnexpectedEof,
4807
            }
4808
        );
4809
        assert_eq!(
4810
            parser(r"\xG").parse_escape().unwrap_err(),
4811
            TestError {
4812
                span: span(2..3),
4813
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4814
            }
4815
        );
4816
        assert_eq!(
4817
            parser(r"\xFG").parse_escape().unwrap_err(),
4818
            TestError {
4819
                span: span(3..4),
4820
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4821
            }
4822
        );
4823
    }
4824
4825
    #[test]
4826
    fn parse_hex_four() {
4827
        for i in 0..65536 {
4828
            let c = match char::from_u32(i) {
4829
                None => continue,
4830
                Some(c) => c,
4831
            };
4832
            let pat = format!(r"\u{:04x}", i);
4833
            assert_eq!(
4834
                parser(&pat).parse_escape(),
4835
                Ok(Primitive::Literal(ast::Literal {
4836
                    span: span(0..pat.len()),
4837
                    kind: ast::LiteralKind::HexFixed(
4838
                        ast::HexLiteralKind::UnicodeShort
4839
                    ),
4840
                    c,
4841
                }))
4842
            );
4843
        }
4844
4845
        assert_eq!(
4846
            parser(r"\uF").parse_escape().unwrap_err(),
4847
            TestError {
4848
                span: span(3..3),
4849
                kind: ast::ErrorKind::EscapeUnexpectedEof,
4850
            }
4851
        );
4852
        assert_eq!(
4853
            parser(r"\uG").parse_escape().unwrap_err(),
4854
            TestError {
4855
                span: span(2..3),
4856
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4857
            }
4858
        );
4859
        assert_eq!(
4860
            parser(r"\uFG").parse_escape().unwrap_err(),
4861
            TestError {
4862
                span: span(3..4),
4863
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4864
            }
4865
        );
4866
        assert_eq!(
4867
            parser(r"\uFFG").parse_escape().unwrap_err(),
4868
            TestError {
4869
                span: span(4..5),
4870
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4871
            }
4872
        );
4873
        assert_eq!(
4874
            parser(r"\uFFFG").parse_escape().unwrap_err(),
4875
            TestError {
4876
                span: span(5..6),
4877
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4878
            }
4879
        );
4880
        assert_eq!(
4881
            parser(r"\uD800").parse_escape().unwrap_err(),
4882
            TestError {
4883
                span: span(2..6),
4884
                kind: ast::ErrorKind::EscapeHexInvalid,
4885
            }
4886
        );
4887
    }
4888
4889
    #[test]
4890
    fn parse_hex_eight() {
4891
        for i in 0..65536 {
4892
            let c = match char::from_u32(i) {
4893
                None => continue,
4894
                Some(c) => c,
4895
            };
4896
            let pat = format!(r"\U{:08x}", i);
4897
            assert_eq!(
4898
                parser(&pat).parse_escape(),
4899
                Ok(Primitive::Literal(ast::Literal {
4900
                    span: span(0..pat.len()),
4901
                    kind: ast::LiteralKind::HexFixed(
4902
                        ast::HexLiteralKind::UnicodeLong
4903
                    ),
4904
                    c,
4905
                }))
4906
            );
4907
        }
4908
4909
        assert_eq!(
4910
            parser(r"\UF").parse_escape().unwrap_err(),
4911
            TestError {
4912
                span: span(3..3),
4913
                kind: ast::ErrorKind::EscapeUnexpectedEof,
4914
            }
4915
        );
4916
        assert_eq!(
4917
            parser(r"\UG").parse_escape().unwrap_err(),
4918
            TestError {
4919
                span: span(2..3),
4920
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4921
            }
4922
        );
4923
        assert_eq!(
4924
            parser(r"\UFG").parse_escape().unwrap_err(),
4925
            TestError {
4926
                span: span(3..4),
4927
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4928
            }
4929
        );
4930
        assert_eq!(
4931
            parser(r"\UFFG").parse_escape().unwrap_err(),
4932
            TestError {
4933
                span: span(4..5),
4934
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4935
            }
4936
        );
4937
        assert_eq!(
4938
            parser(r"\UFFFG").parse_escape().unwrap_err(),
4939
            TestError {
4940
                span: span(5..6),
4941
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4942
            }
4943
        );
4944
        assert_eq!(
4945
            parser(r"\UFFFFG").parse_escape().unwrap_err(),
4946
            TestError {
4947
                span: span(6..7),
4948
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4949
            }
4950
        );
4951
        assert_eq!(
4952
            parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4953
            TestError {
4954
                span: span(7..8),
4955
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4956
            }
4957
        );
4958
        assert_eq!(
4959
            parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4960
            TestError {
4961
                span: span(8..9),
4962
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4963
            }
4964
        );
4965
        assert_eq!(
4966
            parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4967
            TestError {
4968
                span: span(9..10),
4969
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
4970
            }
4971
        );
4972
    }
4973
4974
    #[test]
4975
    fn parse_hex_brace() {
4976
        assert_eq!(
4977
            parser(r"\u{26c4}").parse_escape(),
4978
            Ok(Primitive::Literal(ast::Literal {
4979
                span: span(0..8),
4980
                kind: ast::LiteralKind::HexBrace(
4981
                    ast::HexLiteralKind::UnicodeShort
4982
                ),
4983
                c: '⛄',
4984
            }))
4985
        );
4986
        assert_eq!(
4987
            parser(r"\U{26c4}").parse_escape(),
4988
            Ok(Primitive::Literal(ast::Literal {
4989
                span: span(0..8),
4990
                kind: ast::LiteralKind::HexBrace(
4991
                    ast::HexLiteralKind::UnicodeLong
4992
                ),
4993
                c: '⛄',
4994
            }))
4995
        );
4996
        assert_eq!(
4997
            parser(r"\x{26c4}").parse_escape(),
4998
            Ok(Primitive::Literal(ast::Literal {
4999
                span: span(0..8),
5000
                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
5001
                c: '⛄',
5002
            }))
5003
        );
5004
        assert_eq!(
5005
            parser(r"\x{26C4}").parse_escape(),
5006
            Ok(Primitive::Literal(ast::Literal {
5007
                span: span(0..8),
5008
                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
5009
                c: '⛄',
5010
            }))
5011
        );
5012
        assert_eq!(
5013
            parser(r"\x{10fFfF}").parse_escape(),
5014
            Ok(Primitive::Literal(ast::Literal {
5015
                span: span(0..10),
5016
                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
5017
                c: '\u{10FFFF}',
5018
            }))
5019
        );
5020
5021
        assert_eq!(
5022
            parser(r"\x").parse_escape().unwrap_err(),
5023
            TestError {
5024
                span: span(2..2),
5025
                kind: ast::ErrorKind::EscapeUnexpectedEof,
5026
            }
5027
        );
5028
        assert_eq!(
5029
            parser(r"\x{").parse_escape().unwrap_err(),
5030
            TestError {
5031
                span: span(2..3),
5032
                kind: ast::ErrorKind::EscapeUnexpectedEof,
5033
            }
5034
        );
5035
        assert_eq!(
5036
            parser(r"\x{FF").parse_escape().unwrap_err(),
5037
            TestError {
5038
                span: span(2..5),
5039
                kind: ast::ErrorKind::EscapeUnexpectedEof,
5040
            }
5041
        );
5042
        assert_eq!(
5043
            parser(r"\x{}").parse_escape().unwrap_err(),
5044
            TestError {
5045
                span: span(2..4),
5046
                kind: ast::ErrorKind::EscapeHexEmpty,
5047
            }
5048
        );
5049
        assert_eq!(
5050
            parser(r"\x{FGF}").parse_escape().unwrap_err(),
5051
            TestError {
5052
                span: span(4..5),
5053
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
5054
            }
5055
        );
5056
        assert_eq!(
5057
            parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
5058
            TestError {
5059
                span: span(3..9),
5060
                kind: ast::ErrorKind::EscapeHexInvalid,
5061
            }
5062
        );
5063
        assert_eq!(
5064
            parser(r"\x{D800}").parse_escape().unwrap_err(),
5065
            TestError {
5066
                span: span(3..7),
5067
                kind: ast::ErrorKind::EscapeHexInvalid,
5068
            }
5069
        );
5070
        assert_eq!(
5071
            parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
5072
            TestError {
5073
                span: span(3..12),
5074
                kind: ast::ErrorKind::EscapeHexInvalid,
5075
            }
5076
        );
5077
    }
5078
5079
    #[test]
5080
    fn parse_decimal() {
5081
        assert_eq!(parser("123").parse_decimal(), Ok(123));
5082
        assert_eq!(parser("0").parse_decimal(), Ok(0));
5083
        assert_eq!(parser("01").parse_decimal(), Ok(1));
5084
5085
        assert_eq!(
5086
            parser("-1").parse_decimal().unwrap_err(),
5087
            TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
5088
        );
5089
        assert_eq!(
5090
            parser("").parse_decimal().unwrap_err(),
5091
            TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
5092
        );
5093
        assert_eq!(
5094
            parser("9999999999").parse_decimal().unwrap_err(),
5095
            TestError {
5096
                span: span(0..10),
5097
                kind: ast::ErrorKind::DecimalInvalid,
5098
            }
5099
        );
5100
    }
5101
5102
    #[test]
5103
    fn parse_set_class() {
5104
        fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
5105
            ast::ClassSet::union(ast::ClassSetUnion { span, items })
5106
        }
5107
5108
        fn intersection(
5109
            span: Span,
5110
            lhs: ast::ClassSet,
5111
            rhs: ast::ClassSet,
5112
        ) -> ast::ClassSet {
5113
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
5114
                span,
5115
                kind: ast::ClassSetBinaryOpKind::Intersection,
5116
                lhs: Box::new(lhs),
5117
                rhs: Box::new(rhs),
5118
            })
5119
        }
5120
5121
        fn difference(
5122
            span: Span,
5123
            lhs: ast::ClassSet,
5124
            rhs: ast::ClassSet,
5125
        ) -> ast::ClassSet {
5126
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
5127
                span,
5128
                kind: ast::ClassSetBinaryOpKind::Difference,
5129
                lhs: Box::new(lhs),
5130
                rhs: Box::new(rhs),
5131
            })
5132
        }
5133
5134
        fn symdifference(
5135
            span: Span,
5136
            lhs: ast::ClassSet,
5137
            rhs: ast::ClassSet,
5138
        ) -> ast::ClassSet {
5139
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
5140
                span,
5141
                kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
5142
                lhs: Box::new(lhs),
5143
                rhs: Box::new(rhs),
5144
            })
5145
        }
5146
5147
        fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
5148
            ast::ClassSet::Item(item)
5149
        }
5150
5151
        fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
5152
            ast::ClassSetItem::Ascii(cls)
5153
        }
5154
5155
        fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
5156
            ast::ClassSetItem::Unicode(cls)
5157
        }
5158
5159
        fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
5160
            ast::ClassSetItem::Perl(cls)
5161
        }
5162
5163
        fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
5164
            ast::ClassSetItem::Bracketed(Box::new(cls))
5165
        }
5166
5167
        fn lit(span: Span, c: char) -> ast::ClassSetItem {
5168
            ast::ClassSetItem::Literal(ast::Literal {
5169
                span,
5170
                kind: ast::LiteralKind::Verbatim,
5171
                c,
5172
            })
5173
        }
5174
5175
        fn empty(span: Span) -> ast::ClassSetItem {
5176
            ast::ClassSetItem::Empty(span)
5177
        }
5178
5179
        fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
5180
            let pos1 = Position {
5181
                offset: span.start.offset + start.len_utf8(),
5182
                column: span.start.column + 1,
5183
                ..span.start
5184
            };
5185
            let pos2 = Position {
5186
                offset: span.end.offset - end.len_utf8(),
5187
                column: span.end.column - 1,
5188
                ..span.end
5189
            };
5190
            ast::ClassSetItem::Range(ast::ClassSetRange {
5191
                span,
5192
                start: ast::Literal {
5193
                    span: Span { end: pos1, ..span },
5194
                    kind: ast::LiteralKind::Verbatim,
5195
                    c: start,
5196
                },
5197
                end: ast::Literal {
5198
                    span: Span { start: pos2, ..span },
5199
                    kind: ast::LiteralKind::Verbatim,
5200
                    c: end,
5201
                },
5202
            })
5203
        }
5204
5205
        fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
5206
            ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated }
5207
        }
5208
5209
        fn lower(span: Span, negated: bool) -> ast::ClassAscii {
5210
            ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated }
5211
        }
5212
5213
        assert_eq!(
5214
            parser("[[:alnum:]]").parse(),
5215
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5216
                span: span(0..11),
5217
                negated: false,
5218
                kind: itemset(item_ascii(alnum(span(1..10), false))),
5219
            }))
5220
        );
5221
        assert_eq!(
5222
            parser("[[[:alnum:]]]").parse(),
5223
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5224
                span: span(0..13),
5225
                negated: false,
5226
                kind: itemset(item_bracket(ast::ClassBracketed {
5227
                    span: span(1..12),
5228
                    negated: false,
5229
                    kind: itemset(item_ascii(alnum(span(2..11), false))),
5230
                })),
5231
            }))
5232
        );
5233
        assert_eq!(
5234
            parser("[[:alnum:]&&[:lower:]]").parse(),
5235
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5236
                span: span(0..22),
5237
                negated: false,
5238
                kind: intersection(
5239
                    span(1..21),
5240
                    itemset(item_ascii(alnum(span(1..10), false))),
5241
                    itemset(item_ascii(lower(span(12..21), false))),
5242
                ),
5243
            }))
5244
        );
5245
        assert_eq!(
5246
            parser("[[:alnum:]--[:lower:]]").parse(),
5247
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5248
                span: span(0..22),
5249
                negated: false,
5250
                kind: difference(
5251
                    span(1..21),
5252
                    itemset(item_ascii(alnum(span(1..10), false))),
5253
                    itemset(item_ascii(lower(span(12..21), false))),
5254
                ),
5255
            }))
5256
        );
5257
        assert_eq!(
5258
            parser("[[:alnum:]~~[:lower:]]").parse(),
5259
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5260
                span: span(0..22),
5261
                negated: false,
5262
                kind: symdifference(
5263
                    span(1..21),
5264
                    itemset(item_ascii(alnum(span(1..10), false))),
5265
                    itemset(item_ascii(lower(span(12..21), false))),
5266
                ),
5267
            }))
5268
        );
5269
5270
        assert_eq!(
5271
            parser("[a]").parse(),
5272
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5273
                span: span(0..3),
5274
                negated: false,
5275
                kind: itemset(lit(span(1..2), 'a')),
5276
            }))
5277
        );
5278
        assert_eq!(
5279
            parser(r"[a\]]").parse(),
5280
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5281
                span: span(0..5),
5282
                negated: false,
5283
                kind: union(
5284
                    span(1..4),
5285
                    vec![
5286
                        lit(span(1..2), 'a'),
5287
                        ast::ClassSetItem::Literal(ast::Literal {
5288
                            span: span(2..4),
5289
                            kind: ast::LiteralKind::Meta,
5290
                            c: ']',
5291
                        }),
5292
                    ]
5293
                ),
5294
            }))
5295
        );
5296
        assert_eq!(
5297
            parser(r"[a\-z]").parse(),
5298
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5299
                span: span(0..6),
5300
                negated: false,
5301
                kind: union(
5302
                    span(1..5),
5303
                    vec![
5304
                        lit(span(1..2), 'a'),
5305
                        ast::ClassSetItem::Literal(ast::Literal {
5306
                            span: span(2..4),
5307
                            kind: ast::LiteralKind::Meta,
5308
                            c: '-',
5309
                        }),
5310
                        lit(span(4..5), 'z'),
5311
                    ]
5312
                ),
5313
            }))
5314
        );
5315
        assert_eq!(
5316
            parser("[ab]").parse(),
5317
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5318
                span: span(0..4),
5319
                negated: false,
5320
                kind: union(
5321
                    span(1..3),
5322
                    vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
5323
                ),
5324
            }))
5325
        );
5326
        assert_eq!(
5327
            parser("[a-]").parse(),
5328
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5329
                span: span(0..4),
5330
                negated: false,
5331
                kind: union(
5332
                    span(1..3),
5333
                    vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
5334
                ),
5335
            }))
5336
        );
5337
        assert_eq!(
5338
            parser("[-a]").parse(),
5339
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5340
                span: span(0..4),
5341
                negated: false,
5342
                kind: union(
5343
                    span(1..3),
5344
                    vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
5345
                ),
5346
            }))
5347
        );
5348
        assert_eq!(
5349
            parser(r"[\pL]").parse(),
5350
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5351
                span: span(0..5),
5352
                negated: false,
5353
                kind: itemset(item_unicode(ast::ClassUnicode {
5354
                    span: span(1..4),
5355
                    negated: false,
5356
                    kind: ast::ClassUnicodeKind::OneLetter('L'),
5357
                })),
5358
            }))
5359
        );
5360
        assert_eq!(
5361
            parser(r"[\w]").parse(),
5362
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5363
                span: span(0..4),
5364
                negated: false,
5365
                kind: itemset(item_perl(ast::ClassPerl {
5366
                    span: span(1..3),
5367
                    kind: ast::ClassPerlKind::Word,
5368
                    negated: false,
5369
                })),
5370
            }))
5371
        );
5372
        assert_eq!(
5373
            parser(r"[a\wz]").parse(),
5374
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5375
                span: span(0..6),
5376
                negated: false,
5377
                kind: union(
5378
                    span(1..5),
5379
                    vec![
5380
                        lit(span(1..2), 'a'),
5381
                        item_perl(ast::ClassPerl {
5382
                            span: span(2..4),
5383
                            kind: ast::ClassPerlKind::Word,
5384
                            negated: false,
5385
                        }),
5386
                        lit(span(4..5), 'z'),
5387
                    ]
5388
                ),
5389
            }))
5390
        );
5391
5392
        assert_eq!(
5393
            parser("[a-z]").parse(),
5394
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5395
                span: span(0..5),
5396
                negated: false,
5397
                kind: itemset(range(span(1..4), 'a', 'z')),
5398
            }))
5399
        );
5400
        assert_eq!(
5401
            parser("[a-cx-z]").parse(),
5402
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5403
                span: span(0..8),
5404
                negated: false,
5405
                kind: union(
5406
                    span(1..7),
5407
                    vec![
5408
                        range(span(1..4), 'a', 'c'),
5409
                        range(span(4..7), 'x', 'z'),
5410
                    ]
5411
                ),
5412
            }))
5413
        );
5414
        assert_eq!(
5415
            parser(r"[\w&&a-cx-z]").parse(),
5416
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5417
                span: span(0..12),
5418
                negated: false,
5419
                kind: intersection(
5420
                    span(1..11),
5421
                    itemset(item_perl(ast::ClassPerl {
5422
                        span: span(1..3),
5423
                        kind: ast::ClassPerlKind::Word,
5424
                        negated: false,
5425
                    })),
5426
                    union(
5427
                        span(5..11),
5428
                        vec![
5429
                            range(span(5..8), 'a', 'c'),
5430
                            range(span(8..11), 'x', 'z'),
5431
                        ]
5432
                    ),
5433
                ),
5434
            }))
5435
        );
5436
        assert_eq!(
5437
            parser(r"[a-cx-z&&\w]").parse(),
5438
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5439
                span: span(0..12),
5440
                negated: false,
5441
                kind: intersection(
5442
                    span(1..11),
5443
                    union(
5444
                        span(1..7),
5445
                        vec![
5446
                            range(span(1..4), 'a', 'c'),
5447
                            range(span(4..7), 'x', 'z'),
5448
                        ]
5449
                    ),
5450
                    itemset(item_perl(ast::ClassPerl {
5451
                        span: span(9..11),
5452
                        kind: ast::ClassPerlKind::Word,
5453
                        negated: false,
5454
                    })),
5455
                ),
5456
            }))
5457
        );
5458
        assert_eq!(
5459
            parser(r"[a--b--c]").parse(),
5460
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5461
                span: span(0..9),
5462
                negated: false,
5463
                kind: difference(
5464
                    span(1..8),
5465
                    difference(
5466
                        span(1..5),
5467
                        itemset(lit(span(1..2), 'a')),
5468
                        itemset(lit(span(4..5), 'b')),
5469
                    ),
5470
                    itemset(lit(span(7..8), 'c')),
5471
                ),
5472
            }))
5473
        );
5474
        assert_eq!(
5475
            parser(r"[a~~b~~c]").parse(),
5476
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5477
                span: span(0..9),
5478
                negated: false,
5479
                kind: symdifference(
5480
                    span(1..8),
5481
                    symdifference(
5482
                        span(1..5),
5483
                        itemset(lit(span(1..2), 'a')),
5484
                        itemset(lit(span(4..5), 'b')),
5485
                    ),
5486
                    itemset(lit(span(7..8), 'c')),
5487
                ),
5488
            }))
5489
        );
5490
        assert_eq!(
5491
            parser(r"[\^&&^]").parse(),
5492
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5493
                span: span(0..7),
5494
                negated: false,
5495
                kind: intersection(
5496
                    span(1..6),
5497
                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5498
                        span: span(1..3),
5499
                        kind: ast::LiteralKind::Meta,
5500
                        c: '^',
5501
                    })),
5502
                    itemset(lit(span(5..6), '^')),
5503
                ),
5504
            }))
5505
        );
5506
        assert_eq!(
5507
            parser(r"[\&&&&]").parse(),
5508
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5509
                span: span(0..7),
5510
                negated: false,
5511
                kind: intersection(
5512
                    span(1..6),
5513
                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5514
                        span: span(1..3),
5515
                        kind: ast::LiteralKind::Meta,
5516
                        c: '&',
5517
                    })),
5518
                    itemset(lit(span(5..6), '&')),
5519
                ),
5520
            }))
5521
        );
5522
        assert_eq!(
5523
            parser(r"[&&&&]").parse(),
5524
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5525
                span: span(0..6),
5526
                negated: false,
5527
                kind: intersection(
5528
                    span(1..5),
5529
                    intersection(
5530
                        span(1..3),
5531
                        itemset(empty(span(1..1))),
5532
                        itemset(empty(span(3..3))),
5533
                    ),
5534
                    itemset(empty(span(5..5))),
5535
                ),
5536
            }))
5537
        );
5538
5539
        let pat = "[☃-⛄]";
5540
        assert_eq!(
5541
            parser(pat).parse(),
5542
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5543
                span: span_range(pat, 0..9),
5544
                negated: false,
5545
                kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5546
                    span: span_range(pat, 1..8),
5547
                    start: ast::Literal {
5548
                        span: span_range(pat, 1..4),
5549
                        kind: ast::LiteralKind::Verbatim,
5550
                        c: '☃',
5551
                    },
5552
                    end: ast::Literal {
5553
                        span: span_range(pat, 5..8),
5554
                        kind: ast::LiteralKind::Verbatim,
5555
                        c: '⛄',
5556
                    },
5557
                })),
5558
            }))
5559
        );
5560
5561
        assert_eq!(
5562
            parser(r"[]]").parse(),
5563
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5564
                span: span(0..3),
5565
                negated: false,
5566
                kind: itemset(lit(span(1..2), ']')),
5567
            }))
5568
        );
5569
        assert_eq!(
5570
            parser(r"[]\[]").parse(),
5571
            Ok(Ast::class_bracketed(ast::ClassBracketed {
5572
                span: span(0..5),
5573
                negated: false,
5574
                kind: union(
5575
                    span(1..4),
5576
                    vec![
5577
                        lit(span(1..2), ']'),
5578
                        ast::ClassSetItem::Literal(ast::Literal {
5579
                            span: span(2..4),
5580
                            kind: ast::LiteralKind::Meta,
5581
                            c: '[',
5582
                        }),
5583
                    ]
5584
                ),
5585
            }))
5586
        );
5587
        assert_eq!(
5588
            parser(r"[\[]]").parse(),
5589
            Ok(concat(
5590
                0..5,
5591
                vec![
5592
                    Ast::class_bracketed(ast::ClassBracketed {
5593
                        span: span(0..4),
5594
                        negated: false,
5595
                        kind: itemset(ast::ClassSetItem::Literal(
5596
                            ast::Literal {
5597
                                span: span(1..3),
5598
                                kind: ast::LiteralKind::Meta,
5599
                                c: '[',
5600
                            }
5601
                        )),
5602
                    }),
5603
                    Ast::literal(ast::Literal {
5604
                        span: span(4..5),
5605
                        kind: ast::LiteralKind::Verbatim,
5606
                        c: ']',
5607
                    }),
5608
                ]
5609
            ))
5610
        );
5611
5612
        assert_eq!(
5613
            parser("[").parse().unwrap_err(),
5614
            TestError {
5615
                span: span(0..1),
5616
                kind: ast::ErrorKind::ClassUnclosed,
5617
            }
5618
        );
5619
        assert_eq!(
5620
            parser("[[").parse().unwrap_err(),
5621
            TestError {
5622
                span: span(1..2),
5623
                kind: ast::ErrorKind::ClassUnclosed,
5624
            }
5625
        );
5626
        assert_eq!(
5627
            parser("[[-]").parse().unwrap_err(),
5628
            TestError {
5629
                span: span(0..1),
5630
                kind: ast::ErrorKind::ClassUnclosed,
5631
            }
5632
        );
5633
        assert_eq!(
5634
            parser("[[[:alnum:]").parse().unwrap_err(),
5635
            TestError {
5636
                span: span(1..2),
5637
                kind: ast::ErrorKind::ClassUnclosed,
5638
            }
5639
        );
5640
        assert_eq!(
5641
            parser(r"[\b]").parse().unwrap_err(),
5642
            TestError {
5643
                span: span(1..3),
5644
                kind: ast::ErrorKind::ClassEscapeInvalid,
5645
            }
5646
        );
5647
        assert_eq!(
5648
            parser(r"[\w-a]").parse().unwrap_err(),
5649
            TestError {
5650
                span: span(1..3),
5651
                kind: ast::ErrorKind::ClassRangeLiteral,
5652
            }
5653
        );
5654
        assert_eq!(
5655
            parser(r"[a-\w]").parse().unwrap_err(),
5656
            TestError {
5657
                span: span(3..5),
5658
                kind: ast::ErrorKind::ClassRangeLiteral,
5659
            }
5660
        );
5661
        assert_eq!(
5662
            parser(r"[z-a]").parse().unwrap_err(),
5663
            TestError {
5664
                span: span(1..4),
5665
                kind: ast::ErrorKind::ClassRangeInvalid,
5666
            }
5667
        );
5668
5669
        assert_eq!(
5670
            parser_ignore_whitespace("[a ").parse().unwrap_err(),
5671
            TestError {
5672
                span: span(0..1),
5673
                kind: ast::ErrorKind::ClassUnclosed,
5674
            }
5675
        );
5676
        assert_eq!(
5677
            parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5678
            TestError {
5679
                span: span(0..1),
5680
                kind: ast::ErrorKind::ClassUnclosed,
5681
            }
5682
        );
5683
    }
5684
5685
    #[test]
5686
    fn parse_set_class_open() {
5687
        assert_eq!(parser("[a]").parse_set_class_open(), {
5688
            let set = ast::ClassBracketed {
5689
                span: span(0..1),
5690
                negated: false,
5691
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5692
                    span: span(1..1),
5693
                    items: vec![],
5694
                }),
5695
            };
5696
            let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5697
            Ok((set, union))
5698
        });
5699
        assert_eq!(
5700
            parser_ignore_whitespace("[   a]").parse_set_class_open(),
5701
            {
5702
                let set = ast::ClassBracketed {
5703
                    span: span(0..4),
5704
                    negated: false,
5705
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5706
                        span: span(4..4),
5707
                        items: vec![],
5708
                    }),
5709
                };
5710
                let union =
5711
                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5712
                Ok((set, union))
5713
            }
5714
        );
5715
        assert_eq!(parser("[^a]").parse_set_class_open(), {
5716
            let set = ast::ClassBracketed {
5717
                span: span(0..2),
5718
                negated: true,
5719
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5720
                    span: span(2..2),
5721
                    items: vec![],
5722
                }),
5723
            };
5724
            let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5725
            Ok((set, union))
5726
        });
5727
        assert_eq!(
5728
            parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5729
            {
5730
                let set = ast::ClassBracketed {
5731
                    span: span(0..4),
5732
                    negated: true,
5733
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5734
                        span: span(4..4),
5735
                        items: vec![],
5736
                    }),
5737
                };
5738
                let union =
5739
                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5740
                Ok((set, union))
5741
            }
5742
        );
5743
        assert_eq!(parser("[-a]").parse_set_class_open(), {
5744
            let set = ast::ClassBracketed {
5745
                span: span(0..2),
5746
                negated: false,
5747
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5748
                    span: span(1..1),
5749
                    items: vec![],
5750
                }),
5751
            };
5752
            let union = ast::ClassSetUnion {
5753
                span: span(1..2),
5754
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5755
                    span: span(1..2),
5756
                    kind: ast::LiteralKind::Verbatim,
5757
                    c: '-',
5758
                })],
5759
            };
5760
            Ok((set, union))
5761
        });
5762
        assert_eq!(
5763
            parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5764
            {
5765
                let set = ast::ClassBracketed {
5766
                    span: span(0..4),
5767
                    negated: false,
5768
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5769
                        span: span(2..2),
5770
                        items: vec![],
5771
                    }),
5772
                };
5773
                let union = ast::ClassSetUnion {
5774
                    span: span(2..3),
5775
                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5776
                        span: span(2..3),
5777
                        kind: ast::LiteralKind::Verbatim,
5778
                        c: '-',
5779
                    })],
5780
                };
5781
                Ok((set, union))
5782
            }
5783
        );
5784
        assert_eq!(parser("[^-a]").parse_set_class_open(), {
5785
            let set = ast::ClassBracketed {
5786
                span: span(0..3),
5787
                negated: true,
5788
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5789
                    span: span(2..2),
5790
                    items: vec![],
5791
                }),
5792
            };
5793
            let union = ast::ClassSetUnion {
5794
                span: span(2..3),
5795
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5796
                    span: span(2..3),
5797
                    kind: ast::LiteralKind::Verbatim,
5798
                    c: '-',
5799
                })],
5800
            };
5801
            Ok((set, union))
5802
        });
5803
        assert_eq!(parser("[--a]").parse_set_class_open(), {
5804
            let set = ast::ClassBracketed {
5805
                span: span(0..3),
5806
                negated: false,
5807
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5808
                    span: span(1..1),
5809
                    items: vec![],
5810
                }),
5811
            };
5812
            let union = ast::ClassSetUnion {
5813
                span: span(1..3),
5814
                items: vec![
5815
                    ast::ClassSetItem::Literal(ast::Literal {
5816
                        span: span(1..2),
5817
                        kind: ast::LiteralKind::Verbatim,
5818
                        c: '-',
5819
                    }),
5820
                    ast::ClassSetItem::Literal(ast::Literal {
5821
                        span: span(2..3),
5822
                        kind: ast::LiteralKind::Verbatim,
5823
                        c: '-',
5824
                    }),
5825
                ],
5826
            };
5827
            Ok((set, union))
5828
        });
5829
        assert_eq!(parser("[]a]").parse_set_class_open(), {
5830
            let set = ast::ClassBracketed {
5831
                span: span(0..2),
5832
                negated: false,
5833
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5834
                    span: span(1..1),
5835
                    items: vec![],
5836
                }),
5837
            };
5838
            let union = ast::ClassSetUnion {
5839
                span: span(1..2),
5840
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5841
                    span: span(1..2),
5842
                    kind: ast::LiteralKind::Verbatim,
5843
                    c: ']',
5844
                })],
5845
            };
5846
            Ok((set, union))
5847
        });
5848
        assert_eq!(
5849
            parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5850
            {
5851
                let set = ast::ClassBracketed {
5852
                    span: span(0..4),
5853
                    negated: false,
5854
                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5855
                        span: span(2..2),
5856
                        items: vec![],
5857
                    }),
5858
                };
5859
                let union = ast::ClassSetUnion {
5860
                    span: span(2..3),
5861
                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5862
                        span: span(2..3),
5863
                        kind: ast::LiteralKind::Verbatim,
5864
                        c: ']',
5865
                    })],
5866
                };
5867
                Ok((set, union))
5868
            }
5869
        );
5870
        assert_eq!(parser("[^]a]").parse_set_class_open(), {
5871
            let set = ast::ClassBracketed {
5872
                span: span(0..3),
5873
                negated: true,
5874
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5875
                    span: span(2..2),
5876
                    items: vec![],
5877
                }),
5878
            };
5879
            let union = ast::ClassSetUnion {
5880
                span: span(2..3),
5881
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5882
                    span: span(2..3),
5883
                    kind: ast::LiteralKind::Verbatim,
5884
                    c: ']',
5885
                })],
5886
            };
5887
            Ok((set, union))
5888
        });
5889
        assert_eq!(parser("[-]a]").parse_set_class_open(), {
5890
            let set = ast::ClassBracketed {
5891
                span: span(0..2),
5892
                negated: false,
5893
                kind: ast::ClassSet::union(ast::ClassSetUnion {
5894
                    span: span(1..1),
5895
                    items: vec![],
5896
                }),
5897
            };
5898
            let union = ast::ClassSetUnion {
5899
                span: span(1..2),
5900
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5901
                    span: span(1..2),
5902
                    kind: ast::LiteralKind::Verbatim,
5903
                    c: '-',
5904
                })],
5905
            };
5906
            Ok((set, union))
5907
        });
5908
5909
        assert_eq!(
5910
            parser("[").parse_set_class_open().unwrap_err(),
5911
            TestError {
5912
                span: span(0..1),
5913
                kind: ast::ErrorKind::ClassUnclosed,
5914
            }
5915
        );
5916
        assert_eq!(
5917
            parser_ignore_whitespace("[    ")
5918
                .parse_set_class_open()
5919
                .unwrap_err(),
5920
            TestError {
5921
                span: span(0..5),
5922
                kind: ast::ErrorKind::ClassUnclosed,
5923
            }
5924
        );
5925
        assert_eq!(
5926
            parser("[^").parse_set_class_open().unwrap_err(),
5927
            TestError {
5928
                span: span(0..2),
5929
                kind: ast::ErrorKind::ClassUnclosed,
5930
            }
5931
        );
5932
        assert_eq!(
5933
            parser("[]").parse_set_class_open().unwrap_err(),
5934
            TestError {
5935
                span: span(0..2),
5936
                kind: ast::ErrorKind::ClassUnclosed,
5937
            }
5938
        );
5939
        assert_eq!(
5940
            parser("[-").parse_set_class_open().unwrap_err(),
5941
            TestError {
5942
                span: span(0..0),
5943
                kind: ast::ErrorKind::ClassUnclosed,
5944
            }
5945
        );
5946
        assert_eq!(
5947
            parser("[--").parse_set_class_open().unwrap_err(),
5948
            TestError {
5949
                span: span(0..0),
5950
                kind: ast::ErrorKind::ClassUnclosed,
5951
            }
5952
        );
5953
5954
        // See: https://github.com/rust-lang/regex/issues/792
5955
        assert_eq!(
5956
            parser("(?x)[-#]").parse_with_comments().unwrap_err(),
5957
            TestError {
5958
                span: span(4..4),
5959
                kind: ast::ErrorKind::ClassUnclosed,
5960
            }
5961
        );
5962
    }
5963
5964
    #[test]
5965
    fn maybe_parse_ascii_class() {
5966
        assert_eq!(
5967
            parser(r"[:alnum:]").maybe_parse_ascii_class(),
5968
            Some(ast::ClassAscii {
5969
                span: span(0..9),
5970
                kind: ast::ClassAsciiKind::Alnum,
5971
                negated: false,
5972
            })
5973
        );
5974
        assert_eq!(
5975
            parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5976
            Some(ast::ClassAscii {
5977
                span: span(0..9),
5978
                kind: ast::ClassAsciiKind::Alnum,
5979
                negated: false,
5980
            })
5981
        );
5982
        assert_eq!(
5983
            parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5984
            Some(ast::ClassAscii {
5985
                span: span(0..10),
5986
                kind: ast::ClassAsciiKind::Alnum,
5987
                negated: true,
5988
            })
5989
        );
5990
5991
        let p = parser(r"[:");
5992
        assert_eq!(p.maybe_parse_ascii_class(), None);
5993
        assert_eq!(p.offset(), 0);
5994
5995
        let p = parser(r"[:^");
5996
        assert_eq!(p.maybe_parse_ascii_class(), None);
5997
        assert_eq!(p.offset(), 0);
5998
5999
        let p = parser(r"[^:alnum:]");
6000
        assert_eq!(p.maybe_parse_ascii_class(), None);
6001
        assert_eq!(p.offset(), 0);
6002
6003
        let p = parser(r"[:alnnum:]");
6004
        assert_eq!(p.maybe_parse_ascii_class(), None);
6005
        assert_eq!(p.offset(), 0);
6006
6007
        let p = parser(r"[:alnum]");
6008
        assert_eq!(p.maybe_parse_ascii_class(), None);
6009
        assert_eq!(p.offset(), 0);
6010
6011
        let p = parser(r"[:alnum:");
6012
        assert_eq!(p.maybe_parse_ascii_class(), None);
6013
        assert_eq!(p.offset(), 0);
6014
    }
6015
6016
    #[test]
6017
    fn parse_unicode_class() {
6018
        assert_eq!(
6019
            parser(r"\pN").parse_escape(),
6020
            Ok(Primitive::Unicode(ast::ClassUnicode {
6021
                span: span(0..3),
6022
                negated: false,
6023
                kind: ast::ClassUnicodeKind::OneLetter('N'),
6024
            }))
6025
        );
6026
        assert_eq!(
6027
            parser(r"\PN").parse_escape(),
6028
            Ok(Primitive::Unicode(ast::ClassUnicode {
6029
                span: span(0..3),
6030
                negated: true,
6031
                kind: ast::ClassUnicodeKind::OneLetter('N'),
6032
            }))
6033
        );
6034
        assert_eq!(
6035
            parser(r"\p{N}").parse_escape(),
6036
            Ok(Primitive::Unicode(ast::ClassUnicode {
6037
                span: span(0..5),
6038
                negated: false,
6039
                kind: ast::ClassUnicodeKind::Named(s("N")),
6040
            }))
6041
        );
6042
        assert_eq!(
6043
            parser(r"\P{N}").parse_escape(),
6044
            Ok(Primitive::Unicode(ast::ClassUnicode {
6045
                span: span(0..5),
6046
                negated: true,
6047
                kind: ast::ClassUnicodeKind::Named(s("N")),
6048
            }))
6049
        );
6050
        assert_eq!(
6051
            parser(r"\p{Greek}").parse_escape(),
6052
            Ok(Primitive::Unicode(ast::ClassUnicode {
6053
                span: span(0..9),
6054
                negated: false,
6055
                kind: ast::ClassUnicodeKind::Named(s("Greek")),
6056
            }))
6057
        );
6058
6059
        assert_eq!(
6060
            parser(r"\p{scx:Katakana}").parse_escape(),
6061
            Ok(Primitive::Unicode(ast::ClassUnicode {
6062
                span: span(0..16),
6063
                negated: false,
6064
                kind: ast::ClassUnicodeKind::NamedValue {
6065
                    op: ast::ClassUnicodeOpKind::Colon,
6066
                    name: s("scx"),
6067
                    value: s("Katakana"),
6068
                },
6069
            }))
6070
        );
6071
        assert_eq!(
6072
            parser(r"\p{scx=Katakana}").parse_escape(),
6073
            Ok(Primitive::Unicode(ast::ClassUnicode {
6074
                span: span(0..16),
6075
                negated: false,
6076
                kind: ast::ClassUnicodeKind::NamedValue {
6077
                    op: ast::ClassUnicodeOpKind::Equal,
6078
                    name: s("scx"),
6079
                    value: s("Katakana"),
6080
                },
6081
            }))
6082
        );
6083
        assert_eq!(
6084
            parser(r"\p{scx!=Katakana}").parse_escape(),
6085
            Ok(Primitive::Unicode(ast::ClassUnicode {
6086
                span: span(0..17),
6087
                negated: false,
6088
                kind: ast::ClassUnicodeKind::NamedValue {
6089
                    op: ast::ClassUnicodeOpKind::NotEqual,
6090
                    name: s("scx"),
6091
                    value: s("Katakana"),
6092
                },
6093
            }))
6094
        );
6095
6096
        assert_eq!(
6097
            parser(r"\p{:}").parse_escape(),
6098
            Ok(Primitive::Unicode(ast::ClassUnicode {
6099
                span: span(0..5),
6100
                negated: false,
6101
                kind: ast::ClassUnicodeKind::NamedValue {
6102
                    op: ast::ClassUnicodeOpKind::Colon,
6103
                    name: s(""),
6104
                    value: s(""),
6105
                },
6106
            }))
6107
        );
6108
        assert_eq!(
6109
            parser(r"\p{=}").parse_escape(),
6110
            Ok(Primitive::Unicode(ast::ClassUnicode {
6111
                span: span(0..5),
6112
                negated: false,
6113
                kind: ast::ClassUnicodeKind::NamedValue {
6114
                    op: ast::ClassUnicodeOpKind::Equal,
6115
                    name: s(""),
6116
                    value: s(""),
6117
                },
6118
            }))
6119
        );
6120
        assert_eq!(
6121
            parser(r"\p{!=}").parse_escape(),
6122
            Ok(Primitive::Unicode(ast::ClassUnicode {
6123
                span: span(0..6),
6124
                negated: false,
6125
                kind: ast::ClassUnicodeKind::NamedValue {
6126
                    op: ast::ClassUnicodeOpKind::NotEqual,
6127
                    name: s(""),
6128
                    value: s(""),
6129
                },
6130
            }))
6131
        );
6132
6133
        assert_eq!(
6134
            parser(r"\p").parse_escape().unwrap_err(),
6135
            TestError {
6136
                span: span(2..2),
6137
                kind: ast::ErrorKind::EscapeUnexpectedEof,
6138
            }
6139
        );
6140
        assert_eq!(
6141
            parser(r"\p{").parse_escape().unwrap_err(),
6142
            TestError {
6143
                span: span(3..3),
6144
                kind: ast::ErrorKind::EscapeUnexpectedEof,
6145
            }
6146
        );
6147
        assert_eq!(
6148
            parser(r"\p{N").parse_escape().unwrap_err(),
6149
            TestError {
6150
                span: span(4..4),
6151
                kind: ast::ErrorKind::EscapeUnexpectedEof,
6152
            }
6153
        );
6154
        assert_eq!(
6155
            parser(r"\p{Greek").parse_escape().unwrap_err(),
6156
            TestError {
6157
                span: span(8..8),
6158
                kind: ast::ErrorKind::EscapeUnexpectedEof,
6159
            }
6160
        );
6161
6162
        assert_eq!(
6163
            parser(r"\pNz").parse(),
6164
            Ok(Ast::concat(ast::Concat {
6165
                span: span(0..4),
6166
                asts: vec![
6167
                    Ast::class_unicode(ast::ClassUnicode {
6168
                        span: span(0..3),
6169
                        negated: false,
6170
                        kind: ast::ClassUnicodeKind::OneLetter('N'),
6171
                    }),
6172
                    Ast::literal(ast::Literal {
6173
                        span: span(3..4),
6174
                        kind: ast::LiteralKind::Verbatim,
6175
                        c: 'z',
6176
                    }),
6177
                ],
6178
            }))
6179
        );
6180
        assert_eq!(
6181
            parser(r"\p{Greek}z").parse(),
6182
            Ok(Ast::concat(ast::Concat {
6183
                span: span(0..10),
6184
                asts: vec![
6185
                    Ast::class_unicode(ast::ClassUnicode {
6186
                        span: span(0..9),
6187
                        negated: false,
6188
                        kind: ast::ClassUnicodeKind::Named(s("Greek")),
6189
                    }),
6190
                    Ast::literal(ast::Literal {
6191
                        span: span(9..10),
6192
                        kind: ast::LiteralKind::Verbatim,
6193
                        c: 'z',
6194
                    }),
6195
                ],
6196
            }))
6197
        );
6198
        assert_eq!(
6199
            parser(r"\p\{").parse().unwrap_err(),
6200
            TestError {
6201
                span: span(2..3),
6202
                kind: ast::ErrorKind::UnicodeClassInvalid,
6203
            }
6204
        );
6205
        assert_eq!(
6206
            parser(r"\P\{").parse().unwrap_err(),
6207
            TestError {
6208
                span: span(2..3),
6209
                kind: ast::ErrorKind::UnicodeClassInvalid,
6210
            }
6211
        );
6212
    }
6213
6214
    #[test]
6215
    fn parse_perl_class() {
6216
        assert_eq!(
6217
            parser(r"\d").parse_escape(),
6218
            Ok(Primitive::Perl(ast::ClassPerl {
6219
                span: span(0..2),
6220
                kind: ast::ClassPerlKind::Digit,
6221
                negated: false,
6222
            }))
6223
        );
6224
        assert_eq!(
6225
            parser(r"\D").parse_escape(),
6226
            Ok(Primitive::Perl(ast::ClassPerl {
6227
                span: span(0..2),
6228
                kind: ast::ClassPerlKind::Digit,
6229
                negated: true,
6230
            }))
6231
        );
6232
        assert_eq!(
6233
            parser(r"\s").parse_escape(),
6234
            Ok(Primitive::Perl(ast::ClassPerl {
6235
                span: span(0..2),
6236
                kind: ast::ClassPerlKind::Space,
6237
                negated: false,
6238
            }))
6239
        );
6240
        assert_eq!(
6241
            parser(r"\S").parse_escape(),
6242
            Ok(Primitive::Perl(ast::ClassPerl {
6243
                span: span(0..2),
6244
                kind: ast::ClassPerlKind::Space,
6245
                negated: true,
6246
            }))
6247
        );
6248
        assert_eq!(
6249
            parser(r"\w").parse_escape(),
6250
            Ok(Primitive::Perl(ast::ClassPerl {
6251
                span: span(0..2),
6252
                kind: ast::ClassPerlKind::Word,
6253
                negated: false,
6254
            }))
6255
        );
6256
        assert_eq!(
6257
            parser(r"\W").parse_escape(),
6258
            Ok(Primitive::Perl(ast::ClassPerl {
6259
                span: span(0..2),
6260
                kind: ast::ClassPerlKind::Word,
6261
                negated: true,
6262
            }))
6263
        );
6264
6265
        assert_eq!(
6266
            parser(r"\d").parse(),
6267
            Ok(Ast::class_perl(ast::ClassPerl {
6268
                span: span(0..2),
6269
                kind: ast::ClassPerlKind::Digit,
6270
                negated: false,
6271
            }))
6272
        );
6273
        assert_eq!(
6274
            parser(r"\dz").parse(),
6275
            Ok(Ast::concat(ast::Concat {
6276
                span: span(0..3),
6277
                asts: vec![
6278
                    Ast::class_perl(ast::ClassPerl {
6279
                        span: span(0..2),
6280
                        kind: ast::ClassPerlKind::Digit,
6281
                        negated: false,
6282
                    }),
6283
                    Ast::literal(ast::Literal {
6284
                        span: span(2..3),
6285
                        kind: ast::LiteralKind::Verbatim,
6286
                        c: 'z',
6287
                    }),
6288
                ],
6289
            }))
6290
        );
6291
    }
6292
6293
    // This tests a bug fix where the nest limit checker wasn't decrementing
6294
    // its depth during post-traversal, which causes long regexes to trip
6295
    // the default limit too aggressively.
6296
    #[test]
6297
    fn regression_454_nest_too_big() {
6298
        let pattern = r#"
6299
        2(?:
6300
          [45]\d{3}|
6301
          7(?:
6302
            1[0-267]|
6303
            2[0-289]|
6304
            3[0-29]|
6305
            4[01]|
6306
            5[1-3]|
6307
            6[013]|
6308
            7[0178]|
6309
            91
6310
          )|
6311
          8(?:
6312
            0[125]|
6313
            [139][1-6]|
6314
            2[0157-9]|
6315
            41|
6316
            6[1-35]|
6317
            7[1-5]|
6318
            8[1-8]|
6319
            90
6320
          )|
6321
          9(?:
6322
            0[0-2]|
6323
            1[0-4]|
6324
            2[568]|
6325
            3[3-6]|
6326
            5[5-7]|
6327
            6[0167]|
6328
            7[15]|
6329
            8[0146-9]
6330
          )
6331
        )\d{4}
6332
        "#;
6333
        assert!(parser_nest_limit(pattern, 50).parse().is_ok());
6334
    }
6335
6336
    // This tests that we treat a trailing `-` in a character class as a
6337
    // literal `-` even when whitespace mode is enabled and there is whitespace
6338
    // after the trailing `-`.
6339
    #[test]
6340
    fn regression_455_trailing_dash_ignore_whitespace() {
6341
        assert!(parser("(?x)[ / - ]").parse().is_ok());
6342
        assert!(parser("(?x)[ a - ]").parse().is_ok());
6343
        assert!(parser(
6344
            "(?x)[
6345
            a
6346
            - ]
6347
        "
6348
        )
6349
        .parse()
6350
        .is_ok());
6351
        assert!(parser(
6352
            "(?x)[
6353
            a # wat
6354
            - ]
6355
        "
6356
        )
6357
        .parse()
6358
        .is_ok());
6359
6360
        assert!(parser("(?x)[ / -").parse().is_err());
6361
        assert!(parser("(?x)[ / - ").parse().is_err());
6362
        assert!(parser(
6363
            "(?x)[
6364
            / -
6365
        "
6366
        )
6367
        .parse()
6368
        .is_err());
6369
        assert!(parser(
6370
            "(?x)[
6371
            / - # wat
6372
        "
6373
        )
6374
        .parse()
6375
        .is_err());
6376
    }
6377
}