Coverage Report

Created: 2021-03-22 08:29

/rust/registry/src/github.com-1ecc6299db9ec823/regex-syntax-0.6.23/src/hir/mod.rs
Line
Count
Source (jump to first uncovered line)
1
/*!
2
Defines a high-level intermediate representation for regular expressions.
3
*/
4
use std::char;
5
use std::cmp;
6
use std::error;
7
use std::fmt;
8
use std::result;
9
use std::u8;
10
11
use ast::Span;
12
use hir::interval::{Interval, IntervalSet, IntervalSetIter};
13
use unicode;
14
15
pub use hir::visitor::{visit, Visitor};
16
pub use unicode::CaseFoldError;
17
18
mod interval;
19
pub mod literal;
20
pub mod print;
21
pub mod translate;
22
mod visitor;
23
24
/// An error that can occur while translating an `Ast` to a `Hir`.
25
0
#[derive(Clone, Debug, Eq, PartialEq)]
26
pub struct Error {
27
    /// The kind of error.
28
    kind: ErrorKind,
29
    /// The original pattern that the translator's Ast was parsed from. Every
30
    /// span in an error is a valid range into this string.
31
    pattern: String,
32
    /// The span of this error, derived from the Ast given to the translator.
33
    span: Span,
34
}
35
36
impl Error {
37
    /// Return the type of this error.
38
    pub fn kind(&self) -> &ErrorKind {
39
        &self.kind
40
    }
41
42
    /// The original pattern string in which this error occurred.
43
    ///
44
    /// Every span reported by this error is reported in terms of this string.
45
    pub fn pattern(&self) -> &str {
46
        &self.pattern
47
    }
48
49
    /// Return the span at which this error occurred.
50
    pub fn span(&self) -> &Span {
51
        &self.span
52
    }
53
}
54
55
/// The type of an error that occurred while building an `Hir`.
56
0
#[derive(Clone, Debug, Eq, PartialEq)]
57
pub enum ErrorKind {
58
    /// This error occurs when a Unicode feature is used when Unicode
59
    /// support is disabled. For example `(?-u:\pL)` would trigger this error.
60
    UnicodeNotAllowed,
61
    /// This error occurs when translating a pattern that could match a byte
62
    /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled.
63
    InvalidUtf8,
64
    /// This occurs when an unrecognized Unicode property name could not
65
    /// be found.
66
    UnicodePropertyNotFound,
67
    /// This occurs when an unrecognized Unicode property value could not
68
    /// be found.
69
    UnicodePropertyValueNotFound,
70
    /// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or
71
    /// `\d`) could not be found. This can occur when the `unicode-perl`
72
    /// crate feature is not enabled.
73
    UnicodePerlClassNotFound,
74
    /// This occurs when the Unicode simple case mapping tables are not
75
    /// available, and the regular expression required Unicode aware case
76
    /// insensitivity.
77
    UnicodeCaseUnavailable,
78
    /// This occurs when the translator attempts to construct a character class
79
    /// that is empty.
80
    ///
81
    /// Note that this restriction in the translator may be removed in the
82
    /// future.
83
    EmptyClassNotAllowed,
84
    /// Hints that destructuring should not be exhaustive.
85
    ///
86
    /// This enum may grow additional variants, so this makes sure clients
87
    /// don't count on exhaustive matching. (Otherwise, adding a new variant
88
    /// could break existing code.)
89
    #[doc(hidden)]
90
    __Nonexhaustive,
91
}
92
93
impl ErrorKind {
94
    // TODO: Remove this method entirely on the next breaking semver release.
95
    #[allow(deprecated)]
96
0
    fn description(&self) -> &str {
97
        use self::ErrorKind::*;
98
0
        match *self {
99
0
            UnicodeNotAllowed => "Unicode not allowed here",
100
0
            InvalidUtf8 => "pattern can match invalid UTF-8",
101
0
            UnicodePropertyNotFound => "Unicode property not found",
102
0
            UnicodePropertyValueNotFound => "Unicode property value not found",
103
            UnicodePerlClassNotFound => {
104
0
                "Unicode-aware Perl class not found \
105
0
                 (make sure the unicode-perl feature is enabled)"
106
            }
107
            UnicodeCaseUnavailable => {
108
0
                "Unicode-aware case insensitivity matching is not available \
109
0
                 (make sure the unicode-case feature is enabled)"
110
            }
111
0
            EmptyClassNotAllowed => "empty character classes are not allowed",
112
0
            __Nonexhaustive => unreachable!(),
113
        }
114
0
    }
115
}
116
117
impl error::Error for Error {
118
    // TODO: Remove this method entirely on the next breaking semver release.
119
    #[allow(deprecated)]
120
    fn description(&self) -> &str {
121
        self.kind.description()
122
    }
123
}
124
125
impl fmt::Display for Error {
126
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
127
        ::error::Formatter::from(self).fmt(f)
128
    }
129
}
130
131
impl fmt::Display for ErrorKind {
132
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
133
        // TODO: Remove this on the next breaking semver release.
134
        #[allow(deprecated)]
135
        f.write_str(self.description())
136
    }
137
}
138
139
/// A high-level intermediate representation (HIR) for a regular expression.
140
///
141
/// The HIR of a regular expression represents an intermediate step between its
142
/// abstract syntax (a structured description of the concrete syntax) and
143
/// compiled byte codes. The purpose of HIR is to make regular expressions
144
/// easier to analyze. In particular, the AST is much more complex than the
145
/// HIR. For example, while an AST supports arbitrarily nested character
146
/// classes, the HIR will flatten all nested classes into a single set. The HIR
147
/// will also "compile away" every flag present in the concrete syntax. For
148
/// example, users of HIR expressions never need to worry about case folding;
149
/// it is handled automatically by the translator (e.g., by translating `(?i)A`
150
/// to `[aA]`).
151
///
152
/// If the HIR was produced by a translator that disallows invalid UTF-8, then
153
/// the HIR is guaranteed to match UTF-8 exclusively.
154
///
155
/// This type defines its own destructor that uses constant stack space and
156
/// heap space proportional to the size of the HIR.
157
///
158
/// The specific type of an HIR expression can be accessed via its `kind`
159
/// or `into_kind` methods. This extra level of indirection exists for two
160
/// reasons:
161
///
162
/// 1. Construction of an HIR expression *must* use the constructor methods
163
///    on this `Hir` type instead of building the `HirKind` values directly.
164
///    This permits construction to enforce invariants like "concatenations
165
///    always consist of two or more sub-expressions."
166
/// 2. Every HIR expression contains attributes that are defined inductively,
167
///    and can be computed cheaply during the construction process. For
168
///    example, one such attribute is whether the expression must match at the
169
///    beginning of the text.
170
///
171
/// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
172
/// expression pattern string, and uses constant stack space and heap space
173
/// proportional to the size of the `Hir`.
174
0
#[derive(Clone, Debug, Eq, PartialEq)]
175
pub struct Hir {
176
    /// The underlying HIR kind.
177
    kind: HirKind,
178
    /// Analysis info about this HIR, computed during construction.
179
    info: HirInfo,
180
}
181
182
/// The kind of an arbitrary `Hir` expression.
183
0
#[derive(Clone, Debug, Eq, PartialEq)]
184
pub enum HirKind {
185
    /// The empty regular expression, which matches everything, including the
186
    /// empty string.
187
    Empty,
188
    /// A single literal character that matches exactly this character.
189
    Literal(Literal),
190
    /// A single character class that matches any of the characters in the
191
    /// class. A class can either consist of Unicode scalar values as
192
    /// characters, or it can use bytes.
193
    Class(Class),
194
    /// An anchor assertion. An anchor assertion match always has zero length.
195
    Anchor(Anchor),
196
    /// A word boundary assertion, which may or may not be Unicode aware. A
197
    /// word boundary assertion match always has zero length.
198
    WordBoundary(WordBoundary),
199
    /// A repetition operation applied to a child expression.
200
    Repetition(Repetition),
201
    /// A possibly capturing group, which contains a child expression.
202
    Group(Group),
203
    /// A concatenation of expressions. A concatenation always has at least two
204
    /// child expressions.
205
    ///
206
    /// A concatenation matches only if each of its child expression matches
207
    /// one after the other.
208
    Concat(Vec<Hir>),
209
    /// An alternation of expressions. An alternation always has at least two
210
    /// child expressions.
211
    ///
212
    /// An alternation matches only if at least one of its child expression
213
    /// matches. If multiple expressions match, then the leftmost is preferred.
214
    Alternation(Vec<Hir>),
215
}
216
217
impl Hir {
218
    /// Returns a reference to the underlying HIR kind.
219
    pub fn kind(&self) -> &HirKind {
220
        &self.kind
221
    }
222
223
    /// Consumes ownership of this HIR expression and returns its underlying
224
    /// `HirKind`.
225
    pub fn into_kind(mut self) -> HirKind {
226
        use std::mem;
227
        mem::replace(&mut self.kind, HirKind::Empty)
228
    }
229
230
    /// Returns an empty HIR expression.
231
    ///
232
    /// An empty HIR expression always matches, including the empty string.
233
    pub fn empty() -> Hir {
234
        let mut info = HirInfo::new();
235
        info.set_always_utf8(true);
236
        info.set_all_assertions(true);
237
        info.set_anchored_start(false);
238
        info.set_anchored_end(false);
239
        info.set_line_anchored_start(false);
240
        info.set_line_anchored_end(false);
241
        info.set_any_anchored_start(false);
242
        info.set_any_anchored_end(false);
243
        info.set_match_empty(true);
244
        info.set_literal(false);
245
        info.set_alternation_literal(false);
246
        Hir { kind: HirKind::Empty, info: info }
247
    }
248
249
    /// Creates a literal HIR expression.
250
    ///
251
    /// If the given literal has a `Byte` variant with an ASCII byte, then this
252
    /// method panics. This enforces the invariant that `Byte` variants are
253
    /// only used to express matching of invalid UTF-8.
254
0
    pub fn literal(lit: Literal) -> Hir {
255
0
        if let Literal::Byte(b) = lit {
256
0
            assert!(b > 0x7F);
257
0
        }
258
259
0
        let mut info = HirInfo::new();
260
0
        info.set_always_utf8(lit.is_unicode());
261
0
        info.set_all_assertions(false);
262
0
        info.set_anchored_start(false);
263
0
        info.set_anchored_end(false);
264
0
        info.set_line_anchored_start(false);
265
0
        info.set_line_anchored_end(false);
266
0
        info.set_any_anchored_start(false);
267
0
        info.set_any_anchored_end(false);
268
0
        info.set_match_empty(false);
269
0
        info.set_literal(true);
270
0
        info.set_alternation_literal(true);
271
0
        Hir { kind: HirKind::Literal(lit), info: info }
272
0
    }
273
274
    /// Creates a class HIR expression.
275
    pub fn class(class: Class) -> Hir {
276
        let mut info = HirInfo::new();
277
        info.set_always_utf8(class.is_always_utf8());
278
        info.set_all_assertions(false);
279
        info.set_anchored_start(false);
280
        info.set_anchored_end(false);
281
        info.set_line_anchored_start(false);
282
        info.set_line_anchored_end(false);
283
        info.set_any_anchored_start(false);
284
        info.set_any_anchored_end(false);
285
        info.set_match_empty(false);
286
        info.set_literal(false);
287
        info.set_alternation_literal(false);
288
        Hir { kind: HirKind::Class(class), info: info }
289
    }
290
291
    /// Creates an anchor assertion HIR expression.
292
0
    pub fn anchor(anchor: Anchor) -> Hir {
293
0
        let mut info = HirInfo::new();
294
0
        info.set_always_utf8(true);
295
0
        info.set_all_assertions(true);
296
0
        info.set_anchored_start(false);
297
0
        info.set_anchored_end(false);
298
0
        info.set_line_anchored_start(false);
299
0
        info.set_line_anchored_end(false);
300
0
        info.set_any_anchored_start(false);
301
0
        info.set_any_anchored_end(false);
302
0
        info.set_match_empty(true);
303
0
        info.set_literal(false);
304
0
        info.set_alternation_literal(false);
305
0
        if let Anchor::StartText = anchor {
306
0
            info.set_anchored_start(true);
307
0
            info.set_line_anchored_start(true);
308
0
            info.set_any_anchored_start(true);
309
0
        }
310
0
        if let Anchor::EndText = anchor {
311
0
            info.set_anchored_end(true);
312
0
            info.set_line_anchored_end(true);
313
0
            info.set_any_anchored_end(true);
314
0
        }
315
0
        if let Anchor::StartLine = anchor {
316
0
            info.set_line_anchored_start(true);
317
0
        }
318
0
        if let Anchor::EndLine = anchor {
319
0
            info.set_line_anchored_end(true);
320
0
        }
321
0
        Hir { kind: HirKind::Anchor(anchor), info: info }
322
0
    }
323
324
    /// Creates a word boundary assertion HIR expression.
325
0
    pub fn word_boundary(word_boundary: WordBoundary) -> Hir {
326
0
        let mut info = HirInfo::new();
327
0
        info.set_always_utf8(true);
328
0
        info.set_all_assertions(true);
329
0
        info.set_anchored_start(false);
330
0
        info.set_anchored_end(false);
331
0
        info.set_line_anchored_start(false);
332
0
        info.set_line_anchored_end(false);
333
0
        info.set_any_anchored_start(false);
334
0
        info.set_any_anchored_end(false);
335
0
        info.set_literal(false);
336
0
        info.set_alternation_literal(false);
337
0
        // A negated word boundary matches the empty string, but a normal
338
0
        // word boundary does not!
339
0
        info.set_match_empty(word_boundary.is_negated());
340
0
        // Negated ASCII word boundaries can match invalid UTF-8.
341
0
        if let WordBoundary::AsciiNegate = word_boundary {
342
0
            info.set_always_utf8(false);
343
0
        }
344
0
        Hir { kind: HirKind::WordBoundary(word_boundary), info: info }
345
0
    }
346
347
    /// Creates a repetition HIR expression.
348
0
    pub fn repetition(rep: Repetition) -> Hir {
349
0
        let mut info = HirInfo::new();
350
0
        info.set_always_utf8(rep.hir.is_always_utf8());
351
0
        info.set_all_assertions(rep.hir.is_all_assertions());
352
        // If this operator can match the empty string, then it can never
353
        // be anchored.
354
0
        info.set_anchored_start(
355
0
            !rep.is_match_empty() && rep.hir.is_anchored_start(),
356
        );
357
0
        info.set_anchored_end(
358
0
            !rep.is_match_empty() && rep.hir.is_anchored_end(),
359
        );
360
0
        info.set_line_anchored_start(
361
0
            !rep.is_match_empty() && rep.hir.is_anchored_start(),
362
        );
363
0
        info.set_line_anchored_end(
364
0
            !rep.is_match_empty() && rep.hir.is_anchored_end(),
365
        );
366
0
        info.set_any_anchored_start(rep.hir.is_any_anchored_start());
367
0
        info.set_any_anchored_end(rep.hir.is_any_anchored_end());
368
0
        info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty());
369
0
        info.set_literal(false);
370
0
        info.set_alternation_literal(false);
371
0
        Hir { kind: HirKind::Repetition(rep), info: info }
372
0
    }
373
374
    /// Creates a group HIR expression.
375
    pub fn group(group: Group) -> Hir {
376
        let mut info = HirInfo::new();
377
        info.set_always_utf8(group.hir.is_always_utf8());
378
        info.set_all_assertions(group.hir.is_all_assertions());
379
        info.set_anchored_start(group.hir.is_anchored_start());
380
        info.set_anchored_end(group.hir.is_anchored_end());
381
        info.set_line_anchored_start(group.hir.is_line_anchored_start());
382
        info.set_line_anchored_end(group.hir.is_line_anchored_end());
383
        info.set_any_anchored_start(group.hir.is_any_anchored_start());
384
        info.set_any_anchored_end(group.hir.is_any_anchored_end());
385
        info.set_match_empty(group.hir.is_match_empty());
386
        info.set_literal(false);
387
        info.set_alternation_literal(false);
388
        Hir { kind: HirKind::Group(group), info: info }
389
    }
390
391
    /// Returns the concatenation of the given expressions.
392
    ///
393
    /// This flattens the concatenation as appropriate.
394
0
    pub fn concat(mut exprs: Vec<Hir>) -> Hir {
395
0
        match exprs.len() {
396
0
            0 => Hir::empty(),
397
0
            1 => exprs.pop().unwrap(),
398
            _ => {
399
0
                let mut info = HirInfo::new();
400
0
                info.set_always_utf8(true);
401
0
                info.set_all_assertions(true);
402
0
                info.set_any_anchored_start(false);
403
0
                info.set_any_anchored_end(false);
404
0
                info.set_match_empty(true);
405
0
                info.set_literal(true);
406
0
                info.set_alternation_literal(true);
407
408
                // Some attributes require analyzing all sub-expressions.
409
0
                for e in &exprs {
410
0
                    let x = info.is_always_utf8() && e.is_always_utf8();
411
0
                    info.set_always_utf8(x);
412
413
0
                    let x = info.is_all_assertions() && e.is_all_assertions();
414
0
                    info.set_all_assertions(x);
415
416
0
                    let x = info.is_any_anchored_start()
417
0
                        || e.is_any_anchored_start();
418
0
                    info.set_any_anchored_start(x);
419
420
0
                    let x =
421
0
                        info.is_any_anchored_end() || e.is_any_anchored_end();
422
0
                    info.set_any_anchored_end(x);
423
424
0
                    let x = info.is_match_empty() && e.is_match_empty();
425
0
                    info.set_match_empty(x);
426
427
0
                    let x = info.is_literal() && e.is_literal();
428
0
                    info.set_literal(x);
429
430
0
                    let x = info.is_alternation_literal()
431
0
                        && e.is_alternation_literal();
432
0
                    info.set_alternation_literal(x);
433
                }
434
                // Anchored attributes require something slightly more
435
                // sophisticated. Normally, WLOG, to determine whether an
436
                // expression is anchored to the start, we'd only need to check
437
                // the first expression of a concatenation. However,
438
                // expressions like `$\b^` are still anchored to the start,
439
                // but the first expression in the concatenation *isn't*
440
                // anchored to the start. So the "first" expression to look at
441
                // is actually one that is either not an assertion or is
442
                // specifically the StartText assertion.
443
0
                info.set_anchored_start(
444
0
                    exprs
445
0
                        .iter()
446
0
                        .take_while(|e| {
447
                            e.is_anchored_start() || e.is_all_assertions()
448
0
                        })
449
0
                        .any(|e| e.is_anchored_start()),
450
0
                );
451
0
                // Similarly for the end anchor, but in reverse.
452
0
                info.set_anchored_end(
453
0
                    exprs
454
0
                        .iter()
455
0
                        .rev()
456
0
                        .take_while(|e| {
457
                            e.is_anchored_end() || e.is_all_assertions()
458
0
                        })
459
0
                        .any(|e| e.is_anchored_end()),
460
0
                );
461
0
                // Repeat the process for line anchors.
462
0
                info.set_line_anchored_start(
463
0
                    exprs
464
0
                        .iter()
465
0
                        .take_while(|e| {
466
                            e.is_line_anchored_start() || e.is_all_assertions()
467
0
                        })
468
0
                        .any(|e| e.is_line_anchored_start()),
469
0
                );
470
0
                info.set_line_anchored_end(
471
0
                    exprs
472
0
                        .iter()
473
0
                        .rev()
474
0
                        .take_while(|e| {
475
                            e.is_line_anchored_end() || e.is_all_assertions()
476
0
                        })
477
0
                        .any(|e| e.is_line_anchored_end()),
478
0
                );
479
0
                Hir { kind: HirKind::Concat(exprs), info: info }
480
            }
481
        }
482
0
    }
483
484
    /// Returns the alternation of the given expressions.
485
    ///
486
    /// This flattens the alternation as appropriate.
487
0
    pub fn alternation(mut exprs: Vec<Hir>) -> Hir {
488
0
        match exprs.len() {
489
0
            0 => Hir::empty(),
490
0
            1 => exprs.pop().unwrap(),
491
            _ => {
492
0
                let mut info = HirInfo::new();
493
0
                info.set_always_utf8(true);
494
0
                info.set_all_assertions(true);
495
0
                info.set_anchored_start(true);
496
0
                info.set_anchored_end(true);
497
0
                info.set_line_anchored_start(true);
498
0
                info.set_line_anchored_end(true);
499
0
                info.set_any_anchored_start(false);
500
0
                info.set_any_anchored_end(false);
501
0
                info.set_match_empty(false);
502
0
                info.set_literal(false);
503
0
                info.set_alternation_literal(true);
504
505
                // Some attributes require analyzing all sub-expressions.
506
0
                for e in &exprs {
507
0
                    let x = info.is_always_utf8() && e.is_always_utf8();
508
0
                    info.set_always_utf8(x);
509
510
0
                    let x = info.is_all_assertions() && e.is_all_assertions();
511
0
                    info.set_all_assertions(x);
512
513
0
                    let x = info.is_anchored_start() && e.is_anchored_start();
514
0
                    info.set_anchored_start(x);
515
516
0
                    let x = info.is_anchored_end() && e.is_anchored_end();
517
0
                    info.set_anchored_end(x);
518
519
0
                    let x = info.is_line_anchored_start()
520
0
                        && e.is_line_anchored_start();
521
0
                    info.set_line_anchored_start(x);
522
523
0
                    let x = info.is_line_anchored_end()
524
0
                        && e.is_line_anchored_end();
525
0
                    info.set_line_anchored_end(x);
526
527
0
                    let x = info.is_any_anchored_start()
528
0
                        || e.is_any_anchored_start();
529
0
                    info.set_any_anchored_start(x);
530
531
0
                    let x =
532
0
                        info.is_any_anchored_end() || e.is_any_anchored_end();
533
0
                    info.set_any_anchored_end(x);
534
535
0
                    let x = info.is_match_empty() || e.is_match_empty();
536
0
                    info.set_match_empty(x);
537
538
0
                    let x = info.is_alternation_literal() && e.is_literal();
539
0
                    info.set_alternation_literal(x);
540
                }
541
0
                Hir { kind: HirKind::Alternation(exprs), info: info }
542
            }
543
        }
544
0
    }
545
546
    /// Build an HIR expression for `.`.
547
    ///
548
    /// A `.` expression matches any character except for `\n`. To build an
549
    /// expression that matches any character, including `\n`, use the `any`
550
    /// method.
551
    ///
552
    /// If `bytes` is `true`, then this assumes characters are limited to a
553
    /// single byte.
554
0
    pub fn dot(bytes: bool) -> Hir {
555
0
        if bytes {
556
0
            let mut cls = ClassBytes::empty();
557
0
            cls.push(ClassBytesRange::new(b'\0', b'\x09'));
558
0
            cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
559
0
            Hir::class(Class::Bytes(cls))
560
        } else {
561
0
            let mut cls = ClassUnicode::empty();
562
0
            cls.push(ClassUnicodeRange::new('\0', '\x09'));
563
0
            cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
564
0
            Hir::class(Class::Unicode(cls))
565
        }
566
0
    }
567
568
    /// Build an HIR expression for `(?s).`.
569
    ///
570
    /// A `(?s).` expression matches any character, including `\n`. To build an
571
    /// expression that matches any character except for `\n`, then use the
572
    /// `dot` method.
573
    ///
574
    /// If `bytes` is `true`, then this assumes characters are limited to a
575
    /// single byte.
576
0
    pub fn any(bytes: bool) -> Hir {
577
0
        if bytes {
578
0
            let mut cls = ClassBytes::empty();
579
0
            cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
580
0
            Hir::class(Class::Bytes(cls))
581
        } else {
582
0
            let mut cls = ClassUnicode::empty();
583
0
            cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
584
0
            Hir::class(Class::Unicode(cls))
585
        }
586
0
    }
587
588
    /// Return true if and only if this HIR will always match valid UTF-8.
589
    ///
590
    /// When this returns false, then it is possible for this HIR expression
591
    /// to match invalid UTF-8.
592
    pub fn is_always_utf8(&self) -> bool {
593
        self.info.is_always_utf8()
594
    }
595
596
    /// Returns true if and only if this entire HIR expression is made up of
597
    /// zero-width assertions.
598
    ///
599
    /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but
600
    /// not `^a`.
601
    pub fn is_all_assertions(&self) -> bool {
602
        self.info.is_all_assertions()
603
    }
604
605
    /// Return true if and only if this HIR is required to match from the
606
    /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`,
607
    /// `^foo|^bar` but not `^foo|bar`.
608
    pub fn is_anchored_start(&self) -> bool {
609
        self.info.is_anchored_start()
610
    }
611
612
    /// Return true if and only if this HIR is required to match at the end
613
    /// of text. This includes expressions like `foo$`, `(foo|bar)$`,
614
    /// `foo$|bar$` but not `foo$|bar`.
615
    pub fn is_anchored_end(&self) -> bool {
616
        self.info.is_anchored_end()
617
    }
618
619
    /// Return true if and only if this HIR is required to match from the
620
    /// beginning of text or the beginning of a line. This includes expressions
621
    /// like `^foo`, `(?m)^foo`, `^(foo|bar)`, `^(foo|bar)`, `(?m)^foo|^bar`
622
    /// but not `^foo|bar` or `(?m)^foo|bar`.
623
    ///
624
    /// Note that if `is_anchored_start` is `true`, then
625
    /// `is_line_anchored_start` will also be `true`. The reverse implication
626
    /// is not true. For example, `(?m)^foo` is line anchored, but not
627
    /// `is_anchored_start`.
628
    pub fn is_line_anchored_start(&self) -> bool {
629
        self.info.is_line_anchored_start()
630
    }
631
632
    /// Return true if and only if this HIR is required to match at the
633
    /// end of text or the end of a line. This includes expressions like
634
    /// `foo$`, `(?m)foo$`, `(foo|bar)$`, `(?m)(foo|bar)$`, `foo$|bar$`,
635
    /// `(?m)(foo|bar)$`, but not `foo$|bar` or `(?m)foo$|bar`.
636
    ///
637
    /// Note that if `is_anchored_end` is `true`, then
638
    /// `is_line_anchored_end` will also be `true`. The reverse implication
639
    /// is not true. For example, `(?m)foo$` is line anchored, but not
640
    /// `is_anchored_end`.
641
    pub fn is_line_anchored_end(&self) -> bool {
642
        self.info.is_line_anchored_end()
643
    }
644
645
    /// Return true if and only if this HIR contains any sub-expression that
646
    /// is required to match at the beginning of text. Specifically, this
647
    /// returns true if the `^` symbol (when multiline mode is disabled) or the
648
    /// `\A` escape appear anywhere in the regex.
649
    pub fn is_any_anchored_start(&self) -> bool {
650
        self.info.is_any_anchored_start()
651
    }
652
653
    /// Return true if and only if this HIR contains any sub-expression that is
654
    /// required to match at the end of text. Specifically, this returns true
655
    /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
656
    /// appear anywhere in the regex.
657
    pub fn is_any_anchored_end(&self) -> bool {
658
        self.info.is_any_anchored_end()
659
    }
660
661
    /// Return true if and only if the empty string is part of the language
662
    /// matched by this regular expression.
663
    ///
664
    /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\B`,
665
    /// but not `a`, `a+` or `\b`.
666
    pub fn is_match_empty(&self) -> bool {
667
        self.info.is_match_empty()
668
    }
669
670
    /// Return true if and only if this HIR is a simple literal. This is only
671
    /// true when this HIR expression is either itself a `Literal` or a
672
    /// concatenation of only `Literal`s.
673
    ///
674
    /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`,
675
    /// `` are not (even though that contain sub-expressions that are literals).
676
    pub fn is_literal(&self) -> bool {
677
        self.info.is_literal()
678
    }
679
680
    /// Return true if and only if this HIR is either a simple literal or an
681
    /// alternation of simple literals. This is only
682
    /// true when this HIR expression is either itself a `Literal` or a
683
    /// concatenation of only `Literal`s or an alternation of only `Literal`s.
684
    ///
685
    /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
686
    /// literals, but `f+`, `(foo)`, `foo()`, ``
687
    /// are not (even though that contain sub-expressions that are literals).
688
    pub fn is_alternation_literal(&self) -> bool {
689
        self.info.is_alternation_literal()
690
    }
691
}
692
693
impl HirKind {
694
    /// Return true if and only if this HIR is the empty regular expression.
695
    ///
696
    /// Note that this is not defined inductively. That is, it only tests if
697
    /// this kind is the `Empty` variant. To get the inductive definition,
698
    /// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
699
0
    pub fn is_empty(&self) -> bool {
700
0
        match *self {
701
0
            HirKind::Empty => true,
702
0
            _ => false,
703
        }
704
0
    }
705
706
    /// Returns true if and only if this kind has any (including possibly
707
    /// empty) subexpressions.
708
    pub fn has_subexprs(&self) -> bool {
709
        match *self {
710
            HirKind::Empty
711
            | HirKind::Literal(_)
712
            | HirKind::Class(_)
713
            | HirKind::Anchor(_)
714
            | HirKind::WordBoundary(_) => false,
715
            HirKind::Group(_)
716
            | HirKind::Repetition(_)
717
            | HirKind::Concat(_)
718
            | HirKind::Alternation(_) => true,
719
        }
720
    }
721
}
722
723
/// Print a display representation of this Hir.
724
///
725
/// The result of this is a valid regular expression pattern string.
726
///
727
/// This implementation uses constant stack space and heap space proportional
728
/// to the size of the `Hir`.
729
impl fmt::Display for Hir {
730
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
731
        use hir::print::Printer;
732
        Printer::new().print(self, f)
733
    }
734
}
735
736
/// The high-level intermediate representation of a literal.
737
///
738
/// A literal corresponds to a single character, where a character is either
739
/// defined by a Unicode scalar value or an arbitrary byte. Unicode characters
740
/// are preferred whenever possible. In particular, a `Byte` variant is only
741
/// ever produced when it could match invalid UTF-8.
742
0
#[derive(Clone, Debug, Eq, PartialEq)]
743
pub enum Literal {
744
    /// A single character represented by a Unicode scalar value.
745
    Unicode(char),
746
    /// A single character represented by an arbitrary byte.
747
    Byte(u8),
748
}
749
750
impl Literal {
751
    /// Returns true if and only if this literal corresponds to a Unicode
752
    /// scalar value.
753
    pub fn is_unicode(&self) -> bool {
754
0
        match *self {
755
0
            Literal::Unicode(_) => true,
756
0
            Literal::Byte(b) if b <= 0x7F => true,
757
0
            Literal::Byte(_) => false,
758
        }
759
0
    }
760
}
761
762
/// The high-level intermediate representation of a character class.
763
///
764
/// A character class corresponds to a set of characters. A character is either
765
/// defined by a Unicode scalar value or a byte. Unicode characters are used
766
/// by default, while bytes are used when Unicode mode (via the `u` flag) is
767
/// disabled.
768
///
769
/// A character class, regardless of its character type, is represented by a
770
/// sequence of non-overlapping non-adjacent ranges of characters.
771
///
772
/// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may
773
/// be produced even when it exclusively matches valid UTF-8. This is because
774
/// a `Bytes` variant represents an intention by the author of the regular
775
/// expression to disable Unicode mode, which in turn impacts the semantics of
776
/// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
777
/// match the same set of strings.
778
0
#[derive(Clone, Debug, Eq, PartialEq)]
779
pub enum Class {
780
    /// A set of characters represented by Unicode scalar values.
781
    Unicode(ClassUnicode),
782
    /// A set of characters represented by arbitrary bytes (one byte per
783
    /// character).
784
    Bytes(ClassBytes),
785
}
786
787
impl Class {
788
    /// Apply Unicode simple case folding to this character class, in place.
789
    /// The character class will be expanded to include all simple case folded
790
    /// character variants.
791
    ///
792
    /// If this is a byte oriented character class, then this will be limited
793
    /// to the ASCII ranges `A-Z` and `a-z`.
794
0
    pub fn case_fold_simple(&mut self) {
795
0
        match *self {
796
0
            Class::Unicode(ref mut x) => x.case_fold_simple(),
797
0
            Class::Bytes(ref mut x) => x.case_fold_simple(),
798
        }
799
0
    }
800
801
    /// Negate this character class in place.
802
    ///
803
    /// After completion, this character class will contain precisely the
804
    /// characters that weren't previously in the class.
805
0
    pub fn negate(&mut self) {
806
0
        match *self {
807
0
            Class::Unicode(ref mut x) => x.negate(),
808
0
            Class::Bytes(ref mut x) => x.negate(),
809
        }
810
0
    }
811
812
    /// Returns true if and only if this character class will only ever match
813
    /// valid UTF-8.
814
    ///
815
    /// A character class can match invalid UTF-8 only when the following
816
    /// conditions are met:
817
    ///
818
    /// 1. The translator was configured to permit generating an expression
819
    ///    that can match invalid UTF-8. (By default, this is disabled.)
820
    /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
821
    ///    syntax or in the parser builder. By default, Unicode mode is
822
    ///    enabled.
823
0
    pub fn is_always_utf8(&self) -> bool {
824
0
        match *self {
825
0
            Class::Unicode(_) => true,
826
0
            Class::Bytes(ref x) => x.is_all_ascii(),
827
        }
828
0
    }
829
}
830
831
/// A set of characters represented by Unicode scalar values.
832
0
#[derive(Clone, Debug, Eq, PartialEq)]
833
pub struct ClassUnicode {
834
    set: IntervalSet<ClassUnicodeRange>,
835
}
836
837
impl ClassUnicode {
838
    /// Create a new class from a sequence of ranges.
839
    ///
840
    /// The given ranges do not need to be in any specific order, and ranges
841
    /// may overlap.
842
    pub fn new<I>(ranges: I) -> ClassUnicode
843
    where
844
        I: IntoIterator<Item = ClassUnicodeRange>,
845
    {
846
        ClassUnicode { set: IntervalSet::new(ranges) }
847
    }
848
849
    /// Create a new class with no ranges.
850
    pub fn empty() -> ClassUnicode {
851
        ClassUnicode::new(vec![])
852
    }
853
854
    /// Add a new range to this set.
855
    pub fn push(&mut self, range: ClassUnicodeRange) {
856
        self.set.push(range);
857
    }
858
859
    /// Return an iterator over all ranges in this class.
860
    ///
861
    /// The iterator yields ranges in ascending order.
862
    pub fn iter(&self) -> ClassUnicodeIter {
863
        ClassUnicodeIter(self.set.iter())
864
    }
865
866
    /// Return the underlying ranges as a slice.
867
    pub fn ranges(&self) -> &[ClassUnicodeRange] {
868
        self.set.intervals()
869
    }
870
871
    /// Expand this character class such that it contains all case folded
872
    /// characters, according to Unicode's "simple" mapping. For example, if
873
    /// this class consists of the range `a-z`, then applying case folding will
874
    /// result in the class containing both the ranges `a-z` and `A-Z`.
875
    ///
876
    /// # Panics
877
    ///
878
    /// This routine panics when the case mapping data necessary for this
879
    /// routine to complete is unavailable. This occurs when the `unicode-case`
880
    /// feature is not enabled.
881
    ///
882
    /// Callers should prefer using `try_case_fold_simple` instead, which will
883
    /// return an error instead of panicking.
884
    pub fn case_fold_simple(&mut self) {
885
        self.set
886
            .case_fold_simple()
887
            .expect("unicode-case feature must be enabled");
888
    }
889
890
    /// Expand this character class such that it contains all case folded
891
    /// characters, according to Unicode's "simple" mapping. For example, if
892
    /// this class consists of the range `a-z`, then applying case folding will
893
    /// result in the class containing both the ranges `a-z` and `A-Z`.
894
    ///
895
    /// # Error
896
    ///
897
    /// This routine returns an error when the case mapping data necessary
898
    /// for this routine to complete is unavailable. This occurs when the
899
    /// `unicode-case` feature is not enabled.
900
    pub fn try_case_fold_simple(
901
        &mut self,
902
    ) -> result::Result<(), CaseFoldError> {
903
        self.set.case_fold_simple()
904
    }
905
906
    /// Negate this character class.
907
    ///
908
    /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
909
    /// set, then it will not be in this set after negation.
910
    pub fn negate(&mut self) {
911
        self.set.negate();
912
    }
913
914
    /// Union this character class with the given character class, in place.
915
    pub fn union(&mut self, other: &ClassUnicode) {
916
        self.set.union(&other.set);
917
    }
918
919
    /// Intersect this character class with the given character class, in
920
    /// place.
921
    pub fn intersect(&mut self, other: &ClassUnicode) {
922
        self.set.intersect(&other.set);
923
    }
924
925
    /// Subtract the given character class from this character class, in place.
926
    pub fn difference(&mut self, other: &ClassUnicode) {
927
        self.set.difference(&other.set);
928
    }
929
930
    /// Compute the symmetric difference of the given character classes, in
931
    /// place.
932
    ///
933
    /// This computes the symmetric difference of two character classes. This
934
    /// removes all elements in this class that are also in the given class,
935
    /// but all adds all elements from the given class that aren't in this
936
    /// class. That is, the class will contain all elements in either class,
937
    /// but will not contain any elements that are in both classes.
938
    pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
939
        self.set.symmetric_difference(&other.set);
940
    }
941
942
    /// Returns true if and only if this character class will either match
943
    /// nothing or only ASCII bytes. Stated differently, this returns false
944
    /// if and only if this class contains a non-ASCII codepoint.
945
0
    pub fn is_all_ascii(&self) -> bool {
946
0
        self.set.intervals().last().map_or(true, |r| r.end <= '\x7F')
947
0
    }
948
}
949
950
/// An iterator over all ranges in a Unicode character class.
951
///
952
/// The lifetime `'a` refers to the lifetime of the underlying class.
953
#[derive(Debug)]
954
pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
955
956
impl<'a> Iterator for ClassUnicodeIter<'a> {
957
    type Item = &'a ClassUnicodeRange;
958
959
    fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
960
        self.0.next()
961
    }
962
}
963
964
/// A single range of characters represented by Unicode scalar values.
965
///
966
/// The range is closed. That is, the start and end of the range are included
967
/// in the range.
968
0
#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
969
pub struct ClassUnicodeRange {
970
    start: char,
971
    end: char,
972
}
973
974
impl fmt::Debug for ClassUnicodeRange {
975
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
976
0
        let start = if !self.start.is_whitespace() && !self.start.is_control()
977
        {
978
0
            self.start.to_string()
979
        } else {
980
0
            format!("0x{:X}", self.start as u32)
981
        };
982
0
        let end = if !self.end.is_whitespace() && !self.end.is_control() {
983
0
            self.end.to_string()
984
        } else {
985
0
            format!("0x{:X}", self.end as u32)
986
        };
987
0
        f.debug_struct("ClassUnicodeRange")
988
0
            .field("start", &start)
989
0
            .field("end", &end)
990
0
            .finish()
991
0
    }
992
}
993
994
impl Interval for ClassUnicodeRange {
995
    type Bound = char;
996
997
    #[inline]
998
    fn lower(&self) -> char {
999
        self.start
1000
    }
1001
    #[inline]
1002
    fn upper(&self) -> char {
1003
        self.end
1004
    }
1005
    #[inline]
1006
    fn set_lower(&mut self, bound: char) {
1007
        self.start = bound;
1008
    }
1009
    #[inline]
1010
    fn set_upper(&mut self, bound: char) {
1011
        self.end = bound;
1012
    }
1013
1014
    /// Apply simple case folding to this Unicode scalar value range.
1015
    ///
1016
    /// Additional ranges are appended to the given vector. Canonical ordering
1017
    /// is *not* maintained in the given vector.
1018
0
    fn case_fold_simple(
1019
0
        &self,
1020
0
        ranges: &mut Vec<ClassUnicodeRange>,
1021
0
    ) -> Result<(), unicode::CaseFoldError> {
1022
0
        if !unicode::contains_simple_case_mapping(self.start, self.end)? {
1023
0
            return Ok(());
1024
0
        }
1025
0
        let start = self.start as u32;
1026
0
        let end = (self.end as u32).saturating_add(1);
1027
0
        let mut next_simple_cp = None;
1028
0
        for cp in (start..end).filter_map(char::from_u32) {
1029
0
            if next_simple_cp.map_or(false, |next| cp < next) {
1030
                continue;
1031
0
            }
1032
0
            let it = match unicode::simple_fold(cp)? {
1033
0
                Ok(it) => it,
1034
0
                Err(next) => {
1035
0
                    next_simple_cp = next;
1036
                    continue;
1037
                }
1038
            };
1039
0
            for cp_folded in it {
1040
0
                ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded));
1041
0
            }
1042
        }
1043
0
        Ok(())
1044
0
    }
1045
}
1046
1047
impl ClassUnicodeRange {
1048
    /// Create a new Unicode scalar value range for a character class.
1049
    ///
1050
    /// The returned range is always in a canonical form. That is, the range
1051
    /// returned always satisfies the invariant that `start <= end`.
1052
    pub fn new(start: char, end: char) -> ClassUnicodeRange {
1053
        ClassUnicodeRange::create(start, end)
1054
    }
1055
1056
    /// Return the start of this range.
1057
    ///
1058
    /// The start of a range is always less than or equal to the end of the
1059
    /// range.
1060
    pub fn start(&self) -> char {
1061
        self.start
1062
    }
1063
1064
    /// Return the end of this range.
1065
    ///
1066
    /// The end of a range is always greater than or equal to the start of the
1067
    /// range.
1068
    pub fn end(&self) -> char {
1069
        self.end
1070
    }
1071
}
1072
1073
/// A set of characters represented by arbitrary bytes (where one byte
1074
/// corresponds to one character).
1075
0
#[derive(Clone, Debug, Eq, PartialEq)]
1076
pub struct ClassBytes {
1077
    set: IntervalSet<ClassBytesRange>,
1078
}
1079
1080
impl ClassBytes {
1081
    /// Create a new class from a sequence of ranges.
1082
    ///
1083
    /// The given ranges do not need to be in any specific order, and ranges
1084
    /// may overlap.
1085
    pub fn new<I>(ranges: I) -> ClassBytes
1086
    where
1087
        I: IntoIterator<Item = ClassBytesRange>,
1088
    {
1089
        ClassBytes { set: IntervalSet::new(ranges) }
1090
    }
1091
1092
    /// Create a new class with no ranges.
1093
    pub fn empty() -> ClassBytes {
1094
        ClassBytes::new(vec![])
1095
    }
1096
1097
    /// Add a new range to this set.
1098
    pub fn push(&mut self, range: ClassBytesRange) {
1099
        self.set.push(range);
1100
    }
1101
1102
    /// Return an iterator over all ranges in this class.
1103
    ///
1104
    /// The iterator yields ranges in ascending order.
1105
    pub fn iter(&self) -> ClassBytesIter {
1106
        ClassBytesIter(self.set.iter())
1107
    }
1108
1109
    /// Return the underlying ranges as a slice.
1110
    pub fn ranges(&self) -> &[ClassBytesRange] {
1111
        self.set.intervals()
1112
    }
1113
1114
    /// Expand this character class such that it contains all case folded
1115
    /// characters. For example, if this class consists of the range `a-z`,
1116
    /// then applying case folding will result in the class containing both the
1117
    /// ranges `a-z` and `A-Z`.
1118
    ///
1119
    /// Note that this only applies ASCII case folding, which is limited to the
1120
    /// characters `a-z` and `A-Z`.
1121
    pub fn case_fold_simple(&mut self) {
1122
        self.set.case_fold_simple().expect("ASCII case folding never fails");
1123
    }
1124
1125
    /// Negate this byte class.
1126
    ///
1127
    /// For all `b` where `b` is a any byte, if `b` was in this set, then it
1128
    /// will not be in this set after negation.
1129
    pub fn negate(&mut self) {
1130
        self.set.negate();
1131
    }
1132
1133
    /// Union this byte class with the given byte class, in place.
1134
    pub fn union(&mut self, other: &ClassBytes) {
1135
        self.set.union(&other.set);
1136
    }
1137
1138
    /// Intersect this byte class with the given byte class, in place.
1139
    pub fn intersect(&mut self, other: &ClassBytes) {
1140
        self.set.intersect(&other.set);
1141
    }
1142
1143
    /// Subtract the given byte class from this byte class, in place.
1144
    pub fn difference(&mut self, other: &ClassBytes) {
1145
        self.set.difference(&other.set);
1146
    }
1147
1148
    /// Compute the symmetric difference of the given byte classes, in place.
1149
    ///
1150
    /// This computes the symmetric difference of two byte classes. This
1151
    /// removes all elements in this class that are also in the given class,
1152
    /// but all adds all elements from the given class that aren't in this
1153
    /// class. That is, the class will contain all elements in either class,
1154
    /// but will not contain any elements that are in both classes.
1155
    pub fn symmetric_difference(&mut self, other: &ClassBytes) {
1156
        self.set.symmetric_difference(&other.set);
1157
    }
1158
1159
    /// Returns true if and only if this character class will either match
1160
    /// nothing or only ASCII bytes. Stated differently, this returns false
1161
    /// if and only if this class contains a non-ASCII byte.
1162
0
    pub fn is_all_ascii(&self) -> bool {
1163
0
        self.set.intervals().last().map_or(true, |r| r.end <= 0x7F)
1164
0
    }
1165
}
1166
1167
/// An iterator over all ranges in a byte character class.
1168
///
1169
/// The lifetime `'a` refers to the lifetime of the underlying class.
1170
#[derive(Debug)]
1171
pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
1172
1173
impl<'a> Iterator for ClassBytesIter<'a> {
1174
    type Item = &'a ClassBytesRange;
1175
1176
    fn next(&mut self) -> Option<&'a ClassBytesRange> {
1177
        self.0.next()
1178
    }
1179
}
1180
1181
/// A single range of characters represented by arbitrary bytes.
1182
///
1183
/// The range is closed. That is, the start and end of the range are included
1184
/// in the range.
1185
0
#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
1186
pub struct ClassBytesRange {
1187
    start: u8,
1188
    end: u8,
1189
}
1190
1191
impl Interval for ClassBytesRange {
1192
    type Bound = u8;
1193
1194
    #[inline]
1195
    fn lower(&self) -> u8 {
1196
        self.start
1197
    }
1198
    #[inline]
1199
    fn upper(&self) -> u8 {
1200
        self.end
1201
    }
1202
    #[inline]
1203
    fn set_lower(&mut self, bound: u8) {
1204
        self.start = bound;
1205
    }
1206
    #[inline]
1207
    fn set_upper(&mut self, bound: u8) {
1208
        self.end = bound;
1209
    }
1210
1211
    /// Apply simple case folding to this byte range. Only ASCII case mappings
1212
    /// (for a-z) are applied.
1213
    ///
1214
    /// Additional ranges are appended to the given vector. Canonical ordering
1215
    /// is *not* maintained in the given vector.
1216
0
    fn case_fold_simple(
1217
0
        &self,
1218
0
        ranges: &mut Vec<ClassBytesRange>,
1219
0
    ) -> Result<(), unicode::CaseFoldError> {
1220
0
        if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
1221
0
            let lower = cmp::max(self.start, b'a');
1222
0
            let upper = cmp::min(self.end, b'z');
1223
0
            ranges.push(ClassBytesRange::new(lower - 32, upper - 32));
1224
0
        }
1225
0
        if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
1226
0
            let lower = cmp::max(self.start, b'A');
1227
0
            let upper = cmp::min(self.end, b'Z');
1228
0
            ranges.push(ClassBytesRange::new(lower + 32, upper + 32));
1229
0
        }
1230
0
        Ok(())
1231
0
    }
1232
}
1233
1234
impl ClassBytesRange {
1235
    /// Create a new byte range for a character class.
1236
    ///
1237
    /// The returned range is always in a canonical form. That is, the range
1238
    /// returned always satisfies the invariant that `start <= end`.
1239
    pub fn new(start: u8, end: u8) -> ClassBytesRange {
1240
        ClassBytesRange::create(start, end)
1241
    }
1242
1243
    /// Return the start of this range.
1244
    ///
1245
    /// The start of a range is always less than or equal to the end of the
1246
    /// range.
1247
    pub fn start(&self) -> u8 {
1248
        self.start
1249
    }
1250
1251
    /// Return the end of this range.
1252
    ///
1253
    /// The end of a range is always greater than or equal to the start of the
1254
    /// range.
1255
    pub fn end(&self) -> u8 {
1256
        self.end
1257
    }
1258
}
1259
1260
impl fmt::Debug for ClassBytesRange {
1261
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1262
0
        let mut debug = f.debug_struct("ClassBytesRange");
1263
0
        if self.start <= 0x7F {
1264
0
            debug.field("start", &(self.start as char));
1265
0
        } else {
1266
0
            debug.field("start", &self.start);
1267
0
        }
1268
0
        if self.end <= 0x7F {
1269
0
            debug.field("end", &(self.end as char));
1270
0
        } else {
1271
0
            debug.field("end", &self.end);
1272
0
        }
1273
0
        debug.finish()
1274
0
    }
1275
}
1276
1277
/// The high-level intermediate representation for an anchor assertion.
1278
///
1279
/// A matching anchor assertion is always zero-length.
1280
0
#[derive(Clone, Debug, Eq, PartialEq)]
1281
pub enum Anchor {
1282
    /// Match the beginning of a line or the beginning of text. Specifically,
1283
    /// this matches at the starting position of the input, or at the position
1284
    /// immediately following a `\n` character.
1285
    StartLine,
1286
    /// Match the end of a line or the end of text. Specifically,
1287
    /// this matches at the end position of the input, or at the position
1288
    /// immediately preceding a `\n` character.
1289
    EndLine,
1290
    /// Match the beginning of text. Specifically, this matches at the starting
1291
    /// position of the input.
1292
    StartText,
1293
    /// Match the end of text. Specifically, this matches at the ending
1294
    /// position of the input.
1295
    EndText,
1296
}
1297
1298
/// The high-level intermediate representation for a word-boundary assertion.
1299
///
1300
/// A matching word boundary assertion is always zero-length.
1301
0
#[derive(Clone, Debug, Eq, PartialEq)]
1302
pub enum WordBoundary {
1303
    /// Match a Unicode-aware word boundary. That is, this matches a position
1304
    /// where the left adjacent character and right adjacent character
1305
    /// correspond to a word and non-word or a non-word and word character.
1306
    Unicode,
1307
    /// Match a Unicode-aware negation of a word boundary.
1308
    UnicodeNegate,
1309
    /// Match an ASCII-only word boundary. That is, this matches a position
1310
    /// where the left adjacent character and right adjacent character
1311
    /// correspond to a word and non-word or a non-word and word character.
1312
    Ascii,
1313
    /// Match an ASCII-only negation of a word boundary.
1314
    AsciiNegate,
1315
}
1316
1317
impl WordBoundary {
1318
    /// Returns true if and only if this word boundary assertion is negated.
1319
    pub fn is_negated(&self) -> bool {
1320
        match *self {
1321
            WordBoundary::Unicode | WordBoundary::Ascii => false,
1322
            WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true,
1323
        }
1324
    }
1325
}
1326
1327
/// The high-level intermediate representation for a group.
1328
///
1329
/// This represents one of three possible group types:
1330
///
1331
/// 1. A non-capturing group (e.g., `(?:expr)`).
1332
/// 2. A capturing group (e.g., `(expr)`).
1333
/// 3. A named capturing group (e.g., `(?P<name>expr)`).
1334
0
#[derive(Clone, Debug, Eq, PartialEq)]
1335
pub struct Group {
1336
    /// The kind of this group. If it is a capturing group, then the kind
1337
    /// contains the capture group index (and the name, if it is a named
1338
    /// group).
1339
    pub kind: GroupKind,
1340
    /// The expression inside the capturing group, which may be empty.
1341
    pub hir: Box<Hir>,
1342
}
1343
1344
/// The kind of group.
1345
0
#[derive(Clone, Debug, Eq, PartialEq)]
1346
pub enum GroupKind {
1347
    /// A normal unnamed capturing group.
1348
    ///
1349
    /// The value is the capture index of the group.
1350
    CaptureIndex(u32),
1351
    /// A named capturing group.
1352
    CaptureName {
1353
        /// The name of the group.
1354
        name: String,
1355
        /// The capture index of the group.
1356
        index: u32,
1357
    },
1358
    /// A non-capturing group.
1359
    NonCapturing,
1360
}
1361
1362
/// The high-level intermediate representation of a repetition operator.
1363
///
1364
/// A repetition operator permits the repetition of an arbitrary
1365
/// sub-expression.
1366
0
#[derive(Clone, Debug, Eq, PartialEq)]
1367
pub struct Repetition {
1368
    /// The kind of this repetition operator.
1369
    pub kind: RepetitionKind,
1370
    /// Whether this repetition operator is greedy or not. A greedy operator
1371
    /// will match as much as it can. A non-greedy operator will match as
1372
    /// little as it can.
1373
    ///
1374
    /// Typically, operators are greedy by default and are only non-greedy when
1375
    /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
1376
    /// not. However, this can be inverted via the `U` "ungreedy" flag.
1377
    pub greedy: bool,
1378
    /// The expression being repeated.
1379
    pub hir: Box<Hir>,
1380
}
1381
1382
impl Repetition {
1383
    /// Returns true if and only if this repetition operator makes it possible
1384
    /// to match the empty string.
1385
    ///
1386
    /// Note that this is not defined inductively. For example, while `a*`
1387
    /// will report `true`, `()+` will not, even though `()` matches the empty
1388
    /// string and one or more occurrences of something that matches the empty
1389
    /// string will always match the empty string. In order to get the
1390
    /// inductive definition, see the corresponding method on
1391
    /// [`Hir`](struct.Hir.html).
1392
0
    pub fn is_match_empty(&self) -> bool {
1393
0
        match self.kind {
1394
0
            RepetitionKind::ZeroOrOne => true,
1395
0
            RepetitionKind::ZeroOrMore => true,
1396
0
            RepetitionKind::OneOrMore => false,
1397
0
            RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0,
1398
0
            RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0,
1399
0
            RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0,
1400
        }
1401
0
    }
1402
}
1403
1404
/// The kind of a repetition operator.
1405
0
#[derive(Clone, Debug, Eq, PartialEq)]
1406
pub enum RepetitionKind {
1407
    /// Matches a sub-expression zero or one times.
1408
    ZeroOrOne,
1409
    /// Matches a sub-expression zero or more times.
1410
    ZeroOrMore,
1411
    /// Matches a sub-expression one or more times.
1412
    OneOrMore,
1413
    /// Matches a sub-expression within a bounded range of times.
1414
    Range(RepetitionRange),
1415
}
1416
1417
/// The kind of a counted repetition operator.
1418
0
#[derive(Clone, Debug, Eq, PartialEq)]
1419
pub enum RepetitionRange {
1420
    /// Matches a sub-expression exactly this many times.
1421
    Exactly(u32),
1422
    /// Matches a sub-expression at least this many times.
1423
    AtLeast(u32),
1424
    /// Matches a sub-expression at least `m` times and at most `n` times.
1425
    Bounded(u32, u32),
1426
}
1427
1428
/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
1429
/// space but heap space proportional to the depth of the total `Hir`.
1430
impl Drop for Hir {
1431
    fn drop(&mut self) {
1432
        use std::mem;
1433
1434
        match *self.kind() {
1435
            HirKind::Empty
1436
            | HirKind::Literal(_)
1437
            | HirKind::Class(_)
1438
            | HirKind::Anchor(_)
1439
            | HirKind::WordBoundary(_) => return,
1440
            HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
1441
            HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
1442
            HirKind::Concat(ref x) if x.is_empty() => return,
1443
            HirKind::Alternation(ref x) if x.is_empty() => return,
1444
            _ => {}
1445
        }
1446
1447
        let mut stack = vec![mem::replace(self, Hir::empty())];
1448
        while let Some(mut expr) = stack.pop() {
1449
            match expr.kind {
1450
                HirKind::Empty
1451
                | HirKind::Literal(_)
1452
                | HirKind::Class(_)
1453
                | HirKind::Anchor(_)
1454
                | HirKind::WordBoundary(_) => {}
1455
                HirKind::Group(ref mut x) => {
1456
                    stack.push(mem::replace(&mut x.hir, Hir::empty()));
1457
                }
1458
                HirKind::Repetition(ref mut x) => {
1459
                    stack.push(mem::replace(&mut x.hir, Hir::empty()));
1460
                }
1461
                HirKind::Concat(ref mut x) => {
1462
                    stack.extend(x.drain(..));
1463
                }
1464
                HirKind::Alternation(ref mut x) => {
1465
                    stack.extend(x.drain(..));
1466
                }
1467
            }
1468
        }
1469
    }
1470
}
1471
1472
/// A type that documents various attributes of an HIR expression.
1473
///
1474
/// These attributes are typically defined inductively on the HIR.
1475
0
#[derive(Clone, Debug, Eq, PartialEq)]
1476
struct HirInfo {
1477
    /// Represent yes/no questions by a bitfield to conserve space, since
1478
    /// this is included in every HIR expression.
1479
    ///
1480
    /// If more attributes need to be added, it is OK to increase the size of
1481
    /// this as appropriate.
1482
    bools: u16,
1483
}
1484
1485
// A simple macro for defining bitfield accessors/mutators.
1486
macro_rules! define_bool {
1487
    ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
1488
        fn $is_fn_name(&self) -> bool {
1489
            self.bools & (0b1 << $bit) > 0
1490
        }
1491
1492
0
        fn $set_fn_name(&mut self, yes: bool) {
1493
0
            if yes {
1494
0
                self.bools |= 1 << $bit;
1495
0
            } else {
1496
0
                self.bools &= !(1 << $bit);
1497
0
            }
1498
0
        }
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_all_assertions
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_anchored_end
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_line_anchored_end
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_always_utf8
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_any_anchored_start
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_match_empty
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_alternation_literal
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_line_anchored_start
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_literal
Unexecuted instantiation: <regex_syntax::hir::HirInfo>::set_any_anchored_end
1499
    };
1500
}
1501
1502
impl HirInfo {
1503
    fn new() -> HirInfo {
1504
        HirInfo { bools: 0 }
1505
    }
1506
1507
    define_bool!(0, is_always_utf8, set_always_utf8);
1508
    define_bool!(1, is_all_assertions, set_all_assertions);
1509
    define_bool!(2, is_anchored_start, set_anchored_start);
1510
    define_bool!(3, is_anchored_end, set_anchored_end);
1511
    define_bool!(4, is_line_anchored_start, set_line_anchored_start);
1512
    define_bool!(5, is_line_anchored_end, set_line_anchored_end);
1513
    define_bool!(6, is_any_anchored_start, set_any_anchored_start);
1514
    define_bool!(7, is_any_anchored_end, set_any_anchored_end);
1515
    define_bool!(8, is_match_empty, set_match_empty);
1516
    define_bool!(9, is_literal, set_literal);
1517
    define_bool!(10, is_alternation_literal, set_alternation_literal);
1518
}
1519
1520
#[cfg(test)]
1521
mod tests {
1522
    use super::*;
1523
1524
    fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
1525
        let ranges: Vec<ClassUnicodeRange> = ranges
1526
            .iter()
1527
            .map(|&(s, e)| ClassUnicodeRange::new(s, e))
1528
            .collect();
1529
        ClassUnicode::new(ranges)
1530
    }
1531
1532
    fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
1533
        let ranges: Vec<ClassBytesRange> =
1534
            ranges.iter().map(|&(s, e)| ClassBytesRange::new(s, e)).collect();
1535
        ClassBytes::new(ranges)
1536
    }
1537
1538
    fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
1539
        cls.iter().map(|x| (x.start(), x.end())).collect()
1540
    }
1541
1542
    #[cfg(feature = "unicode-case")]
1543
    fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
1544
        let mut cls_ = cls.clone();
1545
        cls_.case_fold_simple();
1546
        cls_
1547
    }
1548
1549
    fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
1550
        let mut cls_ = cls1.clone();
1551
        cls_.union(cls2);
1552
        cls_
1553
    }
1554
1555
    fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
1556
        let mut cls_ = cls1.clone();
1557
        cls_.intersect(cls2);
1558
        cls_
1559
    }
1560
1561
    fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
1562
        let mut cls_ = cls1.clone();
1563
        cls_.difference(cls2);
1564
        cls_
1565
    }
1566
1567
    fn usymdifference(
1568
        cls1: &ClassUnicode,
1569
        cls2: &ClassUnicode,
1570
    ) -> ClassUnicode {
1571
        let mut cls_ = cls1.clone();
1572
        cls_.symmetric_difference(cls2);
1573
        cls_
1574
    }
1575
1576
    fn unegate(cls: &ClassUnicode) -> ClassUnicode {
1577
        let mut cls_ = cls.clone();
1578
        cls_.negate();
1579
        cls_
1580
    }
1581
1582
    fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
1583
        cls.iter().map(|x| (x.start(), x.end())).collect()
1584
    }
1585
1586
    fn bcasefold(cls: &ClassBytes) -> ClassBytes {
1587
        let mut cls_ = cls.clone();
1588
        cls_.case_fold_simple();
1589
        cls_
1590
    }
1591
1592
    fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
1593
        let mut cls_ = cls1.clone();
1594
        cls_.union(cls2);
1595
        cls_
1596
    }
1597
1598
    fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
1599
        let mut cls_ = cls1.clone();
1600
        cls_.intersect(cls2);
1601
        cls_
1602
    }
1603
1604
    fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
1605
        let mut cls_ = cls1.clone();
1606
        cls_.difference(cls2);
1607
        cls_
1608
    }
1609
1610
    fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
1611
        let mut cls_ = cls1.clone();
1612
        cls_.symmetric_difference(cls2);
1613
        cls_
1614
    }
1615
1616
    fn bnegate(cls: &ClassBytes) -> ClassBytes {
1617
        let mut cls_ = cls.clone();
1618
        cls_.negate();
1619
        cls_
1620
    }
1621
1622
    #[test]
1623
    fn class_range_canonical_unicode() {
1624
        let range = ClassUnicodeRange::new('\u{00FF}', '\0');
1625
        assert_eq!('\0', range.start());
1626
        assert_eq!('\u{00FF}', range.end());
1627
    }
1628
1629
    #[test]
1630
    fn class_range_canonical_bytes() {
1631
        let range = ClassBytesRange::new(b'\xFF', b'\0');
1632
        assert_eq!(b'\0', range.start());
1633
        assert_eq!(b'\xFF', range.end());
1634
    }
1635
1636
    #[test]
1637
    fn class_canonicalize_unicode() {
1638
        let cls = uclass(&[('a', 'c'), ('x', 'z')]);
1639
        let expected = vec![('a', 'c'), ('x', 'z')];
1640
        assert_eq!(expected, uranges(&cls));
1641
1642
        let cls = uclass(&[('x', 'z'), ('a', 'c')]);
1643
        let expected = vec![('a', 'c'), ('x', 'z')];
1644
        assert_eq!(expected, uranges(&cls));
1645
1646
        let cls = uclass(&[('x', 'z'), ('w', 'y')]);
1647
        let expected = vec![('w', 'z')];
1648
        assert_eq!(expected, uranges(&cls));
1649
1650
        let cls = uclass(&[
1651
            ('c', 'f'),
1652
            ('a', 'g'),
1653
            ('d', 'j'),
1654
            ('a', 'c'),
1655
            ('m', 'p'),
1656
            ('l', 's'),
1657
        ]);
1658
        let expected = vec![('a', 'j'), ('l', 's')];
1659
        assert_eq!(expected, uranges(&cls));
1660
1661
        let cls = uclass(&[('x', 'z'), ('u', 'w')]);
1662
        let expected = vec![('u', 'z')];
1663
        assert_eq!(expected, uranges(&cls));
1664
1665
        let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
1666
        let expected = vec![('\x00', '\u{10FFFF}')];
1667
        assert_eq!(expected, uranges(&cls));
1668
1669
        let cls = uclass(&[('a', 'a'), ('b', 'b')]);
1670
        let expected = vec![('a', 'b')];
1671
        assert_eq!(expected, uranges(&cls));
1672
    }
1673
1674
    #[test]
1675
    fn class_canonicalize_bytes() {
1676
        let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
1677
        let expected = vec![(b'a', b'c'), (b'x', b'z')];
1678
        assert_eq!(expected, branges(&cls));
1679
1680
        let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]);
1681
        let expected = vec![(b'a', b'c'), (b'x', b'z')];
1682
        assert_eq!(expected, branges(&cls));
1683
1684
        let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]);
1685
        let expected = vec![(b'w', b'z')];
1686
        assert_eq!(expected, branges(&cls));
1687
1688
        let cls = bclass(&[
1689
            (b'c', b'f'),
1690
            (b'a', b'g'),
1691
            (b'd', b'j'),
1692
            (b'a', b'c'),
1693
            (b'm', b'p'),
1694
            (b'l', b's'),
1695
        ]);
1696
        let expected = vec![(b'a', b'j'), (b'l', b's')];
1697
        assert_eq!(expected, branges(&cls));
1698
1699
        let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]);
1700
        let expected = vec![(b'u', b'z')];
1701
        assert_eq!(expected, branges(&cls));
1702
1703
        let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]);
1704
        let expected = vec![(b'\x00', b'\xFF')];
1705
        assert_eq!(expected, branges(&cls));
1706
1707
        let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
1708
        let expected = vec![(b'a', b'b')];
1709
        assert_eq!(expected, branges(&cls));
1710
    }
1711
1712
    #[test]
1713
    #[cfg(feature = "unicode-case")]
1714
    fn class_case_fold_unicode() {
1715
        let cls = uclass(&[
1716
            ('C', 'F'),
1717
            ('A', 'G'),
1718
            ('D', 'J'),
1719
            ('A', 'C'),
1720
            ('M', 'P'),
1721
            ('L', 'S'),
1722
            ('c', 'f'),
1723
        ]);
1724
        let expected = uclass(&[
1725
            ('A', 'J'),
1726
            ('L', 'S'),
1727
            ('a', 'j'),
1728
            ('l', 's'),
1729
            ('\u{17F}', '\u{17F}'),
1730
        ]);
1731
        assert_eq!(expected, ucasefold(&cls));
1732
1733
        let cls = uclass(&[('A', 'Z')]);
1734
        let expected = uclass(&[
1735
            ('A', 'Z'),
1736
            ('a', 'z'),
1737
            ('\u{17F}', '\u{17F}'),
1738
            ('\u{212A}', '\u{212A}'),
1739
        ]);
1740
        assert_eq!(expected, ucasefold(&cls));
1741
1742
        let cls = uclass(&[('a', 'z')]);
1743
        let expected = uclass(&[
1744
            ('A', 'Z'),
1745
            ('a', 'z'),
1746
            ('\u{17F}', '\u{17F}'),
1747
            ('\u{212A}', '\u{212A}'),
1748
        ]);
1749
        assert_eq!(expected, ucasefold(&cls));
1750
1751
        let cls = uclass(&[('A', 'A'), ('_', '_')]);
1752
        let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]);
1753
        assert_eq!(expected, ucasefold(&cls));
1754
1755
        let cls = uclass(&[('A', 'A'), ('=', '=')]);
1756
        let expected = uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]);
1757
        assert_eq!(expected, ucasefold(&cls));
1758
1759
        let cls = uclass(&[('\x00', '\x10')]);
1760
        assert_eq!(cls, ucasefold(&cls));
1761
1762
        let cls = uclass(&[('k', 'k')]);
1763
        let expected =
1764
            uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]);
1765
        assert_eq!(expected, ucasefold(&cls));
1766
1767
        let cls = uclass(&[('@', '@')]);
1768
        assert_eq!(cls, ucasefold(&cls));
1769
    }
1770
1771
    #[test]
1772
    #[cfg(not(feature = "unicode-case"))]
1773
    fn class_case_fold_unicode_disabled() {
1774
        let mut cls = uclass(&[
1775
            ('C', 'F'),
1776
            ('A', 'G'),
1777
            ('D', 'J'),
1778
            ('A', 'C'),
1779
            ('M', 'P'),
1780
            ('L', 'S'),
1781
            ('c', 'f'),
1782
        ]);
1783
        assert!(cls.try_case_fold_simple().is_err());
1784
    }
1785
1786
    #[test]
1787
    #[should_panic]
1788
    #[cfg(not(feature = "unicode-case"))]
1789
    fn class_case_fold_unicode_disabled_panics() {
1790
        let mut cls = uclass(&[
1791
            ('C', 'F'),
1792
            ('A', 'G'),
1793
            ('D', 'J'),
1794
            ('A', 'C'),
1795
            ('M', 'P'),
1796
            ('L', 'S'),
1797
            ('c', 'f'),
1798
        ]);
1799
        cls.case_fold_simple();
1800
    }
1801
1802
    #[test]
1803
    fn class_case_fold_bytes() {
1804
        let cls = bclass(&[
1805
            (b'C', b'F'),
1806
            (b'A', b'G'),
1807
            (b'D', b'J'),
1808
            (b'A', b'C'),
1809
            (b'M', b'P'),
1810
            (b'L', b'S'),
1811
            (b'c', b'f'),
1812
        ]);
1813
        let expected =
1814
            bclass(&[(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')]);
1815
        assert_eq!(expected, bcasefold(&cls));
1816
1817
        let cls = bclass(&[(b'A', b'Z')]);
1818
        let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
1819
        assert_eq!(expected, bcasefold(&cls));
1820
1821
        let cls = bclass(&[(b'a', b'z')]);
1822
        let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
1823
        assert_eq!(expected, bcasefold(&cls));
1824
1825
        let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
1826
        let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]);
1827
        assert_eq!(expected, bcasefold(&cls));
1828
1829
        let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
1830
        let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]);
1831
        assert_eq!(expected, bcasefold(&cls));
1832
1833
        let cls = bclass(&[(b'\x00', b'\x10')]);
1834
        assert_eq!(cls, bcasefold(&cls));
1835
1836
        let cls = bclass(&[(b'k', b'k')]);
1837
        let expected = bclass(&[(b'K', b'K'), (b'k', b'k')]);
1838
        assert_eq!(expected, bcasefold(&cls));
1839
1840
        let cls = bclass(&[(b'@', b'@')]);
1841
        assert_eq!(cls, bcasefold(&cls));
1842
    }
1843
1844
    #[test]
1845
    fn class_negate_unicode() {
1846
        let cls = uclass(&[('a', 'a')]);
1847
        let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]);
1848
        assert_eq!(expected, unegate(&cls));
1849
1850
        let cls = uclass(&[('a', 'a'), ('b', 'b')]);
1851
        let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]);
1852
        assert_eq!(expected, unegate(&cls));
1853
1854
        let cls = uclass(&[('a', 'c'), ('x', 'z')]);
1855
        let expected = uclass(&[
1856
            ('\x00', '\x60'),
1857
            ('\x64', '\x77'),
1858
            ('\x7B', '\u{10FFFF}'),
1859
        ]);
1860
        assert_eq!(expected, unegate(&cls));
1861
1862
        let cls = uclass(&[('\x00', 'a')]);
1863
        let expected = uclass(&[('\x62', '\u{10FFFF}')]);
1864
        assert_eq!(expected, unegate(&cls));
1865
1866
        let cls = uclass(&[('a', '\u{10FFFF}')]);
1867
        let expected = uclass(&[('\x00', '\x60')]);
1868
        assert_eq!(expected, unegate(&cls));
1869
1870
        let cls = uclass(&[('\x00', '\u{10FFFF}')]);
1871
        let expected = uclass(&[]);
1872
        assert_eq!(expected, unegate(&cls));
1873
1874
        let cls = uclass(&[]);
1875
        let expected = uclass(&[('\x00', '\u{10FFFF}')]);
1876
        assert_eq!(expected, unegate(&cls));
1877
1878
        let cls =
1879
            uclass(&[('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')]);
1880
        let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]);
1881
        assert_eq!(expected, unegate(&cls));
1882
1883
        let cls = uclass(&[('\x00', '\u{D7FF}')]);
1884
        let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]);
1885
        assert_eq!(expected, unegate(&cls));
1886
1887
        let cls = uclass(&[('\x00', '\u{D7FE}')]);
1888
        let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]);
1889
        assert_eq!(expected, unegate(&cls));
1890
1891
        let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]);
1892
        let expected = uclass(&[('\x00', '\u{D7FF}')]);
1893
        assert_eq!(expected, unegate(&cls));
1894
1895
        let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]);
1896
        let expected = uclass(&[('\x00', '\u{E000}')]);
1897
        assert_eq!(expected, unegate(&cls));
1898
    }
1899
1900
    #[test]
1901
    fn class_negate_bytes() {
1902
        let cls = bclass(&[(b'a', b'a')]);
1903
        let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]);
1904
        assert_eq!(expected, bnegate(&cls));
1905
1906
        let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
1907
        let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]);
1908
        assert_eq!(expected, bnegate(&cls));
1909
1910
        let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
1911
        let expected = bclass(&[
1912
            (b'\x00', b'\x60'),
1913
            (b'\x64', b'\x77'),
1914
            (b'\x7B', b'\xFF'),
1915
        ]);
1916
        assert_eq!(expected, bnegate(&cls));
1917
1918
        let cls = bclass(&[(b'\x00', b'a')]);
1919
        let expected = bclass(&[(b'\x62', b'\xFF')]);
1920
        assert_eq!(expected, bnegate(&cls));
1921
1922
        let cls = bclass(&[(b'a', b'\xFF')]);
1923
        let expected = bclass(&[(b'\x00', b'\x60')]);
1924
        assert_eq!(expected, bnegate(&cls));
1925
1926
        let cls = bclass(&[(b'\x00', b'\xFF')]);
1927
        let expected = bclass(&[]);
1928
        assert_eq!(expected, bnegate(&cls));
1929
1930
        let cls = bclass(&[]);
1931
        let expected = bclass(&[(b'\x00', b'\xFF')]);
1932
        assert_eq!(expected, bnegate(&cls));
1933
1934
        let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]);
1935
        let expected = bclass(&[(b'\xFE', b'\xFE')]);
1936
        assert_eq!(expected, bnegate(&cls));
1937
    }
1938
1939
    #[test]
1940
    fn class_union_unicode() {
1941
        let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
1942
        let cls2 = uclass(&[('a', 'z')]);
1943
        let expected = uclass(&[('a', 'z'), ('A', 'C')]);
1944
        assert_eq!(expected, uunion(&cls1, &cls2));
1945
    }
1946
1947
    #[test]
1948
    fn class_union_bytes() {
1949
        let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
1950
        let cls2 = bclass(&[(b'a', b'z')]);
1951
        let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]);
1952
        assert_eq!(expected, bunion(&cls1, &cls2));
1953
    }
1954
1955
    #[test]
1956
    fn class_intersect_unicode() {
1957
        let cls1 = uclass(&[]);
1958
        let cls2 = uclass(&[('a', 'a')]);
1959
        let expected = uclass(&[]);
1960
        assert_eq!(expected, uintersect(&cls1, &cls2));
1961
1962
        let cls1 = uclass(&[('a', 'a')]);
1963
        let cls2 = uclass(&[('a', 'a')]);
1964
        let expected = uclass(&[('a', 'a')]);
1965
        assert_eq!(expected, uintersect(&cls1, &cls2));
1966
1967
        let cls1 = uclass(&[('a', 'a')]);
1968
        let cls2 = uclass(&[('b', 'b')]);
1969
        let expected = uclass(&[]);
1970
        assert_eq!(expected, uintersect(&cls1, &cls2));
1971
1972
        let cls1 = uclass(&[('a', 'a')]);
1973
        let cls2 = uclass(&[('a', 'c')]);
1974
        let expected = uclass(&[('a', 'a')]);
1975
        assert_eq!(expected, uintersect(&cls1, &cls2));
1976
1977
        let cls1 = uclass(&[('a', 'b')]);
1978
        let cls2 = uclass(&[('a', 'c')]);
1979
        let expected = uclass(&[('a', 'b')]);
1980
        assert_eq!(expected, uintersect(&cls1, &cls2));
1981
1982
        let cls1 = uclass(&[('a', 'b')]);
1983
        let cls2 = uclass(&[('b', 'c')]);
1984
        let expected = uclass(&[('b', 'b')]);
1985
        assert_eq!(expected, uintersect(&cls1, &cls2));
1986
1987
        let cls1 = uclass(&[('a', 'b')]);
1988
        let cls2 = uclass(&[('c', 'd')]);
1989
        let expected = uclass(&[]);
1990
        assert_eq!(expected, uintersect(&cls1, &cls2));
1991
1992
        let cls1 = uclass(&[('b', 'c')]);
1993
        let cls2 = uclass(&[('a', 'd')]);
1994
        let expected = uclass(&[('b', 'c')]);
1995
        assert_eq!(expected, uintersect(&cls1, &cls2));
1996
1997
        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
1998
        let cls2 = uclass(&[('a', 'h')]);
1999
        let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
2000
        assert_eq!(expected, uintersect(&cls1, &cls2));
2001
2002
        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
2003
        let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
2004
        let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
2005
        assert_eq!(expected, uintersect(&cls1, &cls2));
2006
2007
        let cls1 = uclass(&[('a', 'b'), ('g', 'h')]);
2008
        let cls2 = uclass(&[('d', 'e'), ('k', 'l')]);
2009
        let expected = uclass(&[]);
2010
        assert_eq!(expected, uintersect(&cls1, &cls2));
2011
2012
        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
2013
        let cls2 = uclass(&[('h', 'h')]);
2014
        let expected = uclass(&[('h', 'h')]);
2015
        assert_eq!(expected, uintersect(&cls1, &cls2));
2016
2017
        let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
2018
        let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
2019
        let expected = uclass(&[]);
2020
        assert_eq!(expected, uintersect(&cls1, &cls2));
2021
2022
        let cls1 = uclass(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
2023
        let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
2024
        let expected = uclass(&[('b', 'f')]);
2025
        assert_eq!(expected, uintersect(&cls1, &cls2));
2026
    }
2027
2028
    #[test]
2029
    fn class_intersect_bytes() {
2030
        let cls1 = bclass(&[]);
2031
        let cls2 = bclass(&[(b'a', b'a')]);
2032
        let expected = bclass(&[]);
2033
        assert_eq!(expected, bintersect(&cls1, &cls2));
2034
2035
        let cls1 = bclass(&[(b'a', b'a')]);
2036
        let cls2 = bclass(&[(b'a', b'a')]);
2037
        let expected = bclass(&[(b'a', b'a')]);
2038
        assert_eq!(expected, bintersect(&cls1, &cls2));
2039
2040
        let cls1 = bclass(&[(b'a', b'a')]);
2041
        let cls2 = bclass(&[(b'b', b'b')]);
2042
        let expected = bclass(&[]);
2043
        assert_eq!(expected, bintersect(&cls1, &cls2));
2044
2045
        let cls1 = bclass(&[(b'a', b'a')]);
2046
        let cls2 = bclass(&[(b'a', b'c')]);
2047
        let expected = bclass(&[(b'a', b'a')]);
2048
        assert_eq!(expected, bintersect(&cls1, &cls2));
2049
2050
        let cls1 = bclass(&[(b'a', b'b')]);
2051
        let cls2 = bclass(&[(b'a', b'c')]);
2052
        let expected = bclass(&[(b'a', b'b')]);
2053
        assert_eq!(expected, bintersect(&cls1, &cls2));
2054
2055
        let cls1 = bclass(&[(b'a', b'b')]);
2056
        let cls2 = bclass(&[(b'b', b'c')]);
2057
        let expected = bclass(&[(b'b', b'b')]);
2058
        assert_eq!(expected, bintersect(&cls1, &cls2));
2059
2060
        let cls1 = bclass(&[(b'a', b'b')]);
2061
        let cls2 = bclass(&[(b'c', b'd')]);
2062
        let expected = bclass(&[]);
2063
        assert_eq!(expected, bintersect(&cls1, &cls2));
2064
2065
        let cls1 = bclass(&[(b'b', b'c')]);
2066
        let cls2 = bclass(&[(b'a', b'd')]);
2067
        let expected = bclass(&[(b'b', b'c')]);
2068
        assert_eq!(expected, bintersect(&cls1, &cls2));
2069
2070
        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2071
        let cls2 = bclass(&[(b'a', b'h')]);
2072
        let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2073
        assert_eq!(expected, bintersect(&cls1, &cls2));
2074
2075
        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2076
        let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2077
        let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2078
        assert_eq!(expected, bintersect(&cls1, &cls2));
2079
2080
        let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]);
2081
        let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]);
2082
        let expected = bclass(&[]);
2083
        assert_eq!(expected, bintersect(&cls1, &cls2));
2084
2085
        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
2086
        let cls2 = bclass(&[(b'h', b'h')]);
2087
        let expected = bclass(&[(b'h', b'h')]);
2088
        assert_eq!(expected, bintersect(&cls1, &cls2));
2089
2090
        let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]);
2091
        let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]);
2092
        let expected = bclass(&[]);
2093
        assert_eq!(expected, bintersect(&cls1, &cls2));
2094
2095
        let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]);
2096
        let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]);
2097
        let expected = bclass(&[(b'b', b'f')]);
2098
        assert_eq!(expected, bintersect(&cls1, &cls2));
2099
    }
2100
2101
    #[test]
2102
    fn class_difference_unicode() {
2103
        let cls1 = uclass(&[('a', 'a')]);
2104
        let cls2 = uclass(&[('a', 'a')]);
2105
        let expected = uclass(&[]);
2106
        assert_eq!(expected, udifference(&cls1, &cls2));
2107
2108
        let cls1 = uclass(&[('a', 'a')]);
2109
        let cls2 = uclass(&[]);
2110
        let expected = uclass(&[('a', 'a')]);
2111
        assert_eq!(expected, udifference(&cls1, &cls2));
2112
2113
        let cls1 = uclass(&[]);
2114
        let cls2 = uclass(&[('a', 'a')]);
2115
        let expected = uclass(&[]);
2116
        assert_eq!(expected, udifference(&cls1, &cls2));
2117
2118
        let cls1 = uclass(&[('a', 'z')]);
2119
        let cls2 = uclass(&[('a', 'a')]);
2120
        let expected = uclass(&[('b', 'z')]);
2121
        assert_eq!(expected, udifference(&cls1, &cls2));
2122
2123
        let cls1 = uclass(&[('a', 'z')]);
2124
        let cls2 = uclass(&[('z', 'z')]);
2125
        let expected = uclass(&[('a', 'y')]);
2126
        assert_eq!(expected, udifference(&cls1, &cls2));
2127
2128
        let cls1 = uclass(&[('a', 'z')]);
2129
        let cls2 = uclass(&[('m', 'm')]);
2130
        let expected = uclass(&[('a', 'l'), ('n', 'z')]);
2131
        assert_eq!(expected, udifference(&cls1, &cls2));
2132
2133
        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
2134
        let cls2 = uclass(&[('a', 'z')]);
2135
        let expected = uclass(&[]);
2136
        assert_eq!(expected, udifference(&cls1, &cls2));
2137
2138
        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
2139
        let cls2 = uclass(&[('d', 'v')]);
2140
        let expected = uclass(&[('a', 'c')]);
2141
        assert_eq!(expected, udifference(&cls1, &cls2));
2142
2143
        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
2144
        let cls2 = uclass(&[('b', 'g'), ('s', 'u')]);
2145
        let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
2146
        assert_eq!(expected, udifference(&cls1, &cls2));
2147
2148
        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
2149
        let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]);
2150
        let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
2151
        assert_eq!(expected, udifference(&cls1, &cls2));
2152
2153
        let cls1 = uclass(&[('x', 'z')]);
2154
        let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
2155
        let expected = uclass(&[('x', 'z')]);
2156
        assert_eq!(expected, udifference(&cls1, &cls2));
2157
2158
        let cls1 = uclass(&[('a', 'z')]);
2159
        let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
2160
        let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]);
2161
        assert_eq!(expected, udifference(&cls1, &cls2));
2162
    }
2163
2164
    #[test]
2165
    fn class_difference_bytes() {
2166
        let cls1 = bclass(&[(b'a', b'a')]);
2167
        let cls2 = bclass(&[(b'a', b'a')]);
2168
        let expected = bclass(&[]);
2169
        assert_eq!(expected, bdifference(&cls1, &cls2));
2170
2171
        let cls1 = bclass(&[(b'a', b'a')]);
2172
        let cls2 = bclass(&[]);
2173
        let expected = bclass(&[(b'a', b'a')]);
2174
        assert_eq!(expected, bdifference(&cls1, &cls2));
2175
2176
        let cls1 = bclass(&[]);
2177
        let cls2 = bclass(&[(b'a', b'a')]);
2178
        let expected = bclass(&[]);
2179
        assert_eq!(expected, bdifference(&cls1, &cls2));
2180
2181
        let cls1 = bclass(&[(b'a', b'z')]);
2182
        let cls2 = bclass(&[(b'a', b'a')]);
2183
        let expected = bclass(&[(b'b', b'z')]);
2184
        assert_eq!(expected, bdifference(&cls1, &cls2));
2185
2186
        let cls1 = bclass(&[(b'a', b'z')]);
2187
        let cls2 = bclass(&[(b'z', b'z')]);
2188
        let expected = bclass(&[(b'a', b'y')]);
2189
        assert_eq!(expected, bdifference(&cls1, &cls2));
2190
2191
        let cls1 = bclass(&[(b'a', b'z')]);
2192
        let cls2 = bclass(&[(b'm', b'm')]);
2193
        let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]);
2194
        assert_eq!(expected, bdifference(&cls1, &cls2));
2195
2196
        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
2197
        let cls2 = bclass(&[(b'a', b'z')]);
2198
        let expected = bclass(&[]);
2199
        assert_eq!(expected, bdifference(&cls1, &cls2));
2200
2201
        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
2202
        let cls2 = bclass(&[(b'd', b'v')]);
2203
        let expected = bclass(&[(b'a', b'c')]);
2204
        assert_eq!(expected, bdifference(&cls1, &cls2));
2205
2206
        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
2207
        let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]);
2208
        let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
2209
        assert_eq!(expected, bdifference(&cls1, &cls2));
2210
2211
        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
2212
        let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]);
2213
        let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
2214
        assert_eq!(expected, bdifference(&cls1, &cls2));
2215
2216
        let cls1 = bclass(&[(b'x', b'z')]);
2217
        let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
2218
        let expected = bclass(&[(b'x', b'z')]);
2219
        assert_eq!(expected, bdifference(&cls1, &cls2));
2220
2221
        let cls1 = bclass(&[(b'a', b'z')]);
2222
        let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
2223
        let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]);
2224
        assert_eq!(expected, bdifference(&cls1, &cls2));
2225
    }
2226
2227
    #[test]
2228
    fn class_symmetric_difference_unicode() {
2229
        let cls1 = uclass(&[('a', 'm')]);
2230
        let cls2 = uclass(&[('g', 't')]);
2231
        let expected = uclass(&[('a', 'f'), ('n', 't')]);
2232
        assert_eq!(expected, usymdifference(&cls1, &cls2));
2233
    }
2234
2235
    #[test]
2236
    fn class_symmetric_difference_bytes() {
2237
        let cls1 = bclass(&[(b'a', b'm')]);
2238
        let cls2 = bclass(&[(b'g', b't')]);
2239
        let expected = bclass(&[(b'a', b'f'), (b'n', b't')]);
2240
        assert_eq!(expected, bsymdifference(&cls1, &cls2));
2241
    }
2242
2243
    #[test]
2244
    #[should_panic]
2245
    fn hir_byte_literal_non_ascii() {
2246
        Hir::literal(Literal::Byte(b'a'));
2247
    }
2248
2249
    // We use a thread with an explicit stack size to test that our destructor
2250
    // for Hir can handle arbitrarily sized expressions in constant stack
2251
    // space. In case we run on a platform without threads (WASM?), we limit
2252
    // this test to Windows/Unix.
2253
    #[test]
2254
    #[cfg(any(unix, windows))]
2255
    fn no_stack_overflow_on_drop() {
2256
        use std::thread;
2257
2258
        let run = || {
2259
            let mut expr = Hir::empty();
2260
            for _ in 0..100 {
2261
                expr = Hir::group(Group {
2262
                    kind: GroupKind::NonCapturing,
2263
                    hir: Box::new(expr),
2264
                });
2265
                expr = Hir::repetition(Repetition {
2266
                    kind: RepetitionKind::ZeroOrOne,
2267
                    greedy: true,
2268
                    hir: Box::new(expr),
2269
                });
2270
2271
                expr = Hir {
2272
                    kind: HirKind::Concat(vec![expr]),
2273
                    info: HirInfo::new(),
2274
                };
2275
                expr = Hir {
2276
                    kind: HirKind::Alternation(vec![expr]),
2277
                    info: HirInfo::new(),
2278
                };
2279
            }
2280
            assert!(!expr.kind.is_empty());
2281
        };
2282
2283
        // We run our test on a thread with a small stack size so we can
2284
        // force the issue more easily.
2285
        thread::Builder::new()
2286
            .stack_size(1 << 10)
2287
            .spawn(run)
2288
            .unwrap()
2289
            .join()
2290
            .unwrap();
2291
    }
2292
}