Coverage Report

Created: 2021-03-22 08:29

/rust/registry/src/github.com-1ecc6299db9ec823/regex-1.4.3/src/compile.rs
Line
Count
Source (jump to first uncovered line)
1
use std::collections::HashMap;
2
use std::fmt;
3
use std::iter;
4
use std::result;
5
use std::sync::Arc;
6
7
use syntax::hir::{self, Hir};
8
use syntax::is_word_byte;
9
use syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
10
11
use prog::{
12
    EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges,
13
    InstSave, InstSplit, Program,
14
};
15
16
use Error;
17
18
type Result = result::Result<Patch, Error>;
19
type ResultOrEmpty = result::Result<Option<Patch>, Error>;
20
21
#[derive(Debug)]
22
struct Patch {
23
    hole: Hole,
24
    entry: InstPtr,
25
}
26
27
/// A compiler translates a regular expression AST to a sequence of
28
/// instructions. The sequence of instructions represents an NFA.
29
// `Compiler` is only public via the `internal` module, so avoid deriving
30
// `Debug`.
31
#[allow(missing_debug_implementations)]
32
pub struct Compiler {
33
    insts: Vec<MaybeInst>,
34
    compiled: Program,
35
    capture_name_idx: HashMap<String, usize>,
36
    num_exprs: usize,
37
    size_limit: usize,
38
    suffix_cache: SuffixCache,
39
    utf8_seqs: Option<Utf8Sequences>,
40
    byte_classes: ByteClassSet,
41
}
42
43
impl Compiler {
44
    /// Create a new regular expression compiler.
45
    ///
46
    /// Various options can be set before calling `compile` on an expression.
47
    pub fn new() -> Self {
48
        Compiler {
49
            insts: vec![],
50
            compiled: Program::new(),
51
            capture_name_idx: HashMap::new(),
52
            num_exprs: 0,
53
            size_limit: 10 * (1 << 20),
54
            suffix_cache: SuffixCache::new(1000),
55
            utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')),
56
            byte_classes: ByteClassSet::new(),
57
        }
58
    }
59
60
    /// The size of the resulting program is limited by size_limit. If
61
    /// the program approximately exceeds the given size (in bytes), then
62
    /// compilation will stop and return an error.
63
    pub fn size_limit(mut self, size_limit: usize) -> Self {
64
        self.size_limit = size_limit;
65
        self
66
    }
67
68
    /// If bytes is true, then the program is compiled as a byte based
69
    /// automaton, which incorporates UTF-8 decoding into the machine. If it's
70
    /// false, then the automaton is Unicode scalar value based, e.g., an
71
    /// engine utilizing such an automaton is responsible for UTF-8 decoding.
72
    ///
73
    /// The specific invariant is that when returning a byte based machine,
74
    /// the neither the `Char` nor `Ranges` instructions are produced.
75
    /// Conversely, when producing a Unicode scalar value machine, the `Bytes`
76
    /// instruction is never produced.
77
    ///
78
    /// Note that `dfa(true)` implies `bytes(true)`.
79
    pub fn bytes(mut self, yes: bool) -> Self {
80
        self.compiled.is_bytes = yes;
81
        self
82
    }
83
84
    /// When disabled, the program compiled may match arbitrary bytes.
85
    ///
86
    /// When enabled (the default), all compiled programs exclusively match
87
    /// valid UTF-8 bytes.
88
    pub fn only_utf8(mut self, yes: bool) -> Self {
89
        self.compiled.only_utf8 = yes;
90
        self
91
    }
92
93
    /// When set, the machine returned is suitable for use in the DFA matching
94
    /// engine.
95
    ///
96
    /// In particular, this ensures that if the regex is not anchored in the
97
    /// beginning, then a preceding `.*?` is included in the program. (The NFA
98
    /// based engines handle the preceding `.*?` explicitly, which is difficult
99
    /// or impossible in the DFA engine.)
100
    pub fn dfa(mut self, yes: bool) -> Self {
101
        self.compiled.is_dfa = yes;
102
        self
103
    }
104
105
    /// When set, the machine returned is suitable for matching text in
106
    /// reverse. In particular, all concatenations are flipped.
107
    pub fn reverse(mut self, yes: bool) -> Self {
108
        self.compiled.is_reverse = yes;
109
        self
110
    }
111
112
    /// Compile a regular expression given its AST.
113
    ///
114
    /// The compiler is guaranteed to succeed unless the program exceeds the
115
    /// specified size limit. If the size limit is exceeded, then compilation
116
    /// stops and returns an error.
117
0
    pub fn compile(mut self, exprs: &[Hir]) -> result::Result<Program, Error> {
118
        debug_assert!(!exprs.is_empty());
119
0
        self.num_exprs = exprs.len();
120
0
        if exprs.len() == 1 {
121
0
            self.compile_one(&exprs[0])
122
        } else {
123
0
            self.compile_many(exprs)
124
        }
125
0
    }
126
127
0
    fn compile_one(mut self, expr: &Hir) -> result::Result<Program, Error> {
128
0
        // If we're compiling a forward DFA and we aren't anchored, then
129
0
        // add a `.*?` before the first capture group.
130
0
        // Other matching engines handle this by baking the logic into the
131
0
        // matching engine itself.
132
0
        let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
133
0
        self.compiled.is_anchored_start = expr.is_anchored_start();
134
0
        self.compiled.is_anchored_end = expr.is_anchored_end();
135
0
        if self.compiled.needs_dotstar() {
136
0
            dotstar_patch = self.c_dotstar()?;
137
0
            self.compiled.start = dotstar_patch.entry;
138
0
        }
139
0
        self.compiled.captures = vec![None];
140
0
        let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst());
141
0
        if self.compiled.needs_dotstar() {
142
0
            self.fill(dotstar_patch.hole, patch.entry);
143
0
        } else {
144
0
            self.compiled.start = patch.entry;
145
0
        }
146
0
        self.fill_to_next(patch.hole);
147
0
        self.compiled.matches = vec![self.insts.len()];
148
0
        self.push_compiled(Inst::Match(0));
149
0
        self.compile_finish()
150
0
    }
151
152
0
    fn compile_many(
153
0
        mut self,
154
0
        exprs: &[Hir],
155
0
    ) -> result::Result<Program, Error> {
156
        debug_assert!(exprs.len() > 1);
157
158
0
        self.compiled.is_anchored_start =
159
0
            exprs.iter().all(|e| e.is_anchored_start());
160
0
        self.compiled.is_anchored_end =
161
0
            exprs.iter().all(|e| e.is_anchored_end());
162
0
        let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
163
0
        if self.compiled.needs_dotstar() {
164
0
            dotstar_patch = self.c_dotstar()?;
165
0
            self.compiled.start = dotstar_patch.entry;
166
0
        } else {
167
0
            self.compiled.start = 0; // first instruction is always split
168
0
        }
169
0
        self.fill_to_next(dotstar_patch.hole);
170
0
171
0
        let mut prev_hole = Hole::None;
172
0
        for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
173
0
            self.fill_to_next(prev_hole);
174
0
            let split = self.push_split_hole();
175
0
            let Patch { hole, entry } =
176
0
                self.c_capture(0, expr)?.unwrap_or(self.next_inst());
177
0
            self.fill_to_next(hole);
178
0
            self.compiled.matches.push(self.insts.len());
179
0
            self.push_compiled(Inst::Match(i));
180
0
            prev_hole = self.fill_split(split, Some(entry), None);
181
        }
182
0
        let i = exprs.len() - 1;
183
0
        let Patch { hole, entry } =
184
0
            self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst());
185
0
        self.fill(prev_hole, entry);
186
0
        self.fill_to_next(hole);
187
0
        self.compiled.matches.push(self.insts.len());
188
0
        self.push_compiled(Inst::Match(i));
189
0
        self.compile_finish()
190
0
    }
191
192
0
    fn compile_finish(mut self) -> result::Result<Program, Error> {
193
0
        self.compiled.insts =
194
0
            self.insts.into_iter().map(|inst| inst.unwrap()).collect();
195
0
        self.compiled.byte_classes = self.byte_classes.byte_classes();
196
0
        self.compiled.capture_name_idx = Arc::new(self.capture_name_idx);
197
0
        Ok(self.compiled)
198
0
    }
199
200
    /// Compile expr into self.insts, returning a patch on success,
201
    /// or an error if we run out of memory.
202
    ///
203
    /// All of the c_* methods of the compiler share the contract outlined
204
    /// here.
205
    ///
206
    /// The main thing that a c_* method does is mutate `self.insts`
207
    /// to add a list of mostly compiled instructions required to execute
208
    /// the given expression. `self.insts` contains MaybeInsts rather than
209
    /// Insts because there is some backpatching required.
210
    ///
211
    /// The `Patch` value returned by each c_* method provides metadata
212
    /// about the compiled instructions emitted to `self.insts`. The
213
    /// `entry` member of the patch refers to the first instruction
214
    /// (the entry point), while the `hole` member contains zero or
215
    /// more offsets to partial instructions that need to be backpatched.
216
    /// The c_* routine can't know where its list of instructions are going to
217
    /// jump to after execution, so it is up to the caller to patch
218
    /// these jumps to point to the right place. So compiling some
219
    /// expression, e, we would end up with a situation that looked like:
220
    ///
221
    /// ```text
222
    /// self.insts = [ ..., i1, i2, ..., iexit1, ..., iexitn, ...]
223
    ///                     ^              ^             ^
224
    ///                     |                \         /
225
    ///                   entry                \     /
226
    ///                                         hole
227
    /// ```
228
    ///
229
    /// To compile two expressions, e1 and e2, concatenated together we
230
    /// would do:
231
    ///
232
    /// ```ignore
233
    /// let patch1 = self.c(e1);
234
    /// let patch2 = self.c(e2);
235
    /// ```
236
    ///
237
    /// while leaves us with a situation that looks like
238
    ///
239
    /// ```text
240
    /// self.insts = [ ..., i1, ..., iexit1, ..., i2, ..., iexit2 ]
241
    ///                     ^        ^            ^        ^
242
    ///                     |        |            |        |
243
    ///                entry1        hole1   entry2        hole2
244
    /// ```
245
    ///
246
    /// Then to merge the two patches together into one we would backpatch
247
    /// hole1 with entry2 and return a new patch that enters at entry1
248
    /// and has hole2 for a hole. In fact, if you look at the c_concat
249
    /// method you will see that it does exactly this, though it handles
250
    /// a list of expressions rather than just the two that we use for
251
    /// an example.
252
    ///
253
    /// Ok(None) is returned when an expression is compiled to no
254
    /// instruction, and so no patch.entry value makes sense.
255
0
    fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
256
        use prog;
257
        use syntax::hir::HirKind::*;
258
259
0
        self.check_size()?;
260
0
        match *expr.kind() {
261
0
            Empty => Ok(None),
262
0
            Literal(hir::Literal::Unicode(c)) => self.c_char(c),
263
0
            Literal(hir::Literal::Byte(b)) => {
264
0
                assert!(self.compiled.uses_bytes());
265
0
                self.c_byte(b)
266
            }
267
0
            Class(hir::Class::Unicode(ref cls)) => self.c_class(cls.ranges()),
268
0
            Class(hir::Class::Bytes(ref cls)) => {
269
0
                if self.compiled.uses_bytes() {
270
0
                    self.c_class_bytes(cls.ranges())
271
                } else {
272
0
                    assert!(cls.is_all_ascii());
273
0
                    let mut char_ranges = vec![];
274
0
                    for r in cls.iter() {
275
0
                        let (s, e) = (r.start() as char, r.end() as char);
276
0
                        char_ranges.push(hir::ClassUnicodeRange::new(s, e));
277
0
                    }
278
0
                    self.c_class(&char_ranges)
279
                }
280
            }
281
0
            Anchor(hir::Anchor::StartLine) if self.compiled.is_reverse => {
282
0
                self.byte_classes.set_range(b'\n', b'\n');
283
0
                self.c_empty_look(prog::EmptyLook::EndLine)
284
            }
285
            Anchor(hir::Anchor::StartLine) => {
286
0
                self.byte_classes.set_range(b'\n', b'\n');
287
0
                self.c_empty_look(prog::EmptyLook::StartLine)
288
            }
289
0
            Anchor(hir::Anchor::EndLine) if self.compiled.is_reverse => {
290
0
                self.byte_classes.set_range(b'\n', b'\n');
291
0
                self.c_empty_look(prog::EmptyLook::StartLine)
292
            }
293
            Anchor(hir::Anchor::EndLine) => {
294
0
                self.byte_classes.set_range(b'\n', b'\n');
295
0
                self.c_empty_look(prog::EmptyLook::EndLine)
296
            }
297
0
            Anchor(hir::Anchor::StartText) if self.compiled.is_reverse => {
298
0
                self.c_empty_look(prog::EmptyLook::EndText)
299
            }
300
            Anchor(hir::Anchor::StartText) => {
301
0
                self.c_empty_look(prog::EmptyLook::StartText)
302
            }
303
0
            Anchor(hir::Anchor::EndText) if self.compiled.is_reverse => {
304
0
                self.c_empty_look(prog::EmptyLook::StartText)
305
            }
306
            Anchor(hir::Anchor::EndText) => {
307
0
                self.c_empty_look(prog::EmptyLook::EndText)
308
            }
309
0
            WordBoundary(hir::WordBoundary::Unicode) => {
310
0
                if !cfg!(feature = "unicode-perl") {
311
                    return Err(Error::Syntax(
312
                        "Unicode word boundaries are unavailable when \
313
                         the unicode-perl feature is disabled"
314
                            .to_string(),
315
                    ));
316
0
                }
317
0
                self.compiled.has_unicode_word_boundary = true;
318
0
                self.byte_classes.set_word_boundary();
319
0
                self.c_empty_look(prog::EmptyLook::WordBoundary)
320
            }
321
            WordBoundary(hir::WordBoundary::UnicodeNegate) => {
322
0
                if !cfg!(feature = "unicode-perl") {
323
                    return Err(Error::Syntax(
324
                        "Unicode word boundaries are unavailable when \
325
                         the unicode-perl feature is disabled"
326
                            .to_string(),
327
                    ));
328
0
                }
329
0
                self.compiled.has_unicode_word_boundary = true;
330
0
                self.byte_classes.set_word_boundary();
331
0
                self.c_empty_look(prog::EmptyLook::NotWordBoundary)
332
            }
333
            WordBoundary(hir::WordBoundary::Ascii) => {
334
0
                self.byte_classes.set_word_boundary();
335
0
                self.c_empty_look(prog::EmptyLook::WordBoundaryAscii)
336
            }
337
            WordBoundary(hir::WordBoundary::AsciiNegate) => {
338
0
                self.byte_classes.set_word_boundary();
339
0
                self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii)
340
            }
341
0
            Group(ref g) => match g.kind {
342
0
                hir::GroupKind::NonCapturing => self.c(&g.hir),
343
0
                hir::GroupKind::CaptureIndex(index) => {
344
0
                    if index as usize >= self.compiled.captures.len() {
345
0
                        self.compiled.captures.push(None);
346
0
                    }
347
0
                    self.c_capture(2 * index as usize, &g.hir)
348
                }
349
0
                hir::GroupKind::CaptureName { index, ref name } => {
350
0
                    if index as usize >= self.compiled.captures.len() {
351
0
                        let n = name.to_string();
352
0
                        self.compiled.captures.push(Some(n.clone()));
353
0
                        self.capture_name_idx.insert(n, index as usize);
354
0
                    }
355
0
                    self.c_capture(2 * index as usize, &g.hir)
356
                }
357
            },
358
0
            Concat(ref es) => {
359
0
                if self.compiled.is_reverse {
360
0
                    self.c_concat(es.iter().rev())
361
                } else {
362
0
                    self.c_concat(es)
363
                }
364
            }
365
0
            Alternation(ref es) => self.c_alternate(&**es),
366
0
            Repetition(ref rep) => self.c_repeat(rep),
367
        }
368
0
    }
369
370
0
    fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty {
371
0
        if self.num_exprs > 1 || self.compiled.is_dfa {
372
            // Don't ever compile Save instructions for regex sets because
373
            // they are never used. They are also never used in DFA programs
374
            // because DFAs can't handle captures.
375
0
            self.c(expr)
376
        } else {
377
0
            let entry = self.insts.len();
378
0
            let hole = self.push_hole(InstHole::Save { slot: first_slot });
379
0
            let patch = self.c(expr)?.unwrap_or(self.next_inst());
380
0
            self.fill(hole, patch.entry);
381
0
            self.fill_to_next(patch.hole);
382
0
            let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
383
0
            Ok(Some(Patch { hole: hole, entry: entry }))
384
        }
385
0
    }
386
387
0
    fn c_dotstar(&mut self) -> Result {
388
0
        Ok(if !self.compiled.only_utf8() {
389
0
            self.c(&Hir::repetition(hir::Repetition {
390
0
                kind: hir::RepetitionKind::ZeroOrMore,
391
0
                greedy: false,
392
0
                hir: Box::new(Hir::any(true)),
393
0
            }))?
394
            .unwrap()
395
        } else {
396
0
            self.c(&Hir::repetition(hir::Repetition {
397
0
                kind: hir::RepetitionKind::ZeroOrMore,
398
0
                greedy: false,
399
0
                hir: Box::new(Hir::any(false)),
400
0
            }))?
401
            .unwrap()
402
        })
403
0
    }
404
405
0
    fn c_char(&mut self, c: char) -> ResultOrEmpty {
406
0
        if self.compiled.uses_bytes() {
407
0
            if c.is_ascii() {
408
0
                let b = c as u8;
409
0
                let hole =
410
0
                    self.push_hole(InstHole::Bytes { start: b, end: b });
411
0
                self.byte_classes.set_range(b, b);
412
0
                Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
413
            } else {
414
0
                self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
415
            }
416
        } else {
417
0
            let hole = self.push_hole(InstHole::Char { c: c });
418
0
            Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
419
        }
420
0
    }
421
422
0
    fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
423
0
        assert!(!ranges.is_empty());
424
0
        if self.compiled.uses_bytes() {
425
0
            Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
426
        } else {
427
0
            let ranges: Vec<(char, char)> =
428
0
                ranges.iter().map(|r| (r.start(), r.end())).collect();
429
0
            let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
430
0
                self.push_hole(InstHole::Char { c: ranges[0].0 })
431
            } else {
432
0
                self.push_hole(InstHole::Ranges { ranges: ranges })
433
            };
434
0
            Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
435
        }
436
0
    }
437
438
    fn c_byte(&mut self, b: u8) -> ResultOrEmpty {
439
        self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
440
    }
441
442
0
    fn c_class_bytes(
443
0
        &mut self,
444
0
        ranges: &[hir::ClassBytesRange],
445
0
    ) -> ResultOrEmpty {
446
        debug_assert!(!ranges.is_empty());
447
448
0
        let first_split_entry = self.insts.len();
449
0
        let mut holes = vec![];
450
0
        let mut prev_hole = Hole::None;
451
0
        for r in &ranges[0..ranges.len() - 1] {
452
0
            self.fill_to_next(prev_hole);
453
0
            let split = self.push_split_hole();
454
0
            let next = self.insts.len();
455
0
            self.byte_classes.set_range(r.start(), r.end());
456
0
            holes.push(self.push_hole(InstHole::Bytes {
457
0
                start: r.start(),
458
0
                end: r.end(),
459
0
            }));
460
0
            prev_hole = self.fill_split(split, Some(next), None);
461
0
        }
462
0
        let next = self.insts.len();
463
0
        let r = &ranges[ranges.len() - 1];
464
0
        self.byte_classes.set_range(r.start(), r.end());
465
0
        holes.push(
466
0
            self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }),
467
0
        );
468
0
        self.fill(prev_hole, next);
469
0
        Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
470
0
    }
471
472
    fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty {
473
        let hole = self.push_hole(InstHole::EmptyLook { look: look });
474
        Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
475
    }
476
477
0
    fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
478
0
    where
479
0
        I: IntoIterator<Item = &'a Hir>,
480
0
    {
481
0
        let mut exprs = exprs.into_iter();
482
0
        let Patch { mut hole, entry } = loop {
483
0
            match exprs.next() {
484
0
                None => return Ok(None),
485
0
                Some(e) => {
486
0
                    if let Some(p) = self.c(e)? {
487
0
                        break p;
488
0
                    }
489
                }
490
            }
491
        };
492
0
        for e in exprs {
493
0
            if let Some(p) = self.c(e)? {
494
0
                self.fill(hole, p.entry);
495
0
                hole = p.hole;
496
0
            }
497
        }
498
0
        Ok(Some(Patch { hole: hole, entry: entry }))
499
0
    }
Unexecuted instantiation: <regex::compile::Compiler>::c_concat::<core::iter::adapters::take::Take<core::iter::sources::repeat::Repeat<&regex_syntax::hir::Hir>>>
Unexecuted instantiation: <regex::compile::Compiler>::c_concat::<&alloc::vec::Vec<regex_syntax::hir::Hir>>
Unexecuted instantiation: <regex::compile::Compiler>::c_concat::<core::iter::adapters::rev::Rev<core::slice::iter::Iter<regex_syntax::hir::Hir>>>
500
501
0
    fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty {
502
        debug_assert!(
503
            exprs.len() >= 2,
504
            "alternates must have at least 2 exprs"
505
        );
506
507
        // Initial entry point is always the first split.
508
0
        let first_split_entry = self.insts.len();
509
0
510
0
        // Save up all of the holes from each alternate. They will all get
511
0
        // patched to point to the same location.
512
0
        let mut holes = vec![];
513
0
514
0
        // true indicates that the hole is a split where we want to fill
515
0
        // the second branch.
516
0
        let mut prev_hole = (Hole::None, false);
517
0
        for e in &exprs[0..exprs.len() - 1] {
518
0
            if prev_hole.1 {
519
0
                let next = self.insts.len();
520
0
                self.fill_split(prev_hole.0, None, Some(next));
521
0
            } else {
522
0
                self.fill_to_next(prev_hole.0);
523
0
            }
524
0
            let split = self.push_split_hole();
525
0
            if let Some(Patch { hole, entry }) = self.c(e)? {
526
0
                holes.push(hole);
527
0
                prev_hole = (self.fill_split(split, Some(entry), None), false);
528
0
            } else {
529
0
                let (split1, split2) = split.dup_one();
530
0
                holes.push(split1);
531
0
                prev_hole = (split2, true);
532
0
            }
533
        }
534
0
        if let Some(Patch { hole, entry }) = self.c(&exprs[exprs.len() - 1])? {
535
0
            holes.push(hole);
536
0
            if prev_hole.1 {
537
0
                self.fill_split(prev_hole.0, None, Some(entry));
538
0
            } else {
539
0
                self.fill(prev_hole.0, entry);
540
0
            }
541
0
        } else {
542
0
            // We ignore prev_hole.1. When it's true, it means we have two
543
0
            // empty branches both pushing prev_hole.0 into holes, so both
544
0
            // branches will go to the same place anyway.
545
0
            holes.push(prev_hole.0);
546
0
        }
547
0
        Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry }))
548
0
    }
549
550
0
    fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
551
0
        use syntax::hir::RepetitionKind::*;
552
0
        match rep.kind {
553
0
            ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
554
0
            ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
555
0
            OneOrMore => self.c_repeat_one_or_more(&rep.hir, rep.greedy),
556
0
            Range(hir::RepetitionRange::Exactly(min_max)) => {
557
0
                self.c_repeat_range(&rep.hir, rep.greedy, min_max, min_max)
558
            }
559
0
            Range(hir::RepetitionRange::AtLeast(min)) => {
560
0
                self.c_repeat_range_min_or_more(&rep.hir, rep.greedy, min)
561
            }
562
0
            Range(hir::RepetitionRange::Bounded(min, max)) => {
563
0
                self.c_repeat_range(&rep.hir, rep.greedy, min, max)
564
            }
565
        }
566
0
    }
567
568
0
    fn c_repeat_zero_or_one(
569
0
        &mut self,
570
0
        expr: &Hir,
571
0
        greedy: bool,
572
0
    ) -> ResultOrEmpty {
573
0
        let split_entry = self.insts.len();
574
0
        let split = self.push_split_hole();
575
0
        let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
576
0
            Some(p) => p,
577
0
            None => return self.pop_split_hole(),
578
        };
579
0
        let split_hole = if greedy {
580
0
            self.fill_split(split, Some(entry_rep), None)
581
        } else {
582
0
            self.fill_split(split, None, Some(entry_rep))
583
        };
584
0
        let holes = vec![hole_rep, split_hole];
585
0
        Ok(Some(Patch { hole: Hole::Many(holes), entry: split_entry }))
586
0
    }
587
588
0
    fn c_repeat_zero_or_more(
589
0
        &mut self,
590
0
        expr: &Hir,
591
0
        greedy: bool,
592
0
    ) -> ResultOrEmpty {
593
0
        let split_entry = self.insts.len();
594
0
        let split = self.push_split_hole();
595
0
        let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
596
0
            Some(p) => p,
597
0
            None => return self.pop_split_hole(),
598
        };
599
600
0
        self.fill(hole_rep, split_entry);
601
0
        let split_hole = if greedy {
602
0
            self.fill_split(split, Some(entry_rep), None)
603
        } else {
604
0
            self.fill_split(split, None, Some(entry_rep))
605
        };
606
0
        Ok(Some(Patch { hole: split_hole, entry: split_entry }))
607
0
    }
608
609
0
    fn c_repeat_one_or_more(
610
0
        &mut self,
611
0
        expr: &Hir,
612
0
        greedy: bool,
613
0
    ) -> ResultOrEmpty {
614
0
        let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? {
615
0
            Some(p) => p,
616
0
            None => return Ok(None),
617
        };
618
0
        self.fill_to_next(hole_rep);
619
0
        let split = self.push_split_hole();
620
621
0
        let split_hole = if greedy {
622
0
            self.fill_split(split, Some(entry_rep), None)
623
        } else {
624
0
            self.fill_split(split, None, Some(entry_rep))
625
        };
626
0
        Ok(Some(Patch { hole: split_hole, entry: entry_rep }))
627
0
    }
628
629
0
    fn c_repeat_range_min_or_more(
630
0
        &mut self,
631
0
        expr: &Hir,
632
0
        greedy: bool,
633
0
        min: u32,
634
0
    ) -> ResultOrEmpty {
635
0
        let min = u32_to_usize(min);
636
        // Using next_inst() is ok, because we can't return it (concat would
637
        // have to return Some(_) while c_repeat_range_min_or_more returns
638
        // None).
639
0
        let patch_concat = self
640
0
            .c_concat(iter::repeat(expr).take(min))?
641
0
            .unwrap_or(self.next_inst());
642
0
        if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? {
643
0
            self.fill(patch_concat.hole, patch_rep.entry);
644
0
            Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry }))
645
        } else {
646
0
            Ok(None)
647
        }
648
0
    }
649
650
0
    fn c_repeat_range(
651
0
        &mut self,
652
0
        expr: &Hir,
653
0
        greedy: bool,
654
0
        min: u32,
655
0
        max: u32,
656
0
    ) -> ResultOrEmpty {
657
0
        let (min, max) = (u32_to_usize(min), u32_to_usize(max));
658
        debug_assert!(min <= max);
659
0
        let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
660
0
        if min == max {
661
0
            return Ok(patch_concat);
662
0
        }
663
0
        // Same reasoning as in c_repeat_range_min_or_more (we know that min <
664
0
        // max at this point).
665
0
        let patch_concat = patch_concat.unwrap_or(self.next_inst());
666
0
        let initial_entry = patch_concat.entry;
667
0
        // It is much simpler to compile, e.g., `a{2,5}` as:
668
0
        //
669
0
        //     aaa?a?a?
670
0
        //
671
0
        // But you end up with a sequence of instructions like this:
672
0
        //
673
0
        //     0: 'a'
674
0
        //     1: 'a',
675
0
        //     2: split(3, 4)
676
0
        //     3: 'a'
677
0
        //     4: split(5, 6)
678
0
        //     5: 'a'
679
0
        //     6: split(7, 8)
680
0
        //     7: 'a'
681
0
        //     8: MATCH
682
0
        //
683
0
        // This is *incredibly* inefficient because the splits end
684
0
        // up forming a chain, which has to be resolved everything a
685
0
        // transition is followed.
686
0
        let mut holes = vec![];
687
0
        let mut prev_hole = patch_concat.hole;
688
0
        for _ in min..max {
689
0
            self.fill_to_next(prev_hole);
690
0
            let split = self.push_split_hole();
691
0
            let Patch { hole, entry } = match self.c(expr)? {
692
0
                Some(p) => p,
693
0
                None => return self.pop_split_hole(),
694
            };
695
0
            prev_hole = hole;
696
0
            if greedy {
697
0
                holes.push(self.fill_split(split, Some(entry), None));
698
0
            } else {
699
0
                holes.push(self.fill_split(split, None, Some(entry)));
700
0
            }
701
        }
702
0
        holes.push(prev_hole);
703
0
        Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry }))
704
0
    }
705
706
    /// Can be used as a default value for the c_* functions when the call to
707
    /// c_function is followed by inserting at least one instruction that is
708
    /// always executed after the ones written by the c* function.
709
    fn next_inst(&self) -> Patch {
710
        Patch { hole: Hole::None, entry: self.insts.len() }
711
    }
712
713
0
    fn fill(&mut self, hole: Hole, goto: InstPtr) {
714
0
        match hole {
715
0
            Hole::None => {}
716
0
            Hole::One(pc) => {
717
0
                self.insts[pc].fill(goto);
718
0
            }
719
0
            Hole::Many(holes) => {
720
0
                for hole in holes {
721
0
                    self.fill(hole, goto);
722
0
                }
723
            }
724
        }
725
0
    }
726
727
    fn fill_to_next(&mut self, hole: Hole) {
728
        let next = self.insts.len();
729
        self.fill(hole, next);
730
    }
731
732
0
    fn fill_split(
733
0
        &mut self,
734
0
        hole: Hole,
735
0
        goto1: Option<InstPtr>,
736
0
        goto2: Option<InstPtr>,
737
0
    ) -> Hole {
738
0
        match hole {
739
0
            Hole::None => Hole::None,
740
0
            Hole::One(pc) => match (goto1, goto2) {
741
0
                (Some(goto1), Some(goto2)) => {
742
0
                    self.insts[pc].fill_split(goto1, goto2);
743
0
                    Hole::None
744
                }
745
0
                (Some(goto1), None) => {
746
0
                    self.insts[pc].half_fill_split_goto1(goto1);
747
0
                    Hole::One(pc)
748
                }
749
0
                (None, Some(goto2)) => {
750
0
                    self.insts[pc].half_fill_split_goto2(goto2);
751
0
                    Hole::One(pc)
752
                }
753
0
                (None, None) => unreachable!(
754
0
                    "at least one of the split \
755
0
                     holes must be filled"
756
0
                ),
757
            },
758
0
            Hole::Many(holes) => {
759
0
                let mut new_holes = vec![];
760
0
                for hole in holes {
761
0
                    new_holes.push(self.fill_split(hole, goto1, goto2));
762
0
                }
763
0
                if new_holes.is_empty() {
764
0
                    Hole::None
765
0
                } else if new_holes.len() == 1 {
766
0
                    new_holes.pop().unwrap()
767
                } else {
768
0
                    Hole::Many(new_holes)
769
                }
770
            }
771
        }
772
0
    }
773
774
    fn push_compiled(&mut self, inst: Inst) {
775
        self.insts.push(MaybeInst::Compiled(inst));
776
    }
777
778
    fn push_hole(&mut self, inst: InstHole) -> Hole {
779
        let hole = self.insts.len();
780
        self.insts.push(MaybeInst::Uncompiled(inst));
781
        Hole::One(hole)
782
    }
783
784
    fn push_split_hole(&mut self) -> Hole {
785
        let hole = self.insts.len();
786
        self.insts.push(MaybeInst::Split);
787
        Hole::One(hole)
788
    }
789
790
    fn pop_split_hole(&mut self) -> ResultOrEmpty {
791
        self.insts.pop();
792
        Ok(None)
793
    }
794
795
0
    fn check_size(&self) -> result::Result<(), Error> {
796
0
        use std::mem::size_of;
797
0
798
0
        if self.insts.len() * size_of::<Inst>() > self.size_limit {
799
0
            Err(Error::CompiledTooBig(self.size_limit))
800
        } else {
801
0
            Ok(())
802
        }
803
0
    }
804
}
805
806
#[derive(Debug)]
807
enum Hole {
808
    None,
809
    One(InstPtr),
810
    Many(Vec<Hole>),
811
}
812
813
impl Hole {
814
0
    fn dup_one(self) -> (Self, Self) {
815
0
        match self {
816
0
            Hole::One(pc) => (Hole::One(pc), Hole::One(pc)),
817
            Hole::None | Hole::Many(_) => {
818
0
                unreachable!("must be called on single hole")
819
            }
820
        }
821
0
    }
822
}
823
824
0
#[derive(Clone, Debug)]
825
enum MaybeInst {
826
    Compiled(Inst),
827
    Uncompiled(InstHole),
828
    Split,
829
    Split1(InstPtr),
830
    Split2(InstPtr),
831
}
832
833
impl MaybeInst {
834
0
    fn fill(&mut self, goto: InstPtr) {
835
0
        let maybeinst = match *self {
836
0
            MaybeInst::Split => MaybeInst::Split1(goto),
837
0
            MaybeInst::Uncompiled(ref inst) => {
838
0
                MaybeInst::Compiled(inst.fill(goto))
839
            }
840
0
            MaybeInst::Split1(goto1) => {
841
0
                MaybeInst::Compiled(Inst::Split(InstSplit {
842
0
                    goto1: goto1,
843
0
                    goto2: goto,
844
0
                }))
845
            }
846
0
            MaybeInst::Split2(goto2) => {
847
0
                MaybeInst::Compiled(Inst::Split(InstSplit {
848
0
                    goto1: goto,
849
0
                    goto2: goto2,
850
0
                }))
851
            }
852
0
            _ => unreachable!(
853
0
                "not all instructions were compiled! \
854
0
                 found uncompiled instruction: {:?}",
855
0
                self
856
0
            ),
857
        };
858
0
        *self = maybeinst;
859
0
    }
860
861
0
    fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {
862
0
        let filled = match *self {
863
0
            MaybeInst::Split => {
864
0
                Inst::Split(InstSplit { goto1: goto1, goto2: goto2 })
865
            }
866
0
            _ => unreachable!(
867
0
                "must be called on Split instruction, \
868
0
                 instead it was called on: {:?}",
869
0
                self
870
0
            ),
871
        };
872
0
        *self = MaybeInst::Compiled(filled);
873
0
    }
874
875
0
    fn half_fill_split_goto1(&mut self, goto1: InstPtr) {
876
0
        let half_filled = match *self {
877
0
            MaybeInst::Split => goto1,
878
0
            _ => unreachable!(
879
0
                "must be called on Split instruction, \
880
0
                 instead it was called on: {:?}",
881
0
                self
882
0
            ),
883
        };
884
0
        *self = MaybeInst::Split1(half_filled);
885
0
    }
886
887
0
    fn half_fill_split_goto2(&mut self, goto2: InstPtr) {
888
0
        let half_filled = match *self {
889
0
            MaybeInst::Split => goto2,
890
0
            _ => unreachable!(
891
0
                "must be called on Split instruction, \
892
0
                 instead it was called on: {:?}",
893
0
                self
894
0
            ),
895
        };
896
0
        *self = MaybeInst::Split2(half_filled);
897
0
    }
898
899
0
    fn unwrap(self) -> Inst {
900
0
        match self {
901
0
            MaybeInst::Compiled(inst) => inst,
902
0
            _ => unreachable!(
903
0
                "must be called on a compiled instruction, \
904
0
                 instead it was called on: {:?}",
905
0
                self
906
0
            ),
907
        }
908
0
    }
909
}
910
911
0
#[derive(Clone, Debug)]
912
enum InstHole {
913
    Save { slot: usize },
914
    EmptyLook { look: EmptyLook },
915
    Char { c: char },
916
    Ranges { ranges: Vec<(char, char)> },
917
    Bytes { start: u8, end: u8 },
918
}
919
920
impl InstHole {
921
0
    fn fill(&self, goto: InstPtr) -> Inst {
922
0
        match *self {
923
0
            InstHole::Save { slot } => {
924
0
                Inst::Save(InstSave { goto: goto, slot: slot })
925
            }
926
0
            InstHole::EmptyLook { look } => {
927
0
                Inst::EmptyLook(InstEmptyLook { goto: goto, look: look })
928
            }
929
0
            InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }),
930
0
            InstHole::Ranges { ref ranges } => {
931
0
                Inst::Ranges(InstRanges { goto: goto, ranges: ranges.clone() })
932
            }
933
0
            InstHole::Bytes { start, end } => {
934
0
                Inst::Bytes(InstBytes { goto: goto, start: start, end: end })
935
            }
936
        }
937
0
    }
938
}
939
940
struct CompileClass<'a, 'b> {
941
    c: &'a mut Compiler,
942
    ranges: &'b [hir::ClassUnicodeRange],
943
}
944
945
impl<'a, 'b> CompileClass<'a, 'b> {
946
0
    fn compile(mut self) -> Result {
947
0
        let mut holes = vec![];
948
0
        let mut initial_entry = None;
949
0
        let mut last_split = Hole::None;
950
0
        let mut utf8_seqs = self.c.utf8_seqs.take().unwrap();
951
0
        self.c.suffix_cache.clear();
952
953
0
        for (i, range) in self.ranges.iter().enumerate() {
954
0
            let is_last_range = i + 1 == self.ranges.len();
955
0
            utf8_seqs.reset(range.start(), range.end());
956
0
            let mut it = (&mut utf8_seqs).peekable();
957
            loop {
958
0
                let utf8_seq = match it.next() {
959
0
                    None => break,
960
0
                    Some(utf8_seq) => utf8_seq,
961
                };
962
0
                if is_last_range && it.peek().is_none() {
963
0
                    let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?;
964
0
                    holes.push(hole);
965
0
                    self.c.fill(last_split, entry);
966
0
                    last_split = Hole::None;
967
0
                    if initial_entry.is_none() {
968
0
                        initial_entry = Some(entry);
969
0
                    }
970
                } else {
971
0
                    if initial_entry.is_none() {
972
0
                        initial_entry = Some(self.c.insts.len());
973
0
                    }
974
0
                    self.c.fill_to_next(last_split);
975
0
                    last_split = self.c.push_split_hole();
976
0
                    let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?;
977
0
                    holes.push(hole);
978
0
                    last_split =
979
0
                        self.c.fill_split(last_split, Some(entry), None);
980
                }
981
            }
982
        }
983
0
        self.c.utf8_seqs = Some(utf8_seqs);
984
0
        Ok(Patch { hole: Hole::Many(holes), entry: initial_entry.unwrap() })
985
0
    }
986
987
0
    fn c_utf8_seq(&mut self, seq: &Utf8Sequence) -> Result {
988
0
        if self.c.compiled.is_reverse {
989
0
            self.c_utf8_seq_(seq)
990
        } else {
991
0
            self.c_utf8_seq_(seq.into_iter().rev())
992
        }
993
0
    }
994
995
0
    fn c_utf8_seq_<'r, I>(&mut self, seq: I) -> Result
996
0
    where
997
0
        I: IntoIterator<Item = &'r Utf8Range>,
998
0
    {
999
        // The initial instruction for each UTF-8 sequence should be the same.
1000
0
        let mut from_inst = ::std::usize::MAX;
1001
0
        let mut last_hole = Hole::None;
1002
0
        for byte_range in seq {
1003
0
            let key = SuffixCacheKey {
1004
0
                from_inst: from_inst,
1005
0
                start: byte_range.start,
1006
0
                end: byte_range.end,
1007
0
            };
1008
0
            {
1009
0
                let pc = self.c.insts.len();
1010
0
                if let Some(cached_pc) = self.c.suffix_cache.get(key, pc) {
1011
0
                    from_inst = cached_pc;
1012
                    continue;
1013
0
                }
1014
0
            }
1015
0
            self.c.byte_classes.set_range(byte_range.start, byte_range.end);
1016
0
            if from_inst == ::std::usize::MAX {
1017
0
                last_hole = self.c.push_hole(InstHole::Bytes {
1018
0
                    start: byte_range.start,
1019
0
                    end: byte_range.end,
1020
0
                });
1021
0
            } else {
1022
0
                self.c.push_compiled(Inst::Bytes(InstBytes {
1023
0
                    goto: from_inst,
1024
0
                    start: byte_range.start,
1025
0
                    end: byte_range.end,
1026
0
                }));
1027
0
            }
1028
0
            from_inst = self.c.insts.len().checked_sub(1).unwrap();
1029
            debug_assert!(from_inst < ::std::usize::MAX);
1030
        }
1031
        debug_assert!(from_inst < ::std::usize::MAX);
1032
0
        Ok(Patch { hole: last_hole, entry: from_inst })
1033
0
    }
Unexecuted instantiation: <regex::compile::CompileClass>::c_utf8_seq_::<&regex_syntax::utf8::Utf8Sequence>
Unexecuted instantiation: <regex::compile::CompileClass>::c_utf8_seq_::<core::iter::adapters::rev::Rev<core::slice::iter::Iter<regex_syntax::utf8::Utf8Range>>>
1034
}
1035
1036
/// `SuffixCache` is a simple bounded hash map for caching suffix entries in
1037
/// UTF-8 automata. For example, consider the Unicode range \u{0}-\u{FFFF}.
1038
/// The set of byte ranges looks like this:
1039
///
1040
/// [0-7F]
1041
/// [C2-DF][80-BF]
1042
/// [E0][A0-BF][80-BF]
1043
/// [E1-EC][80-BF][80-BF]
1044
/// [ED][80-9F][80-BF]
1045
/// [EE-EF][80-BF][80-BF]
1046
///
1047
/// Each line above translates to one alternate in the compiled regex program.
1048
/// However, all but one of the alternates end in the same suffix, which is
1049
/// a waste of an instruction. The suffix cache facilitates reusing them across
1050
/// alternates.
1051
///
1052
/// Note that a HashMap could be trivially used for this, but we don't need its
1053
/// overhead. Some small bounded space (LRU style) is more than enough.
1054
///
1055
/// This uses similar idea to [`SparseSet`](../sparse/struct.SparseSet.html),
1056
/// except it uses hashes as original indices and then compares full keys for
1057
/// validation against `dense` array.
1058
#[derive(Debug)]
1059
struct SuffixCache {
1060
    sparse: Box<[usize]>,
1061
    dense: Vec<SuffixCacheEntry>,
1062
}
1063
1064
0
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
1065
struct SuffixCacheEntry {
1066
    key: SuffixCacheKey,
1067
    pc: InstPtr,
1068
}
1069
1070
0
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
1071
struct SuffixCacheKey {
1072
    from_inst: InstPtr,
1073
    start: u8,
1074
    end: u8,
1075
}
1076
1077
impl SuffixCache {
1078
    fn new(size: usize) -> Self {
1079
        SuffixCache {
1080
            sparse: vec![0usize; size].into(),
1081
            dense: Vec::with_capacity(size),
1082
        }
1083
    }
1084
1085
0
    fn get(&mut self, key: SuffixCacheKey, pc: InstPtr) -> Option<InstPtr> {
1086
0
        let hash = self.hash(&key);
1087
0
        let pos = &mut self.sparse[hash];
1088
0
        if let Some(entry) = self.dense.get(*pos) {
1089
0
            if entry.key == key {
1090
0
                return Some(entry.pc);
1091
0
            }
1092
0
        }
1093
0
        *pos = self.dense.len();
1094
0
        self.dense.push(SuffixCacheEntry { key: key, pc: pc });
1095
0
        None
1096
0
    }
1097
1098
    fn clear(&mut self) {
1099
        self.dense.clear();
1100
    }
1101
1102
    fn hash(&self, suffix: &SuffixCacheKey) -> usize {
1103
        // Basic FNV-1a hash as described:
1104
        // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
1105
        const FNV_PRIME: u64 = 1099511628211;
1106
        let mut h = 14695981039346656037;
1107
        h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME);
1108
        h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME);
1109
        h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME);
1110
        (h as usize) % self.sparse.len()
1111
    }
1112
}
1113
1114
struct ByteClassSet([bool; 256]);
1115
1116
impl ByteClassSet {
1117
    fn new() -> Self {
1118
        ByteClassSet([false; 256])
1119
    }
1120
1121
0
    fn set_range(&mut self, start: u8, end: u8) {
1122
        debug_assert!(start <= end);
1123
0
        if start > 0 {
1124
0
            self.0[start as usize - 1] = true;
1125
0
        }
1126
0
        self.0[end as usize] = true;
1127
0
    }
1128
1129
0
    fn set_word_boundary(&mut self) {
1130
0
        // We need to mark all ranges of bytes whose pairs result in
1131
0
        // evaluating \b differently.
1132
0
        let iswb = is_word_byte;
1133
0
        let mut b1: u16 = 0;
1134
        let mut b2: u16;
1135
0
        while b1 <= 255 {
1136
0
            b2 = b1 + 1;
1137
0
            while b2 <= 255 && iswb(b1 as u8) == iswb(b2 as u8) {
1138
0
                b2 += 1;
1139
0
            }
1140
0
            self.set_range(b1 as u8, (b2 - 1) as u8);
1141
0
            b1 = b2;
1142
        }
1143
0
    }
1144
1145
0
    fn byte_classes(&self) -> Vec<u8> {
1146
0
        // N.B. If you're debugging the DFA, it's useful to simply return
1147
0
        // `(0..256).collect()`, which effectively removes the byte classes
1148
0
        // and makes the transitions easier to read.
1149
0
        // (0usize..256).map(|x| x as u8).collect()
1150
0
        let mut byte_classes = vec![0; 256];
1151
0
        let mut class = 0u8;
1152
0
        let mut i = 0;
1153
        loop {
1154
0
            byte_classes[i] = class as u8;
1155
0
            if i >= 255 {
1156
0
                break;
1157
0
            }
1158
0
            if self.0[i] {
1159
0
                class = class.checked_add(1).unwrap();
1160
0
            }
1161
0
            i += 1;
1162
        }
1163
0
        byte_classes
1164
0
    }
1165
}
1166
1167
impl fmt::Debug for ByteClassSet {
1168
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1169
        f.debug_tuple("ByteClassSet").field(&&self.0[..]).finish()
1170
    }
1171
}
1172
1173
0
fn u32_to_usize(n: u32) -> usize {
1174
    // In case usize is less than 32 bits, we need to guard against overflow.
1175
    // On most platforms this compiles to nothing.
1176
    // TODO Use `std::convert::TryFrom` once it's stable.
1177
0
    if (n as u64) > (::std::usize::MAX as u64) {
1178
0
        panic!("BUG: {} is too big to be pointer sized", n)
1179
0
    }
1180
0
    n as usize
1181
0
}
1182
1183
#[cfg(test)]
1184
mod tests {
1185
    use super::ByteClassSet;
1186
1187
    #[test]
1188
    fn byte_classes() {
1189
        let mut set = ByteClassSet::new();
1190
        set.set_range(b'a', b'z');
1191
        let classes = set.byte_classes();
1192
        assert_eq!(classes[0], 0);
1193
        assert_eq!(classes[1], 0);
1194
        assert_eq!(classes[2], 0);
1195
        assert_eq!(classes[b'a' as usize - 1], 0);
1196
        assert_eq!(classes[b'a' as usize], 1);
1197
        assert_eq!(classes[b'm' as usize], 1);
1198
        assert_eq!(classes[b'z' as usize], 1);
1199
        assert_eq!(classes[b'z' as usize + 1], 2);
1200
        assert_eq!(classes[254], 2);
1201
        assert_eq!(classes[255], 2);
1202
1203
        let mut set = ByteClassSet::new();
1204
        set.set_range(0, 2);
1205
        set.set_range(4, 6);
1206
        let classes = set.byte_classes();
1207
        assert_eq!(classes[0], 0);
1208
        assert_eq!(classes[1], 0);
1209
        assert_eq!(classes[2], 0);
1210
        assert_eq!(classes[3], 1);
1211
        assert_eq!(classes[4], 2);
1212
        assert_eq!(classes[5], 2);
1213
        assert_eq!(classes[6], 2);
1214
        assert_eq!(classes[7], 3);
1215
        assert_eq!(classes[255], 3);
1216
    }
1217
1218
    #[test]
1219
    fn full_byte_classes() {
1220
        let mut set = ByteClassSet::new();
1221
        for i in 0..256u16 {
1222
            set.set_range(i as u8, i as u8);
1223
        }
1224
        assert_eq!(set.byte_classes().len(), 256);
1225
    }
1226
}