Coverage Report

Created: 2025-12-28 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-syntax-0.6.29/src/ast/print.rs
Line
Count
Source
1
/*!
2
This module provides a regular expression printer for `Ast`.
3
*/
4
5
use std::fmt;
6
7
use crate::ast::visitor::{self, Visitor};
8
use crate::ast::{self, Ast};
9
10
/// A builder for constructing a printer.
11
///
12
/// Note that since a printer doesn't have any configuration knobs, this type
13
/// remains unexported.
14
#[derive(Clone, Debug)]
15
struct PrinterBuilder {
16
    _priv: (),
17
}
18
19
impl Default for PrinterBuilder {
20
0
    fn default() -> PrinterBuilder {
21
0
        PrinterBuilder::new()
22
0
    }
23
}
24
25
impl PrinterBuilder {
26
0
    fn new() -> PrinterBuilder {
27
0
        PrinterBuilder { _priv: () }
28
0
    }
29
30
0
    fn build(&self) -> Printer {
31
0
        Printer { _priv: () }
32
0
    }
33
}
34
35
/// A printer for a regular expression abstract syntax tree.
36
///
37
/// A printer converts an abstract syntax tree (AST) to a regular expression
38
/// pattern string. This particular printer uses constant stack space and heap
39
/// space proportional to the size of the AST.
40
///
41
/// This printer will not necessarily preserve the original formatting of the
42
/// regular expression pattern string. For example, all whitespace and comments
43
/// are ignored.
44
#[derive(Debug)]
45
pub struct Printer {
46
    _priv: (),
47
}
48
49
impl Printer {
50
    /// Create a new printer.
51
0
    pub fn new() -> Printer {
52
0
        PrinterBuilder::new().build()
53
0
    }
54
55
    /// Print the given `Ast` to the given writer. The writer must implement
56
    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
57
    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
58
    /// implementations) or a `&mut String`.
59
0
    pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
60
0
        visitor::visit(ast, Writer { wtr })
61
0
    }
62
}
63
64
#[derive(Debug)]
65
struct Writer<W> {
66
    wtr: W,
67
}
68
69
impl<W: fmt::Write> Visitor for Writer<W> {
70
    type Output = ();
71
    type Err = fmt::Error;
72
73
0
    fn finish(self) -> fmt::Result {
74
0
        Ok(())
75
0
    }
76
77
0
    fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
78
0
        match *ast {
79
0
            Ast::Group(ref x) => self.fmt_group_pre(x),
80
0
            Ast::Class(ast::Class::Bracketed(ref x)) => {
81
0
                self.fmt_class_bracketed_pre(x)
82
            }
83
0
            _ => Ok(()),
84
        }
85
0
    }
86
87
0
    fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
88
        use crate::ast::Class;
89
90
0
        match *ast {
91
0
            Ast::Empty(_) => Ok(()),
92
0
            Ast::Flags(ref x) => self.fmt_set_flags(x),
93
0
            Ast::Literal(ref x) => self.fmt_literal(x),
94
0
            Ast::Dot(_) => self.wtr.write_str("."),
95
0
            Ast::Assertion(ref x) => self.fmt_assertion(x),
96
0
            Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
97
0
            Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
98
0
            Ast::Class(Class::Bracketed(ref x)) => {
99
0
                self.fmt_class_bracketed_post(x)
100
            }
101
0
            Ast::Repetition(ref x) => self.fmt_repetition(x),
102
0
            Ast::Group(ref x) => self.fmt_group_post(x),
103
0
            Ast::Alternation(_) => Ok(()),
104
0
            Ast::Concat(_) => Ok(()),
105
        }
106
0
    }
107
108
0
    fn visit_alternation_in(&mut self) -> fmt::Result {
109
0
        self.wtr.write_str("|")
110
0
    }
111
112
0
    fn visit_class_set_item_pre(
113
0
        &mut self,
114
0
        ast: &ast::ClassSetItem,
115
0
    ) -> Result<(), Self::Err> {
116
0
        match *ast {
117
0
            ast::ClassSetItem::Bracketed(ref x) => {
118
0
                self.fmt_class_bracketed_pre(x)
119
            }
120
0
            _ => Ok(()),
121
        }
122
0
    }
123
124
0
    fn visit_class_set_item_post(
125
0
        &mut self,
126
0
        ast: &ast::ClassSetItem,
127
0
    ) -> Result<(), Self::Err> {
128
        use crate::ast::ClassSetItem::*;
129
130
0
        match *ast {
131
0
            Empty(_) => Ok(()),
132
0
            Literal(ref x) => self.fmt_literal(x),
133
0
            Range(ref x) => {
134
0
                self.fmt_literal(&x.start)?;
135
0
                self.wtr.write_str("-")?;
136
0
                self.fmt_literal(&x.end)?;
137
0
                Ok(())
138
            }
139
0
            Ascii(ref x) => self.fmt_class_ascii(x),
140
0
            Unicode(ref x) => self.fmt_class_unicode(x),
141
0
            Perl(ref x) => self.fmt_class_perl(x),
142
0
            Bracketed(ref x) => self.fmt_class_bracketed_post(x),
143
0
            Union(_) => Ok(()),
144
        }
145
0
    }
146
147
0
    fn visit_class_set_binary_op_in(
148
0
        &mut self,
149
0
        ast: &ast::ClassSetBinaryOp,
150
0
    ) -> Result<(), Self::Err> {
151
0
        self.fmt_class_set_binary_op_kind(&ast.kind)
152
0
    }
153
}
154
155
impl<W: fmt::Write> Writer<W> {
156
0
    fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
157
        use crate::ast::GroupKind::*;
158
0
        match ast.kind {
159
0
            CaptureIndex(_) => self.wtr.write_str("("),
160
0
            CaptureName(ref x) => {
161
0
                self.wtr.write_str("(?P<")?;
162
0
                self.wtr.write_str(&x.name)?;
163
0
                self.wtr.write_str(">")?;
164
0
                Ok(())
165
            }
166
0
            NonCapturing(ref flags) => {
167
0
                self.wtr.write_str("(?")?;
168
0
                self.fmt_flags(flags)?;
169
0
                self.wtr.write_str(":")?;
170
0
                Ok(())
171
            }
172
        }
173
0
    }
174
175
0
    fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
176
0
        self.wtr.write_str(")")
177
0
    }
178
179
0
    fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
180
        use crate::ast::RepetitionKind::*;
181
0
        match ast.op.kind {
182
0
            ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
183
0
            ZeroOrOne => self.wtr.write_str("??"),
184
0
            ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
185
0
            ZeroOrMore => self.wtr.write_str("*?"),
186
0
            OneOrMore if ast.greedy => self.wtr.write_str("+"),
187
0
            OneOrMore => self.wtr.write_str("+?"),
188
0
            Range(ref x) => {
189
0
                self.fmt_repetition_range(x)?;
190
0
                if !ast.greedy {
191
0
                    self.wtr.write_str("?")?;
192
0
                }
193
0
                Ok(())
194
            }
195
        }
196
0
    }
197
198
0
    fn fmt_repetition_range(
199
0
        &mut self,
200
0
        ast: &ast::RepetitionRange,
201
0
    ) -> fmt::Result {
202
        use crate::ast::RepetitionRange::*;
203
0
        match *ast {
204
0
            Exactly(x) => write!(self.wtr, "{{{}}}", x),
205
0
            AtLeast(x) => write!(self.wtr, "{{{},}}", x),
206
0
            Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
207
        }
208
0
    }
209
210
0
    fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
211
        use crate::ast::LiteralKind::*;
212
213
0
        match ast.kind {
214
0
            Verbatim => self.wtr.write_char(ast.c),
215
0
            Punctuation => write!(self.wtr, r"\{}", ast.c),
216
0
            Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
217
            HexFixed(ast::HexLiteralKind::X) => {
218
0
                write!(self.wtr, r"\x{:02X}", ast.c as u32)
219
            }
220
            HexFixed(ast::HexLiteralKind::UnicodeShort) => {
221
0
                write!(self.wtr, r"\u{:04X}", ast.c as u32)
222
            }
223
            HexFixed(ast::HexLiteralKind::UnicodeLong) => {
224
0
                write!(self.wtr, r"\U{:08X}", ast.c as u32)
225
            }
226
            HexBrace(ast::HexLiteralKind::X) => {
227
0
                write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
228
            }
229
            HexBrace(ast::HexLiteralKind::UnicodeShort) => {
230
0
                write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
231
            }
232
            HexBrace(ast::HexLiteralKind::UnicodeLong) => {
233
0
                write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
234
            }
235
            Special(ast::SpecialLiteralKind::Bell) => {
236
0
                self.wtr.write_str(r"\a")
237
            }
238
            Special(ast::SpecialLiteralKind::FormFeed) => {
239
0
                self.wtr.write_str(r"\f")
240
            }
241
0
            Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
242
            Special(ast::SpecialLiteralKind::LineFeed) => {
243
0
                self.wtr.write_str(r"\n")
244
            }
245
            Special(ast::SpecialLiteralKind::CarriageReturn) => {
246
0
                self.wtr.write_str(r"\r")
247
            }
248
            Special(ast::SpecialLiteralKind::VerticalTab) => {
249
0
                self.wtr.write_str(r"\v")
250
            }
251
            Special(ast::SpecialLiteralKind::Space) => {
252
0
                self.wtr.write_str(r"\ ")
253
            }
254
        }
255
0
    }
256
257
0
    fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
258
        use crate::ast::AssertionKind::*;
259
0
        match ast.kind {
260
0
            StartLine => self.wtr.write_str("^"),
261
0
            EndLine => self.wtr.write_str("$"),
262
0
            StartText => self.wtr.write_str(r"\A"),
263
0
            EndText => self.wtr.write_str(r"\z"),
264
0
            WordBoundary => self.wtr.write_str(r"\b"),
265
0
            NotWordBoundary => self.wtr.write_str(r"\B"),
266
        }
267
0
    }
268
269
0
    fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
270
0
        self.wtr.write_str("(?")?;
271
0
        self.fmt_flags(&ast.flags)?;
272
0
        self.wtr.write_str(")")?;
273
0
        Ok(())
274
0
    }
275
276
0
    fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
277
        use crate::ast::{Flag, FlagsItemKind};
278
279
0
        for item in &ast.items {
280
0
            match item.kind {
281
0
                FlagsItemKind::Negation => self.wtr.write_str("-"),
282
0
                FlagsItemKind::Flag(ref flag) => match *flag {
283
0
                    Flag::CaseInsensitive => self.wtr.write_str("i"),
284
0
                    Flag::MultiLine => self.wtr.write_str("m"),
285
0
                    Flag::DotMatchesNewLine => self.wtr.write_str("s"),
286
0
                    Flag::SwapGreed => self.wtr.write_str("U"),
287
0
                    Flag::Unicode => self.wtr.write_str("u"),
288
0
                    Flag::IgnoreWhitespace => self.wtr.write_str("x"),
289
                },
290
0
            }?;
291
        }
292
0
        Ok(())
293
0
    }
294
295
0
    fn fmt_class_bracketed_pre(
296
0
        &mut self,
297
0
        ast: &ast::ClassBracketed,
298
0
    ) -> fmt::Result {
299
0
        if ast.negated {
300
0
            self.wtr.write_str("[^")
301
        } else {
302
0
            self.wtr.write_str("[")
303
        }
304
0
    }
305
306
0
    fn fmt_class_bracketed_post(
307
0
        &mut self,
308
0
        _ast: &ast::ClassBracketed,
309
0
    ) -> fmt::Result {
310
0
        self.wtr.write_str("]")
311
0
    }
312
313
0
    fn fmt_class_set_binary_op_kind(
314
0
        &mut self,
315
0
        ast: &ast::ClassSetBinaryOpKind,
316
0
    ) -> fmt::Result {
317
        use crate::ast::ClassSetBinaryOpKind::*;
318
0
        match *ast {
319
0
            Intersection => self.wtr.write_str("&&"),
320
0
            Difference => self.wtr.write_str("--"),
321
0
            SymmetricDifference => self.wtr.write_str("~~"),
322
        }
323
0
    }
324
325
0
    fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
326
        use crate::ast::ClassPerlKind::*;
327
0
        match ast.kind {
328
0
            Digit if ast.negated => self.wtr.write_str(r"\D"),
329
0
            Digit => self.wtr.write_str(r"\d"),
330
0
            Space if ast.negated => self.wtr.write_str(r"\S"),
331
0
            Space => self.wtr.write_str(r"\s"),
332
0
            Word if ast.negated => self.wtr.write_str(r"\W"),
333
0
            Word => self.wtr.write_str(r"\w"),
334
        }
335
0
    }
336
337
0
    fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
338
        use crate::ast::ClassAsciiKind::*;
339
0
        match ast.kind {
340
0
            Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
341
0
            Alnum => self.wtr.write_str("[:alnum:]"),
342
0
            Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
343
0
            Alpha => self.wtr.write_str("[:alpha:]"),
344
0
            Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
345
0
            Ascii => self.wtr.write_str("[:ascii:]"),
346
0
            Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
347
0
            Blank => self.wtr.write_str("[:blank:]"),
348
0
            Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
349
0
            Cntrl => self.wtr.write_str("[:cntrl:]"),
350
0
            Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
351
0
            Digit => self.wtr.write_str("[:digit:]"),
352
0
            Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
353
0
            Graph => self.wtr.write_str("[:graph:]"),
354
0
            Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
355
0
            Lower => self.wtr.write_str("[:lower:]"),
356
0
            Print if ast.negated => self.wtr.write_str("[:^print:]"),
357
0
            Print => self.wtr.write_str("[:print:]"),
358
0
            Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
359
0
            Punct => self.wtr.write_str("[:punct:]"),
360
0
            Space if ast.negated => self.wtr.write_str("[:^space:]"),
361
0
            Space => self.wtr.write_str("[:space:]"),
362
0
            Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
363
0
            Upper => self.wtr.write_str("[:upper:]"),
364
0
            Word if ast.negated => self.wtr.write_str("[:^word:]"),
365
0
            Word => self.wtr.write_str("[:word:]"),
366
0
            Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
367
0
            Xdigit => self.wtr.write_str("[:xdigit:]"),
368
        }
369
0
    }
370
371
0
    fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
372
        use crate::ast::ClassUnicodeKind::*;
373
        use crate::ast::ClassUnicodeOpKind::*;
374
375
0
        if ast.negated {
376
0
            self.wtr.write_str(r"\P")?;
377
        } else {
378
0
            self.wtr.write_str(r"\p")?;
379
        }
380
0
        match ast.kind {
381
0
            OneLetter(c) => self.wtr.write_char(c),
382
0
            Named(ref x) => write!(self.wtr, "{{{}}}", x),
383
0
            NamedValue { op: Equal, ref name, ref value } => {
384
0
                write!(self.wtr, "{{{}={}}}", name, value)
385
            }
386
0
            NamedValue { op: Colon, ref name, ref value } => {
387
0
                write!(self.wtr, "{{{}:{}}}", name, value)
388
            }
389
0
            NamedValue { op: NotEqual, ref name, ref value } => {
390
0
                write!(self.wtr, "{{{}!={}}}", name, value)
391
            }
392
        }
393
0
    }
394
}
395
396
#[cfg(test)]
397
mod tests {
398
    use super::Printer;
399
    use crate::ast::parse::ParserBuilder;
400
401
    fn roundtrip(given: &str) {
402
        roundtrip_with(|b| b, given);
403
    }
404
405
    fn roundtrip_with<F>(mut f: F, given: &str)
406
    where
407
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
408
    {
409
        let mut builder = ParserBuilder::new();
410
        f(&mut builder);
411
        let ast = builder.build().parse(given).unwrap();
412
413
        let mut printer = Printer::new();
414
        let mut dst = String::new();
415
        printer.print(&ast, &mut dst).unwrap();
416
        assert_eq!(given, dst);
417
    }
418
419
    #[test]
420
    fn print_literal() {
421
        roundtrip("a");
422
        roundtrip(r"\[");
423
        roundtrip_with(|b| b.octal(true), r"\141");
424
        roundtrip(r"\x61");
425
        roundtrip(r"\x7F");
426
        roundtrip(r"\u0061");
427
        roundtrip(r"\U00000061");
428
        roundtrip(r"\x{61}");
429
        roundtrip(r"\x{7F}");
430
        roundtrip(r"\u{61}");
431
        roundtrip(r"\U{61}");
432
433
        roundtrip(r"\a");
434
        roundtrip(r"\f");
435
        roundtrip(r"\t");
436
        roundtrip(r"\n");
437
        roundtrip(r"\r");
438
        roundtrip(r"\v");
439
        roundtrip(r"(?x)\ ");
440
    }
441
442
    #[test]
443
    fn print_dot() {
444
        roundtrip(".");
445
    }
446
447
    #[test]
448
    fn print_concat() {
449
        roundtrip("ab");
450
        roundtrip("abcde");
451
        roundtrip("a(bcd)ef");
452
    }
453
454
    #[test]
455
    fn print_alternation() {
456
        roundtrip("a|b");
457
        roundtrip("a|b|c|d|e");
458
        roundtrip("|a|b|c|d|e");
459
        roundtrip("|a|b|c|d|e|");
460
        roundtrip("a(b|c|d)|e|f");
461
    }
462
463
    #[test]
464
    fn print_assertion() {
465
        roundtrip(r"^");
466
        roundtrip(r"$");
467
        roundtrip(r"\A");
468
        roundtrip(r"\z");
469
        roundtrip(r"\b");
470
        roundtrip(r"\B");
471
    }
472
473
    #[test]
474
    fn print_repetition() {
475
        roundtrip("a?");
476
        roundtrip("a??");
477
        roundtrip("a*");
478
        roundtrip("a*?");
479
        roundtrip("a+");
480
        roundtrip("a+?");
481
        roundtrip("a{5}");
482
        roundtrip("a{5}?");
483
        roundtrip("a{5,}");
484
        roundtrip("a{5,}?");
485
        roundtrip("a{5,10}");
486
        roundtrip("a{5,10}?");
487
    }
488
489
    #[test]
490
    fn print_flags() {
491
        roundtrip("(?i)");
492
        roundtrip("(?-i)");
493
        roundtrip("(?s-i)");
494
        roundtrip("(?-si)");
495
        roundtrip("(?siUmux)");
496
    }
497
498
    #[test]
499
    fn print_group() {
500
        roundtrip("(?i:a)");
501
        roundtrip("(?P<foo>a)");
502
        roundtrip("(a)");
503
    }
504
505
    #[test]
506
    fn print_class() {
507
        roundtrip(r"[abc]");
508
        roundtrip(r"[a-z]");
509
        roundtrip(r"[^a-z]");
510
        roundtrip(r"[a-z0-9]");
511
        roundtrip(r"[-a-z0-9]");
512
        roundtrip(r"[-a-z0-9]");
513
        roundtrip(r"[a-z0-9---]");
514
        roundtrip(r"[a-z&&m-n]");
515
        roundtrip(r"[[a-z&&m-n]]");
516
        roundtrip(r"[a-z--m-n]");
517
        roundtrip(r"[a-z~~m-n]");
518
        roundtrip(r"[a-z[0-9]]");
519
        roundtrip(r"[a-z[^0-9]]");
520
521
        roundtrip(r"\d");
522
        roundtrip(r"\D");
523
        roundtrip(r"\s");
524
        roundtrip(r"\S");
525
        roundtrip(r"\w");
526
        roundtrip(r"\W");
527
528
        roundtrip(r"[[:alnum:]]");
529
        roundtrip(r"[[:^alnum:]]");
530
        roundtrip(r"[[:alpha:]]");
531
        roundtrip(r"[[:^alpha:]]");
532
        roundtrip(r"[[:ascii:]]");
533
        roundtrip(r"[[:^ascii:]]");
534
        roundtrip(r"[[:blank:]]");
535
        roundtrip(r"[[:^blank:]]");
536
        roundtrip(r"[[:cntrl:]]");
537
        roundtrip(r"[[:^cntrl:]]");
538
        roundtrip(r"[[:digit:]]");
539
        roundtrip(r"[[:^digit:]]");
540
        roundtrip(r"[[:graph:]]");
541
        roundtrip(r"[[:^graph:]]");
542
        roundtrip(r"[[:lower:]]");
543
        roundtrip(r"[[:^lower:]]");
544
        roundtrip(r"[[:print:]]");
545
        roundtrip(r"[[:^print:]]");
546
        roundtrip(r"[[:punct:]]");
547
        roundtrip(r"[[:^punct:]]");
548
        roundtrip(r"[[:space:]]");
549
        roundtrip(r"[[:^space:]]");
550
        roundtrip(r"[[:upper:]]");
551
        roundtrip(r"[[:^upper:]]");
552
        roundtrip(r"[[:word:]]");
553
        roundtrip(r"[[:^word:]]");
554
        roundtrip(r"[[:xdigit:]]");
555
        roundtrip(r"[[:^xdigit:]]");
556
557
        roundtrip(r"\pL");
558
        roundtrip(r"\PL");
559
        roundtrip(r"\p{L}");
560
        roundtrip(r"\P{L}");
561
        roundtrip(r"\p{X=Y}");
562
        roundtrip(r"\P{X=Y}");
563
        roundtrip(r"\p{X:Y}");
564
        roundtrip(r"\P{X:Y}");
565
        roundtrip(r"\p{X!=Y}");
566
        roundtrip(r"\P{X!=Y}");
567
    }
568
}