Coverage Report

Created: 2026-02-14 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/pest/meta/src/optimizer/mod.rs
Line
Count
Source
1
// pest. The Elegant Parser
2
// Copyright (c) 2018 DragoČ™ Tiselice
3
//
4
// Licensed under the Apache License, Version 2.0
5
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. All files in the project carrying such notice may not be copied,
8
// modified, or distributed except according to those terms.
9
10
//! Different optimizations for pest's ASTs.
11
12
use crate::ast::*;
13
use std::collections::HashMap;
14
15
#[cfg(test)]
16
macro_rules! box_tree {
17
    ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18
      $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19
    );
20
    ($expr:expr) => ($expr);
21
}
22
23
mod concatenator;
24
mod factorizer;
25
mod lister;
26
mod restorer;
27
mod rotator;
28
mod skipper;
29
mod unroller;
30
31
/// Takes pest's ASTs and optimizes them
32
0
pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33
0
    let map = to_hash_map(&rules);
34
0
    let optimized: Vec<OptimizedRule> = rules
35
0
        .into_iter()
36
0
        .map(rotator::rotate)
37
0
        .map(|rule| skipper::skip(rule, &map))
38
0
        .map(unroller::unroll)
39
0
        .map(concatenator::concatenate)
40
0
        .map(factorizer::factor)
41
0
        .map(lister::list)
42
0
        .map(rule_to_optimized_rule)
43
0
        .collect();
44
45
0
    let optimized_map = to_optimized_hash_map(&optimized);
46
0
    optimized
47
0
        .into_iter()
48
0
        .map(|rule| restorer::restore_on_err(rule, &optimized_map))
49
0
        .collect()
50
0
}
51
52
0
fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
53
0
    fn to_optimized(expr: Expr) -> OptimizedExpr {
54
0
        match expr {
55
0
            Expr::Str(string) => OptimizedExpr::Str(string),
56
0
            Expr::Insens(string) => OptimizedExpr::Insens(string),
57
0
            Expr::Range(start, end) => OptimizedExpr::Range(start, end),
58
0
            Expr::Ident(ident) => OptimizedExpr::Ident(ident),
59
0
            Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
60
0
            Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
61
0
            Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
62
0
            Expr::Seq(lhs, rhs) => {
63
0
                OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
64
            }
65
0
            Expr::Choice(lhs, rhs) => {
66
0
                OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
67
            }
68
0
            Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
69
0
            Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
70
0
            Expr::Skip(strings) => OptimizedExpr::Skip(strings),
71
0
            Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
72
            #[cfg(feature = "grammar-extras")]
73
            Expr::PushLiteral(string) => OptimizedExpr::PushLiteral(string),
74
            #[cfg(feature = "grammar-extras")]
75
            Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag),
76
            #[cfg(feature = "grammar-extras")]
77
            Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))),
78
            #[cfg(not(feature = "grammar-extras"))]
79
0
            Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule"),
80
            Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => {
81
0
                unreachable!("No valid transformation to OptimizedRule")
82
            }
83
        }
84
0
    }
85
86
0
    OptimizedRule {
87
0
        name: rule.name,
88
0
        ty: rule.ty,
89
0
        expr: to_optimized(rule.expr),
90
0
    }
91
0
}
92
93
macro_rules! to_hash_map {
94
    ($func_name:ident, $rule:ty, $expr:ty) => {
95
0
        fn $func_name(rules: &[$rule]) -> HashMap<String, $expr> {
96
0
            rules
97
0
                .iter()
98
0
                .map(|r| (r.name.clone(), r.expr.clone()))
Unexecuted instantiation: pest_meta::optimizer::to_hash_map::{closure#0}
Unexecuted instantiation: pest_meta::optimizer::to_optimized_hash_map::{closure#0}
99
0
                .collect()
100
0
        }
Unexecuted instantiation: pest_meta::optimizer::to_hash_map
Unexecuted instantiation: pest_meta::optimizer::to_optimized_hash_map
101
    };
102
}
103
to_hash_map!(to_hash_map, Rule, Expr);
104
to_hash_map!(to_optimized_hash_map, OptimizedRule, OptimizedExpr);
105
106
/// The optimized version of the pest AST's `Rule`.
107
#[derive(Clone, Debug, Eq, PartialEq)]
108
pub struct OptimizedRule {
109
    /// The name of the rule.
110
    pub name: String,
111
    /// The type of the rule.
112
    pub ty: RuleType,
113
    /// The optimized expression of the rule.
114
    pub expr: OptimizedExpr,
115
}
116
117
/// The optimized version of the pest AST's `Expr`.
118
///
119
/// # Warning: Semantic Versioning
120
/// There may be non-breaking changes to the meta-grammar
121
/// between minor versions. Those non-breaking changes, however,
122
/// may translate into semver-breaking changes due to the additional variants
123
/// propagated from the `Rule` enum. This is a known issue and will be fixed in the
124
/// future (e.g. by increasing MSRV and non_exhaustive annotations).
125
#[derive(Clone, Debug, Eq, PartialEq)]
126
pub enum OptimizedExpr {
127
    /// Matches an exact string, e.g. `"a"`
128
    Str(String),
129
    /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
130
    Insens(String),
131
    /// Matches one character in the range, e.g. `'a'..'z'`
132
    Range(String, String),
133
    /// Matches the rule with the given name, e.g. `a`
134
    Ident(String),
135
    /// Matches a custom part of the stack, e.g. `PEEK[..]`
136
    PeekSlice(i32, Option<i32>),
137
    /// Positive lookahead; matches expression without making progress, e.g. `&e`
138
    PosPred(Box<OptimizedExpr>),
139
    /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
140
    NegPred(Box<OptimizedExpr>),
141
    /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
142
    Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
143
    /// Matches either of two expressions, e.g. `e1 | e2`
144
    Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
145
    /// Optionally matches an expression, e.g. `e?`
146
    Opt(Box<OptimizedExpr>),
147
    /// Matches an expression zero or more times, e.g. `e*`
148
    Rep(Box<OptimizedExpr>),
149
    /// Matches an expression one or more times, e.g. `e+`
150
    #[cfg(feature = "grammar-extras")]
151
    RepOnce(Box<OptimizedExpr>),
152
    /// Continues to match expressions until one of the strings in the `Vec` is found
153
    Skip(Vec<String>),
154
    /// Matches an expression and pushes it to the stack, e.g. `push(e)`
155
    Push(Box<OptimizedExpr>),
156
    /// Pushes a literal string to the stack, e.g. `push_literal("a")`
157
    #[cfg(feature = "grammar-extras")]
158
    PushLiteral(String),
159
    /// Matches an expression and assigns a label to it, e.g. #label = exp
160
    #[cfg(feature = "grammar-extras")]
161
    NodeTag(Box<OptimizedExpr>, String),
162
    /// Restores an expression's checkpoint
163
    RestoreOnErr(Box<OptimizedExpr>),
164
}
165
166
impl OptimizedExpr {
167
    /// Returns a top-down iterator over the `OptimizedExpr`.
168
0
    pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
169
0
        OptimizedExprTopDownIterator::new(self)
170
0
    }
171
172
    /// Applies `f` to the `OptimizedExpr` top-down.
173
    pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
174
    where
175
        F: FnMut(OptimizedExpr) -> OptimizedExpr,
176
    {
177
        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
178
        where
179
            F: FnMut(OptimizedExpr) -> OptimizedExpr,
180
        {
181
            let expr = f(expr);
182
183
            match expr {
184
                OptimizedExpr::PosPred(expr) => {
185
                    let mapped = Box::new(map_internal(*expr, f));
186
                    OptimizedExpr::PosPred(mapped)
187
                }
188
                OptimizedExpr::NegPred(expr) => {
189
                    let mapped = Box::new(map_internal(*expr, f));
190
                    OptimizedExpr::NegPred(mapped)
191
                }
192
                OptimizedExpr::Seq(lhs, rhs) => {
193
                    let mapped_lhs = Box::new(map_internal(*lhs, f));
194
                    let mapped_rhs = Box::new(map_internal(*rhs, f));
195
                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
196
                }
197
                OptimizedExpr::Choice(lhs, rhs) => {
198
                    let mapped_lhs = Box::new(map_internal(*lhs, f));
199
                    let mapped_rhs = Box::new(map_internal(*rhs, f));
200
                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
201
                }
202
                OptimizedExpr::Rep(expr) => {
203
                    let mapped = Box::new(map_internal(*expr, f));
204
                    OptimizedExpr::Rep(mapped)
205
                }
206
                OptimizedExpr::Opt(expr) => {
207
                    let mapped = Box::new(map_internal(*expr, f));
208
                    OptimizedExpr::Opt(mapped)
209
                }
210
                OptimizedExpr::Push(expr) => {
211
                    let mapped = Box::new(map_internal(*expr, f));
212
                    OptimizedExpr::Push(mapped)
213
                }
214
                expr => expr,
215
            }
216
        }
217
218
        map_internal(self, &mut f)
219
    }
220
221
    /// Applies `f` to the `OptimizedExpr` bottom-up.
222
0
    pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
223
0
    where
224
0
        F: FnMut(OptimizedExpr) -> OptimizedExpr,
225
    {
226
0
        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
227
0
        where
228
0
            F: FnMut(OptimizedExpr) -> OptimizedExpr,
229
        {
230
0
            let mapped = match expr {
231
0
                OptimizedExpr::PosPred(expr) => {
232
0
                    let mapped = Box::new(map_internal(*expr, f));
233
0
                    OptimizedExpr::PosPred(mapped)
234
                }
235
0
                OptimizedExpr::NegPred(expr) => {
236
0
                    let mapped = Box::new(map_internal(*expr, f));
237
0
                    OptimizedExpr::NegPred(mapped)
238
                }
239
0
                OptimizedExpr::Seq(lhs, rhs) => {
240
0
                    let mapped_lhs = Box::new(map_internal(*lhs, f));
241
0
                    let mapped_rhs = Box::new(map_internal(*rhs, f));
242
0
                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
243
                }
244
0
                OptimizedExpr::Choice(lhs, rhs) => {
245
0
                    let mapped_lhs = Box::new(map_internal(*lhs, f));
246
0
                    let mapped_rhs = Box::new(map_internal(*rhs, f));
247
0
                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
248
                }
249
0
                OptimizedExpr::Rep(expr) => {
250
0
                    let mapped = Box::new(map_internal(*expr, f));
251
0
                    OptimizedExpr::Rep(mapped)
252
                }
253
0
                OptimizedExpr::Opt(expr) => {
254
0
                    let mapped = Box::new(map_internal(*expr, f));
255
0
                    OptimizedExpr::Opt(mapped)
256
                }
257
0
                OptimizedExpr::Push(expr) => {
258
0
                    let mapped = Box::new(map_internal(*expr, f));
259
0
                    OptimizedExpr::Push(mapped)
260
                }
261
0
                expr => expr,
262
            };
263
264
0
            f(mapped)
265
0
        }
266
267
0
        map_internal(self, &mut f)
268
0
    }
269
}
270
271
impl core::fmt::Display for OptimizedExpr {
272
0
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
273
0
        match self {
274
0
            OptimizedExpr::Str(s) => write!(f, "{:?}", s),
275
0
            OptimizedExpr::Insens(s) => write!(f, "^{:?}", s),
276
0
            OptimizedExpr::Range(start, end) => {
277
0
                let start = start.chars().next().expect("Empty range start.");
278
0
                let end = end.chars().next().expect("Empty range end.");
279
0
                write!(f, "({:?}..{:?})", start, end)
280
            }
281
0
            OptimizedExpr::Ident(id) => write!(f, "{}", id),
282
0
            OptimizedExpr::PeekSlice(start, end) => match end {
283
0
                Some(end) => write!(f, "PEEK[{}..{}]", start, end),
284
0
                None => write!(f, "PEEK[{}..]", start),
285
            },
286
0
            OptimizedExpr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
287
0
            OptimizedExpr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
288
0
            OptimizedExpr::Seq(lhs, rhs) => {
289
0
                let mut nodes = Vec::new();
290
0
                nodes.push(lhs);
291
0
                let mut current = rhs;
292
0
                while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() {
293
0
                    nodes.push(lhs);
294
0
                    current = rhs;
295
0
                }
296
0
                nodes.push(current);
297
0
                let sequence = nodes
298
0
                    .iter()
299
0
                    .map(|node| format!("{}", node))
300
0
                    .collect::<Vec<_>>()
301
0
                    .join(" ~ ");
302
0
                write!(f, "({})", sequence)
303
            }
304
0
            OptimizedExpr::Choice(lhs, rhs) => {
305
0
                let mut nodes = Vec::new();
306
0
                nodes.push(lhs);
307
0
                let mut current = rhs;
308
0
                while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() {
309
0
                    nodes.push(lhs);
310
0
                    current = rhs;
311
0
                }
312
0
                nodes.push(current);
313
0
                let sequence = nodes
314
0
                    .iter()
315
0
                    .map(|node| format!("{}", node))
316
0
                    .collect::<Vec<_>>()
317
0
                    .join(" | ");
318
0
                write!(f, "({})", sequence)
319
            }
320
0
            OptimizedExpr::Opt(expr) => write!(f, "{}?", expr),
321
0
            OptimizedExpr::Rep(expr) => write!(f, "{}*", expr),
322
            #[cfg(feature = "grammar-extras")]
323
            OptimizedExpr::RepOnce(expr) => write!(f, "{}+", expr),
324
0
            OptimizedExpr::Skip(strings) => {
325
0
                let strings = strings
326
0
                    .iter()
327
0
                    .map(|s| format!("{:?}", s))
328
0
                    .collect::<Vec<_>>()
329
0
                    .join(" | ");
330
0
                write!(f, "(!({}) ~ ANY)*", strings)
331
            }
332
0
            OptimizedExpr::Push(expr) => write!(f, "PUSH({})", expr),
333
            #[cfg(feature = "grammar-extras")]
334
            OptimizedExpr::PushLiteral(s) => write!(f, "PUSH_LITERAL({:?})", s),
335
            #[cfg(feature = "grammar-extras")]
336
            OptimizedExpr::NodeTag(expr, tag) => {
337
                write!(f, "(#{} = {})", tag, expr)
338
            }
339
0
            OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f),
340
        }
341
0
    }
342
}
343
344
/// A top-down iterator over an `OptimizedExpr`.
345
pub struct OptimizedExprTopDownIterator {
346
    current: Option<OptimizedExpr>,
347
    next: Option<OptimizedExpr>,
348
    right_branches: Vec<OptimizedExpr>,
349
}
350
351
impl OptimizedExprTopDownIterator {
352
    /// Creates a new top down iterator from an `OptimizedExpr`.
353
0
    pub fn new(expr: &OptimizedExpr) -> Self {
354
0
        let mut iter = OptimizedExprTopDownIterator {
355
0
            current: None,
356
0
            next: None,
357
0
            right_branches: vec![],
358
0
        };
359
0
        iter.iterate_expr(expr.clone());
360
0
        iter
361
0
    }
362
363
0
    fn iterate_expr(&mut self, expr: OptimizedExpr) {
364
0
        self.current = Some(expr.clone());
365
0
        match expr {
366
0
            OptimizedExpr::Seq(lhs, rhs) => {
367
0
                self.right_branches.push(*rhs);
368
0
                self.next = Some(*lhs);
369
0
            }
370
0
            OptimizedExpr::Choice(lhs, rhs) => {
371
0
                self.right_branches.push(*rhs);
372
0
                self.next = Some(*lhs);
373
0
            }
374
0
            OptimizedExpr::PosPred(expr)
375
0
            | OptimizedExpr::NegPred(expr)
376
0
            | OptimizedExpr::Rep(expr)
377
0
            | OptimizedExpr::Opt(expr)
378
0
            | OptimizedExpr::Push(expr) => {
379
0
                self.next = Some(*expr);
380
0
            }
381
0
            _ => {
382
0
                self.next = None;
383
0
            }
384
        }
385
0
    }
386
}
387
388
impl Iterator for OptimizedExprTopDownIterator {
389
    type Item = OptimizedExpr;
390
391
0
    fn next(&mut self) -> Option<Self::Item> {
392
0
        let result = self.current.take();
393
394
0
        if let Some(expr) = self.next.take() {
395
0
            self.iterate_expr(expr);
396
0
        } else if let Some(expr) = self.right_branches.pop() {
397
0
            self.iterate_expr(expr);
398
0
        }
399
400
0
        result
401
0
    }
402
}
403
404
#[cfg(test)]
405
mod tests {
406
    use super::*;
407
408
    #[test]
409
    fn rotate() {
410
        let rules = {
411
            use crate::ast::Expr::*;
412
            vec![Rule {
413
                name: "rule".to_owned(),
414
                ty: RuleType::Normal,
415
                expr: box_tree!(Choice(
416
                    Choice(
417
                        Choice(Str(String::from("a")), Str(String::from("b"))),
418
                        Str(String::from("c"))
419
                    ),
420
                    Str(String::from("d"))
421
                )),
422
            }]
423
        };
424
        let rotated = {
425
            use crate::optimizer::OptimizedExpr::*;
426
            vec![OptimizedRule {
427
                name: "rule".to_owned(),
428
                ty: RuleType::Normal,
429
                expr: box_tree!(Choice(
430
                    Str(String::from("a")),
431
                    Choice(
432
                        Str(String::from("b")),
433
                        Choice(Str(String::from("c")), Str(String::from("d")))
434
                    )
435
                )),
436
            }]
437
        };
438
439
        assert_eq!(optimize(rules), rotated);
440
    }
441
442
    #[test]
443
    fn skip() {
444
        let rules = {
445
            use crate::ast::Expr::*;
446
            vec![Rule {
447
                name: "rule".to_owned(),
448
                ty: RuleType::Atomic,
449
                expr: box_tree!(Rep(Seq(
450
                    NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
451
                    Ident(String::from("ANY"))
452
                ))),
453
            }]
454
        };
455
        let skipped = vec![OptimizedRule {
456
            name: "rule".to_owned(),
457
            ty: RuleType::Atomic,
458
            expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
459
        }];
460
461
        assert_eq!(optimize(rules), skipped);
462
    }
463
464
    #[test]
465
    fn concat_strings() {
466
        let rules = {
467
            use crate::ast::Expr::*;
468
            vec![Rule {
469
                name: "rule".to_owned(),
470
                ty: RuleType::Atomic,
471
                expr: box_tree!(Seq(
472
                    Seq(Str(String::from("a")), Str(String::from("b"))),
473
                    Seq(Str(String::from("c")), Str(String::from("d")))
474
                )),
475
            }]
476
        };
477
        let concatenated = vec![OptimizedRule {
478
            name: "rule".to_owned(),
479
            ty: RuleType::Atomic,
480
            expr: OptimizedExpr::Str(String::from("abcd")),
481
        }];
482
483
        assert_eq!(optimize(rules), concatenated);
484
    }
485
486
    #[test]
487
    fn unroll_loop_exact() {
488
        let rules = vec![Rule {
489
            name: "rule".to_owned(),
490
            ty: RuleType::Atomic,
491
            expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
492
        }];
493
        let unrolled = {
494
            use crate::optimizer::OptimizedExpr::*;
495
            vec![OptimizedRule {
496
                name: "rule".to_owned(),
497
                ty: RuleType::Atomic,
498
                expr: box_tree!(Seq(
499
                    Ident(String::from("a")),
500
                    Seq(Ident(String::from("a")), Ident(String::from("a")))
501
                )),
502
            }]
503
        };
504
505
        assert_eq!(optimize(rules), unrolled);
506
    }
507
508
    #[test]
509
    fn unroll_loop_max() {
510
        let rules = vec![Rule {
511
            name: "rule".to_owned(),
512
            ty: RuleType::Atomic,
513
            expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
514
        }];
515
        let unrolled = {
516
            use crate::optimizer::OptimizedExpr::*;
517
            vec![OptimizedRule {
518
                name: "rule".to_owned(),
519
                ty: RuleType::Atomic,
520
                expr: box_tree!(Seq(
521
                    Opt(Str(String::from("a"))),
522
                    Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
523
                )),
524
            }]
525
        };
526
527
        assert_eq!(optimize(rules), unrolled);
528
    }
529
530
    #[test]
531
    fn unroll_loop_min() {
532
        let rules = vec![Rule {
533
            name: "rule".to_owned(),
534
            ty: RuleType::Atomic,
535
            expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
536
        }];
537
        let unrolled = {
538
            use crate::optimizer::OptimizedExpr::*;
539
            vec![OptimizedRule {
540
                name: "rule".to_owned(),
541
                ty: RuleType::Atomic,
542
                expr: box_tree!(Seq(
543
                    Str(String::from("a")),
544
                    Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
545
                )),
546
            }]
547
        };
548
549
        assert_eq!(optimize(rules), unrolled);
550
    }
551
552
    #[test]
553
    fn unroll_loop_min_max() {
554
        let rules = vec![Rule {
555
            name: "rule".to_owned(),
556
            ty: RuleType::Atomic,
557
            expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
558
        }];
559
        let unrolled = {
560
            use crate::optimizer::OptimizedExpr::*;
561
            vec![OptimizedRule {
562
                name: "rule".to_owned(),
563
                ty: RuleType::Atomic,
564
                /* TODO possible room for improvement here:
565
                 * if the sequences were rolled out in the opposite
566
                 * order, we could further optimize the strings
567
                 * in cases like this.
568
                Str(String::from(("aa")),
569
                Opt(Str(String::from("a")))
570
                */
571
                expr: box_tree!(Seq(
572
                    Str(String::from("a")),
573
                    Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
574
                )),
575
            }]
576
        };
577
578
        assert_eq!(optimize(rules), unrolled);
579
    }
580
581
    #[test]
582
    fn concat_insensitive_strings() {
583
        let rules = {
584
            use crate::ast::Expr::*;
585
            vec![Rule {
586
                name: "rule".to_owned(),
587
                ty: RuleType::Atomic,
588
                expr: box_tree!(Seq(
589
                    Seq(Insens(String::from("a")), Insens(String::from("b"))),
590
                    Seq(Insens(String::from("c")), Insens(String::from("d")))
591
                )),
592
            }]
593
        };
594
        let concatenated = vec![OptimizedRule {
595
            name: "rule".to_owned(),
596
            ty: RuleType::Atomic,
597
            expr: OptimizedExpr::Insens(String::from("abcd")),
598
        }];
599
600
        assert_eq!(optimize(rules), concatenated);
601
    }
602
603
    #[test]
604
    fn long_common_sequence() {
605
        let rules = {
606
            use crate::ast::Expr::*;
607
            vec![Rule {
608
                name: "rule".to_owned(),
609
                ty: RuleType::Silent,
610
                expr: box_tree!(Choice(
611
                    Seq(
612
                        Ident(String::from("a")),
613
                        Seq(Ident(String::from("b")), Ident(String::from("c")))
614
                    ),
615
                    Seq(
616
                        Seq(Ident(String::from("a")), Ident(String::from("b"))),
617
                        Ident(String::from("d"))
618
                    )
619
                )),
620
            }]
621
        };
622
        let optimized = {
623
            use crate::optimizer::OptimizedExpr::*;
624
            vec![OptimizedRule {
625
                name: "rule".to_owned(),
626
                ty: RuleType::Silent,
627
                expr: box_tree!(Seq(
628
                    Ident(String::from("a")),
629
                    Seq(
630
                        Ident(String::from("b")),
631
                        Choice(Ident(String::from("c")), Ident(String::from("d")))
632
                    )
633
                )),
634
            }]
635
        };
636
637
        assert_eq!(optimize(rules), optimized);
638
    }
639
640
    #[test]
641
    fn short_common_sequence() {
642
        let rules = {
643
            use crate::ast::Expr::*;
644
            vec![Rule {
645
                name: "rule".to_owned(),
646
                ty: RuleType::Atomic,
647
                expr: box_tree!(Choice(
648
                    Seq(Ident(String::from("a")), Ident(String::from("b"))),
649
                    Ident(String::from("a"))
650
                )),
651
            }]
652
        };
653
        let optimized = {
654
            use crate::optimizer::OptimizedExpr::*;
655
            vec![OptimizedRule {
656
                name: "rule".to_owned(),
657
                ty: RuleType::Atomic,
658
                expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
659
            }]
660
        };
661
662
        assert_eq!(optimize(rules), optimized);
663
    }
664
665
    #[test]
666
    fn impossible_common_sequence() {
667
        let rules = {
668
            use crate::ast::Expr::*;
669
            vec![Rule {
670
                name: "rule".to_owned(),
671
                ty: RuleType::Silent,
672
                expr: box_tree!(Choice(
673
                    Ident(String::from("a")),
674
                    Seq(Ident(String::from("a")), Ident(String::from("b")))
675
                )),
676
            }]
677
        };
678
        let optimized = {
679
            use crate::optimizer::OptimizedExpr::*;
680
            vec![OptimizedRule {
681
                name: "rule".to_owned(),
682
                ty: RuleType::Silent,
683
                expr: box_tree!(Ident(String::from("a"))),
684
            }]
685
        };
686
687
        assert_eq!(optimize(rules), optimized);
688
    }
689
690
    #[test]
691
    fn lister() {
692
        let rules = {
693
            use crate::ast::Expr::*;
694
            vec![Rule {
695
                name: "rule".to_owned(),
696
                ty: RuleType::Silent,
697
                expr: box_tree!(Seq(
698
                    Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
699
                    Ident(String::from("a"))
700
                )),
701
            }]
702
        };
703
        let optimized = {
704
            use crate::optimizer::OptimizedExpr::*;
705
            vec![OptimizedRule {
706
                name: "rule".to_owned(),
707
                ty: RuleType::Silent,
708
                expr: box_tree!(Seq(
709
                    Ident(String::from("a")),
710
                    Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
711
                )),
712
            }]
713
        };
714
715
        assert_eq!(optimize(rules), optimized);
716
    }
717
718
    mod display {
719
        use super::super::*;
720
        /// In previous implementation of Display for OptimizedExpr
721
        /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me),
722
        /// Str("\n") will be displayed as
723
        /// "
724
        /// "
725
        ///
726
        /// It will not break the compilation in normal use.
727
        ///
728
        /// But when I use it in automatically generating documents,
729
        /// it will quite confusing and we'll be unable to distinguish \n and \r.
730
        ///
731
        /// And `cargo expand` will emit codes that can't be compiled,
732
        /// for it expand `#[doc("...")]` to `/// ...`,
733
        /// and when the document comment breaks the line,
734
        /// it will be expanded into wrong codes.
735
        #[test]
736
        fn control_character() {
737
            assert_eq!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\\n\"");
738
            assert_eq!(
739
                OptimizedExpr::Insens("\n".to_owned()).to_string(),
740
                "^\"\\n\"",
741
            );
742
            assert_eq!(
743
                OptimizedExpr::Range("\n".to_owned(), "\r".to_owned()).to_string(),
744
                "('\\n'..'\\r')",
745
            );
746
            assert_eq!(
747
                OptimizedExpr::Skip(vec![
748
                    "\n".to_owned(),
749
                    "\r".to_owned(),
750
                    "\n\r".to_owned(),
751
                    "\0".to_owned(),
752
                ])
753
                .to_string(),
754
                r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"#,
755
            );
756
757
            assert_ne!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\n\"");
758
        }
759
760
        #[test]
761
        fn str() {
762
            assert_eq!(OptimizedExpr::Str("a".to_owned()).to_string(), r#""a""#);
763
        }
764
765
        #[test]
766
        fn insens() {
767
            assert_eq!(OptimizedExpr::Insens("a".to_owned()).to_string(), r#"^"a""#);
768
        }
769
770
        #[test]
771
        fn range() {
772
            assert_eq!(
773
                OptimizedExpr::Range("a".to_owned(), "z".to_owned()).to_string(),
774
                r#"('a'..'z')"#,
775
            );
776
        }
777
778
        #[test]
779
        fn ident() {
780
            assert_eq!(OptimizedExpr::Ident("a".to_owned()).to_string(), r#"a"#);
781
        }
782
783
        #[test]
784
        fn peek_slice() {
785
            assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
786
            assert_eq!(
787
                OptimizedExpr::PeekSlice(0, Some(-1)).to_string(),
788
                "PEEK[0..-1]",
789
            );
790
            assert_eq!(
791
                OptimizedExpr::PeekSlice(2, Some(3)).to_string(),
792
                "PEEK[2..3]",
793
            );
794
            assert_eq!(
795
                OptimizedExpr::PeekSlice(2, Some(-1)).to_string(),
796
                "PEEK[2..-1]",
797
            );
798
            assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
799
        }
800
801
        #[test]
802
        fn pos_pred() {
803
            assert_eq!(
804
                OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new(
805
                    OptimizedExpr::Ident("a".to_owned()),
806
                ))))
807
                .to_string(),
808
                "&!a",
809
            );
810
            assert_eq!(
811
                OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice(
812
                    Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident(
813
                        "a".to_owned(),
814
                    )))),
815
                    Box::new(OptimizedExpr::Str("a".to_owned())),
816
                )))
817
                .to_string(),
818
                r#"&(a* | "a")"#,
819
            );
820
            assert_eq!(
821
                OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new(
822
                    OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a".to_owned()))),
823
                ))))
824
                .to_string(),
825
                "&!a",
826
            );
827
        }
828
829
        #[test]
830
        fn neg_pred() {
831
            assert_eq!(
832
                OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
833
                r#"!e"#,
834
            );
835
            assert_eq!(
836
                OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice(
837
                    Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
838
                        "a".to_owned(),
839
                    )))),
840
                    Box::new(OptimizedExpr::Str("a".to_owned())),
841
                )))
842
                .to_string(),
843
                r#"!(PUSH(a) | "a")"#,
844
            );
845
        }
846
847
        #[test]
848
        fn seq() {
849
            assert_eq!(
850
                OptimizedExpr::Seq(
851
                    Box::new(OptimizedExpr::Ident("e1".to_owned())),
852
                    Box::new(OptimizedExpr::Ident("e2".to_owned())),
853
                )
854
                .to_string(),
855
                r#"(e1 ~ e2)"#,
856
            );
857
            assert_eq!(
858
                Expr::Seq(
859
                    Box::new(Expr::Ident("e1".to_owned())),
860
                    Box::new(Expr::Seq(
861
                        Box::new(Expr::Ident("e2".to_owned())),
862
                        Box::new(Expr::Ident("e3".to_owned())),
863
                    )),
864
                )
865
                .to_string(),
866
                "(e1 ~ e2 ~ e3)",
867
            );
868
            assert_eq!(
869
                Expr::Seq(
870
                    Box::new(Expr::Ident("e1".to_owned())),
871
                    Box::new(Expr::Seq(
872
                        Box::new(Expr::Ident("e2".to_owned())),
873
                        Box::new(Expr::Seq(
874
                            Box::new(Expr::Ident("e3".to_owned())),
875
                            Box::new(Expr::Ident("e4".to_owned())),
876
                        )),
877
                    )),
878
                )
879
                .to_string(),
880
                "(e1 ~ e2 ~ e3 ~ e4)",
881
            );
882
            assert_eq!(
883
                Expr::Seq(
884
                    Box::new(Expr::Ident("e1".to_owned())),
885
                    Box::new(Expr::Choice(
886
                        Box::new(Expr::Ident("e2".to_owned())),
887
                        Box::new(Expr::Seq(
888
                            Box::new(Expr::Ident("e3".to_owned())),
889
                            Box::new(Expr::Ident("e4".to_owned())),
890
                        )),
891
                    )),
892
                )
893
                .to_string(),
894
                "(e1 ~ (e2 | (e3 ~ e4)))",
895
            );
896
            assert_eq!(
897
                Expr::Seq(
898
                    Box::new(Expr::Ident("e1".to_owned())),
899
                    Box::new(Expr::Seq(
900
                        Box::new(Expr::Ident("e2".to_owned())),
901
                        Box::new(Expr::Choice(
902
                            Box::new(Expr::Ident("e3".to_owned())),
903
                            Box::new(Expr::Ident("e4".to_owned())),
904
                        )),
905
                    )),
906
                )
907
                .to_string(),
908
                "(e1 ~ e2 ~ (e3 | e4))",
909
            );
910
            assert_eq!(
911
                OptimizedExpr::Seq(
912
                    Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str(
913
                        "a".to_owned(),
914
                    )))),
915
                    Box::new(OptimizedExpr::Seq(
916
                        Box::new(OptimizedExpr::Ident("b".to_owned())),
917
                        Box::new(OptimizedExpr::Insens("c".to_owned())),
918
                    )),
919
                )
920
                .to_string(),
921
                r#"("a"* ~ b ~ ^"c")"#,
922
            );
923
            assert_eq!(
924
                OptimizedExpr::Seq(
925
                    Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range(
926
                        "a".to_owned(),
927
                        "z".to_owned(),
928
                    )))),
929
                    Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt(
930
                        Box::new(OptimizedExpr::Range("A".to_owned(), "Z".to_owned())),
931
                    )))),
932
                )
933
                .to_string(),
934
                "(&('a'..'z') ~ !('A'..'Z')?)",
935
            );
936
        }
937
938
        #[test]
939
        fn choice() {
940
            assert_eq!(
941
                OptimizedExpr::Choice(
942
                    Box::new(OptimizedExpr::Ident("e1".to_owned())),
943
                    Box::new(OptimizedExpr::Ident("e2".to_owned())),
944
                )
945
                .to_string(),
946
                r#"(e1 | e2)"#,
947
            );
948
            assert_eq!(
949
                Expr::Choice(
950
                    Box::new(Expr::Ident("e1".to_owned())),
951
                    Box::new(Expr::Choice(
952
                        Box::new(Expr::Ident("e2".to_owned())),
953
                        Box::new(Expr::Ident("e3".to_owned())),
954
                    )),
955
                )
956
                .to_string(),
957
                "(e1 | e2 | e3)",
958
            );
959
            assert_eq!(
960
                Expr::Choice(
961
                    Box::new(Expr::Ident("e1".to_owned())),
962
                    Box::new(Expr::Choice(
963
                        Box::new(Expr::Ident("e2".to_owned())),
964
                        Box::new(Expr::Choice(
965
                            Box::new(Expr::Ident("e3".to_owned())),
966
                            Box::new(Expr::Ident("e4".to_owned())),
967
                        )),
968
                    )),
969
                )
970
                .to_string(),
971
                "(e1 | e2 | e3 | e4)",
972
            );
973
            assert_eq!(
974
                Expr::Choice(
975
                    Box::new(Expr::Ident("e1".to_owned())),
976
                    Box::new(Expr::Seq(
977
                        Box::new(Expr::Ident("e2".to_owned())),
978
                        Box::new(Expr::Choice(
979
                            Box::new(Expr::Ident("e3".to_owned())),
980
                            Box::new(Expr::Ident("e4".to_owned())),
981
                        )),
982
                    )),
983
                )
984
                .to_string(),
985
                "(e1 | (e2 ~ (e3 | e4)))",
986
            );
987
            assert_eq!(
988
                OptimizedExpr::Choice(
989
                    Box::new(OptimizedExpr::Str("a".to_owned())),
990
                    Box::new(OptimizedExpr::Choice(
991
                        Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
992
                            "b".to_owned(),
993
                        )))),
994
                        Box::new(OptimizedExpr::Insens("c".to_owned())),
995
                    )),
996
                )
997
                .to_string(),
998
                r#"("a" | PUSH(b) | ^"c")"#,
999
            );
1000
        }
1001
1002
        #[test]
1003
        fn opt() {
1004
            assert_eq!(
1005
                OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1006
                "e?",
1007
            );
1008
        }
1009
1010
        #[test]
1011
        fn rep() {
1012
            assert_eq!(
1013
                OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x".to_owned()))).to_string(),
1014
                "x*",
1015
            );
1016
            assert_eq!(
1017
                OptimizedExpr::Rep(Box::new(OptimizedExpr::Range(
1018
                    "0".to_owned(),
1019
                    "9".to_owned(),
1020
                )))
1021
                .to_string(),
1022
                "('0'..'9')*",
1023
            );
1024
        }
1025
1026
        #[test]
1027
        #[cfg(feature = "grammar-extras")]
1028
        fn rep_once() {
1029
            assert_eq!(
1030
                OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1031
                "e+",
1032
            );
1033
            assert_eq!(
1034
                OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range(
1035
                    "0".to_owned(),
1036
                    "9".to_owned(),
1037
                )))
1038
                .to_string(),
1039
                "('0'..'9')+",
1040
            );
1041
        }
1042
1043
        #[test]
1044
        fn skip() {
1045
            assert_eq!(
1046
                OptimizedExpr::Skip(
1047
                    ["a", "bc"]
1048
                        .into_iter()
1049
                        .map(|s| s.to_owned())
1050
                        .collect::<Vec<_>>(),
1051
                )
1052
                .to_string(),
1053
                r#"(!("a" | "bc") ~ ANY)*"#,
1054
            );
1055
        }
1056
1057
        #[test]
1058
        fn inline_skip() {
1059
            use crate::ast::Expr::*;
1060
            let rules = vec![
1061
                Rule {
1062
                    name: "inline".to_owned(),
1063
                    ty: RuleType::Atomic,
1064
                    expr: Str("a".to_owned()),
1065
                },
1066
                Rule {
1067
                    name: "skip".to_owned(),
1068
                    ty: RuleType::Atomic,
1069
                    expr: box_tree!(Rep(Seq(
1070
                        NegPred(Choice(
1071
                            Ident(String::from("inline")),
1072
                            Str(String::from("b"))
1073
                        )),
1074
                        Ident("ANY".to_owned())
1075
                    ))),
1076
                },
1077
            ];
1078
            let map = to_hash_map(&rules);
1079
            let rule = skipper::skip(rules[1].clone(), &map);
1080
            assert!(matches!(rule, Rule { expr: Skip(..), .. }));
1081
            let choices = match rule.expr {
1082
                Skip(choices) => choices,
1083
                _ => unreachable!(),
1084
            };
1085
            assert_eq!(choices, vec!["a".to_owned(), "b".to_owned()]);
1086
        }
1087
1088
        #[test]
1089
        fn push() {
1090
            assert_eq!(
1091
                OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1092
                "PUSH(e)",
1093
            );
1094
        }
1095
1096
        #[test]
1097
        #[cfg(feature = "grammar-extras")]
1098
        fn push_literal() {
1099
            assert_eq!(
1100
                OptimizedExpr::PushLiteral("a \" b".to_owned()).to_string(),
1101
                r#"PUSH_LITERAL("a \" b")"#,
1102
            );
1103
        }
1104
1105
        #[test]
1106
        #[cfg(feature = "grammar-extras")]
1107
        fn node_tag() {
1108
            assert_eq!(
1109
                OptimizedExpr::NodeTag(
1110
                    Box::new(OptimizedExpr::Ident("expr".to_owned())),
1111
                    "label".to_owned(),
1112
                )
1113
                .to_string(),
1114
                r#"(#label = expr)"#,
1115
            );
1116
            assert_eq!(
1117
                OptimizedExpr::NodeTag(
1118
                    Box::new(OptimizedExpr::Ident("x".to_owned())),
1119
                    "X".to_owned(),
1120
                )
1121
                .to_string(),
1122
                r#"(#X = x)"#,
1123
            );
1124
            assert_eq!(
1125
                OptimizedExpr::NodeTag(
1126
                    Box::new(OptimizedExpr::Seq(
1127
                        Box::new(OptimizedExpr::Ident("x".to_owned())),
1128
                        Box::new(OptimizedExpr::Str("y".to_owned())),
1129
                    )),
1130
                    "X".to_owned(),
1131
                )
1132
                .to_string(),
1133
                r#"(#X = (x ~ "y"))"#,
1134
            );
1135
        }
1136
1137
        #[test]
1138
        fn restore_on_err() {
1139
            assert_eq!(
1140
                OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e".to_owned())))
1141
                    .to_string(),
1142
                "e",
1143
            );
1144
        }
1145
    }
1146
}