/rust/registry/src/index.crates.io-6f17d22bba15001f/pest-2.7.15/src/pratt_parser.rs
Line | Count | Source (jump to first uncovered line) |
1 | | // pest. The Elegant Parser |
2 | | // Copyright (c) 2018 DragoČ™ Tiselice |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 |
5 | | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | | // option. All files in the project carrying such notice may not be copied, |
8 | | // modified, or distributed except according to those terms. |
9 | | |
10 | | //! Constructs useful in prefix, postfix, and infix operator parsing with the |
11 | | //! Pratt parsing method. |
12 | | |
13 | | use core::iter::Peekable; |
14 | | use core::marker::PhantomData; |
15 | | use core::ops::BitOr; |
16 | | |
17 | | use alloc::boxed::Box; |
18 | | use alloc::collections::BTreeMap; |
19 | | |
20 | | use crate::iterators::Pair; |
21 | | use crate::RuleType; |
22 | | |
23 | | /// Associativity of an infix binary operator, used by [`Op::infix(Assoc)`]. |
24 | | /// |
25 | | /// [`Op::infix(Assoc)`]: struct.Op.html |
26 | | #[derive(Clone, Copy, Debug, Eq, PartialEq)] |
27 | | pub enum Assoc { |
28 | | /// Left operator associativity. Evaluate expressions from left-to-right. |
29 | | Left, |
30 | | /// Right operator associativity. Evaluate expressions from right-to-left. |
31 | | Right, |
32 | | } |
33 | | |
34 | | type Prec = u32; |
35 | | const PREC_STEP: Prec = 10; |
36 | | |
37 | | /// An operator that corresponds to a rule. |
38 | | pub struct Op<R: RuleType> { |
39 | | rule: R, |
40 | | affix: Affix, |
41 | | next: Option<Box<Op<R>>>, |
42 | | } |
43 | | |
44 | | enum Affix { |
45 | | Prefix, |
46 | | Postfix, |
47 | | Infix(Assoc), |
48 | | } |
49 | | |
50 | | impl<R: RuleType> Op<R> { |
51 | | /// Defines `rule` as a prefix unary operator. |
52 | 0 | pub fn prefix(rule: R) -> Self { |
53 | 0 | Self { |
54 | 0 | rule, |
55 | 0 | affix: Affix::Prefix, |
56 | 0 | next: None, |
57 | 0 | } |
58 | 0 | } |
59 | | |
60 | | /// Defines `rule` as a postfix unary operator. |
61 | 0 | pub fn postfix(rule: R) -> Self { |
62 | 0 | Self { |
63 | 0 | rule, |
64 | 0 | affix: Affix::Postfix, |
65 | 0 | next: None, |
66 | 0 | } |
67 | 0 | } |
68 | | |
69 | | /// Defines `rule` as an infix binary operator with associativity `assoc`. |
70 | 0 | pub fn infix(rule: R, assoc: Assoc) -> Self { |
71 | 0 | Self { |
72 | 0 | rule, |
73 | 0 | affix: Affix::Infix(assoc), |
74 | 0 | next: None, |
75 | 0 | } |
76 | 0 | } |
77 | | } |
78 | | |
79 | | impl<R: RuleType> BitOr for Op<R> { |
80 | | type Output = Self; |
81 | | |
82 | 0 | fn bitor(mut self, rhs: Self) -> Self { |
83 | 0 | fn assign_next<R: RuleType>(op: &mut Op<R>, next: Op<R>) { |
84 | 0 | if let Some(ref mut child) = op.next { |
85 | 0 | assign_next(child, next); |
86 | 0 | } else { |
87 | 0 | op.next = Some(Box::new(next)); |
88 | 0 | } |
89 | 0 | } |
90 | | |
91 | 0 | assign_next(&mut self, rhs); |
92 | 0 | self |
93 | 0 | } |
94 | | } |
95 | | |
96 | | /// Struct containing operators and precedences, which can perform [Pratt parsing][1] on |
97 | | /// primary, prefix, postfix and infix expressions over [`Pairs`]. The tokens in [`Pairs`] |
98 | | /// should alternate in the order: |
99 | | /// `prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix*)*` |
100 | | /// |
101 | | /// # Panics |
102 | | /// |
103 | | /// Panics will occur when: |
104 | | /// * `pairs` is empty |
105 | | /// * The tokens in `pairs` does not alternate in the expected order. |
106 | | /// * No `map_*` function is specified for a certain kind of operator encountered in `pairs`. |
107 | | /// |
108 | | /// # Example |
109 | | /// |
110 | | /// The following pest grammar defines a calculator which can be used for Pratt parsing. |
111 | | /// |
112 | | /// ```pest |
113 | | /// WHITESPACE = _{ " " | "\t" | NEWLINE } |
114 | | /// |
115 | | /// program = { SOI ~ expr ~ EOI } |
116 | | /// expr = { prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix* )* } |
117 | | /// infix = _{ add | sub | mul | div | pow } |
118 | | /// add = { "+" } // Addition |
119 | | /// sub = { "-" } // Subtraction |
120 | | /// mul = { "*" } // Multiplication |
121 | | /// div = { "/" } // Division |
122 | | /// pow = { "^" } // Exponentiation |
123 | | /// prefix = _{ neg } |
124 | | /// neg = { "-" } // Negation |
125 | | /// postfix = _{ fac } |
126 | | /// fac = { "!" } // Factorial |
127 | | /// primary = _{ int | "(" ~ expr ~ ")" } |
128 | | /// int = @{ (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT+ | ASCII_DIGIT) } |
129 | | /// ``` |
130 | | /// |
131 | | /// Below is a [`PrattParser`] that is able to parse an `expr` in the above grammar. The order |
132 | | /// of precedence corresponds to the order in which [`op`] is called. Thus, `mul` will |
133 | | /// have higher precedence than `add`. Operators can also be chained with `|` to give them equal |
134 | | /// precedence. |
135 | | /// |
136 | | /// ``` |
137 | | /// # use pest::pratt_parser::{Assoc, Op, PrattParser}; |
138 | | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
139 | | /// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg } |
140 | | /// let pratt = |
141 | | /// PrattParser::new() |
142 | | /// .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left)) |
143 | | /// .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left)) |
144 | | /// .op(Op::infix(Rule::pow, Assoc::Right)) |
145 | | /// .op(Op::prefix(Rule::neg)) |
146 | | /// .op(Op::postfix(Rule::fac)); |
147 | | /// ``` |
148 | | /// |
149 | | /// To parse an expression, call the [`map_primary`], [`map_prefix`], [`map_postfix`], |
150 | | /// [`map_infix`] and [`parse`] methods as follows: |
151 | | /// |
152 | | /// ``` |
153 | | /// # use pest::{iterators::Pairs, pratt_parser::PrattParser}; |
154 | | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
155 | | /// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg } |
156 | | /// fn parse_expr(pairs: Pairs<Rule>, pratt: &PrattParser<Rule>) -> i32 { |
157 | | /// pratt |
158 | | /// .map_primary(|primary| match primary.as_rule() { |
159 | | /// Rule::int => primary.as_str().parse().unwrap(), |
160 | | /// Rule::expr => parse_expr(primary.into_inner(), pratt), // from "(" ~ expr ~ ")" |
161 | | /// _ => unreachable!(), |
162 | | /// }) |
163 | | /// .map_prefix(|op, rhs| match op.as_rule() { |
164 | | /// Rule::neg => -rhs, |
165 | | /// _ => unreachable!(), |
166 | | /// }) |
167 | | /// .map_postfix(|lhs, op| match op.as_rule() { |
168 | | /// Rule::fac => (1..lhs+1).product(), |
169 | | /// _ => unreachable!(), |
170 | | /// }) |
171 | | /// .map_infix(|lhs, op, rhs| match op.as_rule() { |
172 | | /// Rule::add => lhs + rhs, |
173 | | /// Rule::sub => lhs - rhs, |
174 | | /// Rule::mul => lhs * rhs, |
175 | | /// Rule::div => lhs / rhs, |
176 | | /// Rule::pow => (1..rhs+1).map(|_| lhs).product(), |
177 | | /// _ => unreachable!(), |
178 | | /// }) |
179 | | /// .parse(pairs) |
180 | | /// } |
181 | | /// ``` |
182 | | /// |
183 | | /// Note that [`map_prefix`], [`map_postfix`] and [`map_infix`] only need to be specified if the |
184 | | /// grammar contains the corresponding operators. |
185 | | /// |
186 | | /// [1]: https://en.wikipedia.org/wiki/Pratt_parser |
187 | | /// [`Pairs`]: ../iterators/struct.Pairs.html |
188 | | /// [`PrattParser`]: struct.PrattParser.html |
189 | | /// [`map_primary`]: struct.PrattParser.html#method.map_primary |
190 | | /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix |
191 | | /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix |
192 | | /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix |
193 | | /// [`parse`]: struct.PrattParserMap.html#method.parse |
194 | | /// [`op`]: struct.PrattParserMap.html#method.op |
195 | | pub struct PrattParser<R: RuleType> { |
196 | | prec: Prec, |
197 | | ops: BTreeMap<R, (Affix, Prec)>, |
198 | | has_prefix: bool, |
199 | | has_postfix: bool, |
200 | | has_infix: bool, |
201 | | } |
202 | | |
203 | | impl<R: RuleType> Default for PrattParser<R> { |
204 | 0 | fn default() -> Self { |
205 | 0 | Self::new() |
206 | 0 | } |
207 | | } |
208 | | |
209 | | impl<R: RuleType> PrattParser<R> { |
210 | | /// Instantiate a new `PrattParser`. |
211 | 0 | pub fn new() -> Self { |
212 | 0 | Self { |
213 | 0 | prec: PREC_STEP, |
214 | 0 | ops: BTreeMap::new(), |
215 | 0 | has_prefix: false, |
216 | 0 | has_postfix: false, |
217 | 0 | has_infix: false, |
218 | 0 | } |
219 | 0 | } |
220 | | |
221 | | /// Add `op` to `PrattParser`. |
222 | 0 | pub fn op(mut self, op: Op<R>) -> Self { |
223 | 0 | self.prec += PREC_STEP; |
224 | 0 | let mut iter = Some(op); |
225 | 0 | while let Some(Op { rule, affix, next }) = iter.take() { |
226 | 0 | match affix { |
227 | 0 | Affix::Prefix => self.has_prefix = true, |
228 | 0 | Affix::Postfix => self.has_postfix = true, |
229 | 0 | Affix::Infix(_) => self.has_infix = true, |
230 | | } |
231 | 0 | self.ops.insert(rule, (affix, self.prec)); |
232 | 0 | iter = next.map(|op| *op); |
233 | | } |
234 | 0 | self |
235 | 0 | } |
236 | | |
237 | | /// Maps primary expressions with a closure `primary`. |
238 | 0 | pub fn map_primary<'pratt, 'a, 'i, X, T>( |
239 | 0 | &'pratt self, |
240 | 0 | primary: X, |
241 | 0 | ) -> PrattParserMap<'pratt, 'a, 'i, R, X, T> |
242 | 0 | where |
243 | 0 | X: FnMut(Pair<'i, R>) -> T, |
244 | 0 | R: 'pratt, |
245 | 0 | { |
246 | 0 | PrattParserMap { |
247 | 0 | pratt: self, |
248 | 0 | primary, |
249 | 0 | prefix: None, |
250 | 0 | postfix: None, |
251 | 0 | infix: None, |
252 | 0 | phantom: PhantomData, |
253 | 0 | } |
254 | 0 | } |
255 | | } |
256 | | |
257 | | type PrefixFn<'a, 'i, R, T> = Box<dyn FnMut(Pair<'i, R>, T) -> T + 'a>; |
258 | | type PostfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>) -> T + 'a>; |
259 | | type InfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>, T) -> T + 'a>; |
260 | | |
261 | | /// Product of calling [`map_primary`] on [`PrattParser`], defines how expressions should |
262 | | /// be mapped. |
263 | | /// |
264 | | /// [`map_primary`]: struct.PrattParser.html#method.map_primary |
265 | | /// [`PrattParser`]: struct.PrattParser.html |
266 | | pub struct PrattParserMap<'pratt, 'a, 'i, R, F, T> |
267 | | where |
268 | | R: RuleType, |
269 | | F: FnMut(Pair<'i, R>) -> T, |
270 | | { |
271 | | pratt: &'pratt PrattParser<R>, |
272 | | primary: F, |
273 | | prefix: Option<PrefixFn<'a, 'i, R, T>>, |
274 | | postfix: Option<PostfixFn<'a, 'i, R, T>>, |
275 | | infix: Option<InfixFn<'a, 'i, R, T>>, |
276 | | phantom: PhantomData<T>, |
277 | | } |
278 | | |
279 | | impl<'pratt, 'a, 'i, R, F, T> PrattParserMap<'pratt, 'a, 'i, R, F, T> |
280 | | where |
281 | | R: RuleType + 'pratt, |
282 | | F: FnMut(Pair<'i, R>) -> T, |
283 | | { |
284 | | /// Maps prefix operators with closure `prefix`. |
285 | 0 | pub fn map_prefix<X>(mut self, prefix: X) -> Self |
286 | 0 | where |
287 | 0 | X: FnMut(Pair<'i, R>, T) -> T + 'a, |
288 | 0 | { |
289 | 0 | self.prefix = Some(Box::new(prefix)); |
290 | 0 | self |
291 | 0 | } |
292 | | |
293 | | /// Maps postfix operators with closure `postfix`. |
294 | 0 | pub fn map_postfix<X>(mut self, postfix: X) -> Self |
295 | 0 | where |
296 | 0 | X: FnMut(T, Pair<'i, R>) -> T + 'a, |
297 | 0 | { |
298 | 0 | self.postfix = Some(Box::new(postfix)); |
299 | 0 | self |
300 | 0 | } |
301 | | |
302 | | /// Maps infix operators with a closure `infix`. |
303 | 0 | pub fn map_infix<X>(mut self, infix: X) -> Self |
304 | 0 | where |
305 | 0 | X: FnMut(T, Pair<'i, R>, T) -> T + 'a, |
306 | 0 | { |
307 | 0 | self.infix = Some(Box::new(infix)); |
308 | 0 | self |
309 | 0 | } |
310 | | |
311 | | /// The last method to call on the provided pairs to execute the Pratt |
312 | | /// parser (previously defined using [`map_primary`], [`map_prefix`], [`map_postfix`], |
313 | | /// and [`map_infix`] methods). |
314 | | /// |
315 | | /// [`map_primary`]: struct.PrattParser.html#method.map_primary |
316 | | /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix |
317 | | /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix |
318 | | /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix |
319 | 0 | pub fn parse<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: P) -> T { |
320 | 0 | self.expr(&mut pairs.peekable(), 0) |
321 | 0 | } |
322 | | |
323 | 0 | fn expr<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, rbp: Prec) -> T { |
324 | 0 | let mut lhs = self.nud(pairs); |
325 | 0 | while rbp < self.lbp(pairs) { |
326 | 0 | lhs = self.led(pairs, lhs); |
327 | 0 | } |
328 | 0 | lhs |
329 | 0 | } |
330 | | |
331 | | /// Null-Denotation |
332 | | /// |
333 | | /// "the action that should happen when the symbol is encountered |
334 | | /// as start of an expression (most notably, prefix operators) |
335 | 0 | fn nud<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> T { |
336 | 0 | let pair = pairs.next().expect("Pratt parsing expects non-empty Pairs"); |
337 | 0 | match self.pratt.ops.get(&pair.as_rule()) { |
338 | 0 | Some((Affix::Prefix, prec)) => { |
339 | 0 | let rhs = self.expr(pairs, *prec - 1); |
340 | 0 | match self.prefix.as_mut() { |
341 | 0 | Some(prefix) => prefix(pair, rhs), |
342 | 0 | None => panic!("Could not map {}, no `.map_prefix(...)` specified", pair), |
343 | | } |
344 | | } |
345 | 0 | None => (self.primary)(pair), |
346 | 0 | _ => panic!("Expected prefix or primary expression, found {}", pair), |
347 | | } |
348 | 0 | } |
349 | | |
350 | | /// Left-Denotation |
351 | | /// |
352 | | /// "the action that should happen when the symbol is encountered |
353 | | /// after the start of an expression (most notably, infix and postfix operators)" |
354 | 0 | fn led<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, lhs: T) -> T { |
355 | 0 | let pair = pairs.next().unwrap(); |
356 | 0 | match self.pratt.ops.get(&pair.as_rule()) { |
357 | 0 | Some((Affix::Infix(assoc), prec)) => { |
358 | 0 | let rhs = match *assoc { |
359 | 0 | Assoc::Left => self.expr(pairs, *prec), |
360 | 0 | Assoc::Right => self.expr(pairs, *prec - 1), |
361 | | }; |
362 | 0 | match self.infix.as_mut() { |
363 | 0 | Some(infix) => infix(lhs, pair, rhs), |
364 | 0 | None => panic!("Could not map {}, no `.map_infix(...)` specified", pair), |
365 | | } |
366 | | } |
367 | 0 | Some((Affix::Postfix, _)) => match self.postfix.as_mut() { |
368 | 0 | Some(postfix) => postfix(lhs, pair), |
369 | 0 | None => panic!("Could not map {}, no `.map_postfix(...)` specified", pair), |
370 | | }, |
371 | 0 | _ => panic!("Expected postfix or infix expression, found {}", pair), |
372 | | } |
373 | 0 | } |
374 | | |
375 | | /// Left-Binding-Power |
376 | | /// |
377 | | /// "describes the symbol's precedence in infix form (most notably, operator precedence)" |
378 | 0 | fn lbp<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> Prec { |
379 | 0 | match pairs.peek() { |
380 | 0 | Some(pair) => match self.pratt.ops.get(&pair.as_rule()) { |
381 | 0 | Some((_, prec)) => *prec, |
382 | 0 | None => panic!("Expected operator, found {}", pair), |
383 | | }, |
384 | 0 | None => 0, |
385 | | } |
386 | 0 | } |
387 | | } |