/src/semver-parser/src/parser.rs
Line | Count | Source |
1 | | // this is only for parsing versions now |
2 | | |
3 | | use std::fmt; |
4 | | use std::mem; |
5 | | |
6 | | use self::Error::*; |
7 | | use crate::lexer::{self, Lexer, Token}; |
8 | | use crate::version::{Identifier, Version}; |
9 | | |
10 | | #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] |
11 | | pub enum Error<'input> { |
12 | | /// Needed more tokens for parsing, but none are available. |
13 | | UnexpectedEnd, |
14 | | /// Unexpected token. |
15 | | UnexpectedToken(Token<'input>), |
16 | | /// An error occurred in the lexer. |
17 | | Lexer(lexer::Error), |
18 | | /// More input available. |
19 | | MoreInput(Vec<Token<'input>>), |
20 | | /// Encountered empty predicate in a set of predicates. |
21 | | EmptyPredicate, |
22 | | /// Encountered an empty range. |
23 | | EmptyRange, |
24 | | } |
25 | | |
26 | | impl<'input> From<lexer::Error> for Error<'input> { |
27 | 352 | fn from(value: lexer::Error) -> Self { |
28 | 352 | Error::Lexer(value) |
29 | 352 | } |
30 | | } |
31 | | |
32 | | impl<'input> fmt::Display for Error<'input> { |
33 | 0 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { |
34 | | use self::Error::*; |
35 | | |
36 | 0 | match *self { |
37 | 0 | UnexpectedEnd => write!(fmt, "expected more input"), |
38 | 0 | UnexpectedToken(ref token) => write!(fmt, "encountered unexpected token: {:?}", token), |
39 | 0 | Lexer(ref error) => write!(fmt, "lexer error: {:?}", error), |
40 | 0 | MoreInput(ref tokens) => write!(fmt, "expected end of input, but got: {:?}", tokens), |
41 | 0 | EmptyPredicate => write!(fmt, "encountered empty predicate"), |
42 | 0 | EmptyRange => write!(fmt, "encountered empty range"), |
43 | | } |
44 | 0 | } |
45 | | } |
46 | | |
47 | | /// impl for backwards compatibility. |
48 | | impl<'input> From<Error<'input>> for String { |
49 | 0 | fn from(value: Error<'input>) -> Self { |
50 | 0 | value.to_string() |
51 | 0 | } |
52 | | } |
53 | | |
54 | | /// A recursive-descent parser for parsing version requirements. |
55 | | pub struct Parser<'input> { |
56 | | /// Source of token. |
57 | | lexer: Lexer<'input>, |
58 | | /// Lookaehead. |
59 | | c1: Option<Token<'input>>, |
60 | | } |
61 | | |
62 | | impl<'input> Parser<'input> { |
63 | | /// Construct a new parser for the given input. |
64 | 425 | pub fn new(input: &'input str) -> Result<Parser<'input>, Error<'input>> { |
65 | 425 | let mut lexer = Lexer::new(input); |
66 | | |
67 | 425 | let c1 = if let Some(c1) = lexer.next() { |
68 | 425 | Some(c1?) |
69 | | } else { |
70 | 0 | None |
71 | | }; |
72 | | |
73 | 250 | Ok(Parser { lexer, c1 }) |
74 | 425 | } |
75 | | |
76 | | /// Pop one token. |
77 | | #[inline(always)] |
78 | 2.41M | fn pop(&mut self) -> Result<Token<'input>, Error<'input>> { |
79 | 2.41M | let c1 = if let Some(c1) = self.lexer.next() { |
80 | 2.41M | Some(c1?) |
81 | | } else { |
82 | 22 | None |
83 | | }; |
84 | | |
85 | 2.41M | mem::replace(&mut self.c1, c1).ok_or_else(|| UnexpectedEnd) |
86 | 2.41M | } |
87 | | |
88 | | /// Peek one token. |
89 | | #[inline(always)] |
90 | 2.29M | fn peek(&mut self) -> Option<&Token<'input>> { |
91 | 2.29M | self.c1.as_ref() |
92 | 2.29M | } |
93 | | |
94 | | /// Skip whitespace if present. |
95 | 268 | fn skip_whitespace(&mut self) -> Result<(), Error<'input>> { |
96 | 268 | match self.peek() { |
97 | 13 | Some(&Token::Whitespace(_, _)) => self.pop().map(|_| ()), |
98 | 255 | _ => Ok(()), |
99 | | } |
100 | 268 | } |
101 | | |
102 | | /// Parse a single component. |
103 | | /// |
104 | | /// Returns `None` if the component is a wildcard. |
105 | 0 | pub fn component(&mut self) -> Result<Option<u64>, Error<'input>> { |
106 | 0 | match self.pop()? { |
107 | 0 | Token::Numeric(number) => Ok(Some(number)), |
108 | 0 | ref t if t.is_wildcard() => Ok(None), |
109 | 0 | tok => Err(UnexpectedToken(tok)), |
110 | | } |
111 | 0 | } |
112 | | |
113 | | /// Parse a single numeric. |
114 | 359 | pub fn numeric(&mut self) -> Result<u64, Error<'input>> { |
115 | 359 | match self.pop()? { |
116 | 174 | Token::Numeric(number) => Ok(number), |
117 | 46 | tok => Err(UnexpectedToken(tok)), |
118 | | } |
119 | 359 | } |
120 | | |
121 | | /// Optionally parse a dot, then a component. |
122 | | /// |
123 | | /// The second component of the tuple indicates if a wildcard has been encountered, and is |
124 | | /// always `false` if the first component is `Some`. |
125 | | /// |
126 | | /// If a dot is not encountered, `(None, false)` is returned. |
127 | | /// |
128 | | /// If a wildcard is encountered, `(None, true)` is returned. |
129 | 0 | pub fn dot_component(&mut self) -> Result<(Option<u64>, bool), Error<'input>> { |
130 | 0 | match self.peek() { |
131 | 0 | Some(&Token::Dot) => {} |
132 | 0 | _ => return Ok((None, false)), |
133 | | } |
134 | | |
135 | | // pop the peeked dot. |
136 | 0 | self.pop()?; |
137 | 0 | self.component().map(|n| (n, n.is_none())) |
138 | 0 | } |
139 | | |
140 | | /// Parse a dot, then a numeric. |
141 | 125 | pub fn dot_numeric(&mut self) -> Result<u64, Error<'input>> { |
142 | 125 | match self.pop()? { |
143 | 115 | Token::Dot => {} |
144 | 1 | tok => return Err(UnexpectedToken(tok)), |
145 | | } |
146 | | |
147 | 115 | self.numeric() |
148 | 125 | } |
149 | | |
150 | | /// Parse an string identifier. |
151 | | /// |
152 | | /// Like, `foo`, or `bar`, or `beta-1`. |
153 | 1.08M | pub fn identifier(&mut self) -> Result<Identifier, Error<'input>> { |
154 | 1.08M | self.bounded_identifier(0) |
155 | 1.08M | } |
156 | | |
157 | 1.20M | fn bounded_identifier(&mut self, count: u32) -> Result<Identifier, Error<'input>> { |
158 | 1.20M | if count > 255 { |
159 | 0 | panic!("Cannot have more than 255 identifiers"); |
160 | 1.20M | } |
161 | | |
162 | 1.20M | let identifier = match self.pop()? { |
163 | 296k | Token::AlphaNumeric(identifier) => { |
164 | | // TODO: Borrow? |
165 | 296k | Identifier::AlphaNumeric(identifier.to_string()) |
166 | | } |
167 | 910k | Token::Numeric(n) => Identifier::Numeric(n), |
168 | 10 | tok => return Err(UnexpectedToken(tok)), |
169 | | }; |
170 | | |
171 | 1.20M | if let Some(&Token::Hyphen) = self.peek() { |
172 | | // pop the peeked hyphen |
173 | 120k | self.pop()?; |
174 | | // concat with any following identifiers |
175 | 120k | Ok(identifier |
176 | 120k | .concat("-") |
177 | 120k | .concat(&self.bounded_identifier(count + 1)?.to_string())) |
178 | | } else { |
179 | 1.08M | Ok(identifier) |
180 | | } |
181 | 1.20M | } |
182 | | |
183 | | /// Parse all pre-release identifiers, separated by dots. |
184 | | /// |
185 | | /// Like, `abcdef.1234`. |
186 | 49 | fn pre(&mut self) -> Result<Vec<Identifier>, Error<'input>> { |
187 | 49 | match self.peek() { |
188 | 47 | Some(&Token::Hyphen) => {} |
189 | 2 | _ => return Ok(vec![]), |
190 | | } |
191 | | |
192 | | // pop the peeked hyphen. |
193 | 47 | self.pop()?; |
194 | 47 | self.parts() |
195 | 49 | } |
196 | | |
197 | | /// Parse a dot-separated set of identifiers. |
198 | 58 | fn parts(&mut self) -> Result<Vec<Identifier>, Error<'input>> { |
199 | 58 | let mut parts = Vec::new(); |
200 | | |
201 | 58 | parts.push(self.identifier()?); |
202 | | |
203 | 1.08M | while let Some(&Token::Dot) = self.peek() { |
204 | 1.08M | self.pop()?; |
205 | | |
206 | 1.08M | parts.push(self.identifier()?); |
207 | | } |
208 | | |
209 | 27 | Ok(parts) |
210 | 58 | } |
211 | | |
212 | | /// Parse optional build metadata. |
213 | | /// |
214 | | /// Like, `` (empty), or `+abcdef`. |
215 | 26 | fn plus_build_metadata(&mut self) -> Result<Vec<Identifier>, Error<'input>> { |
216 | 26 | match self.peek() { |
217 | 11 | Some(&Token::Plus) => {} |
218 | 15 | _ => return Ok(vec![]), |
219 | | } |
220 | | |
221 | | // pop the plus. |
222 | 11 | self.pop()?; |
223 | 11 | self.parts() |
224 | 26 | } |
225 | | |
226 | | /// Parse a version. |
227 | | /// |
228 | | /// Like, `1.0.0` or `3.0.0-beta.1`. |
229 | 250 | pub fn version(&mut self) -> Result<Version, Error<'input>> { |
230 | 250 | self.skip_whitespace()?; |
231 | | |
232 | 244 | let major = self.numeric()?; |
233 | 69 | let minor = self.dot_numeric()?; |
234 | 56 | let patch = self.dot_numeric()?; |
235 | 49 | let pre = self.pre()?; |
236 | 26 | let build = self.plus_build_metadata()?; |
237 | | |
238 | 18 | self.skip_whitespace()?; |
239 | | |
240 | 14 | Ok(Version { |
241 | 14 | major, |
242 | 14 | minor, |
243 | 14 | patch, |
244 | 14 | pre, |
245 | 14 | build, |
246 | 14 | }) |
247 | 250 | } |
248 | | |
249 | | /// Check if we have reached the end of input. |
250 | 0 | pub fn is_eof(&mut self) -> bool { |
251 | 0 | self.c1.is_none() |
252 | 0 | } |
253 | | |
254 | | /// Get the rest of the tokens in the parser. |
255 | | /// |
256 | | /// Useful for debugging. |
257 | 0 | pub fn tail(&mut self) -> Result<Vec<Token<'input>>, Error<'input>> { |
258 | 0 | let mut out = Vec::new(); |
259 | | |
260 | 0 | if let Some(t) = self.c1.take() { |
261 | 0 | out.push(t); |
262 | 0 | } |
263 | | |
264 | 0 | while let Some(t) = self.lexer.next() { |
265 | 0 | out.push(t?); |
266 | | } |
267 | | |
268 | 0 | Ok(out) |
269 | 0 | } |
270 | | } |
271 | | |
#[cfg(test)]
mod tests {
    use crate::version::parse;

    /// Regression test for a fuzzer-found input: parsing the fixture must hit
    /// the identifier limit and panic with the expected message.
    #[test]
    #[should_panic(expected = "Cannot have more than 255 identifiers")]
    fn fuzz_0001() {
        let input = std::fs::read_to_string("tests/fixtures/fuzz-0001.txt")
            .expect("should be able to read version from file");

        // Only the panic matters here; the parse result itself is discarded.
        let _ = parse(&input);
    }
}