Coverage Report

Created: 2025-11-11 06:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/semver-parser/src/parser.rs
Line
Count
Source
1
// this is only for parsing versions now
2
3
use std::fmt;
4
use std::mem;
5
6
use self::Error::*;
7
use crate::lexer::{self, Lexer, Token};
8
use crate::version::{Identifier, Version};
9
10
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
11
pub enum Error<'input> {
12
    /// Needed more tokens for parsing, but none are available.
13
    UnexpectedEnd,
14
    /// Unexpected token.
15
    UnexpectedToken(Token<'input>),
16
    /// An error occurred in the lexer.
17
    Lexer(lexer::Error),
18
    /// More input available where the end of input was expected.
19
    MoreInput(Vec<Token<'input>>),
20
    /// Encountered empty predicate in a set of predicates.
21
    EmptyPredicate,
22
    /// Encountered an empty range.
23
    EmptyRange,
24
}
25
26
impl<'input> From<lexer::Error> for Error<'input> {
27
352
    fn from(value: lexer::Error) -> Self {
28
352
        Error::Lexer(value)
29
352
    }
30
}
31
32
impl<'input> fmt::Display for Error<'input> {
33
0
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
34
        use self::Error::*;
35
36
0
        match *self {
37
0
            UnexpectedEnd => write!(fmt, "expected more input"),
38
0
            UnexpectedToken(ref token) => write!(fmt, "encountered unexpected token: {:?}", token),
39
0
            Lexer(ref error) => write!(fmt, "lexer error: {:?}", error),
40
0
            MoreInput(ref tokens) => write!(fmt, "expected end of input, but got: {:?}", tokens),
41
0
            EmptyPredicate => write!(fmt, "encountered empty predicate"),
42
0
            EmptyRange => write!(fmt, "encountered empty range"),
43
        }
44
0
    }
45
}
46
47
/// impl for backwards compatibility.
48
impl<'input> From<Error<'input>> for String {
49
0
    fn from(value: Error<'input>) -> Self {
50
0
        value.to_string()
51
0
    }
52
}
53
54
/// A recursive-descent parser for parsing versions.
55
pub struct Parser<'input> {
56
    /// Source of tokens.
57
    lexer: Lexer<'input>,
58
    /// Lookahead.
59
    c1: Option<Token<'input>>,
60
}
61
62
impl<'input> Parser<'input> {
63
    /// Construct a new parser for the given input.
64
425
    pub fn new(input: &'input str) -> Result<Parser<'input>, Error<'input>> {
65
425
        let mut lexer = Lexer::new(input);
66
67
425
        let c1 = if let Some(c1) = lexer.next() {
68
425
            Some(c1?)
69
        } else {
70
0
            None
71
        };
72
73
250
        Ok(Parser { lexer, c1 })
74
425
    }
75
76
    /// Pop one token.
77
    #[inline(always)]
78
2.41M
    fn pop(&mut self) -> Result<Token<'input>, Error<'input>> {
79
2.41M
        let c1 = if let Some(c1) = self.lexer.next() {
80
2.41M
            Some(c1?)
81
        } else {
82
22
            None
83
        };
84
85
2.41M
        mem::replace(&mut self.c1, c1).ok_or_else(|| UnexpectedEnd)
86
2.41M
    }
87
88
    /// Peek one token.
89
    #[inline(always)]
90
2.29M
    fn peek(&mut self) -> Option<&Token<'input>> {
91
2.29M
        self.c1.as_ref()
92
2.29M
    }
93
94
    /// Skip whitespace if present.
95
268
    fn skip_whitespace(&mut self) -> Result<(), Error<'input>> {
96
268
        match self.peek() {
97
13
            Some(&Token::Whitespace(_, _)) => self.pop().map(|_| ()),
98
255
            _ => Ok(()),
99
        }
100
268
    }
101
102
    /// Parse a single component.
103
    ///
104
    /// Returns `None` if the component is a wildcard.
105
0
    pub fn component(&mut self) -> Result<Option<u64>, Error<'input>> {
106
0
        match self.pop()? {
107
0
            Token::Numeric(number) => Ok(Some(number)),
108
0
            ref t if t.is_wildcard() => Ok(None),
109
0
            tok => Err(UnexpectedToken(tok)),
110
        }
111
0
    }
112
113
    /// Parse a single numeric.
114
359
    pub fn numeric(&mut self) -> Result<u64, Error<'input>> {
115
359
        match self.pop()? {
116
174
            Token::Numeric(number) => Ok(number),
117
46
            tok => Err(UnexpectedToken(tok)),
118
        }
119
359
    }
120
121
    /// Optionally parse a dot, then a component.
122
    ///
123
    /// The second component of the tuple indicates if a wildcard has been encountered, and is
124
    /// always `false` if the first component is `Some`.
125
    ///
126
    /// If a dot is not encountered, `(None, false)` is returned.
127
    ///
128
    /// If a wildcard is encountered, `(None, true)` is returned.
129
0
    pub fn dot_component(&mut self) -> Result<(Option<u64>, bool), Error<'input>> {
130
0
        match self.peek() {
131
0
            Some(&Token::Dot) => {}
132
0
            _ => return Ok((None, false)),
133
        }
134
135
        // pop the peeked dot.
136
0
        self.pop()?;
137
0
        self.component().map(|n| (n, n.is_none()))
138
0
    }
139
140
    /// Parse a dot, then a numeric.
141
125
    pub fn dot_numeric(&mut self) -> Result<u64, Error<'input>> {
142
125
        match self.pop()? {
143
115
            Token::Dot => {}
144
1
            tok => return Err(UnexpectedToken(tok)),
145
        }
146
147
115
        self.numeric()
148
125
    }
149
150
    /// Parse a string identifier.
151
    ///
152
    /// Like, `foo`, or `bar`, or `beta-1`.
153
1.08M
    pub fn identifier(&mut self) -> Result<Identifier, Error<'input>> {
154
1.08M
        self.bounded_identifier(0)
155
1.08M
    }
156
157
1.20M
    fn bounded_identifier(&mut self, count: u32) -> Result<Identifier, Error<'input>> {
158
1.20M
        if count > 255 {
159
0
            panic!("Cannot have more than 255 identifiers");
160
1.20M
        }
161
162
1.20M
        let identifier = match self.pop()? {
163
296k
            Token::AlphaNumeric(identifier) => {
164
                // TODO: Borrow?
165
296k
                Identifier::AlphaNumeric(identifier.to_string())
166
            }
167
910k
            Token::Numeric(n) => Identifier::Numeric(n),
168
10
            tok => return Err(UnexpectedToken(tok)),
169
        };
170
171
1.20M
        if let Some(&Token::Hyphen) = self.peek() {
172
            // pop the peeked hyphen
173
120k
            self.pop()?;
174
            // concat with any following identifiers
175
120k
            Ok(identifier
176
120k
                .concat("-")
177
120k
                .concat(&self.bounded_identifier(count + 1)?.to_string()))
178
        } else {
179
1.08M
            Ok(identifier)
180
        }
181
1.20M
    }
182
183
    /// Parse all pre-release identifiers, separated by dots.
184
    ///
185
    /// Like, `abcdef.1234`.
186
49
    fn pre(&mut self) -> Result<Vec<Identifier>, Error<'input>> {
187
49
        match self.peek() {
188
47
            Some(&Token::Hyphen) => {}
189
2
            _ => return Ok(vec![]),
190
        }
191
192
        // pop the peeked hyphen.
193
47
        self.pop()?;
194
47
        self.parts()
195
49
    }
196
197
    /// Parse a dot-separated set of identifiers.
198
58
    fn parts(&mut self) -> Result<Vec<Identifier>, Error<'input>> {
199
58
        let mut parts = Vec::new();
200
201
58
        parts.push(self.identifier()?);
202
203
1.08M
        while let Some(&Token::Dot) = self.peek() {
204
1.08M
            self.pop()?;
205
206
1.08M
            parts.push(self.identifier()?);
207
        }
208
209
27
        Ok(parts)
210
58
    }
211
212
    /// Parse optional build metadata.
213
    ///
214
    /// Like, `` (empty), or `+abcdef`.
215
26
    fn plus_build_metadata(&mut self) -> Result<Vec<Identifier>, Error<'input>> {
216
26
        match self.peek() {
217
11
            Some(&Token::Plus) => {}
218
15
            _ => return Ok(vec![]),
219
        }
220
221
        // pop the plus.
222
11
        self.pop()?;
223
11
        self.parts()
224
26
    }
225
226
    /// Parse a version.
227
    ///
228
    /// Like, `1.0.0` or `3.0.0-beta.1`.
229
250
    pub fn version(&mut self) -> Result<Version, Error<'input>> {
230
250
        self.skip_whitespace()?;
231
232
244
        let major = self.numeric()?;
233
69
        let minor = self.dot_numeric()?;
234
56
        let patch = self.dot_numeric()?;
235
49
        let pre = self.pre()?;
236
26
        let build = self.plus_build_metadata()?;
237
238
18
        self.skip_whitespace()?;
239
240
14
        Ok(Version {
241
14
            major,
242
14
            minor,
243
14
            patch,
244
14
            pre,
245
14
            build,
246
14
        })
247
250
    }
248
249
    /// Check if we have reached the end of input.
250
0
    pub fn is_eof(&mut self) -> bool {
251
0
        self.c1.is_none()
252
0
    }
253
254
    /// Get the rest of the tokens in the parser.
255
    ///
256
    /// Useful for debugging.
257
0
    pub fn tail(&mut self) -> Result<Vec<Token<'input>>, Error<'input>> {
258
0
        let mut out = Vec::new();
259
260
0
        if let Some(t) = self.c1.take() {
261
0
            out.push(t);
262
0
        }
263
264
0
        while let Some(t) = self.lexer.next() {
265
0
            out.push(t?);
266
        }
267
268
0
        Ok(out)
269
0
    }
270
}
271
272
#[cfg(test)]
273
mod tests {
274
    use crate::version::parse;
275
276
    #[test]
277
    #[should_panic(expected = "Cannot have more than 255 identifiers")]
278
    fn fuzz_0001() {
279
        let version = std::fs::read_to_string("tests/fixtures/fuzz-0001.txt").expect("should be able to read version from file");
280
281
        parse(&version).ok();
282
    }
283
284
}