/src/wasmtime/cranelift/reader/src/lexer.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! Lexical analysis for .clif files. |
2 | | |
3 | | use crate::error::Location; |
4 | | use cranelift_codegen::ir::types; |
5 | | use cranelift_codegen::ir::{Block, Value}; |
6 | | #[allow(unused_imports, deprecated)] |
7 | | use std::ascii::AsciiExt; |
8 | | use std::str::CharIndices; |
9 | | use std::u16; |
10 | | |
/// A Token returned from the `Lexer`.
///
/// Some variants may contain references to the original source text, so the `Token` has the same
/// lifetime as the source.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'a> {
    Comment(&'a str),      // Rest-of-line comment, including the leading ';'
    LPar,                  // '('
    RPar,                  // ')'
    LBrace,                // '{'
    RBrace,                // '}'
    LBracket,              // '['
    RBracket,              // ']'
    Minus,                 // '-'
    Plus,                  // '+'
    Multiply,              // '*'
    Comma,                 // ','
    Dot,                   // '.'
    Colon,                 // ':'
    Equal,                 // '='
    Not,                   // '!'
    Arrow,                 // '->'
    Float(&'a str),        // Floating point immediate
    Integer(&'a str),      // Integer immediate
    Type(types::Type),     // i32, f32, b32x4, ...
    DynamicType(u32),      // dt5
    Value(Value),          // v12, v7
    Block(Block),          // block3
    Cold,                  // cold (flag on block)
    StackSlot(u32),        // ss3
    DynamicStackSlot(u32), // dss4
    GlobalValue(u32),      // gv3
    Table(u32),            // table2
    Constant(u32),         // const2
    FuncRef(u32),          // fn2
    SigRef(u32),           // sig2
    UserRef(u32),          // u345
    UserNameRef(u32),      // userextname345
    Name(&'a str),         // %9arbitrary_alphanum, %x3, %0, %function ...
    String(&'a str),       // "arbitrary quoted string with no escape" ...
    HexSequence(&'a str),  // #89AF
    Identifier(&'a str),   // Unrecognized identifier (opcode, enumerator, ...)
    SourceLoc(&'a str),    // @00c7
}
55 | | |
/// A `Token` with an associated location.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedToken<'a> {
    /// The token itself.
    pub token: Token<'a>,
    /// Source location (line number) where the token begins.
    pub location: Location,
}
62 | | |
63 | | /// Wrap up a `Token` with the given location. |
64 | 0 | fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> { |
65 | 0 | Ok(LocatedToken { |
66 | 0 | token, |
67 | 0 | location: loc, |
68 | 0 | }) |
69 | 0 | } |
70 | | |
/// An error from the lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
    // A character that cannot start any token was encountered (also used by
    // `scan_string` for a string that is never terminated).
    InvalidChar,
}
76 | | |
/// A `LexError` with an associated Location.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocatedError {
    /// The kind of error encountered.
    pub error: LexError,
    /// Source location (line number) where the error was detected.
    pub location: Location,
}
83 | | |
84 | | /// Wrap up a `LexError` with the given location. |
85 | 0 | fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> { |
86 | 0 | Err(LocatedError { |
87 | 0 | error, |
88 | 0 | location: loc, |
89 | 0 | }) |
90 | 0 | } |
91 | | |
/// Get the number of decimal digits at the end of `s`.
fn trailing_digits(s: &str) -> usize {
    // It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
    // `u8::is_ascii_digit` is the idiomatic form of the manual `b'0' <= b && b <= b'9'` check.
    s.as_bytes()
        .iter()
        .rev()
        .take_while(|b| b.is_ascii_digit())
        .count()
}
101 | | |
102 | | /// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII |
103 | | /// letters and numeric tail. |
104 | 0 | pub fn split_entity_name(name: &str) -> Option<(&str, u32)> { |
105 | 0 | let (head, tail) = name.split_at(name.len() - trailing_digits(name)); |
106 | 0 | if tail.len() > 1 && tail.starts_with('0') { |
107 | 0 | None |
108 | | } else { |
109 | 0 | tail.parse().ok().map(|n| (head, n)) |
110 | | } |
111 | 0 | } |
112 | | |
/// Lexical analysis.
///
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
///
/// Also keep track of a line number for error reporting.
///
pub struct Lexer<'a> {
    // Complete source being processed.
    source: &'a str,

    // Iterator into `source`.
    chars: CharIndices<'a>,

    // Next character to be processed, or `None` at the end.
    lookahead: Option<char>,

    // Byte index into `source` of the lookahead character
    // (== `source.len()` once the end is reached).
    pos: usize,

    // Current line number, 1-based.
    line_number: usize,
}
135 | | |
136 | | impl<'a> Lexer<'a> { |
137 | 0 | pub fn new(s: &'a str) -> Self { |
138 | 0 | let mut lex = Self { |
139 | 0 | source: s, |
140 | 0 | chars: s.char_indices(), |
141 | 0 | lookahead: None, |
142 | 0 | pos: 0, |
143 | 0 | line_number: 1, |
144 | 0 | }; |
145 | 0 | // Advance to the first char. |
146 | 0 | lex.next_ch(); |
147 | 0 | lex |
148 | 0 | } |
149 | | |
150 | | // Advance to the next character. |
151 | | // Return the next lookahead character, or None when the end is encountered. |
152 | | // Always update cur_ch to reflect |
153 | 0 | fn next_ch(&mut self) -> Option<char> { |
154 | 0 | if self.lookahead == Some('\n') { |
155 | 0 | self.line_number += 1; |
156 | 0 | } |
157 | 0 | match self.chars.next() { |
158 | 0 | Some((idx, ch)) => { |
159 | 0 | self.pos = idx; |
160 | 0 | self.lookahead = Some(ch); |
161 | 0 | } |
162 | 0 | None => { |
163 | 0 | self.pos = self.source.len(); |
164 | 0 | self.lookahead = None; |
165 | 0 | } |
166 | | } |
167 | 0 | self.lookahead |
168 | 0 | } |
169 | | |
170 | | // Get the location corresponding to `lookahead`. |
171 | 0 | fn loc(&self) -> Location { |
172 | 0 | Location { |
173 | 0 | line_number: self.line_number, |
174 | 0 | } |
175 | 0 | } |
176 | | |
177 | | // Starting from `lookahead`, are we looking at `prefix`? |
178 | 0 | fn looking_at(&self, prefix: &str) -> bool { |
179 | 0 | self.source[self.pos..].starts_with(prefix) |
180 | 0 | } |
181 | | |
182 | | // Starting from `lookahead`, are we looking at a number? |
183 | | fn looking_at_numeric(&self) -> bool { |
184 | 0 | if let Some(c) = self.lookahead { |
185 | 0 | match c { |
186 | 0 | '0'..='9' => return true, |
187 | 0 | '-' => return true, |
188 | 0 | '+' => return true, |
189 | 0 | '.' => return true, |
190 | 0 | _ => {} |
191 | 0 | } |
192 | 0 | if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") { |
193 | 0 | return true; |
194 | 0 | } |
195 | 0 | } |
196 | 0 | false |
197 | 0 | } |
198 | | |
199 | | // Scan a single-char token. |
200 | 0 | fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> { |
201 | 0 | assert_ne!(self.lookahead, None); |
202 | 0 | let loc = self.loc(); |
203 | 0 | self.next_ch(); |
204 | 0 | token(tok, loc) |
205 | 0 | } |
206 | | |
207 | | // Scan a multi-char token. |
208 | 0 | fn scan_chars( |
209 | 0 | &mut self, |
210 | 0 | count: usize, |
211 | 0 | tok: Token<'a>, |
212 | 0 | ) -> Result<LocatedToken<'a>, LocatedError> { |
213 | 0 | let loc = self.loc(); |
214 | 0 | for _ in 0..count { |
215 | 0 | assert_ne!(self.lookahead, None); |
216 | 0 | self.next_ch(); |
217 | | } |
218 | 0 | token(tok, loc) |
219 | 0 | } |
220 | | |
221 | | /// Get the rest of the current line. |
222 | | /// The next token returned by `next()` will be from the following lines. |
223 | 0 | pub fn rest_of_line(&mut self) -> &'a str { |
224 | 0 | let begin = self.pos; |
225 | 0 | loop { |
226 | 0 | match self.next_ch() { |
227 | 0 | None | Some('\n') => return &self.source[begin..self.pos], |
228 | 0 | _ => {} |
229 | | } |
230 | | } |
231 | 0 | } |
232 | | |
233 | | // Scan a comment extending to the end of the current line. |
234 | 0 | fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> { |
235 | 0 | let loc = self.loc(); |
236 | 0 | let text = self.rest_of_line(); |
237 | 0 | token(Token::Comment(text), loc) |
238 | 0 | } |
239 | | |
    // Scan a number token which can represent either an integer or floating point number.
    //
    // Accept the following forms:
    //
    // - `10`: Integer
    // - `-10`: Integer
    // - `0xff_00`: Integer
    // - `0.0`: Float
    // - `0x1.f`: Float
    // - `-0x2.4`: Float
    // - `0x0.4p-34`: Float
    //
    // This function does not filter out all invalid numbers. It depends in the context-sensitive
    // decoding of the text for that. For example, the number of allowed digits in an `Ieee32` and
    // an `Ieee64` constant are different.
    fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
        let begin = self.pos;
        let loc = self.loc();
        let mut is_float = false;

        // Skip a leading sign.
        match self.lookahead {
            Some('-') => {
                self.next_ch();
                if !self.looking_at_numeric() {
                    // If the next characters won't parse as a number, we return Token::Minus
                    return token(Token::Minus, loc);
                }
            }
            Some('+') => {
                self.next_ch();
                if !self.looking_at_numeric() {
                    // If the next characters won't parse as a number, we return Token::Plus
                    return token(Token::Plus, loc);
                }
            }
            _ => {}
        }

        // Check for NaNs with payloads.
        if self.looking_at("NaN:") || self.looking_at("sNaN:") {
            // Skip the `NaN:` prefix, the loop below won't accept it.
            // We expect a hexadecimal number to follow the colon.
            while self.next_ch() != Some(':') {}
            is_float = true;
        } else if self.looking_at("NaN") || self.looking_at("Inf") {
            // This is Inf or a default quiet NaN.
            is_float = true;
        }

        // Look for the end of this number. Detect the radix point if there is one.
        loop {
            match self.next_ch() {
                // '-' allows exponents like `0x0.4p-34`; '_' is a digit separator (`2_000`).
                Some('-') | Some('_') => {}
                // A radix point makes this a float.
                Some('.') => is_float = true,
                // Digits and letters cover hex digits and radix/exponent markers like 'x', 'p'.
                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
                _ => break,
            }
        }
        // `begin..self.pos` spans the whole number text, including any sign and prefix.
        let text = &self.source[begin..self.pos];
        if is_float {
            token(Token::Float(text), loc)
        } else {
            token(Token::Integer(text), loc)
        }
    }
306 | | |
307 | | // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an |
308 | | // alphabetic char, followed by zero or more alphanumeric or '_' characters. |
309 | 0 | fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> { |
310 | 0 | let begin = self.pos; |
311 | 0 | let loc = self.loc(); |
312 | | |
313 | 0 | assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic()); |
314 | 0 | loop { |
315 | 0 | match self.next_ch() { |
316 | 0 | Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {} |
317 | 0 | _ => break, |
318 | 0 | } |
319 | 0 | } |
320 | 0 | let text = &self.source[begin..self.pos]; |
321 | 0 |
|
322 | 0 | // Look for numbered well-known entities like block15, v45, ... |
323 | 0 | token( |
324 | 0 | split_entity_name(text) |
325 | 0 | .and_then(|(prefix, number)| { |
326 | 0 | Self::numbered_entity(prefix, number) |
327 | 0 | .or_else(|| Self::value_type(text, prefix, number)) |
328 | 0 | }) |
329 | 0 | .unwrap_or_else(|| match text { |
330 | 0 | "cold" => Token::Cold, |
331 | 0 | _ => Token::Identifier(text), |
332 | 0 | }), |
333 | 0 | loc, |
334 | 0 | ) |
335 | 0 | } |
336 | | |
337 | | // If prefix is a well-known entity prefix and suffix is a valid entity number, return the |
338 | | // decoded token. |
339 | 0 | fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> { |
340 | 0 | match prefix { |
341 | 0 | "v" => Value::with_number(number).map(Token::Value), |
342 | 0 | "block" => Block::with_number(number).map(Token::Block), |
343 | 0 | "ss" => Some(Token::StackSlot(number)), |
344 | 0 | "dss" => Some(Token::DynamicStackSlot(number)), |
345 | 0 | "dt" => Some(Token::DynamicType(number)), |
346 | 0 | "gv" => Some(Token::GlobalValue(number)), |
347 | 0 | "table" => Some(Token::Table(number)), |
348 | 0 | "const" => Some(Token::Constant(number)), |
349 | 0 | "fn" => Some(Token::FuncRef(number)), |
350 | 0 | "sig" => Some(Token::SigRef(number)), |
351 | 0 | "u" => Some(Token::UserRef(number)), |
352 | 0 | "userextname" => Some(Token::UserNameRef(number)), |
353 | 0 | _ => None, |
354 | | } |
355 | 0 | } |
356 | | |
357 | | // Recognize a scalar or vector type. |
358 | 0 | fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> { |
359 | 0 | let is_vector = prefix.ends_with('x'); |
360 | 0 | let scalar = if is_vector { |
361 | 0 | &prefix[0..prefix.len() - 1] |
362 | | } else { |
363 | 0 | text |
364 | | }; |
365 | 0 | let base_type = match scalar { |
366 | 0 | "i8" => types::I8, |
367 | 0 | "i16" => types::I16, |
368 | 0 | "i32" => types::I32, |
369 | 0 | "i64" => types::I64, |
370 | 0 | "i128" => types::I128, |
371 | 0 | "f32" => types::F32, |
372 | 0 | "f64" => types::F64, |
373 | 0 | "r32" => types::R32, |
374 | 0 | "r64" => types::R64, |
375 | 0 | _ => return None, |
376 | | }; |
377 | 0 | if is_vector { |
378 | 0 | if number <= u32::from(u16::MAX) { |
379 | 0 | base_type.by(number).map(Token::Type) |
380 | | } else { |
381 | 0 | None |
382 | | } |
383 | | } else { |
384 | 0 | Some(Token::Type(base_type)) |
385 | | } |
386 | 0 | } |
387 | | |
388 | 0 | fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> { |
389 | 0 | let loc = self.loc(); |
390 | 0 | let begin = self.pos + 1; |
391 | 0 |
|
392 | 0 | assert_eq!(self.lookahead, Some('%')); |
393 | | |
394 | 0 | loop { |
395 | 0 | match self.next_ch() { |
396 | 0 | Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {} |
397 | 0 | _ => break, |
398 | 0 | } |
399 | 0 | } |
400 | 0 |
|
401 | 0 | let end = self.pos; |
402 | 0 | token(Token::Name(&self.source[begin..end]), loc) |
403 | 0 | } |
404 | | |
405 | | /// Scan for a multi-line quoted string with no escape character. |
406 | 0 | fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> { |
407 | 0 | let loc = self.loc(); |
408 | 0 | let begin = self.pos + 1; |
409 | 0 |
|
410 | 0 | assert_eq!(self.lookahead, Some('"')); |
411 | | |
412 | 0 | while let Some(c) = self.next_ch() { |
413 | 0 | if c == '"' { |
414 | 0 | break; |
415 | 0 | } |
416 | | } |
417 | | |
418 | 0 | let end = self.pos; |
419 | 0 | if self.lookahead != Some('"') { |
420 | 0 | return error(LexError::InvalidChar, self.loc()); |
421 | 0 | } |
422 | 0 | self.next_ch(); |
423 | 0 | token(Token::String(&self.source[begin..end]), loc) |
424 | 0 | } |
425 | | |
426 | 0 | fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> { |
427 | 0 | let loc = self.loc(); |
428 | 0 | let begin = self.pos + 1; |
429 | 0 |
|
430 | 0 | assert_eq!(self.lookahead, Some('#')); |
431 | | |
432 | 0 | while let Some(c) = self.next_ch() { |
433 | 0 | if !char::is_digit(c, 16) { |
434 | 0 | break; |
435 | 0 | } |
436 | | } |
437 | | |
438 | 0 | let end = self.pos; |
439 | 0 | token(Token::HexSequence(&self.source[begin..end]), loc) |
440 | 0 | } |
441 | | |
442 | 0 | fn scan_srcloc(&mut self) -> Result<LocatedToken<'a>, LocatedError> { |
443 | 0 | let loc = self.loc(); |
444 | 0 | let begin = self.pos + 1; |
445 | 0 |
|
446 | 0 | assert_eq!(self.lookahead, Some('@')); |
447 | | |
448 | 0 | while let Some(c) = self.next_ch() { |
449 | 0 | if !char::is_digit(c, 16) { |
450 | 0 | break; |
451 | 0 | } |
452 | | } |
453 | | |
454 | 0 | let end = self.pos; |
455 | 0 | token(Token::SourceLoc(&self.source[begin..end]), loc) |
456 | 0 | } |
457 | | |
    /// Get the next token or a lexical error.
    ///
    /// Return None when the end of the source is encountered.
    #[allow(clippy::cognitive_complexity)]
    pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        // The loop only repeats when whitespace is skipped; every other arm returns.
        loop {
            let loc = self.loc();
            return match self.lookahead {
                None => None,
                Some(';') => Some(self.scan_comment()),
                Some('(') => Some(self.scan_char(Token::LPar)),
                Some(')') => Some(self.scan_char(Token::RPar)),
                Some('{') => Some(self.scan_char(Token::LBrace)),
                Some('}') => Some(self.scan_char(Token::RBrace)),
                Some('[') => Some(self.scan_char(Token::LBracket)),
                Some(']') => Some(self.scan_char(Token::RBracket)),
                Some(',') => Some(self.scan_char(Token::Comma)),
                Some('.') => Some(self.scan_char(Token::Dot)),
                Some(':') => Some(self.scan_char(Token::Colon)),
                Some('=') => Some(self.scan_char(Token::Equal)),
                Some('!') => Some(self.scan_char(Token::Not)),
                // '+' may start a signed number; `scan_number` falls back to Token::Plus.
                Some('+') => Some(self.scan_number()),
                Some('*') => Some(self.scan_char(Token::Multiply)),
                Some('-') => {
                    if self.looking_at("->") {
                        Some(self.scan_chars(2, Token::Arrow))
                    } else {
                        // `scan_number` falls back to Token::Minus when no number follows.
                        Some(self.scan_number())
                    }
                }
                Some('0'..='9') => Some(self.scan_number()),
                Some('a'..='z') | Some('A'..='Z') => {
                    // NaN and Inf are numbers despite starting with a letter.
                    if self.looking_at("NaN") || self.looking_at("Inf") {
                        Some(self.scan_number())
                    } else {
                        Some(self.scan_word())
                    }
                }
                Some('%') => Some(self.scan_name()),
                Some('"') => Some(self.scan_string()),
                Some('#') => Some(self.scan_hex_sequence()),
                Some('@') => Some(self.scan_srcloc()),
                // all ascii whitespace
                // (note: '\x09'..='\x0d' is TAB..CR, which includes vertical tab)
                Some(' ') | Some('\x09'..='\x0d') => {
                    self.next_ch();
                    continue;
                }
                _ => {
                    // Skip invalid char, return error.
                    self.next_ch();
                    Some(error(LexError::InvalidChar, loc))
                }
            };
        }
    }
513 | | } |
514 | | |
#[cfg(test)]
mod tests {
    use super::trailing_digits;
    use super::*;
    use crate::error::Location;
    use cranelift_codegen::ir::types;
    use cranelift_codegen::ir::{Block, Value};

    // `trailing_digits` counts only the run of ASCII digits at the very end of the string.
    #[test]
    fn digits() {
        assert_eq!(trailing_digits(""), 0);
        assert_eq!(trailing_digits("x"), 0);
        assert_eq!(trailing_digits("0x"), 0);
        assert_eq!(trailing_digits("x1"), 1);
        assert_eq!(trailing_digits("1x1"), 1);
        assert_eq!(trailing_digits("1x01"), 2);
    }

    // `split_entity_name` splits a head from a canonical (no leading zero) numeric tail.
    #[test]
    fn entity_name() {
        assert_eq!(split_entity_name(""), None);
        assert_eq!(split_entity_name("x"), None);
        assert_eq!(split_entity_name("x+"), None);
        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
        assert_eq!(split_entity_name("1"), Some(("", 1)));
        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
        // Reject this non-canonical form.
        assert_eq!(split_entity_name("inst01"), None);
    }

    // Test-local helpers wrapping expected tokens/errors in the `Option<Result<..>>`
    // shape produced by `Lexer::next`.
    fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::token(token, Location { line_number: line }))
    }

    fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::error(error, Location { line_number: line }))
    }

    // Empty and all-whitespace inputs produce no tokens.
    #[test]
    fn make_lexer() {
        let mut l1 = Lexer::new("");
        let mut l2 = Lexer::new(" ");
        let mut l3 = Lexer::new("\n ");

        assert_eq!(l1.next(), None);
        assert_eq!(l2.next(), None);
        assert_eq!(l3.next(), None);
    }

    // Comments run to end-of-line and carry the line number they start on.
    #[test]
    fn lex_comment() {
        let mut lex = Lexer::new("; hello");
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);

        lex = Lexer::new("\n ;hello\n;foo");
        assert_eq!(lex.next(), token(Token::Comment(";hello"), 2));
        assert_eq!(lex.next(), token(Token::Comment(";foo"), 3));
        assert_eq!(lex.next(), None);

        // Scan a comment after an invalid char.
        let mut lex = Lexer::new("$; hello");
        assert_eq!(lex.next(), error(LexError::InvalidChar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);
    }

    // Single-character punctuation tokens, across a line break.
    #[test]
    fn lex_chars() {
        let mut lex = Lexer::new("(); hello\n = :{, }.");
        assert_eq!(lex.next(), token(Token::LPar, 1));
        assert_eq!(lex.next(), token(Token::RPar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), token(Token::Equal, 2));
        assert_eq!(lex.next(), token(Token::Colon, 2));
        assert_eq!(lex.next(), token(Token::LBrace, 2));
        assert_eq!(lex.next(), token(Token::Comma, 2));
        assert_eq!(lex.next(), token(Token::RBrace, 2));
        assert_eq!(lex.next(), token(Token::Dot, 2));
        assert_eq!(lex.next(), None);
    }

    // Integer and float forms, including signs, hex, separators, and NaN.
    #[test]
    fn lex_numbers() {
        let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
        assert_eq!(lex.next(), token(Token::Integer("0"), 1));
        assert_eq!(lex.next(), token(Token::Integer("2_000"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-1"), 1));
        assert_eq!(lex.next(), token(Token::Comma, 1));
        assert_eq!(lex.next(), token(Token::Integer("0xf"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0.0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1));
        assert_eq!(lex.next(), token(Token::Float("NaN"), 1));
        assert_eq!(lex.next(), token(Token::Integer("+5"), 1));
        assert_eq!(lex.next(), None);
    }

    // Numbered entities, types, and fallbacks to plain identifiers
    // (non-canonical numbers like "v00" stay identifiers).
    #[test]
    fn lex_identifiers() {
        let mut lex = Lexer::new(
            "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
             function0 function i8 i32x4 f32x5",
        );
        assert_eq!(
            lex.next(),
            token(Token::Value(Value::with_number(0).unwrap()), 1)
        );
        assert_eq!(lex.next(), token(Token::Identifier("v00"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1));
        assert_eq!(
            lex.next(),
            token(Token::Block(Block::with_number(1234567890).unwrap()), 1)
        );
        assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function0"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I8), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1));
        assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1));
        assert_eq!(lex.next(), None);
    }

    // '#'-prefixed hex sequences keep their exact digit text.
    #[test]
    fn lex_hex_sequences() {
        let mut lex = Lexer::new("#0 #DEADbeef123 #789");

        assert_eq!(lex.next(), token(Token::HexSequence("0"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("789"), 1));
    }

    // '%'-prefixed names are taken verbatim, even when they look like entities.
    #[test]
    fn lex_names() {
        let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");

        assert_eq!(lex.next(), token(Token::Name("0"), 1));
        assert_eq!(lex.next(), token(Token::Name("x3"), 1));
        assert_eq!(lex.next(), token(Token::Name("function"), 1));
        assert_eq!(lex.next(), token(Token::Name("123_abc"), 1));
        assert_eq!(lex.next(), token(Token::Name("ss0"), 1));
        assert_eq!(lex.next(), token(Token::Name("v3"), 1));
        assert_eq!(lex.next(), token(Token::Name("block11"), 1));
        assert_eq!(lex.next(), token(Token::Name("const42"), 1));
        assert_eq!(lex.next(), token(Token::Name("_"), 1));
    }

    // Quoted strings have no escapes and may span multiple lines; the token
    // carries the line the string STARTED on.
    #[test]
    fn lex_strings() {
        let mut lex = Lexer::new(
            r#""" "0" "x3""function" "123 abc" "\" "start
            and end on
            different lines" "#,
        );

        assert_eq!(lex.next(), token(Token::String(""), 1));
        assert_eq!(lex.next(), token(Token::String("0"), 1));
        assert_eq!(lex.next(), token(Token::String("x3"), 1));
        assert_eq!(lex.next(), token(Token::String("function"), 1));
        assert_eq!(lex.next(), token(Token::String("123 abc"), 1));
        assert_eq!(lex.next(), token(Token::String(r#"\"#), 1));
        assert_eq!(
            lex.next(),
            token(
                Token::String(
                    r#"start
            and end on
            different lines"#
                ),
                1
            )
        );
    }

    // 'u'-prefixed user references; a ':' ends the number.
    #[test]
    fn lex_userrefs() {
        let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");

        assert_eq!(lex.next(), token(Token::UserRef(0), 1));
        assert_eq!(lex.next(), token(Token::UserRef(1), 1));
        assert_eq!(lex.next(), token(Token::UserRef(234567890), 1));
        assert_eq!(lex.next(), token(Token::UserRef(9), 1));
        assert_eq!(lex.next(), token(Token::Colon, 1));
        assert_eq!(lex.next(), token(Token::Integer("8765"), 1));
        assert_eq!(lex.next(), None);
    }
}
706 | | } |