Coverage Report

Created: 2023-04-25 07:07

/src/wasmtime/cranelift/reader/src/lexer.rs
Line
Count
Source (jump to first uncovered line)
1
//! Lexical analysis for .clif files.
2
3
use crate::error::Location;
4
use cranelift_codegen::ir::types;
5
use cranelift_codegen::ir::{Block, Value};
6
#[allow(unused_imports, deprecated)]
7
use std::ascii::AsciiExt;
8
use std::str::CharIndices;
9
use std::u16;
10
11
/// A Token returned from the `Lexer`.
///
/// Some variants may contain references to the original source text, so the `Token` has the same
/// lifetime as the source.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'a> {
    Comment(&'a str),      // ';' to the end of the line
    LPar,                  // '('
    RPar,                  // ')'
    LBrace,                // '{'
    RBrace,                // '}'
    LBracket,              // '['
    RBracket,              // ']'
    Minus,                 // '-'
    Plus,                  // '+'
    Multiply,              // '*'
    Comma,                 // ','
    Dot,                   // '.'
    Colon,                 // ':'
    Equal,                 // '='
    Not,                   // '!'
    Arrow,                 // '->'
    Float(&'a str),        // Floating point immediate
    Integer(&'a str),      // Integer immediate
    Type(types::Type),     // i32, f32, b32x4, ...
    DynamicType(u32),      // dt5
    Value(Value),          // v12, v7
    Block(Block),          // block3
    Cold,                  // cold (flag on block)
    StackSlot(u32),        // ss3
    DynamicStackSlot(u32), // dss4
    GlobalValue(u32),      // gv3
    Table(u32),            // table2
    Constant(u32),         // const2
    FuncRef(u32),          // fn2
    SigRef(u32),           // sig2
    UserRef(u32),          // u345
    UserNameRef(u32),      // userextname345
    Name(&'a str),         // %9arbitrary_alphanum, %x3, %0, %function ...
    String(&'a str),       // "arbitrary quoted string with no escape" ...
    HexSequence(&'a str),  // #89AF
    Identifier(&'a str),   // Unrecognized identifier (opcode, enumerator, ...)
    SourceLoc(&'a str),    // @00c7
}
55
56
/// A `Token` with an associated location.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedToken<'a> {
    /// The lexed token.
    pub token: Token<'a>,
    /// The source location where `token` was found.
    pub location: Location,
}
62
63
/// Wrap up a `Token` with the given location.
64
0
fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
65
0
    Ok(LocatedToken {
66
0
        token,
67
0
        location: loc,
68
0
    })
69
0
}
70
71
/// An error from the lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
    /// A character that cannot start any valid token was encountered.
    InvalidChar,
}
76
77
/// A `LexError` with an associated Location.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocatedError {
    /// The kind of lexical error.
    pub error: LexError,
    /// The source location where the error occurred.
    pub location: Location,
}
83
84
/// Wrap up a `LexError` with the given location.
85
0
fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
86
0
    Err(LocatedError {
87
0
        error,
88
0
        location: loc,
89
0
    })
90
0
}
91
92
/// Get the number of decimal digits at the end of `s`.
fn trailing_digits(s: &str) -> usize {
    // Iterate backwards over bytes; only ASCII digits can form the numeric
    // tail, so byte-wise scanning is both correct and fast.
    s.as_bytes()
        .iter()
        .rev()
        .take_while(|b| b.is_ascii_digit())
        .count()
}

/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
/// letters and numeric tail.
pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
    let head_len = name.len() - trailing_digits(name);
    let (head, tail) = name.split_at(head_len);
    // Reject the non-canonical form with leading zeros, e.g. `inst01`.
    if tail.len() > 1 && tail.starts_with('0') {
        return None;
    }
    // `parse` also rejects an empty tail and numbers that overflow `u32`.
    let number: u32 = tail.parse().ok()?;
    Some((head, number))
}
112
113
/// Lexical analysis.
///
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
///
/// Also keep track of a line number for error reporting.
///
pub struct Lexer<'a> {
    // Complete source being processed.
    source: &'a str,

    // Iterator into `source`.
    chars: CharIndices<'a>,

    // Next character to be processed, or `None` at the end.
    lookahead: Option<char>,

    // Index into `source` of lookahead character.
    pos: usize,

    // Current line number (starts at 1; see `new`).
    line_number: usize,
}
135
136
impl<'a> Lexer<'a> {
137
0
    /// Create a new `Lexer` over `s`, positioned at the first character.
    pub fn new(s: &'a str) -> Self {
        let mut lex = Self {
            source: s,
            chars: s.char_indices(),
            pos: 0,
            line_number: 1,
            lookahead: None,
        };
        // Prime the lookahead with the first character of the source.
        lex.next_ch();
        lex
    }
149
150
    // Advance to the next character.
151
    // Return the next lookahead character, or None when the end is encountered.
152
    // Always update cur_ch to reflect
153
0
    fn next_ch(&mut self) -> Option<char> {
154
0
        if self.lookahead == Some('\n') {
155
0
            self.line_number += 1;
156
0
        }
157
0
        match self.chars.next() {
158
0
            Some((idx, ch)) => {
159
0
                self.pos = idx;
160
0
                self.lookahead = Some(ch);
161
0
            }
162
0
            None => {
163
0
                self.pos = self.source.len();
164
0
                self.lookahead = None;
165
0
            }
166
        }
167
0
        self.lookahead
168
0
    }
169
170
    // Get the location corresponding to `lookahead`.
171
0
    fn loc(&self) -> Location {
172
0
        Location {
173
0
            line_number: self.line_number,
174
0
        }
175
0
    }
176
177
    // Starting from `lookahead`, are we looking at `prefix`?
178
0
    fn looking_at(&self, prefix: &str) -> bool {
179
0
        self.source[self.pos..].starts_with(prefix)
180
0
    }
181
182
    // Starting from `lookahead`, are we looking at a number?
183
    fn looking_at_numeric(&self) -> bool {
184
0
        if let Some(c) = self.lookahead {
185
0
            match c {
186
0
                '0'..='9' => return true,
187
0
                '-' => return true,
188
0
                '+' => return true,
189
0
                '.' => return true,
190
0
                _ => {}
191
0
            }
192
0
            if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
193
0
                return true;
194
0
            }
195
0
        }
196
0
        false
197
0
    }
198
199
    // Scan a single-char token.
200
0
    fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
201
0
        assert_ne!(self.lookahead, None);
202
0
        let loc = self.loc();
203
0
        self.next_ch();
204
0
        token(tok, loc)
205
0
    }
206
207
    // Scan a multi-char token.
208
0
    fn scan_chars(
209
0
        &mut self,
210
0
        count: usize,
211
0
        tok: Token<'a>,
212
0
    ) -> Result<LocatedToken<'a>, LocatedError> {
213
0
        let loc = self.loc();
214
0
        for _ in 0..count {
215
0
            assert_ne!(self.lookahead, None);
216
0
            self.next_ch();
217
        }
218
0
        token(tok, loc)
219
0
    }
220
221
    /// Get the rest of the current line.
222
    /// The next token returned by `next()` will be from the following lines.
223
0
    pub fn rest_of_line(&mut self) -> &'a str {
224
0
        let begin = self.pos;
225
0
        loop {
226
0
            match self.next_ch() {
227
0
                None | Some('\n') => return &self.source[begin..self.pos],
228
0
                _ => {}
229
            }
230
        }
231
0
    }
232
233
    // Scan a comment extending to the end of the current line.
234
0
    fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
235
0
        let loc = self.loc();
236
0
        let text = self.rest_of_line();
237
0
        token(Token::Comment(text), loc)
238
0
    }
239
240
    // Scan a number token which can represent either an integer or floating point number.
241
    //
242
    // Accept the following forms:
243
    //
244
    // - `10`: Integer
245
    // - `-10`: Integer
246
    // - `0xff_00`: Integer
247
    // - `0.0`: Float
248
    // - `0x1.f`: Float
249
    // - `-0x2.4`: Float
250
    // - `0x0.4p-34`: Float
251
    //
252
    // This function does not filter out all invalid numbers. It depends in the context-sensitive
253
    // decoding of the text for that. For example, the number of allowed digits in an `Ieee32` and
254
    // an `Ieee64` constant are different.
255
0
    fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
256
0
        let begin = self.pos;
257
0
        let loc = self.loc();
258
0
        let mut is_float = false;
259
0
260
0
        // Skip a leading sign.
261
0
        match self.lookahead {
262
            Some('-') => {
263
0
                self.next_ch();
264
0
                if !self.looking_at_numeric() {
265
                    // If the next characters won't parse as a number, we return Token::Minus
266
0
                    return token(Token::Minus, loc);
267
0
                }
268
            }
269
            Some('+') => {
270
0
                self.next_ch();
271
0
                if !self.looking_at_numeric() {
272
                    // If the next characters won't parse as a number, we return Token::Plus
273
0
                    return token(Token::Plus, loc);
274
0
                }
275
            }
276
0
            _ => {}
277
        }
278
279
        // Check for NaNs with payloads.
280
0
        if self.looking_at("NaN:") || self.looking_at("sNaN:") {
281
            // Skip the `NaN:` prefix, the loop below won't accept it.
282
            // We expect a hexadecimal number to follow the colon.
283
0
            while self.next_ch() != Some(':') {}
284
0
            is_float = true;
285
0
        } else if self.looking_at("NaN") || self.looking_at("Inf") {
286
0
            // This is Inf or a default quiet NaN.
287
0
            is_float = true;
288
0
        }
289
290
        // Look for the end of this number. Detect the radix point if there is one.
291
0
        loop {
292
0
            match self.next_ch() {
293
0
                Some('-') | Some('_') => {}
294
0
                Some('.') => is_float = true,
295
0
                Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
296
0
                _ => break,
297
0
            }
298
0
        }
299
0
        let text = &self.source[begin..self.pos];
300
0
        if is_float {
301
0
            token(Token::Float(text), loc)
302
        } else {
303
0
            token(Token::Integer(text), loc)
304
        }
305
0
    }
306
307
    // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an
308
    // alphabetic char, followed by zero or more alphanumeric or '_' characters.
309
0
    fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
310
0
        let begin = self.pos;
311
0
        let loc = self.loc();
312
313
0
        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
314
0
        loop {
315
0
            match self.next_ch() {
316
0
                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
317
0
                _ => break,
318
0
            }
319
0
        }
320
0
        let text = &self.source[begin..self.pos];
321
0
322
0
        // Look for numbered well-known entities like block15, v45, ...
323
0
        token(
324
0
            split_entity_name(text)
325
0
                .and_then(|(prefix, number)| {
326
0
                    Self::numbered_entity(prefix, number)
327
0
                        .or_else(|| Self::value_type(text, prefix, number))
328
0
                })
329
0
                .unwrap_or_else(|| match text {
330
0
                    "cold" => Token::Cold,
331
0
                    _ => Token::Identifier(text),
332
0
                }),
333
0
            loc,
334
0
        )
335
0
    }
336
337
    // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
338
    // decoded token.
339
0
    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
340
0
        match prefix {
341
0
            "v" => Value::with_number(number).map(Token::Value),
342
0
            "block" => Block::with_number(number).map(Token::Block),
343
0
            "ss" => Some(Token::StackSlot(number)),
344
0
            "dss" => Some(Token::DynamicStackSlot(number)),
345
0
            "dt" => Some(Token::DynamicType(number)),
346
0
            "gv" => Some(Token::GlobalValue(number)),
347
0
            "table" => Some(Token::Table(number)),
348
0
            "const" => Some(Token::Constant(number)),
349
0
            "fn" => Some(Token::FuncRef(number)),
350
0
            "sig" => Some(Token::SigRef(number)),
351
0
            "u" => Some(Token::UserRef(number)),
352
0
            "userextname" => Some(Token::UserNameRef(number)),
353
0
            _ => None,
354
        }
355
0
    }
356
357
    // Recognize a scalar or vector type.
358
0
    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
359
0
        let is_vector = prefix.ends_with('x');
360
0
        let scalar = if is_vector {
361
0
            &prefix[0..prefix.len() - 1]
362
        } else {
363
0
            text
364
        };
365
0
        let base_type = match scalar {
366
0
            "i8" => types::I8,
367
0
            "i16" => types::I16,
368
0
            "i32" => types::I32,
369
0
            "i64" => types::I64,
370
0
            "i128" => types::I128,
371
0
            "f32" => types::F32,
372
0
            "f64" => types::F64,
373
0
            "r32" => types::R32,
374
0
            "r64" => types::R64,
375
0
            _ => return None,
376
        };
377
0
        if is_vector {
378
0
            if number <= u32::from(u16::MAX) {
379
0
                base_type.by(number).map(Token::Type)
380
            } else {
381
0
                None
382
            }
383
        } else {
384
0
            Some(Token::Type(base_type))
385
        }
386
0
    }
387
388
0
    fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
389
0
        let loc = self.loc();
390
0
        let begin = self.pos + 1;
391
0
392
0
        assert_eq!(self.lookahead, Some('%'));
393
394
0
        loop {
395
0
            match self.next_ch() {
396
0
                Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
397
0
                _ => break,
398
0
            }
399
0
        }
400
0
401
0
        let end = self.pos;
402
0
        token(Token::Name(&self.source[begin..end]), loc)
403
0
    }
404
405
    /// Scan for a multi-line quoted string with no escape character.
406
0
    fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
407
0
        let loc = self.loc();
408
0
        let begin = self.pos + 1;
409
0
410
0
        assert_eq!(self.lookahead, Some('"'));
411
412
0
        while let Some(c) = self.next_ch() {
413
0
            if c == '"' {
414
0
                break;
415
0
            }
416
        }
417
418
0
        let end = self.pos;
419
0
        if self.lookahead != Some('"') {
420
0
            return error(LexError::InvalidChar, self.loc());
421
0
        }
422
0
        self.next_ch();
423
0
        token(Token::String(&self.source[begin..end]), loc)
424
0
    }
425
426
0
    fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
427
0
        let loc = self.loc();
428
0
        let begin = self.pos + 1;
429
0
430
0
        assert_eq!(self.lookahead, Some('#'));
431
432
0
        while let Some(c) = self.next_ch() {
433
0
            if !char::is_digit(c, 16) {
434
0
                break;
435
0
            }
436
        }
437
438
0
        let end = self.pos;
439
0
        token(Token::HexSequence(&self.source[begin..end]), loc)
440
0
    }
441
442
0
    fn scan_srcloc(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
443
0
        let loc = self.loc();
444
0
        let begin = self.pos + 1;
445
0
446
0
        assert_eq!(self.lookahead, Some('@'));
447
448
0
        while let Some(c) = self.next_ch() {
449
0
            if !char::is_digit(c, 16) {
450
0
                break;
451
0
            }
452
        }
453
454
0
        let end = self.pos;
455
0
        token(Token::SourceLoc(&self.source[begin..end]), loc)
456
0
    }
457
458
    /// Get the next token or a lexical error.
    ///
    /// Return None when the end of the source is encountered.
    #[allow(clippy::cognitive_complexity)]
    pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        // Loop so that whitespace can be skipped without recursion; every
        // other arm returns.
        loop {
            let loc = self.loc();
            return match self.lookahead {
                None => None,
                Some(';') => Some(self.scan_comment()),
                Some('(') => Some(self.scan_char(Token::LPar)),
                Some(')') => Some(self.scan_char(Token::RPar)),
                Some('{') => Some(self.scan_char(Token::LBrace)),
                Some('}') => Some(self.scan_char(Token::RBrace)),
                Some('[') => Some(self.scan_char(Token::LBracket)),
                Some(']') => Some(self.scan_char(Token::RBracket)),
                Some(',') => Some(self.scan_char(Token::Comma)),
                Some('.') => Some(self.scan_char(Token::Dot)),
                Some(':') => Some(self.scan_char(Token::Colon)),
                Some('=') => Some(self.scan_char(Token::Equal)),
                Some('!') => Some(self.scan_char(Token::Not)),
                // '+' may begin a signed number; `scan_number` falls back to
                // `Token::Plus` when no number follows.
                Some('+') => Some(self.scan_number()),
                Some('*') => Some(self.scan_char(Token::Multiply)),
                // '-' is either the start of an arrow, a signed number, or
                // (via `scan_number`'s fallback) `Token::Minus`.
                Some('-') => {
                    if self.looking_at("->") {
                        Some(self.scan_chars(2, Token::Arrow))
                    } else {
                        Some(self.scan_number())
                    }
                }
                Some('0'..='9') => Some(self.scan_number()),
                // Letters usually begin a word, except the special floating
                // point spellings `NaN` and `Inf`.
                Some('a'..='z') | Some('A'..='Z') => {
                    if self.looking_at("NaN") || self.looking_at("Inf") {
                        Some(self.scan_number())
                    } else {
                        Some(self.scan_word())
                    }
                }
                Some('%') => Some(self.scan_name()),
                Some('"') => Some(self.scan_string()),
                Some('#') => Some(self.scan_hex_sequence()),
                Some('@') => Some(self.scan_srcloc()),
                // all ascii whitespace
                Some(' ') | Some('\x09'..='\x0d') => {
                    self.next_ch();
                    continue;
                }
                _ => {
                    // Skip invalid char, return error.
                    self.next_ch();
                    Some(error(LexError::InvalidChar, loc))
                }
            };
        }
    }
513
}
514
515
#[cfg(test)]
mod tests {
    use super::trailing_digits;
    use super::*;
    use crate::error::Location;
    use cranelift_codegen::ir::types;
    use cranelift_codegen::ir::{Block, Value};

    #[test]
    fn digits() {
        assert_eq!(trailing_digits(""), 0);
        assert_eq!(trailing_digits("x"), 0);
        assert_eq!(trailing_digits("0x"), 0);
        assert_eq!(trailing_digits("x1"), 1);
        assert_eq!(trailing_digits("1x1"), 1);
        assert_eq!(trailing_digits("1x01"), 2);
    }

    #[test]
    fn entity_name() {
        assert_eq!(split_entity_name(""), None);
        assert_eq!(split_entity_name("x"), None);
        assert_eq!(split_entity_name("x+"), None);
        assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
        assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
        assert_eq!(split_entity_name("1"), Some(("", 1)));
        assert_eq!(split_entity_name("x1"), Some(("x", 1)));
        assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
        // Reject this non-canonical form.
        assert_eq!(split_entity_name("inst01"), None);
    }

    // Helper: build the expected `Ok` result for `tok` at line `line`.
    fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::token(token, Location { line_number: line }))
    }

    // Helper: build the expected `Err` result for `error` at line `line`.
    fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
        Some(super::error(error, Location { line_number: line }))
    }

    #[test]
    fn make_lexer() {
        let mut l1 = Lexer::new("");
        let mut l2 = Lexer::new(" ");
        let mut l3 = Lexer::new("\n ");

        assert_eq!(l1.next(), None);
        assert_eq!(l2.next(), None);
        assert_eq!(l3.next(), None);
    }

    #[test]
    fn lex_comment() {
        let mut lex = Lexer::new("; hello");
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);

        lex = Lexer::new("\n  ;hello\n;foo");
        assert_eq!(lex.next(), token(Token::Comment(";hello"), 2));
        assert_eq!(lex.next(), token(Token::Comment(";foo"), 3));
        assert_eq!(lex.next(), None);

        // Scan a comment after an invalid char.
        let mut lex = Lexer::new("$; hello");
        assert_eq!(lex.next(), error(LexError::InvalidChar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_chars() {
        let mut lex = Lexer::new("(); hello\n = :{, }.");
        assert_eq!(lex.next(), token(Token::LPar, 1));
        assert_eq!(lex.next(), token(Token::RPar, 1));
        assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
        assert_eq!(lex.next(), token(Token::Equal, 2));
        assert_eq!(lex.next(), token(Token::Colon, 2));
        assert_eq!(lex.next(), token(Token::LBrace, 2));
        assert_eq!(lex.next(), token(Token::Comma, 2));
        assert_eq!(lex.next(), token(Token::RBrace, 2));
        assert_eq!(lex.next(), token(Token::Dot, 2));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_numbers() {
        let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
        assert_eq!(lex.next(), token(Token::Integer("0"), 1));
        assert_eq!(lex.next(), token(Token::Integer("2_000"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-1"), 1));
        assert_eq!(lex.next(), token(Token::Comma, 1));
        assert_eq!(lex.next(), token(Token::Integer("0xf"), 1));
        assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0.0"), 1));
        assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1));
        assert_eq!(lex.next(), token(Token::Float("NaN"), 1));
        assert_eq!(lex.next(), token(Token::Integer("+5"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_identifiers() {
        let mut lex = Lexer::new(
            "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
             function0 function i8 i32x4 f32x5",
        );
        assert_eq!(
            lex.next(),
            token(Token::Value(Value::with_number(0).unwrap()), 1)
        );
        // Non-canonical entity numbers lex as plain identifiers.
        assert_eq!(lex.next(), token(Token::Identifier("v00"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1));
        assert_eq!(
            lex.next(),
            token(Token::Block(Block::with_number(1234567890).unwrap()), 1)
        );
        // Out of range for a `Block` number, so it falls back to an identifier.
        assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function0"), 1));
        assert_eq!(lex.next(), token(Token::Identifier("function"), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I8), 1));
        assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1));
        // `f32x5` is not a valid vector type (5 lanes), so it is an identifier.
        assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn lex_hex_sequences() {
        let mut lex = Lexer::new("#0 #DEADbeef123 #789");

        assert_eq!(lex.next(), token(Token::HexSequence("0"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1));
        assert_eq!(lex.next(), token(Token::HexSequence("789"), 1));
    }

    #[test]
    fn lex_names() {
        let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");

        assert_eq!(lex.next(), token(Token::Name("0"), 1));
        assert_eq!(lex.next(), token(Token::Name("x3"), 1));
        assert_eq!(lex.next(), token(Token::Name("function"), 1));
        assert_eq!(lex.next(), token(Token::Name("123_abc"), 1));
        assert_eq!(lex.next(), token(Token::Name("ss0"), 1));
        assert_eq!(lex.next(), token(Token::Name("v3"), 1));
        assert_eq!(lex.next(), token(Token::Name("block11"), 1));
        assert_eq!(lex.next(), token(Token::Name("const42"), 1));
        assert_eq!(lex.next(), token(Token::Name("_"), 1));
    }

    #[test]
    fn lex_strings() {
        let mut lex = Lexer::new(
            r#"""  "0" "x3""function" "123 abc" "\" "start
                    and end on
                    different lines" "#,
        );

        assert_eq!(lex.next(), token(Token::String(""), 1));
        assert_eq!(lex.next(), token(Token::String("0"), 1));
        assert_eq!(lex.next(), token(Token::String("x3"), 1));
        assert_eq!(lex.next(), token(Token::String("function"), 1));
        assert_eq!(lex.next(), token(Token::String("123 abc"), 1));
        // There is no escape character: a lone backslash is literal text.
        assert_eq!(lex.next(), token(Token::String(r#"\"#), 1));
        assert_eq!(
            lex.next(),
            token(
                Token::String(
                    r#"start
                    and end on
                    different lines"#
                ),
                1
            )
        );
    }

    #[test]
    fn lex_userrefs() {
        let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");

        assert_eq!(lex.next(), token(Token::UserRef(0), 1));
        assert_eq!(lex.next(), token(Token::UserRef(1), 1));
        assert_eq!(lex.next(), token(Token::UserRef(234567890), 1));
        assert_eq!(lex.next(), token(Token::UserRef(9), 1));
        assert_eq!(lex.next(), token(Token::Colon, 1));
        assert_eq!(lex.next(), token(Token::Integer("8765"), 1));
        assert_eq!(lex.next(), None);
    }
}