/src/wabt/src/wast-lexer.cc
Line | Count | Source
1 | | /* |
2 | | * Copyright 2016 WebAssembly Community Group participants |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include "wabt/wast-lexer.h" |
18 | | |
19 | | #include <cassert> |
20 | | #include <cstdio> |
21 | | |
22 | | #include "wabt/config.h" |
23 | | |
24 | | #include "wabt/lexer-source.h" |
25 | | |
26 | 25.6M | #define ERROR(...) Error(GetLocation(), __VA_ARGS__) |
27 | | |
28 | | namespace wabt { |
29 | | |
30 | | namespace { |
31 | | |
32 | | #if __clang__ |
33 | | #pragma clang diagnostic push |
34 | | #pragma clang diagnostic ignored "-Wimplicit-fallthrough" |
35 | | #endif |
36 | | #include "prebuilt/lexer-keywords.cc" |
37 | | #if __clang__ |
38 | | #pragma clang diagnostic pop |
39 | | #endif |
40 | | |
41 | | } // namespace |
42 | | |
43 | | WastLexer::WastLexer(std::unique_ptr<LexerSource> source, |
44 | | std::string_view filename, |
45 | | Errors* errors) |
46 | 23.5k | : source_(std::move(source)), |
47 | 23.5k | filename_(filename), |
48 | 23.5k | line_(1), |
49 | 23.5k | buffer_(static_cast<const char*>(source_->data())), |
50 | 23.5k | buffer_end_(buffer_ + source_->size()), |
51 | 23.5k | line_start_(buffer_), |
52 | 23.5k | token_start_(buffer_), |
53 | 23.5k | cursor_(buffer_), |
54 | 23.5k | errors_(errors) {} |
55 | | |
56 | | // static |
57 | | std::unique_ptr<WastLexer> WastLexer::CreateBufferLexer( |
58 | | std::string_view filename, |
59 | | const void* data, |
60 | | size_t size, |
61 | 23.5k | Errors* errors) { |
62 | 23.5k | return std::make_unique<WastLexer>(std::make_unique<LexerSource>(data, size), |
63 | 23.5k | filename, errors); |
64 | 23.5k | } |
65 | | |
66 | 24.3M | Token WastLexer::GetToken() { |
67 | 43.5M | while (true) { |
68 | 43.5M | token_start_ = cursor_; |
69 | 43.5M | switch (PeekChar()) { |
70 | 1.49M | case kEof: |
71 | 1.49M | return BareToken(TokenType::Eof); |
72 | | |
73 | 7.98M | case '(': |
74 | 7.98M | if (MatchString("(;")) { |
75 | 6.13k | if (ReadBlockComment()) { |
76 | 5.98k | continue; |
77 | 5.98k | } |
78 | 145 | return BareToken(TokenType::Eof); |
79 | 7.97M | } else if (MatchString("(@")) { |
80 | 1.31M | GetIdChars(); |
81 | | // offset=2 to skip the "(@" prefix |
82 | 1.31M | return TextToken(TokenType::LparAnn, 2); |
83 | 6.66M | } else { |
84 | 6.66M | ReadChar(); |
85 | 6.66M | return BareToken(TokenType::Lpar); |
86 | 6.66M | } |
87 | 0 | break; |
88 | | |
89 | 1.83M | case ')': |
90 | 1.83M | ReadChar(); |
91 | 1.83M | return BareToken(TokenType::Rpar); |
92 | | |
93 | 395k | case ';': |
94 | 395k | if (MatchString(";;")) { |
95 | 1.15k | if (ReadLineComment()) { |
96 | 1.13k | continue; |
97 | 1.13k | } |
98 | 15 | return BareToken(TokenType::Eof); |
99 | 394k | } else { |
100 | 394k | ReadChar(); |
101 | 394k | ERROR("unexpected char"); |
102 | 394k | continue; |
103 | 394k | } |
104 | 0 | break; |
105 | | |
106 | 206k | case ' ': |
107 | 234k | case '\t': |
108 | 359k | case '\r': |
109 | 981k | case '\n': |
110 | 981k | ReadWhitespace(); |
111 | 981k | continue; |
112 | | |
113 | 569k | case '"': |
114 | 569k | return GetStringToken(); |
115 | | |
116 | 76.7k | case '+': |
117 | 153k | case '-': |
118 | 153k | ReadChar(); |
119 | 153k | switch (PeekChar()) { |
120 | 2.02k | case 'i': |
121 | 2.02k | return GetInfToken(); |
122 | | |
123 | 11.5k | case 'n': |
124 | 11.5k | return GetNanToken(); |
125 | | |
126 | 6.60k | case '0': |
127 | 6.60k | return MatchString("0x") ? GetHexNumberToken(TokenType::Int) |
128 | 6.60k | : GetNumberToken(TokenType::Int); |
129 | 9.04k | case '1': |
130 | 10.7k | case '2': |
131 | 22.6k | case '3': |
132 | 30.8k | case '4': |
133 | 31.1k | case '5': |
134 | 31.7k | case '6': |
135 | 33.4k | case '7': |
136 | 35.6k | case '8': |
137 | 36.0k | case '9': |
138 | 36.0k | return GetNumberToken(TokenType::Int); |
139 | | |
140 | 96.7k | default: |
141 | 96.7k | return GetReservedToken(); |
142 | 153k | } |
143 | 0 | break; |
144 | | |
145 | 648k | case '0': |
146 | 648k | return MatchString("0x") ? GetHexNumberToken(TokenType::Nat) |
147 | 648k | : GetNumberToken(TokenType::Nat); |
148 | | |
149 | 226k | case '1': |
150 | 294k | case '2': |
151 | 596k | case '3': |
152 | 620k | case '4': |
153 | 639k | case '5': |
154 | 667k | case '6': |
155 | 678k | case '7': |
156 | 771k | case '8': |
157 | 775k | case '9': |
158 | 775k | return GetNumberToken(TokenType::Nat); |
159 | | |
160 | 2.40M | case '$': |
161 | 2.40M | return GetIdChars();  // Initial '$' is an idchar, so this produces an id token.
162 | | |
163 | 77.3k | case 'a': |
164 | 77.3k | return GetNameEqNumToken("align=", TokenType::AlignEqNat); |
165 | | |
166 | 1.27M | case 'i': |
167 | 1.27M | return GetInfToken(); |
168 | | |
169 | 369k | case 'n': |
170 | 369k | return GetNanToken(); |
171 | | |
172 | 58.8k | case 'o': |
173 | 58.8k | return GetNameEqNumToken("offset=", TokenType::OffsetEqNat); |
174 | | |
175 | 24.5M | default: |
176 | 24.5M | if (IsKeyword(PeekChar())) { |
177 | 6.30M | return GetKeywordToken(); |
178 | 18.2M | } else if (IsIdChar(PeekChar())) { |
179 | 351k | return GetReservedToken(); |
180 | 17.8M | } else { |
181 | 17.8M | ReadChar(); |
182 | 17.8M | ERROR("unexpected char"); |
183 | 17.8M | continue; |
184 | 17.8M | } |
185 | 43.5M | } |
186 | 43.5M | } |
187 | 24.3M | } |
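// A minimal sketch of driving the lexer (this assumes Token's token_type()
// accessor, which lives in wabt/token.h and is not defined in this file):
//
//   Errors errors;
//   auto lexer =
//       WastLexer::CreateBufferLexer("test.wast", data, size, &errors);
//   for (Token t = lexer->GetToken(); t.token_type() != TokenType::Eof;
//        t = lexer->GetToken()) {
//     // Each token carries its Location, so diagnostics can be precise.
//   }
//
// Lexing errors do not stop the loop: GetToken() records them in `errors`
// via ERROR() and keeps scanning, so one pass reports every bad character.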
188 | | |
189 | 52.5M | Location WastLexer::GetLocation() { |
190 | 105M | auto column = [this](const char* p) { |
191 | 105M | return std::max(1, static_cast<int>(p - line_start_ + 1)); |
192 | 105M | }; |
193 | 52.5M | return Location(filename_, line_, column(token_start_), column(cursor_)); |
194 | 52.5M | } |
195 | | |
196 | 9.19M | std::string_view WastLexer::GetText(size_t offset) { |
197 | | // Bounds checks are necessary because token_start may have been moved |
198 | | // (e.g. if GetStringToken found a newline and reset token_start to |
199 | | // point at it). |
200 | | |
201 | 9.19M | if (token_start_ + offset >= buffer_end_) |
202 | 104 | return {}; |
203 | | |
204 | 9.19M | if (cursor_ <= token_start_ + offset) |
205 | 1.25M | return {}; |
206 | | |
207 | 7.93M | return std::string_view(token_start_ + offset, |
208 | 7.93M | (cursor_ - token_start_) - offset); |
209 | 9.19M | } |
210 | | |
211 | 12.2M | Token WastLexer::BareToken(TokenType token_type) { |
212 | 12.2M | return Token(GetLocation(), token_type); |
213 | 12.2M | } |
214 | | |
215 | 1.40M | Token WastLexer::LiteralToken(TokenType token_type, LiteralType literal_type) { |
216 | 1.40M | return Token(GetLocation(), token_type, Literal(literal_type, GetText())); |
217 | 1.40M | } |
218 | | |
219 | 7.79M | Token WastLexer::TextToken(TokenType token_type, size_t offset) { |
220 | 7.79M | return Token(GetLocation(), token_type, GetText(offset)); |
221 | 7.79M | } |
222 | | |
223 | 373M | int WastLexer::PeekChar() { |
224 | 373M | return cursor_ < buffer_end_ ? static_cast<uint8_t>(*cursor_) : kEof; |
225 | 373M | } |
226 | | |
227 | 338M | int WastLexer::ReadChar() { |
228 | 338M | return cursor_ < buffer_end_ ? static_cast<uint8_t>(*cursor_++) : kEof; |
229 | 338M | } |
230 | | |
231 | 22.8M | bool WastLexer::MatchChar(char c) { |
232 | 22.8M | if (PeekChar() == c) { |
233 | 1.08M | ReadChar(); |
234 | 1.08M | return true; |
235 | 1.08M | } |
236 | 21.7M | return false; |
237 | 22.8M | } |
238 | | |
239 | 18.8M | bool WastLexer::MatchString(std::string_view s) { |
240 | 18.8M | const char* saved_cursor = cursor_; |
241 | 37.9M | for (char c : s) { |
242 | 37.9M | if (ReadChar() != c) { |
243 | 17.1M | cursor_ = saved_cursor; |
244 | 17.1M | return false; |
245 | 17.1M | } |
246 | 37.9M | } |
247 | 1.73M | return true; |
248 | 18.8M | } |
249 | | |
250 | 74.1M | void WastLexer::Newline() { |
251 | 74.1M | line_++; |
252 | 74.1M | line_start_ = cursor_; |
253 | 74.1M | } |
254 | | |
255 | 6.13k | bool WastLexer::ReadBlockComment() { |
256 | 6.13k | int nesting = 1; |
257 | 2.88M | while (true) { |
258 | 2.88M | switch (ReadChar()) { |
259 | 145 | case kEof: |
260 | 145 | ERROR("EOF in block comment"); |
261 | 145 | return false; |
262 | | |
263 | 116k | case ';': |
264 | 116k | if (MatchChar(')') && --nesting == 0) { |
265 | 5.98k | return true; |
266 | 5.98k | } |
267 | 110k | break; |
268 | | |
269 | 678k | case '(': |
270 | 678k | if (MatchChar(';')) { |
271 | 589k | nesting++; |
272 | 589k | } |
273 | 678k | break; |
274 | | |
275 | 607k | case '\n': |
276 | 607k | Newline(); |
277 | 607k | break; |
278 | 2.88M | } |
279 | 2.88M | } |
280 | 6.13k | } |
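// Block comments nest, which is why the counter above is needed. For
// example, "(; outer (; inner ;) still outer ;)" only ends at the second
// ";)", when nesting drops back to zero.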
281 | | |
282 | 1.15k | bool WastLexer::ReadLineComment() { |
283 | 2.01k | while (true) { |
284 | 2.01k | switch (ReadChar()) { |
285 | 15 | case kEof: |
286 | 15 | return false; |
287 | | |
288 | 900 | case '\r': |
289 | 900 | if (PeekChar() == '\n') { |
290 | 415 | ReadChar(); |
291 | 415 | } |
292 | 900 | Newline(); |
293 | 900 | return true; |
294 | | |
295 | 235 | case '\n': |
296 | 235 | Newline(); |
297 | 235 | return true; |
298 | 2.01k | } |
299 | 2.01k | } |
300 | 1.15k | } |
301 | | |
302 | 981k | void WastLexer::ReadWhitespace() { |
303 | 68.6M | while (true) { |
304 | 68.6M | switch (PeekChar()) { |
305 | 248k | case ' ': |
306 | 331k | case '\t': |
307 | 463k | case '\r': |
308 | 463k | ReadChar(); |
309 | 463k | break; |
310 | | |
311 | 67.2M | case '\n': |
312 | 67.2M | ReadChar(); |
313 | 67.2M | Newline(); |
314 | 67.2M | break; |
315 | | |
316 | 981k | default: |
317 | 981k | return; |
318 | 68.6M | } |
319 | 68.6M | } |
320 | 981k | } |
321 | | |
322 | 1.83M | Token WastLexer::GetStringToken() { |
323 | 1.83M | const char* saved_token_start = token_start_; |
324 | 1.83M | bool has_error = false; |
325 | 1.83M | bool in_string = true; |
326 | 1.83M | ReadChar(); |
327 | 41.9M | while (in_string) { |
328 | 40.1M | switch (ReadChar()) { |
329 | 912 | case kEof: |
330 | 912 | return BareToken(TokenType::Eof); |
331 | | |
332 | 6.27M | case '\n': |
333 | 6.27M | token_start_ = cursor_ - 1; |
334 | 6.27M | ERROR("newline in string"); |
335 | 6.27M | has_error = true; |
336 | 6.27M | Newline(); |
337 | 6.27M | continue; |
338 | | |
339 | 1.83M | case '"': |
340 | 1.83M | if (PeekChar() == '"') { |
341 | 1.09M | ERROR("invalid string token"); |
342 | 1.09M | has_error = true; |
343 | 1.09M | } |
344 | 1.83M | in_string = false; |
345 | 1.83M | break; |
346 | | |
347 | 142k | case '\\': { |
348 | 142k | switch (ReadChar()) { |
349 | 995 | case 't': |
350 | 3.12k | case 'n': |
351 | 4.13k | case 'r': |
352 | 5.23k | case '"': |
353 | 6.08k | case '\'': |
354 | 14.6k | case '\\': |
355 | | // Valid escape. |
356 | 14.6k | break; |
357 | | |
358 | 475 | case '0': |
359 | 75.6k | case '1': |
360 | 76.3k | case '2': |
361 | 77.0k | case '3': |
362 | 77.4k | case '4': |
363 | 77.8k | case '5': |
364 | 81.9k | case '6': |
365 | 83.5k | case '7': |
366 | 83.8k | case '8': |
367 | 84.0k | case '9': |
368 | 91.0k | case 'a': |
369 | 94.4k | case 'b': |
370 | 102k | case 'c': |
371 | 102k | case 'd': |
372 | 102k | case 'e': |
373 | 103k | case 'f': |
374 | 103k | case 'A': |
375 | 103k | case 'B': |
376 | 104k | case 'C': |
377 | 104k | case 'D': |
378 | 105k | case 'E': |
379 | 105k | case 'F': // Hex byte escape. |
380 | 105k | if (IsHexDigit(PeekChar())) { |
381 | 87.4k | ReadChar(); |
382 | 87.4k | } else { |
383 | 17.9k | token_start_ = cursor_ - 2; |
384 | 17.9k | goto error; |
385 | 17.9k | } |
386 | 87.4k | break; |
387 | | |
388 | 87.4k | case 'u': { |
389 | 16.3k | token_start_ = cursor_ - 2; |
390 | 16.3k | if (ReadChar() != '{') { |
391 | 919 | goto error; |
392 | 919 | } |
393 | | |
394 | | // Value must be a valid unicode scalar value. |
395 | 15.4k | uint32_t digit; |
396 | 15.4k | uint32_t scalar_value = 0; |
397 | | |
398 | 231k | while (IsHexDigit(PeekChar())) { |
399 | 219k | ParseHexdigit(*cursor_++, &digit); |
400 | | |
401 | 219k | scalar_value = (scalar_value << 4) | digit; |
402 | | // 0x10FFFF is the maximum Unicode code point; reject anything larger.
403 | 219k | if (scalar_value >= 0x110000) { |
404 | 3.95k | goto error; |
405 | 3.95k | } |
406 | 219k | } |
407 | | |
408 | 11.4k | if (PeekChar() != '}') { |
409 | 3.85k | goto error; |
410 | 3.85k | } |
411 | | |
412 | | // Scalars between 0xd800 and 0xdfff (surrogates) and the empty escape "\u{}" are not allowed.
413 | 7.61k | if ((scalar_value >= 0xd800 && scalar_value < 0xe000) || |
414 | 7.61k | token_start_ == cursor_ - 3) { |
415 | 408 | ReadChar(); |
416 | 408 | goto error; |
417 | 408 | } |
418 | 7.21k | break; |
419 | 7.61k | } |
420 | | |
421 | 7.21k | default: |
422 | 5.65k | token_start_ = cursor_ - 2; |
423 | 5.65k | goto error; |
424 | | |
425 | 32.7k | error: |
426 | 32.7k | ERROR("bad escape \"%.*s\"", |
427 | 32.7k | static_cast<int>(cursor_ - token_start_), token_start_); |
428 | 32.7k | has_error = true; |
429 | 32.7k | break; |
430 | 142k | } |
431 | 142k | break; |
432 | 142k | } |
433 | 40.1M | } |
434 | 40.1M | } |
435 | 1.83M | token_start_ = saved_token_start; |
436 | 1.83M | if (has_error) { |
437 | 1.09M | return Token(GetLocation(), TokenType::Invalid); |
438 | 1.09M | } |
439 | | |
440 | 739k | return TextToken(TokenType::Text); |
441 | 1.83M | } |
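// Examples of how string literals are classified by the escape rules above
// (a sketch; the surrounding quotes are part of the token text):
//
//   "abc"        -> TokenType::Text
//   "tab\t"      -> TokenType::Text     (single-character escape)
//   "\3a"        -> TokenType::Text     (two hex digits form a byte escape)
//   "\u{1F600}"  -> TokenType::Text     (Unicode scalar value escape)
//   "\u{dfff}"   -> TokenType::Invalid  ("bad escape": surrogate range)
//   "\q"         -> TokenType::Invalid  ("bad escape": unknown escape char)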
442 | | |
443 | | // static |
444 | 235M | bool WastLexer::IsCharClass(int c, CharClass bit) { |
445 | | // Generated by the following python script: |
446 | | // |
447 | | // def Range(c, lo, hi): return lo <= c <= hi |
448 | | // def IsDigit(c): return Range(c, '0', '9') |
449 | | // def IsHexDigit(c): return IsDigit(c) or Range(c.lower(), 'a', 'f') |
450 | | // def IsKeyword(c): return Range(c, 'a', 'z') |
451 | | // def IsIdChar(c): return Range(c, '!', '~') and c not in '"(),;[]{}' |
452 | | // |
453 | | // print ([0] + [ |
454 | | // (8 if IsDigit(c) else 0) | |
455 | | // (4 if IsHexDigit(c) else 0) | |
456 | | // (2 if IsKeyword(c) else 0) | |
457 | | // (1 if IsIdChar(c) else 0) |
458 | | // for c in map(chr, range(0, 127)) |
459 | | // ]) |
460 | 235M | static const char kCharClasses[257] = { |
461 | 235M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
462 | 235M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, |
463 | 235M | 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 13, 13, 13, 13, 13, 13, 13, 13, |
464 | 235M | 13, 13, 1, 0, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, |
465 | 235M | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, |
466 | 235M | 1, 1, 1, 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
467 | 235M | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 0, 1, |
468 | 235M | }; |
469 | | |
470 | 235M | assert(c >= -1 && c < 256); |
471 | 235M | return (kCharClasses[c + 1] & static_cast<int>(bit)) != 0; |
472 | 235M | } |
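// Worked example of one table entry: '3' is 0x33, so it lands at index
// 0x34 (the table is offset by one so that kEof == -1 maps to index 0).
// '3' is a digit (8), a hex digit (4), not a keyword letter (0), and an
// idchar (1), giving 8 + 4 + 1 = 13 -- the value stored above. Likewise
// 'b' is hex digit + keyword + idchar = 4 + 2 + 1 = 7, and 'z' is only
// keyword + idchar = 3.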
473 | | |
474 | 9.91M | bool WastLexer::ReadNum() { |
475 | 9.91M | if (IsDigit(PeekChar())) { |
476 | 9.89M | ReadChar(); |
477 | 9.89M | return MatchChar('_') || IsDigit(PeekChar()) ? ReadNum() : true; |
478 | 9.89M | } |
479 | 16.6k | return false; |
480 | 9.91M | } |
481 | | |
482 | 7.76M | bool WastLexer::ReadHexNum() { |
483 | 7.76M | if (IsHexDigit(PeekChar())) { |
484 | 7.76M | ReadChar(); |
485 | 7.76M | return MatchChar('_') || IsHexDigit(PeekChar()) ? ReadHexNum() : true; |
486 | 7.76M | } |
487 | 4.50k | return false; |
488 | 7.76M | } |
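// The recursion in the two readers above enforces the rule that '_' may
// only separate digits: after consuming a '_', the next character must
// start another digit run. So ReadNum() accepts "1_000" but returns false
// on "1_" and "1__0", and the callers below then fall back to
// GetReservedToken().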
489 | | |
490 | 13.8M | WastLexer::ReservedChars WastLexer::ReadReservedChars() { |
491 | 13.8M | ReservedChars ret{ReservedChars::None}; |
492 | 157M | while (true) { |
493 | 157M | auto peek = PeekChar(); |
494 | 157M | if (IsIdChar(peek)) { |
495 | 142M | ReadChar(); |
496 | 142M | if (ret == ReservedChars::None) { |
497 | 10.9M | ret = ReservedChars::Id; |
498 | 10.9M | } |
499 | 142M | } else if (peek == '"') { |
500 | 1.26M | GetStringToken(); |
501 | 1.26M | ret = ReservedChars::Some; |
502 | 13.8M | } else { |
503 | 13.8M | break; |
504 | 13.8M | } |
505 | 157M | } |
506 | 13.8M | return ret; |
507 | 13.8M | } |
508 | | |
509 | 95.4k | void WastLexer::ReadSign() { |
510 | 95.4k | if (PeekChar() == '+' || PeekChar() == '-') { |
511 | 71.1k | ReadChar(); |
512 | 71.1k | } |
513 | 95.4k | } |
514 | | |
515 | 1.22M | Token WastLexer::GetNumberToken(TokenType token_type) { |
516 | 1.22M | if (ReadNum()) { |
517 | 1.22M | if (MatchChar('.')) { |
518 | 121k | token_type = TokenType::Float; |
519 | 121k | if (IsDigit(PeekChar()) && !ReadNum()) { |
520 | 459 | return GetReservedToken(); |
521 | 459 | } |
522 | 121k | } |
523 | 1.22M | if (MatchChar('e') || MatchChar('E')) { |
524 | 17.3k | token_type = TokenType::Float; |
525 | 17.3k | ReadSign(); |
526 | 17.3k | if (!ReadNum()) { |
527 | 3.08k | return GetReservedToken(); |
528 | 3.08k | } |
529 | 17.3k | } |
530 | 1.22M | if (NoTrailingReservedChars()) { |
531 | 1.10M | if (token_type == TokenType::Float) { |
532 | 113k | return LiteralToken(token_type, LiteralType::Float); |
533 | 994k | } else { |
534 | 994k | return LiteralToken(token_type, LiteralType::Int); |
535 | 994k | } |
536 | 1.10M | } |
537 | 1.22M | } |
538 | 116k | return GetReservedToken(); |
539 | 1.22M | } |
540 | | |
541 | 237k | Token WastLexer::GetHexNumberToken(TokenType token_type) { |
542 | 237k | if (ReadHexNum()) { |
543 | 235k | if (MatchChar('.')) { |
544 | 90.2k | token_type = TokenType::Float; |
545 | 90.2k | if (IsHexDigit(PeekChar()) && !ReadHexNum()) { |
546 | 266 | return GetReservedToken(); |
547 | 266 | } |
548 | 90.2k | } |
549 | 234k | if (MatchChar('p') || MatchChar('P')) { |
550 | 78.0k | token_type = TokenType::Float; |
551 | 78.0k | ReadSign(); |
552 | 78.0k | if (!ReadNum()) { |
553 | 10.9k | return GetReservedToken(); |
554 | 10.9k | } |
555 | 78.0k | } |
556 | 224k | if (NoTrailingReservedChars()) { |
557 | 197k | if (token_type == TokenType::Float) { |
558 | 141k | return LiteralToken(token_type, LiteralType::Hexfloat); |
559 | 141k | } else { |
560 | 56.2k | return LiteralToken(token_type, LiteralType::Int); |
561 | 56.2k | } |
562 | 197k | } |
563 | 224k | } |
564 | 29.0k | return GetReservedToken(); |
565 | 237k | } |
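// Examples of how the two number scanners above classify their input
// (signed forms arrive here via the '+'/'-' cases in GetToken):
//
//   "0", "123", "1_000"  -> TokenType::Nat,   LiteralType::Int
//   "+5", "-0x1f"        -> TokenType::Int,   LiteralType::Int
//   "1.5", "2e10", "-3." -> TokenType::Float, LiteralType::Float
//   "0x1p-4", "0xA.Bp2"  -> TokenType::Float, LiteralType::Hexfloat
//   "1.2.3", "0x", "1e+" -> TokenType::Reserved (malformed numbers)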
566 | | |
567 | 1.28M | Token WastLexer::GetInfToken() { |
568 | 1.28M | if (MatchString("inf")) { |
569 | 58.6k | if (NoTrailingReservedChars()) { |
570 | 57.2k | return LiteralToken(TokenType::Float, LiteralType::Infinity); |
571 | 57.2k | } |
572 | 1.39k | return GetReservedToken(); |
573 | 58.6k | } |
574 | 1.22M | return GetKeywordToken(); |
575 | 1.28M | } |
576 | | |
577 | 380k | Token WastLexer::GetNanToken() { |
578 | 380k | if (MatchString("nan")) { |
579 | 41.5k | if (MatchChar(':')) { |
580 | 32.4k | if (MatchString("0x") && ReadHexNum() && NoTrailingReservedChars()) { |
581 | 28.5k | return LiteralToken(TokenType::Float, LiteralType::Nan); |
582 | 28.5k | } |
583 | 32.4k | } else if (NoTrailingReservedChars()) { |
584 | 8.30k | return LiteralToken(TokenType::Float, LiteralType::Nan); |
585 | 8.30k | } |
586 | 41.5k | } |
587 | 343k | return GetKeywordToken(); |
588 | 380k | } |
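// Examples for the two helpers above:
//
//   "inf", "-inf"    -> TokenType::Float, LiteralType::Infinity
//   "nan", "+nan"    -> TokenType::Float, LiteralType::Nan
//   "nan:0x400000"   -> TokenType::Float, LiteralType::Nan (explicit payload)
//   "if", "nop"      -> no match here, so they fall through to
//                       GetKeywordToken() and are looked up as keywords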
589 | | |
590 | | Token WastLexer::GetNameEqNumToken(std::string_view name, |
591 | 136k | TokenType token_type) { |
592 | 136k | if (MatchString(name)) { |
593 | 30.4k | if (MatchString("0x")) { |
594 | 11.4k | if (ReadHexNum() && NoTrailingReservedChars()) { |
595 | 8.93k | return TextToken(token_type, name.size()); |
596 | 8.93k | } |
597 | 19.0k | } else if (ReadNum() && NoTrailingReservedChars()) { |
598 | 12.9k | return TextToken(token_type, name.size()); |
599 | 12.9k | } |
600 | 30.4k | } |
601 | 114k | return GetKeywordToken(); |
602 | 136k | } |
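// Examples for GetNameEqNumToken(), which recognizes memarg attributes:
//
//   "offset=16"  -> TokenType::OffsetEqNat, text "16"
//   "align=0x8"  -> TokenType::AlignEqNat,  text "0x8"
//   "offsets"    -> no "offset=" match, so it falls through to GetKeywordToken()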
603 | | |
604 | 3.72M | Token WastLexer::GetIdChars() { |
605 | 3.72M | if (ReadReservedChars() == ReservedChars::Id) { |
606 | 2.46M | return TextToken(TokenType::Var); |
607 | 2.46M | } |
608 | | |
609 | 1.25M | return TextToken(TokenType::Reserved); |
610 | 3.72M | } |
611 | | |
612 | 7.98M | Token WastLexer::GetKeywordToken() { |
613 | 7.98M | ReadReservedChars(); |
614 | 7.98M | TokenInfo* info = |
615 | 7.98M | Perfect_Hash::InWordSet(token_start_, cursor_ - token_start_); |
616 | 7.98M | if (!info) { |
617 | 1.38M | return TextToken(TokenType::Reserved); |
618 | 1.38M | } |
619 | 6.60M | if (IsTokenTypeBare(info->token_type)) { |
620 | 2.22M | return BareToken(info->token_type); |
621 | 4.37M | } else if (IsTokenTypeType(info->token_type) || |
622 | 4.37M | IsTokenTypeRefKind(info->token_type)) { |
623 | 3.00M | return Token(GetLocation(), info->token_type, info->value_type); |
624 | 3.00M | } else { |
625 | 1.37M | assert(IsTokenTypeOpcode(info->token_type)); |
626 | 1.37M | return Token(GetLocation(), info->token_type, info->opcode); |
627 | 1.37M | } |
628 | 6.60M | } |
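// Perfect_Hash::InWordSet comes from prebuilt/lexer-keywords.cc, included
// near the top of this file; the generated table maps a keyword spelling to
// its TokenInfo. For example, "func" yields a bare keyword token, "i32"
// yields a value-type token, and "i32.add" yields an opcode token.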
629 | | |
630 | 609k | Token WastLexer::GetReservedToken() { |
631 | 609k | ReadReservedChars(); |
632 | 609k | return TextToken(TokenType::Reserved); |
633 | 609k | } |
634 | | |
635 | 25.6M | void WastLexer::Error(Location loc, const char* format, ...) { |
636 | 25.6M | WABT_SNPRINTF_ALLOCA(buffer, length, format); |
637 | 25.6M | errors_->emplace_back(ErrorLevel::Error, loc, buffer); |
638 | 25.6M | } |
639 | | |
640 | | } // namespace wabt |