/src/wabt/src/wast-lexer.cc
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2016 WebAssembly Community Group participants |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include "wabt/wast-lexer.h" |
18 | | |
19 | | #include <cassert> |
20 | | #include <cstdio> |
21 | | |
22 | | #include "wabt/config.h" |
23 | | |
24 | | #include "wabt/lexer-source.h" |
25 | | |
26 | 23.5M | #define ERROR(...) Error(GetLocation(), __VA_ARGS__) |
27 | | |
28 | | namespace wabt { |
29 | | |
30 | | namespace { |
31 | | |
32 | | #if __clang__ |
33 | | #pragma clang diagnostic push |
34 | | #pragma clang diagnostic ignored "-Wimplicit-fallthrough" |
35 | | #endif |
36 | | #include "prebuilt/lexer-keywords.cc" |
37 | | #if __clang__ |
38 | | #pragma clang diagnostic pop |
39 | | #endif |
40 | | |
41 | | } // namespace |
42 | | |
43 | | WastLexer::WastLexer(std::unique_ptr<LexerSource> source, |
44 | | std::string_view filename, |
45 | | Errors* errors) |
46 | 22.8k | : source_(std::move(source)), |
47 | 22.8k | filename_(filename), |
48 | 22.8k | line_(1), |
49 | 22.8k | buffer_(static_cast<const char*>(source_->data())), |
50 | 22.8k | buffer_end_(buffer_ + source_->size()), |
51 | 22.8k | line_start_(buffer_), |
52 | 22.8k | token_start_(buffer_), |
53 | 22.8k | cursor_(buffer_), |
54 | 22.8k | errors_(errors) {} |
55 | | |
56 | | // static |
57 | | std::unique_ptr<WastLexer> WastLexer::CreateBufferLexer( |
58 | | std::string_view filename, |
59 | | const void* data, |
60 | | size_t size, |
61 | 22.8k | Errors* errors) { |
62 | 22.8k | return std::make_unique<WastLexer>(std::make_unique<LexerSource>(data, size), |
63 | 22.8k | filename, errors); |
64 | 22.8k | } |
65 | | |
66 | 27.9M | Token WastLexer::GetToken() { |
67 | 48.5M | while (true) { |
68 | 48.5M | token_start_ = cursor_; |
69 | 48.5M | switch (PeekChar()) { |
70 | 1.97M | case kEof: |
71 | 1.97M | return BareToken(TokenType::Eof); |
72 | | |
73 | 8.08M | case '(': |
74 | 8.08M | if (MatchString("(;")) { |
75 | 392 | if (ReadBlockComment()) { |
76 | 256 | continue; |
77 | 256 | } |
78 | 136 | return BareToken(TokenType::Eof); |
79 | 8.08M | } else if (MatchString("(@")) { |
80 | 825k | GetIdChars(); |
81 | | // offset=2 to skip the "(@" prefix |
82 | 825k | return TextToken(TokenType::LparAnn, 2); |
83 | 7.26M | } else { |
84 | 7.26M | ReadChar(); |
85 | 7.26M | return BareToken(TokenType::Lpar); |
86 | 7.26M | } |
87 | 0 | break; |
88 | | |
89 | 2.56M | case ')': |
90 | 2.56M | ReadChar(); |
91 | 2.56M | return BareToken(TokenType::Rpar); |
92 | | |
93 | 286k | case ';': |
94 | 286k | if (MatchString(";;")) { |
95 | 1.19k | if (ReadLineComment()) { |
96 | 1.17k | continue; |
97 | 1.17k | } |
98 | 14 | return BareToken(TokenType::Eof); |
99 | 285k | } else { |
100 | 285k | ReadChar(); |
101 | 285k | ERROR("unexpected char"); |
102 | 285k | continue; |
103 | 285k | } |
104 | 0 | break; |
105 | | |
106 | 200k | case ' ': |
107 | 262k | case '\t': |
108 | 310k | case '\r': |
109 | 2.05M | case '\n': |
110 | 2.05M | ReadWhitespace(); |
111 | 2.05M | continue; |
112 | | |
113 | 523k | case '"': |
114 | 523k | return GetStringToken(TokenType::Text); |
115 | | |
116 | 24.3k | case '+': |
117 | 88.3k | case '-': |
118 | 88.3k | ReadChar(); |
119 | 88.3k | switch (PeekChar()) { |
120 | 2.48k | case 'i': |
121 | 2.48k | return GetInfToken(); |
122 | | |
123 | 9.48k | case 'n': |
124 | 9.48k | return GetNanToken(); |
125 | | |
126 | 36.8k | case '0': |
127 | 36.8k | return MatchString("0x") ? GetHexNumberToken(TokenType::Int) |
128 | 36.8k | : GetNumberToken(TokenType::Int); |
129 | 4.64k | case '1': |
130 | 6.58k | case '2': |
131 | 11.0k | case '3': |
132 | 14.6k | case '4': |
133 | 15.2k | case '5': |
134 | 15.9k | case '6': |
135 | 16.7k | case '7': |
136 | 18.9k | case '8': |
137 | 19.6k | case '9': |
138 | 19.6k | return GetNumberToken(TokenType::Int); |
139 | | |
140 | 19.8k | default: |
141 | 19.8k | return GetReservedToken(); |
142 | 88.3k | } |
143 | 0 | break; |
144 | | |
145 | 507k | case '0': |
146 | 507k | return MatchString("0x") ? GetHexNumberToken(TokenType::Nat) |
147 | 507k | : GetNumberToken(TokenType::Nat); |
148 | | |
149 | 232k | case '1': |
150 | 312k | case '2': |
151 | 487k | case '3': |
152 | 543k | case '4': |
153 | 561k | case '5': |
154 | 602k | case '6': |
155 | 618k | case '7': |
156 | 670k | case '8': |
157 | 679k | case '9': |
158 | 679k | return GetNumberToken(TokenType::Nat); |
159 | | |
160 | 3.73M | case '$': |
161 | 3.73M | ReadChar(); |
162 | 3.73M | if (PeekChar() == '"') { |
163 | 729 | return GetStringToken(TokenType::Var); |
164 | 729 | } |
165 | 3.73M | return GetIdChars(); // Initial $ is idchar, so this produces id token |
166 | | |
167 | 53.1k | case 'a': |
168 | 53.1k | return GetNameEqNumToken("align=", TokenType::AlignEqNat); |
169 | | |
170 | 1.32M | case 'i': |
171 | 1.32M | return GetInfToken(); |
172 | | |
173 | 314k | case 'n': |
174 | 314k | return GetNanToken(); |
175 | | |
176 | 85.6k | case 'o': |
177 | 85.6k | return GetNameEqNumToken("offset=", TokenType::OffsetEqNat); |
178 | | |
179 | 26.2M | default: |
180 | 26.2M | if (IsKeyword(PeekChar())) { |
181 | 7.68M | return GetKeywordToken(); |
182 | 18.5M | } else if (IsIdChar(PeekChar())) { |
183 | 286k | return GetReservedToken(); |
184 | 18.2M | } else { |
185 | 18.2M | ReadChar(); |
186 | 18.2M | ERROR("unexpected char"); |
187 | 18.2M | continue; |
188 | 18.2M | } |
189 | 48.5M | } |
190 | 48.5M | } |
191 | 27.9M | } |
192 | | |
193 | 52.6M | Location WastLexer::GetLocation() { |
194 | 105M | auto column = [this](const char* p) { |
195 | 105M | return std::max(1, static_cast<int>(p - line_start_ + 1)); |
196 | 105M | }; |
197 | 52.6M | return Location(line_, column(token_start_), column(cursor_)); |
198 | 52.6M | } |
199 | | |
200 | 9.47M | std::string_view WastLexer::GetText(size_t offset) { |
201 | | // Bounds checks are necessary because token_start may have been moved |
202 | | // (e.g. if GetStringToken found a newline and reset token_start to |
203 | | // point at it). |
204 | | |
205 | 9.47M | if (token_start_ + offset >= buffer_end_) |
206 | 65 | return {}; |
207 | | |
208 | 9.47M | if (cursor_ <= token_start_ + offset) |
209 | 798k | return {}; |
210 | | |
211 | 8.67M | return std::string_view(token_start_ + offset, |
212 | 8.67M | (cursor_ - token_start_) - offset); |
213 | 9.47M | } |
214 | | |
215 | 14.6M | Token WastLexer::BareToken(TokenType token_type) { |
216 | 14.6M | return Token(GetLocation(), token_type); |
217 | 14.6M | } |
218 | | |
219 | 1.13M | Token WastLexer::LiteralToken(TokenType token_type, LiteralType literal_type) { |
220 | 1.13M | return Token(GetLocation(), token_type, Literal(literal_type, GetText())); |
221 | 1.13M | } |
222 | | |
223 | 8.33M | Token WastLexer::TextToken(TokenType token_type, size_t offset) { |
224 | 8.33M | return Token(GetLocation(), token_type, GetText(offset)); |
225 | 8.33M | } |
226 | | |
227 | 391M | int WastLexer::PeekChar() { |
228 | 391M | return cursor_ < buffer_end_ ? static_cast<uint8_t>(*cursor_) : kEof; |
229 | 391M | } |
230 | | |
231 | 338M | int WastLexer::ReadChar() { |
232 | 338M | return cursor_ < buffer_end_ ? static_cast<uint8_t>(*cursor_++) : kEof; |
233 | 338M | } |
234 | | |
235 | 19.6M | bool WastLexer::MatchChar(char c) { |
236 | 19.6M | if (PeekChar() == c) { |
237 | 271k | ReadChar(); |
238 | 271k | return true; |
239 | 271k | } |
240 | 19.3M | return false; |
241 | 19.6M | } |
242 | | |
243 | 18.8M | bool WastLexer::MatchString(std::string_view s) { |
244 | 18.8M | const char* saved_cursor = cursor_; |
245 | 38.1M | for (char c : s) { |
246 | 38.1M | if (ReadChar() != c) { |
247 | 17.7M | cursor_ = saved_cursor; |
248 | 17.7M | return false; |
249 | 17.7M | } |
250 | 38.1M | } |
251 | 1.16M | return true; |
252 | 18.8M | } |
253 | | |
254 | 105M | void WastLexer::Newline() { |
255 | 105M | line_++; |
256 | 105M | line_start_ = cursor_; |
257 | 105M | } |
258 | | |
259 | 392 | bool WastLexer::ReadBlockComment() { |
260 | 392 | int nesting = 1; |
261 | 2.40M | while (true) { |
262 | 2.40M | switch (ReadChar()) { |
263 | 136 | case kEof: |
264 | 136 | ERROR("EOF in block comment"); |
265 | 136 | return false; |
266 | | |
267 | 4.70k | case ';': |
268 | 4.70k | if (MatchChar(')') && --nesting == 0) { |
269 | 256 | return true; |
270 | 256 | } |
271 | 4.44k | break; |
272 | | |
273 | 41.2k | case '(': |
274 | 41.2k | if (MatchChar(';')) { |
275 | 292 | nesting++; |
276 | 292 | } |
277 | 41.2k | break; |
278 | | |
279 | 586k | case '\n': |
280 | 586k | Newline(); |
281 | 586k | break; |
282 | 2.40M | } |
283 | 2.40M | } |
284 | 392 | } |
285 | | |
286 | 1.19k | bool WastLexer::ReadLineComment() { |
287 | 115k | while (true) { |
288 | 115k | switch (ReadChar()) { |
289 | 14 | case kEof: |
290 | 14 | return false; |
291 | | |
292 | 650 | case '\r': |
293 | 650 | if (PeekChar() == '\n') { |
294 | 201 | ReadChar(); |
295 | 201 | } |
296 | 650 | Newline(); |
297 | 650 | return true; |
298 | | |
299 | 528 | case '\n': |
300 | 528 | Newline(); |
301 | 528 | return true; |
302 | 115k | } |
303 | 115k | } |
304 | 1.19k | } |
305 | | |
306 | 2.05M | void WastLexer::ReadWhitespace() { |
307 | 102M | while (true) { |
308 | 102M | switch (PeekChar()) { |
309 | 201k | case ' ': |
310 | 338k | case '\t': |
311 | 389k | case '\r': |
312 | 389k | ReadChar(); |
313 | 389k | break; |
314 | | |
315 | 100M | case '\n': |
316 | 100M | ReadChar(); |
317 | 100M | Newline(); |
318 | 100M | break; |
319 | | |
320 | 2.05M | default: |
321 | 2.05M | return; |
322 | 102M | } |
323 | 102M | } |
324 | 2.05M | } |
325 | | |
326 | 840k | Token WastLexer::GetStringToken(TokenType token_type) { |
327 | 840k | const char* saved_token_start = token_start_; |
328 | 840k | bool has_error = false; |
329 | 840k | bool in_string = true; |
330 | 840k | ReadChar(); |
331 | 26.1M | while (in_string) { |
332 | 25.3M | switch (ReadChar()) { |
333 | 994 | case kEof: |
334 | 994 | return BareToken(TokenType::Eof); |
335 | | |
336 | 4.37M | case '\n': |
337 | 4.37M | token_start_ = cursor_ - 1; |
338 | 4.37M | ERROR("newline in string"); |
339 | 4.37M | has_error = true; |
340 | 4.37M | Newline(); |
341 | 4.37M | continue; |
342 | | |
343 | 839k | case '"': |
344 | 839k | if (PeekChar() == '"') { |
345 | 310k | ERROR("invalid string token"); |
346 | 310k | has_error = true; |
347 | 310k | } |
348 | 839k | in_string = false; |
349 | 839k | break; |
350 | | |
351 | 429k | case '\\': { |
352 | 429k | switch (ReadChar()) { |
353 | 1.19k | case 't': |
354 | 2.03k | case 'n': |
355 | 2.92k | case 'r': |
356 | 52.4k | case '"': |
357 | 53.8k | case '\'': |
358 | 59.5k | case '\\': |
359 | | // Valid escape. |
360 | 59.5k | break; |
361 | | |
362 | 366 | case '0': |
363 | 706 | case '1': |
364 | 923 | case '2': |
365 | 1.15k | case '3': |
366 | 1.43k | case '4': |
367 | 1.67k | case '5': |
368 | 1.88k | case '6': |
369 | 2.35k | case '7': |
370 | 2.61k | case '8': |
371 | 3.00k | case '9': |
372 | 3.38k | case 'a': |
373 | 3.59k | case 'b': |
374 | 4.29k | case 'c': |
375 | 4.58k | case 'd': |
376 | 4.79k | case 'e': |
377 | 5.02k | case 'f': |
378 | 6.75k | case 'A': |
379 | 7.02k | case 'B': |
380 | 346k | case 'C': |
381 | 347k | case 'D': |
382 | 347k | case 'E': |
383 | 348k | case 'F': // Hex byte escape. |
384 | 348k | if (IsHexDigit(PeekChar())) { |
385 | 3.60k | ReadChar(); |
386 | 344k | } else { |
387 | 344k | token_start_ = cursor_ - 2; |
388 | 344k | goto error; |
389 | 344k | } |
390 | 3.60k | break; |
391 | | |
392 | 11.4k | case 'u': { |
393 | 11.4k | token_start_ = cursor_ - 2; |
394 | 11.4k | if (ReadChar() != '{') { |
395 | 877 | goto error; |
396 | 877 | } |
397 | | |
398 | | // Value must be a valid unicode scalar value. |
399 | 10.6k | uint32_t digit; |
400 | 10.6k | uint32_t scalar_value = 0; |
401 | | |
402 | 84.9k | while (IsHexDigit(PeekChar())) { |
403 | 75.1k | ParseHexdigit(*cursor_++, &digit); |
404 | | |
405 | 75.1k | scalar_value = (scalar_value << 4) | digit; |
406 | | // Maximum value of a unicode code point. |
407 | 75.1k | if (scalar_value >= 0x110000) { |
408 | 755 | goto error; |
409 | 755 | } |
410 | 75.1k | } |
411 | | |
412 | 9.84k | if (PeekChar() != '}') { |
413 | 1.00k | goto error; |
414 | 1.00k | } |
415 | | |
416 | | // Scalars between 0xd800 and 0xdfff are not allowed. |
417 | 8.84k | if ((scalar_value >= 0xd800 && scalar_value < 0xe000) || |
418 | 8.55k | token_start_ == cursor_ - 3) { |
419 | 484 | ReadChar(); |
420 | 484 | goto error; |
421 | 484 | } |
422 | 8.35k | break; |
423 | 8.84k | } |
424 | | |
425 | 10.0k | default: |
426 | 10.0k | token_start_ = cursor_ - 2; |
427 | 10.0k | goto error; |
428 | | |
429 | 357k | error: |
430 | 357k | ERROR("bad escape \"%.*s\"", |
431 | 357k | static_cast<int>(cursor_ - token_start_), token_start_); |
432 | 357k | has_error = true; |
433 | 357k | break; |
434 | 429k | } |
435 | 429k | break; |
436 | 429k | } |
437 | 25.3M | } |
438 | 25.3M | } |
439 | 839k | token_start_ = saved_token_start; |
440 | 839k | if (has_error) { |
441 | 311k | return Token(GetLocation(), TokenType::Invalid); |
442 | 311k | } |
443 | | |
444 | 527k | return TextToken(token_type); |
445 | 839k | } |
446 | | |
447 | | // static |
448 | 215M | bool WastLexer::IsCharClass(int c, CharClass bit) { |
449 | | // Generated by the following python script: |
450 | | // |
451 | | // def Range(c, lo, hi): return lo <= c <= hi |
452 | | // def IsDigit(c): return Range(c, '0', '9') |
453 | | // def IsHexDigit(c): return IsDigit(c) or Range(c.lower(), 'a', 'f') |
454 | | // def IsKeyword(c): return Range(c, 'a', 'z') |
455 | | // def IsIdChar(c): return Range(c, '!', '~') and c not in '"(),;[]{}' |
456 | | // |
457 | | // print ([0] + [ |
458 | | // (8 if IsDigit(c) else 0) | |
459 | | // (4 if IsHexDigit(c) else 0) | |
460 | | // (2 if IsKeyword(c) else 0) | |
461 | | // (1 if IsIdChar(c) else 0) |
462 | | // for c in map(chr, range(0, 127)) |
463 | | // ]) |
464 | 215M | static const char kCharClasses[257] = { |
465 | 215M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
466 | 215M | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, |
467 | 215M | 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 13, 13, 13, 13, 13, 13, 13, 13, |
468 | 215M | 13, 13, 1, 0, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, |
469 | 215M | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, |
470 | 215M | 1, 1, 1, 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
471 | 215M | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 0, 1, |
472 | 215M | }; |
473 | | |
474 | 215M | assert(c >= -1 && c < 256); |
475 | 215M | return (kCharClasses[c + 1] & static_cast<int>(bit)) != 0; |
476 | 215M | } |
477 | | |
478 | 7.81M | bool WastLexer::ReadNum() { |
479 | 7.81M | if (IsDigit(PeekChar())) { |
480 | 7.78M | ReadChar(); |
481 | 7.78M | return MatchChar('_') || IsDigit(PeekChar()) ? ReadNum() : true; |
482 | 7.78M | } |
483 | 28.3k | return false; |
484 | 7.81M | } |
485 | | |
486 | 8.14M | bool WastLexer::ReadHexNum() { |
487 | 8.14M | if (IsHexDigit(PeekChar())) { |
488 | 8.13M | ReadChar(); |
489 | 8.13M | return MatchChar('_') || IsHexDigit(PeekChar()) ? ReadHexNum() : true; |
490 | 8.13M | } |
491 | 5.79k | return false; |
492 | 8.14M | } |
493 | | |
494 | 15.7M | WastLexer::ReservedChars WastLexer::ReadReservedChars() { |
495 | 15.7M | ReservedChars ret{ReservedChars::None}; |
496 | 138M | while (true) { |
497 | 138M | auto peek = PeekChar(); |
498 | 138M | if (IsIdChar(peek)) { |
499 | 122M | ReadChar(); |
500 | 122M | if (ret == ReservedChars::None) { |
501 | 12.3M | ret = ReservedChars::Id; |
502 | 12.3M | } |
503 | 122M | } else if (peek == '"') { |
504 | 315k | GetStringToken(TokenType::Text); |
505 | 315k | ret = ReservedChars::Some; |
506 | 15.7M | } else { |
507 | 15.7M | break; |
508 | 15.7M | } |
509 | 138M | } |
510 | 15.7M | return ret; |
511 | 15.7M | } |
512 | | |
513 | 75.8k | void WastLexer::ReadSign() { |
514 | 75.8k | if (PeekChar() == '+' || PeekChar() == '-') { |
515 | 47.4k | ReadChar(); |
516 | 47.4k | } |
517 | 75.8k | } |
518 | | |
519 | 1.08M | Token WastLexer::GetNumberToken(TokenType token_type) { |
520 | 1.08M | if (ReadNum()) { |
521 | 1.07M | if (MatchChar('.')) { |
522 | 91.7k | token_type = TokenType::Float; |
523 | 91.7k | if (IsDigit(PeekChar()) && !ReadNum()) { |
524 | 2.03k | return GetReservedToken(); |
525 | 2.03k | } |
526 | 91.7k | } |
527 | 1.07M | if (MatchChar('e') || MatchChar('E')) { |
528 | 24.4k | token_type = TokenType::Float; |
529 | 24.4k | ReadSign(); |
530 | 24.4k | if (!ReadNum()) { |
531 | 17.4k | return GetReservedToken(); |
532 | 17.4k | } |
533 | 24.4k | } |
534 | 1.05M | if (NoTrailingReservedChars()) { |
535 | 941k | if (token_type == TokenType::Float) { |
536 | 62.7k | return LiteralToken(token_type, LiteralType::Float); |
537 | 878k | } else { |
538 | 878k | return LiteralToken(token_type, LiteralType::Int); |
539 | 878k | } |
540 | 941k | } |
541 | 1.05M | } |
542 | 120k | return GetReservedToken(); |
543 | 1.08M | } |
544 | | |
545 | 161k | Token WastLexer::GetHexNumberToken(TokenType token_type) { |
546 | 161k | if (ReadHexNum()) { |
547 | 157k | if (MatchChar('.')) { |
548 | 40.3k | token_type = TokenType::Float; |
549 | 40.3k | if (IsHexDigit(PeekChar()) && !ReadHexNum()) { |
550 | 405 | return GetReservedToken(); |
551 | 405 | } |
552 | 40.3k | } |
553 | 156k | if (MatchChar('p') || MatchChar('P')) { |
554 | 51.3k | token_type = TokenType::Float; |
555 | 51.3k | ReadSign(); |
556 | 51.3k | if (!ReadNum()) { |
557 | 4.00k | return GetReservedToken(); |
558 | 4.00k | } |
559 | 51.3k | } |
560 | 152k | if (NoTrailingReservedChars()) { |
561 | 135k | if (token_type == TokenType::Float) { |
562 | 82.0k | return LiteralToken(token_type, LiteralType::Hexfloat); |
563 | 82.0k | } else { |
564 | 53.2k | return LiteralToken(token_type, LiteralType::Int); |
565 | 53.2k | } |
566 | 135k | } |
567 | 152k | } |
568 | 21.7k | return GetReservedToken(); |
569 | 161k | } |
570 | | |
571 | 1.33M | Token WastLexer::GetInfToken() { |
572 | 1.33M | if (MatchString("inf")) { |
573 | 26.1k | if (NoTrailingReservedChars()) { |
574 | 25.2k | return LiteralToken(TokenType::Float, LiteralType::Infinity); |
575 | 25.2k | } |
576 | 878 | return GetReservedToken(); |
577 | 26.1k | } |
578 | 1.30M | return GetKeywordToken(); |
579 | 1.33M | } |
580 | | |
581 | 323k | Token WastLexer::GetNanToken() { |
582 | 323k | if (MatchString("nan")) { |
583 | 33.2k | if (MatchChar(':')) { |
584 | 24.3k | if (MatchString("0x") && ReadHexNum() && NoTrailingReservedChars()) { |
585 | 19.6k | return LiteralToken(TokenType::Float, LiteralType::Nan); |
586 | 19.6k | } |
587 | 24.3k | } else if (NoTrailingReservedChars()) { |
588 | 8.44k | return LiteralToken(TokenType::Float, LiteralType::Nan); |
589 | 8.44k | } |
590 | 33.2k | } |
591 | 295k | return GetKeywordToken(); |
592 | 323k | } |
593 | | |
594 | | Token WastLexer::GetNameEqNumToken(std::string_view name, |
595 | 138k | TokenType token_type) { |
596 | 138k | if (MatchString(name)) { |
597 | 62.9k | if (MatchString("0x")) { |
598 | 37.1k | if (ReadHexNum() && NoTrailingReservedChars()) { |
599 | 31.3k | return TextToken(token_type, name.size()); |
600 | 31.3k | } |
601 | 37.1k | } else if (ReadNum() && NoTrailingReservedChars()) { |
602 | 22.1k | return TextToken(token_type, name.size()); |
603 | 22.1k | } |
604 | 62.9k | } |
605 | 85.2k | return GetKeywordToken(); |
606 | 138k | } |
607 | | |
608 | 4.55M | Token WastLexer::GetIdChars() { |
609 | 4.55M | if (ReadReservedChars() != ReservedChars::Some) { |
610 | 4.55M | return TextToken(TokenType::Var); |
611 | 4.55M | } |
612 | | |
613 | 1.58k | return TextToken(TokenType::Reserved); |
614 | 4.55M | } |
615 | | |
616 | 9.37M | Token WastLexer::GetKeywordToken() { |
617 | 9.37M | ReadReservedChars(); |
618 | 9.37M | TokenInfo* info = |
619 | 9.37M | Perfect_Hash::InWordSet(token_start_, cursor_ - token_start_); |
620 | 9.37M | if (!info) { |
621 | 1.90M | return TextToken(TokenType::Reserved); |
622 | 1.90M | } |
623 | 7.47M | if (IsTokenTypeBare(info->token_type)) { |
624 | 2.81M | return BareToken(info->token_type); |
625 | 4.65M | } else if (IsTokenTypeType(info->token_type) || |
626 | 4.07M | IsTokenTypeRefKind(info->token_type)) { |
627 | 3.28M | return Token(GetLocation(), info->token_type, info->value_type); |
628 | 3.28M | } else { |
629 | 1.36M | assert(IsTokenTypeOpcode(info->token_type)); |
630 | 1.36M | return Token(GetLocation(), info->token_type, info->opcode); |
631 | 1.36M | } |
632 | 7.47M | } |
633 | | |
634 | 473k | Token WastLexer::GetReservedToken() { |
635 | 473k | ReadReservedChars(); |
636 | 473k | return TextToken(TokenType::Reserved); |
637 | 473k | } |
638 | | |
639 | 23.5M | void WastLexer::Error(Location loc, const char* format, ...) { |
640 | | WABT_SNPRINTF_ALLOCA(buffer, length, format); |
641 | 23.5M | errors_->emplace_back(ErrorLevel::Error, loc, filename_, buffer); |
642 | 23.5M | } |
643 | | |
644 | | } // namespace wabt |