/src/solidity/liblangutil/Scanner.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * This file is part of solidity. |
3 | | * |
4 | | * solidity is free software: you can redistribute it and/or modify |
5 | | * it under the terms of the GNU General Public License as published by |
6 | | * the Free Software Foundation, either version 3 of the License, or |
7 | | * (at your option) any later version. |
8 | | * |
9 | | * solidity is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | * GNU General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU General Public License |
15 | | * along with solidity. If not, see <http://www.gnu.org/licenses/>. |
16 | | * |
17 | | * This file is derived from the file "scanner.cc", which was part of the |
18 | | * V8 project. The original copyright header follows: |
19 | | * |
20 | | * Copyright 2006-2012, the V8 project authors. All rights reserved. |
21 | | * Redistribution and use in source and binary forms, with or without |
22 | | * modification, are permitted provided that the following conditions are |
23 | | * met: |
24 | | * |
25 | | * * Redistributions of source code must retain the above copyright |
26 | | * notice, this list of conditions and the following disclaimer. |
27 | | * * Redistributions in binary form must reproduce the above |
28 | | * copyright notice, this list of conditions and the following |
29 | | * disclaimer in the documentation and/or other materials provided |
30 | | * with the distribution. |
31 | | * * Neither the name of Google Inc. nor the names of its |
32 | | * contributors may be used to endorse or promote products derived |
33 | | * from this software without specific prior written permission. |
34 | | * |
35 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
36 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
37 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
38 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
39 | | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
40 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
41 | | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
42 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
43 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
44 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
45 | | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
46 | | */ |
47 | | /** |
48 | | * @author Christian <c@ethdev.com> |
49 | | * @date 2014 |
50 | | * Solidity scanner. |
51 | | */ |
52 | | |
53 | | #include <liblangutil/Common.h> |
54 | | #include <liblangutil/Exceptions.h> |
55 | | #include <liblangutil/Scanner.h> |
56 | | |
57 | | #include <boost/algorithm/string/classification.hpp> |
58 | | |
59 | | #include <optional> |
60 | | #include <string_view> |
61 | | #include <tuple> |
62 | | #include <array> |
63 | | |
64 | | |
65 | | namespace solidity::langutil |
66 | | { |
67 | | |
68 | | std::string to_string(ScannerError _errorCode) |
69 | 2.24k | { |
70 | 2.24k | switch (_errorCode) |
71 | 2.24k | { |
72 | 0 | case ScannerError::NoError: return "No error."; |
73 | 542 | case ScannerError::IllegalToken: return "Invalid token."; |
74 | 82 | case ScannerError::IllegalHexString: return "Expected even number of hex-nibbles."; |
75 | 39 | case ScannerError::IllegalHexDigit: return "Hexadecimal digit missing or invalid."; |
76 | 452 | case ScannerError::IllegalCommentTerminator: return "Expected multi-line comment-terminator."; |
77 | 268 | case ScannerError::IllegalEscapeSequence: return "Invalid escape sequence."; |
78 | 6 | case ScannerError::UnicodeCharacterInNonUnicodeString: return "Invalid character in string. If you are trying to use Unicode characters, use a unicode\"...\" string literal."; |
79 | 43 | case ScannerError::IllegalCharacterInString: return "Invalid character in string."; |
80 | 353 | case ScannerError::IllegalStringEndQuote: return "Expected string end-quote."; |
81 | 20 | case ScannerError::IllegalNumberSeparator: return "Invalid use of number separator '_'."; |
82 | 76 | case ScannerError::IllegalExponent: return "Invalid exponent."; |
83 | 192 | case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number."; |
84 | 30 | case ScannerError::OctalNotAllowed: return "Octal numbers not allowed."; |
85 | 24 | case ScannerError::DirectionalOverrideUnderflow: return "Unicode direction override underflow in comment or string literal."; |
86 | 113 | case ScannerError::DirectionalOverrideMismatch: return "Mismatching directional override markers in comment or string literal."; |
87 | 0 | default: |
88 | 0 | solAssert(false, "Unhandled case in to_string(ScannerError)"); |
89 | 0 | return ""; |
90 | 2.24k | } |
91 | 2.24k | } |
92 | | |
93 | | |
94 | | std::ostream& operator<<(std::ostream& os, ScannerError _errorCode) |
95 | 0 | { |
96 | 0 | return os << to_string(_errorCode); |
97 | 0 | } |
98 | | |
99 | | /// Scoped helper for literal recording. Automatically drops the literal |
100 | | /// if aborting the scanning before it's complete. |
101 | | enum LiteralType |
102 | | { |
103 | | LITERAL_TYPE_STRING, |
104 | | LITERAL_TYPE_NUMBER, // not really different from string type in behaviour |
105 | | LITERAL_TYPE_COMMENT |
106 | | }; |
107 | | |
108 | | class LiteralScope |
109 | | { |
110 | | public: |
111 | | explicit LiteralScope(Scanner* _self, enum LiteralType _type): |
112 | | m_type(_type), |
113 | | m_scanner(_self), |
114 | | m_complete(false) |
115 | 28.4M | { |
116 | 28.4M | if (_type == LITERAL_TYPE_COMMENT) |
117 | 459k | m_scanner->m_skippedComments[Scanner::NextNext].literal.clear(); |
118 | 28.0M | else |
119 | 28.0M | m_scanner->m_tokens[Scanner::NextNext].literal.clear(); |
120 | 28.4M | } |
121 | | ~LiteralScope() |
122 | 28.4M | { |
123 | 28.4M | if (!m_complete) |
124 | 12.5k | { |
125 | 12.5k | if (m_type == LITERAL_TYPE_COMMENT) |
126 | 0 | m_scanner->m_skippedComments[Scanner::NextNext].literal.clear(); |
127 | 12.5k | else |
128 | 12.5k | m_scanner->m_tokens[Scanner::NextNext].literal.clear(); |
129 | 12.5k | } |
130 | 28.4M | } |
131 | 28.4M | void complete() { m_complete = true; } |
132 | | |
133 | | private: |
134 | | enum LiteralType m_type; |
135 | | Scanner* m_scanner; |
136 | | bool m_complete; |
137 | | }; |
138 | | |
139 | | void Scanner::reset() |
140 | 650k | { |
141 | 650k | m_source.reset(); |
142 | 650k | m_kind = ScannerKind::Solidity; |
143 | 650k | m_char = m_source.get(); |
144 | 650k | skipWhitespace(); |
145 | 650k | next(); |
146 | 650k | next(); |
147 | 650k | next(); |
148 | 650k | } |
149 | | |
150 | | void Scanner::setPosition(size_t _offset) |
151 | 0 | { |
152 | 0 | m_char = m_source.setPosition(_offset); |
153 | 0 | scanToken(); |
154 | 0 | next(); |
155 | 0 | next(); |
156 | 0 | } |
157 | | |
158 | | bool Scanner::scanHexByte(char& o_scannedByte) |
159 | 3.62M | { |
160 | 3.62M | char x = 0; |
161 | 10.8M | for (size_t i = 0; i < 2; i++) |
162 | 7.25M | { |
163 | 7.25M | int d = hexValue(m_char); |
164 | 7.25M | if (d < 0) |
165 | 2.58k | { |
166 | 2.58k | rollback(i); |
167 | 2.58k | return false; |
168 | 2.58k | } |
169 | 7.25M | x = static_cast<char>(x * 16 + d); |
170 | 7.25M | advance(); |
171 | 7.25M | } |
172 | 3.62M | o_scannedByte = x; |
173 | 3.62M | return true; |
174 | 3.62M | } |
175 | | |
176 | | std::optional<unsigned> Scanner::scanUnicode() |
177 | 6.21k | { |
178 | 6.21k | unsigned x = 0; |
179 | 26.2k | for (size_t i = 0; i < 4; i++) |
180 | 21.8k | { |
181 | 21.8k | int d = hexValue(m_char); |
182 | 21.8k | if (d < 0) |
183 | 1.79k | { |
184 | 1.79k | rollback(i); |
185 | 1.79k | return {}; |
186 | 1.79k | } |
187 | 20.0k | x = x * 16 + static_cast<unsigned>(d); |
188 | 20.0k | advance(); |
189 | 20.0k | } |
190 | 4.42k | return x; |
191 | 6.21k | } |
192 | | |
193 | | // This supports codepoints between 0000 and FFFF. |
194 | | void Scanner::addUnicodeAsUTF8(unsigned codepoint) |
195 | 4.42k | { |
196 | 4.42k | if (codepoint <= 0x7f) |
197 | 821 | addLiteralChar(char(codepoint)); |
198 | 3.60k | else if (codepoint <= 0x7ff) |
199 | 849 | { |
200 | 849 | addLiteralChar(char(0xc0u | (codepoint >> 6u))); |
201 | 849 | addLiteralChar(char(0x80u | (codepoint & 0x3fu))); |
202 | 849 | } |
203 | 2.75k | else |
204 | 2.75k | { |
205 | 2.75k | addLiteralChar(char(0xe0u | (codepoint >> 12u))); |
206 | 2.75k | addLiteralChar(char(0x80u | ((codepoint >> 6u) & 0x3fu))); |
207 | 2.75k | addLiteralChar(char(0x80u | (codepoint & 0x3fu))); |
208 | 2.75k | } |
209 | 4.42k | } |
210 | | |
211 | | void Scanner::rescan() |
212 | 866k | { |
213 | 866k | size_t rollbackTo = 0; |
214 | 866k | if (m_skippedComments[Current].literal.empty()) |
215 | 848k | rollbackTo = static_cast<size_t>(m_tokens[Current].location.start); |
216 | 17.6k | else |
217 | 17.6k | rollbackTo = static_cast<size_t>(m_skippedComments[Current].location.start); |
218 | 866k | m_char = m_source.rollback(m_source.position() - rollbackTo); |
219 | 866k | next(); |
220 | 866k | next(); |
221 | 866k | next(); |
222 | 866k | } |
223 | | |
224 | | // Ensure that tokens can be stored in a byte. |
225 | | BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100); |
226 | | |
227 | | Token Scanner::next() |
228 | 61.6M | { |
229 | 61.6M | m_tokens[Current] = std::move(m_tokens[Next]); |
230 | 61.6M | m_tokens[Next] = std::move(m_tokens[NextNext]); |
231 | 61.6M | m_skippedComments[Current] = std::move(m_skippedComments[Next]); |
232 | 61.6M | m_skippedComments[Next] = std::move(m_skippedComments[NextNext]); |
233 | | |
234 | 61.6M | scanToken(); |
235 | | |
236 | 61.6M | return m_tokens[Current].token; |
237 | 61.6M | } |
238 | | |
239 | | Token Scanner::selectToken(char _next, Token _then, Token _else) |
240 | 12.9k | { |
241 | 12.9k | advance(); |
242 | 12.9k | if (m_char == _next) |
243 | 905 | return selectToken(_then); |
244 | 12.0k | else |
245 | 12.0k | return _else; |
246 | 12.9k | } |
247 | | |
248 | | bool Scanner::skipWhitespace() |
249 | 29.1M | { |
250 | 29.1M | size_t const startPosition = sourcePos(); |
251 | 132M | while (isWhiteSpace(m_char)) |
252 | 103M | advance(); |
253 | | // Return whether or not we skipped any characters. |
254 | 29.1M | return sourcePos() != startPosition; |
255 | 29.1M | } |
256 | | |
257 | | bool Scanner::skipWhitespaceExceptUnicodeLinebreak() |
258 | 723k | { |
259 | 723k | size_t const startPosition = sourcePos(); |
260 | 5.60M | while (isWhiteSpace(m_char) && !isUnicodeLinebreak()) |
261 | 4.88M | advance(); |
262 | | // Return whether or not we skipped any characters. |
263 | 723k | return sourcePos() != startPosition; |
264 | 723k | } |
265 | | |
266 | | |
267 | | namespace |
268 | | { |
269 | | |
270 | | /// Tries to scan for an RLO/LRO/RLE/LRE/PDF and keeps track of script writing direction override depth. |
271 | | /// |
272 | | /// @returns ScannerError::NoError in case of successful parsing and directional encodings are paired |
273 | | /// and error code in case the input's lexical parser state is invalid and this error should be reported |
274 | | /// to the user. |
275 | | static ScannerError validateBiDiMarkup(CharStream& _stream, size_t _startPosition) |
276 | 176k | { |
277 | 176k | static std::array<std::pair<std::string_view, int>, 5> constexpr directionalSequences{ |
278 | 176k | std::pair<std::string_view, int>{"\xE2\x80\xAD", 1}, // U+202D (LRO - Left-to-Right Override) |
279 | 176k | std::pair<std::string_view, int>{"\xE2\x80\xAE", 1}, // U+202E (RLO - Right-to-Left Override) |
280 | 176k | std::pair<std::string_view, int>{"\xE2\x80\xAA", 1}, // U+202A (LRE - Left-to-Right Embedding) |
281 | 176k | std::pair<std::string_view, int>{"\xE2\x80\xAB", 1}, // U+202B (RLE - Right-to-Left Embedding) |
282 | 176k | std::pair<std::string_view, int>{"\xE2\x80\xAC", -1} // U+202C (PDF - Pop Directional Formatting |
283 | 176k | }; |
284 | | |
285 | 176k | size_t endPosition = _stream.position(); |
286 | 176k | _stream.setPosition(_startPosition); |
287 | | |
288 | 176k | int directionOverrideDepth = 0; |
289 | | |
290 | 3.52M | for (size_t currentPos = _startPosition; currentPos < endPosition; ++currentPos) |
291 | 3.34M | { |
292 | 3.34M | _stream.setPosition(currentPos); |
293 | | |
294 | 3.34M | for (auto const& [sequence, depthChange]: directionalSequences) |
295 | 16.7M | if (_stream.prefixMatch(sequence)) |
296 | 3.95k | directionOverrideDepth += depthChange; |
297 | | |
298 | 3.34M | if (directionOverrideDepth < 0) |
299 | 122 | return ScannerError::DirectionalOverrideUnderflow; |
300 | 3.34M | } |
301 | | |
302 | 176k | _stream.setPosition(endPosition); |
303 | | |
304 | 176k | return directionOverrideDepth > 0 ? ScannerError::DirectionalOverrideMismatch : ScannerError::NoError; |
305 | 176k | } |
306 | | |
307 | | } |
308 | | |
309 | | Token Scanner::skipSingleLineComment() |
310 | 173k | { |
311 | | // Line terminator is not part of the comment. If it is a |
312 | | // non-ascii line terminator, it will result in a parser error. |
313 | 173k | size_t startPosition = m_source.position(); |
314 | 3.43M | while (!isUnicodeLinebreak()) |
315 | 3.26M | if (!advance()) |
316 | 1.94k | break; |
317 | | |
318 | 173k | ScannerError unicodeDirectionError = validateBiDiMarkup(m_source, startPosition); |
319 | 173k | if (unicodeDirectionError != ScannerError::NoError) |
320 | 553 | return setError(unicodeDirectionError); |
321 | | |
322 | 173k | return Token::Whitespace; |
323 | 173k | } |
324 | | |
325 | | bool Scanner::atEndOfLine() const |
326 | 2.99M | { |
327 | 2.99M | return m_char == '\n' || m_char == '\r'; |
328 | 2.99M | } |
329 | | |
330 | | bool Scanner::tryScanEndOfLine() |
331 | 11.8M | { |
332 | 11.8M | if (m_char == '\n') |
333 | 364k | { |
334 | 364k | advance(); |
335 | 364k | return true; |
336 | 364k | } |
337 | | |
338 | 11.4M | if (m_char == '\r') |
339 | 5.60k | { |
340 | 5.60k | if (advance() && m_char == '\n') |
341 | 1.56k | advance(); |
342 | 5.60k | return true; |
343 | 5.60k | } |
344 | | |
345 | 11.4M | return false; |
346 | 11.4M | } |
347 | | |
348 | | size_t Scanner::scanSingleLineDocComment() |
349 | 355k | { |
350 | 355k | LiteralScope literal(this, LITERAL_TYPE_COMMENT); |
351 | 355k | size_t endPosition = m_source.position(); |
352 | | |
353 | 355k | skipWhitespaceExceptUnicodeLinebreak(); |
354 | | |
355 | 11.7M | while (!isSourcePastEndOfInput()) |
356 | 11.7M | { |
357 | 11.7M | endPosition = m_source.position(); |
358 | 11.7M | if (tryScanEndOfLine()) |
359 | 367k | { |
360 | | // Check if next line is also a single-line comment. |
361 | | // If any whitespaces were skipped, use source position before. |
362 | 367k | if (!skipWhitespaceExceptUnicodeLinebreak()) |
363 | 56.0k | endPosition = m_source.position(); |
364 | | |
365 | 367k | if (!m_source.isPastEndOfInput(3) && |
366 | 367k | m_source.get(0) == '/' && |
367 | 367k | m_source.get(1) == '/' && |
368 | 367k | m_source.get(2) == '/') |
369 | 14.9k | { |
370 | 14.9k | if (!m_source.isPastEndOfInput(4) && m_source.get(3) == '/') |
371 | 523 | break; // "////" is not a documentation comment |
372 | 14.3k | m_char = m_source.advanceAndGet(3); |
373 | 14.3k | if (atEndOfLine()) |
374 | 2.72k | continue; |
375 | 11.6k | addCommentLiteralChar('\n'); |
376 | 11.6k | } |
377 | 352k | else |
378 | 352k | break; // next line is not a documentation comment, we are done |
379 | 367k | } |
380 | 11.4M | else if (isUnicodeLinebreak()) |
381 | | // Any line terminator that is not '\n' is considered to end the |
382 | | // comment. |
383 | 121 | break; |
384 | 11.4M | addCommentLiteralChar(m_char); |
385 | 11.4M | advance(); |
386 | 11.4M | } |
387 | 355k | literal.complete(); |
388 | 355k | return endPosition; |
389 | 355k | } |
390 | | |
391 | | Token Scanner::skipMultiLineComment() |
392 | 2.15k | { |
393 | 2.15k | size_t startPosition = m_source.position(); |
394 | 29.6k | while (!isSourcePastEndOfInput()) |
395 | 29.0k | { |
396 | 29.0k | char prevChar = m_char; |
397 | 29.0k | advance(); |
398 | | |
399 | | // If we have reached the end of the multi-line comment, we |
400 | | // consume the '/' and insert a whitespace. This way all |
401 | | // multi-line comments are treated as whitespace. |
402 | 29.0k | if (prevChar == '*' && m_char == '/') |
403 | 1.55k | { |
404 | 1.55k | ScannerError unicodeDirectionError = validateBiDiMarkup(m_source, startPosition); |
405 | 1.55k | if (unicodeDirectionError != ScannerError::NoError) |
406 | 126 | return setError(unicodeDirectionError); |
407 | | |
408 | 1.42k | m_char = ' '; |
409 | 1.42k | return Token::Whitespace; |
410 | 1.55k | } |
411 | 29.0k | } |
412 | | // Unterminated multi-line comment. |
413 | 600 | return setError(ScannerError::IllegalCommentTerminator); |
414 | 2.15k | } |
415 | | |
416 | | Token Scanner::scanMultiLineDocComment() |
417 | 103k | { |
418 | 103k | LiteralScope literal(this, LITERAL_TYPE_COMMENT); |
419 | 103k | bool endFound = false; |
420 | 103k | bool charsAdded = false; |
421 | | |
422 | 202k | while (isWhiteSpace(m_char) && !atEndOfLine()) |
423 | 99.2k | advance(); |
424 | | |
425 | 2.87M | while (!isSourcePastEndOfInput()) |
426 | 2.87M | { |
427 | | // handle newlines in multiline comments |
428 | 2.87M | if (atEndOfLine()) |
429 | 18.8k | { |
430 | 18.8k | skipWhitespace(); |
431 | 18.8k | if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '*') |
432 | 1.08k | { // it is unknown if this leads to the end of the comment |
433 | 1.08k | addCommentLiteralChar('*'); |
434 | 1.08k | advance(); |
435 | 1.08k | } |
436 | 17.7k | else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) != '/') |
437 | 5.02k | { // skip first '*' in subsequent lines |
438 | 5.02k | m_char = m_source.advanceAndGet(1); |
439 | 5.02k | if (atEndOfLine()) // ignores empty lines |
440 | 2.45k | continue; |
441 | 2.56k | if (charsAdded) |
442 | 1.29k | addCommentLiteralChar('\n'); // corresponds to the end of previous line |
443 | 2.56k | } |
444 | 12.7k | else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/') |
445 | 1.73k | { // if after newline the comment ends, don't insert the newline |
446 | 1.73k | m_char = m_source.advanceAndGet(2); |
447 | 1.73k | endFound = true; |
448 | 1.73k | break; |
449 | 1.73k | } |
450 | 11.0k | else if (charsAdded) |
451 | 9.51k | addCommentLiteralChar('\n'); |
452 | 18.8k | } |
453 | | |
454 | 2.87M | if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/') |
455 | 99.8k | { |
456 | 99.8k | m_char = m_source.advanceAndGet(2); |
457 | 99.8k | endFound = true; |
458 | 99.8k | break; |
459 | 99.8k | } |
460 | 2.77M | addCommentLiteralChar(m_char); |
461 | 2.77M | charsAdded = true; |
462 | 2.77M | advance(); |
463 | 2.77M | } |
464 | 103k | literal.complete(); |
465 | 103k | if (!endFound) |
466 | 2.10k | return setError(ScannerError::IllegalCommentTerminator); |
467 | 101k | else |
468 | 101k | return Token::CommentLiteral; |
469 | 103k | } |
470 | | |
471 | | Token Scanner::scanSlash() |
472 | 647k | { |
473 | 647k | int firstSlashPosition = static_cast<int>(sourcePos()); |
474 | 647k | advance(); |
475 | 647k | if (m_char == '/') |
476 | 529k | { |
477 | 529k | if (!advance()) /* double slash comment directly before EOS */ |
478 | 447 | return Token::Whitespace; |
479 | 529k | else if (m_char == '/') |
480 | 358k | { |
481 | 358k | advance(); //consume the last '/' at /// |
482 | | |
483 | | // "////" |
484 | 358k | if (m_char == '/') |
485 | 2.30k | return skipSingleLineComment(); |
486 | | // doxygen style /// comment |
487 | 355k | m_skippedComments[NextNext].location.start = firstSlashPosition; |
488 | 355k | m_skippedComments[NextNext].location.sourceName = m_sourceName; |
489 | 355k | m_skippedComments[NextNext].token = Token::CommentLiteral; |
490 | 355k | m_skippedComments[NextNext].location.end = static_cast<int>(scanSingleLineDocComment()); |
491 | 355k | return Token::Whitespace; |
492 | 358k | } |
493 | 171k | else |
494 | 171k | return skipSingleLineComment(); |
495 | 529k | } |
496 | 118k | else if (m_char == '*') |
497 | 107k | { |
498 | | // doxygen style /** natspec comment |
499 | 107k | if (!advance()) /* slash star comment before EOS */ |
500 | 55 | return setError(ScannerError::IllegalCommentTerminator); |
501 | 107k | else if (m_char == '*') |
502 | 105k | { |
503 | 105k | advance(); //consume the last '*' at /** |
504 | | |
505 | | // "/**/" |
506 | 105k | if (m_char == '/') |
507 | 1.24k | { |
508 | 1.24k | advance(); //skip the closing slash |
509 | 1.24k | return Token::Whitespace; |
510 | 1.24k | } |
511 | | // "/***" |
512 | 104k | if (m_char == '*') |
513 | | // "/***/" may be interpreted as empty natspec or skipped; skipping is simpler |
514 | 1.00k | return skipMultiLineComment(); |
515 | | // we actually have a multiline documentation comment |
516 | 103k | m_skippedComments[NextNext].location.start = firstSlashPosition; |
517 | 103k | m_skippedComments[NextNext].location.sourceName = m_sourceName; |
518 | 103k | Token comment = scanMultiLineDocComment(); |
519 | 103k | m_skippedComments[NextNext].location.end = static_cast<int>(sourcePos()); |
520 | 103k | m_skippedComments[NextNext].token = comment; |
521 | 103k | if (comment == Token::Illegal) |
522 | 2.10k | return Token::Illegal; // error already set |
523 | 101k | else |
524 | 101k | return Token::Whitespace; |
525 | 103k | } |
526 | 1.14k | else |
527 | 1.14k | return skipMultiLineComment(); |
528 | 107k | } |
529 | 10.8k | else if (m_char == '=') |
530 | 341 | return selectToken(Token::AssignDiv); |
531 | 10.5k | else |
532 | 10.5k | return Token::Div; |
533 | 647k | } |
534 | | |
535 | | void Scanner::scanToken() |
536 | 61.6M | { |
537 | 61.6M | m_tokens[NextNext] = {}; |
538 | 61.6M | m_skippedComments[NextNext] = {}; |
539 | | |
540 | 61.6M | Token token; |
541 | | // M and N are for the purposes of grabbing different type sizes |
542 | 61.6M | unsigned m = 0; |
543 | 61.6M | unsigned n = 0; |
544 | 61.6M | do |
545 | 88.3M | { |
546 | | // Remember the position of the next token |
547 | 88.3M | m_tokens[NextNext].location.start = static_cast<int>(sourcePos()); |
548 | 88.3M | switch (m_char) |
549 | 88.3M | { |
550 | 636k | case '"': |
551 | 648k | case '\'': |
552 | 648k | token = scanString(false); |
553 | 648k | break; |
554 | 8.15k | case '<': |
555 | | // < <= << <<= |
556 | 8.15k | advance(); |
557 | 8.15k | if (m_char == '=') |
558 | 325 | token = selectToken(Token::LessThanOrEqual); |
559 | 7.83k | else if (m_char == '<') |
560 | 1.35k | token = selectToken('=', Token::AssignShl, Token::SHL); |
561 | 6.48k | else |
562 | 6.48k | token = Token::LessThan; |
563 | 8.15k | break; |
564 | 7.42k | case '>': |
565 | | // > >= >> >>= >>> >>>= |
566 | 7.42k | advance(); |
567 | 7.42k | if (m_char == '=') |
568 | 2.14k | token = selectToken(Token::GreaterThanOrEqual); |
569 | 5.28k | else if (m_char == '>') |
570 | 1.92k | { |
571 | | // >> >>= >>> >>>= |
572 | 1.92k | advance(); |
573 | 1.92k | if (m_char == '=') |
574 | 117 | token = selectToken(Token::AssignSar); |
575 | 1.81k | else if (m_char == '>') |
576 | 738 | token = selectToken('=', Token::AssignShr, Token::SHR); |
577 | 1.07k | else |
578 | 1.07k | token = Token::SAR; |
579 | 1.92k | } |
580 | 3.35k | else |
581 | 3.35k | token = Token::GreaterThan; |
582 | 7.42k | break; |
583 | 58.2k | case '=': |
584 | | // = == => |
585 | 58.2k | advance(); |
586 | 58.2k | if (m_char == '=') |
587 | 4.83k | token = selectToken(Token::Equal); |
588 | 53.4k | else if (m_char == '>') |
589 | 1.23k | token = selectToken(Token::DoubleArrow); |
590 | 52.1k | else |
591 | 52.1k | token = Token::Assign; |
592 | 58.2k | break; |
593 | 46.0k | case '!': |
594 | | // ! != |
595 | 46.0k | advance(); |
596 | 46.0k | if (m_char == '=') |
597 | 33.1k | token = selectToken(Token::NotEqual); |
598 | 12.8k | else |
599 | 12.8k | token = Token::Not; |
600 | 46.0k | break; |
601 | 11.0k | case '+': |
602 | | // + ++ += |
603 | 11.0k | advance(); |
604 | 11.0k | if (m_char == '+') |
605 | 4.12k | token = selectToken(Token::Inc); |
606 | 6.91k | else if (m_char == '=') |
607 | 699 | token = selectToken(Token::AssignAdd); |
608 | 6.21k | else |
609 | 6.21k | token = Token::Add; |
610 | 11.0k | break; |
611 | 651k | case '-': |
612 | | // - -- -= -> |
613 | 651k | advance(); |
614 | 651k | if (m_char == '-') |
615 | 1.88k | token = selectToken(Token::Dec); |
616 | 649k | else if (m_char == '=') |
617 | 397 | token = selectToken(Token::AssignSub); |
618 | 649k | else if (m_char == '>') |
619 | 516k | token = selectToken(Token::RightArrow); |
620 | 133k | else |
621 | 133k | token = Token::Sub; |
622 | 651k | break; |
623 | 21.4k | case '*': |
624 | | // * ** *= |
625 | 21.4k | advance(); |
626 | 21.4k | if (m_char == '*') |
627 | 9.62k | token = selectToken(Token::Exp); |
628 | 11.8k | else if (m_char == '=') |
629 | 175 | token = selectToken(Token::AssignMul); |
630 | 11.6k | else |
631 | 11.6k | token = Token::Mul; |
632 | 21.4k | break; |
633 | 6.45k | case '%': |
634 | | // % %= |
635 | 6.45k | token = selectToken('=', Token::AssignMod, Token::Mod); |
636 | 6.45k | break; |
637 | 647k | case '/': |
638 | | // / // /* /= |
639 | 647k | token = scanSlash(); |
640 | 647k | break; |
641 | 4.93k | case '&': |
642 | | // & && &= |
643 | 4.93k | advance(); |
644 | 4.93k | if (m_char == '&') |
645 | 1.57k | token = selectToken(Token::And); |
646 | 3.36k | else if (m_char == '=') |
647 | 496 | token = selectToken(Token::AssignBitAnd); |
648 | 2.86k | else |
649 | 2.86k | token = Token::BitAnd; |
650 | 4.93k | break; |
651 | 4.90k | case '|': |
652 | | // | || |= |
653 | 4.90k | advance(); |
654 | 4.90k | if (m_char == '|') |
655 | 1.92k | token = selectToken(Token::Or); |
656 | 2.97k | else if (m_char == '=') |
657 | 150 | token = selectToken(Token::AssignBitOr); |
658 | 2.82k | else |
659 | 2.82k | token = Token::BitOr; |
660 | 4.90k | break; |
661 | 4.41k | case '^': |
662 | | // ^ ^= |
663 | 4.41k | token = selectToken('=', Token::AssignBitXor, Token::BitXor); |
664 | 4.41k | break; |
665 | 169k | case '.': |
666 | | // . Number |
667 | 169k | advance(); |
668 | 169k | if (m_kind != ScannerKind::ExperimentalSolidity && isDecimalDigit(m_char)) |
669 | 3.34k | token = scanNumber('.'); |
670 | 165k | else |
671 | 165k | token = Token::Period; |
672 | 169k | break; |
673 | 3.11M | case ':': |
674 | | // : := |
675 | 3.11M | advance(); |
676 | 3.11M | if (m_char == '=') |
677 | 2.01M | token = selectToken(Token::AssemblyAssign); |
678 | 1.09M | else |
679 | 1.09M | token = Token::Colon; |
680 | 3.11M | break; |
681 | 216k | case ';': |
682 | 216k | token = selectToken(Token::Semicolon); |
683 | 216k | break; |
684 | 5.93M | case ',': |
685 | 5.93M | token = selectToken(Token::Comma); |
686 | 5.93M | break; |
687 | 7.37M | case '(': |
688 | 7.37M | token = selectToken(Token::LParen); |
689 | 7.37M | break; |
690 | 7.27M | case ')': |
691 | 7.27M | token = selectToken(Token::RParen); |
692 | 7.27M | break; |
693 | 128k | case '[': |
694 | 128k | token = selectToken(Token::LBrack); |
695 | 128k | break; |
696 | 121k | case ']': |
697 | 121k | token = selectToken(Token::RBrack); |
698 | 121k | break; |
699 | 3.24M | case '{': |
700 | 3.24M | token = selectToken(Token::LBrace); |
701 | 3.24M | break; |
702 | 2.88M | case '}': |
703 | 2.88M | token = selectToken(Token::RBrace); |
704 | 2.88M | break; |
705 | 2.40k | case '?': |
706 | 2.40k | token = selectToken(Token::Conditional); |
707 | 2.40k | break; |
708 | 10.7k | case '~': |
709 | 10.7k | token = selectToken(Token::BitNot); |
710 | 10.7k | break; |
711 | 55.7M | default: |
712 | 55.7M | if (isIdentifierStart(m_char)) |
713 | 19.4M | { |
714 | 19.4M | std::tie(token, m, n) = scanIdentifierOrKeyword(); |
715 | | |
716 | | // Special case for hexadecimal literals |
717 | 19.4M | if (token == Token::Hex) |
718 | 51.2k | { |
719 | | // reset |
720 | 51.2k | m = 0; |
721 | 51.2k | n = 0; |
722 | | |
723 | | // Special quoted hex string must follow |
724 | 51.2k | if (m_char == '"' || m_char == '\'') |
725 | 51.0k | token = scanHexString(); |
726 | 203 | else |
727 | 203 | token = setError(ScannerError::IllegalToken); |
728 | 51.2k | } |
729 | 19.4M | else if (token == Token::Unicode && m_kind != ScannerKind::Yul) |
730 | 1.13k | { |
731 | | // reset |
732 | 1.13k | m = 0; |
733 | 1.13k | n = 0; |
734 | | |
735 | | // Special quoted hex string must follow |
736 | 1.13k | if (m_char == '"' || m_char == '\'') |
737 | 1.08k | token = scanString(true); |
738 | 51 | else |
739 | 51 | token = setError(ScannerError::IllegalToken); |
740 | 1.13k | } |
741 | 19.4M | } |
742 | 36.2M | else if (isDecimalDigit(m_char)) |
743 | 7.85M | token = scanNumber(); |
744 | 28.4M | else if (skipWhitespace()) |
745 | 26.0M | token = Token::Whitespace; |
746 | 2.40M | else if (isSourcePastEndOfInput()) |
747 | 2.36M | token = Token::EOS; |
748 | 38.6k | else |
749 | 38.6k | token = selectErrorToken(ScannerError::IllegalToken); |
750 | 55.7M | break; |
751 | 88.3M | } |
752 | | // Continue scanning for tokens as long as we're just skipping |
753 | | // whitespace. |
754 | 88.3M | } |
755 | 88.3M | while (token == Token::Whitespace); |
756 | 61.6M | m_tokens[NextNext].location.end = static_cast<int>(sourcePos()); |
757 | 61.6M | m_tokens[NextNext].location.sourceName = m_sourceName; |
758 | 61.6M | m_tokens[NextNext].token = token; |
759 | 61.6M | m_tokens[NextNext].extendedTokenInfo = std::make_tuple(m, n); |
760 | 61.6M | } |
761 | | |
762 | | bool Scanner::scanEscape() |
763 | 19.6k | { |
764 | 19.6k | char c = m_char; |
765 | | |
766 | | // Skip escaped newlines. |
767 | 19.6k | if (tryScanEndOfLine()) |
768 | 2.88k | return true; |
769 | 16.7k | advance(); |
770 | | |
771 | 16.7k | switch (c) |
772 | 16.7k | { |
773 | 1.08k | case '\'': // fall through |
774 | 2.32k | case '"': // fall through |
775 | 4.91k | case '\\': |
776 | 4.91k | break; |
777 | 1.03k | case 'n': |
778 | 1.03k | c = '\n'; |
779 | 1.03k | break; |
780 | 1.24k | case 'r': |
781 | 1.24k | c = '\r'; |
782 | 1.24k | break; |
783 | 1.17k | case 't': |
784 | 1.17k | c = '\t'; |
785 | 1.17k | break; |
786 | 6.21k | case 'u': |
787 | 6.21k | { |
788 | 6.21k | if (auto const codepoint = scanUnicode(); codepoint.has_value()) |
789 | 4.42k | addUnicodeAsUTF8(*codepoint); |
790 | 1.79k | else |
791 | 1.79k | return false; |
792 | 4.42k | return true; |
793 | 6.21k | } |
794 | 1.85k | case 'x': |
795 | 1.85k | if (!scanHexByte(c)) |
796 | 480 | return false; |
797 | 1.37k | break; |
798 | 1.37k | default: |
799 | 290 | return false; |
800 | 16.7k | } |
801 | | |
802 | 9.74k | addLiteralChar(c); |
803 | 9.74k | return true; |
804 | 16.7k | } |
805 | | |
806 | | bool Scanner::isUnicodeLinebreak() |
807 | 27.5M | { |
808 | 27.5M | if (0x0a <= m_char && m_char <= 0x0d) |
809 | | // line feed, vertical tab, form feed, carriage return |
810 | 194k | return true; |
811 | 27.4M | if (!m_source.isPastEndOfInput(1) && uint8_t(m_source.get(0)) == 0xc2 && uint8_t(m_source.get(1)) == 0x85) |
812 | | // NEL - U+0085, C2 85 in utf8 |
813 | 43 | return true; |
814 | 27.4M | if (!m_source.isPastEndOfInput(2) && uint8_t(m_source.get(0)) == 0xe2 && uint8_t(m_source.get(1)) == 0x80 && ( |
815 | 5.20k | uint8_t(m_source.get(2)) == 0xa8 || uint8_t(m_source.get(2)) == 0xa9 |
816 | 5.20k | )) |
817 | | // LS - U+2028, E2 80 A8 in utf8 |
818 | | // PS - U+2029, E2 80 A9 in utf8 |
819 | 72 | return true; |
820 | 27.4M | return false; |
821 | 27.4M | } |
822 | | |
823 | | Token Scanner::scanString(bool const _isUnicode) |
824 | 649k | { |
825 | 649k | size_t startPosition = m_source.position(); |
826 | 649k | char const quote = m_char; |
827 | 649k | advance(); // consume quote |
828 | 649k | LiteralScope literal(this, LITERAL_TYPE_STRING); |
829 | | // for source location comments we allow multiline string literals |
830 | 8.47M | while (m_char != quote && !isSourcePastEndOfInput() && (!isUnicodeLinebreak() || m_kind == ScannerKind::SpecialComment)) |
831 | 7.82M | { |
832 | 7.82M | char c = m_char; |
833 | 7.82M | advance(); |
834 | | |
835 | 7.82M | if (m_kind == ScannerKind::SpecialComment) |
836 | 5.25M | { |
837 | 5.25M | if (c == '\\') |
838 | 127k | { |
839 | 127k | if (isSourcePastEndOfInput()) |
840 | 58 | return setError(ScannerError::IllegalEscapeSequence); |
841 | 127k | advance(); |
842 | 127k | } |
843 | 5.12M | else |
844 | 5.12M | addLiteralChar(c); |
845 | 5.25M | } |
846 | 2.57M | else |
847 | 2.57M | { |
848 | 2.57M | if (c == '\\') |
849 | 19.7k | { |
850 | 19.7k | if (isSourcePastEndOfInput() || !scanEscape()) |
851 | 2.69k | return setError(ScannerError::IllegalEscapeSequence); |
852 | 19.7k | } |
853 | 2.55M | else |
854 | 2.55M | { |
855 | | // Report error on non-printable characters in string literals, however |
856 | | // allow anything for unicode string literals, because their validity will |
857 | | // be verified later (in the syntax checker). |
858 | | // |
859 | | // We are using a manual range and not isprint() to avoid |
860 | | // any potential complications with locale. |
861 | 2.55M | if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f)) |
862 | 1.02k | { |
863 | 1.02k | if (m_kind == ScannerKind::Yul) |
864 | 108 | return setError(ScannerError::IllegalCharacterInString); |
865 | 918 | return setError(ScannerError::UnicodeCharacterInNonUnicodeString); |
866 | 1.02k | } |
867 | 2.55M | addLiteralChar(c); |
868 | 2.55M | } |
869 | 2.57M | } |
870 | | |
871 | 7.82M | } |
872 | 646k | if (m_char != quote) |
873 | 2.48k | return setError(ScannerError::IllegalStringEndQuote); |
874 | | |
875 | 643k | if (_isUnicode) |
876 | 1.00k | { |
877 | 1.00k | ScannerError unicodeDirectionError = validateBiDiMarkup(m_source, startPosition); |
878 | 1.00k | if (unicodeDirectionError != ScannerError::NoError) |
879 | 8 | return setError(unicodeDirectionError); |
880 | 1.00k | } |
881 | | |
882 | 643k | literal.complete(); |
883 | 643k | advance(); // consume quote |
884 | 643k | return _isUnicode ? Token::UnicodeStringLiteral : Token::StringLiteral; |
885 | 643k | } |
886 | | |
887 | | Token Scanner::scanHexString() |
888 | 51.0k | { |
889 | 51.0k | char const quote = m_char; |
890 | 51.0k | advance(); // consume quote |
891 | 51.0k | LiteralScope literal(this, LITERAL_TYPE_STRING); |
892 | 51.0k | bool allowUnderscore = false; |
893 | 3.67M | while (m_char != quote && !isSourcePastEndOfInput()) |
894 | 3.62M | { |
895 | 3.62M | char c = m_char; |
896 | | |
897 | 3.62M | if (scanHexByte(c)) |
898 | 3.62M | { |
899 | 3.62M | addLiteralChar(c); |
900 | 3.62M | allowUnderscore = true; |
901 | 3.62M | } |
902 | 2.10k | else if (c == '_') |
903 | 1.09k | { |
904 | 1.09k | advance(); |
905 | 1.09k | if (!allowUnderscore || m_char == quote) |
906 | 290 | return setError(ScannerError::IllegalNumberSeparator); |
907 | 805 | allowUnderscore = false; |
908 | 805 | } |
909 | 1.00k | else |
910 | 1.00k | return setError(ScannerError::IllegalHexString); |
911 | 3.62M | } |
912 | | |
913 | 49.7k | if (m_char != quote) |
914 | 256 | return setError(ScannerError::IllegalStringEndQuote); |
915 | | |
916 | 49.4k | literal.complete(); |
917 | 49.4k | advance(); // consume quote |
918 | 49.4k | return Token::HexStringLiteral; |
919 | 49.7k | } |
920 | | |
921 | | // Parse for regex [:digit:]+(_[:digit:]+)* |
922 | | void Scanner::scanDecimalDigits() |
923 | 5.97M | { |
924 | | // MUST begin with a decimal digit. |
925 | 5.97M | if (!isDecimalDigit(m_char)) |
926 | 2.04M | return; |
927 | | |
928 | | // May continue with decimal digit or underscore for grouping. |
929 | 3.93M | do |
930 | 12.7M | addLiteralCharAndAdvance(); |
931 | 12.7M | while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_')); |
932 | | |
933 | | // Defer further validation of underscore to SyntaxChecker. |
934 | 3.93M | } |
935 | | |
936 | | Token Scanner::scanNumber(char _charSeen) |
937 | 7.85M | { |
938 | 7.85M | enum { DECIMAL, HEX, BINARY } kind = DECIMAL; |
939 | 7.85M | LiteralScope literal(this, LITERAL_TYPE_NUMBER); |
940 | 7.85M | if (_charSeen == '.') |
941 | 3.34k | { |
942 | | // we have already seen a decimal point of the float |
943 | 3.34k | addLiteralChar('.'); |
944 | 3.34k | if (m_char == '_') |
945 | 0 | return setError(ScannerError::IllegalToken); |
946 | 3.34k | scanDecimalDigits(); // we know we have at least one digit |
947 | 3.34k | } |
948 | 7.85M | else |
949 | 7.85M | { |
950 | 7.85M | solAssert(_charSeen == 0, ""); |
951 | | // if the first character is '0' we must check for octals and hex |
952 | 7.85M | if (m_char == '0') |
953 | 3.93M | { |
954 | 3.93M | addLiteralCharAndAdvance(); |
955 | | // either 0, 0exxx, 0Exxx, 0.xxx or a hex number |
956 | 3.93M | if (m_char == 'x') |
957 | 1.89M | { |
958 | | // hex number |
959 | 1.89M | kind = HEX; |
960 | 1.89M | addLiteralCharAndAdvance(); |
961 | 1.89M | if (!isHexDigit(m_char)) |
962 | 393 | return setError(ScannerError::IllegalHexDigit); // we must have at least one hex digit after 'x' |
963 | | |
964 | 41.9M | while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation |
965 | 40.0M | addLiteralCharAndAdvance(); |
966 | 1.89M | } |
967 | 2.04M | else if (isDecimalDigit(m_char)) |
968 | | // We do not allow octal numbers |
969 | 1.17k | return setError(ScannerError::OctalNotAllowed); |
970 | 3.93M | } |
971 | | // Parse decimal digits and allow trailing fractional part. |
972 | 7.84M | if (kind == DECIMAL) |
973 | 5.95M | { |
974 | 5.95M | scanDecimalDigits(); // optional |
975 | 5.95M | if (m_char == '.') |
976 | 7.87k | { |
977 | 7.87k | if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_') |
978 | 987 | { |
979 | | // Assume the input may be a floating point number with leading '_' in fraction part. |
980 | | // Recover by consuming it all but returning `Illegal` right away. |
981 | 987 | addLiteralCharAndAdvance(); // '.' |
982 | 987 | addLiteralCharAndAdvance(); // '_' |
983 | 987 | scanDecimalDigits(); |
984 | 987 | } |
985 | 7.87k | if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1))) |
986 | 1.26k | { |
987 | | // A '.' has to be followed by a number. |
988 | 1.26k | literal.complete(); |
989 | 1.26k | return Token::Number; |
990 | 1.26k | } |
991 | 6.61k | addLiteralCharAndAdvance(); |
992 | 6.61k | scanDecimalDigits(); |
993 | 6.61k | } |
994 | 5.95M | } |
995 | 7.84M | } |
996 | | // scan exponent, if any |
997 | 7.85M | if (m_char == 'e' || m_char == 'E') |
998 | 9.80k | { |
999 | 9.80k | solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number"); |
1000 | 9.80k | if (kind != DECIMAL) |
1001 | 0 | return setError(ScannerError::IllegalExponent); |
1002 | 9.80k | else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_') |
1003 | 645 | { |
1004 | | // Recover from wrongly placed underscore as delimiter in literal with scientific |
1005 | | // notation by consuming until the end. |
1006 | 645 | addLiteralCharAndAdvance(); // 'e' |
1007 | 645 | addLiteralCharAndAdvance(); // '_' |
1008 | 645 | scanDecimalDigits(); |
1009 | 645 | literal.complete(); |
1010 | 645 | return Token::Number; |
1011 | 645 | } |
1012 | | // scan exponent |
1013 | 9.16k | addLiteralCharAndAdvance(); // 'e' | 'E' |
1014 | 9.16k | if (m_char == '+' || m_char == '-') |
1015 | 2.02k | addLiteralCharAndAdvance(); |
1016 | 9.16k | if (!isDecimalDigit(m_char)) // we must have at least one decimal digit after 'e'/'E' |
1017 | 800 | return setError(ScannerError::IllegalExponent); |
1018 | 8.36k | scanDecimalDigits(); |
1019 | 8.36k | } |
1020 | | // The source character immediately following a numeric literal must |
1021 | | // not be an identifier start or a decimal digit; see ECMA-262 |
1022 | | // section 7.8.3, page 17 (note that we read only one decimal digit |
1023 | | // if the value is 0). |
1024 | 7.84M | if (isDecimalDigit(m_char) || isIdentifierStart(m_char)) |
1025 | 2.37k | return setError(ScannerError::IllegalNumberEnd); |
1026 | 7.84M | literal.complete(); |
1027 | 7.84M | return Token::Number; |
1028 | 7.84M | } |
1029 | | |
1030 | | std::tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword() |
1031 | 19.4M | { |
1032 | 19.4M | solAssert(isIdentifierStart(m_char), ""); |
1033 | 19.4M | LiteralScope literal(this, LITERAL_TYPE_STRING); |
1034 | 19.4M | addLiteralCharAndAdvance(); |
1035 | | // Scan the rest of the identifier characters. |
1036 | 142M | while (isIdentifierPart(m_char) || (m_char == '.' && m_kind == ScannerKind::Yul)) |
1037 | 122M | addLiteralCharAndAdvance(); |
1038 | 19.4M | literal.complete(); |
1039 | | |
1040 | 19.4M | auto const token = TokenTraits::fromIdentifierOrKeyword(m_tokens[NextNext].literal); |
1041 | 19.4M | switch (m_kind) |
1042 | 19.4M | { |
1043 | 1.09k | case ScannerKind::SpecialComment: |
1044 | | // there are no keywords in special comments |
1045 | 1.09k | return std::make_tuple(Token::Identifier, 0, 0); |
1046 | 1.57M | case ScannerKind::Solidity: |
1047 | | // Turn experimental Solidity keywords that are not keywords in legacy Solidity into identifiers. |
1048 | 1.57M | if (TokenTraits::isExperimentalSolidityOnlyKeyword(std::get<0>(token))) |
1049 | 168 | return std::make_tuple(Token::Identifier, 0, 0); |
1050 | 1.57M | break; |
1051 | 17.8M | case ScannerKind::Yul: |
1052 | | // Turn Solidity identifier into a Yul keyword |
1053 | 17.8M | if (m_tokens[NextNext].literal == "leave") |
1054 | 78.4k | return std::make_tuple(Token::Leave, 0, 0); |
1055 | | // Turn non-Yul keywords into identifiers. |
1056 | 17.8M | if (!TokenTraits::isYulKeyword(std::get<0>(token))) |
1057 | 14.8M | return std::make_tuple(Token::Identifier, 0, 0); |
1058 | 2.97M | break; |
1059 | 2.97M | case ScannerKind::ExperimentalSolidity: |
1060 | | // Turn legacy Solidity keywords that are not keywords in experimental Solidity into identifiers. |
1061 | 5.64k | if (!TokenTraits::isExperimentalSolidityKeyword(std::get<0>(token))) |
1062 | 4.02k | return std::make_tuple(Token::Identifier, 0, 0); |
1063 | 1.61k | break; |
1064 | 19.4M | } |
1065 | 4.54M | return token; |
1066 | 19.4M | } |
1067 | | |
1068 | | } // namespace solidity::langutil |