/src/solidity/liblangutil/Scanner.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * This file is part of solidity. |
3 | | * |
4 | | * solidity is free software: you can redistribute it and/or modify |
5 | | * it under the terms of the GNU General Public License as published by |
6 | | * the Free Software Foundation, either version 3 of the License, or |
7 | | * (at your option) any later version. |
8 | | * |
9 | | * solidity is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | | * GNU General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU General Public License |
15 | | * along with solidity. If not, see <http://www.gnu.org/licenses/>. |
16 | | * |
17 | | * This file is derived from the file "scanner.h", which was part of the |
18 | | * V8 project. The original copyright header follows: |
19 | | * |
20 | | * Copyright 2006-2012, the V8 project authors. All rights reserved. |
21 | | * Redistribution and use in source and binary forms, with or without |
22 | | * modification, are permitted provided that the following conditions are |
23 | | * met: |
24 | | * |
25 | | * * Redistributions of source code must retain the above copyright |
26 | | * notice, this list of conditions and the following disclaimer. |
27 | | * * Redistributions in binary form must reproduce the above |
28 | | * copyright notice, this list of conditions and the following |
29 | | * disclaimer in the documentation and/or other materials provided |
30 | | * with the distribution. |
31 | | * * Neither the name of Google Inc. nor the names of its |
32 | | * contributors may be used to endorse or promote products derived |
33 | | * from this software without specific prior written permission. |
34 | | * |
35 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
36 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
37 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
38 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
39 | | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
40 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
41 | | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
42 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
43 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
44 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
45 | | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
46 | | */ |
47 | | /** |
48 | | * @author Christian <c@ethdev.com> |
49 | | * @date 2014 |
50 | | * Solidity scanner. |
51 | | */ |
52 | | |
53 | | #pragma once |
54 | | |
#include <liblangutil/Token.h>
#include <liblangutil/CharStream.h>
#include <liblangutil/SourceLocation.h>

#include <cstddef>
#include <iosfwd>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
61 | | |
62 | | namespace solidity::langutil |
63 | | { |
64 | | |
// Forward declarations. Nothing else in this header refers to these types.
// NOTE(review): presumably left over from the V8 "scanner.h" this file is
// derived from -- confirm no other translation unit relies on them before removing.
class AstRawString;
class AstValueFactory;
class ParserRecorder;
68 | | |
/// Mode the scanner operates in; selected via Scanner::setScannerMode().
/// A freshly constructed Scanner starts out as ScannerKind::Solidity.
enum class ScannerKind
{
	Solidity,
	Yul
};
74 | | |
/// Error codes the scanner attaches to a token descriptor.
/// NoError is the default state of a descriptor; every other enumerator names
/// a specific lexical error. The enumerators are grouped as in the original
/// declaration and keep their implicit numeric values.
enum class ScannerError
{
	NoError,

	// Malformed tokens, strings, comments and number literals.
	IllegalToken,
	IllegalHexString,
	IllegalHexDigit,
	IllegalCommentTerminator,
	IllegalEscapeSequence,
	UnicodeCharacterInNonUnicodeString,
	IllegalCharacterInString,
	IllegalStringEndQuote,
	IllegalNumberSeparator,
	IllegalExponent,
	IllegalNumberEnd,

	// Problems with directional override characters.
	DirectionalOverrideUnderflow,
	DirectionalOverrideMismatch,

	OctalNotAllowed,
};
96 | | |
97 | | std::string to_string(ScannerError _errorCode); |
98 | | std::ostream& operator<<(std::ostream& os, ScannerError _errorCode); |
99 | | |
100 | | class Scanner |
101 | | { |
102 | | friend class LiteralScope; |
103 | | public: |
104 | | explicit Scanner(CharStream& _source): |
105 | | m_source(_source), |
106 | | m_sourceName{std::make_shared<std::string>(_source.name())} |
107 | 54.0k | { |
108 | 54.0k | reset(); |
109 | 54.0k | } |
110 | | |
111 | | /// Resets scanner to the start of input. |
112 | | void reset(); |
113 | | |
114 | | /// Changes the scanner mode. |
115 | | void setScannerMode(ScannerKind _kind) |
116 | 74.6k | { |
117 | 74.6k | m_kind = _kind; |
118 | | |
119 | | // Invalidate lookahead buffer. |
120 | 74.6k | rescan(); |
121 | 74.6k | } |
122 | | |
123 | 14.8k | CharStream const& charStream() const noexcept { return m_source; } |
124 | | |
125 | | /// @returns the next token and advances input |
126 | | Token next(); |
127 | | |
128 | | /// Set scanner to a specific offset. This is used in error recovery. |
129 | | void setPosition(size_t _offset); |
130 | | |
131 | | ///@{ |
132 | | ///@name Information about the current token |
133 | | |
134 | | /// @returns the current token |
135 | | Token currentToken() const |
136 | 11.4M | { |
137 | 11.4M | return m_tokens[Current].token; |
138 | 11.4M | } |
139 | | ElementaryTypeNameToken currentElementaryTypeNameToken() const |
140 | 12 | { |
141 | 12 | unsigned firstSize; |
142 | 12 | unsigned secondSize; |
143 | 12 | std::tie(firstSize, secondSize) = m_tokens[Current].extendedTokenInfo; |
144 | 12 | return ElementaryTypeNameToken(m_tokens[Current].token, firstSize, secondSize); |
145 | 12 | } |
146 | | |
147 | 4.92M | SourceLocation currentLocation() const { return m_tokens[Current].location; } |
148 | 2.13M | std::string const& currentLiteral() const { return m_tokens[Current].literal; } |
149 | 35.7k | std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_tokens[Current].extendedTokenInfo; } |
150 | | |
151 | | /// Retrieves the last error that occurred during lexical analysis. |
152 | | /// @note If no error occurred, the value is undefined. |
153 | 100 | ScannerError currentError() const noexcept { return m_tokens[Current].error; } |
154 | | ///@} |
155 | | |
156 | | ///@{ |
157 | | ///@name Information about the current comment token |
158 | | |
159 | 33.4k | SourceLocation currentCommentLocation() const { return m_skippedComments[Current].location; } |
160 | 1.59M | std::string const& currentCommentLiteral() const { return m_skippedComments[Current].literal; } |
161 | | /// Called by the parser during FunctionDefinition parsing to clear the current comment |
162 | 0 | void clearCurrentCommentLiteral() { m_skippedComments[Current].literal.clear(); } |
163 | | |
164 | 0 | ScannerKind scannerKind() const { return m_kind; } |
165 | | |
166 | | ///@} |
167 | | |
168 | | ///@{ |
169 | | ///@name Information about the next token |
170 | | |
171 | | /// @returns the next token without advancing input. |
172 | 106k | Token peekNextToken() const { return m_tokens[Next].token; } |
173 | 0 | SourceLocation peekLocation() const { return m_tokens[Next].location; } |
174 | 0 | std::string const& peekLiteral() const { return m_tokens[Next].literal; } |
175 | | |
176 | 872 | Token peekNextNextToken() const { return m_tokens[NextNext].token; } |
177 | | ///@} |
178 | | |
179 | | private: |
180 | | |
181 | | inline Token setError(ScannerError _error) noexcept |
182 | 6.51k | { |
183 | 6.51k | m_tokens[NextNext].error = _error; |
184 | 6.51k | return Token::Illegal; |
185 | 6.51k | } |
186 | | |
187 | | /// Used for the current and look-ahead token and comments |
188 | | struct TokenDesc |
189 | | { |
190 | | Token token; |
191 | | SourceLocation location; |
192 | | std::string literal; |
193 | | ScannerError error = ScannerError::NoError; |
194 | | std::tuple<unsigned, unsigned> extendedTokenInfo; |
195 | | }; |
196 | | |
197 | | ///@{ |
198 | | ///@name Literal buffer support |
199 | 21.3M | inline void addLiteralChar(char c) { m_tokens[NextNext].literal.push_back(c); } |
200 | 1.38M | inline void addCommentLiteralChar(char c) { m_skippedComments[NextNext].literal.push_back(c); } |
201 | 20.8M | inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); } |
202 | | void addUnicodeAsUTF8(unsigned codepoint); |
203 | | ///@} |
204 | | |
205 | 36.4M | bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); } |
206 | 457 | void rollback(size_t _amount) { m_char = m_source.rollback(_amount); } |
207 | | /// Rolls back to the start of the current token and re-runs the scanner. |
208 | | void rescan(); |
209 | | |
210 | 4.97k | inline Token selectErrorToken(ScannerError _err) { advance(); return setError(_err); } |
211 | 2.56M | inline Token selectToken(Token _tok) { advance(); return _tok; } |
212 | | /// If the next character is _next, advance and return _then, otherwise return _else. |
213 | | inline Token selectToken(char _next, Token _then, Token _else); |
214 | | |
215 | | bool scanHexByte(char& o_scannedByte); |
216 | | std::optional<unsigned> scanUnicode(); |
217 | | |
218 | | /// Scans a single Solidity token. |
219 | | void scanToken(); |
220 | | |
221 | | /// Skips all whitespace and @returns true if something was skipped. |
222 | | bool skipWhitespace(); |
223 | | /// Skips all whitespace that are neither '\r' nor '\n'. |
224 | | bool skipWhitespaceExceptUnicodeLinebreak(); |
225 | | Token skipSingleLineComment(); |
226 | | Token skipMultiLineComment(); |
227 | | |
228 | | /// Tests if current source position is CR, LF or CRLF. |
229 | | bool atEndOfLine() const; |
230 | | |
231 | | /// Tries to consume CR, LF or CRLF line terminators and returns success or failure. |
232 | | bool tryScanEndOfLine(); |
233 | | |
234 | | void scanDecimalDigits(); |
235 | | Token scanNumber(char _charSeen = 0); |
236 | | std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword(); |
237 | | |
238 | | Token scanString(bool const _isUnicode); |
239 | | Token scanHexString(); |
240 | | /// Scans a single line comment and returns its corrected end position. |
241 | | size_t scanSingleLineDocComment(); |
242 | | Token scanMultiLineDocComment(); |
243 | | /// Scans a slash '/' and depending on the characters returns the appropriate token |
244 | | Token scanSlash(); |
245 | | |
246 | | /// Scans an escape-sequence which is part of a string and adds the |
247 | | /// decoded character to the current literal. Returns true if a pattern |
248 | | /// is scanned. |
249 | | bool scanEscape(); |
250 | | |
251 | | /// @returns true iff we are currently positioned at a unicode line break. |
252 | | bool isUnicodeLinebreak(); |
253 | | |
254 | | /// Return the current source position. |
255 | 18.9M | size_t sourcePos() const { return m_source.position(); } |
256 | 2.18M | bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); } |
257 | | |
258 | | enum TokenIndex { Current, Next, NextNext }; |
259 | | |
260 | | TokenDesc m_skippedComments[3] = {}; // desc for the current, next and nextnext skipped comment |
261 | | TokenDesc m_tokens[3] = {}; // desc for the current, next and nextnext token |
262 | | |
263 | | CharStream& m_source; |
264 | | std::shared_ptr<std::string const> m_sourceName; |
265 | | |
266 | | ScannerKind m_kind = ScannerKind::Solidity; |
267 | | |
268 | | /// one character look-ahead, equals 0 at end of input |
269 | | char m_char; |
270 | | }; |
271 | | |
272 | | } |