Coverage Report

Created: 2022-08-24 06:31

/src/solidity/liblangutil/Scanner.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * This file is part of solidity.
3
 *
4
 * solidity is free software: you can redistribute it and/or modify
5
 * it under the terms of the GNU General Public License as published by
6
 * the Free Software Foundation, either version 3 of the License, or
7
 * (at your option) any later version.
8
 *
9
 * solidity is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
 * GNU General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU General Public License
15
 * along with solidity.  If not, see <http://www.gnu.org/licenses/>.
16
 *
17
 * This file is derived from the file "scanner.h", which was part of the
18
 * V8 project. The original copyright header follows:
19
 *
20
 * Copyright 2006-2012, the V8 project authors. All rights reserved.
21
 * Redistribution and use in source and binary forms, with or without
22
 * modification, are permitted provided that the following conditions are
23
 * met:
24
 *
25
 * * Redistributions of source code must retain the above copyright
26
 *   notice, this list of conditions and the following disclaimer.
27
 * * Redistributions in binary form must reproduce the above
28
 *   copyright notice, this list of conditions and the following
29
 *   disclaimer in the documentation and/or other materials provided
30
 *   with the distribution.
31
 * * Neither the name of Google Inc. nor the names of its
32
 *   contributors may be used to endorse or promote products derived
33
 *   from this software without specific prior written permission.
34
 *
35
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
36
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
38
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
39
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
45
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46
*/
47
/**
48
 * @author Christian <c@ethdev.com>
49
 * @date 2014
50
 * Solidity scanner.
51
 */
52
53
#pragma once
54
55
#include <liblangutil/Token.h>
56
#include <liblangutil/CharStream.h>
57
#include <liblangutil/SourceLocation.h>
58
59
#include <optional>
60
#include <iosfwd>
61
62
namespace solidity::langutil
63
{
64
65
// Forward declarations only. None of these three types is referenced again in the
// part of this header visible here.
// NOTE(review): presumably left over from the V8 "scanner.h" this file is derived
// from -- confirm there are no external users before removing them.
class AstRawString;
66
class AstValueFactory;
67
class ParserRecorder;
68
69
/// Scanner mode selector. A value of this type is stored by the Scanner and can be
/// changed through Scanner::setScannerMode().
enum class ScannerKind
{
  Solidity, ///< Mode a Scanner starts in (its mode member is initialised to this value).
  Yul,      ///< Alternative mode.
};
74
75
/// Error code attached to a scanned token; Scanner::currentError() returns the one
/// stored for the current token.
enum class ScannerError
{
  // No error recorded; this is the default value of a token descriptor's error field.
  NoError,

  // Token, comment, string and number errors.
  IllegalToken,
  IllegalHexString,
  IllegalHexDigit,
  IllegalCommentTerminator,
  IllegalEscapeSequence,
  UnicodeCharacterInNonUnicodeString,
  IllegalCharacterInString,
  IllegalStringEndQuote,
  IllegalNumberSeparator,
  IllegalExponent,
  IllegalNumberEnd,

  // Directional override errors.
  // NOTE(review): presumably about Unicode text-direction override markers -- confirm in Scanner.cpp.
  DirectionalOverrideUnderflow,
  DirectionalOverrideMismatch,

  // Octal number literals.
  OctalNotAllowed,
};
96
97
/// Converts @a _errorCode to a std::string. Declared here only; the definition is
/// not part of this header.
std::string to_string(ScannerError _errorCode);
98
/// Stream insertion operator for ScannerError.
/// NOTE(review): presumably writes the same text as to_string() and returns @a os --
/// confirm against the definition.
std::ostream& operator<<(std::ostream& os, ScannerError _errorCode);
99
100
class Scanner
101
{
102
  friend class LiteralScope;
103
public:
104
  explicit Scanner(CharStream& _source):
105
    m_source(_source),
106
    m_sourceName{std::make_shared<std::string>(_source.name())}
107
54.0k
  {
108
54.0k
    reset();
109
54.0k
  }
110
111
  /// Resets scanner to the start of input.
112
  void reset();
113
114
  /// Changes the scanner mode.
115
  void setScannerMode(ScannerKind _kind)
116
74.6k
  {
117
74.6k
    m_kind = _kind;
118
119
    // Invalidate lookahead buffer.
120
74.6k
    rescan();
121
74.6k
  }
122
123
14.8k
  CharStream const& charStream() const noexcept { return m_source; }
124
125
  /// @returns the next token and advances input
126
  Token next();
127
128
  /// Set scanner to a specific offset. This is used in error recovery.
129
  void setPosition(size_t _offset);
130
131
  ///@{
132
  ///@name Information about the current token
133
134
  /// @returns the current token
135
  Token currentToken() const
136
11.4M
  {
137
11.4M
    return m_tokens[Current].token;
138
11.4M
  }
139
  ElementaryTypeNameToken currentElementaryTypeNameToken() const
140
12
  {
141
12
    unsigned firstSize;
142
12
    unsigned secondSize;
143
12
    std::tie(firstSize, secondSize) = m_tokens[Current].extendedTokenInfo;
144
12
    return ElementaryTypeNameToken(m_tokens[Current].token, firstSize, secondSize);
145
12
  }
146
147
4.92M
  SourceLocation currentLocation() const { return m_tokens[Current].location; }
148
2.13M
  std::string const& currentLiteral() const { return m_tokens[Current].literal; }
149
35.7k
  std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_tokens[Current].extendedTokenInfo; }
150
151
  /// Retrieves the last error that occurred during lexical analysis.
152
  /// @note If no error occurred, the value is undefined.
153
100
  ScannerError currentError() const noexcept { return m_tokens[Current].error; }
154
  ///@}
155
156
  ///@{
157
  ///@name Information about the current comment token
158
159
33.4k
  SourceLocation currentCommentLocation() const { return m_skippedComments[Current].location; }
160
1.59M
  std::string const& currentCommentLiteral() const { return m_skippedComments[Current].literal; }
161
  /// Called by the parser during FunctionDefinition parsing to clear the current comment
162
0
  void clearCurrentCommentLiteral() { m_skippedComments[Current].literal.clear(); }
163
164
0
  ScannerKind scannerKind() const { return m_kind; }
165
166
  ///@}
167
168
  ///@{
169
  ///@name Information about the next token
170
171
  /// @returns the next token without advancing input.
172
106k
  Token peekNextToken() const { return m_tokens[Next].token; }
173
0
  SourceLocation peekLocation() const { return m_tokens[Next].location; }
174
0
  std::string const& peekLiteral() const { return m_tokens[Next].literal; }
175
176
872
  Token peekNextNextToken() const { return m_tokens[NextNext].token; }
177
  ///@}
178
179
private:
180
181
  inline Token setError(ScannerError _error) noexcept
182
6.51k
  {
183
6.51k
    m_tokens[NextNext].error = _error;
184
6.51k
    return Token::Illegal;
185
6.51k
  }
186
187
  /// Used for the current and look-ahead token and comments
188
  struct TokenDesc
189
  {
190
    Token token;
191
    SourceLocation location;
192
    std::string literal;
193
    ScannerError error = ScannerError::NoError;
194
    std::tuple<unsigned, unsigned> extendedTokenInfo;
195
  };
196
197
  ///@{
198
  ///@name Literal buffer support
199
21.3M
  inline void addLiteralChar(char c) { m_tokens[NextNext].literal.push_back(c); }
200
1.38M
  inline void addCommentLiteralChar(char c) { m_skippedComments[NextNext].literal.push_back(c); }
201
20.8M
  inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
202
  void addUnicodeAsUTF8(unsigned codepoint);
203
  ///@}
204
205
36.4M
  bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
206
457
  void rollback(size_t _amount) { m_char = m_source.rollback(_amount); }
207
  /// Rolls back to the start of the current token and re-runs the scanner.
208
  void rescan();
209
210
4.97k
  inline Token selectErrorToken(ScannerError _err) { advance(); return setError(_err); }
211
2.56M
  inline Token selectToken(Token _tok) { advance(); return _tok; }
212
  /// If the next character is _next, advance and return _then, otherwise return _else.
213
  inline Token selectToken(char _next, Token _then, Token _else);
214
215
  bool scanHexByte(char& o_scannedByte);
216
  std::optional<unsigned> scanUnicode();
217
218
  /// Scans a single Solidity token.
219
  void scanToken();
220
221
  /// Skips all whitespace and @returns true if something was skipped.
222
  bool skipWhitespace();
223
  /// Skips all whitespace that are neither '\r' nor '\n'.
224
  bool skipWhitespaceExceptUnicodeLinebreak();
225
  Token skipSingleLineComment();
226
  Token skipMultiLineComment();
227
228
  /// Tests if current source position is CR, LF or CRLF.
229
  bool atEndOfLine() const;
230
231
  /// Tries to consume CR, LF or CRLF line terminators and returns success or failure.
232
  bool tryScanEndOfLine();
233
234
  void scanDecimalDigits();
235
  Token scanNumber(char _charSeen = 0);
236
  std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword();
237
238
  Token scanString(bool const _isUnicode);
239
  Token scanHexString();
240
  /// Scans a single line comment and returns its corrected end position.
241
  size_t scanSingleLineDocComment();
242
  Token scanMultiLineDocComment();
243
  /// Scans a slash '/' and depending on the characters returns the appropriate token
244
  Token scanSlash();
245
246
  /// Scans an escape-sequence which is part of a string and adds the
247
  /// decoded character to the current literal. Returns true if a pattern
248
  /// is scanned.
249
  bool scanEscape();
250
251
  /// @returns true iff we are currently positioned at a unicode line break.
252
  bool isUnicodeLinebreak();
253
254
  /// Return the current source position.
255
18.9M
  size_t sourcePos() const { return m_source.position(); }
256
2.18M
  bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); }
257
258
  enum TokenIndex { Current, Next, NextNext };
259
260
  TokenDesc m_skippedComments[3] = {}; // desc for the current, next and nextnext skipped comment
261
  TokenDesc m_tokens[3] = {}; // desc for the current, next and nextnext token
262
263
  CharStream& m_source;
264
  std::shared_ptr<std::string const> m_sourceName;
265
266
  ScannerKind m_kind = ScannerKind::Solidity;
267
268
  /// one character look-ahead, equals 0 at end of input
269
  char m_char;
270
};
271
272
}