Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_ASMJS_ASM_SCANNER_H_
6 : #define V8_ASMJS_ASM_SCANNER_H_
7 :
8 : #include <memory>
9 : #include <string>
10 : #include <unordered_map>
11 :
12 : #include "src/asmjs/asm-names.h"
13 : #include "src/base/logging.h"
14 : #include "src/globals.h"
15 :
16 : namespace v8 {
17 : namespace internal {
18 :
19 : class Utf16CharacterStream;
20 :
21 : // A custom scanner to extract the token stream needed to parse valid
22 : // asm.js: http://asmjs.org/spec/latest/
23 : // This scanner intentionally avoids the portion of JavaScript lexing
24 : // that are not required to determine if code is valid asm.js code.
25 : // * Strings are disallowed except for 'use asm'.
26 : // * Only the subset of keywords needed to check asm.js invariants are
27 : // included.
28 : // * Identifiers are accumulated into local + global string tables
29 : // (for performance).
30 702323 : class V8_EXPORT_PRIVATE AsmJsScanner {
31 : public:
32 : typedef int32_t token_t;
33 :
34 : AsmJsScanner();
35 : // Pick the stream to parse (must be called before anything else).
36 : void SetStream(std::unique_ptr<Utf16CharacterStream> stream);
37 :
38 : // Get current token.
39 : token_t Token() const { return token_; }
40 : // Get position of current token.
41 : size_t Position() const { return position_; }
42 : // Advance to the next token.
43 : void Next();
44 : // Back up by one token.
45 : void Rewind();
46 :
47 : // Get raw string for current identifier. Note that the returned string will
48 : // become invalid when the scanner advances, create a copy to preserve it.
49 : const std::string& GetIdentifierString() const {
50 : // Identifier strings don't work after a rewind.
51 : DCHECK(!rewind_);
52 : return identifier_string_;
53 : }
54 :
55 : // Check if we just passed a newline.
56 : bool IsPrecededByNewline() const {
57 : // Newline tracking doesn't work if you back up.
58 : DCHECK(!rewind_);
59 : return preceded_by_newline_;
60 : }
61 :
62 : #if DEBUG
63 : // Debug only method to go from a token back to its name.
64 : // Slow, only use for debugging.
65 : std::string Name(token_t token) const;
66 : #endif
67 :
68 : // Restores old position (token after that position). Note that it is not
69 : // allowed to rewind right after a seek, because previous tokens are unknown.
70 : void Seek(size_t pos);
71 :
72 : // Select whether identifiers are resolved in global or local scope,
73 : // and which scope new identifiers are added to.
74 7441 : void EnterLocalScope() { in_local_scope_ = true; }
75 7441 : void EnterGlobalScope() { in_local_scope_ = false; }
76 : // Drop all current local identifiers.
77 : void ResetLocals();
78 :
79 : // Methods to check if a token is an identifier and which scope.
80 299836 : bool IsLocal() const { return IsLocal(Token()); }
81 754872 : bool IsGlobal() const { return IsGlobal(Token()); }
82 : static bool IsLocal(token_t token) { return token <= kLocalsStart; }
83 : static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
84 : // Methods to find the index position of an identifier (count starting from
85 : // 0 for each scope separately).
86 : static size_t LocalIndex(token_t token) {
87 : DCHECK(IsLocal(token));
88 128689 : return -(token - kLocalsStart);
89 : }
90 : static size_t GlobalIndex(token_t token) {
91 : DCHECK(IsGlobal(token));
92 149500 : return token - kGlobalsStart;
93 : }
94 :
95 : // Methods to check if the current token is a numeric literal considered an
96 : // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
97 : // that numbers without a dot outside the [0 .. 2^32) range are errors.
98 342115 : bool IsUnsigned() const { return Token() == kUnsigned; }
99 : uint32_t AsUnsigned() const {
100 : DCHECK(IsUnsigned());
101 : return unsigned_value_;
102 : }
103 73447 : bool IsDouble() const { return Token() == kDouble; }
104 : double AsDouble() const {
105 : DCHECK(IsDouble());
106 : return double_value_;
107 : }
108 :
109 : // clang-format off
110 : enum {
111 : // [-10000-kMaxIdentifierCount, -10000) :: Local identifiers (counting
112 : // backwards)
113 : // [-10000 .. -1) :: Builtin tokens like keywords
114 : // (also includes some special
115 : // ones like end of input)
116 : // 0 .. 255 :: Single char tokens
117 : // 256 .. 256+kMaxIdentifierCount :: Global identifiers
118 : kLocalsStart = -10000,
119 : #define V(name, _junk1, _junk2, _junk3) kToken_##name,
120 : STDLIB_MATH_FUNCTION_LIST(V)
121 : STDLIB_ARRAY_TYPE_LIST(V)
122 : #undef V
123 : #define V(name, _junk1) kToken_##name,
124 : STDLIB_MATH_VALUE_LIST(V)
125 : #undef V
126 : #define V(name) kToken_##name,
127 : STDLIB_OTHER_LIST(V)
128 : KEYWORD_NAME_LIST(V)
129 : #undef V
130 : #define V(rawname, name) kToken_##name,
131 : LONG_SYMBOL_NAME_LIST(V)
132 : #undef V
133 : #define V(name, value, string_name) name = value,
134 : SPECIAL_TOKEN_LIST(V)
135 : #undef V
136 : kGlobalsStart = 256,
137 : };
138 : // clang-format on
139 :
140 : private:
141 : std::unique_ptr<Utf16CharacterStream> stream_;
142 : token_t token_;
143 : token_t preceding_token_;
144 : token_t next_token_; // Only set when in {rewind} state.
145 : size_t position_; // Corresponds to {token} position.
146 : size_t preceding_position_; // Corresponds to {preceding_token} position.
147 : size_t next_position_; // Only set when in {rewind} state.
148 : bool rewind_;
149 : std::string identifier_string_;
150 : bool in_local_scope_;
151 : std::unordered_map<std::string, token_t> local_names_;
152 : std::unordered_map<std::string, token_t> global_names_;
153 : std::unordered_map<std::string, token_t> property_names_;
154 : int global_count_;
155 : double double_value_;
156 : uint32_t unsigned_value_;
157 : bool preceded_by_newline_;
158 :
159 : // Consume multiple characters.
160 : void ConsumeIdentifier(uc32 ch);
161 : void ConsumeNumber(uc32 ch);
162 : bool ConsumeCComment();
163 : void ConsumeCPPComment();
164 : void ConsumeString(uc32 quote);
165 : void ConsumeCompareOrShift(uc32 ch);
166 :
167 : // Classify character categories.
168 : bool IsIdentifierStart(uc32 ch);
169 : bool IsIdentifierPart(uc32 ch);
170 : bool IsNumberStart(uc32 ch);
171 : };
172 :
173 : } // namespace internal
174 : } // namespace v8
175 :
176 : #endif // V8_ASMJS_ASM_SCANNER_H_
|