Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_ASMJS_ASM_SCANNER_H_
6 : #define V8_ASMJS_ASM_SCANNER_H_
7 :
8 : #include <memory>
9 : #include <string>
10 : #include <unordered_map>
11 :
12 : #include "src/asmjs/asm-names.h"
13 : #include "src/base/logging.h"
14 : #include "src/globals.h"
15 :
16 : namespace v8 {
17 : namespace internal {
18 :
19 : class Utf16CharacterStream;
20 :
21 : // A custom scanner to extract the token stream needed to parse valid
22 : // asm.js: http://asmjs.org/spec/latest/
23 : // This scanner intentionally avoids the portions of JavaScript lexing
24 : // that are not required to determine if code is valid asm.js code.
25 : // * Strings are disallowed except for 'use asm'.
26 : // * Only the subset of keywords needed to check asm.js invariants are
27 : // included.
28 : // * Identifiers are accumulated into local + global string tables
29 : // (for performance).
30 9117 : class V8_EXPORT_PRIVATE AsmJsScanner { // Tokenizer for asm.js validation; token values are laid out in the enum below.
31 : public:
32 : typedef int32_t token_t; // Signed: local identifier tokens are <= kLocalsStart (-10000).
33 :
34 : explicit AsmJsScanner(Utf16CharacterStream* stream); // NOTE(review): presumably stored in stream_ and not owned; the definition is not visible here - confirm.
35 :
36 : // Get current token.
37 : token_t Token() const { return token_; }
38 : // Get position of current token.
39 : size_t Position() const { return position_; }
40 : // Advance to the next token.
41 : void Next();
42 : // Back up by one token.
43 : void Rewind(); // GetIdentifierString() and IsPrecededByNewline() DCHECK(!rewind_), so do not call them after backing up.
44 :
45 : // Get raw string for current identifier. Note that the returned string will
46 : // become invalid when the scanner advances, create a copy to preserve it.
47 : const std::string& GetIdentifierString() const {
48 : // Identifier strings don't work after a rewind.
49 : DCHECK(!rewind_);
50 : return identifier_string_;
51 : }
52 :
53 : // Check if we just passed a newline.
54 : bool IsPrecededByNewline() const {
55 : // Newline tracking doesn't work if you back up.
56 : DCHECK(!rewind_);
57 : return preceded_by_newline_;
58 : }
59 :
60 : #if DEBUG
61 : // Debug only method to go from a token back to its name.
62 : // Slow, only use for debugging.
63 : std::string Name(token_t token) const;
64 : #endif
65 :
66 : // Restores old position (token after that position). Note that it is not
67 : // allowed to rewind right after a seek, because previous tokens are unknown.
68 : void Seek(size_t pos);
69 :
70 : // Select whether identifiers are resolved in global or local scope,
71 : // and which scope new identifiers are added to.
72 664166 : void EnterLocalScope() { in_local_scope_ = true; }
73 664169 : void EnterGlobalScope() { in_local_scope_ = false; }
74 : // Drop all current local identifiers.
75 : void ResetLocals();
76 :
77 : // Methods to check if a token is an identifier and which scope.
78 3487539 : bool IsLocal() const { return IsLocal(Token()); }
79 7057396 : bool IsGlobal() const { return IsGlobal(Token()); }
80 : static bool IsLocal(token_t token) { return token <= kLocalsStart; } // kLocalsStart itself counts as a local.
81 : static bool IsGlobal(token_t token) { return token >= kGlobalsStart; } // kGlobalsStart itself counts as a global.
82 : // Methods to find the index position of an identifier (count starting from
83 : // 0 for each scope separately). Locals count downwards from kLocalsStart, hence the negation in LocalIndex().
84 : static size_t LocalIndex(token_t token) {
85 : DCHECK(IsLocal(token));
86 1762814 : return -(token - kLocalsStart); // Equals kLocalsStart - token, which is >= 0 whenever IsLocal(token) holds.
87 : }
88 : static size_t GlobalIndex(token_t token) {
89 : DCHECK(IsGlobal(token));
90 1422900 : return token - kGlobalsStart; // >= 0 whenever IsGlobal(token) holds.
91 : }
92 :
93 : // Methods to check if the current token is a numeric literal considered an
94 : // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
95 : // that numbers without a dot outside the [0 .. 2^32) range are errors.
96 4009486 : bool IsUnsigned() const { return Token() == kUnsigned; } // NOTE(review): kUnsigned is presumably generated by SPECIAL_TOKEN_LIST in the enum below - confirm.
97 : uint32_t AsUnsigned() const {
98 : DCHECK(IsUnsigned());
99 : return unsigned_value_;
100 : }
101 826420 : bool IsDouble() const { return Token() == kDouble; } // NOTE(review): kDouble is presumably generated by SPECIAL_TOKEN_LIST in the enum below - confirm.
102 : double AsDouble() const {
103 : DCHECK(IsDouble());
104 : return double_value_;
105 : }
106 :
107 : // clang-format off
108 : enum {
109 : // -10000 down to -10000-kMaxIdentifierCount :: Local identifiers (counting
110 : // backwards; -10000 itself is local index 0)
111 : // -9999 .. -1 :: Builtin tokens like keywords
112 : // (also includes some special
113 : // ones like end of input)
114 : // 0 .. 255 :: Single char tokens
115 : // 256 .. 256+kMaxIdentifierCount :: Global identifiers (256 itself is global index 0)
116 : kLocalsStart = -10000,
117 : #define V(name, _junk1, _junk2, _junk3) kToken_##name,
118 : STDLIB_MATH_FUNCTION_LIST(V) // Enumerators generated from here on count up from kLocalsStart + 1, in list order.
119 : STDLIB_ARRAY_TYPE_LIST(V)
120 : #undef V
121 : #define V(name, _junk1) kToken_##name,
122 : STDLIB_MATH_VALUE_LIST(V)
123 : #undef V
124 : #define V(name) kToken_##name,
125 : STDLIB_OTHER_LIST(V)
126 : KEYWORD_NAME_LIST(V)
127 : #undef V
128 : #define V(rawname, name) kToken_##name,
129 : LONG_SYMBOL_NAME_LIST(V)
130 : #undef V
131 : #define V(name, value, string_name) name = value,
132 : SPECIAL_TOKEN_LIST(V) // Each entry supplies its own explicit value; NOTE(review): the values are defined outside this file.
133 : #undef V
134 : kGlobalsStart = 256,
135 : };
136 : // clang-format on
137 :
138 : private:
139 : Utf16CharacterStream* stream_; // NOTE(review): presumably the constructor argument, not owned - confirm.
140 : token_t token_; // Returned by Token().
141 : token_t preceding_token_; // NOTE(review): presumably the token before token_, kept for Rewind() - confirm.
142 : token_t next_token_; // Only set when in {rewind} state.
143 : size_t position_; // Corresponds to {token} position.
144 : size_t preceding_position_; // Corresponds to {preceding_token} position.
145 : size_t next_position_; // Only set when in {rewind} state.
146 : bool rewind_; // Must be false when GetIdentifierString() or IsPrecededByNewline() is called (DCHECKed there).
147 : std::string identifier_string_; // Returned by GetIdentifierString().
148 : bool in_local_scope_; // Set by EnterLocalScope(), cleared by EnterGlobalScope().
149 : std::unordered_map<std::string, token_t> local_names_; // Name string to token; NOTE(review): presumably the local-scope table - confirm.
150 : std::unordered_map<std::string, token_t> global_names_; // Name string to token; NOTE(review): presumably the global-scope table - confirm.
151 : std::unordered_map<std::string, token_t> property_names_; // Name string to token; NOTE(review): usage is not visible in this header.
152 : int global_count_; // NOTE(review): presumably the number of global identifiers handed out so far - confirm.
153 : double double_value_; // Returned by AsDouble().
154 : uint32_t unsigned_value_; // Returned by AsUnsigned().
155 : bool preceded_by_newline_; // Returned by IsPrecededByNewline().
156 :
157 : // Consume multiple characters.
158 : void ConsumeIdentifier(uc32 ch);
159 : void ConsumeNumber(uc32 ch);
160 : bool ConsumeCComment(); // NOTE(review): the meaning of the bool result is not visible here - check the definition.
161 : void ConsumeCPPComment();
162 : void ConsumeString(uc32 quote);
163 : void ConsumeCompareOrShift(uc32 ch);
164 :
165 : // Classify character categories.
166 : bool IsIdentifierStart(uc32 ch);
167 : bool IsIdentifierPart(uc32 ch);
168 : bool IsNumberStart(uc32 ch);
169 : };
170 :
171 : } // namespace internal
172 : } // namespace v8
173 :
174 : #endif // V8_ASMJS_ASM_SCANNER_H_
|