Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_ASMJS_ASM_SCANNER_H_
6 : #define V8_ASMJS_ASM_SCANNER_H_
7 :
8 : #include <memory>
9 : #include <string>
10 : #include <unordered_map>
11 :
12 : #include "src/asmjs/asm-names.h"
13 : #include "src/base/logging.h"
14 : #include "src/globals.h"
15 :
16 : namespace v8 {
17 : namespace internal {
18 :
19 : class Utf16CharacterStream;
20 :
21 : // A custom scanner to extract the token stream needed to parse valid
22 : // asm.js: http://asmjs.org/spec/latest/
23 : // This scanner intentionally avoids the portions of JavaScript lexing
24 : // that are not required to determine if code is valid asm.js code.
25 : // * Strings are disallowed except for 'use asm'.
26 : // * Only the subset of keywords needed to check asm.js invariants are
27 : // included.
28 : // * Identifiers are accumulated into local + global string tables
29 : // (for performance).
30 701714 : class V8_EXPORT_PRIVATE AsmJsScanner {
31 : public:
// Tokens are plain 32-bit signed integers; the anonymous enum further down
// in this class defines how that value space is divided up.
32 : typedef int32_t token_t;
33 :
34 : AsmJsScanner();
35 : // Pick the stream to parse (must be called before anything else).
// The stream is passed by value as a std::unique_ptr, so ownership is
// transferred from the caller.
36 : void SetStream(std::unique_ptr<Utf16CharacterStream> stream);
37 :
38 : // Get current token.
39 : token_t Token() const { return token_; }
40 : // Get position of current token.
41 : size_t Position() const { return position_; }
42 : // Advance to the next token.
43 : void Next();
44 : // Back up by one token.
// While in the rewound state GetIdentifierString() and IsPrecededByNewline()
// must not be used (both DCHECK(!rewind_)).
45 : void Rewind();
46 :
47 : // Get raw string for current identifier. Note that the returned string will
48 : // become invalid when the scanner advances, create a copy to preserve it.
// Returns a reference to the scanner's own buffer (identifier_string_), not
// a copy.
49 : const std::string& GetIdentifierString() const {
50 : // Identifier strings don't work after a rewind.
51 : DCHECK(!rewind_);
52 : return identifier_string_;
53 : }
54 :
55 : // Check if we just passed a newline.
56 : bool IsPrecededByNewline() const {
57 : // Newline tracking doesn't work if you back up.
58 : DCHECK(!rewind_);
59 : return preceded_by_newline_;
60 : }
61 :
62 : #if DEBUG
63 : // Debug only method to go from a token back to its name.
64 : // Slow, only use for debugging.
65 : std::string Name(token_t token) const;
66 : #endif
67 :
68 : // Restores old position (token after that position). Note that it is not
69 : // allowed to rewind right after a seek, because previous tokens are unknown.
70 : void Seek(size_t pos);
71 :
72 : // Select whether identifiers are resolved in global or local scope,
73 : // and which scope new identifiers are added to.
// Both calls only flip in_local_scope_; neither touches the identifier
// tables themselves (use ResetLocals() to drop local identifiers).
74 7439 : void EnterLocalScope() { in_local_scope_ = true; }
75 7439 : void EnterGlobalScope() { in_local_scope_ = false; }
76 : // Drop all current local identifiers.
77 : void ResetLocals();
78 :
79 : // Methods to check if a token is an identifier and which scope.
// The parameterless overloads test the current token; the static overloads
// classify an arbitrary token value by comparing it against kLocalsStart /
// kGlobalsStart.
80 299833 : bool IsLocal() const { return IsLocal(Token()); }
81 754710 : bool IsGlobal() const { return IsGlobal(Token()); }
82 : static bool IsLocal(token_t token) { return token <= kLocalsStart; }
83 : static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
84 : // Methods to find the index position of an identifier (count starting from
85 : // 0 for each scope separately).
// Local tokens count downwards from kLocalsStart, so the index is the
// negated offset: kLocalsStart maps to 0, kLocalsStart - 1 to 1, and so on.
86 : static size_t LocalIndex(token_t token) {
87 : DCHECK(IsLocal(token));
88 128689 : return -(token - kLocalsStart);
89 : }
// Global tokens count upwards from kGlobalsStart, which maps to index 0.
90 : static size_t GlobalIndex(token_t token) {
91 : DCHECK(IsGlobal(token));
92 149492 : return token - kGlobalsStart;
93 : }
94 :
95 : // Methods to check if the current token is an asm.js "number" (contains a
96 : // dot) or an "unsigned" (a number without a dot).
// AsUnsigned() / AsDouble() return the stored value without checking the
// current token kind; presumably callers test IsUnsigned() / IsDouble()
// first -- confirm against the call sites.
97 342112 : bool IsUnsigned() const { return Token() == kUnsigned; }
98 : uint64_t AsUnsigned() const { return unsigned_value_; }
99 73446 : bool IsDouble() const { return Token() == kDouble; }
100 : double AsDouble() const { return double_value_; }
101 :
102 : // clang-format off
103 : enum {
104 : // [-10000-kMaxIdentifierCount, -10000) :: Local identifiers (counting
105 : // backwards)
106 : // [-10000 .. -1) :: Builtin tokens like keywords
107 : // (also includes some special
108 : // ones like end of input)
109 : // 0 .. 255 :: Single char tokens
110 : // 256 .. 256+kMaxIdentifierCount :: Global identifiers
// NOTE(review): IsLocal() accepts token <= kLocalsStart and
// LocalIndex(kLocalsStart) is 0, so -10000 itself appears to be the first
// local identifier rather than a builtin token. The interval brackets in
// the table above therefore look off by one at -10000 -- confirm against
// the scanner implementation before relying on them.
111 : kLocalsStart = -10000,
// Each V(...) expansion below emits one kToken_<name> enumerator with no
// explicit value, so these are numbered consecutively starting at
// kLocalsStart + 1. The *_LIST macros are not defined in this header;
// presumably they come from "src/asmjs/asm-names.h" -- TODO confirm.
112 : #define V(name, _junk1, _junk2, _junk3) kToken_##name,
113 : STDLIB_MATH_FUNCTION_LIST(V)
114 : STDLIB_ARRAY_TYPE_LIST(V)
115 : #undef V
116 : #define V(name, _junk1) kToken_##name,
117 : STDLIB_MATH_VALUE_LIST(V)
118 : #undef V
119 : #define V(name) kToken_##name,
120 : STDLIB_OTHER_LIST(V)
121 : KEYWORD_NAME_LIST(V)
122 : #undef V
123 : #define V(rawname, name) kToken_##name,
124 : LONG_SYMBOL_NAME_LIST(V)
125 : #undef V
// Unlike the lists above, special tokens carry an explicit value each
// (name = value). kUnsigned and kDouble used by IsUnsigned() / IsDouble()
// are not declared anywhere else in this header, so presumably they are
// produced by this list -- TODO confirm.
126 : #define V(name, value, string_name) name = value,
127 : SPECIAL_TOKEN_LIST(V)
128 : #undef V
129 : kGlobalsStart = 256,
130 : };
131 : // clang-format on
132 :
133 : private:
// Input stream handed over via SetStream().
134 : std::unique_ptr<Utf16CharacterStream> stream_;
// Current token, as returned by Token().
135 : token_t token_;
136 : token_t preceding_token_;
137 : token_t next_token_; // Only set when in {rewind} state.
138 : size_t position_; // Corresponds to {token} position.
139 : size_t preceding_position_; // Corresponds to {preceding_token} position.
140 : size_t next_position_; // Only set when in {rewind} state.
// Flag for the {rewind} state referred to above; the accessors
// GetIdentifierString() and IsPrecededByNewline() DCHECK that it is false.
141 : bool rewind_;
// Backing storage for GetIdentifierString().
142 : std::string identifier_string_;
// Set by EnterLocalScope(), cleared by EnterGlobalScope().
143 : bool in_local_scope_;
// Name -> token tables. Presumably these are the per-scope identifier
// tables mentioned in the class comment, plus one for property names --
// confirm against the implementation.
144 : std::unordered_map<std::string, token_t> local_names_;
145 : std::unordered_map<std::string, token_t> global_names_;
146 : std::unordered_map<std::string, token_t> property_names_;
// Presumably the number of global identifiers handed out so far -- TODO
// confirm.
147 : int global_count_;
// Values returned by AsDouble() and AsUnsigned() respectively.
148 : double double_value_;
149 : uint64_t unsigned_value_;
// Value returned by IsPrecededByNewline().
150 : bool preceded_by_newline_;
151 :
152 : // Consume multiple characters.
// NOTE(review): the uc32 argument is presumably the character that has
// already been read from the stream when the helper is invoked, and the
// bool returned by ConsumeCComment() presumably reports whether the comment
// was terminated properly -- neither is visible in this header; confirm.
153 : void ConsumeIdentifier(uc32 ch);
154 : void ConsumeNumber(uc32 ch);
155 : bool ConsumeCComment();
156 : void ConsumeCPPComment();
157 : void ConsumeString(uc32 quote);
158 : void ConsumeCompareOrShift(uc32 ch);
159 :
160 : // Classify character categories.
161 : bool IsIdentifierStart(uc32 ch);
162 : bool IsIdentifierPart(uc32 ch);
163 : bool IsNumberStart(uc32 ch);
164 : };
165 :
166 : } // namespace internal
167 : } // namespace v8
168 :
169 : #endif // V8_ASMJS_ASM_SCANNER_H_
|