Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_ASMJS_ASM_PARSER_H_
6 : #define V8_ASMJS_ASM_PARSER_H_
7 :
8 : #include <memory>
9 : #include <string>
10 :
11 : #include "src/asmjs/asm-scanner.h"
12 : #include "src/asmjs/asm-types.h"
13 : #include "src/wasm/wasm-module-builder.h"
14 : #include "src/zone/zone-containers.h"
15 :
16 : namespace v8 {
17 : namespace internal {
18 :
19 : class Utf16CharacterStream;
20 :
21 : namespace wasm {
22 :
23 : // A custom parser + validator + wasm converter for asm.js:
24 : // http://asmjs.org/spec/latest/
25 : // This parser intentionally avoids the portions of JavaScript parsing
26 : // that are not required to determine if code is valid asm.js code.
27 : // * It is mostly one pass.
28 : // * It bails out on unexpected input.
29 : // * It assumes strict ordering insofar as permitted by asm.js validation rules.
30 : // * It relies on a custom scanner that provides de-duped identifiers in two
31 : // scopes (local + module wide).
32 346048 : class AsmJsParser {
33 : public:
34 : // clang-format off
35 : enum StandardMember {
36 : kInfinity,
37 : kNaN,
38 : #define V(_unused1, name, _unused2, _unused3) kMath##name,
39 : STDLIB_MATH_FUNCTION_LIST(V)
40 : #undef V
41 : #define V(name, _unused1) kMath##name,
42 : STDLIB_MATH_VALUE_LIST(V)
43 : #undef V
44 : #define V(name, _unused1, _unused2, _unused3) k##name,
45 : STDLIB_ARRAY_TYPE_LIST(V)
46 : #undef V
47 : };
48 : // clang-format on
49 :
50 : typedef EnumSet<StandardMember, uint64_t> StdlibSet;
51 :
52 : explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
53 : Utf16CharacterStream* stream);
54 : bool Run();
55 : const char* failure_message() const { return failure_message_; }
56 : int failure_location() const { return failure_location_; }
57 : WasmModuleBuilder* module_builder() { return module_builder_; }
58 : const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }
59 :
60 : private:
61 : // clang-format off
62 : enum class VarKind {
63 : kUnused,
64 : kLocal,
65 : kGlobal,
66 : kSpecial,
67 : kFunction,
68 : kTable,
69 : kImportedFunction,
70 : #define V(_unused0, Name, _unused1, _unused2) kMath##Name,
71 : STDLIB_MATH_FUNCTION_LIST(V)
72 : #undef V
73 : #define V(Name, _unused1) kMath##Name,
74 : STDLIB_MATH_VALUE_LIST(V)
75 : #undef V
76 : };
77 : // clang-format on
78 :
79 : struct FunctionImportInfo {
80 : Vector<const char> function_name;
81 : WasmModuleBuilder::SignatureMap cache;
82 : };
83 :
// Record kept per identifier; instances are stored in {global_var_info_} and
// {local_var_info_}. A default-constructed entry has kind kUnused, type
// AsmType::None(), no function builder or import attached, a zero mask and
// index, {mutable_variable} set and {function_defined} cleared.
84 127503 : struct VarInfo {
85 : AsmType* type = AsmType::None();
86 : WasmFunctionBuilder* function_builder = nullptr;
87 : FunctionImportInfo* import = nullptr;
88 : uint32_t mask = 0;
89 : uint32_t index = 0;
90 : VarKind kind = VarKind::kUnused;
91 : bool mutable_variable = true;
92 : bool function_defined = false;
93 : };
94 :
// Element type of {global_imports_}: groups an import name and a wasm value
// type with a pointer to the VarInfo of the associated variable.
// NOTE(review): {var_info} is a raw, non-owning-looking pointer; presumably it
// refers to an entry of {global_var_info_} -- confirm against the .cc file.
95 : struct GlobalImport {
96 : Vector<const char> import_name;
97 : ValueType value_type;
98 : VarInfo* var_info;
99 : };
100 :
// Classification of an entry on {block_stack_}. BareBegin() uses kOther when
// no kind is passed.
101 : enum class BlockKind { kRegular, kLoop, kOther };
102 :
// Element type of {block_stack_}: the kind of a block together with the label
// token attached to it (BareBegin() defaults the label to 0).
103 : struct BlockInfo {
104 : BlockKind kind;
105 : AsmJsScanner::token_t label;
106 : };
107 :
108 : // Helper class to make {TempVariable} safe for nesting.
109 : class TemporaryVariableScope;
110 :
111 : template <typename T>
112 : class CachedVectors {
113 : public:
114 : explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}
115 :
116 : Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }
117 :
118 167223 : inline void fill(ZoneVector<T>* vec) {
119 334446 : if (reusable_vectors_.empty()) return;
120 : reusable_vectors_.back().swap(*vec);
121 : reusable_vectors_.pop_back();
122 : vec->clear();
123 : }
124 :
125 : inline void reuse(ZoneVector<T>* vec) {
126 167223 : reusable_vectors_.emplace_back(std::move(*vec));
127 : }
128 :
129 : private:
130 : ZoneVector<ZoneVector<T>> reusable_vectors_;
131 : };
132 :
133 : template <typename T>
134 : class CachedVector final : public ZoneVector<T> {
135 : public:
136 : explicit CachedVector(CachedVectors<T>& cache)
137 167223 : : ZoneVector<T>(cache.zone()), cache_(&cache) {
138 167223 : cache.fill(this);
139 : }
140 167223 : ~CachedVector() { cache_->reuse(this); }
141 :
142 : private:
143 : CachedVectors<T>* cache_;
144 : };
145 :
146 : Zone* zone_;
147 : AsmJsScanner scanner_;
148 : WasmModuleBuilder* module_builder_;
149 : WasmFunctionBuilder* current_function_builder_;
150 : AsmType* return_type_;
151 : uintptr_t stack_limit_;
152 : StdlibSet stdlib_uses_;
153 : ZoneVector<VarInfo> global_var_info_;
154 : ZoneVector<VarInfo> local_var_info_;
155 :
156 : CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
157 : CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
158 : CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
159 : CachedVectors<int32_t> cached_int_vectors_{zone_};
160 :
161 : int function_temp_locals_offset_;
162 : int function_temp_locals_used_;
163 : int function_temp_locals_depth_;
164 :
165 : // Error Handling related
166 : bool failed_;
167 : const char* failure_message_;
168 : int failure_location_;
169 :
170 : // Module Related.
171 : AsmJsScanner::token_t stdlib_name_;
172 : AsmJsScanner::token_t foreign_name_;
173 : AsmJsScanner::token_t heap_name_;
174 :
175 : static const AsmJsScanner::token_t kTokenNone = 0;
176 :
177 : // Track if parsing a heap assignment.
178 : bool inside_heap_assignment_;
179 : AsmType* heap_access_type_;
180 :
181 : ZoneVector<BlockInfo> block_stack_;
182 :
183 : // Types used for stdlib functions and their setup.
184 : AsmType* stdlib_dq2d_;
185 : AsmType* stdlib_dqdq2d_;
186 : AsmType* stdlib_i2s_;
187 : AsmType* stdlib_ii2s_;
188 : AsmType* stdlib_minmax_;
189 : AsmType* stdlib_abs_;
190 : AsmType* stdlib_ceil_like_;
191 : AsmType* stdlib_fround_;
192 :
193 : // When making calls, the return type is needed to lookup signatures.
194 : // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
195 : // along the coercion.
196 : AsmType* call_coercion_;
197 :
198 : // The source position associated with the above {call_coercion}.
199 : size_t call_coercion_position_;
200 :
201 : // When making calls, the coercion can also appear in the source stream
202 : // syntactically "behind" the call site. For `callsite(..)|0` use this
203 : // value to flag that such a coercion must happen.
204 : AsmType* call_coercion_deferred_;
205 :
206 : // The source position at which requesting a deferred coercion via the
207 : // aforementioned {call_coercion_deferred} is allowed.
208 : size_t call_coercion_deferred_position_;
209 :
210 : // The code position of the last heap access shift by an immediate value.
211 : // For `heap[expr >> value:NumericLiteral]` this indicates from where to
212 : // delete code when the expression is used as part of a valid heap access.
213 : // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
214 : size_t heap_access_shift_position_;
215 : uint32_t heap_access_shift_value_;
216 : static const size_t kNoHeapAccessShift = -1;
217 :
218 : // Used to track the last label we've seen so it can be matched to later
219 : // statements it's attached to.
220 : AsmJsScanner::token_t pending_label_;
221 :
222 : // Global imports. The list of imported variables that are copied during
223 : // module instantiation into a corresponding global variable.
224 : ZoneLinkedList<GlobalImport> global_imports_;
225 :
226 : Zone* zone() { return zone_; }
227 :
228 : inline bool Peek(AsmJsScanner::token_t token) {
229 9178705 : return scanner_.Token() == token;
230 : }
231 :
232 : inline bool Check(AsmJsScanner::token_t token) {
233 23723075 : if (scanner_.Token() == token) {
234 1450805 : scanner_.Next();
235 : return true;
236 : } else {
237 : return false;
238 : }
239 : }
240 :
241 336763 : inline bool CheckForZero() {
242 336763 : if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
243 326070 : scanner_.Next();
244 326070 : return true;
245 : } else {
246 : return false;
247 : }
248 : }
249 :
250 : inline bool CheckForDouble(double* value) {
251 973736 : if (scanner_.IsDouble()) {
252 : *value = scanner_.AsDouble();
253 12498 : scanner_.Next();
254 : return true;
255 : } else {
256 : return false;
257 : }
258 : }
259 :
260 : inline bool CheckForUnsigned(uint32_t* value) {
261 1448768 : if (scanner_.IsUnsigned()) {
262 : *value = scanner_.AsUnsigned();
263 1224871 : scanner_.Next();
264 : return true;
265 : } else {
266 : return false;
267 : }
268 : }
269 :
270 : inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
271 2002516 : if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
272 : *value = scanner_.AsUnsigned();
273 909052 : scanner_.Next();
274 : return true;
275 : } else {
276 : return false;
277 : }
278 : }
279 :
280 : inline AsmJsScanner::token_t Consume() {
281 295414 : AsmJsScanner::token_t ret = scanner_.Token();
282 1241761 : scanner_.Next();
283 : return ret;
284 : }
285 :
286 : void SkipSemicolon();
287 :
288 : VarInfo* GetVarInfo(AsmJsScanner::token_t token);
289 : uint32_t VarIndex(VarInfo* info);
290 : void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
291 : ValueType vtype,
292 : const WasmInitExpr& init = WasmInitExpr());
293 : void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
294 : void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
295 : bool mutable_variable, VarInfo* info);
296 :
297 : // Allocates a temporary local variable. The given {index} is absolute within
298 : // the function body, consider using {TemporaryVariableScope} when nesting.
299 : uint32_t TempVariable(int index);
300 :
301 : // Preserves a copy of the scanner's current identifier string in the zone.
302 : Vector<const char> CopyCurrentIdentifierString();
303 :
304 : // Use to set up block stack layers (including synthetic ones for if-else).
305 : // Begin/Loop/End below are implemented with these plus code generation.
306 : void BareBegin(BlockKind kind = BlockKind::kOther,
307 : AsmJsScanner::token_t label = 0);
308 : void BareEnd();
309 : int FindContinueLabelDepth(AsmJsScanner::token_t label);
310 : int FindBreakLabelDepth(AsmJsScanner::token_t label);
311 :
312 : // Use to set up actual wasm blocks/loops.
313 : void Begin(AsmJsScanner::token_t label = 0);
314 : void Loop(AsmJsScanner::token_t label = 0);
315 : void End();
316 :
317 : void InitializeStdlibTypes();
318 :
319 : FunctionSig* ConvertSignature(AsmType* return_type,
320 : const ZoneVector<AsmType*>& params);
321 :
322 : void ValidateModule(); // 6.1 ValidateModule
323 : void ValidateModuleParameters(); // 6.1 ValidateModule - parameters
324 : void ValidateModuleVars(); // 6.1 ValidateModule - variables
325 : void ValidateModuleVar(bool mutable_variable);
326 : void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
327 : void ValidateModuleVarStdlib(VarInfo* info);
328 : void ValidateModuleVarNewStdlib(VarInfo* info);
329 : void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);
330 :
331 : void ValidateExport(); // 6.2 ValidateExport
332 : void ValidateFunctionTable(); // 6.3 ValidateFunctionTable
333 : void ValidateFunction(); // 6.4 ValidateFunction
334 : void ValidateFunctionParams(ZoneVector<AsmType*>* params);
335 : void ValidateFunctionLocals(size_t param_count,
336 : ZoneVector<ValueType>* locals);
337 : void ValidateStatement(); // 6.5 ValidateStatement
338 : void Block(); // 6.5.1 Block
339 : void ExpressionStatement(); // 6.5.2 ExpressionStatement
340 : void EmptyStatement(); // 6.5.3 EmptyStatement
341 : void IfStatement(); // 6.5.4 IfStatement
342 : void ReturnStatement(); // 6.5.5 ReturnStatement
343 : bool IterationStatement(); // 6.5.6 IterationStatement
344 : void WhileStatement(); // 6.5.6 IterationStatement - while
345 : void DoStatement(); // 6.5.6 IterationStatement - do
346 : void ForStatement(); // 6.5.6 IterationStatement - for
347 : void BreakStatement(); // 6.5.7 BreakStatement
348 : void ContinueStatement(); // 6.5.8 ContinueStatement
349 : void LabelledStatement(); // 6.5.9 LabelledStatement
350 : void SwitchStatement(); // 6.5.10 SwitchStatement
351 : void ValidateCase(); // 6.6. ValidateCase
352 : void ValidateDefault(); // 6.7 ValidateDefault
353 : AsmType* ValidateExpression(); // 6.8 ValidateExpression
354 : AsmType* Expression(AsmType* expect); // 6.8.1 Expression
355 : AsmType* NumericLiteral(); // 6.8.2 NumericLiteral
356 : AsmType* Identifier(); // 6.8.3 Identifier
357 : AsmType* CallExpression(); // 6.8.4 CallExpression
358 : AsmType* MemberExpression(); // 6.8.5 MemberExpression
359 : AsmType* AssignmentExpression(); // 6.8.6 AssignmentExpression
360 : AsmType* UnaryExpression(); // 6.8.7 UnaryExpression
361 : AsmType* MultiplicativeExpression(); // 6.8.8 MultiplicativeExpression
362 : AsmType* AdditiveExpression(); // 6.8.9 AdditiveExpression
363 : AsmType* ShiftExpression(); // 6.8.10 ShiftExpression
364 : AsmType* RelationalExpression(); // 6.8.11 RelationalExpression
365 : AsmType* EqualityExpression(); // 6.8.12 EqualityExpression
366 : AsmType* BitwiseANDExpression(); // 6.8.13 BitwiseANDExpression
367 : AsmType* BitwiseXORExpression(); // 6.8.14 BitwiseXORExpression
368 : AsmType* BitwiseORExpression(); // 6.8.15 BitwiseORExpression
369 : AsmType* ConditionalExpression(); // 6.8.16 ConditionalExpression
370 : AsmType* ParenthesizedExpression(); // 6.8.17 ParenthesizedExpression
371 : AsmType* ValidateCall(); // 6.9 ValidateCall
372 : bool PeekCall(); // 6.9 ValidateCall - helper
373 : void ValidateHeapAccess(); // 6.10 ValidateHeapAccess
374 : void ValidateFloatCoercion(); // 6.11 ValidateFloatCoercion
375 :
376 : // Used as part of {ForStatement}. Scans forward to the next `)` in order to
377 : // skip over the third expression in a for-statement. This is one piece that
378 : // makes this parser not be a pure single-pass.
379 : void ScanToClosingParenthesis();
380 :
381 : // Used as part of {SwitchStatement}. Collects all case labels in the current
382 : // switch-statement, then resets the scanner position. This is one piece that
383 : // makes this parser not be a pure single-pass.
384 : void GatherCases(ZoneVector<int32_t>* cases);
385 : };
386 :
387 : } // namespace wasm
388 : } // namespace internal
389 : } // namespace v8
390 :
391 : #endif // V8_ASMJS_ASM_PARSER_H_
|