Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_ASMJS_ASM_PARSER_H_
6 : #define V8_ASMJS_ASM_PARSER_H_
7 :
8 : #include <memory>
9 : #include <string>
10 :
11 : #include "src/asmjs/asm-scanner.h"
12 : #include "src/asmjs/asm-types.h"
13 : #include "src/base/enum-set.h"
14 : #include "src/vector.h"
15 : #include "src/wasm/wasm-module-builder.h"
16 : #include "src/zone/zone-containers.h"
17 :
18 : namespace v8 {
19 : namespace internal {
20 :
21 : class Utf16CharacterStream;
22 :
23 : namespace wasm {
24 :
25 : // A custom parser + validator + wasm converter for asm.js:
26 : // http://asmjs.org/spec/latest/
27 : // This parser intentionally avoids the portion of JavaScript parsing
28 : // that are not required to determine if code is valid asm.js code.
29 : // * It is mostly one pass.
30 : // * It bails out on unexpected input.
31 : // * It assumes strict ordering insofar as permitted by asm.js validation rules.
32 : // * It relies on a custom scanner that provides de-duped identifiers in two
33 : // scopes (local + module wide).
34 3759 : class AsmJsParser {
35 : public:
36 : // clang-format off
37 : enum StandardMember {
38 : kInfinity,
39 : kNaN,
40 : #define V(_unused1, name, _unused2, _unused3) kMath##name,
41 : STDLIB_MATH_FUNCTION_LIST(V)
42 : #undef V
43 : #define V(name, _unused1) kMath##name,
44 : STDLIB_MATH_VALUE_LIST(V)
45 : #undef V
46 : #define V(name, _unused1, _unused2, _unused3) k##name,
47 : STDLIB_ARRAY_TYPE_LIST(V)
48 : #undef V
49 : };
50 : // clang-format on
51 :
52 : using StdlibSet = base::EnumSet<StandardMember, uint64_t>;
53 :
54 : explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
55 : Utf16CharacterStream* stream);
56 : bool Run();
57 : const char* failure_message() const { return failure_message_; }
58 : int failure_location() const { return failure_location_; }
59 : WasmModuleBuilder* module_builder() { return module_builder_; }
60 : const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }
61 :
62 : private:
63 : // clang-format off
64 : enum class VarKind {
65 : kUnused,
66 : kLocal,
67 : kGlobal,
68 : kSpecial,
69 : kFunction,
70 : kTable,
71 : kImportedFunction,
72 : #define V(_unused0, Name, _unused1, _unused2) kMath##Name,
73 : STDLIB_MATH_FUNCTION_LIST(V)
74 : #undef V
75 : #define V(Name, _unused1) kMath##Name,
76 : STDLIB_MATH_VALUE_LIST(V)
77 : #undef V
78 : };
79 : // clang-format on
80 :
81 : // A single import in asm.js can require multiple imports in wasm, if the
82 : // function is used with different signatures. {cache} keeps the wasm
83 : // imports for the single asm.js import of name {function_name}.
84 : struct FunctionImportInfo {
85 : Vector<const char> function_name;
86 : ZoneUnorderedMap<FunctionSig, uint32_t> cache;
87 :
88 : // Constructor.
89 : FunctionImportInfo(Vector<const char> name, Zone* zone)
90 2095 : : function_name(name), cache(zone) {}
91 : };
92 :
93 607044 : struct VarInfo {
94 : AsmType* type = AsmType::None();
95 : WasmFunctionBuilder* function_builder = nullptr;
96 : FunctionImportInfo* import = nullptr;
97 : uint32_t mask = 0;
98 : uint32_t index = 0;
99 : VarKind kind = VarKind::kUnused;
100 : bool mutable_variable = true;
101 : bool function_defined = false;
102 : };
103 :
104 : struct GlobalImport {
105 : Vector<const char> import_name;
106 : ValueType value_type;
107 : VarInfo* var_info;
108 : };
109 :
110 : // Distinguish different kinds of blocks participating in {block_stack}. Each
111 : // entry on that stack represents one block in the wasm code, and determines
112 : // which block 'break' and 'continue' target in the current context:
113 : // - kRegular: The target of a 'break' (with & without identifier).
114 : // Pushed by an IterationStatement and a SwitchStatement.
115 : // - kLoop : The target of a 'continue' (with & without identifier).
116 : // Pushed by an IterationStatement.
117 : // - kNamed : The target of a 'break' with a specific identifier.
118 : // Pushed by a BlockStatement.
119 : // - kOther : Only used for internal blocks, can never be targeted.
120 : enum class BlockKind { kRegular, kLoop, kNamed, kOther };
121 :
122 : // One entry in the {block_stack}, see {BlockKind} above for details. Blocks
123 : // without a label have {kTokenNone} set as their label.
124 : struct BlockInfo {
125 : BlockKind kind;
126 : AsmJsScanner::token_t label;
127 : };
128 :
129 : // Helper class to make {TempVariable} safe for nesting.
130 : class TemporaryVariableScope;
131 :
132 : template <typename T>
133 15036 : class CachedVectors {
134 : public:
135 : explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}
136 :
137 : Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }
138 :
139 : inline void fill(ZoneVector<T>* vec) {
140 133251 : if (reusable_vectors_.empty()) return;
141 : reusable_vectors_.back().swap(*vec);
142 : reusable_vectors_.pop_back();
143 : vec->clear();
144 : }
145 :
146 : inline void reuse(ZoneVector<T>* vec) {
147 133251 : reusable_vectors_.emplace_back(std::move(*vec));
148 : }
149 :
150 : private:
151 : ZoneVector<ZoneVector<T>> reusable_vectors_;
152 : };
153 :
154 : template <typename T>
155 : class CachedVector final : public ZoneVector<T> {
156 : public:
157 133251 : explicit CachedVector(CachedVectors<T>& cache)
158 133251 : : ZoneVector<T>(cache.zone()), cache_(&cache) {
159 : cache.fill(this);
160 133251 : }
161 205788 : ~CachedVector() { cache_->reuse(this); }
162 :
163 : private:
164 : CachedVectors<T>* cache_;
165 : };
166 :
167 : Zone* zone_;
168 : AsmJsScanner scanner_;
169 : WasmModuleBuilder* module_builder_;
170 : WasmFunctionBuilder* current_function_builder_;
171 : AsmType* return_type_;
172 : uintptr_t stack_limit_;
173 : StdlibSet stdlib_uses_;
174 : ZoneVector<VarInfo> global_var_info_;
175 : ZoneVector<VarInfo> local_var_info_;
176 :
177 : CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
178 : CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
179 : CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
180 : CachedVectors<int32_t> cached_int_vectors_{zone_};
181 :
182 : int function_temp_locals_offset_;
183 : int function_temp_locals_used_;
184 : int function_temp_locals_depth_;
185 :
186 : // Error Handling related
187 : bool failed_;
188 : const char* failure_message_;
189 : int failure_location_;
190 :
191 : // Module Related.
192 : AsmJsScanner::token_t stdlib_name_;
193 : AsmJsScanner::token_t foreign_name_;
194 : AsmJsScanner::token_t heap_name_;
195 :
196 : static const AsmJsScanner::token_t kTokenNone = 0;
197 :
198 : // Track if parsing a heap assignment.
199 : bool inside_heap_assignment_;
200 : AsmType* heap_access_type_;
201 :
202 : ZoneVector<BlockInfo> block_stack_;
203 :
204 : // Types used for stdlib function and their set up.
205 : AsmType* stdlib_dq2d_;
206 : AsmType* stdlib_dqdq2d_;
207 : AsmType* stdlib_i2s_;
208 : AsmType* stdlib_ii2s_;
209 : AsmType* stdlib_minmax_;
210 : AsmType* stdlib_abs_;
211 : AsmType* stdlib_ceil_like_;
212 : AsmType* stdlib_fround_;
213 :
214 : // When making calls, the return type is needed to lookup signatures.
215 : // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
216 : // along the coercion.
217 : AsmType* call_coercion_;
218 :
219 : // The source position associated with the above {call_coercion}.
220 : size_t call_coercion_position_;
221 :
222 : // When making calls, the coercion can also appear in the source stream
223 : // syntactically "behind" the call site. For `callsite(..)|0` use this
224 : // value to flag that such a coercion must happen.
225 : AsmType* call_coercion_deferred_;
226 :
227 : // The source position at which requesting a deferred coercion via the
228 : // aforementioned {call_coercion_deferred} is allowed.
229 : size_t call_coercion_deferred_position_;
230 :
231 : // The code position of the last heap access shift by an immediate value.
232 : // For `heap[expr >> value:NumericLiteral]` this indicates from where to
233 : // delete code when the expression is used as part of a valid heap access.
234 : // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
235 : size_t heap_access_shift_position_;
236 : uint32_t heap_access_shift_value_;
237 : static const size_t kNoHeapAccessShift = -1;
238 :
239 : // Used to track the last label we've seen so it can be matched to later
240 : // statements it's attached to.
241 : AsmJsScanner::token_t pending_label_;
242 :
243 : // Global imports. The list of imported variables that are copied during
244 : // module instantiation into a corresponding global variable.
245 : ZoneLinkedList<GlobalImport> global_imports_;
246 :
247 : Zone* zone() { return zone_; }
248 :
249 : inline bool Peek(AsmJsScanner::token_t token) {
250 106381 : return scanner_.Token() == token;
251 : }
252 :
253 : inline bool Check(AsmJsScanner::token_t token) {
254 85077704 : if (scanner_.Token() == token) {
255 1662504 : scanner_.Next();
256 : return true;
257 : } else {
258 : return false;
259 : }
260 : }
261 :
262 : inline bool CheckForZero() {
263 270743 : if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
264 262291 : scanner_.Next();
265 : return true;
266 : } else {
267 : return false;
268 : }
269 : }
270 :
271 : inline bool CheckForDouble(double* value) {
272 6279218 : if (scanner_.IsDouble()) {
273 : *value = scanner_.AsDouble();
274 5009563 : scanner_.Next();
275 : return true;
276 : } else {
277 : return false;
278 : }
279 : }
280 :
281 : inline bool CheckForUnsigned(uint32_t* value) {
282 1656590 : if (scanner_.IsUnsigned()) {
283 : *value = scanner_.AsUnsigned();
284 1479712 : scanner_.Next();
285 : return true;
286 : } else {
287 : return false;
288 : }
289 : }
290 :
291 : inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
292 6604096 : if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
293 : *value = scanner_.AsUnsigned();
294 730384 : scanner_.Next();
295 : return true;
296 : } else {
297 : return false;
298 : }
299 : }
300 :
301 : inline AsmJsScanner::token_t Consume() {
302 : AsmJsScanner::token_t ret = scanner_.Token();
303 1341767 : scanner_.Next();
304 : return ret;
305 : }
306 :
307 : void SkipSemicolon();
308 :
309 : VarInfo* GetVarInfo(AsmJsScanner::token_t token);
310 : uint32_t VarIndex(VarInfo* info);
311 : void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
312 : ValueType vtype,
313 : const WasmInitExpr& init = WasmInitExpr());
314 : void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
315 : void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
316 : bool mutable_variable, VarInfo* info);
317 :
318 : // Allocates a temporary local variable. The given {index} is absolute within
319 : // the function body, consider using {TemporaryVariableScope} when nesting.
320 : uint32_t TempVariable(int index);
321 :
322 : // Preserves a copy of the scanner's current identifier string in the zone.
323 : Vector<const char> CopyCurrentIdentifierString();
324 :
325 : // Use to set up block stack layers (including synthetic ones for if-else).
326 : // Begin/Loop/End below are implemented with these plus code generation.
327 : void BareBegin(BlockKind kind, AsmJsScanner::token_t label = 0);
328 : void BareEnd();
329 : int FindContinueLabelDepth(AsmJsScanner::token_t label);
330 : int FindBreakLabelDepth(AsmJsScanner::token_t label);
331 :
332 : // Use to set up actual wasm blocks/loops.
333 : void Begin(AsmJsScanner::token_t label = 0);
334 : void Loop(AsmJsScanner::token_t label = 0);
335 : void End();
336 :
337 : void InitializeStdlibTypes();
338 :
339 : FunctionSig* ConvertSignature(AsmType* return_type,
340 : const ZoneVector<AsmType*>& params);
341 :
342 : void ValidateModule(); // 6.1 ValidateModule
343 : void ValidateModuleParameters(); // 6.1 ValidateModule - parameters
344 : void ValidateModuleVars(); // 6.1 ValidateModule - variables
345 : void ValidateModuleVar(bool mutable_variable);
346 : void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
347 : void ValidateModuleVarStdlib(VarInfo* info);
348 : void ValidateModuleVarNewStdlib(VarInfo* info);
349 : void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);
350 :
351 : void ValidateExport(); // 6.2 ValidateExport
352 : void ValidateFunctionTable(); // 6.3 ValidateFunctionTable
353 : void ValidateFunction(); // 6.4 ValidateFunction
354 : void ValidateFunctionParams(ZoneVector<AsmType*>* params);
355 : void ValidateFunctionLocals(size_t param_count,
356 : ZoneVector<ValueType>* locals);
357 : void ValidateStatement(); // 6.5 ValidateStatement
358 : void Block(); // 6.5.1 Block
359 : void ExpressionStatement(); // 6.5.2 ExpressionStatement
360 : void EmptyStatement(); // 6.5.3 EmptyStatement
361 : void IfStatement(); // 6.5.4 IfStatement
362 : void ReturnStatement(); // 6.5.5 ReturnStatement
363 : bool IterationStatement(); // 6.5.6 IterationStatement
364 : void WhileStatement(); // 6.5.6 IterationStatement - while
365 : void DoStatement(); // 6.5.6 IterationStatement - do
366 : void ForStatement(); // 6.5.6 IterationStatement - for
367 : void BreakStatement(); // 6.5.7 BreakStatement
368 : void ContinueStatement(); // 6.5.8 ContinueStatement
369 : void LabelledStatement(); // 6.5.9 LabelledStatement
370 : void SwitchStatement(); // 6.5.10 SwitchStatement
371 : void ValidateCase(); // 6.6. ValidateCase
372 : void ValidateDefault(); // 6.7 ValidateDefault
373 : AsmType* ValidateExpression(); // 6.8 ValidateExpression
374 : AsmType* Expression(AsmType* expect); // 6.8.1 Expression
375 : AsmType* NumericLiteral(); // 6.8.2 NumericLiteral
376 : AsmType* Identifier(); // 6.8.3 Identifier
377 : AsmType* CallExpression(); // 6.8.4 CallExpression
378 : AsmType* MemberExpression(); // 6.8.5 MemberExpression
379 : AsmType* AssignmentExpression(); // 6.8.6 AssignmentExpression
380 : AsmType* UnaryExpression(); // 6.8.7 UnaryExpression
381 : AsmType* MultiplicativeExpression(); // 6.8.8 MultiplicativeExpression
382 : AsmType* AdditiveExpression(); // 6.8.9 AdditiveExpression
383 : AsmType* ShiftExpression(); // 6.8.10 ShiftExpression
384 : AsmType* RelationalExpression(); // 6.8.11 RelationalExpression
385 : AsmType* EqualityExpression(); // 6.8.12 EqualityExpression
386 : AsmType* BitwiseANDExpression(); // 6.8.13 BitwiseANDExpression
387 : AsmType* BitwiseXORExpression(); // 6.8.14 BitwiseXORExpression
388 : AsmType* BitwiseORExpression(); // 6.8.15 BitwiseORExpression
389 : AsmType* ConditionalExpression(); // 6.8.16 ConditionalExpression
390 : AsmType* ParenthesizedExpression(); // 6.8.17 ParenthesiedExpression
391 : AsmType* ValidateCall(); // 6.9 ValidateCall
392 : bool PeekCall(); // 6.9 ValidateCall - helper
393 : void ValidateHeapAccess(); // 6.10 ValidateHeapAccess
394 : void ValidateFloatCoercion(); // 6.11 ValidateFloatCoercion
395 :
396 : // Used as part of {ForStatement}. Scans forward to the next `)` in order to
397 : // skip over the third expression in a for-statement. This is one piece that
398 : // makes this parser not be a pure single-pass.
399 : void ScanToClosingParenthesis();
400 :
401 : // Used as part of {SwitchStatement}. Collects all case labels in the current
402 : // switch-statement, then resets the scanner position. This is one piece that
403 : // makes this parser not be a pure single-pass.
404 : void GatherCases(ZoneVector<int32_t>* cases);
405 : };
406 :
407 : } // namespace wasm
408 : } // namespace internal
409 : } // namespace v8
410 :
411 : #endif // V8_ASMJS_ASM_PARSER_H_
|