Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_ASMJS_ASM_PARSER_H_
6 : #define V8_ASMJS_ASM_PARSER_H_
7 :
8 : #include <memory>
9 : #include <string>
10 :
11 : #include "src/asmjs/asm-scanner.h"
12 : #include "src/asmjs/asm-types.h"
13 : #include "src/base/enum-set.h"
14 : #include "src/wasm/wasm-module-builder.h"
15 : #include "src/zone/zone-containers.h"
16 :
17 : namespace v8 {
18 : namespace internal {
19 :
20 : class Utf16CharacterStream;
21 :
22 : namespace wasm {
23 :
24 : // A custom parser + validator + wasm converter for asm.js:
25 : // http://asmjs.org/spec/latest/
26 : // This parser intentionally avoids the portions of JavaScript parsing
27 : // that are not required to determine if code is valid asm.js code.
28 : // * It is mostly one pass.
29 : // * It bails out on unexpected input.
30 : // * It assumes strict ordering insofar as permitted by asm.js validation rules.
31 : // * It relies on a custom scanner that provides de-duped identifiers in two
32 : // scopes (local + module wide).
33 3716 : class AsmJsParser {
34 : public:
35 : // clang-format off
36 : enum StandardMember {
37 : kInfinity,
38 : kNaN,
39 : #define V(_unused1, name, _unused2, _unused3) kMath##name,
40 : STDLIB_MATH_FUNCTION_LIST(V)
41 : #undef V
42 : #define V(name, _unused1) kMath##name,
43 : STDLIB_MATH_VALUE_LIST(V)
44 : #undef V
45 : #define V(name, _unused1, _unused2, _unused3) k##name,
46 : STDLIB_ARRAY_TYPE_LIST(V)
47 : #undef V
48 : };
49 : // clang-format on
50 :
51 : using StdlibSet = base::EnumSet<StandardMember, uint64_t>;
52 :
53 : explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
54 : Utf16CharacterStream* stream);
55 : bool Run();
56 : const char* failure_message() const { return failure_message_; }
57 : int failure_location() const { return failure_location_; }
58 : WasmModuleBuilder* module_builder() { return module_builder_; }
59 : const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }
60 :
61 : private:
62 : // clang-format off
63 : enum class VarKind {
64 : kUnused,
65 : kLocal,
66 : kGlobal,
67 : kSpecial,
68 : kFunction,
69 : kTable,
70 : kImportedFunction,
71 : #define V(_unused0, Name, _unused1, _unused2) kMath##Name,
72 : STDLIB_MATH_FUNCTION_LIST(V)
73 : #undef V
74 : #define V(Name, _unused1) kMath##Name,
75 : STDLIB_MATH_VALUE_LIST(V)
76 : #undef V
77 : };
78 : // clang-format on
79 :
80 : // A single import in asm.js can require multiple imports in wasm, if the
81 : // function is used with different signatures. {cache} keeps the wasm
82 : // imports for the single asm.js import of name {function_name}.
83 : struct FunctionImportInfo {
84 : Vector<const char> function_name;
85 : ZoneUnorderedMap<FunctionSig, uint32_t> cache;
86 :
87 : // Constructor.
88 : FunctionImportInfo(Vector<const char> name, Zone* zone)
89 2091 : : function_name(name), cache(zone) {}
90 : };
91 :
92 606935 : struct VarInfo {
93 : AsmType* type = AsmType::None();
94 : WasmFunctionBuilder* function_builder = nullptr;
95 : FunctionImportInfo* import = nullptr;
96 : uint32_t mask = 0;
97 : uint32_t index = 0;
98 : VarKind kind = VarKind::kUnused;
99 : bool mutable_variable = true;
100 : bool function_defined = false;
101 : };
102 :
103 : struct GlobalImport {
104 : Vector<const char> import_name;
105 : ValueType value_type;
106 : VarInfo* var_info;
107 : };
108 :
109 : enum class BlockKind { kRegular, kLoop, kOther };
110 :
111 : struct BlockInfo {
112 : BlockKind kind;
113 : AsmJsScanner::token_t label;
114 : };
115 :
116 : // Helper class to make {TempVariable} safe for nesting.
117 : class TemporaryVariableScope;
118 :
119 : template <typename T>
120 : class CachedVectors {
121 : public:
122 : explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}
123 :
124 : Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }
125 :
126 133108 : inline void fill(ZoneVector<T>* vec) {
127 266216 : if (reusable_vectors_.empty()) return;
128 : reusable_vectors_.back().swap(*vec);
129 : reusable_vectors_.pop_back();
130 : vec->clear();
131 : }
132 :
133 : inline void reuse(ZoneVector<T>* vec) {
134 133113 : reusable_vectors_.emplace_back(std::move(*vec));
135 : }
136 :
137 : private:
138 : ZoneVector<ZoneVector<T>> reusable_vectors_;
139 : };
140 :
141 : template <typename T>
142 : class CachedVector final : public ZoneVector<T> {
143 : public:
144 : explicit CachedVector(CachedVectors<T>& cache)
145 133111 : : ZoneVector<T>(cache.zone()), cache_(&cache) {
146 133111 : cache.fill(this);
147 : }
148 133113 : ~CachedVector() { cache_->reuse(this); }
149 :
150 : private:
151 : CachedVectors<T>* cache_;
152 : };
153 :
// Core parser state. zone_ is returned by zone(), scanner_ is the token
// source used by the Peek/Check/Consume helpers, module_builder_ is returned
// by module_builder() and stdlib_uses_ is exposed through stdlib_uses().
// NOTE(review): stack_limit_ presumably holds the constructor's
// {stack_limit} argument -- confirm in the constructor definition.
154 : Zone* zone_;
155 : AsmJsScanner scanner_;
156 : WasmModuleBuilder* module_builder_;
157 : WasmFunctionBuilder* current_function_builder_;
158 : AsmType* return_type_;
159 : uintptr_t stack_limit_;
160 : StdlibSet stdlib_uses_;
161 : ZoneVector<VarInfo> global_var_info_;
162 : ZoneVector<VarInfo> local_var_info_;
163 :
// Pools of reusable vectors, one per element type, for use with CachedVector.
// Their default member initializers read zone_, and members are initialized
// in declaration order, so these must stay declared after zone_.
164 : CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
165 : CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
166 : CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
167 : CachedVectors<int32_t> cached_int_vectors_{zone_};
168 :
// NOTE(review): presumably the bookkeeping behind TempVariable() and
// TemporaryVariableScope -- confirm against their definitions.
169 : int function_temp_locals_offset_;
170 : int function_temp_locals_used_;
171 : int function_temp_locals_depth_;
172 :
173 : // Error handling state.
// failure_message_ and failure_location_ are what the public
// failure_message() and failure_location() accessors return.
174 : bool failed_;
175 : const char* failure_message_;
176 : int failure_location_;
177 :
178 : // Module related.
// NOTE(review): presumably the tokens naming the module's stdlib, foreign
// and heap parameters -- confirm in ValidateModuleParameters().
179 : AsmJsScanner::token_t stdlib_name_;
180 : AsmJsScanner::token_t foreign_name_;
181 : AsmJsScanner::token_t heap_name_;
182 :
// Token value 0, named so it can stand for "no token".
183 : static const AsmJsScanner::token_t kTokenNone = 0;
184 :
185 : // Track if parsing a heap assignment.
186 : bool inside_heap_assignment_;
187 : AsmType* heap_access_type_;
188 :
// Stack of BlockInfo entries (block kind plus label token).
// NOTE(review): presumably maintained by BareBegin()/BareEnd() -- confirm.
189 : ZoneVector<BlockInfo> block_stack_;
190 :
191 : // Types used for stdlib functions and their set up.
// NOTE(review): presumably assigned by InitializeStdlibTypes() -- confirm.
192 : AsmType* stdlib_dq2d_;
193 : AsmType* stdlib_dqdq2d_;
194 : AsmType* stdlib_i2s_;
195 : AsmType* stdlib_ii2s_;
196 : AsmType* stdlib_minmax_;
197 : AsmType* stdlib_abs_;
198 : AsmType* stdlib_ceil_like_;
199 : AsmType* stdlib_fround_;
200 :
201 : // When making calls, the return type is needed to lookup signatures.
202 : // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
203 : // along the coercion.
204 : AsmType* call_coercion_;
205 :
206 : // The source position associated with the above {call_coercion_}.
207 : size_t call_coercion_position_;
208 :
209 : // When making calls, the coercion can also appear in the source stream
210 : // syntactically "behind" the call site. For `callsite(..)|0` use this
211 : // value to flag that such a coercion must happen.
212 : AsmType* call_coercion_deferred_;
213 :
214 : // The source position at which requesting a deferred coercion via the
215 : // aforementioned {call_coercion_deferred_} is allowed.
216 : size_t call_coercion_deferred_position_;
217 :
218 : // The code position of the last heap access shift by an immediate value.
219 : // For `heap[expr >> value:NumericLiteral]` this indicates from where to
220 : // delete code when the expression is used as part of a valid heap access.
221 : // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
222 : size_t heap_access_shift_position_;
223 : uint32_t heap_access_shift_value_;
// The initializer -1 converts to the largest value representable in size_t.
224 : static const size_t kNoHeapAccessShift = -1;
225 :
226 : // Used to track the last label we've seen so it can be matched to the
227 : // later statements it's attached to.
228 : AsmJsScanner::token_t pending_label_;
229 :
230 : // Global imports. The list of imported variables that are copied during
231 : // module instantiation into a corresponding global variable.
// Each entry is a GlobalImport (import name, value type, VarInfo pointer).
232 : ZoneLinkedList<GlobalImport> global_imports_;
233 :
234 : Zone* zone() { return zone_; }
235 :
236 : inline bool Peek(AsmJsScanner::token_t token) {
237 8045030 : return scanner_.Token() == token;
238 : }
239 :
240 : inline bool Check(AsmJsScanner::token_t token) {
241 20074607 : if (scanner_.Token() == token) {
242 1660101 : scanner_.Next();
243 : return true;
244 : } else {
245 : return false;
246 : }
247 : }
248 :
249 270732 : inline bool CheckForZero() {
250 270732 : if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
251 262280 : scanner_.Next();
252 262280 : return true;
253 : } else {
254 : return false;
255 : }
256 : }
257 :
258 : inline bool CheckForDouble(double* value) {
259 1279205 : if (scanner_.IsDouble()) {
260 : *value = scanner_.AsDouble();
261 9584 : scanner_.Next();
262 : return true;
263 : } else {
264 : return false;
265 : }
266 : }
267 :
268 : inline bool CheckForUnsigned(uint32_t* value) {
269 1656557 : if (scanner_.IsUnsigned()) {
270 : *value = scanner_.AsUnsigned();
271 1479708 : scanner_.Next();
272 : return true;
273 : } else {
274 : return false;
275 : }
276 : }
277 :
278 : inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
279 1604075 : if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
280 : *value = scanner_.AsUnsigned();
281 730379 : scanner_.Next();
282 : return true;
283 : } else {
284 : return false;
285 : }
286 : }
287 :
288 : inline AsmJsScanner::token_t Consume() {
289 234164 : AsmJsScanner::token_t ret = scanner_.Token();
290 1341557 : scanner_.Next();
291 : return ret;
292 : }
293 :
294 : void SkipSemicolon();
295 :
296 : VarInfo* GetVarInfo(AsmJsScanner::token_t token);
297 : uint32_t VarIndex(VarInfo* info);
298 : void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
299 : ValueType vtype,
300 : const WasmInitExpr& init = WasmInitExpr());
301 : void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
302 : void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
303 : bool mutable_variable, VarInfo* info);
304 :
305 : // Allocates a temporary local variable. The given {index} is absolute within
306 : // the function body, consider using {TemporaryVariableScope} when nesting.
307 : uint32_t TempVariable(int index);
308 :
309 : // Preserves a copy of the scanner's current identifier string in the zone.
310 : Vector<const char> CopyCurrentIdentifierString();
311 :
312 : // Use to set up block stack layers (including synthetic ones for if-else).
313 : // Begin/Loop/End below are implemented with these plus code generation.
314 : void BareBegin(BlockKind kind = BlockKind::kOther,
315 : AsmJsScanner::token_t label = 0);
316 : void BareEnd();
317 : int FindContinueLabelDepth(AsmJsScanner::token_t label);
318 : int FindBreakLabelDepth(AsmJsScanner::token_t label);
319 :
320 : // Use to set up actual wasm blocks/loops.
321 : void Begin(AsmJsScanner::token_t label = 0);
322 : void Loop(AsmJsScanner::token_t label = 0);
323 : void End();
324 :
325 : void InitializeStdlibTypes();
326 :
327 : FunctionSig* ConvertSignature(AsmType* return_type,
328 : const ZoneVector<AsmType*>& params);
329 :
330 : void ValidateModule(); // 6.1 ValidateModule
331 : void ValidateModuleParameters(); // 6.1 ValidateModule - parameters
332 : void ValidateModuleVars(); // 6.1 ValidateModule - variables
333 : void ValidateModuleVar(bool mutable_variable);
334 : void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
335 : void ValidateModuleVarStdlib(VarInfo* info);
336 : void ValidateModuleVarNewStdlib(VarInfo* info);
337 : void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);
338 :
339 : void ValidateExport(); // 6.2 ValidateExport
340 : void ValidateFunctionTable(); // 6.3 ValidateFunctionTable
341 : void ValidateFunction(); // 6.4 ValidateFunction
342 : void ValidateFunctionParams(ZoneVector<AsmType*>* params);
343 : void ValidateFunctionLocals(size_t param_count,
344 : ZoneVector<ValueType>* locals);
345 : void ValidateStatement(); // 6.5 ValidateStatement
346 : void Block(); // 6.5.1 Block
347 : void ExpressionStatement(); // 6.5.2 ExpressionStatement
348 : void EmptyStatement(); // 6.5.3 EmptyStatement
349 : void IfStatement(); // 6.5.4 IfStatement
350 : void ReturnStatement(); // 6.5.5 ReturnStatement
351 : bool IterationStatement(); // 6.5.6 IterationStatement
352 : void WhileStatement(); // 6.5.6 IterationStatement - while
353 : void DoStatement(); // 6.5.6 IterationStatement - do
354 : void ForStatement(); // 6.5.6 IterationStatement - for
355 : void BreakStatement(); // 6.5.7 BreakStatement
356 : void ContinueStatement(); // 6.5.8 ContinueStatement
357 : void LabelledStatement(); // 6.5.9 LabelledStatement
358 : void SwitchStatement(); // 6.5.10 SwitchStatement
359 : void ValidateCase(); // 6.6. ValidateCase
360 : void ValidateDefault(); // 6.7 ValidateDefault
361 : AsmType* ValidateExpression(); // 6.8 ValidateExpression
362 : AsmType* Expression(AsmType* expect); // 6.8.1 Expression
363 : AsmType* NumericLiteral(); // 6.8.2 NumericLiteral
364 : AsmType* Identifier(); // 6.8.3 Identifier
365 : AsmType* CallExpression(); // 6.8.4 CallExpression
366 : AsmType* MemberExpression(); // 6.8.5 MemberExpression
367 : AsmType* AssignmentExpression(); // 6.8.6 AssignmentExpression
368 : AsmType* UnaryExpression(); // 6.8.7 UnaryExpression
369 : AsmType* MultiplicativeExpression(); // 6.8.8 MultiplicativeExpression
370 : AsmType* AdditiveExpression(); // 6.8.9 AdditiveExpression
371 : AsmType* ShiftExpression(); // 6.8.10 ShiftExpression
372 : AsmType* RelationalExpression(); // 6.8.11 RelationalExpression
373 : AsmType* EqualityExpression(); // 6.8.12 EqualityExpression
374 : AsmType* BitwiseANDExpression(); // 6.8.13 BitwiseANDExpression
375 : AsmType* BitwiseXORExpression(); // 6.8.14 BitwiseXORExpression
376 : AsmType* BitwiseORExpression(); // 6.8.15 BitwiseORExpression
377 : AsmType* ConditionalExpression(); // 6.8.16 ConditionalExpression
378 : AsmType* ParenthesizedExpression(); // 6.8.17 ParenthesiedExpression
379 : AsmType* ValidateCall(); // 6.9 ValidateCall
380 : bool PeekCall(); // 6.9 ValidateCall - helper
381 : void ValidateHeapAccess(); // 6.10 ValidateHeapAccess
382 : void ValidateFloatCoercion(); // 6.11 ValidateFloatCoercion
383 :
384 : // Used as part of {ForStatement}. Scans forward to the next `)` in order to
385 : // skip over the third expression in a for-statement. This is one piece that
386 : // makes this parser not be a pure single-pass.
387 : void ScanToClosingParenthesis();
388 :
389 : // Used as part of {SwitchStatement}. Collects all case labels in the current
390 : // switch-statement, then resets the scanner position. This is one piece that
391 : // makes this parser not be a pure single-pass.
392 : void GatherCases(ZoneVector<int32_t>* cases);
393 : };
394 :
395 : } // namespace wasm
396 : } // namespace internal
397 : } // namespace v8
398 :
399 : #endif // V8_ASMJS_ASM_PARSER_H_
|