/src/jsonnet/core/parser.cpp
Line | Count | Source |
1 | | /* |
2 | | Copyright 2015 Google Inc. All rights reserved. |
3 | | |
4 | | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | you may not use this file except in compliance with the License. |
6 | | You may obtain a copy of the License at |
7 | | |
8 | | http://www.apache.org/licenses/LICENSE-2.0 |
9 | | |
10 | | Unless required by applicable law or agreed to in writing, software |
11 | | distributed under the License is distributed on an "AS IS" BASIS, |
12 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | See the License for the specific language governing permissions and |
14 | | limitations under the License. |
15 | | */ |
16 | | |
#include <cassert>
#include <cmath>
#include <cstdlib>

#include <iomanip>
#include <list>
#include <locale>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>

#include "ast.h"
#include "desugarer.h"
#include "lexer.h"
#include "parser.h"
#include "static_error.h"
34 | | |
35 | | namespace jsonnet::internal { |
36 | | |
/** Render a double as text, independent of the process-wide locale.
 *
 * Values with no fractional part are written in fixed notation with zero decimals.
 * Everything else is written with 17 significant digits; see "What Every Computer
 * Scientist Should Know About Floating-Point Arithmetic", Theorem 15:
 * https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html
 *
 * \param v The number to render.
 * \returns The textual form of v.
 */
std::string jsonnet_unparse_number(double v)
{
    std::stringstream out;
    // Pin the classic "C" locale so the output is the same even if the user of the
    // library changed the global locale.
    out.imbue(std::locale::classic());

    const bool has_no_fraction = (std::floor(v) == v);
    if (has_no_fraction) {
        out << std::fixed << std::setprecision(0);
    } else {
        out << std::setprecision(17);
    }
    out << v;
    return out.str();
}
54 | | |
55 | | namespace { |
56 | | |
57 | | static const Fodder EMPTY_FODDER; |
58 | | |
/** Upper bound on parser recursion depth.
 *
 * The parsing functions carry a current_depth counter and raise a StaticError once it
 * reaches this value, so pathological or malicious input with deeply nested structures
 * is rejected instead of overflowing the stack.
 */
static constexpr unsigned MAX_PARSER_DEPTH = 1000U;
64 | | |
65 | | static bool op_is_unary(const std::string &op, UnaryOp &uop) |
66 | 1.76M | { |
67 | 1.76M | auto it = unary_map.find(op); |
68 | 1.76M | if (it == unary_map.end()) |
69 | 636 | return false; |
70 | 1.76M | uop = it->second; |
71 | 1.76M | return true; |
72 | 1.76M | } |
73 | | |
74 | | static bool op_is_binary(const std::string &op, BinaryOp &bop) |
75 | 24.9M | { |
76 | 24.9M | auto it = binary_map.find(op); |
77 | 24.9M | if (it == binary_map.end()) |
78 | 477 | return false; |
79 | 24.9M | bop = it->second; |
80 | 24.9M | return true; |
81 | 24.9M | } |
82 | | |
83 | | LocationRange span(const Token &begin) |
84 | 105M | { |
85 | 105M | return LocationRange(begin.location.file, begin.location.begin, begin.location.end); |
86 | 105M | } |
87 | | |
88 | | LocationRange span(const Token &begin, const Token &end) |
89 | 47.0M | { |
90 | 47.0M | return LocationRange(begin.location.file, begin.location.begin, end.location.end); |
91 | 47.0M | } |
92 | | |
93 | | LocationRange span(const Token &begin, AST *end) |
94 | 39.1M | { |
95 | 39.1M | return LocationRange(begin.location.file, begin.location.begin, end->location.end); |
96 | 39.1M | } |
97 | | |
98 | | /** Holds state while parsing a given token list. |
99 | | */ |
100 | | class Parser { |
101 | | // The private member functions are utilities for dealing with the token stream. |
102 | | |
103 | | StaticError unexpected(const Token &tok, const std::string &while_) |
104 | 1.02k | { |
105 | 1.02k | std::stringstream ss; |
106 | 1.02k | ss << "unexpected: " << tok.kind << " while " << while_; |
107 | 1.02k | return StaticError(tok.location, ss.str()); |
108 | 1.02k | } |
109 | | |
110 | | Token pop(void) |
111 | 334M | { |
112 | 334M | Token tok = peek(); |
113 | 334M | tokens.pop_front(); |
114 | 334M | return tok; |
115 | 334M | } |
116 | | |
117 | | void push(Token tok) |
118 | 0 | { |
119 | 0 | tokens.push_front(tok); |
120 | 0 | } |
121 | | |
122 | | const Token &peek(void) |
123 | 1.07G | { |
124 | 1.07G | return tokens.front(); |
125 | 1.07G | } |
126 | | |
127 | | /** Only call this is peek() is not an EOF token. */ |
128 | | Token doublePeek(void) |
129 | 37.0M | { |
130 | 37.0M | Tokens::iterator it = tokens.begin(); // First one. |
131 | 37.0M | it++; // Now pointing at the second one. |
132 | 37.0M | return *(it); |
133 | 37.0M | } |
134 | | |
135 | | Token popExpect(Token::Kind k, const char *data = nullptr) |
136 | 51.8M | { |
137 | 51.8M | Token tok = pop(); |
138 | 51.8M | if (tok.kind != k) { |
139 | 667 | std::stringstream ss; |
140 | 667 | ss << "expected token " << k << " but got " << tok; |
141 | 667 | throw StaticError(tok.location, ss.str()); |
142 | 667 | } |
143 | 51.8M | if (data != nullptr && tok.data != data) { |
144 | 47 | std::stringstream ss; |
145 | 47 | ss << "expected operator " << data << " but got " << tok.data; |
146 | 47 | throw StaticError(tok.location, ss.str()); |
147 | 47 | } |
148 | 51.8M | return tok; |
149 | 51.8M | } |
150 | | |
// Token stream being consumed. Held by reference, so every pop()/push() performed by the
// parser modifies the list owned by the caller.
std::list<Token> &tokens;
// Allocator used to create identifiers and AST nodes (makeIdentifier / make<T>).
Allocator *alloc;

public:
// Stores a reference to the token list and the allocator pointer; nothing is parsed here.
// NOTE(review): Tokens is presumably an alias of std::list<Token> (it binds to the member above).
Parser(Tokens &tokens, Allocator *alloc) : tokens(tokens), alloc(alloc) {}
156 | | |
157 | | /** Parse a comma-separated list of expressions. |
158 | | * |
159 | | * Allows an optional ending comma. |
160 | | * \param args Expressions added here. |
161 | | * \param element_kind Used in error messages when a comma was not found. |
162 | | * \param got_comma Whether a trailing comma was found. |
163 | | * \param current_depth Current recursion depth to prevent stack overflow. |
164 | | * \returns The last token (the one that matched parameter end). |
165 | | */ |
166 | | Token parseArgs(ArgParams &args, const std::string &element_kind, bool &got_comma, unsigned current_depth) |
167 | 24.8M | { |
168 | 24.8M | got_comma = false; |
169 | 24.8M | bool first = true; |
170 | 68.4M | do { |
171 | 68.4M | Token next = peek(); |
172 | 68.4M | if (next.kind == Token::PAREN_R) { |
173 | | // got_comma can be true or false here. |
174 | 24.8M | return pop(); |
175 | 24.8M | } |
176 | 43.6M | if (!first && !got_comma) { |
177 | 319 | std::stringstream ss; |
178 | 319 | ss << "expected a comma before next " << element_kind << "."; |
179 | 319 | throw StaticError(next.location, ss.str()); |
180 | 319 | } |
181 | | // Either id=expr or id or expr, but note that expr could be id==1 so this needs |
182 | | // look-ahead. |
183 | 43.6M | Fodder id_fodder; |
184 | 43.6M | const Identifier *id = nullptr; |
185 | 43.6M | Fodder eq_fodder; |
186 | 43.6M | if (peek().kind == Token::IDENTIFIER) { |
187 | 37.0M | Token maybe_eq = doublePeek(); |
188 | 37.0M | if (maybe_eq.kind == Token::OPERATOR && maybe_eq.data == "=") { |
189 | 587k | id_fodder = peek().fodder; |
190 | 587k | id = alloc->makeIdentifier(peek().data32()); |
191 | 587k | eq_fodder = maybe_eq.fodder; |
192 | 587k | pop(); // id |
193 | 587k | pop(); // eq |
194 | 587k | } |
195 | 37.0M | } |
196 | 43.6M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
197 | 43.6M | got_comma = false; |
198 | 43.6M | first = false; |
199 | 43.6M | Fodder comma_fodder; |
200 | 43.6M | if (peek().kind == Token::COMMA) { |
201 | 18.7M | Token comma = pop(); |
202 | 18.7M | comma_fodder = comma.fodder; |
203 | 18.7M | got_comma = true; |
204 | 18.7M | } |
205 | 43.6M | args.emplace_back(id_fodder, id, eq_fodder, expr, comma_fodder); |
206 | 43.6M | } while (true); |
207 | 24.8M | } |
208 | | |
209 | | /** Parse function parameters. |
210 | | * |
211 | | * \param element_kind Used in error messages. |
212 | | * \param got_comma Whether a trailing comma was found. |
213 | | * \param close_fodder Fodder after the closing parenthesis. |
214 | | * \param current_depth Current recursion depth to prevent stack overflow. |
215 | | * \returns The parameters as ArgParams. |
216 | | */ |
217 | | ArgParams parseParams(const std::string &element_kind, bool &got_comma, Fodder &close_fodder, unsigned current_depth) |
218 | 5.30M | { |
219 | 5.30M | ArgParams params; |
220 | 5.30M | Token paren_r = parseArgs(params, element_kind, got_comma, current_depth); |
221 | | |
222 | | // Check they're all identifiers |
223 | | // parseArgs returns f(x) with x as an expression. Convert it here. |
224 | 10.4M | for (auto &p : params) { |
225 | 10.4M | if (p.id == nullptr) { |
226 | 9.89M | if (p.expr->type != AST_VAR) { |
227 | 6 | throw StaticError(p.expr->location, "could not parse parameter here."); |
228 | 6 | } |
229 | 9.89M | auto *pv = static_cast<Var *>(p.expr); |
230 | 9.89M | p.id = pv->id; |
231 | 9.89M | p.idFodder = pv->openFodder; |
232 | 9.89M | p.expr = nullptr; |
233 | 9.89M | } |
234 | 10.4M | } |
235 | | |
236 | 5.30M | close_fodder = paren_r.fodder; |
237 | | |
238 | 5.30M | return params; |
239 | 5.30M | } |
240 | | |
241 | | /** Parse a local bind statement. |
242 | | * |
243 | | * \param binds The bindings to be populated. |
244 | | * \param current_depth Current recursion depth to prevent stack overflow. |
245 | | * \returns The token after the binding (comma or semicolon). |
246 | | */ |
247 | | Token parseBind(Local::Binds &binds, unsigned current_depth) |
248 | 6.09M | { |
249 | 6.09M | Token var_id = popExpect(Token::IDENTIFIER); |
250 | 6.09M | auto *id = alloc->makeIdentifier(var_id.data32()); |
251 | 6.09M | for (const auto &bind : binds) { |
252 | 852k | if (bind.var == id) |
253 | 11 | throw StaticError(var_id.location, "duplicate local var: " + var_id.data); |
254 | 852k | } |
255 | 6.09M | bool is_function = false; |
256 | 6.09M | ArgParams params; |
257 | 6.09M | bool trailing_comma = false; |
258 | 6.09M | Fodder fodder_l, fodder_r; |
259 | 6.09M | if (peek().kind == Token::PAREN_L) { |
260 | 1.78M | Token paren_l = pop(); |
261 | 1.78M | fodder_l = paren_l.fodder; |
262 | 1.78M | params = parseParams("function parameter", trailing_comma, fodder_r, current_depth); |
263 | 1.78M | is_function = true; |
264 | 1.78M | } |
265 | 6.09M | Token eq = popExpect(Token::OPERATOR, "="); |
266 | 6.09M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
267 | 6.09M | Token delim = pop(); |
268 | 6.09M | binds.emplace_back(var_id.fodder, |
269 | 6.09M | id, |
270 | 6.09M | eq.fodder, |
271 | 6.09M | body, |
272 | 6.09M | is_function, |
273 | 6.09M | fodder_l, |
274 | 6.09M | params, |
275 | 6.09M | trailing_comma, |
276 | 6.09M | fodder_r, |
277 | 6.09M | delim.fodder); |
278 | 6.09M | return delim; |
279 | 6.09M | } |
280 | | |
/** Parse the remainder of an object after the opening brace.
 *
 * Reads fields, object-level locals and asserts until either the closing brace (a plain
 * Object) or a `for` keyword (an ObjectComprehension) is reached.
 *
 * \param obj Receives the newly allocated Object or ObjectComprehension.
 * \param tok The opening brace token.
 * \param current_depth Current recursion depth to prevent stack overflow.
 * \returns The closing brace token.
 *
 * Throws StaticError when current_depth has reached MAX_PARSER_DEPTH or when the object
 * syntax is malformed.
 */
Token parseObjectRemainder(AST *&obj, const Token &tok, unsigned current_depth)
{
    if (current_depth >= MAX_PARSER_DEPTH) {
        throw StaticError(peek().location, "Exceeded maximum parse depth limit.");
    }

    ObjectFields fields;
    std::set<std::string> literal_fields;  // For duplicate fields detection.
    std::set<const Identifier *> binds;    // For duplicate locals detection.

    bool got_comma = false;
    bool first = true;
    // 'next' always holds the token that starts the upcoming member (or ends the object);
    // each member case below refills it after consuming its own tokens.
    Token next = pop();

    do {
        if (next.kind == Token::BRACE_R) {
            // End of a plain object. got_comma records whether the last member had a
            // trailing comma.
            obj = alloc->make<Object>(
                span(tok, next), tok.fodder, fields, got_comma, next.fodder);
            return next;

        } else if (next.kind == Token::FOR) {
            // It's a comprehension
            // Validate what was parsed so far: locals are ignored, asserts are not
            // allowed, and exactly one other field must be present.
            unsigned num_fields = 0;
            unsigned num_asserts = 0;
            const ObjectField *field_ptr = nullptr;
            for (const auto &field : fields) {
                if (field.kind == ObjectField::LOCAL)
                    continue;
                if (field.kind == ObjectField::ASSERT) {
                    num_asserts++;
                    continue;
                }
                field_ptr = &field;
                num_fields++;
            }
            if (num_asserts > 0) {
                auto msg = "object comprehension cannot have asserts.";
                throw StaticError(next.location, msg);
            }
            if (num_fields != 1) {
                auto msg = "object comprehension can only have one field.";
                throw StaticError(next.location, msg);
            }
            // Safe to dereference: num_fields == 1 means field_ptr was assigned.
            const ObjectField &field = *field_ptr;

            if (field.hide != ObjectField::INHERIT) {
                auto msg = "object comprehensions cannot have hidden fields.";
                throw StaticError(next.location, msg);
            }

            if (field.kind != ObjectField::FIELD_EXPR) {
                auto msg = "object comprehensions can only have [e] fields.";
                throw StaticError(next.location, msg);
            }

            // The 'for' token itself is already consumed (it is 'next'); only its fodder
            // is handed on.
            std::vector<ComprehensionSpec> specs;
            Token last = parseComprehensionSpecs(Token::BRACE_R, next.fodder, specs, current_depth + 1);
            obj = alloc->make<ObjectComprehension>(
                span(tok, last), tok.fodder, fields, got_comma, specs, last.fodder);

            return last;
        }

        if (!got_comma && !first)
            throw StaticError(next.location, "expected a comma before next field.");

        first = false;
        got_comma = false;

        switch (next.kind) {
            case Token::BRACKET_L:
            case Token::IDENTIFIER:
            case Token::STRING_DOUBLE:
            case Token::STRING_SINGLE:
            case Token::STRING_BLOCK:
            case Token::VERBATIM_STRING_DOUBLE:
            case Token::VERBATIM_STRING_SINGLE: {
                // A regular field. The name is an identifier, a string literal of any
                // flavour, or a computed [expr].
                ObjectField::Kind kind;
                AST *expr1 = nullptr;
                const Identifier *id = nullptr;
                Fodder fodder1, fodder2;
                LocationRange idLocation;
                if (next.kind == Token::IDENTIFIER) {
                    fodder1 = next.fodder;
                    kind = ObjectField::FIELD_ID;
                    id = alloc->makeIdentifier(next.data32());
                    idLocation = next.location;
                } else if (next.kind == Token::STRING_DOUBLE) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::DOUBLE,
                                                       "",
                                                       "");
                } else if (next.kind == Token::STRING_SINGLE) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::SINGLE,
                                                       "",
                                                       "");
                } else if (next.kind == Token::STRING_BLOCK) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::BLOCK,
                                                       next.stringBlockIndent,
                                                       next.stringBlockTermIndent);
                } else if (next.kind == Token::VERBATIM_STRING_SINGLE) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::VERBATIM_SINGLE,
                                                       "",
                                                       "");
                } else if (next.kind == Token::VERBATIM_STRING_DOUBLE) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::VERBATIM_DOUBLE,
                                                       "",
                                                       "");
                } else {
                    // Remaining case of the enclosing labels: BRACKET_L, i.e. [expr].
                    kind = ObjectField::FIELD_EXPR;
                    fodder1 = next.fodder;
                    expr1 = parse(MAX_PRECEDENCE, current_depth + 1);
                    Token bracket_r = popExpect(Token::BRACKET_R);
                    fodder2 = bracket_r.fodder;
                }

                // Optional parameter list: name(params): body defines a method.
                bool is_method = false;
                bool meth_comma = false;
                ArgParams params;
                Fodder fodder_l;
                Fodder fodder_r;
                if (peek().kind == Token::PAREN_L) {
                    Token paren_l = pop();
                    fodder_l = paren_l.fodder;
                    params = parseParams("method parameter", meth_comma, fodder_r, current_depth);
                    is_method = true;
                }

                bool plus_sugar = false;

                // The separator is a single operator token: an optional leading '+'
                // followed by one to three ':' characters and nothing else.
                Token op = popExpect(Token::OPERATOR);
                const char *od = op.data.c_str();
                if (*od == '+') {
                    plus_sugar = true;
                    od++;
                }
                unsigned colons = 0;
                for (; *od != '\0'; ++od) {
                    if (*od != ':') {
                        throw StaticError(
                            next.location,
                            "expected one of :, ::, :::, +:, +::, +:::, got: " + op.data);
                    }
                    ++colons;
                }
                // The number of colons selects the field's visibility.
                ObjectField::Hide field_hide;
                switch (colons) {
                    case 1: field_hide = ObjectField::INHERIT; break;

                    case 2: field_hide = ObjectField::HIDDEN; break;

                    case 3: field_hide = ObjectField::VISIBLE; break;

                    default:
                        throw StaticError(
                            next.location,
                            "expected one of :, ::, :::, +:, +::, +:::, got: " + op.data);
                }

                // Basic checks for invalid Jsonnet code.
                if (is_method && plus_sugar) {
                    throw StaticError(next.location,
                                      "cannot use +: syntax sugar in a method: " + next.data);
                }
                // Computed [expr] names cannot be compared here, so only identifier and
                // string names take part in duplicate detection (keyed by token text).
                if (kind != ObjectField::FIELD_EXPR) {
                    if (!literal_fields.insert(next.data).second) {
                        throw StaticError(next.location, "duplicate field: " + next.data);
                    }
                }

                AST *body = parse(MAX_PRECEDENCE, current_depth + 1);

                // Consume an optional comma and advance 'next' to the token that starts
                // the following member.
                Fodder comma_fodder;
                next = pop();
                if (next.kind == Token::COMMA) {
                    comma_fodder = next.fodder;
                    next = pop();
                    got_comma = true;
                }
                fields.emplace_back(kind,
                                    fodder1,
                                    fodder2,
                                    fodder_l,
                                    fodder_r,
                                    field_hide,
                                    plus_sugar,
                                    is_method,
                                    expr1,
                                    id,
                                    idLocation,
                                    params,
                                    meth_comma,
                                    op.fodder,
                                    body,
                                    nullptr,
                                    comma_fodder);
            } break;

            case Token::LOCAL: {
                // Object-level local: local id = expr or local id(params) = expr.
                Fodder local_fodder = next.fodder;
                Token var_id = popExpect(Token::IDENTIFIER);
                auto *id = alloc->makeIdentifier(var_id.data32());

                if (binds.find(id) != binds.end()) {
                    throw StaticError(var_id.location, "duplicate local var: " + var_id.data);
                }
                bool is_method = false;
                bool func_comma = false;
                ArgParams params;
                Fodder paren_l_fodder;
                Fodder paren_r_fodder;
                if (peek().kind == Token::PAREN_L) {
                    Token paren_l = pop();
                    paren_l_fodder = paren_l.fodder;
                    is_method = true;
                    params = parseParams("function parameter", func_comma, paren_r_fodder, current_depth);
                }
                Token eq = popExpect(Token::OPERATOR, "=");
                AST *body = parse(MAX_PRECEDENCE, current_depth + 1);
                // The name is registered only after its body has been parsed.
                binds.insert(id);

                Fodder comma_fodder;
                next = pop();
                if (next.kind == Token::COMMA) {
                    comma_fodder = next.fodder;
                    next = pop();
                    got_comma = true;
                }
                fields.push_back(ObjectField::Local(local_fodder,
                                                    var_id.fodder,
                                                    paren_l_fodder,
                                                    paren_r_fodder,
                                                    is_method,
                                                    id,
                                                    params,
                                                    func_comma,
                                                    eq.fodder,
                                                    body,
                                                    comma_fodder));

            } break;

            case Token::ASSERT: {
                // Object-level assert: assert cond or assert cond : msg.
                Fodder assert_fodder = next.fodder;
                AST *cond = parse(MAX_PRECEDENCE, current_depth + 1);
                AST *msg = nullptr;
                Fodder colon_fodder;
                if (peek().kind == Token::OPERATOR && peek().data == ":") {
                    Token colon = pop();
                    colon_fodder = colon.fodder;
                    msg = parse(MAX_PRECEDENCE, current_depth + 1);
                }

                Fodder comma_fodder;
                next = pop();
                if (next.kind == Token::COMMA) {
                    comma_fodder = next.fodder;
                    next = pop();
                    got_comma = true;
                }
                fields.push_back(
                    ObjectField::Assert(assert_fodder, cond, colon_fodder, msg, comma_fodder));
            } break;

            // Anything else (including end of file) cannot start an object member.
            default: throw unexpected(next, "parsing field definition");
        }

    } while (true);
}
576 | | |
577 | | /** Parses for x in expr for y in expr if expr for z in expr ... |
578 | | * |
579 | | * \param end The token that ends the comprehension (e.g. ] or }). |
580 | | * \param for_fodder Fodder before the first 'for'. |
581 | | * \param specs The comprehension specs to be populated. |
582 | | * \param current_depth Current recursion depth to prevent stack overflow. |
583 | | * \returns The closing token. |
584 | | */ |
585 | | Token parseComprehensionSpecs(Token::Kind end, Fodder for_fodder, |
586 | | std::vector<ComprehensionSpec> &specs, |
587 | | unsigned current_depth) |
588 | 905k | { |
589 | 905k | if (current_depth >= MAX_PARSER_DEPTH) { |
590 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
591 | 0 | } |
592 | | |
593 | 993k | while (true) { |
594 | 991k | LocationRange l; |
595 | 991k | Token id_token = popExpect(Token::IDENTIFIER); |
596 | 991k | const Identifier *id = alloc->makeIdentifier(id_token.data32()); |
597 | 991k | Token in_token = popExpect(Token::IN); |
598 | 991k | AST *arr = parse(MAX_PRECEDENCE, current_depth + 1); |
599 | 991k | specs.emplace_back( |
600 | 991k | ComprehensionSpec::FOR, for_fodder, id_token.fodder, id, in_token.fodder, arr); |
601 | | |
602 | 991k | Token maybe_if = pop(); |
603 | 1.48M | for (; maybe_if.kind == Token::IF; maybe_if = pop()) { |
604 | 489k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
605 | 489k | specs.emplace_back( |
606 | 489k | ComprehensionSpec::IF, maybe_if.fodder, Fodder{}, nullptr, Fodder{}, cond); |
607 | 489k | } |
608 | 991k | if (maybe_if.kind == end) { |
609 | 903k | return maybe_if; |
610 | 903k | } |
611 | 88.0k | if (maybe_if.kind != Token::FOR) { |
612 | 232 | std::stringstream ss; |
613 | 232 | ss << "expected for, if or " << end << " after for clause, got: " << maybe_if; |
614 | 232 | throw StaticError(maybe_if.location, ss.str()); |
615 | 232 | } |
616 | 87.7k | for_fodder = maybe_if.fodder; |
617 | 87.7k | } |
618 | 905k | } |
619 | | |
/** Parse a terminal (literal, var, import, etc.), an object declaration, unary operator,
 * or a parenthesized expression.
 *
 * Pops one token and dispatches on its kind. Compound forms (unary operand, object body,
 * array elements, parenthesized expression, super index) recurse with current_depth + 1.
 *
 * \param current_depth Current recursion depth to prevent stack overflow.
 * \returns The parsed AST.
 *
 * Throws StaticError when current_depth has reached MAX_PARSER_DEPTH, on end of file, or
 * when the token cannot start one of the constructs handled here.
 */
AST *parseTerminalBracketsOrUnary(unsigned current_depth)
{
    if (current_depth >= MAX_PARSER_DEPTH) {
        throw StaticError(peek().location, "Exceeded maximum parse depth limit.");
    }

    Token tok = pop();
    switch (tok.kind) {
        // Token kinds that are not accepted at this position.
        // NOTE(review): the keyword-led forms among these are presumably consumed by
        // maybeParseGreedy before this function is reached - confirm against the caller.
        case Token::ASSERT:
        case Token::BRACE_R:
        case Token::BRACKET_R:
        case Token::COMMA:
        case Token::DOT:
        case Token::ELSE:
        case Token::ERROR:
        case Token::FOR:
        case Token::FUNCTION:
        case Token::IF:
        case Token::IN:
        case Token::IMPORT:
        case Token::IMPORTSTR:
        case Token::IMPORTBIN:
        case Token::LOCAL:
        case Token::PAREN_R:
        case Token::SEMICOLON:
        case Token::TAILSTRICT:
        case Token::THEN: throw unexpected(tok, "parsing terminal");

        case Token::END_OF_FILE: throw StaticError(tok.location, "unexpected end of file.");

        case Token::OPERATOR: {
            // An operator at the start of an expression must be a unary one.
            UnaryOp uop;
            if (!op_is_unary(tok.data, uop)) {
                std::stringstream ss;
                ss << "not a unary operator: " << tok.data;
                throw StaticError(tok.location, ss.str());
            }
            AST *expr = parse(UNARY_PRECEDENCE, current_depth + 1);
            return alloc->make<Unary>(span(tok, expr), tok.fodder, uop, expr);
        }
        case Token::BRACE_L: {
            // parseObjectRemainder assigns obj before returning; the closing token it
            // returns is not needed here.
            AST *obj;
            parseObjectRemainder(obj, tok, current_depth + 1);
            return obj;
        }

        case Token::BRACKET_L: {
            // 'next' is a copy of the lookahead token; it is refreshed after each pop.
            Token next = peek();
            if (next.kind == Token::BRACKET_R) {
                // Empty array: [].
                Token bracket_r = pop();
                return alloc->make<Array>(
                    span(tok, next), tok.fodder, Array::Elements{}, false, bracket_r.fodder);
            }
            AST *first = parse(MAX_PRECEDENCE, current_depth + 1);
            bool got_comma = false;
            Fodder comma_fodder;
            next = peek();
            if (!got_comma && next.kind == Token::COMMA) {
                Token comma = pop();
                comma_fodder = comma.fodder;
                next = peek();
                got_comma = true;
            }

            if (next.kind == Token::FOR) {
                // It's a comprehension
                // A comma between the first element and 'for' is accepted and recorded
                // through got_comma / comma_fodder.
                Token for_token = pop();
                std::vector<ComprehensionSpec> specs;
                Token last = parseComprehensionSpecs(Token::BRACKET_R, for_token.fodder, specs, current_depth + 1);
                return alloc->make<ArrayComprehension>(span(tok, last),
                                                       tok.fodder,
                                                       first,
                                                       comma_fodder,
                                                       got_comma,
                                                       specs,
                                                       last.fodder);
            }

            // Not a comprehension: It can have more elements.
            Array::Elements elements;
            elements.emplace_back(first, comma_fodder);
            do {
                if (next.kind == Token::BRACKET_R) {
                    // got_comma here records whether the last element had a trailing comma.
                    Token bracket_r = pop();
                    return alloc->make<Array>(
                        span(tok, next), tok.fodder, elements, got_comma, bracket_r.fodder);
                }
                if (!got_comma) {
                    std::stringstream ss;
                    ss << "expected a comma before next array element.";
                    throw StaticError(next.location, ss.str());
                }
                AST *expr = parse(MAX_PRECEDENCE, current_depth + 1);
                comma_fodder.clear();
                got_comma = false;
                next = peek();
                if (next.kind == Token::COMMA) {
                    Token comma = pop();
                    comma_fodder = comma.fodder;
                    next = peek();
                    got_comma = true;
                }
                elements.emplace_back(expr, comma_fodder);
            } while (true);
        }

        case Token::PAREN_L: {
            auto *inner = parse(MAX_PRECEDENCE, current_depth + 1);
            Token close = popExpect(Token::PAREN_R);
            return alloc->make<Parens>(span(tok, close), tok.fodder, inner, close.fodder);
        }

        // Literals
        // The number is passed on as the token's text (tok.data), not converted here.
        case Token::NUMBER: return alloc->make<LiteralNumber>(span(tok), tok.fodder, tok.data);

        case Token::STRING_SINGLE:
            return alloc->make<LiteralString>(
                span(tok), tok.fodder, tok.data32(), LiteralString::SINGLE, "", "");
        case Token::STRING_DOUBLE:
            return alloc->make<LiteralString>(
                span(tok), tok.fodder, tok.data32(), LiteralString::DOUBLE, "", "");
        case Token::STRING_BLOCK:
            // Only block strings pass the two indent strings recorded on the token.
            return alloc->make<LiteralString>(span(tok),
                                              tok.fodder,
                                              tok.data32(),
                                              LiteralString::BLOCK,
                                              tok.stringBlockIndent,
                                              tok.stringBlockTermIndent);
        case Token::VERBATIM_STRING_SINGLE:
            return alloc->make<LiteralString>(
                span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_SINGLE, "", "");
        case Token::VERBATIM_STRING_DOUBLE:
            return alloc->make<LiteralString>(
                span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_DOUBLE, "", "");

        case Token::FALSE: return alloc->make<LiteralBoolean>(span(tok), tok.fodder, false);

        case Token::TRUE: return alloc->make<LiteralBoolean>(span(tok), tok.fodder, true);

        case Token::NULL_LIT: return alloc->make<LiteralNull>(span(tok), tok.fodder);

        // Variables
        case Token::DOLLAR: return alloc->make<Dollar>(span(tok), tok.fodder);

        case Token::IDENTIFIER:
            return alloc->make<Var>(span(tok), tok.fodder, alloc->makeIdentifier(tok.data32()));

        case Token::SELF: return alloc->make<Self>(span(tok), tok.fodder);

        case Token::SUPER: {
            // super must be followed by .id or [expr]; exactly one of id / index is set.
            Token next = pop();
            AST *index = nullptr;
            const Identifier *id = nullptr;
            Fodder id_fodder;
            switch (next.kind) {
                case Token::DOT: {
                    Token field_id = popExpect(Token::IDENTIFIER);
                    id_fodder = field_id.fodder;
                    id = alloc->makeIdentifier(field_id.data32());
                } break;
                case Token::BRACKET_L: {
                    index = parse(MAX_PRECEDENCE, current_depth + 1);
                    Token bracket_r = popExpect(Token::BRACKET_R);
                    id_fodder = bracket_r.fodder;  // Not id_fodder, but use the same var.
                } break;
                default: throw StaticError(tok.location, "expected . or [ after super.");
            }
            // The recorded location is that of the 'super' token alone.
            return alloc->make<SuperIndex>(
                span(tok), tok.fodder, next.fodder, index, id_fodder, id);
        }
    }

    // Reached only if tok.kind matches none of the cases above: report and abort.
    std::cerr << "INTERNAL ERROR: Unknown tok kind: " << tok.kind << std::endl;
    std::abort();
    return nullptr;  // Quiet, compiler.
}
802 | | |
803 | | /** If the first token makes it clear that we will be parsing a greedy construct, return the AST. |
804 | | * Otherwise, return nullptr. Greedy constructs are those that consume as many tokens as possible |
805 | | * on the right hand side because they have no closing token. |
806 | | * |
807 | | * \param current_depth Current recursion depth to prevent stack overflow. |
808 | | * \returns The parsed AST or nullptr. |
809 | | */ |
810 | | AST *maybeParseGreedy(unsigned current_depth) |
811 | 129M | { |
812 | 129M | if (current_depth >= MAX_PARSER_DEPTH) { |
813 | 10 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
814 | 10 | } |
815 | | |
816 | | // Allocate this on the heap to control stack growth. |
817 | 129M | std::unique_ptr<Token> begin_(new Token(peek())); |
818 | 129M | const Token &begin = *begin_; |
819 | | |
820 | 129M | switch (begin.kind) { |
821 | | // These cases have effectively MAX_PRECEDENCE as the first |
822 | | // call to parse will parse them. |
823 | 990k | case Token::ASSERT: { |
824 | 990k | pop(); |
825 | 990k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
826 | 990k | Fodder colonFodder; |
827 | 990k | AST *msg = nullptr; |
828 | 990k | if (peek().kind == Token::OPERATOR && peek().data == ":") { |
829 | 915k | Token colon = pop(); |
830 | 915k | colonFodder = colon.fodder; |
831 | 915k | msg = parse(MAX_PRECEDENCE, current_depth + 1); |
832 | 915k | } |
833 | 990k | Token semicolon = popExpect(Token::SEMICOLON); |
834 | 990k | AST *rest = parse(MAX_PRECEDENCE, current_depth + 1); |
835 | 990k | return alloc->make<Assert>(span(begin, rest), |
836 | 990k | begin.fodder, |
837 | 990k | cond, |
838 | 990k | colonFodder, |
839 | 990k | msg, |
840 | 990k | semicolon.fodder, |
841 | 990k | rest); |
842 | 0 | } |
843 | | |
844 | 1.52M | case Token::ERROR: { |
845 | 1.52M | pop(); |
846 | 1.52M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
847 | 1.52M | return alloc->make<Error>(span(begin, expr), begin.fodder, expr); |
848 | 0 | } |
849 | | |
850 | 7.80M | case Token::IF: { |
851 | 7.80M | pop(); |
852 | 7.80M | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
853 | 7.80M | Token then = popExpect(Token::THEN); |
854 | 7.80M | AST *branch_true = parse(MAX_PRECEDENCE, current_depth + 1); |
855 | 7.80M | if (peek().kind == Token::ELSE) { |
856 | 7.70M | Token else_ = pop(); |
857 | 7.70M | AST *branch_false = parse(MAX_PRECEDENCE, current_depth + 1); |
858 | 7.70M | return alloc->make<Conditional>(span(begin, branch_false), |
859 | 7.70M | begin.fodder, |
860 | 7.70M | cond, |
861 | 7.70M | then.fodder, |
862 | 7.70M | branch_true, |
863 | 7.70M | else_.fodder, |
864 | 7.70M | branch_false); |
865 | 7.70M | } |
866 | 97.4k | return alloc->make<Conditional>(span(begin, branch_true), |
867 | 97.4k | begin.fodder, |
868 | 97.4k | cond, |
869 | 97.4k | then.fodder, |
870 | 97.4k | branch_true, |
871 | 97.4k | Fodder{}, |
872 | 97.4k | nullptr); |
873 | 7.80M | } |
874 | | |
875 | 484k | case Token::FUNCTION: { |
876 | 484k | pop(); // Still available in 'begin'. |
877 | 484k | Token paren_l = pop(); |
878 | 484k | if (paren_l.kind == Token::PAREN_L) { |
879 | 484k | std::vector<AST *> params_asts; |
880 | 484k | bool got_comma; |
881 | 484k | Fodder paren_r_fodder; |
882 | 484k | ArgParams params = parseParams("function parameter", got_comma, paren_r_fodder, current_depth); |
883 | 484k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
884 | 484k | return alloc->make<Function>(span(begin, body), |
885 | 484k | begin.fodder, |
886 | 484k | paren_l.fodder, |
887 | 484k | params, |
888 | 484k | got_comma, |
889 | 484k | paren_r_fodder, |
890 | 484k | body); |
891 | 484k | } else { |
892 | 17 | std::stringstream ss; |
893 | 17 | ss << "expected ( but got " << paren_l; |
894 | 17 | throw StaticError(paren_l.location, ss.str()); |
895 | 17 | } |
896 | 484k | } |
897 | | |
898 | 1.90k | case Token::IMPORT: { |
899 | 1.90k | pop(); |
900 | 1.90k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
901 | 1.90k | if (body->type == AST_LITERAL_STRING) { |
902 | 1.22k | auto *lit = static_cast<LiteralString *>(body); |
903 | 1.22k | if (lit->tokenKind == LiteralString::BLOCK) { |
904 | 3 | throw StaticError(lit->location, |
905 | 3 | "Cannot use text blocks in import statements."); |
906 | 3 | } |
907 | 1.22k | return alloc->make<Import>(span(begin, body), begin.fodder, lit); |
908 | 1.22k | } else { |
909 | 678 | std::stringstream ss; |
910 | 678 | ss << "computed imports are not allowed."; |
911 | 678 | throw StaticError(body->location, ss.str()); |
912 | 678 | } |
913 | 1.90k | } |
914 | | |
915 | 3.56k | case Token::IMPORTSTR: { |
916 | 3.56k | pop(); |
917 | 3.56k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
918 | 3.56k | if (body->type == AST_LITERAL_STRING) { |
919 | 3.06k | auto *lit = static_cast<LiteralString *>(body); |
920 | 3.06k | if (lit->tokenKind == LiteralString::BLOCK) { |
921 | 0 | throw StaticError(lit->location, |
922 | 0 | "Cannot use text blocks in import statements."); |
923 | 0 | } |
924 | 3.06k | return alloc->make<Importstr>(span(begin, body), begin.fodder, lit); |
925 | 3.06k | } else { |
926 | 500 | std::stringstream ss; |
927 | 500 | ss << "computed imports are not allowed."; |
928 | 500 | throw StaticError(body->location, ss.str()); |
929 | 500 | } |
930 | 3.56k | } |
931 | | |
932 | 1.37k | case Token::IMPORTBIN: { |
933 | 1.37k | pop(); |
934 | 1.37k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
935 | 1.37k | if (body->type == AST_LITERAL_STRING) { |
936 | 1.15k | auto *lit = static_cast<LiteralString *>(body); |
937 | 1.15k | if (lit->tokenKind == LiteralString::BLOCK) { |
938 | 0 | throw StaticError(lit->location, |
939 | 0 | "Cannot use text blocks in import statements."); |
940 | 0 | } |
941 | 1.15k | return alloc->make<Importbin>(span(begin, body), begin.fodder, lit); |
942 | 1.15k | } else { |
943 | 214 | std::stringstream ss; |
944 | 214 | ss << "computed imports are not allowed."; |
945 | 214 | throw StaticError(body->location, ss.str()); |
946 | 214 | } |
947 | 1.37k | } |
948 | | |
949 | 5.78M | case Token::LOCAL: { |
950 | 5.78M | pop(); |
951 | 5.78M | Local::Binds binds; |
952 | 6.09M | do { |
953 | 6.09M | Token delim = parseBind(binds, current_depth + 1); |
954 | 6.09M | if (delim.kind != Token::SEMICOLON && delim.kind != Token::COMMA) { |
955 | 41 | std::stringstream ss; |
956 | 41 | ss << "expected , or ; but got " << delim; |
957 | 41 | throw StaticError(delim.location, ss.str()); |
958 | 41 | } |
959 | 6.09M | if (delim.kind == Token::SEMICOLON) |
960 | 5.77M | break; |
961 | 6.09M | } while (true); |
962 | 5.78M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
963 | 5.78M | return alloc->make<Local>(span(begin, body), begin.fodder, binds, body); |
964 | 5.78M | } |
965 | | |
966 | 113M | default: |
967 | 113M | return nullptr; |
968 | 129M | } |
969 | 129M | } |
970 | | |
971 | | |
972 | | /** Parse a general expression. |
973 | | * |
974 | | * Consume infix tokens up to (but not including) max_precedence, then stop. |
975 | | * \param max_precedence The maximum precedence to consider. |
976 | | * \param current_depth Current recursion depth to prevent stack overflow. |
977 | | * \returns The parsed AST. |
978 | | */ |
979 | | AST *parse(unsigned max_precedence, unsigned current_depth) |
980 | 129M | { |
981 | 129M | if (current_depth >= MAX_PARSER_DEPTH) { |
982 | 14 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
983 | 14 | } |
984 | | |
985 | 129M | AST *ast = maybeParseGreedy(current_depth + 1); |
986 | | // There cannot be an operator after a greedy parse. |
987 | 129M | if (ast != nullptr) return ast; |
988 | | |
989 | | // If we get here, we could be parsing an infix construct. |
990 | | |
991 | | // Allocate this on the heap to control stack growth. |
992 | 113M | std::unique_ptr<Token> begin_(new Token(peek())); |
993 | 113M | const Token &begin = *begin_; |
994 | | |
995 | 113M | AST *lhs = parseTerminalBracketsOrUnary(current_depth + 1); |
996 | | |
997 | 113M | return parseInfix(lhs, begin, max_precedence, current_depth + 1); |
998 | 129M | } |
999 | | |
1000 | | /** Parse infix operators (binary operators, indexing, function calls). |
1001 | | * |
1002 | | * \param lhs Left-hand side of the operator. |
1003 | | * \param begin The token representing the beginning of the expression. |
1004 | | * \param max_precedence The maximum precedence to consider. |
1005 | | * \param current_depth Current recursion depth to prevent stack overflow. |
1006 | | * \returns The parsed AST. |
1007 | | */ |
1008 | | AST *parseInfix(AST *lhs, const Token &begin, unsigned max_precedence, unsigned current_depth) |
1009 | 113M | { |
1010 | 113M | if (current_depth >= MAX_PARSER_DEPTH) { |
1011 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
1012 | 0 | } |
1013 | | |
1014 | 175M | while (true) { |
1015 | | |
1016 | 175M | BinaryOp bop = BOP_PLUS; |
1017 | 175M | unsigned op_precedence = 0; |
1018 | | |
1019 | 175M | switch (peek().kind) { |
1020 | | // Logical / arithmetic binary operator. |
1021 | 7.19k | case Token::IN: |
1022 | 26.8M | case Token::OPERATOR: |
1023 | | // These occur if the outer statement was an assert or array slice. |
1024 | | // Either way, we terminate the parsing here. |
1025 | 26.8M | if (peek().data == ":" || peek().data == "::") { |
1026 | 1.94M | return lhs; |
1027 | 1.94M | } |
1028 | 24.9M | if (!op_is_binary(peek().data, bop)) { |
1029 | 477 | std::stringstream ss; |
1030 | 477 | ss << "not a binary operator: " << peek().data; |
1031 | 477 | throw StaticError(peek().location, ss.str()); |
1032 | 477 | } |
1033 | 24.9M | op_precedence = precedence_map[bop]; |
1034 | 24.9M | break; |
1035 | | |
1036 | | // Index, Apply |
1037 | 17.2M | case Token::DOT: |
1038 | 21.3M | case Token::BRACKET_L: |
1039 | 40.8M | case Token::PAREN_L: |
1040 | 41.1M | case Token::BRACE_L: |
1041 | 41.1M | op_precedence = APPLY_PRECEDENCE; |
1042 | 41.1M | break; |
1043 | | |
1044 | 107M | default: |
1045 | | // This happens when we reach EOF or the terminating token of an outer context. |
1046 | 107M | return lhs; |
1047 | 175M | } |
1048 | | |
1049 | | // If higher precedence than the outer recursive call, let that handle it. |
1050 | 66.0M | if (op_precedence >= max_precedence) |
1051 | 4.04M | return lhs; |
1052 | | |
1053 | 62.0M | Token op = pop(); |
1054 | | |
1055 | 62.0M | switch (op.kind) { |
1056 | 4.13M | case Token::BRACKET_L: { |
1057 | 4.13M | bool is_slice; |
1058 | 4.13M | AST *first = nullptr; |
1059 | 4.13M | Fodder second_fodder; |
1060 | 4.13M | AST *second = nullptr; |
1061 | 4.13M | Fodder third_fodder; |
1062 | 4.13M | AST *third = nullptr; |
1063 | | |
1064 | 4.13M | if (peek().kind == Token::BRACKET_R) |
1065 | 8 | throw unexpected(pop(), "parsing index"); |
1066 | | |
1067 | 4.13M | if (peek().data != ":" && peek().data != "::") { |
1068 | 4.07M | first = parse(MAX_PRECEDENCE, current_depth + 1); |
1069 | 4.07M | } |
1070 | | |
1071 | 4.13M | if (peek().kind == Token::OPERATOR && peek().data == "::") { |
1072 | | // Handle :: |
1073 | 2.95k | is_slice = true; |
1074 | 2.95k | Token joined = pop(); |
1075 | 2.95k | second_fodder = joined.fodder; |
1076 | | |
1077 | 2.95k | if (peek().kind != Token::BRACKET_R) |
1078 | 2.27k | third = parse(MAX_PRECEDENCE, current_depth + 1); |
1079 | | |
1080 | 4.12M | } else if (peek().kind != Token::BRACKET_R) { |
1081 | 445k | is_slice = true; |
1082 | 445k | Token delim = pop(); |
1083 | 445k | if (delim.data != ":") |
1084 | 401 | throw unexpected(delim, "parsing slice"); |
1085 | | |
1086 | 445k | second_fodder = delim.fodder; |
1087 | | |
1088 | 445k | if (peek().data != ":" && peek().kind != Token::BRACKET_R) |
1089 | 198k | second = parse(MAX_PRECEDENCE, current_depth + 1); |
1090 | | |
1091 | 445k | if (peek().kind != Token::BRACKET_R) { |
1092 | 26.9k | Token delim = pop(); |
1093 | 26.9k | if (delim.data != ":") |
1094 | 113 | throw unexpected(delim, "parsing slice"); |
1095 | | |
1096 | 26.8k | third_fodder = delim.fodder; |
1097 | | |
1098 | 26.8k | if (peek().kind != Token::BRACKET_R) |
1099 | 26.1k | third = parse(MAX_PRECEDENCE, current_depth + 1); |
1100 | 26.8k | } |
1101 | 3.68M | } else { |
1102 | 3.68M | is_slice = false; |
1103 | 3.68M | } |
1104 | 4.13M | Token end = popExpect(Token::BRACKET_R); |
1105 | 4.13M | lhs = alloc->make<Index>(span(begin, end), |
1106 | 4.13M | EMPTY_FODDER, |
1107 | 4.13M | lhs, |
1108 | 4.13M | op.fodder, |
1109 | 4.13M | is_slice, |
1110 | 4.13M | first, |
1111 | 4.13M | second_fodder, |
1112 | 4.13M | second, |
1113 | 4.13M | third_fodder, |
1114 | 4.13M | third, |
1115 | 4.13M | end.fodder); |
1116 | 4.13M | break; |
1117 | 4.13M | } |
1118 | 17.2M | case Token::DOT: { |
1119 | 17.2M | Token field_id = popExpect(Token::IDENTIFIER); |
1120 | 17.2M | const Identifier *id = alloc->makeIdentifier(field_id.data32()); |
1121 | 17.2M | lhs = alloc->make<Index>(span(begin, field_id), |
1122 | 17.2M | EMPTY_FODDER, |
1123 | 17.2M | lhs, |
1124 | 17.2M | op.fodder, |
1125 | 17.2M | field_id.fodder, |
1126 | 17.2M | id); |
1127 | 17.2M | break; |
1128 | 4.13M | } |
1129 | 19.5M | case Token::PAREN_L: { |
1130 | 19.5M | ArgParams args; |
1131 | 19.5M | bool got_comma; |
1132 | 19.5M | Token end = parseArgs(args, "function argument", got_comma, current_depth); |
1133 | 19.5M | bool got_named = false; |
1134 | 32.5M | for (const auto& arg : args) { |
1135 | 32.5M | if (arg.id != nullptr) { |
1136 | 76.0k | got_named = true; |
1137 | 32.4M | } else { |
1138 | 32.4M | if (got_named) { |
1139 | 14 | throw StaticError(arg.expr->location, "Positional argument after a named argument is not allowed"); |
1140 | 14 | } |
1141 | 32.4M | } |
1142 | 32.5M | } |
1143 | 19.5M | bool tailstrict = false; |
1144 | 19.5M | Fodder tailstrict_fodder; |
1145 | 19.5M | if (peek().kind == Token::TAILSTRICT) { |
1146 | 963k | Token tailstrict_token = pop(); |
1147 | 963k | tailstrict_fodder = tailstrict_token.fodder; |
1148 | 963k | tailstrict = true; |
1149 | 963k | } |
1150 | 19.5M | lhs = alloc->make<Apply>(span(begin, end), |
1151 | 19.5M | EMPTY_FODDER, |
1152 | 19.5M | lhs, |
1153 | 19.5M | op.fodder, |
1154 | 19.5M | args, |
1155 | 19.5M | got_comma, |
1156 | 19.5M | end.fodder, |
1157 | 19.5M | tailstrict_fodder, |
1158 | 19.5M | tailstrict); |
1159 | 19.5M | break; |
1160 | 19.5M | } |
1161 | 256k | case Token::BRACE_L: { |
1162 | 256k | AST *obj; |
1163 | 256k | Token end = parseObjectRemainder(obj, op, current_depth + 1); |
1164 | 256k | lhs = alloc->make<ApplyBrace>(span(begin, end), EMPTY_FODDER, lhs, obj); |
1165 | 256k | break; |
1166 | 19.5M | } |
1167 | | |
1168 | 5.71k | case Token::IN: { |
1169 | 5.71k | if (peek().kind == Token::SUPER) { |
1170 | 737 | Token super = pop(); |
1171 | 737 | lhs = alloc->make<InSuper>( |
1172 | 737 | span(begin, super), EMPTY_FODDER, lhs, op.fodder, super.fodder); |
1173 | 4.97k | } else { |
1174 | 4.97k | AST *rhs = parse(op_precedence, current_depth + 1); |
1175 | 4.97k | lhs = alloc->make<Binary>( |
1176 | 4.97k | span(begin, rhs), EMPTY_FODDER, lhs, op.fodder, bop, rhs); |
1177 | 4.97k | } |
1178 | 5.71k | break; |
1179 | 19.5M | } |
1180 | | |
1181 | 20.8M | case Token::OPERATOR: { |
1182 | 20.8M | AST *rhs = parse(op_precedence, current_depth + 1); |
1183 | 20.8M | lhs = alloc->make<Binary>( |
1184 | 20.8M | span(begin, rhs), EMPTY_FODDER, lhs, op.fodder, bop, rhs); |
1185 | 20.8M | break; |
1186 | 19.5M | } |
1187 | | |
1188 | 0 | default: { |
1189 | 0 | std::cerr << "Should not be here." << std::endl; |
1190 | 0 | abort(); |
1191 | 19.5M | } |
1192 | 62.0M | } |
1193 | 62.0M | } |
1194 | | |
1195 | | // (1 & ((1 + (1 * 1)) + 1)) & 1 |
1196 | | // |
1197 | | // |
1198 | | |
1199 | | /* |
1200 | | // Allocate this on the heap to control stack growth. |
1201 | | std::unique_ptr<Token> begin_(new Token(peek())); |
1202 | | const Token &begin = *begin_; |
1203 | | */ |
1204 | 113M | } |
1205 | | }; |
1206 | | |
1207 | | } // namespace |
1208 | | |
1209 | | AST *jsonnet_parse(Allocator *alloc, Tokens &tokens) |
1210 | 46.3k | { |
1211 | 46.3k | Parser parser(tokens, alloc); |
1212 | 46.3k | unsigned parse_depth = 0; |
1213 | 46.3k | AST *expr = parser.parse(MAX_PRECEDENCE, parse_depth); |
1214 | 46.3k | if (tokens.front().kind != Token::END_OF_FILE) { |
1215 | 697 | std::stringstream ss; |
1216 | 697 | ss << "did not expect: " << tokens.front(); |
1217 | 697 | throw StaticError(tokens.front().location, ss.str()); |
1218 | 697 | } |
1219 | | |
1220 | 45.6k | return expr; |
1221 | 46.3k | } |
1222 | | |
1223 | | } // namespace jsonnet::internal |