/src/jsonnet/core/parser.cpp
Line | Count | Source |
1 | | /* |
2 | | Copyright 2015 Google Inc. All rights reserved. |
3 | | |
4 | | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | you may not use this file except in compliance with the License. |
6 | | You may obtain a copy of the License at |
7 | | |
8 | | http://www.apache.org/licenses/LICENSE-2.0 |
9 | | |
10 | | Unless required by applicable law or agreed to in writing, software |
11 | | distributed under the License is distributed on an "AS IS" BASIS, |
12 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | See the License for the specific language governing permissions and |
14 | | limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <cassert> |
18 | | #include <cmath> |
19 | | #include <cstdlib> |
20 | | |
21 | | #include <iomanip> |
22 | | #include <list> |
23 | | #include <memory> |
24 | | #include <set> |
25 | | #include <sstream> |
26 | | #include <locale> |
27 | | #include <string> |
28 | | |
29 | | #include "ast.h" |
30 | | #include "desugarer.h" |
31 | | #include "lexer.h" |
32 | | #include "parser.h" |
33 | | #include "static_error.h" |
34 | | |
35 | | namespace jsonnet::internal { |
36 | | |
37 | | std::string jsonnet_unparse_number(double v) |
38 | 6.46M | { |
39 | 6.46M | std::stringstream ss; |
40 | | // Make sure we output the same thing, even if the user |
41 | | // of the library changed the global locale |
42 | 6.46M | ss.imbue(std::locale::classic()); |
43 | 6.46M | if (v == floor(v)) { |
44 | 6.34M | ss << std::fixed << std::setprecision(0) << v; |
45 | 6.34M | } else { |
46 | | // See "What Every Computer Scientist Should Know About Floating-Point Arithmetic" |
47 | | // Theorem 15 |
48 | | // https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html |
49 | 123k | ss << std::setprecision(17); |
50 | 123k | ss << v; |
51 | 123k | } |
52 | 6.46M | return ss.str(); |
53 | 6.46M | } |
54 | | |
55 | | namespace { |
56 | | |
57 | | static const Fodder EMPTY_FODDER; |
58 | | |
59 | | /** Maximum parsing depth to avoid stack overflow due to pathological or malicious code. |
60 | | * This is especially important when parsing deeply nested structures that could lead to |
61 | | * excessive recursion in the parser functions. |
62 | | */ |
63 | | static const unsigned MAX_PARSER_DEPTH = 1000; |
64 | | |
65 | | static bool op_is_unary(const std::string &op, UnaryOp &uop) |
66 | 1.40M | { |
67 | 1.40M | auto it = unary_map.find(op); |
68 | 1.40M | if (it == unary_map.end()) |
69 | 597 | return false; |
70 | 1.40M | uop = it->second; |
71 | 1.40M | return true; |
72 | 1.40M | } |
73 | | |
74 | | static bool op_is_binary(const std::string &op, BinaryOp &bop) |
75 | 22.3M | { |
76 | 22.3M | auto it = binary_map.find(op); |
77 | 22.3M | if (it == binary_map.end()) |
78 | 433 | return false; |
79 | 22.3M | bop = it->second; |
80 | 22.3M | return true; |
81 | 22.3M | } |
82 | | |
83 | | LocationRange span(const Token &begin) |
84 | 93.8M | { |
85 | 93.8M | return LocationRange(begin.location.file, begin.location.begin, begin.location.end); |
86 | 93.8M | } |
87 | | |
88 | | LocationRange span(const Token &begin, const Token &end) |
89 | 42.7M | { |
90 | 42.7M | return LocationRange(begin.location.file, begin.location.begin, end.location.end); |
91 | 42.7M | } |
92 | | |
93 | | LocationRange span(const Token &begin, AST *end) |
94 | 35.4M | { |
95 | 35.4M | return LocationRange(begin.location.file, begin.location.begin, end->location.end); |
96 | 35.4M | } |
97 | | |
98 | | /** Holds state while parsing a given token list. |
99 | | */ |
100 | | class Parser { |
101 | | // The private member functions are utilities for dealing with the token stream. |
102 | | |
103 | | StaticError unexpected(const Token &tok, const std::string &while_) |
104 | 985 | { |
105 | 985 | std::stringstream ss; |
106 | 985 | ss << "unexpected: " << tok.kind << " while " << while_; |
107 | 985 | return StaticError(tok.location, ss.str()); |
108 | 985 | } |
109 | | |
110 | | Token pop(void) |
111 | 299M | { |
112 | 299M | Token tok = peek(); |
113 | 299M | tokens.pop_front(); |
114 | 299M | return tok; |
115 | 299M | } |
116 | | |
117 | | void push(Token tok) |
118 | 0 | { |
119 | 0 | tokens.push_front(tok); |
120 | 0 | } |
121 | | |
122 | | const Token &peek(void) |
123 | 956M | { |
124 | 956M | return tokens.front(); |
125 | 956M | } |
126 | | |
127 | | /** Only call this is peek() is not an EOF token. */ |
128 | | Token doublePeek(void) |
129 | 33.4M | { |
130 | 33.4M | Tokens::iterator it = tokens.begin(); // First one. |
131 | 33.4M | it++; // Now pointing at the second one. |
132 | 33.4M | return *(it); |
133 | 33.4M | } |
134 | | |
135 | | Token popExpect(Token::Kind k, const char *data = nullptr) |
136 | 47.1M | { |
137 | 47.1M | Token tok = pop(); |
138 | 47.1M | if (tok.kind != k) { |
139 | 702 | std::stringstream ss; |
140 | 702 | ss << "expected token " << k << " but got " << tok; |
141 | 702 | throw StaticError(tok.location, ss.str()); |
142 | 702 | } |
143 | 47.1M | if (data != nullptr && tok.data != data) { |
144 | 45 | std::stringstream ss; |
145 | 45 | ss << "expected operator " << data << " but got " << tok.data; |
146 | 45 | throw StaticError(tok.location, ss.str()); |
147 | 45 | } |
148 | 47.1M | return tok; |
149 | 47.1M | } |
150 | | |
151 | | std::list<Token> &tokens; |
152 | | Allocator *alloc; |
153 | | |
154 | | public: |
155 | 42.0k | Parser(Tokens &tokens, Allocator *alloc) : tokens(tokens), alloc(alloc) {} |
156 | | |
157 | | /** Parse a comma-separated list of expressions. |
158 | | * |
159 | | * Allows an optional ending comma. |
160 | | * \param args Expressions added here. |
161 | | * \param element_kind Used in error messages when a comma was not found. |
162 | | * \param got_comma Whether a trailing comma was found. |
163 | | * \param current_depth Current recursion depth to prevent stack overflow. |
164 | | * \returns The last token (the one that matched parameter end). |
165 | | */ |
166 | | Token parseArgs(ArgParams &args, const std::string &element_kind, bool &got_comma, unsigned current_depth) |
167 | 22.5M | { |
168 | 22.5M | got_comma = false; |
169 | 22.5M | bool first = true; |
170 | 61.7M | do { |
171 | 61.7M | Token next = peek(); |
172 | 61.7M | if (next.kind == Token::PAREN_R) { |
173 | | // got_comma can be true or false here. |
174 | 22.4M | return pop(); |
175 | 22.4M | } |
176 | 39.2M | if (!first && !got_comma) { |
177 | 265 | std::stringstream ss; |
178 | 265 | ss << "expected a comma before next " << element_kind << "."; |
179 | 265 | throw StaticError(next.location, ss.str()); |
180 | 265 | } |
181 | | // Either id=expr or id or expr, but note that expr could be id==1 so this needs |
182 | | // look-ahead. |
183 | 39.2M | Fodder id_fodder; |
184 | 39.2M | const Identifier *id = nullptr; |
185 | 39.2M | Fodder eq_fodder; |
186 | 39.2M | if (peek().kind == Token::IDENTIFIER) { |
187 | 33.4M | Token maybe_eq = doublePeek(); |
188 | 33.4M | if (maybe_eq.kind == Token::OPERATOR && maybe_eq.data == "=") { |
189 | 532k | id_fodder = peek().fodder; |
190 | 532k | id = alloc->makeIdentifier(peek().data32()); |
191 | 532k | eq_fodder = maybe_eq.fodder; |
192 | 532k | pop(); // id |
193 | 532k | pop(); // eq |
194 | 532k | } |
195 | 33.4M | } |
196 | 39.2M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
197 | 39.2M | got_comma = false; |
198 | 39.2M | first = false; |
199 | 39.2M | Fodder comma_fodder; |
200 | 39.2M | if (peek().kind == Token::COMMA) { |
201 | 16.7M | Token comma = pop(); |
202 | 16.7M | comma_fodder = comma.fodder; |
203 | 16.7M | got_comma = true; |
204 | 16.7M | } |
205 | 39.2M | args.emplace_back(id_fodder, id, eq_fodder, expr, comma_fodder); |
206 | 39.2M | } while (true); |
207 | 22.5M | } |
208 | | |
209 | | /** Parse function parameters. |
210 | | * |
211 | | * \param element_kind Used in error messages. |
212 | | * \param got_comma Whether a trailing comma was found. |
213 | | * \param close_fodder Fodder after the closing parenthesis. |
214 | | * \param current_depth Current recursion depth to prevent stack overflow. |
215 | | * \returns The parameters as ArgParams. |
216 | | */ |
217 | | ArgParams parseParams(const std::string &element_kind, bool &got_comma, Fodder &close_fodder, unsigned current_depth) |
218 | 4.80M | { |
219 | 4.80M | ArgParams params; |
220 | 4.80M | Token paren_r = parseArgs(params, element_kind, got_comma, current_depth); |
221 | | |
222 | | // Check they're all identifiers |
223 | | // parseArgs returns f(x) with x as an expression. Convert it here. |
224 | 9.40M | for (auto &p : params) { |
225 | 9.40M | if (p.id == nullptr) { |
226 | 8.94M | if (p.expr->type != AST_VAR) { |
227 | 9 | throw StaticError(p.expr->location, "could not parse parameter here."); |
228 | 9 | } |
229 | 8.94M | auto *pv = static_cast<Var *>(p.expr); |
230 | 8.94M | p.id = pv->id; |
231 | 8.94M | p.idFodder = pv->openFodder; |
232 | 8.94M | p.expr = nullptr; |
233 | 8.94M | } |
234 | 9.40M | } |
235 | | |
236 | 4.80M | close_fodder = paren_r.fodder; |
237 | | |
238 | 4.80M | return params; |
239 | 4.80M | } |
240 | | |
241 | | /** Parse a local bind statement. |
242 | | * |
243 | | * \param binds The bindings to be populated. |
244 | | * \param current_depth Current recursion depth to prevent stack overflow. |
245 | | * \returns The token after the binding (comma or semicolon). |
246 | | */ |
247 | | Token parseBind(Local::Binds &binds, unsigned current_depth) |
248 | 5.56M | { |
249 | 5.56M | Token var_id = popExpect(Token::IDENTIFIER); |
250 | 5.56M | auto *id = alloc->makeIdentifier(var_id.data32()); |
251 | 5.56M | for (const auto &bind : binds) { |
252 | 779k | if (bind.var == id) |
253 | 13 | throw StaticError(var_id.location, "duplicate local var: " + var_id.data); |
254 | 779k | } |
255 | 5.56M | bool is_function = false; |
256 | 5.56M | ArgParams params; |
257 | 5.56M | bool trailing_comma = false; |
258 | 5.56M | Fodder fodder_l, fodder_r; |
259 | 5.56M | if (peek().kind == Token::PAREN_L) { |
260 | 1.61M | Token paren_l = pop(); |
261 | 1.61M | fodder_l = paren_l.fodder; |
262 | 1.61M | params = parseParams("function parameter", trailing_comma, fodder_r, current_depth); |
263 | 1.61M | is_function = true; |
264 | 1.61M | } |
265 | 5.56M | Token eq = popExpect(Token::OPERATOR, "="); |
266 | 5.56M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
267 | 5.56M | Token delim = pop(); |
268 | 5.56M | binds.emplace_back(var_id.fodder, |
269 | 5.56M | id, |
270 | 5.56M | eq.fodder, |
271 | 5.56M | body, |
272 | 5.56M | is_function, |
273 | 5.56M | fodder_l, |
274 | 5.56M | params, |
275 | 5.56M | trailing_comma, |
276 | 5.56M | fodder_r, |
277 | 5.56M | delim.fodder); |
278 | 5.56M | return delim; |
279 | 5.56M | } |
280 | | |
281 | | /** Parse the remainder of an object after the opening brace. |
282 | | * |
283 | | * \param obj The object AST to be populated. |
284 | | * \param tok The opening brace token. |
285 | | * \param current_depth Current recursion depth to prevent stack overflow. |
286 | | * \returns The closing brace token. |
287 | | */ |
288 | | Token parseObjectRemainder(AST *&obj, const Token &tok, unsigned current_depth) |
289 | 1.32M | { |
290 | 1.32M | if (current_depth >= MAX_PARSER_DEPTH) { |
291 | 6 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
292 | 6 | } |
293 | | |
294 | 1.32M | ObjectFields fields; |
295 | 1.32M | std::set<std::string> literal_fields; // For duplicate fields detection. |
296 | 1.32M | std::set<const Identifier *> binds; // For duplicate locals detection. |
297 | | |
298 | 1.32M | bool got_comma = false; |
299 | 1.32M | bool first = true; |
300 | 1.32M | Token next = pop(); |
301 | | |
302 | 6.87M | do { |
303 | 6.87M | if (next.kind == Token::BRACE_R) { |
304 | 1.20M | obj = alloc->make<Object>( |
305 | 1.20M | span(tok, next), tok.fodder, fields, got_comma, next.fodder); |
306 | 1.20M | return next; |
307 | | |
308 | 5.66M | } else if (next.kind == Token::FOR) { |
309 | | // It's a comprehension |
310 | 99.7k | unsigned num_fields = 0; |
311 | 99.7k | unsigned num_asserts = 0; |
312 | 99.7k | const ObjectField *field_ptr = nullptr; |
313 | 100k | for (const auto &field : fields) { |
314 | 100k | if (field.kind == ObjectField::LOCAL) |
315 | 3 | continue; |
316 | 100k | if (field.kind == ObjectField::ASSERT) { |
317 | 380 | num_asserts++; |
318 | 380 | continue; |
319 | 380 | } |
320 | 99.7k | field_ptr = &field; |
321 | 99.7k | num_fields++; |
322 | 99.7k | } |
323 | 99.7k | if (num_asserts > 0) { |
324 | 27 | auto msg = "object comprehension cannot have asserts."; |
325 | 27 | throw StaticError(next.location, msg); |
326 | 27 | } |
327 | 99.7k | if (num_fields != 1) { |
328 | 12 | auto msg = "object comprehension can only have one field."; |
329 | 12 | throw StaticError(next.location, msg); |
330 | 12 | } |
331 | 99.7k | const ObjectField &field = *field_ptr; |
332 | | |
333 | 99.7k | if (field.hide != ObjectField::INHERIT) { |
334 | 3 | auto msg = "object comprehensions cannot have hidden fields."; |
335 | 3 | throw StaticError(next.location, msg); |
336 | 3 | } |
337 | | |
338 | 99.7k | if (field.kind != ObjectField::FIELD_EXPR) { |
339 | 3 | auto msg = "object comprehensions can only have [e] fields."; |
340 | 3 | throw StaticError(next.location, msg); |
341 | 3 | } |
342 | | |
343 | 99.6k | std::vector<ComprehensionSpec> specs; |
344 | 99.6k | Token last = parseComprehensionSpecs(Token::BRACE_R, next.fodder, specs, current_depth + 1); |
345 | 99.6k | obj = alloc->make<ObjectComprehension>( |
346 | 99.6k | span(tok, last), tok.fodder, fields, got_comma, specs, last.fodder); |
347 | | |
348 | 99.6k | return last; |
349 | 99.7k | } |
350 | | |
351 | 5.56M | if (!got_comma && !first) |
352 | 288 | throw StaticError(next.location, "expected a comma before next field."); |
353 | | |
354 | 5.56M | first = false; |
355 | 5.56M | got_comma = false; |
356 | | |
357 | 5.56M | switch (next.kind) { |
358 | 112k | case Token::BRACKET_L: |
359 | 5.05M | case Token::IDENTIFIER: |
360 | 5.22M | case Token::STRING_DOUBLE: |
361 | 5.31M | case Token::STRING_SINGLE: |
362 | 5.31M | case Token::STRING_BLOCK: |
363 | 5.31M | case Token::VERBATIM_STRING_DOUBLE: |
364 | 5.31M | case Token::VERBATIM_STRING_SINGLE: { |
365 | 5.31M | ObjectField::Kind kind; |
366 | 5.31M | AST *expr1 = nullptr; |
367 | 5.31M | const Identifier *id = nullptr; |
368 | 5.31M | Fodder fodder1, fodder2; |
369 | 5.31M | LocationRange idLocation; |
370 | 5.31M | if (next.kind == Token::IDENTIFIER) { |
371 | 4.94M | fodder1 = next.fodder; |
372 | 4.94M | kind = ObjectField::FIELD_ID; |
373 | 4.94M | id = alloc->makeIdentifier(next.data32()); |
374 | 4.94M | idLocation = next.location; |
375 | 4.94M | } else if (next.kind == Token::STRING_DOUBLE) { |
376 | 166k | kind = ObjectField::FIELD_STR; |
377 | 166k | expr1 = alloc->make<LiteralString>(next.location, |
378 | 166k | next.fodder, |
379 | 166k | next.data32(), |
380 | 166k | LiteralString::DOUBLE, |
381 | 166k | "", |
382 | 166k | ""); |
383 | 208k | } else if (next.kind == Token::STRING_SINGLE) { |
384 | 88.4k | kind = ObjectField::FIELD_STR; |
385 | 88.4k | expr1 = alloc->make<LiteralString>(next.location, |
386 | 88.4k | next.fodder, |
387 | 88.4k | next.data32(), |
388 | 88.4k | LiteralString::SINGLE, |
389 | 88.4k | "", |
390 | 88.4k | ""); |
391 | 120k | } else if (next.kind == Token::STRING_BLOCK) { |
392 | 3.95k | kind = ObjectField::FIELD_STR; |
393 | 3.95k | expr1 = alloc->make<LiteralString>(next.location, |
394 | 3.95k | next.fodder, |
395 | 3.95k | next.data32(), |
396 | 3.95k | LiteralString::BLOCK, |
397 | 3.95k | next.stringBlockIndent, |
398 | 3.95k | next.stringBlockTermIndent); |
399 | 116k | } else if (next.kind == Token::VERBATIM_STRING_SINGLE) { |
400 | 1.22k | kind = ObjectField::FIELD_STR; |
401 | 1.22k | expr1 = alloc->make<LiteralString>(next.location, |
402 | 1.22k | next.fodder, |
403 | 1.22k | next.data32(), |
404 | 1.22k | LiteralString::VERBATIM_SINGLE, |
405 | 1.22k | "", |
406 | 1.22k | ""); |
407 | 114k | } else if (next.kind == Token::VERBATIM_STRING_DOUBLE) { |
408 | 2.12k | kind = ObjectField::FIELD_STR; |
409 | 2.12k | expr1 = alloc->make<LiteralString>(next.location, |
410 | 2.12k | next.fodder, |
411 | 2.12k | next.data32(), |
412 | 2.12k | LiteralString::VERBATIM_DOUBLE, |
413 | 2.12k | "", |
414 | 2.12k | ""); |
415 | 112k | } else { |
416 | 112k | kind = ObjectField::FIELD_EXPR; |
417 | 112k | fodder1 = next.fodder; |
418 | 112k | expr1 = parse(MAX_PRECEDENCE, current_depth + 1); |
419 | 112k | Token bracket_r = popExpect(Token::BRACKET_R); |
420 | 112k | fodder2 = bracket_r.fodder; |
421 | 112k | } |
422 | | |
423 | 5.31M | bool is_method = false; |
424 | 5.31M | bool meth_comma = false; |
425 | 5.31M | ArgParams params; |
426 | 5.31M | Fodder fodder_l; |
427 | 5.31M | Fodder fodder_r; |
428 | 5.31M | if (peek().kind == Token::PAREN_L) { |
429 | 2.73M | Token paren_l = pop(); |
430 | 2.73M | fodder_l = paren_l.fodder; |
431 | 2.73M | params = parseParams("method parameter", meth_comma, fodder_r, current_depth); |
432 | 2.73M | is_method = true; |
433 | 2.73M | } |
434 | | |
435 | 5.31M | bool plus_sugar = false; |
436 | | |
437 | 5.31M | Token op = popExpect(Token::OPERATOR); |
438 | 5.31M | const char *od = op.data.c_str(); |
439 | 5.31M | if (*od == '+') { |
440 | 47.6k | plus_sugar = true; |
441 | 47.6k | od++; |
442 | 47.6k | } |
443 | 5.31M | unsigned colons = 0; |
444 | 13.3M | for (; *od != '\0'; ++od) { |
445 | 8.03M | if (*od != ':') { |
446 | 52 | throw StaticError( |
447 | 52 | next.location, |
448 | 52 | "expected one of :, ::, :::, +:, +::, +:::, got: " + op.data); |
449 | 52 | } |
450 | 8.03M | ++colons; |
451 | 8.03M | } |
452 | 5.31M | ObjectField::Hide field_hide; |
453 | 5.31M | switch (colons) { |
454 | 2.58M | case 1: field_hide = ObjectField::INHERIT; break; |
455 | | |
456 | 2.72M | case 2: field_hide = ObjectField::HIDDEN; break; |
457 | | |
458 | 34 | case 3: field_hide = ObjectField::VISIBLE; break; |
459 | | |
460 | 27 | default: |
461 | 27 | throw StaticError( |
462 | 27 | next.location, |
463 | 27 | "expected one of :, ::, :::, +:, +::, +:::, got: " + op.data); |
464 | 5.31M | } |
465 | | |
466 | | // Basic checks for invalid Jsonnet code. |
467 | 5.31M | if (is_method && plus_sugar) { |
468 | 3 | throw StaticError(next.location, |
469 | 3 | "cannot use +: syntax sugar in a method: " + next.data); |
470 | 3 | } |
471 | 5.31M | if (kind != ObjectField::FIELD_EXPR) { |
472 | 5.20M | if (!literal_fields.insert(next.data).second) { |
473 | 30 | throw StaticError(next.location, "duplicate field: " + next.data); |
474 | 30 | } |
475 | 5.20M | } |
476 | | |
477 | 5.31M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
478 | | |
479 | 5.31M | Fodder comma_fodder; |
480 | 5.31M | next = pop(); |
481 | 5.31M | if (next.kind == Token::COMMA) { |
482 | 4.42M | comma_fodder = next.fodder; |
483 | 4.42M | next = pop(); |
484 | 4.42M | got_comma = true; |
485 | 4.42M | } |
486 | 5.31M | fields.emplace_back(kind, |
487 | 5.31M | fodder1, |
488 | 5.31M | fodder2, |
489 | 5.31M | fodder_l, |
490 | 5.31M | fodder_r, |
491 | 5.31M | field_hide, |
492 | 5.31M | plus_sugar, |
493 | 5.31M | is_method, |
494 | 5.31M | expr1, |
495 | 5.31M | id, |
496 | 5.31M | idLocation, |
497 | 5.31M | params, |
498 | 5.31M | meth_comma, |
499 | 5.31M | op.fodder, |
500 | 5.31M | body, |
501 | 5.31M | nullptr, |
502 | 5.31M | comma_fodder); |
503 | 5.31M | } break; |
504 | | |
505 | 155k | case Token::LOCAL: { |
506 | 155k | Fodder local_fodder = next.fodder; |
507 | 155k | Token var_id = popExpect(Token::IDENTIFIER); |
508 | 155k | auto *id = alloc->makeIdentifier(var_id.data32()); |
509 | | |
510 | 155k | if (binds.find(id) != binds.end()) { |
511 | 4 | throw StaticError(var_id.location, "duplicate local var: " + var_id.data); |
512 | 4 | } |
513 | 155k | bool is_method = false; |
514 | 155k | bool func_comma = false; |
515 | 155k | ArgParams params; |
516 | 155k | Fodder paren_l_fodder; |
517 | 155k | Fodder paren_r_fodder; |
518 | 155k | if (peek().kind == Token::PAREN_L) { |
519 | 23.5k | Token paren_l = pop(); |
520 | 23.5k | paren_l_fodder = paren_l.fodder; |
521 | 23.5k | is_method = true; |
522 | 23.5k | params = parseParams("function parameter", func_comma, paren_r_fodder, current_depth); |
523 | 23.5k | } |
524 | 155k | Token eq = popExpect(Token::OPERATOR, "="); |
525 | 155k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
526 | 155k | binds.insert(id); |
527 | | |
528 | 155k | Fodder comma_fodder; |
529 | 155k | next = pop(); |
530 | 155k | if (next.kind == Token::COMMA) { |
531 | 152k | comma_fodder = next.fodder; |
532 | 152k | next = pop(); |
533 | 152k | got_comma = true; |
534 | 152k | } |
535 | 155k | fields.push_back(ObjectField::Local(local_fodder, |
536 | 155k | var_id.fodder, |
537 | 155k | paren_l_fodder, |
538 | 155k | paren_r_fodder, |
539 | 155k | is_method, |
540 | 155k | id, |
541 | 155k | params, |
542 | 155k | func_comma, |
543 | 155k | eq.fodder, |
544 | 155k | body, |
545 | 155k | comma_fodder)); |
546 | | |
547 | 155k | } break; |
548 | | |
549 | 89.0k | case Token::ASSERT: { |
550 | 89.0k | Fodder assert_fodder = next.fodder; |
551 | 89.0k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
552 | 89.0k | AST *msg = nullptr; |
553 | 89.0k | Fodder colon_fodder; |
554 | 89.0k | if (peek().kind == Token::OPERATOR && peek().data == ":") { |
555 | 47.3k | Token colon = pop(); |
556 | 47.3k | colon_fodder = colon.fodder; |
557 | 47.3k | msg = parse(MAX_PRECEDENCE, current_depth + 1); |
558 | 47.3k | } |
559 | | |
560 | 89.0k | Fodder comma_fodder; |
561 | 89.0k | next = pop(); |
562 | 89.0k | if (next.kind == Token::COMMA) { |
563 | 82.1k | comma_fodder = next.fodder; |
564 | 82.1k | next = pop(); |
565 | 82.1k | got_comma = true; |
566 | 82.1k | } |
567 | 89.0k | fields.push_back( |
568 | 89.0k | ObjectField::Assert(assert_fodder, cond, colon_fodder, msg, comma_fodder)); |
569 | 89.0k | } break; |
570 | | |
571 | 216 | default: throw unexpected(next, "parsing field definition"); |
572 | 5.56M | } |
573 | | |
574 | 5.56M | } while (true); |
575 | 1.32M | } |
576 | | |
577 | | /** Parses for x in expr for y in expr if expr for z in expr ... |
578 | | * |
579 | | * \param end The token that ends the comprehension (e.g. ] or }). |
580 | | * \param for_fodder Fodder before the first 'for'. |
581 | | * \param specs The comprehension specs to be populated. |
582 | | * \param current_depth Current recursion depth to prevent stack overflow. |
583 | | * \returns The closing token. |
584 | | */ |
585 | | Token parseComprehensionSpecs(Token::Kind end, Fodder for_fodder, |
586 | | std::vector<ComprehensionSpec> &specs, |
587 | | unsigned current_depth) |
588 | 800k | { |
589 | 800k | if (current_depth >= MAX_PARSER_DEPTH) { |
590 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
591 | 0 | } |
592 | | |
593 | 883k | while (true) { |
594 | 880k | LocationRange l; |
595 | 880k | Token id_token = popExpect(Token::IDENTIFIER); |
596 | 880k | const Identifier *id = alloc->makeIdentifier(id_token.data32()); |
597 | 880k | Token in_token = popExpect(Token::IN); |
598 | 880k | AST *arr = parse(MAX_PRECEDENCE, current_depth + 1); |
599 | 880k | specs.emplace_back( |
600 | 880k | ComprehensionSpec::FOR, for_fodder, id_token.fodder, id, in_token.fodder, arr); |
601 | | |
602 | 880k | Token maybe_if = pop(); |
603 | 1.22M | for (; maybe_if.kind == Token::IF; maybe_if = pop()) { |
604 | 344k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
605 | 344k | specs.emplace_back( |
606 | 344k | ComprehensionSpec::IF, maybe_if.fodder, Fodder{}, nullptr, Fodder{}, cond); |
607 | 344k | } |
608 | 880k | if (maybe_if.kind == end) { |
609 | 797k | return maybe_if; |
610 | 797k | } |
611 | 83.4k | if (maybe_if.kind != Token::FOR) { |
612 | 86 | std::stringstream ss; |
613 | 86 | ss << "expected for, if or " << end << " after for clause, got: " << maybe_if; |
614 | 86 | throw StaticError(maybe_if.location, ss.str()); |
615 | 86 | } |
616 | 83.3k | for_fodder = maybe_if.fodder; |
617 | 83.3k | } |
618 | 800k | } |
619 | | |
620 | | /** Parse a terminal (literal, var, import, etc.), an object declaration, unary operator, |
621 | | * or a parenthesized expression. |
622 | | * |
623 | | * \param current_depth Current recursion depth to prevent stack overflow. |
624 | | * \returns The parsed AST. |
625 | | */ |
626 | | AST *parseTerminalBracketsOrUnary(unsigned current_depth) |
627 | 100M | { |
628 | 100M | if (current_depth >= MAX_PARSER_DEPTH) { |
629 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
630 | 0 | } |
631 | | |
632 | 100M | Token tok = pop(); |
633 | 100M | switch (tok.kind) { |
634 | 0 | case Token::ASSERT: |
635 | 30 | case Token::BRACE_R: |
636 | 59 | case Token::BRACKET_R: |
637 | 147 | case Token::COMMA: |
638 | 190 | case Token::DOT: |
639 | 193 | case Token::ELSE: |
640 | 193 | case Token::ERROR: |
641 | 198 | case Token::FOR: |
642 | 198 | case Token::FUNCTION: |
643 | 198 | case Token::IF: |
644 | 209 | case Token::IN: |
645 | 209 | case Token::IMPORT: |
646 | 209 | case Token::IMPORTSTR: |
647 | 209 | case Token::IMPORTBIN: |
648 | 209 | case Token::LOCAL: |
649 | 253 | case Token::PAREN_R: |
650 | 285 | case Token::SEMICOLON: |
651 | 289 | case Token::TAILSTRICT: |
652 | 292 | case Token::THEN: throw unexpected(tok, "parsing terminal"); |
653 | | |
654 | 1.97k | case Token::END_OF_FILE: throw StaticError(tok.location, "unexpected end of file."); |
655 | | |
656 | 1.40M | case Token::OPERATOR: { |
657 | 1.40M | UnaryOp uop; |
658 | 1.40M | if (!op_is_unary(tok.data, uop)) { |
659 | 597 | std::stringstream ss; |
660 | 597 | ss << "not a unary operator: " << tok.data; |
661 | 597 | throw StaticError(tok.location, ss.str()); |
662 | 597 | } |
663 | 1.40M | AST *expr = parse(UNARY_PRECEDENCE, current_depth + 1); |
664 | 1.40M | return alloc->make<Unary>(span(tok, expr), tok.fodder, uop, expr); |
665 | 1.40M | } |
666 | 1.07M | case Token::BRACE_L: { |
667 | 1.07M | AST *obj; |
668 | 1.07M | parseObjectRemainder(obj, tok, current_depth + 1); |
669 | 1.07M | return obj; |
670 | 1.40M | } |
671 | | |
672 | 3.07M | case Token::BRACKET_L: { |
673 | 3.07M | Token next = peek(); |
674 | 3.07M | if (next.kind == Token::BRACKET_R) { |
675 | 506k | Token bracket_r = pop(); |
676 | 506k | return alloc->make<Array>( |
677 | 506k | span(tok, next), tok.fodder, Array::Elements{}, false, bracket_r.fodder); |
678 | 506k | } |
679 | 2.56M | AST *first = parse(MAX_PRECEDENCE, current_depth + 1); |
680 | 2.56M | bool got_comma = false; |
681 | 2.56M | Fodder comma_fodder; |
682 | 2.56M | next = peek(); |
683 | 2.56M | if (!got_comma && next.kind == Token::COMMA) { |
684 | 388k | Token comma = pop(); |
685 | 388k | comma_fodder = comma.fodder; |
686 | 388k | next = peek(); |
687 | 388k | got_comma = true; |
688 | 388k | } |
689 | | |
690 | 2.56M | if (next.kind == Token::FOR) { |
691 | | // It's a comprehension |
692 | 700k | Token for_token = pop(); |
693 | 700k | std::vector<ComprehensionSpec> specs; |
694 | 700k | Token last = parseComprehensionSpecs(Token::BRACKET_R, for_token.fodder, specs, current_depth + 1); |
695 | 700k | return alloc->make<ArrayComprehension>(span(tok, last), |
696 | 700k | tok.fodder, |
697 | 700k | first, |
698 | 700k | comma_fodder, |
699 | 700k | got_comma, |
700 | 700k | specs, |
701 | 700k | last.fodder); |
702 | 700k | } |
703 | | |
704 | | // Not a comprehension: It can have more elements. |
705 | 1.86M | Array::Elements elements; |
706 | 1.86M | elements.emplace_back(first, comma_fodder); |
707 | 6.84M | do { |
708 | 6.84M | if (next.kind == Token::BRACKET_R) { |
709 | 1.85M | Token bracket_r = pop(); |
710 | 1.85M | return alloc->make<Array>( |
711 | 1.85M | span(tok, next), tok.fodder, elements, got_comma, bracket_r.fodder); |
712 | 1.85M | } |
713 | 4.98M | if (!got_comma) { |
714 | 766 | std::stringstream ss; |
715 | 766 | ss << "expected a comma before next array element."; |
716 | 766 | throw StaticError(next.location, ss.str()); |
717 | 766 | } |
718 | 4.98M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
719 | 4.98M | comma_fodder.clear(); |
720 | 4.98M | got_comma = false; |
721 | 4.98M | next = peek(); |
722 | 4.98M | if (next.kind == Token::COMMA) { |
723 | 4.63M | Token comma = pop(); |
724 | 4.63M | comma_fodder = comma.fodder; |
725 | 4.63M | next = peek(); |
726 | 4.63M | got_comma = true; |
727 | 4.63M | } |
728 | 4.98M | elements.emplace_back(expr, comma_fodder); |
729 | 4.98M | } while (true); |
730 | 1.86M | } |
731 | | |
732 | 1.10M | case Token::PAREN_L: { |
733 | 1.10M | auto *inner = parse(MAX_PRECEDENCE, current_depth + 1); |
734 | 1.10M | Token close = popExpect(Token::PAREN_R); |
735 | 1.10M | return alloc->make<Parens>(span(tok, close), tok.fodder, inner, close.fodder); |
736 | 1.86M | } |
737 | | |
738 | | // Literals |
739 | 13.0M | case Token::NUMBER: return alloc->make<LiteralNumber>(span(tok), tok.fodder, tok.data); |
740 | | |
741 | 10.1M | case Token::STRING_SINGLE: |
742 | 10.1M | return alloc->make<LiteralString>( |
743 | 10.1M | span(tok), tok.fodder, tok.data32(), LiteralString::SINGLE, "", ""); |
744 | 207k | case Token::STRING_DOUBLE: |
745 | 207k | return alloc->make<LiteralString>( |
746 | 207k | span(tok), tok.fodder, tok.data32(), LiteralString::DOUBLE, "", ""); |
747 | 6.77k | case Token::STRING_BLOCK: |
748 | 6.77k | return alloc->make<LiteralString>(span(tok), |
749 | 6.77k | tok.fodder, |
750 | 6.77k | tok.data32(), |
751 | 6.77k | LiteralString::BLOCK, |
752 | 6.77k | tok.stringBlockIndent, |
753 | 6.77k | tok.stringBlockTermIndent); |
754 | 2.44k | case Token::VERBATIM_STRING_SINGLE: |
755 | 2.44k | return alloc->make<LiteralString>( |
756 | 2.44k | span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_SINGLE, "", ""); |
757 | 4.64k | case Token::VERBATIM_STRING_DOUBLE: |
758 | 4.64k | return alloc->make<LiteralString>( |
759 | 4.64k | span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_DOUBLE, "", ""); |
760 | | |
761 | 1.06M | case Token::FALSE: return alloc->make<LiteralBoolean>(span(tok), tok.fodder, false); |
762 | | |
763 | 785k | case Token::TRUE: return alloc->make<LiteralBoolean>(span(tok), tok.fodder, true); |
764 | | |
765 | 397k | case Token::NULL_LIT: return alloc->make<LiteralNull>(span(tok), tok.fodder); |
766 | | |
767 | | // Variables |
768 | 110k | case Token::DOLLAR: return alloc->make<Dollar>(span(tok), tok.fodder); |
769 | | |
770 | 67.8M | case Token::IDENTIFIER: |
771 | 67.8M | return alloc->make<Var>(span(tok), tok.fodder, alloc->makeIdentifier(tok.data32())); |
772 | | |
773 | 153k | case Token::SELF: return alloc->make<Self>(span(tok), tok.fodder); |
774 | | |
775 | 7.47k | case Token::SUPER: { |
776 | 7.47k | Token next = pop(); |
777 | 7.47k | AST *index = nullptr; |
778 | 7.47k | const Identifier *id = nullptr; |
779 | 7.47k | Fodder id_fodder; |
780 | 7.47k | switch (next.kind) { |
781 | 6.41k | case Token::DOT: { |
782 | 6.41k | Token field_id = popExpect(Token::IDENTIFIER); |
783 | 6.41k | id_fodder = field_id.fodder; |
784 | 6.41k | id = alloc->makeIdentifier(field_id.data32()); |
785 | 6.41k | } break; |
786 | 1.05k | case Token::BRACKET_L: { |
787 | 1.05k | index = parse(MAX_PRECEDENCE, current_depth + 1); |
788 | 1.05k | Token bracket_r = popExpect(Token::BRACKET_R); |
789 | 1.05k | id_fodder = bracket_r.fodder; // Not id_fodder, but use the same var. |
790 | 1.05k | } break; |
791 | 9 | default: throw StaticError(tok.location, "expected . or [ after super."); |
792 | 7.47k | } |
793 | 6.96k | return alloc->make<SuperIndex>( |
794 | 6.96k | span(tok), tok.fodder, next.fodder, index, id_fodder, id); |
795 | 7.47k | } |
796 | 100M | } |
797 | | |
798 | 0 | std::cerr << "INTERNAL ERROR: Unknown tok kind: " << tok.kind << std::endl; |
799 | 0 | std::abort(); |
800 | 0 | return nullptr; // Quiet, compiler. |
801 | 100M | } |
802 | | |
803 | | /** If the first token makes it clear that we will be parsing a greedy construct, return the AST. |
804 | | * Otherwise, return nullptr. Greedy constructs are those that consume as many tokens as possible |
805 | | * on the right hand side because they have no closing token. |
806 | | * |
807 | | * \param current_depth Current recursion depth to prevent stack overflow. |
808 | | * \returns The parsed AST or nullptr. |
809 | | */ |
810 | | AST *maybeParseGreedy(unsigned current_depth) |
811 | 115M | { |
812 | 115M | if (current_depth >= MAX_PARSER_DEPTH) { |
813 | 10 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
814 | 10 | } |
815 | | |
816 | | // Allocate this on the heap to control stack growth. |
817 | 115M | std::unique_ptr<Token> begin_(new Token(peek())); |
818 | 115M | const Token &begin = *begin_; |
819 | | |
820 | 115M | switch (begin.kind) { |
821 | | // These cases have effectively MAX_PRECEDENCE as the first |
822 | | // call to parse will parse them. |
823 | 897k | case Token::ASSERT: { |
824 | 897k | pop(); |
825 | 897k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
826 | 897k | Fodder colonFodder; |
827 | 897k | AST *msg = nullptr; |
828 | 897k | if (peek().kind == Token::OPERATOR && peek().data == ":") { |
829 | 830k | Token colon = pop(); |
830 | 830k | colonFodder = colon.fodder; |
831 | 830k | msg = parse(MAX_PRECEDENCE, current_depth + 1); |
832 | 830k | } |
833 | 897k | Token semicolon = popExpect(Token::SEMICOLON); |
834 | 897k | AST *rest = parse(MAX_PRECEDENCE, current_depth + 1); |
835 | 897k | return alloc->make<Assert>(span(begin, rest), |
836 | 897k | begin.fodder, |
837 | 897k | cond, |
838 | 897k | colonFodder, |
839 | 897k | msg, |
840 | 897k | semicolon.fodder, |
841 | 897k | rest); |
842 | 0 | } |
843 | | |
844 | 1.42M | case Token::ERROR: { |
845 | 1.42M | pop(); |
846 | 1.42M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
847 | 1.42M | return alloc->make<Error>(span(begin, expr), begin.fodder, expr); |
848 | 0 | } |
849 | | |
850 | 7.11M | case Token::IF: { |
851 | 7.11M | pop(); |
852 | 7.11M | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
853 | 7.11M | Token then = popExpect(Token::THEN); |
854 | 7.11M | AST *branch_true = parse(MAX_PRECEDENCE, current_depth + 1); |
855 | 7.11M | if (peek().kind == Token::ELSE) { |
856 | 7.02M | Token else_ = pop(); |
857 | 7.02M | AST *branch_false = parse(MAX_PRECEDENCE, current_depth + 1); |
858 | 7.02M | return alloc->make<Conditional>(span(begin, branch_false), |
859 | 7.02M | begin.fodder, |
860 | 7.02M | cond, |
861 | 7.02M | then.fodder, |
862 | 7.02M | branch_true, |
863 | 7.02M | else_.fodder, |
864 | 7.02M | branch_false); |
865 | 7.02M | } |
866 | 88.8k | return alloc->make<Conditional>(span(begin, branch_true), |
867 | 88.8k | begin.fodder, |
868 | 88.8k | cond, |
869 | 88.8k | then.fodder, |
870 | 88.8k | branch_true, |
871 | 88.8k | Fodder{}, |
872 | 88.8k | nullptr); |
873 | 7.11M | } |
874 | | |
875 | 439k | case Token::FUNCTION: { |
876 | 439k | pop(); // Still available in 'begin'. |
877 | 439k | Token paren_l = pop(); |
878 | 439k | if (paren_l.kind == Token::PAREN_L) { |
879 | 439k | std::vector<AST *> params_asts; |
880 | 439k | bool got_comma; |
881 | 439k | Fodder paren_r_fodder; |
882 | 439k | ArgParams params = parseParams("function parameter", got_comma, paren_r_fodder, current_depth); |
883 | 439k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
884 | 439k | return alloc->make<Function>(span(begin, body), |
885 | 439k | begin.fodder, |
886 | 439k | paren_l.fodder, |
887 | 439k | params, |
888 | 439k | got_comma, |
889 | 439k | paren_r_fodder, |
890 | 439k | body); |
891 | 439k | } else { |
892 | 15 | std::stringstream ss; |
893 | 15 | ss << "expected ( but got " << paren_l; |
894 | 15 | throw StaticError(paren_l.location, ss.str()); |
895 | 15 | } |
896 | 439k | } |
897 | | |
898 | 1.99k | case Token::IMPORT: { |
899 | 1.99k | pop(); |
900 | 1.99k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
901 | 1.99k | if (body->type == AST_LITERAL_STRING) { |
902 | 1.23k | auto *lit = static_cast<LiteralString *>(body); |
903 | 1.23k | if (lit->tokenKind == LiteralString::BLOCK) { |
904 | 3 | throw StaticError(lit->location, |
905 | 3 | "Cannot use text blocks in import statements."); |
906 | 3 | } |
907 | 1.23k | return alloc->make<Import>(span(begin, body), begin.fodder, lit); |
908 | 1.23k | } else { |
909 | 765 | std::stringstream ss; |
910 | 765 | ss << "computed imports are not allowed."; |
911 | 765 | throw StaticError(body->location, ss.str()); |
912 | 765 | } |
913 | 1.99k | } |
914 | | |
915 | 2.57k | case Token::IMPORTSTR: { |
916 | 2.57k | pop(); |
917 | 2.57k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
918 | 2.57k | if (body->type == AST_LITERAL_STRING) { |
919 | 2.05k | auto *lit = static_cast<LiteralString *>(body); |
920 | 2.05k | if (lit->tokenKind == LiteralString::BLOCK) { |
921 | 2 | throw StaticError(lit->location, |
922 | 2 | "Cannot use text blocks in import statements."); |
923 | 2 | } |
924 | 2.04k | return alloc->make<Importstr>(span(begin, body), begin.fodder, lit); |
925 | 2.05k | } else { |
926 | 519 | std::stringstream ss; |
927 | 519 | ss << "computed imports are not allowed."; |
928 | 519 | throw StaticError(body->location, ss.str()); |
929 | 519 | } |
930 | 2.57k | } |
931 | | |
932 | 1.46k | case Token::IMPORTBIN: { |
933 | 1.46k | pop(); |
934 | 1.46k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
935 | 1.46k | if (body->type == AST_LITERAL_STRING) { |
936 | 947 | auto *lit = static_cast<LiteralString *>(body); |
937 | 947 | if (lit->tokenKind == LiteralString::BLOCK) { |
938 | 0 | throw StaticError(lit->location, |
939 | 0 | "Cannot use text blocks in import statements."); |
940 | 0 | } |
941 | 947 | return alloc->make<Importbin>(span(begin, body), begin.fodder, lit); |
942 | 947 | } else { |
943 | 515 | std::stringstream ss; |
944 | 515 | ss << "computed imports are not allowed."; |
945 | 515 | throw StaticError(body->location, ss.str()); |
946 | 515 | } |
947 | 1.46k | } |
948 | | |
949 | 5.28M | case Token::LOCAL: { |
950 | 5.28M | pop(); |
951 | 5.28M | Local::Binds binds; |
952 | 5.56M | do { |
953 | 5.56M | Token delim = parseBind(binds, current_depth + 1); |
954 | 5.56M | if (delim.kind != Token::SEMICOLON && delim.kind != Token::COMMA) { |
955 | 42 | std::stringstream ss; |
956 | 42 | ss << "expected , or ; but got " << delim; |
957 | 42 | throw StaticError(delim.location, ss.str()); |
958 | 42 | } |
959 | 5.56M | if (delim.kind == Token::SEMICOLON) |
960 | 5.27M | break; |
961 | 5.56M | } while (true); |
962 | 5.28M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
963 | 5.28M | return alloc->make<Local>(span(begin, body), begin.fodder, binds, body); |
964 | 5.28M | } |
965 | | |
966 | 100M | default: |
967 | 100M | return nullptr; |
968 | 115M | } |
969 | 115M | } |
970 | | |
971 | | |
972 | | /** Parse a general expression. |
973 | | * |
974 | | * Consume infix tokens up to (but not including) max_precedence, then stop. |
975 | | * \param max_precedence The maximum precedence to consider. |
976 | | * \param current_depth Current recursion depth to prevent stack overflow. |
977 | | * \returns The parsed AST. |
978 | | */ |
979 | | AST *parse(unsigned max_precedence, unsigned current_depth) |
980 | 115M | { |
981 | 115M | if (current_depth >= MAX_PARSER_DEPTH) { |
982 | 13 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
983 | 13 | } |
984 | | |
985 | 115M | AST *ast = maybeParseGreedy(current_depth + 1); |
986 | | // There cannot be an operator after a greedy parse. |
987 | 115M | if (ast != nullptr) return ast; |
988 | | |
989 | | // If we get here, we could be parsing an infix construct. |
990 | | |
991 | | // Allocate this on the heap to control stack growth. |
992 | 100M | std::unique_ptr<Token> begin_(new Token(peek())); |
993 | 100M | const Token &begin = *begin_; |
994 | | |
995 | 100M | AST *lhs = parseTerminalBracketsOrUnary(current_depth + 1); |
996 | | |
997 | 100M | return parseInfix(lhs, begin, max_precedence, current_depth + 1); |
998 | 115M | } |
999 | | |
1000 | | /** Parse infix operators (binary operators, indexing, function calls). |
1001 | | * |
1002 | | * \param lhs Left-hand side of the operator. |
1003 | | * \param begin The token representing the beginning of the expression. |
1004 | | * \param max_precedence The maximum precedence to consider. |
1005 | | * \param current_depth Current recursion depth to prevent stack overflow. |
1006 | | * \returns The parsed AST. |
1007 | | */ |
1008 | | AST *parseInfix(AST *lhs, const Token &begin, unsigned max_precedence, unsigned current_depth) |
1009 | 100M | { |
1010 | 100M | if (current_depth >= MAX_PARSER_DEPTH) { |
1011 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
1012 | 0 | } |
1013 | | |
1014 | 156M | while (true) { |
1015 | | |
1016 | 156M | BinaryOp bop = BOP_PLUS; |
1017 | 156M | unsigned op_precedence = 0; |
1018 | | |
1019 | 156M | switch (peek().kind) { |
1020 | | // Logical / arithmetic binary operator. |
1021 | 6.68k | case Token::IN: |
1022 | 24.1M | case Token::OPERATOR: |
1023 | | // These occur if the outer statement was an assert or array slice. |
1024 | | // Either way, we terminate the parsing here. |
1025 | 24.1M | if (peek().data == ":" || peek().data == "::") { |
1026 | 1.78M | return lhs; |
1027 | 1.78M | } |
1028 | 22.3M | if (!op_is_binary(peek().data, bop)) { |
1029 | 433 | std::stringstream ss; |
1030 | 433 | ss << "not a binary operator: " << peek().data; |
1031 | 433 | throw StaticError(peek().location, ss.str()); |
1032 | 433 | } |
1033 | 22.3M | op_precedence = precedence_map[bop]; |
1034 | 22.3M | break; |
1035 | | |
1036 | | // Index, Apply |
1037 | 15.6M | case Token::DOT: |
1038 | 19.4M | case Token::BRACKET_L: |
1039 | 37.1M | case Token::PAREN_L: |
1040 | 37.3M | case Token::BRACE_L: |
1041 | 37.3M | op_precedence = APPLY_PRECEDENCE; |
1042 | 37.3M | break; |
1043 | | |
1044 | 95.1M | default: |
1045 | | // This happens when we reach EOF or the terminating token of an outer context. |
1046 | 95.1M | return lhs; |
1047 | 156M | } |
1048 | | |
1049 | | // If higher precedence than the outer recursive call, let that handle it. |
1050 | 59.7M | if (op_precedence >= max_precedence) |
1051 | 3.48M | return lhs; |
1052 | | |
1053 | 56.2M | Token op = pop(); |
1054 | | |
1055 | 56.2M | switch (op.kind) { |
1056 | 3.75M | case Token::BRACKET_L: { |
1057 | 3.75M | bool is_slice; |
1058 | 3.75M | AST *first = nullptr; |
1059 | 3.75M | Fodder second_fodder; |
1060 | 3.75M | AST *second = nullptr; |
1061 | 3.75M | Fodder third_fodder; |
1062 | 3.75M | AST *third = nullptr; |
1063 | | |
1064 | 3.75M | if (peek().kind == Token::BRACKET_R) |
1065 | 9 | throw unexpected(pop(), "parsing index"); |
1066 | | |
1067 | 3.75M | if (peek().data != ":" && peek().data != "::") { |
1068 | 3.69M | first = parse(MAX_PRECEDENCE, current_depth + 1); |
1069 | 3.69M | } |
1070 | | |
1071 | 3.75M | if (peek().kind == Token::OPERATOR && peek().data == "::") { |
1072 | | // Handle :: |
1073 | 2.79k | is_slice = true; |
1074 | 2.79k | Token joined = pop(); |
1075 | 2.79k | second_fodder = joined.fodder; |
1076 | | |
1077 | 2.79k | if (peek().kind != Token::BRACKET_R) |
1078 | 2.07k | third = parse(MAX_PRECEDENCE, current_depth + 1); |
1079 | | |
1080 | 3.74M | } else if (peek().kind != Token::BRACKET_R) { |
1081 | 409k | is_slice = true; |
1082 | 409k | Token delim = pop(); |
1083 | 409k | if (delim.data != ":") |
1084 | 352 | throw unexpected(delim, "parsing slice"); |
1085 | | |
1086 | 408k | second_fodder = delim.fodder; |
1087 | | |
1088 | 408k | if (peek().data != ":" && peek().kind != Token::BRACKET_R) |
1089 | 183k | second = parse(MAX_PRECEDENCE, current_depth + 1); |
1090 | | |
1091 | 408k | if (peek().kind != Token::BRACKET_R) { |
1092 | 26.8k | Token delim = pop(); |
1093 | 26.8k | if (delim.data != ":") |
1094 | 116 | throw unexpected(delim, "parsing slice"); |
1095 | | |
1096 | 26.6k | third_fodder = delim.fodder; |
1097 | | |
1098 | 26.6k | if (peek().kind != Token::BRACKET_R) |
1099 | 24.4k | third = parse(MAX_PRECEDENCE, current_depth + 1); |
1100 | 26.6k | } |
1101 | 3.33M | } else { |
1102 | 3.33M | is_slice = false; |
1103 | 3.33M | } |
1104 | 3.75M | Token end = popExpect(Token::BRACKET_R); |
1105 | 3.75M | lhs = alloc->make<Index>(span(begin, end), |
1106 | 3.75M | EMPTY_FODDER, |
1107 | 3.75M | lhs, |
1108 | 3.75M | op.fodder, |
1109 | 3.75M | is_slice, |
1110 | 3.75M | first, |
1111 | 3.75M | second_fodder, |
1112 | 3.75M | second, |
1113 | 3.75M | third_fodder, |
1114 | 3.75M | third, |
1115 | 3.75M | end.fodder); |
1116 | 3.75M | break; |
1117 | 3.75M | } |
1118 | 15.6M | case Token::DOT: { |
1119 | 15.6M | Token field_id = popExpect(Token::IDENTIFIER); |
1120 | 15.6M | const Identifier *id = alloc->makeIdentifier(field_id.data32()); |
1121 | 15.6M | lhs = alloc->make<Index>(span(begin, field_id), |
1122 | 15.6M | EMPTY_FODDER, |
1123 | 15.6M | lhs, |
1124 | 15.6M | op.fodder, |
1125 | 15.6M | field_id.fodder, |
1126 | 15.6M | id); |
1127 | 15.6M | break; |
1128 | 3.75M | } |
1129 | 17.7M | case Token::PAREN_L: { |
1130 | 17.7M | ArgParams args; |
1131 | 17.7M | bool got_comma; |
1132 | 17.7M | Token end = parseArgs(args, "function argument", got_comma, current_depth); |
1133 | 17.7M | bool got_named = false; |
1134 | 29.5M | for (const auto& arg : args) { |
1135 | 29.5M | if (arg.id != nullptr) { |
1136 | 69.2k | got_named = true; |
1137 | 29.4M | } else { |
1138 | 29.4M | if (got_named) { |
1139 | 11 | throw StaticError(arg.expr->location, "Positional argument after a named argument is not allowed"); |
1140 | 11 | } |
1141 | 29.4M | } |
1142 | 29.5M | } |
1143 | 17.7M | bool tailstrict = false; |
1144 | 17.7M | Fodder tailstrict_fodder; |
1145 | 17.7M | if (peek().kind == Token::TAILSTRICT) { |
1146 | 872k | Token tailstrict_token = pop(); |
1147 | 872k | tailstrict_fodder = tailstrict_token.fodder; |
1148 | 872k | tailstrict = true; |
1149 | 872k | } |
1150 | 17.7M | lhs = alloc->make<Apply>(span(begin, end), |
1151 | 17.7M | EMPTY_FODDER, |
1152 | 17.7M | lhs, |
1153 | 17.7M | op.fodder, |
1154 | 17.7M | args, |
1155 | 17.7M | got_comma, |
1156 | 17.7M | end.fodder, |
1157 | 17.7M | tailstrict_fodder, |
1158 | 17.7M | tailstrict); |
1159 | 17.7M | break; |
1160 | 17.7M | } |
1161 | 249k | case Token::BRACE_L: { |
1162 | 249k | AST *obj; |
1163 | 249k | Token end = parseObjectRemainder(obj, op, current_depth + 1); |
1164 | 249k | lhs = alloc->make<ApplyBrace>(span(begin, end), EMPTY_FODDER, lhs, obj); |
1165 | 249k | break; |
1166 | 17.7M | } |
1167 | | |
1168 | 5.37k | case Token::IN: { |
1169 | 5.37k | if (peek().kind == Token::SUPER) { |
1170 | 780 | Token super = pop(); |
1171 | 780 | lhs = alloc->make<InSuper>( |
1172 | 780 | span(begin, super), EMPTY_FODDER, lhs, op.fodder, super.fodder); |
1173 | 4.59k | } else { |
1174 | 4.59k | AST *rhs = parse(op_precedence, current_depth + 1); |
1175 | 4.59k | lhs = alloc->make<Binary>( |
1176 | 4.59k | span(begin, rhs), EMPTY_FODDER, lhs, op.fodder, bop, rhs); |
1177 | 4.59k | } |
1178 | 5.37k | break; |
1179 | 17.7M | } |
1180 | | |
1181 | 18.9M | case Token::OPERATOR: { |
1182 | 18.9M | AST *rhs = parse(op_precedence, current_depth + 1); |
1183 | 18.9M | lhs = alloc->make<Binary>( |
1184 | 18.9M | span(begin, rhs), EMPTY_FODDER, lhs, op.fodder, bop, rhs); |
1185 | 18.9M | break; |
1186 | 17.7M | } |
1187 | | |
1188 | 0 | default: { |
1189 | 0 | std::cerr << "Should not be here." << std::endl; |
1190 | 0 | abort(); |
1191 | 17.7M | } |
1192 | 56.2M | } |
1193 | 56.2M | } |
1194 | | |
1195 | | // (1 & ((1 + (1 * 1)) + 1)) & 1 |
1196 | | // |
1197 | | // |
1198 | | |
1199 | | /* |
1200 | | // Allocate this on the heap to control stack growth. |
1201 | | std::unique_ptr<Token> begin_(new Token(peek())); |
1202 | | const Token &begin = *begin_; |
1203 | | */ |
1204 | 100M | } |
1205 | | }; |
1206 | | |
1207 | | } // namespace |
1208 | | |
1209 | | AST *jsonnet_parse(Allocator *alloc, Tokens &tokens) |
1210 | 42.0k | { |
1211 | 42.0k | Parser parser(tokens, alloc); |
1212 | 42.0k | unsigned parse_depth = 0; |
1213 | 42.0k | AST *expr = parser.parse(MAX_PRECEDENCE, parse_depth); |
1214 | 42.0k | if (tokens.front().kind != Token::END_OF_FILE) { |
1215 | 607 | std::stringstream ss; |
1216 | 607 | ss << "did not expect: " << tokens.front(); |
1217 | 607 | throw StaticError(tokens.front().location, ss.str()); |
1218 | 607 | } |
1219 | | |
1220 | 41.4k | return expr; |
1221 | 42.0k | } |
1222 | | |
1223 | | } // namespace jsonnet::internal |