/src/jsonnet/core/parser.cpp
Line | Count | Source |
1 | | /* |
2 | | Copyright 2015 Google Inc. All rights reserved. |
3 | | |
4 | | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | you may not use this file except in compliance with the License. |
6 | | You may obtain a copy of the License at |
7 | | |
8 | | http://www.apache.org/licenses/LICENSE-2.0 |
9 | | |
10 | | Unless required by applicable law or agreed to in writing, software |
11 | | distributed under the License is distributed on an "AS IS" BASIS, |
12 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | See the License for the specific language governing permissions and |
14 | | limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <cassert> |
18 | | #include <cmath> |
19 | | #include <cstdlib> |
20 | | |
21 | | #include <iomanip> |
22 | | #include <list> |
23 | | #include <memory> |
24 | | #include <set> |
25 | | #include <sstream> |
26 | | #include <locale> |
27 | | #include <string> |
28 | | |
29 | | #include "ast.h" |
30 | | #include "desugarer.h" |
31 | | #include "lexer.h" |
32 | | #include "parser.h" |
33 | | #include "static_error.h" |
34 | | |
35 | | namespace jsonnet::internal { |
36 | | |
/** Render a double as text, independently of the process-wide locale.
 *
 * Values with no fractional part are written in fixed notation without decimals; all other
 * values are written with 17 significant digits.
 *
 * \param v The number to render.
 * \returns The textual form of v.
 */
std::string jsonnet_unparse_number(double v)
{
    std::stringstream out;
    // Pin the classic locale so the output is the same even if the user of the
    // library changed the global locale.
    out.imbue(std::locale::classic());

    const bool has_fraction = !(v == std::floor(v));
    if (has_fraction) {
        // See "What Every Computer Scientist Should Know About Floating-Point Arithmetic"
        // Theorem 15
        // https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html
        out << std::setprecision(17);
        out << v;
        return out.str();
    }

    out << std::fixed << std::setprecision(0) << v;
    return out.str();
}
54 | | |
55 | | namespace { |
56 | | |
// File-local, default-constructed Fodder constant.
// NOTE(review): presumably represents "no fodder"; confirm against Fodder's definition.
static const Fodder EMPTY_FODDER;
58 | | |
/** Upper bound on the recursion depth of the parser functions.
 *
 * Pathological or malicious input can nest constructs deeply enough to overflow the stack;
 * parser functions compare their current depth against this limit and fail with a
 * StaticError instead of recursing further.
 */
static constexpr unsigned MAX_PARSER_DEPTH = 1000;
64 | | |
65 | | static bool op_is_unary(const std::string &op, UnaryOp &uop) |
66 | 1.79M | { |
67 | 1.79M | auto it = unary_map.find(op); |
68 | 1.79M | if (it == unary_map.end()) |
69 | 619 | return false; |
70 | 1.79M | uop = it->second; |
71 | 1.79M | return true; |
72 | 1.79M | } |
73 | | |
74 | | static bool op_is_binary(const std::string &op, BinaryOp &bop) |
75 | 24.5M | { |
76 | 24.5M | auto it = binary_map.find(op); |
77 | 24.5M | if (it == binary_map.end()) |
78 | 489 | return false; |
79 | 24.5M | bop = it->second; |
80 | 24.5M | return true; |
81 | 24.5M | } |
82 | | |
83 | | LocationRange span(const Token &begin) |
84 | 102M | { |
85 | 102M | return LocationRange(begin.location.file, begin.location.begin, begin.location.end); |
86 | 102M | } |
87 | | |
88 | | LocationRange span(const Token &begin, const Token &end) |
89 | 46.3M | { |
90 | 46.3M | return LocationRange(begin.location.file, begin.location.begin, end.location.end); |
91 | 46.3M | } |
92 | | |
93 | | LocationRange span(const Token &begin, AST *end) |
94 | 38.5M | { |
95 | 38.5M | return LocationRange(begin.location.file, begin.location.begin, end->location.end); |
96 | 38.5M | } |
97 | | |
98 | | /** Holds state while parsing a given token list. |
99 | | */ |
100 | | class Parser { |
101 | | // The private member functions are utilities for dealing with the token stream. |
102 | | |
103 | | StaticError unexpected(const Token &tok, const std::string &while_) |
104 | 980 | { |
105 | 980 | std::stringstream ss; |
106 | 980 | ss << "unexpected: " << tok.kind << " while " << while_; |
107 | 980 | return StaticError(tok.location, ss.str()); |
108 | 980 | } |
109 | | |
110 | | Token pop(void) |
111 | 327M | { |
112 | 327M | Token tok = peek(); |
113 | 327M | tokens.pop_front(); |
114 | 327M | return tok; |
115 | 327M | } |
116 | | |
117 | | void push(Token tok) |
118 | 0 | { |
119 | 0 | tokens.push_front(tok); |
120 | 0 | } |
121 | | |
122 | | const Token &peek(void) |
123 | 1.04G | { |
124 | 1.04G | return tokens.front(); |
125 | 1.04G | } |
126 | | |
127 | | /** Only call this is peek() is not an EOF token. */ |
128 | | Token doublePeek(void) |
129 | 36.3M | { |
130 | 36.3M | Tokens::iterator it = tokens.begin(); // First one. |
131 | 36.3M | it++; // Now pointing at the second one. |
132 | 36.3M | return *(it); |
133 | 36.3M | } |
134 | | |
135 | | Token popExpect(Token::Kind k, const char *data = nullptr) |
136 | 50.9M | { |
137 | 50.9M | Token tok = pop(); |
138 | 50.9M | if (tok.kind != k) { |
139 | 685 | std::stringstream ss; |
140 | 685 | ss << "expected token " << k << " but got " << tok; |
141 | 685 | throw StaticError(tok.location, ss.str()); |
142 | 685 | } |
143 | 50.9M | if (data != nullptr && tok.data != data) { |
144 | 52 | std::stringstream ss; |
145 | 52 | ss << "expected operator " << data << " but got " << tok.data; |
146 | 52 | throw StaticError(tok.location, ss.str()); |
147 | 52 | } |
148 | 50.9M | return tok; |
149 | 50.9M | } |
150 | | |
// The token stream being parsed. Held by reference, so pop()/push() modify the
// list that was passed to the constructor.
std::list<Token> &tokens;
// Used to create identifiers and AST nodes (alloc->makeIdentifier, alloc->make<T>).
Allocator *alloc;

public:
/** Bind the parser to a token list and an allocator; neither is copied. */
Parser(Tokens &tokens, Allocator *alloc) : tokens(tokens), alloc(alloc) {}
156 | | |
157 | | /** Parse a comma-separated list of expressions. |
158 | | * |
159 | | * Allows an optional ending comma. |
160 | | * \param args Expressions added here. |
161 | | * \param element_kind Used in error messages when a comma was not found. |
162 | | * \param got_comma Whether a trailing comma was found. |
163 | | * \param current_depth Current recursion depth to prevent stack overflow. |
164 | | * \returns The last token (the one that matched parameter end). |
165 | | */ |
166 | | Token parseArgs(ArgParams &args, const std::string &element_kind, bool &got_comma, unsigned current_depth) |
167 | 24.3M | { |
168 | 24.3M | got_comma = false; |
169 | 24.3M | bool first = true; |
170 | 67.1M | do { |
171 | 67.1M | Token next = peek(); |
172 | 67.1M | if (next.kind == Token::PAREN_R) { |
173 | | // got_comma can be true or false here. |
174 | 24.3M | return pop(); |
175 | 24.3M | } |
176 | 42.7M | if (!first && !got_comma) { |
177 | 277 | std::stringstream ss; |
178 | 277 | ss << "expected a comma before next " << element_kind << "."; |
179 | 277 | throw StaticError(next.location, ss.str()); |
180 | 277 | } |
181 | | // Either id=expr or id or expr, but note that expr could be id==1 so this needs |
182 | | // look-ahead. |
183 | 42.7M | Fodder id_fodder; |
184 | 42.7M | const Identifier *id = nullptr; |
185 | 42.7M | Fodder eq_fodder; |
186 | 42.7M | if (peek().kind == Token::IDENTIFIER) { |
187 | 36.3M | Token maybe_eq = doublePeek(); |
188 | 36.3M | if (maybe_eq.kind == Token::OPERATOR && maybe_eq.data == "=") { |
189 | 577k | id_fodder = peek().fodder; |
190 | 577k | id = alloc->makeIdentifier(peek().data32()); |
191 | 577k | eq_fodder = maybe_eq.fodder; |
192 | 577k | pop(); // id |
193 | 577k | pop(); // eq |
194 | 577k | } |
195 | 36.3M | } |
196 | 42.7M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
197 | 42.7M | got_comma = false; |
198 | 42.7M | first = false; |
199 | 42.7M | Fodder comma_fodder; |
200 | 42.7M | if (peek().kind == Token::COMMA) { |
201 | 18.3M | Token comma = pop(); |
202 | 18.3M | comma_fodder = comma.fodder; |
203 | 18.3M | got_comma = true; |
204 | 18.3M | } |
205 | 42.7M | args.emplace_back(id_fodder, id, eq_fodder, expr, comma_fodder); |
206 | 42.7M | } while (true); |
207 | 24.3M | } |
208 | | |
209 | | /** Parse function parameters. |
210 | | * |
211 | | * \param element_kind Used in error messages. |
212 | | * \param got_comma Whether a trailing comma was found. |
213 | | * \param close_fodder Fodder after the closing parenthesis. |
214 | | * \param current_depth Current recursion depth to prevent stack overflow. |
215 | | * \returns The parameters as ArgParams. |
216 | | */ |
217 | | ArgParams parseParams(const std::string &element_kind, bool &got_comma, Fodder &close_fodder, unsigned current_depth) |
218 | 5.20M | { |
219 | 5.20M | ArgParams params; |
220 | 5.20M | Token paren_r = parseArgs(params, element_kind, got_comma, current_depth); |
221 | | |
222 | | // Check they're all identifiers |
223 | | // parseArgs returns f(x) with x as an expression. Convert it here. |
224 | 10.2M | for (auto &p : params) { |
225 | 10.2M | if (p.id == nullptr) { |
226 | 9.70M | if (p.expr->type != AST_VAR) { |
227 | 8 | throw StaticError(p.expr->location, "could not parse parameter here."); |
228 | 8 | } |
229 | 9.70M | auto *pv = static_cast<Var *>(p.expr); |
230 | 9.70M | p.id = pv->id; |
231 | 9.70M | p.idFodder = pv->openFodder; |
232 | 9.70M | p.expr = nullptr; |
233 | 9.70M | } |
234 | 10.2M | } |
235 | | |
236 | 5.20M | close_fodder = paren_r.fodder; |
237 | | |
238 | 5.20M | return params; |
239 | 5.20M | } |
240 | | |
241 | | /** Parse a local bind statement. |
242 | | * |
243 | | * \param binds The bindings to be populated. |
244 | | * \param current_depth Current recursion depth to prevent stack overflow. |
245 | | * \returns The token after the binding (comma or semicolon). |
246 | | */ |
247 | | Token parseBind(Local::Binds &binds, unsigned current_depth) |
248 | 5.98M | { |
249 | 5.98M | Token var_id = popExpect(Token::IDENTIFIER); |
250 | 5.98M | auto *id = alloc->makeIdentifier(var_id.data32()); |
251 | 5.98M | for (const auto &bind : binds) { |
252 | 837k | if (bind.var == id) |
253 | 10 | throw StaticError(var_id.location, "duplicate local var: " + var_id.data); |
254 | 837k | } |
255 | 5.98M | bool is_function = false; |
256 | 5.98M | ArgParams params; |
257 | 5.98M | bool trailing_comma = false; |
258 | 5.98M | Fodder fodder_l, fodder_r; |
259 | 5.98M | if (peek().kind == Token::PAREN_L) { |
260 | 1.75M | Token paren_l = pop(); |
261 | 1.75M | fodder_l = paren_l.fodder; |
262 | 1.75M | params = parseParams("function parameter", trailing_comma, fodder_r, current_depth); |
263 | 1.75M | is_function = true; |
264 | 1.75M | } |
265 | 5.98M | Token eq = popExpect(Token::OPERATOR, "="); |
266 | 5.98M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
267 | 5.98M | Token delim = pop(); |
268 | 5.98M | binds.emplace_back(var_id.fodder, |
269 | 5.98M | id, |
270 | 5.98M | eq.fodder, |
271 | 5.98M | body, |
272 | 5.98M | is_function, |
273 | 5.98M | fodder_l, |
274 | 5.98M | params, |
275 | 5.98M | trailing_comma, |
276 | 5.98M | fodder_r, |
277 | 5.98M | delim.fodder); |
278 | 5.98M | return delim; |
279 | 5.98M | } |
280 | | |
/** Parse the remainder of an object after the opening brace.
 *
 * Reads members (fields, object-level locals and asserts) until either a closing brace, which
 * yields an Object, or a 'for' token, which yields an ObjectComprehension.
 *
 * \param obj The object AST to be populated.
 * \param tok The opening brace token.
 * \param current_depth Current recursion depth to prevent stack overflow.
 * \returns The closing brace token.
 * \throws StaticError on exceeding MAX_PARSER_DEPTH, on a missing comma between members, on
 *         duplicate literal fields or locals, on a malformed field operator, and on a
 *         comprehension that does not have exactly one visible-by-inheritance [e] field.
 */
Token parseObjectRemainder(AST *&obj, const Token &tok, unsigned current_depth)
{
    if (current_depth >= MAX_PARSER_DEPTH) {
        throw StaticError(peek().location, "Exceeded maximum parse depth limit.");
    }

    ObjectFields fields;
    std::set<std::string> literal_fields;  // For duplicate fields detection.
    std::set<const Identifier *> binds;    // For duplicate locals detection.

    bool got_comma = false;
    bool first = true;
    // `next` always holds the already-popped token that starts the next member (or ends
    // the object); each member branch below refills it before looping.
    Token next = pop();

    do {
        if (next.kind == Token::BRACE_R) {
            obj = alloc->make<Object>(
                span(tok, next), tok.fodder, fields, got_comma, next.fodder);
            return next;

        } else if (next.kind == Token::FOR) {
            // It's a comprehension
            // Locals are allowed alongside the single field; asserts are not.
            unsigned num_fields = 0;
            unsigned num_asserts = 0;
            const ObjectField *field_ptr = nullptr;
            for (const auto &field : fields) {
                if (field.kind == ObjectField::LOCAL)
                    continue;
                if (field.kind == ObjectField::ASSERT) {
                    num_asserts++;
                    continue;
                }
                field_ptr = &field;
                num_fields++;
            }
            if (num_asserts > 0) {
                auto msg = "object comprehension cannot have asserts.";
                throw StaticError(next.location, msg);
            }
            if (num_fields != 1) {
                auto msg = "object comprehension can only have one field.";
                throw StaticError(next.location, msg);
            }
            // num_fields == 1 guarantees field_ptr was set in the loop above.
            const ObjectField &field = *field_ptr;

            if (field.hide != ObjectField::INHERIT) {
                auto msg = "object comprehensions cannot have hidden fields.";
                throw StaticError(next.location, msg);
            }

            if (field.kind != ObjectField::FIELD_EXPR) {
                auto msg = "object comprehensions can only have [e] fields.";
                throw StaticError(next.location, msg);
            }

            std::vector<ComprehensionSpec> specs;
            // The 'for' token is already consumed; its fodder goes to the first spec.
            Token last = parseComprehensionSpecs(Token::BRACE_R, next.fodder, specs, current_depth + 1);
            obj = alloc->make<ObjectComprehension>(
                span(tok, last), tok.fodder, fields, got_comma, specs, last.fodder);

            return last;
        }

        if (!got_comma && !first)
            throw StaticError(next.location, "expected a comma before next field.");

        first = false;
        got_comma = false;

        switch (next.kind) {
            case Token::BRACKET_L:
            case Token::IDENTIFIER:
            case Token::STRING_DOUBLE:
            case Token::STRING_SINGLE:
            case Token::STRING_BLOCK:
            case Token::VERBATIM_STRING_DOUBLE:
            case Token::VERBATIM_STRING_SINGLE: {
                // A field. The name is an identifier (id), a string literal (expr1) or a
                // computed [expr] (expr1, with fodder2 taken from the closing bracket).
                ObjectField::Kind kind;
                AST *expr1 = nullptr;
                const Identifier *id = nullptr;
                Fodder fodder1, fodder2;
                LocationRange idLocation;
                if (next.kind == Token::IDENTIFIER) {
                    fodder1 = next.fodder;
                    kind = ObjectField::FIELD_ID;
                    id = alloc->makeIdentifier(next.data32());
                    idLocation = next.location;
                } else if (next.kind == Token::STRING_DOUBLE) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::DOUBLE,
                                                       "",
                                                       "");
                } else if (next.kind == Token::STRING_SINGLE) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::SINGLE,
                                                       "",
                                                       "");
                } else if (next.kind == Token::STRING_BLOCK) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::BLOCK,
                                                       next.stringBlockIndent,
                                                       next.stringBlockTermIndent);
                } else if (next.kind == Token::VERBATIM_STRING_SINGLE) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::VERBATIM_SINGLE,
                                                       "",
                                                       "");
                } else if (next.kind == Token::VERBATIM_STRING_DOUBLE) {
                    kind = ObjectField::FIELD_STR;
                    expr1 = alloc->make<LiteralString>(next.location,
                                                       next.fodder,
                                                       next.data32(),
                                                       LiteralString::VERBATIM_DOUBLE,
                                                       "",
                                                       "");
                } else {
                    // Only Token::BRACKET_L remains: a computed field name [expr].
                    kind = ObjectField::FIELD_EXPR;
                    fodder1 = next.fodder;
                    expr1 = parse(MAX_PRECEDENCE, current_depth + 1);
                    Token bracket_r = popExpect(Token::BRACKET_R);
                    fodder2 = bracket_r.fodder;
                }

                // Optional method sugar: name(params): body.
                bool is_method = false;
                bool meth_comma = false;
                ArgParams params;
                Fodder fodder_l;
                Fodder fodder_r;
                if (peek().kind == Token::PAREN_L) {
                    Token paren_l = pop();
                    fodder_l = paren_l.fodder;
                    params = parseParams("method parameter", meth_comma, fodder_r, current_depth);
                    is_method = true;
                }

                bool plus_sugar = false;

                // The field operator is an optional leading '+' followed only by colons;
                // the number of colons selects the visibility below.
                Token op = popExpect(Token::OPERATOR);
                const char *od = op.data.c_str();
                if (*od == '+') {
                    plus_sugar = true;
                    od++;
                }
                unsigned colons = 0;
                for (; *od != '\0'; ++od) {
                    if (*od != ':') {
                        throw StaticError(
                            next.location,
                            "expected one of :, ::, :::, +:, +::, +:::, got: " + op.data);
                    }
                    ++colons;
                }
                ObjectField::Hide field_hide;
                switch (colons) {
                    case 1: field_hide = ObjectField::INHERIT; break;

                    case 2: field_hide = ObjectField::HIDDEN; break;

                    case 3: field_hide = ObjectField::VISIBLE; break;

                    default:
                        throw StaticError(
                            next.location,
                            "expected one of :, ::, :::, +:, +::, +:::, got: " + op.data);
                }

                // Basic checks for invalid Jsonnet code.
                if (is_method && plus_sugar) {
                    throw StaticError(next.location,
                                      "cannot use +: syntax sugar in a method: " + next.data);
                }
                if (kind != ObjectField::FIELD_EXPR) {
                    // For identifier and string names `next` is still the name token, so
                    // its text is the key used for duplicate detection.
                    if (!literal_fields.insert(next.data).second) {
                        throw StaticError(next.location, "duplicate field: " + next.data);
                    }
                }

                AST *body = parse(MAX_PRECEDENCE, current_depth + 1);

                // Consume an optional comma, leaving `next` on the token after the member.
                Fodder comma_fodder;
                next = pop();
                if (next.kind == Token::COMMA) {
                    comma_fodder = next.fodder;
                    next = pop();
                    got_comma = true;
                }
                fields.emplace_back(kind,
                                    fodder1,
                                    fodder2,
                                    fodder_l,
                                    fodder_r,
                                    field_hide,
                                    plus_sugar,
                                    is_method,
                                    expr1,
                                    id,
                                    idLocation,
                                    params,
                                    meth_comma,
                                    op.fodder,
                                    body,
                                    nullptr,
                                    comma_fodder);
            } break;

            case Token::LOCAL: {
                // Object-level local: local id = expr or local id(params) = expr.
                Fodder local_fodder = next.fodder;
                Token var_id = popExpect(Token::IDENTIFIER);
                auto *id = alloc->makeIdentifier(var_id.data32());

                if (binds.find(id) != binds.end()) {
                    throw StaticError(var_id.location, "duplicate local var: " + var_id.data);
                }
                bool is_method = false;
                bool func_comma = false;
                ArgParams params;
                Fodder paren_l_fodder;
                Fodder paren_r_fodder;
                if (peek().kind == Token::PAREN_L) {
                    Token paren_l = pop();
                    paren_l_fodder = paren_l.fodder;
                    is_method = true;
                    params = parseParams("function parameter", func_comma, paren_r_fodder, current_depth);
                }
                Token eq = popExpect(Token::OPERATOR, "=");
                AST *body = parse(MAX_PRECEDENCE, current_depth + 1);
                binds.insert(id);

                // Consume an optional comma, leaving `next` on the token after the member.
                Fodder comma_fodder;
                next = pop();
                if (next.kind == Token::COMMA) {
                    comma_fodder = next.fodder;
                    next = pop();
                    got_comma = true;
                }
                fields.push_back(ObjectField::Local(local_fodder,
                                                    var_id.fodder,
                                                    paren_l_fodder,
                                                    paren_r_fodder,
                                                    is_method,
                                                    id,
                                                    params,
                                                    func_comma,
                                                    eq.fodder,
                                                    body,
                                                    comma_fodder));

            } break;

            case Token::ASSERT: {
                // Object-level assert: assert cond or assert cond : msg.
                Fodder assert_fodder = next.fodder;
                AST *cond = parse(MAX_PRECEDENCE, current_depth + 1);
                AST *msg = nullptr;
                Fodder colon_fodder;
                if (peek().kind == Token::OPERATOR && peek().data == ":") {
                    Token colon = pop();
                    colon_fodder = colon.fodder;
                    msg = parse(MAX_PRECEDENCE, current_depth + 1);
                }

                // Consume an optional comma, leaving `next` on the token after the member.
                Fodder comma_fodder;
                next = pop();
                if (next.kind == Token::COMMA) {
                    comma_fodder = next.fodder;
                    next = pop();
                    got_comma = true;
                }
                fields.push_back(
                    ObjectField::Assert(assert_fodder, cond, colon_fodder, msg, comma_fodder));
            } break;

            default: throw unexpected(next, "parsing field definition");
        }

    } while (true);
}
576 | | |
577 | | /** Parses for x in expr for y in expr if expr for z in expr ... |
578 | | * |
579 | | * \param end The token that ends the comprehension (e.g. ] or }). |
580 | | * \param for_fodder Fodder before the first 'for'. |
581 | | * \param specs The comprehension specs to be populated. |
582 | | * \param current_depth Current recursion depth to prevent stack overflow. |
583 | | * \returns The closing token. |
584 | | */ |
585 | | Token parseComprehensionSpecs(Token::Kind end, Fodder for_fodder, |
586 | | std::vector<ComprehensionSpec> &specs, |
587 | | unsigned current_depth) |
588 | 891k | { |
589 | 891k | if (current_depth >= MAX_PARSER_DEPTH) { |
590 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
591 | 0 | } |
592 | | |
593 | 984k | while (true) { |
594 | 982k | LocationRange l; |
595 | 982k | Token id_token = popExpect(Token::IDENTIFIER); |
596 | 982k | const Identifier *id = alloc->makeIdentifier(id_token.data32()); |
597 | 982k | Token in_token = popExpect(Token::IN); |
598 | 982k | AST *arr = parse(MAX_PRECEDENCE, current_depth + 1); |
599 | 982k | specs.emplace_back( |
600 | 982k | ComprehensionSpec::FOR, for_fodder, id_token.fodder, id, in_token.fodder, arr); |
601 | | |
602 | 982k | Token maybe_if = pop(); |
603 | 1.41M | for (; maybe_if.kind == Token::IF; maybe_if = pop()) { |
604 | 431k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
605 | 431k | specs.emplace_back( |
606 | 431k | ComprehensionSpec::IF, maybe_if.fodder, Fodder{}, nullptr, Fodder{}, cond); |
607 | 431k | } |
608 | 982k | if (maybe_if.kind == end) { |
609 | 888k | return maybe_if; |
610 | 888k | } |
611 | 93.5k | if (maybe_if.kind != Token::FOR) { |
612 | 226 | std::stringstream ss; |
613 | 226 | ss << "expected for, if or " << end << " after for clause, got: " << maybe_if; |
614 | 226 | throw StaticError(maybe_if.location, ss.str()); |
615 | 226 | } |
616 | 93.3k | for_fodder = maybe_if.fodder; |
617 | 93.3k | } |
618 | 891k | } |
619 | | |
/** Parse a terminal (literal, var, import, etc.), an object declaration, unary operator,
 * or a parenthesized expression.
 *
 * Pops one token and dispatches on its kind. Token kinds that cannot start such an
 * expression are rejected with a StaticError.
 *
 * \param current_depth Current recursion depth to prevent stack overflow.
 * \returns The parsed AST.
 * \throws StaticError on exceeding MAX_PARSER_DEPTH, on an unexpected token or end of file,
 *         on an operator that is not unary, on a missing comma between array elements, and
 *         when 'super' is not followed by . or [.
 */
AST *parseTerminalBracketsOrUnary(unsigned current_depth)
{
    if (current_depth >= MAX_PARSER_DEPTH) {
        throw StaticError(peek().location, "Exceeded maximum parse depth limit.");
    }

    Token tok = pop();
    switch (tok.kind) {
        // Tokens that can never begin a terminal.
        case Token::ASSERT:
        case Token::BRACE_R:
        case Token::BRACKET_R:
        case Token::COMMA:
        case Token::DOT:
        case Token::ELSE:
        case Token::ERROR:
        case Token::FOR:
        case Token::FUNCTION:
        case Token::IF:
        case Token::IN:
        case Token::IMPORT:
        case Token::IMPORTSTR:
        case Token::IMPORTBIN:
        case Token::LOCAL:
        case Token::PAREN_R:
        case Token::SEMICOLON:
        case Token::TAILSTRICT:
        case Token::THEN: throw unexpected(tok, "parsing terminal");

        case Token::END_OF_FILE: throw StaticError(tok.location, "unexpected end of file.");

        case Token::OPERATOR: {
            // Only unary operators may appear in prefix position.
            UnaryOp uop;
            if (!op_is_unary(tok.data, uop)) {
                std::stringstream ss;
                ss << "not a unary operator: " << tok.data;
                throw StaticError(tok.location, ss.str());
            }
            AST *expr = parse(UNARY_PRECEDENCE, current_depth + 1);
            return alloc->make<Unary>(span(tok, expr), tok.fodder, uop, expr);
        }
        case Token::BRACE_L: {
            AST *obj;
            parseObjectRemainder(obj, tok, current_depth + 1);
            return obj;
        }

        case Token::BRACKET_L: {
            // Array literal or array comprehension.
            Token next = peek();
            if (next.kind == Token::BRACKET_R) {
                // Empty array.
                Token bracket_r = pop();
                return alloc->make<Array>(
                    span(tok, next), tok.fodder, Array::Elements{}, false, bracket_r.fodder);
            }
            AST *first = parse(MAX_PRECEDENCE, current_depth + 1);
            bool got_comma = false;
            Fodder comma_fodder;
            next = peek();
            // got_comma was just set to false, so only the token kind decides here.
            if (!got_comma && next.kind == Token::COMMA) {
                Token comma = pop();
                comma_fodder = comma.fodder;
                next = peek();
                got_comma = true;
            }

            if (next.kind == Token::FOR) {
                // It's a comprehension
                Token for_token = pop();
                std::vector<ComprehensionSpec> specs;
                Token last = parseComprehensionSpecs(Token::BRACKET_R, for_token.fodder, specs, current_depth + 1);
                return alloc->make<ArrayComprehension>(span(tok, last),
                                                       tok.fodder,
                                                       first,
                                                       comma_fodder,
                                                       got_comma,
                                                       specs,
                                                       last.fodder);
            }

            // Not a comprehension: It can have more elements.
            Array::Elements elements;
            elements.emplace_back(first, comma_fodder);
            // `next` is a copy of the peeked (not yet consumed) token at the top of each
            // iteration.
            do {
                if (next.kind == Token::BRACKET_R) {
                    Token bracket_r = pop();
                    return alloc->make<Array>(
                        span(tok, next), tok.fodder, elements, got_comma, bracket_r.fodder);
                }
                if (!got_comma) {
                    std::stringstream ss;
                    ss << "expected a comma before next array element.";
                    throw StaticError(next.location, ss.str());
                }
                AST *expr = parse(MAX_PRECEDENCE, current_depth + 1);
                comma_fodder.clear();
                got_comma = false;
                next = peek();
                if (next.kind == Token::COMMA) {
                    Token comma = pop();
                    comma_fodder = comma.fodder;
                    next = peek();
                    got_comma = true;
                }
                elements.emplace_back(expr, comma_fodder);
            } while (true);
        }

        case Token::PAREN_L: {
            auto *inner = parse(MAX_PRECEDENCE, current_depth + 1);
            Token close = popExpect(Token::PAREN_R);
            return alloc->make<Parens>(span(tok, close), tok.fodder, inner, close.fodder);
        }

        // Literals
        case Token::NUMBER: return alloc->make<LiteralNumber>(span(tok), tok.fodder, tok.data);

        case Token::STRING_SINGLE:
            return alloc->make<LiteralString>(
                span(tok), tok.fodder, tok.data32(), LiteralString::SINGLE, "", "");
        case Token::STRING_DOUBLE:
            return alloc->make<LiteralString>(
                span(tok), tok.fodder, tok.data32(), LiteralString::DOUBLE, "", "");
        case Token::STRING_BLOCK:
            return alloc->make<LiteralString>(span(tok),
                                              tok.fodder,
                                              tok.data32(),
                                              LiteralString::BLOCK,
                                              tok.stringBlockIndent,
                                              tok.stringBlockTermIndent);
        case Token::VERBATIM_STRING_SINGLE:
            return alloc->make<LiteralString>(
                span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_SINGLE, "", "");
        case Token::VERBATIM_STRING_DOUBLE:
            return alloc->make<LiteralString>(
                span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_DOUBLE, "", "");

        case Token::FALSE: return alloc->make<LiteralBoolean>(span(tok), tok.fodder, false);

        case Token::TRUE: return alloc->make<LiteralBoolean>(span(tok), tok.fodder, true);

        case Token::NULL_LIT: return alloc->make<LiteralNull>(span(tok), tok.fodder);

        // Variables
        case Token::DOLLAR: return alloc->make<Dollar>(span(tok), tok.fodder);

        case Token::IDENTIFIER:
            return alloc->make<Var>(span(tok), tok.fodder, alloc->makeIdentifier(tok.data32()));

        case Token::SELF: return alloc->make<Self>(span(tok), tok.fodder);

        case Token::SUPER: {
            // super must be followed by .id or [expr]; exactly one of id / index is set.
            Token next = pop();
            AST *index = nullptr;
            const Identifier *id = nullptr;
            Fodder id_fodder;
            switch (next.kind) {
                case Token::DOT: {
                    Token field_id = popExpect(Token::IDENTIFIER);
                    id_fodder = field_id.fodder;
                    id = alloc->makeIdentifier(field_id.data32());
                } break;
                case Token::BRACKET_L: {
                    index = parse(MAX_PRECEDENCE, current_depth + 1);
                    Token bracket_r = popExpect(Token::BRACKET_R);
                    id_fodder = bracket_r.fodder;  // Not id_fodder, but use the same var.
                } break;
                default: throw StaticError(tok.location, "expected . or [ after super.");
            }
            // Note: the node's location is span(tok), i.e. only the 'super' token itself.
            return alloc->make<SuperIndex>(
                span(tok), tok.fodder, next.fodder, index, id_fodder, id);
        }
    }

    // Reached only for a token kind that none of the cases above handles.
    std::cerr << "INTERNAL ERROR: Unknown tok kind: " << tok.kind << std::endl;
    std::abort();
    return nullptr;  // Quiet, compiler.
}
802 | | |
803 | | /** If the first token makes it clear that we will be parsing a greedy construct, return the AST. |
804 | | * Otherwise, return nullptr. Greedy constructs are those that consume as many tokens as possible |
805 | | * on the right hand side because they have no closing token. |
806 | | * |
807 | | * \param current_depth Current recursion depth to prevent stack overflow. |
808 | | * \returns The parsed AST or nullptr. |
809 | | */ |
810 | | AST *maybeParseGreedy(unsigned current_depth) |
811 | 126M | { |
812 | 126M | if (current_depth >= MAX_PARSER_DEPTH) { |
813 | 11 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
814 | 11 | } |
815 | | |
816 | | // Allocate this on the heap to control stack growth. |
817 | 126M | std::unique_ptr<Token> begin_(new Token(peek())); |
818 | 126M | const Token &begin = *begin_; |
819 | | |
820 | 126M | switch (begin.kind) { |
821 | | // These cases have effectively MAX_PRECEDENCE as the first |
822 | | // call to parse will parse them. |
823 | 973k | case Token::ASSERT: { |
824 | 973k | pop(); |
825 | 973k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
826 | 973k | Fodder colonFodder; |
827 | 973k | AST *msg = nullptr; |
828 | 973k | if (peek().kind == Token::OPERATOR && peek().data == ":") { |
829 | 900k | Token colon = pop(); |
830 | 900k | colonFodder = colon.fodder; |
831 | 900k | msg = parse(MAX_PRECEDENCE, current_depth + 1); |
832 | 900k | } |
833 | 973k | Token semicolon = popExpect(Token::SEMICOLON); |
834 | 973k | AST *rest = parse(MAX_PRECEDENCE, current_depth + 1); |
835 | 973k | return alloc->make<Assert>(span(begin, rest), |
836 | 973k | begin.fodder, |
837 | 973k | cond, |
838 | 973k | colonFodder, |
839 | 973k | msg, |
840 | 973k | semicolon.fodder, |
841 | 973k | rest); |
842 | 0 | } |
843 | | |
844 | 1.49M | case Token::ERROR: { |
845 | 1.49M | pop(); |
846 | 1.49M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
847 | 1.49M | return alloc->make<Error>(span(begin, expr), begin.fodder, expr); |
848 | 0 | } |
849 | | |
850 | 7.66M | case Token::IF: { |
851 | 7.66M | pop(); |
852 | 7.66M | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
853 | 7.66M | Token then = popExpect(Token::THEN); |
854 | 7.66M | AST *branch_true = parse(MAX_PRECEDENCE, current_depth + 1); |
855 | 7.66M | if (peek().kind == Token::ELSE) { |
856 | 7.56M | Token else_ = pop(); |
857 | 7.56M | AST *branch_false = parse(MAX_PRECEDENCE, current_depth + 1); |
858 | 7.56M | return alloc->make<Conditional>(span(begin, branch_false), |
859 | 7.56M | begin.fodder, |
860 | 7.56M | cond, |
861 | 7.56M | then.fodder, |
862 | 7.56M | branch_true, |
863 | 7.56M | else_.fodder, |
864 | 7.56M | branch_false); |
865 | 7.56M | } |
866 | 95.7k | return alloc->make<Conditional>(span(begin, branch_true), |
867 | 95.7k | begin.fodder, |
868 | 95.7k | cond, |
869 | 95.7k | then.fodder, |
870 | 95.7k | branch_true, |
871 | 95.7k | Fodder{}, |
872 | 95.7k | nullptr); |
873 | 7.66M | } |
874 | | |
875 | 475k | case Token::FUNCTION: { |
876 | 475k | pop(); // Still available in 'begin'. |
877 | 475k | Token paren_l = pop(); |
878 | 475k | if (paren_l.kind == Token::PAREN_L) { |
879 | 475k | std::vector<AST *> params_asts; |
880 | 475k | bool got_comma; |
881 | 475k | Fodder paren_r_fodder; |
882 | 475k | ArgParams params = parseParams("function parameter", got_comma, paren_r_fodder, current_depth); |
883 | 475k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
884 | 475k | return alloc->make<Function>(span(begin, body), |
885 | 475k | begin.fodder, |
886 | 475k | paren_l.fodder, |
887 | 475k | params, |
888 | 475k | got_comma, |
889 | 475k | paren_r_fodder, |
890 | 475k | body); |
891 | 475k | } else { |
892 | 19 | std::stringstream ss; |
893 | 19 | ss << "expected ( but got " << paren_l; |
894 | 19 | throw StaticError(paren_l.location, ss.str()); |
895 | 19 | } |
896 | 475k | } |
897 | | |
898 | 2.07k | case Token::IMPORT: { |
899 | 2.07k | pop(); |
900 | 2.07k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
901 | 2.07k | if (body->type == AST_LITERAL_STRING) { |
902 | 1.23k | auto *lit = static_cast<LiteralString *>(body); |
903 | 1.23k | if (lit->tokenKind == LiteralString::BLOCK) { |
904 | 3 | throw StaticError(lit->location, |
905 | 3 | "Cannot use text blocks in import statements."); |
906 | 3 | } |
907 | 1.23k | return alloc->make<Import>(span(begin, body), begin.fodder, lit); |
908 | 1.23k | } else { |
909 | 832 | std::stringstream ss; |
910 | 832 | ss << "computed imports are not allowed."; |
911 | 832 | throw StaticError(body->location, ss.str()); |
912 | 832 | } |
913 | 2.07k | } |
914 | | |
915 | 2.61k | case Token::IMPORTSTR: { |
916 | 2.61k | pop(); |
917 | 2.61k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
918 | 2.61k | if (body->type == AST_LITERAL_STRING) { |
919 | 2.26k | auto *lit = static_cast<LiteralString *>(body); |
920 | 2.26k | if (lit->tokenKind == LiteralString::BLOCK) { |
921 | 2 | throw StaticError(lit->location, |
922 | 2 | "Cannot use text blocks in import statements."); |
923 | 2 | } |
924 | 2.26k | return alloc->make<Importstr>(span(begin, body), begin.fodder, lit); |
925 | 2.26k | } else { |
926 | 350 | std::stringstream ss; |
927 | 350 | ss << "computed imports are not allowed."; |
928 | 350 | throw StaticError(body->location, ss.str()); |
929 | 350 | } |
930 | 2.61k | } |
931 | | |
932 | 1.21k | case Token::IMPORTBIN: { |
933 | 1.21k | pop(); |
934 | 1.21k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
935 | 1.21k | if (body->type == AST_LITERAL_STRING) { |
936 | 937 | auto *lit = static_cast<LiteralString *>(body); |
937 | 937 | if (lit->tokenKind == LiteralString::BLOCK) { |
938 | 0 | throw StaticError(lit->location, |
939 | 0 | "Cannot use text blocks in import statements."); |
940 | 0 | } |
941 | 937 | return alloc->make<Importbin>(span(begin, body), begin.fodder, lit); |
942 | 937 | } else { |
943 | 275 | std::stringstream ss; |
944 | 275 | ss << "computed imports are not allowed."; |
945 | 275 | throw StaticError(body->location, ss.str()); |
946 | 275 | } |
947 | 1.21k | } |
948 | | |
949 | 5.67M | case Token::LOCAL: { |
950 | 5.67M | pop(); |
951 | 5.67M | Local::Binds binds; |
952 | 5.98M | do { |
953 | 5.98M | Token delim = parseBind(binds, current_depth + 1); |
954 | 5.98M | if (delim.kind != Token::SEMICOLON && delim.kind != Token::COMMA) { |
955 | 35 | std::stringstream ss; |
956 | 35 | ss << "expected , or ; but got " << delim; |
957 | 35 | throw StaticError(delim.location, ss.str()); |
958 | 35 | } |
959 | 5.98M | if (delim.kind == Token::SEMICOLON) |
960 | 5.67M | break; |
961 | 5.98M | } while (true); |
962 | 5.67M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
963 | 5.67M | return alloc->make<Local>(span(begin, body), begin.fodder, binds, body); |
964 | 5.67M | } |
965 | | |
966 | 110M | default: |
967 | 110M | return nullptr; |
968 | 126M | } |
969 | 126M | } |
970 | | |
971 | | |
972 | | /** Parse a general expression. |
973 | | * |
974 | | * Consume infix tokens up to (but not including) max_precedence, then stop. |
975 | | * \param max_precedence The maximum precedence to consider. |
976 | | * \param current_depth Current recursion depth to prevent stack overflow. |
977 | | * \returns The parsed AST. |
978 | | */ |
979 | | AST *parse(unsigned max_precedence, unsigned current_depth) |
980 | 126M | { |
981 | 126M | if (current_depth >= MAX_PARSER_DEPTH) { |
982 | 10 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
983 | 10 | } |
984 | | |
985 | 126M | AST *ast = maybeParseGreedy(current_depth + 1); |
986 | | // There cannot be an operator after a greedy parse. |
987 | 126M | if (ast != nullptr) return ast; |
988 | | |
989 | | // If we get here, we could be parsing an infix construct. |
990 | | |
991 | | // Allocate this on the heap to control stack growth. |
992 | 110M | std::unique_ptr<Token> begin_(new Token(peek())); |
993 | 110M | const Token &begin = *begin_; |
994 | | |
995 | 110M | AST *lhs = parseTerminalBracketsOrUnary(current_depth + 1); |
996 | | |
997 | 110M | return parseInfix(lhs, begin, max_precedence, current_depth + 1); |
998 | 126M | } |
999 | | |
1000 | | /** Parse infix operators (binary operators, indexing, function calls). |
1001 | | * |
1002 | | * \param lhs Left-hand side of the operator. |
1003 | | * \param begin The token representing the beginning of the expression. |
1004 | | * \param max_precedence The maximum precedence to consider. |
1005 | | * \param current_depth Current recursion depth to prevent stack overflow. |
1006 | | * \returns The parsed AST. |
1007 | | */ |
1008 | | AST *parseInfix(AST *lhs, const Token &begin, unsigned max_precedence, unsigned current_depth) |
1009 | 110M | { |
1010 | 110M | if (current_depth >= MAX_PARSER_DEPTH) { |
1011 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
1012 | 0 | } |
1013 | | |
1014 | 171M | while (true) { |
1015 | | |
1016 | 171M | BinaryOp bop = BOP_PLUS; |
1017 | 171M | unsigned op_precedence = 0; |
1018 | | |
1019 | 171M | switch (peek().kind) { |
1020 | | // Logical / arithmetic binary operator. |
1021 | 7.74k | case Token::IN: |
1022 | 26.4M | case Token::OPERATOR: |
1023 | | // These occur if the outer statement was an assert or array slice. |
1024 | | // Either way, we terminate the parsing here. |
1025 | 26.4M | if (peek().data == ":" || peek().data == "::") { |
1026 | 1.91M | return lhs; |
1027 | 1.91M | } |
1028 | 24.5M | if (!op_is_binary(peek().data, bop)) { |
1029 | 489 | std::stringstream ss; |
1030 | 489 | ss << "not a binary operator: " << peek().data; |
1031 | 489 | throw StaticError(peek().location, ss.str()); |
1032 | 489 | } |
1033 | 24.5M | op_precedence = precedence_map[bop]; |
1034 | 24.5M | break; |
1035 | | |
1036 | | // Index, Apply |
1037 | 16.9M | case Token::DOT: |
1038 | 20.9M | case Token::BRACKET_L: |
1039 | 40.1M | case Token::PAREN_L: |
1040 | 40.4M | case Token::BRACE_L: |
1041 | 40.4M | op_precedence = APPLY_PRECEDENCE; |
1042 | 40.4M | break; |
1043 | | |
1044 | 104M | default: |
1045 | | // This happens when we reach EOF or the terminating token of an outer context. |
1046 | 104M | return lhs; |
1047 | 171M | } |
1048 | | |
1049 | | // If higher precedence than the outer recursive call, let that handle it. |
1050 | 64.9M | if (op_precedence >= max_precedence) |
1051 | 4.03M | return lhs; |
1052 | | |
1053 | 60.9M | Token op = pop(); |
1054 | | |
1055 | 60.9M | switch (op.kind) { |
1056 | 4.06M | case Token::BRACKET_L: { |
1057 | 4.06M | bool is_slice; |
1058 | 4.06M | AST *first = nullptr; |
1059 | 4.06M | Fodder second_fodder; |
1060 | 4.06M | AST *second = nullptr; |
1061 | 4.06M | Fodder third_fodder; |
1062 | 4.06M | AST *third = nullptr; |
1063 | | |
1064 | 4.06M | if (peek().kind == Token::BRACKET_R) |
1065 | 8 | throw unexpected(pop(), "parsing index"); |
1066 | | |
1067 | 4.06M | if (peek().data != ":" && peek().data != "::") { |
1068 | 4.00M | first = parse(MAX_PRECEDENCE, current_depth + 1); |
1069 | 4.00M | } |
1070 | | |
1071 | 4.06M | if (peek().kind == Token::OPERATOR && peek().data == "::") { |
1072 | | // Handle :: |
1073 | 2.71k | is_slice = true; |
1074 | 2.71k | Token joined = pop(); |
1075 | 2.71k | second_fodder = joined.fodder; |
1076 | | |
1077 | 2.71k | if (peek().kind != Token::BRACKET_R) |
1078 | 2.02k | third = parse(MAX_PRECEDENCE, current_depth + 1); |
1079 | | |
1080 | 4.06M | } else if (peek().kind != Token::BRACKET_R) { |
1081 | 437k | is_slice = true; |
1082 | 437k | Token delim = pop(); |
1083 | 437k | if (delim.data != ":") |
1084 | 364 | throw unexpected(delim, "parsing slice"); |
1085 | | |
1086 | 436k | second_fodder = delim.fodder; |
1087 | | |
1088 | 436k | if (peek().data != ":" && peek().kind != Token::BRACKET_R) |
1089 | 194k | second = parse(MAX_PRECEDENCE, current_depth + 1); |
1090 | | |
1091 | 436k | if (peek().kind != Token::BRACKET_R) { |
1092 | 26.2k | Token delim = pop(); |
1093 | 26.2k | if (delim.data != ":") |
1094 | 108 | throw unexpected(delim, "parsing slice"); |
1095 | | |
1096 | 26.1k | third_fodder = delim.fodder; |
1097 | | |
1098 | 26.1k | if (peek().kind != Token::BRACKET_R) |
1099 | 25.5k | third = parse(MAX_PRECEDENCE, current_depth + 1); |
1100 | 26.1k | } |
1101 | 3.62M | } else { |
1102 | 3.62M | is_slice = false; |
1103 | 3.62M | } |
1104 | 4.06M | Token end = popExpect(Token::BRACKET_R); |
1105 | 4.06M | lhs = alloc->make<Index>(span(begin, end), |
1106 | 4.06M | EMPTY_FODDER, |
1107 | 4.06M | lhs, |
1108 | 4.06M | op.fodder, |
1109 | 4.06M | is_slice, |
1110 | 4.06M | first, |
1111 | 4.06M | second_fodder, |
1112 | 4.06M | second, |
1113 | 4.06M | third_fodder, |
1114 | 4.06M | third, |
1115 | 4.06M | end.fodder); |
1116 | 4.06M | break; |
1117 | 4.06M | } |
1118 | 16.9M | case Token::DOT: { |
1119 | 16.9M | Token field_id = popExpect(Token::IDENTIFIER); |
1120 | 16.9M | const Identifier *id = alloc->makeIdentifier(field_id.data32()); |
1121 | 16.9M | lhs = alloc->make<Index>(span(begin, field_id), |
1122 | 16.9M | EMPTY_FODDER, |
1123 | 16.9M | lhs, |
1124 | 16.9M | op.fodder, |
1125 | 16.9M | field_id.fodder, |
1126 | 16.9M | id); |
1127 | 16.9M | break; |
1128 | 4.06M | } |
1129 | 19.1M | case Token::PAREN_L: { |
1130 | 19.1M | ArgParams args; |
1131 | 19.1M | bool got_comma; |
1132 | 19.1M | Token end = parseArgs(args, "function argument", got_comma, current_depth); |
1133 | 19.1M | bool got_named = false; |
1134 | 32.0M | for (const auto& arg : args) { |
1135 | 32.0M | if (arg.id != nullptr) { |
1136 | 74.9k | got_named = true; |
1137 | 32.0M | } else { |
1138 | 32.0M | if (got_named) { |
1139 | 10 | throw StaticError(arg.expr->location, "Positional argument after a named argument is not allowed"); |
1140 | 10 | } |
1141 | 32.0M | } |
1142 | 32.0M | } |
1143 | 19.1M | bool tailstrict = false; |
1144 | 19.1M | Fodder tailstrict_fodder; |
1145 | 19.1M | if (peek().kind == Token::TAILSTRICT) { |
1146 | 946k | Token tailstrict_token = pop(); |
1147 | 946k | tailstrict_fodder = tailstrict_token.fodder; |
1148 | 946k | tailstrict = true; |
1149 | 946k | } |
1150 | 19.1M | lhs = alloc->make<Apply>(span(begin, end), |
1151 | 19.1M | EMPTY_FODDER, |
1152 | 19.1M | lhs, |
1153 | 19.1M | op.fodder, |
1154 | 19.1M | args, |
1155 | 19.1M | got_comma, |
1156 | 19.1M | end.fodder, |
1157 | 19.1M | tailstrict_fodder, |
1158 | 19.1M | tailstrict); |
1159 | 19.1M | break; |
1160 | 19.1M | } |
1161 | 279k | case Token::BRACE_L: { |
1162 | 279k | AST *obj; |
1163 | 279k | Token end = parseObjectRemainder(obj, op, current_depth + 1); |
1164 | 279k | lhs = alloc->make<ApplyBrace>(span(begin, end), EMPTY_FODDER, lhs, obj); |
1165 | 279k | break; |
1166 | 19.1M | } |
1167 | | |
1168 | 6.36k | case Token::IN: { |
1169 | 6.36k | if (peek().kind == Token::SUPER) { |
1170 | 589 | Token super = pop(); |
1171 | 589 | lhs = alloc->make<InSuper>( |
1172 | 589 | span(begin, super), EMPTY_FODDER, lhs, op.fodder, super.fodder); |
1173 | 5.77k | } else { |
1174 | 5.77k | AST *rhs = parse(op_precedence, current_depth + 1); |
1175 | 5.77k | lhs = alloc->make<Binary>( |
1176 | 5.77k | span(begin, rhs), EMPTY_FODDER, lhs, op.fodder, bop, rhs); |
1177 | 5.77k | } |
1178 | 6.36k | break; |
1179 | 19.1M | } |
1180 | | |
1181 | 20.4M | case Token::OPERATOR: { |
1182 | 20.4M | AST *rhs = parse(op_precedence, current_depth + 1); |
1183 | 20.4M | lhs = alloc->make<Binary>( |
1184 | 20.4M | span(begin, rhs), EMPTY_FODDER, lhs, op.fodder, bop, rhs); |
1185 | 20.4M | break; |
1186 | 19.1M | } |
1187 | | |
1188 | 0 | default: { |
1189 | 0 | std::cerr << "Should not be here." << std::endl; |
1190 | 0 | abort(); |
1191 | 19.1M | } |
1192 | 60.9M | } |
1193 | 60.9M | } |
1194 | | |
1195 | | // (1 & ((1 + (1 * 1)) + 1)) & 1 |
1196 | | // |
1197 | | // |
1198 | | |
1199 | | /* |
1200 | | // Allocate this on the heap to control stack growth. |
1201 | | std::unique_ptr<Token> begin_(new Token(peek())); |
1202 | | const Token &begin = *begin_; |
1203 | | */ |
1204 | 110M | } |
1205 | | }; |
1206 | | |
1207 | | } // namespace |
1208 | | |
1209 | | AST *jsonnet_parse(Allocator *alloc, Tokens &tokens) |
1210 | 45.4k | { |
1211 | 45.4k | Parser parser(tokens, alloc); |
1212 | 45.4k | unsigned parse_depth = 0; |
1213 | 45.4k | AST *expr = parser.parse(MAX_PRECEDENCE, parse_depth); |
1214 | 45.4k | if (tokens.front().kind != Token::END_OF_FILE) { |
1215 | 660 | std::stringstream ss; |
1216 | 660 | ss << "did not expect: " << tokens.front(); |
1217 | 660 | throw StaticError(tokens.front().location, ss.str()); |
1218 | 660 | } |
1219 | | |
1220 | 44.7k | return expr; |
1221 | 45.4k | } |
1222 | | |
1223 | | } // namespace jsonnet::internal |