/src/jsonnet/core/parser.cpp
Line | Count | Source |
1 | | /* |
2 | | Copyright 2015 Google Inc. All rights reserved. |
3 | | |
4 | | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | you may not use this file except in compliance with the License. |
6 | | You may obtain a copy of the License at |
7 | | |
8 | | http://www.apache.org/licenses/LICENSE-2.0 |
9 | | |
10 | | Unless required by applicable law or agreed to in writing, software |
11 | | distributed under the License is distributed on an "AS IS" BASIS, |
12 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | See the License for the specific language governing permissions and |
14 | | limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <cassert> |
18 | | #include <cmath> |
19 | | #include <cstdlib> |
20 | | |
21 | | #include <iomanip> |
22 | | #include <list> |
23 | | #include <memory> |
24 | | #include <set> |
25 | | #include <sstream> |
26 | | #include <locale> |
27 | | #include <string> |
28 | | |
29 | | #include "ast.h" |
30 | | #include "desugarer.h" |
31 | | #include "lexer.h" |
32 | | #include "parser.h" |
33 | | #include "static_error.h" |
34 | | |
35 | | namespace jsonnet::internal { |
36 | | |
/** Render a number as text, independently of the global locale.
 *
 * Values with no fractional part are written in fixed notation with zero decimals; every
 * other value is written with 17 significant digits.
 *
 * \param v The number to render.
 * \returns The textual form of v.
 */
std::string jsonnet_unparse_number(double v)
{
    std::stringstream ss;
    // Make sure we output the same thing, even if the user
    // of the library changed the global locale
    ss.imbue(std::locale::classic());
    // std::floor is spelled out: <cmath> is only guaranteed to declare it inside namespace std.
    if (v == std::floor(v)) {
        ss << std::fixed << std::setprecision(0) << v;
    } else {
        // See "What Every Computer Scientist Should Know About Floating-Point Arithmetic"
        // Theorem 15
        // https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html
        ss << std::setprecision(17);
        ss << v;
    }
    return ss.str();
}
54 | | |
55 | | namespace { |
56 | | |
57 | | static const Fodder EMPTY_FODDER; |
58 | | |
59 | | /** Maximum parsing depth to avoid stack overflow due to pathological or malicious code. |
60 | | * This is especially important when parsing deeply nested structures that could lead to |
61 | | * excessive recursion in the parser functions. |
62 | | */ |
63 | | static const unsigned MAX_PARSER_DEPTH = 1000; |
64 | | |
65 | | static bool op_is_unary(const std::string &op, UnaryOp &uop) |
66 | 1.79M | { |
67 | 1.79M | auto it = unary_map.find(op); |
68 | 1.79M | if (it == unary_map.end()) |
69 | 612 | return false; |
70 | 1.79M | uop = it->second; |
71 | 1.79M | return true; |
72 | 1.79M | } |
73 | | |
74 | | static bool op_is_binary(const std::string &op, BinaryOp &bop) |
75 | 24.9M | { |
76 | 24.9M | auto it = binary_map.find(op); |
77 | 24.9M | if (it == binary_map.end()) |
78 | 487 | return false; |
79 | 24.9M | bop = it->second; |
80 | 24.9M | return true; |
81 | 24.9M | } |
82 | | |
83 | | LocationRange span(const Token &begin) |
84 | 104M | { |
85 | 104M | return LocationRange(begin.location.file, begin.location.begin, begin.location.end); |
86 | 104M | } |
87 | | |
88 | | LocationRange span(const Token &begin, const Token &end) |
89 | 47.1M | { |
90 | 47.1M | return LocationRange(begin.location.file, begin.location.begin, end.location.end); |
91 | 47.1M | } |
92 | | |
93 | | LocationRange span(const Token &begin, AST *end) |
94 | 39.2M | { |
95 | 39.2M | return LocationRange(begin.location.file, begin.location.begin, end->location.end); |
96 | 39.2M | } |
97 | | |
98 | | /** Holds state while parsing a given token list. |
99 | | */ |
100 | | class Parser { |
101 | | // The private member functions are utilities for dealing with the token stream. |
102 | | |
103 | | StaticError unexpected(const Token &tok, const std::string &while_) |
104 | 983 | { |
105 | 983 | std::stringstream ss; |
106 | 983 | ss << "unexpected: " << tok.kind << " while " << while_; |
107 | 983 | return StaticError(tok.location, ss.str()); |
108 | 983 | } |
109 | | |
110 | | Token pop(void) |
111 | 332M | { |
112 | 332M | Token tok = peek(); |
113 | 332M | tokens.pop_front(); |
114 | 332M | return tok; |
115 | 332M | } |
116 | | |
117 | | void push(Token tok) |
118 | 0 | { |
119 | 0 | tokens.push_front(tok); |
120 | 0 | } |
121 | | |
122 | | const Token &peek(void) |
123 | 1.06G | { |
124 | 1.06G | return tokens.front(); |
125 | 1.06G | } |
126 | | |
127 | | /** Only call this is peek() is not an EOF token. */ |
128 | | Token doublePeek(void) |
129 | 36.9M | { |
130 | 36.9M | Tokens::iterator it = tokens.begin(); // First one. |
131 | 36.9M | it++; // Now pointing at the second one. |
132 | 36.9M | return *(it); |
133 | 36.9M | } |
134 | | |
135 | | Token popExpect(Token::Kind k, const char *data = nullptr) |
136 | 51.8M | { |
137 | 51.8M | Token tok = pop(); |
138 | 51.8M | if (tok.kind != k) { |
139 | 667 | std::stringstream ss; |
140 | 667 | ss << "expected token " << k << " but got " << tok; |
141 | 667 | throw StaticError(tok.location, ss.str()); |
142 | 667 | } |
143 | 51.8M | if (data != nullptr && tok.data != data) { |
144 | 51 | std::stringstream ss; |
145 | 51 | ss << "expected operator " << data << " but got " << tok.data; |
146 | 51 | throw StaticError(tok.location, ss.str()); |
147 | 51 | } |
148 | 51.8M | return tok; |
149 | 51.8M | } |
150 | | |
151 | | std::list<Token> &tokens; |
152 | | Allocator *alloc; |
153 | | |
154 | | public: |
155 | 46.1k | Parser(Tokens &tokens, Allocator *alloc) : tokens(tokens), alloc(alloc) {} |
156 | | |
157 | | /** Parse a comma-separated list of expressions. |
158 | | * |
159 | | * Allows an optional ending comma. |
160 | | * \param args Expressions added here. |
161 | | * \param element_kind Used in error messages when a comma was not found. |
162 | | * \param got_comma Whether a trailing comma was found. |
163 | | * \param current_depth Current recursion depth to prevent stack overflow. |
164 | | * \returns The last token (the one that matched parameter end). |
165 | | */ |
166 | | Token parseArgs(ArgParams &args, const std::string &element_kind, bool &got_comma, unsigned current_depth) |
167 | 24.8M | { |
168 | 24.8M | got_comma = false; |
169 | 24.8M | bool first = true; |
170 | 68.3M | do { |
171 | 68.3M | Token next = peek(); |
172 | 68.3M | if (next.kind == Token::PAREN_R) { |
173 | | // got_comma can be true or false here. |
174 | 24.8M | return pop(); |
175 | 24.8M | } |
176 | 43.5M | if (!first && !got_comma) { |
177 | 282 | std::stringstream ss; |
178 | 282 | ss << "expected a comma before next " << element_kind << "."; |
179 | 282 | throw StaticError(next.location, ss.str()); |
180 | 282 | } |
181 | | // Either id=expr or id or expr, but note that expr could be id==1 so this needs |
182 | | // look-ahead. |
183 | 43.5M | Fodder id_fodder; |
184 | 43.5M | const Identifier *id = nullptr; |
185 | 43.5M | Fodder eq_fodder; |
186 | 43.5M | if (peek().kind == Token::IDENTIFIER) { |
187 | 36.9M | Token maybe_eq = doublePeek(); |
188 | 36.9M | if (maybe_eq.kind == Token::OPERATOR && maybe_eq.data == "=") { |
189 | 588k | id_fodder = peek().fodder; |
190 | 588k | id = alloc->makeIdentifier(peek().data32()); |
191 | 588k | eq_fodder = maybe_eq.fodder; |
192 | 588k | pop(); // id |
193 | 588k | pop(); // eq |
194 | 588k | } |
195 | 36.9M | } |
196 | 43.5M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
197 | 43.5M | got_comma = false; |
198 | 43.5M | first = false; |
199 | 43.5M | Fodder comma_fodder; |
200 | 43.5M | if (peek().kind == Token::COMMA) { |
201 | 18.6M | Token comma = pop(); |
202 | 18.6M | comma_fodder = comma.fodder; |
203 | 18.6M | got_comma = true; |
204 | 18.6M | } |
205 | 43.5M | args.emplace_back(id_fodder, id, eq_fodder, expr, comma_fodder); |
206 | 43.5M | } while (true); |
207 | 24.8M | } |
208 | | |
209 | | /** Parse function parameters. |
210 | | * |
211 | | * \param element_kind Used in error messages. |
212 | | * \param got_comma Whether a trailing comma was found. |
213 | | * \param close_fodder Fodder after the closing parenthesis. |
214 | | * \param current_depth Current recursion depth to prevent stack overflow. |
215 | | * \returns The parameters as ArgParams. |
216 | | */ |
217 | | ArgParams parseParams(const std::string &element_kind, bool &got_comma, Fodder &close_fodder, unsigned current_depth) |
218 | 5.29M | { |
219 | 5.29M | ArgParams params; |
220 | 5.29M | Token paren_r = parseArgs(params, element_kind, got_comma, current_depth); |
221 | | |
222 | | // Check they're all identifiers |
223 | | // parseArgs returns f(x) with x as an expression. Convert it here. |
224 | 10.3M | for (auto &p : params) { |
225 | 10.3M | if (p.id == nullptr) { |
226 | 9.87M | if (p.expr->type != AST_VAR) { |
227 | 10 | throw StaticError(p.expr->location, "could not parse parameter here."); |
228 | 10 | } |
229 | 9.87M | auto *pv = static_cast<Var *>(p.expr); |
230 | 9.87M | p.id = pv->id; |
231 | 9.87M | p.idFodder = pv->openFodder; |
232 | 9.87M | p.expr = nullptr; |
233 | 9.87M | } |
234 | 10.3M | } |
235 | | |
236 | 5.29M | close_fodder = paren_r.fodder; |
237 | | |
238 | 5.29M | return params; |
239 | 5.29M | } |
240 | | |
241 | | /** Parse a local bind statement. |
242 | | * |
243 | | * \param binds The bindings to be populated. |
244 | | * \param current_depth Current recursion depth to prevent stack overflow. |
245 | | * \returns The token after the binding (comma or semicolon). |
246 | | */ |
247 | | Token parseBind(Local::Binds &binds, unsigned current_depth) |
248 | 6.09M | { |
249 | 6.09M | Token var_id = popExpect(Token::IDENTIFIER); |
250 | 6.09M | auto *id = alloc->makeIdentifier(var_id.data32()); |
251 | 6.09M | for (const auto &bind : binds) { |
252 | 854k | if (bind.var == id) |
253 | 12 | throw StaticError(var_id.location, "duplicate local var: " + var_id.data); |
254 | 854k | } |
255 | 6.09M | bool is_function = false; |
256 | 6.09M | ArgParams params; |
257 | 6.09M | bool trailing_comma = false; |
258 | 6.09M | Fodder fodder_l, fodder_r; |
259 | 6.09M | if (peek().kind == Token::PAREN_L) { |
260 | 1.78M | Token paren_l = pop(); |
261 | 1.78M | fodder_l = paren_l.fodder; |
262 | 1.78M | params = parseParams("function parameter", trailing_comma, fodder_r, current_depth); |
263 | 1.78M | is_function = true; |
264 | 1.78M | } |
265 | 6.09M | Token eq = popExpect(Token::OPERATOR, "="); |
266 | 6.09M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
267 | 6.09M | Token delim = pop(); |
268 | 6.09M | binds.emplace_back(var_id.fodder, |
269 | 6.09M | id, |
270 | 6.09M | eq.fodder, |
271 | 6.09M | body, |
272 | 6.09M | is_function, |
273 | 6.09M | fodder_l, |
274 | 6.09M | params, |
275 | 6.09M | trailing_comma, |
276 | 6.09M | fodder_r, |
277 | 6.09M | delim.fodder); |
278 | 6.09M | return delim; |
279 | 6.09M | } |
280 | | |
281 | | /** Parse the remainder of an object after the opening brace. |
282 | | * |
283 | | * \param obj The object AST to be populated. |
284 | | * \param tok The opening brace token. |
285 | | * \param current_depth Current recursion depth to prevent stack overflow. |
286 | | * \returns The closing brace token. |
287 | | */ |
288 | | Token parseObjectRemainder(AST *&obj, const Token &tok, unsigned current_depth) |
289 | 1.50M | { |
290 | 1.50M | if (current_depth >= MAX_PARSER_DEPTH) { |
291 | 6 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
292 | 6 | } |
293 | | |
294 | 1.50M | ObjectFields fields; |
295 | 1.50M | std::set<std::string> literal_fields; // For duplicate fields detection. |
296 | 1.50M | std::set<const Identifier *> binds; // For duplicate locals detection. |
297 | | |
298 | 1.50M | bool got_comma = false; |
299 | 1.50M | bool first = true; |
300 | 1.50M | Token next = pop(); |
301 | | |
302 | 7.65M | do { |
303 | 7.65M | if (next.kind == Token::BRACE_R) { |
304 | 1.35M | obj = alloc->make<Object>( |
305 | 1.35M | span(tok, next), tok.fodder, fields, got_comma, next.fodder); |
306 | 1.35M | return next; |
307 | | |
308 | 6.30M | } else if (next.kind == Token::FOR) { |
309 | | // It's a comprehension |
310 | 134k | unsigned num_fields = 0; |
311 | 134k | unsigned num_asserts = 0; |
312 | 134k | const ObjectField *field_ptr = nullptr; |
313 | 135k | for (const auto &field : fields) { |
314 | 135k | if (field.kind == ObjectField::LOCAL) |
315 | 3 | continue; |
316 | 135k | if (field.kind == ObjectField::ASSERT) { |
317 | 496 | num_asserts++; |
318 | 496 | continue; |
319 | 496 | } |
320 | 134k | field_ptr = &field; |
321 | 134k | num_fields++; |
322 | 134k | } |
323 | 134k | if (num_asserts > 0) { |
324 | 26 | auto msg = "object comprehension cannot have asserts."; |
325 | 26 | throw StaticError(next.location, msg); |
326 | 26 | } |
327 | 134k | if (num_fields != 1) { |
328 | 13 | auto msg = "object comprehension can only have one field."; |
329 | 13 | throw StaticError(next.location, msg); |
330 | 13 | } |
331 | 134k | const ObjectField &field = *field_ptr; |
332 | | |
333 | 134k | if (field.hide != ObjectField::INHERIT) { |
334 | 3 | auto msg = "object comprehensions cannot have hidden fields."; |
335 | 3 | throw StaticError(next.location, msg); |
336 | 3 | } |
337 | | |
338 | 134k | if (field.kind != ObjectField::FIELD_EXPR) { |
339 | 3 | auto msg = "object comprehensions can only have [e] fields."; |
340 | 3 | throw StaticError(next.location, msg); |
341 | 3 | } |
342 | | |
343 | 134k | std::vector<ComprehensionSpec> specs; |
344 | 134k | Token last = parseComprehensionSpecs(Token::BRACE_R, next.fodder, specs, current_depth + 1); |
345 | 134k | obj = alloc->make<ObjectComprehension>( |
346 | 134k | span(tok, last), tok.fodder, fields, got_comma, specs, last.fodder); |
347 | | |
348 | 134k | return last; |
349 | 134k | } |
350 | | |
351 | 6.17M | if (!got_comma && !first) |
352 | 332 | throw StaticError(next.location, "expected a comma before next field."); |
353 | | |
354 | 6.17M | first = false; |
355 | 6.17M | got_comma = false; |
356 | | |
357 | 6.17M | switch (next.kind) { |
358 | 147k | case Token::BRACKET_L: |
359 | 5.59M | case Token::IDENTIFIER: |
360 | 5.80M | case Token::STRING_DOUBLE: |
361 | 5.89M | case Token::STRING_SINGLE: |
362 | 5.90M | case Token::STRING_BLOCK: |
363 | 5.90M | case Token::VERBATIM_STRING_DOUBLE: |
364 | 5.90M | case Token::VERBATIM_STRING_SINGLE: { |
365 | 5.90M | ObjectField::Kind kind; |
366 | 5.90M | AST *expr1 = nullptr; |
367 | 5.90M | const Identifier *id = nullptr; |
368 | 5.90M | Fodder fodder1, fodder2; |
369 | 5.90M | LocationRange idLocation; |
370 | 5.90M | if (next.kind == Token::IDENTIFIER) { |
371 | 5.45M | fodder1 = next.fodder; |
372 | 5.45M | kind = ObjectField::FIELD_ID; |
373 | 5.45M | id = alloc->makeIdentifier(next.data32()); |
374 | 5.45M | idLocation = next.location; |
375 | 5.45M | } else if (next.kind == Token::STRING_DOUBLE) { |
376 | 204k | kind = ObjectField::FIELD_STR; |
377 | 204k | expr1 = alloc->make<LiteralString>(next.location, |
378 | 204k | next.fodder, |
379 | 204k | next.data32(), |
380 | 204k | LiteralString::DOUBLE, |
381 | 204k | "", |
382 | 204k | ""); |
383 | 253k | } else if (next.kind == Token::STRING_SINGLE) { |
384 | 97.6k | kind = ObjectField::FIELD_STR; |
385 | 97.6k | expr1 = alloc->make<LiteralString>(next.location, |
386 | 97.6k | next.fodder, |
387 | 97.6k | next.data32(), |
388 | 97.6k | LiteralString::SINGLE, |
389 | 97.6k | "", |
390 | 97.6k | ""); |
391 | 155k | } else if (next.kind == Token::STRING_BLOCK) { |
392 | 4.78k | kind = ObjectField::FIELD_STR; |
393 | 4.78k | expr1 = alloc->make<LiteralString>(next.location, |
394 | 4.78k | next.fodder, |
395 | 4.78k | next.data32(), |
396 | 4.78k | LiteralString::BLOCK, |
397 | 4.78k | next.stringBlockIndent, |
398 | 4.78k | next.stringBlockTermIndent); |
399 | 150k | } else if (next.kind == Token::VERBATIM_STRING_SINGLE) { |
400 | 882 | kind = ObjectField::FIELD_STR; |
401 | 882 | expr1 = alloc->make<LiteralString>(next.location, |
402 | 882 | next.fodder, |
403 | 882 | next.data32(), |
404 | 882 | LiteralString::VERBATIM_SINGLE, |
405 | 882 | "", |
406 | 882 | ""); |
407 | 149k | } else if (next.kind == Token::VERBATIM_STRING_DOUBLE) { |
408 | 2.53k | kind = ObjectField::FIELD_STR; |
409 | 2.53k | expr1 = alloc->make<LiteralString>(next.location, |
410 | 2.53k | next.fodder, |
411 | 2.53k | next.data32(), |
412 | 2.53k | LiteralString::VERBATIM_DOUBLE, |
413 | 2.53k | "", |
414 | 2.53k | ""); |
415 | 147k | } else { |
416 | 147k | kind = ObjectField::FIELD_EXPR; |
417 | 147k | fodder1 = next.fodder; |
418 | 147k | expr1 = parse(MAX_PRECEDENCE, current_depth + 1); |
419 | 147k | Token bracket_r = popExpect(Token::BRACKET_R); |
420 | 147k | fodder2 = bracket_r.fodder; |
421 | 147k | } |
422 | | |
423 | 5.90M | bool is_method = false; |
424 | 5.90M | bool meth_comma = false; |
425 | 5.90M | ArgParams params; |
426 | 5.90M | Fodder fodder_l; |
427 | 5.90M | Fodder fodder_r; |
428 | 5.90M | if (peek().kind == Token::PAREN_L) { |
429 | 3.00M | Token paren_l = pop(); |
430 | 3.00M | fodder_l = paren_l.fodder; |
431 | 3.00M | params = parseParams("method parameter", meth_comma, fodder_r, current_depth); |
432 | 3.00M | is_method = true; |
433 | 3.00M | } |
434 | | |
435 | 5.90M | bool plus_sugar = false; |
436 | | |
437 | 5.90M | Token op = popExpect(Token::OPERATOR); |
438 | 5.90M | const char *od = op.data.c_str(); |
439 | 5.90M | if (*od == '+') { |
440 | 56.7k | plus_sugar = true; |
441 | 56.7k | od++; |
442 | 56.7k | } |
443 | 5.90M | unsigned colons = 0; |
444 | 14.8M | for (; *od != '\0'; ++od) { |
445 | 8.90M | if (*od != ':') { |
446 | 51 | throw StaticError( |
447 | 51 | next.location, |
448 | 51 | "expected one of :, ::, :::, +:, +::, +:::, got: " + op.data); |
449 | 51 | } |
450 | 8.90M | ++colons; |
451 | 8.90M | } |
452 | 5.90M | ObjectField::Hide field_hide; |
453 | 5.90M | switch (colons) { |
454 | 2.89M | case 1: field_hide = ObjectField::INHERIT; break; |
455 | | |
456 | 3.00M | case 2: field_hide = ObjectField::HIDDEN; break; |
457 | | |
458 | 101 | case 3: field_hide = ObjectField::VISIBLE; break; |
459 | | |
460 | 23 | default: |
461 | 23 | throw StaticError( |
462 | 23 | next.location, |
463 | 23 | "expected one of :, ::, :::, +:, +::, +:::, got: " + op.data); |
464 | 5.90M | } |
465 | | |
466 | | // Basic checks for invalid Jsonnet code. |
467 | 5.89M | if (is_method && plus_sugar) { |
468 | 3 | throw StaticError(next.location, |
469 | 3 | "cannot use +: syntax sugar in a method: " + next.data); |
470 | 3 | } |
471 | 5.89M | if (kind != ObjectField::FIELD_EXPR) { |
472 | 5.75M | if (!literal_fields.insert(next.data).second) { |
473 | 29 | throw StaticError(next.location, "duplicate field: " + next.data); |
474 | 29 | } |
475 | 5.75M | } |
476 | | |
477 | 5.89M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
478 | | |
479 | 5.89M | Fodder comma_fodder; |
480 | 5.89M | next = pop(); |
481 | 5.89M | if (next.kind == Token::COMMA) { |
482 | 4.94M | comma_fodder = next.fodder; |
483 | 4.94M | next = pop(); |
484 | 4.94M | got_comma = true; |
485 | 4.94M | } |
486 | 5.89M | fields.emplace_back(kind, |
487 | 5.89M | fodder1, |
488 | 5.89M | fodder2, |
489 | 5.89M | fodder_l, |
490 | 5.89M | fodder_r, |
491 | 5.89M | field_hide, |
492 | 5.89M | plus_sugar, |
493 | 5.89M | is_method, |
494 | 5.89M | expr1, |
495 | 5.89M | id, |
496 | 5.89M | idLocation, |
497 | 5.89M | params, |
498 | 5.89M | meth_comma, |
499 | 5.89M | op.fodder, |
500 | 5.89M | body, |
501 | 5.89M | nullptr, |
502 | 5.89M | comma_fodder); |
503 | 5.89M | } break; |
504 | | |
505 | 170k | case Token::LOCAL: { |
506 | 170k | Fodder local_fodder = next.fodder; |
507 | 170k | Token var_id = popExpect(Token::IDENTIFIER); |
508 | 170k | auto *id = alloc->makeIdentifier(var_id.data32()); |
509 | | |
510 | 170k | if (binds.find(id) != binds.end()) { |
511 | 7 | throw StaticError(var_id.location, "duplicate local var: " + var_id.data); |
512 | 7 | } |
513 | 170k | bool is_method = false; |
514 | 170k | bool func_comma = false; |
515 | 170k | ArgParams params; |
516 | 170k | Fodder paren_l_fodder; |
517 | 170k | Fodder paren_r_fodder; |
518 | 170k | if (peek().kind == Token::PAREN_L) { |
519 | 25.2k | Token paren_l = pop(); |
520 | 25.2k | paren_l_fodder = paren_l.fodder; |
521 | 25.2k | is_method = true; |
522 | 25.2k | params = parseParams("function parameter", func_comma, paren_r_fodder, current_depth); |
523 | 25.2k | } |
524 | 170k | Token eq = popExpect(Token::OPERATOR, "="); |
525 | 170k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
526 | 170k | binds.insert(id); |
527 | | |
528 | 170k | Fodder comma_fodder; |
529 | 170k | next = pop(); |
530 | 170k | if (next.kind == Token::COMMA) { |
531 | 168k | comma_fodder = next.fodder; |
532 | 168k | next = pop(); |
533 | 168k | got_comma = true; |
534 | 168k | } |
535 | 170k | fields.push_back(ObjectField::Local(local_fodder, |
536 | 170k | var_id.fodder, |
537 | 170k | paren_l_fodder, |
538 | 170k | paren_r_fodder, |
539 | 170k | is_method, |
540 | 170k | id, |
541 | 170k | params, |
542 | 170k | func_comma, |
543 | 170k | eq.fodder, |
544 | 170k | body, |
545 | 170k | comma_fodder)); |
546 | | |
547 | 170k | } break; |
548 | | |
549 | 96.2k | case Token::ASSERT: { |
550 | 96.2k | Fodder assert_fodder = next.fodder; |
551 | 96.2k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
552 | 96.2k | AST *msg = nullptr; |
553 | 96.2k | Fodder colon_fodder; |
554 | 96.2k | if (peek().kind == Token::OPERATOR && peek().data == ":") { |
555 | 28.6k | Token colon = pop(); |
556 | 28.6k | colon_fodder = colon.fodder; |
557 | 28.6k | msg = parse(MAX_PRECEDENCE, current_depth + 1); |
558 | 28.6k | } |
559 | | |
560 | 96.2k | Fodder comma_fodder; |
561 | 96.2k | next = pop(); |
562 | 96.2k | if (next.kind == Token::COMMA) { |
563 | 85.3k | comma_fodder = next.fodder; |
564 | 85.3k | next = pop(); |
565 | 85.3k | got_comma = true; |
566 | 85.3k | } |
567 | 96.2k | fields.push_back( |
568 | 96.2k | ObjectField::Assert(assert_fodder, cond, colon_fodder, msg, comma_fodder)); |
569 | 96.2k | } break; |
570 | | |
571 | 203 | default: throw unexpected(next, "parsing field definition"); |
572 | 6.17M | } |
573 | | |
574 | 6.17M | } while (true); |
575 | 1.50M | } |
576 | | |
577 | | /** Parses for x in expr for y in expr if expr for z in expr ... |
578 | | * |
579 | | * \param end The token that ends the comprehension (e.g. ] or }). |
580 | | * \param for_fodder Fodder before the first 'for'. |
581 | | * \param specs The comprehension specs to be populated. |
582 | | * \param current_depth Current recursion depth to prevent stack overflow. |
583 | | * \returns The closing token. |
584 | | */ |
585 | | Token parseComprehensionSpecs(Token::Kind end, Fodder for_fodder, |
586 | | std::vector<ComprehensionSpec> &specs, |
587 | | unsigned current_depth) |
588 | 906k | { |
589 | 906k | if (current_depth >= MAX_PARSER_DEPTH) { |
590 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
591 | 0 | } |
592 | | |
593 | 1.00M | while (true) { |
594 | 999k | LocationRange l; |
595 | 999k | Token id_token = popExpect(Token::IDENTIFIER); |
596 | 999k | const Identifier *id = alloc->makeIdentifier(id_token.data32()); |
597 | 999k | Token in_token = popExpect(Token::IN); |
598 | 999k | AST *arr = parse(MAX_PRECEDENCE, current_depth + 1); |
599 | 999k | specs.emplace_back( |
600 | 999k | ComprehensionSpec::FOR, for_fodder, id_token.fodder, id, in_token.fodder, arr); |
601 | | |
602 | 999k | Token maybe_if = pop(); |
603 | 1.44M | for (; maybe_if.kind == Token::IF; maybe_if = pop()) { |
604 | 445k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
605 | 445k | specs.emplace_back( |
606 | 445k | ComprehensionSpec::IF, maybe_if.fodder, Fodder{}, nullptr, Fodder{}, cond); |
607 | 445k | } |
608 | 999k | if (maybe_if.kind == end) { |
609 | 904k | return maybe_if; |
610 | 904k | } |
611 | 95.3k | if (maybe_if.kind != Token::FOR) { |
612 | 220 | std::stringstream ss; |
613 | 220 | ss << "expected for, if or " << end << " after for clause, got: " << maybe_if; |
614 | 220 | throw StaticError(maybe_if.location, ss.str()); |
615 | 220 | } |
616 | 95.1k | for_fodder = maybe_if.fodder; |
617 | 95.1k | } |
618 | 906k | } |
619 | | |
620 | | /** Parse a terminal (literal, var, import, etc.), an object declaration, unary operator, |
621 | | * or a parenthesized expression. |
622 | | * |
623 | | * \param current_depth Current recursion depth to prevent stack overflow. |
624 | | * \returns The parsed AST. |
625 | | */ |
626 | | AST *parseTerminalBracketsOrUnary(unsigned current_depth) |
627 | 112M | { |
628 | 112M | if (current_depth >= MAX_PARSER_DEPTH) { |
629 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
630 | 0 | } |
631 | | |
632 | 112M | Token tok = pop(); |
633 | 112M | switch (tok.kind) { |
634 | 0 | case Token::ASSERT: |
635 | 29 | case Token::BRACE_R: |
636 | 55 | case Token::BRACKET_R: |
637 | 140 | case Token::COMMA: |
638 | 184 | case Token::DOT: |
639 | 187 | case Token::ELSE: |
640 | 187 | case Token::ERROR: |
641 | 191 | case Token::FOR: |
642 | 191 | case Token::FUNCTION: |
643 | 191 | case Token::IF: |
644 | 204 | case Token::IN: |
645 | 204 | case Token::IMPORT: |
646 | 204 | case Token::IMPORTSTR: |
647 | 204 | case Token::IMPORTBIN: |
648 | 204 | case Token::LOCAL: |
649 | 258 | case Token::PAREN_R: |
650 | 288 | case Token::SEMICOLON: |
651 | 291 | case Token::TAILSTRICT: |
652 | 294 | case Token::THEN: throw unexpected(tok, "parsing terminal"); |
653 | | |
654 | 1.94k | case Token::END_OF_FILE: throw StaticError(tok.location, "unexpected end of file."); |
655 | | |
656 | 1.79M | case Token::OPERATOR: { |
657 | 1.79M | UnaryOp uop; |
658 | 1.79M | if (!op_is_unary(tok.data, uop)) { |
659 | 612 | std::stringstream ss; |
660 | 612 | ss << "not a unary operator: " << tok.data; |
661 | 612 | throw StaticError(tok.location, ss.str()); |
662 | 612 | } |
663 | 1.79M | AST *expr = parse(UNARY_PRECEDENCE, current_depth + 1); |
664 | 1.79M | return alloc->make<Unary>(span(tok, expr), tok.fodder, uop, expr); |
665 | 1.79M | } |
666 | 1.22M | case Token::BRACE_L: { |
667 | 1.22M | AST *obj; |
668 | 1.22M | parseObjectRemainder(obj, tok, current_depth + 1); |
669 | 1.22M | return obj; |
670 | 1.79M | } |
671 | | |
672 | 3.36M | case Token::BRACKET_L: { |
673 | 3.36M | Token next = peek(); |
674 | 3.36M | if (next.kind == Token::BRACKET_R) { |
675 | 546k | Token bracket_r = pop(); |
676 | 546k | return alloc->make<Array>( |
677 | 546k | span(tok, next), tok.fodder, Array::Elements{}, false, bracket_r.fodder); |
678 | 546k | } |
679 | 2.81M | AST *first = parse(MAX_PRECEDENCE, current_depth + 1); |
680 | 2.81M | bool got_comma = false; |
681 | 2.81M | Fodder comma_fodder; |
682 | 2.81M | next = peek(); |
683 | 2.81M | if (!got_comma && next.kind == Token::COMMA) { |
684 | 434k | Token comma = pop(); |
685 | 434k | comma_fodder = comma.fodder; |
686 | 434k | next = peek(); |
687 | 434k | got_comma = true; |
688 | 434k | } |
689 | | |
690 | 2.81M | if (next.kind == Token::FOR) { |
691 | | // It's a comprehension |
692 | 772k | Token for_token = pop(); |
693 | 772k | std::vector<ComprehensionSpec> specs; |
694 | 772k | Token last = parseComprehensionSpecs(Token::BRACKET_R, for_token.fodder, specs, current_depth + 1); |
695 | 772k | return alloc->make<ArrayComprehension>(span(tok, last), |
696 | 772k | tok.fodder, |
697 | 772k | first, |
698 | 772k | comma_fodder, |
699 | 772k | got_comma, |
700 | 772k | specs, |
701 | 772k | last.fodder); |
702 | 772k | } |
703 | | |
704 | | // Not a comprehension: It can have more elements. |
705 | 2.04M | Array::Elements elements; |
706 | 2.04M | elements.emplace_back(first, comma_fodder); |
707 | 8.30M | do { |
708 | 8.30M | if (next.kind == Token::BRACKET_R) { |
709 | 2.03M | Token bracket_r = pop(); |
710 | 2.03M | return alloc->make<Array>( |
711 | 2.03M | span(tok, next), tok.fodder, elements, got_comma, bracket_r.fodder); |
712 | 2.03M | } |
713 | 6.27M | if (!got_comma) { |
714 | 741 | std::stringstream ss; |
715 | 741 | ss << "expected a comma before next array element."; |
716 | 741 | throw StaticError(next.location, ss.str()); |
717 | 741 | } |
718 | 6.27M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
719 | 6.27M | comma_fodder.clear(); |
720 | 6.27M | got_comma = false; |
721 | 6.27M | next = peek(); |
722 | 6.27M | if (next.kind == Token::COMMA) { |
723 | 5.87M | Token comma = pop(); |
724 | 5.87M | comma_fodder = comma.fodder; |
725 | 5.87M | next = peek(); |
726 | 5.87M | got_comma = true; |
727 | 5.87M | } |
728 | 6.27M | elements.emplace_back(expr, comma_fodder); |
729 | 6.27M | } while (true); |
730 | 2.04M | } |
731 | | |
732 | 1.16M | case Token::PAREN_L: { |
733 | 1.16M | auto *inner = parse(MAX_PRECEDENCE, current_depth + 1); |
734 | 1.16M | Token close = popExpect(Token::PAREN_R); |
735 | 1.16M | return alloc->make<Parens>(span(tok, close), tok.fodder, inner, close.fodder); |
736 | 2.04M | } |
737 | | |
738 | | // Literals |
739 | 15.3M | case Token::NUMBER: return alloc->make<LiteralNumber>(span(tok), tok.fodder, tok.data); |
740 | | |
741 | 11.2M | case Token::STRING_SINGLE: |
742 | 11.2M | return alloc->make<LiteralString>( |
743 | 11.2M | span(tok), tok.fodder, tok.data32(), LiteralString::SINGLE, "", ""); |
744 | 199k | case Token::STRING_DOUBLE: |
745 | 199k | return alloc->make<LiteralString>( |
746 | 199k | span(tok), tok.fodder, tok.data32(), LiteralString::DOUBLE, "", ""); |
747 | 5.62k | case Token::STRING_BLOCK: |
748 | 5.62k | return alloc->make<LiteralString>(span(tok), |
749 | 5.62k | tok.fodder, |
750 | 5.62k | tok.data32(), |
751 | 5.62k | LiteralString::BLOCK, |
752 | 5.62k | tok.stringBlockIndent, |
753 | 5.62k | tok.stringBlockTermIndent); |
754 | 2.11k | case Token::VERBATIM_STRING_SINGLE: |
755 | 2.11k | return alloc->make<LiteralString>( |
756 | 2.11k | span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_SINGLE, "", ""); |
757 | 4.05k | case Token::VERBATIM_STRING_DOUBLE: |
758 | 4.05k | return alloc->make<LiteralString>( |
759 | 4.05k | span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_DOUBLE, "", ""); |
760 | | |
761 | 1.18M | case Token::FALSE: return alloc->make<LiteralBoolean>(span(tok), tok.fodder, false); |
762 | | |
763 | 867k | case Token::TRUE: return alloc->make<LiteralBoolean>(span(tok), tok.fodder, true); |
764 | | |
765 | 415k | case Token::NULL_LIT: return alloc->make<LiteralNull>(span(tok), tok.fodder); |
766 | | |
767 | | // Variables |
768 | 117k | case Token::DOLLAR: return alloc->make<Dollar>(span(tok), tok.fodder); |
769 | | |
770 | 75.0M | case Token::IDENTIFIER: |
771 | 75.0M | return alloc->make<Var>(span(tok), tok.fodder, alloc->makeIdentifier(tok.data32())); |
772 | | |
773 | 97.9k | case Token::SELF: return alloc->make<Self>(span(tok), tok.fodder); |
774 | | |
775 | 13.5k | case Token::SUPER: { |
776 | 13.5k | Token next = pop(); |
777 | 13.5k | AST *index = nullptr; |
778 | 13.5k | const Identifier *id = nullptr; |
779 | 13.5k | Fodder id_fodder; |
780 | 13.5k | switch (next.kind) { |
781 | 12.0k | case Token::DOT: { |
782 | 12.0k | Token field_id = popExpect(Token::IDENTIFIER); |
783 | 12.0k | id_fodder = field_id.fodder; |
784 | 12.0k | id = alloc->makeIdentifier(field_id.data32()); |
785 | 12.0k | } break; |
786 | 1.49k | case Token::BRACKET_L: { |
787 | 1.49k | index = parse(MAX_PRECEDENCE, current_depth + 1); |
788 | 1.49k | Token bracket_r = popExpect(Token::BRACKET_R); |
789 | 1.49k | id_fodder = bracket_r.fodder; // Not id_fodder, but use the same var. |
790 | 1.49k | } break; |
791 | 8 | default: throw StaticError(tok.location, "expected . or [ after super."); |
792 | 13.5k | } |
793 | 12.8k | return alloc->make<SuperIndex>( |
794 | 12.8k | span(tok), tok.fodder, next.fodder, index, id_fodder, id); |
795 | 13.5k | } |
796 | 112M | } |
797 | | |
798 | 0 | std::cerr << "INTERNAL ERROR: Unknown tok kind: " << tok.kind << std::endl; |
799 | 0 | std::abort(); |
800 | 0 | return nullptr; // Quiet, compiler. |
801 | 112M | } |
802 | | |
803 | | /** If the first token makes it clear that we will be parsing a greedy construct, return the AST. |
804 | | * Otherwise, return nullptr. Greedy constructs are those that consume as many tokens as possible |
805 | | * on the right hand side because they have no closing token. |
806 | | * |
807 | | * \param current_depth Current recursion depth to prevent stack overflow. |
808 | | * \returns The parsed AST or nullptr. |
809 | | */ |
810 | | AST *maybeParseGreedy(unsigned current_depth) |
811 | 128M | { |
812 | 128M | if (current_depth >= MAX_PARSER_DEPTH) { |
813 | 9 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
814 | 9 | } |
815 | | |
816 | | // Allocate this on the heap to control stack growth. |
817 | 128M | std::unique_ptr<Token> begin_(new Token(peek())); |
818 | 128M | const Token &begin = *begin_; |
819 | | |
820 | 128M | switch (begin.kind) { |
821 | | // These cases have effectively MAX_PRECEDENCE as the first |
822 | | // call to parse will parse them. |
823 | 990k | case Token::ASSERT: { |
824 | 990k | pop(); |
825 | 990k | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
826 | 990k | Fodder colonFodder; |
827 | 990k | AST *msg = nullptr; |
828 | 990k | if (peek().kind == Token::OPERATOR && peek().data == ":") { |
829 | 916k | Token colon = pop(); |
830 | 916k | colonFodder = colon.fodder; |
831 | 916k | msg = parse(MAX_PRECEDENCE, current_depth + 1); |
832 | 916k | } |
833 | 990k | Token semicolon = popExpect(Token::SEMICOLON); |
834 | 990k | AST *rest = parse(MAX_PRECEDENCE, current_depth + 1); |
835 | 990k | return alloc->make<Assert>(span(begin, rest), |
836 | 990k | begin.fodder, |
837 | 990k | cond, |
838 | 990k | colonFodder, |
839 | 990k | msg, |
840 | 990k | semicolon.fodder, |
841 | 990k | rest); |
842 | 0 | } |
843 | | |
844 | 1.52M | case Token::ERROR: { |
845 | 1.52M | pop(); |
846 | 1.52M | AST *expr = parse(MAX_PRECEDENCE, current_depth + 1); |
847 | 1.52M | return alloc->make<Error>(span(begin, expr), begin.fodder, expr); |
848 | 0 | } |
849 | | |
850 | 7.79M | case Token::IF: { |
851 | 7.79M | pop(); |
852 | 7.79M | AST *cond = parse(MAX_PRECEDENCE, current_depth + 1); |
853 | 7.79M | Token then = popExpect(Token::THEN); |
854 | 7.79M | AST *branch_true = parse(MAX_PRECEDENCE, current_depth + 1); |
855 | 7.79M | if (peek().kind == Token::ELSE) { |
856 | 7.70M | Token else_ = pop(); |
857 | 7.70M | AST *branch_false = parse(MAX_PRECEDENCE, current_depth + 1); |
858 | 7.70M | return alloc->make<Conditional>(span(begin, branch_false), |
859 | 7.70M | begin.fodder, |
860 | 7.70M | cond, |
861 | 7.70M | then.fodder, |
862 | 7.70M | branch_true, |
863 | 7.70M | else_.fodder, |
864 | 7.70M | branch_false); |
865 | 7.70M | } |
866 | 97.3k | return alloc->make<Conditional>(span(begin, branch_true), |
867 | 97.3k | begin.fodder, |
868 | 97.3k | cond, |
869 | 97.3k | then.fodder, |
870 | 97.3k | branch_true, |
871 | 97.3k | Fodder{}, |
872 | 97.3k | nullptr); |
873 | 7.79M | } |
874 | | |
875 | 484k | case Token::FUNCTION: { |
876 | 484k | pop(); // Still available in 'begin'. |
877 | 484k | Token paren_l = pop(); |
878 | 484k | if (paren_l.kind == Token::PAREN_L) { |
879 | 484k | std::vector<AST *> params_asts; |
880 | 484k | bool got_comma; |
881 | 484k | Fodder paren_r_fodder; |
882 | 484k | ArgParams params = parseParams("function parameter", got_comma, paren_r_fodder, current_depth); |
883 | 484k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
884 | 484k | return alloc->make<Function>(span(begin, body), |
885 | 484k | begin.fodder, |
886 | 484k | paren_l.fodder, |
887 | 484k | params, |
888 | 484k | got_comma, |
889 | 484k | paren_r_fodder, |
890 | 484k | body); |
891 | 484k | } else { |
892 | 18 | std::stringstream ss; |
893 | 18 | ss << "expected ( but got " << paren_l; |
894 | 18 | throw StaticError(paren_l.location, ss.str()); |
895 | 18 | } |
896 | 484k | } |
897 | | |
898 | 1.90k | case Token::IMPORT: { |
899 | 1.90k | pop(); |
900 | 1.90k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
901 | 1.90k | if (body->type == AST_LITERAL_STRING) { |
902 | 1.19k | auto *lit = static_cast<LiteralString *>(body); |
903 | 1.19k | if (lit->tokenKind == LiteralString::BLOCK) { |
904 | 3 | throw StaticError(lit->location, |
905 | 3 | "Cannot use text blocks in import statements."); |
906 | 3 | } |
907 | 1.19k | return alloc->make<Import>(span(begin, body), begin.fodder, lit); |
908 | 1.19k | } else { |
909 | 702 | std::stringstream ss; |
910 | 702 | ss << "computed imports are not allowed."; |
911 | 702 | throw StaticError(body->location, ss.str()); |
912 | 702 | } |
913 | 1.90k | } |
914 | | |
915 | 3.24k | case Token::IMPORTSTR: { |
916 | 3.24k | pop(); |
917 | 3.24k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
918 | 3.24k | if (body->type == AST_LITERAL_STRING) { |
919 | 2.89k | auto *lit = static_cast<LiteralString *>(body); |
920 | 2.89k | if (lit->tokenKind == LiteralString::BLOCK) { |
921 | 2 | throw StaticError(lit->location, |
922 | 2 | "Cannot use text blocks in import statements."); |
923 | 2 | } |
924 | 2.89k | return alloc->make<Importstr>(span(begin, body), begin.fodder, lit); |
925 | 2.89k | } else { |
926 | 348 | std::stringstream ss; |
927 | 348 | ss << "computed imports are not allowed."; |
928 | 348 | throw StaticError(body->location, ss.str()); |
929 | 348 | } |
930 | 3.24k | } |
931 | | |
932 | 1.29k | case Token::IMPORTBIN: { |
933 | 1.29k | pop(); |
934 | 1.29k | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
935 | 1.29k | if (body->type == AST_LITERAL_STRING) { |
936 | 1.05k | auto *lit = static_cast<LiteralString *>(body); |
937 | 1.05k | if (lit->tokenKind == LiteralString::BLOCK) { |
938 | 0 | throw StaticError(lit->location, |
939 | 0 | "Cannot use text blocks in import statements."); |
940 | 0 | } |
941 | 1.05k | return alloc->make<Importbin>(span(begin, body), begin.fodder, lit); |
942 | 1.05k | } else { |
943 | 242 | std::stringstream ss; |
944 | 242 | ss << "computed imports are not allowed."; |
945 | 242 | throw StaticError(body->location, ss.str()); |
946 | 242 | } |
947 | 1.29k | } |
948 | | |
949 | 5.77M | case Token::LOCAL: { |
950 | 5.77M | pop(); |
951 | 5.77M | Local::Binds binds; |
952 | 6.09M | do { |
953 | 6.09M | Token delim = parseBind(binds, current_depth + 1); |
954 | 6.09M | if (delim.kind != Token::SEMICOLON && delim.kind != Token::COMMA) { |
955 | 40 | std::stringstream ss; |
956 | 40 | ss << "expected , or ; but got " << delim; |
957 | 40 | throw StaticError(delim.location, ss.str()); |
958 | 40 | } |
959 | 6.09M | if (delim.kind == Token::SEMICOLON) |
960 | 5.77M | break; |
961 | 6.09M | } while (true); |
962 | 5.77M | AST *body = parse(MAX_PRECEDENCE, current_depth + 1); |
963 | 5.77M | return alloc->make<Local>(span(begin, body), begin.fodder, binds, body); |
964 | 5.77M | } |
965 | | |
966 | 112M | default: |
967 | 112M | return nullptr; |
968 | 128M | } |
969 | 128M | } |
970 | | |
971 | | |
972 | | /** Parse a general expression. |
973 | | * |
974 | | * Consume infix tokens up to (but not including) max_precedence, then stop. |
975 | | * \param max_precedence The maximum precedence to consider. |
976 | | * \param current_depth Current recursion depth to prevent stack overflow. |
977 | | * \returns The parsed AST. |
978 | | */ |
979 | | AST *parse(unsigned max_precedence, unsigned current_depth) |
980 | 128M | { |
981 | 128M | if (current_depth >= MAX_PARSER_DEPTH) { |
982 | 11 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
983 | 11 | } |
984 | | |
985 | 128M | AST *ast = maybeParseGreedy(current_depth + 1); |
986 | | // There cannot be an operator after a greedy parse. |
987 | 128M | if (ast != nullptr) return ast; |
988 | | |
989 | | // If we get here, we could be parsing an infix construct. |
990 | | |
991 | | // Allocate this on the heap to control stack growth. |
992 | 112M | std::unique_ptr<Token> begin_(new Token(peek())); |
993 | 112M | const Token &begin = *begin_; |
994 | | |
995 | 112M | AST *lhs = parseTerminalBracketsOrUnary(current_depth + 1); |
996 | | |
997 | 112M | return parseInfix(lhs, begin, max_precedence, current_depth + 1); |
998 | 128M | } |
999 | | |
1000 | | /** Parse infix operators (binary operators, indexing, function calls). |
1001 | | * |
1002 | | * \param lhs Left-hand side of the operator. |
1003 | | * \param begin The token representing the beginning of the expression. |
1004 | | * \param max_precedence The maximum precedence to consider. |
1005 | | * \param current_depth Current recursion depth to prevent stack overflow. |
1006 | | * \returns The parsed AST. |
1007 | | */ |
1008 | | AST *parseInfix(AST *lhs, const Token &begin, unsigned max_precedence, unsigned current_depth) |
1009 | 111M | { |
1010 | 111M | if (current_depth >= MAX_PARSER_DEPTH) { |
1011 | 0 | throw StaticError(peek().location, "Exceeded maximum parse depth limit."); |
1012 | 0 | } |
1013 | | |
1014 | 173M | while (true) { |
1015 | | |
1016 | 173M | BinaryOp bop = BOP_PLUS; |
1017 | 173M | unsigned op_precedence = 0; |
1018 | | |
1019 | 173M | switch (peek().kind) { |
1020 | | // Logical / arithmetic binary operator. |
1021 | 8.29k | case Token::IN: |
1022 | 26.8M | case Token::OPERATOR: |
1023 | | // These occur if the outer statement was an assert or array slice. |
1024 | | // Either way, we terminate the parsing here. |
1025 | 26.8M | if (peek().data == ":" || peek().data == "::") { |
1026 | 1.93M | return lhs; |
1027 | 1.93M | } |
1028 | 24.9M | if (!op_is_binary(peek().data, bop)) { |
1029 | 487 | std::stringstream ss; |
1030 | 487 | ss << "not a binary operator: " << peek().data; |
1031 | 487 | throw StaticError(peek().location, ss.str()); |
1032 | 487 | } |
1033 | 24.9M | op_precedence = precedence_map[bop]; |
1034 | 24.9M | break; |
1035 | | |
1036 | | // Index, Apply |
1037 | 17.2M | case Token::DOT: |
1038 | 21.3M | case Token::BRACKET_L: |
1039 | 40.8M | case Token::PAREN_L: |
1040 | 41.1M | case Token::BRACE_L: |
1041 | 41.1M | op_precedence = APPLY_PRECEDENCE; |
1042 | 41.1M | break; |
1043 | | |
1044 | 105M | default: |
1045 | | // This happens when we reach EOF or the terminating token of an outer context. |
1046 | 105M | return lhs; |
1047 | 173M | } |
1048 | | |
1049 | | // If higher precedence than the outer recursive call, let that handle it. |
1050 | 66.1M | if (op_precedence >= max_precedence) |
1051 | 4.08M | return lhs; |
1052 | | |
1053 | 62.0M | Token op = pop(); |
1054 | | |
1055 | 62.0M | switch (op.kind) { |
1056 | 4.13M | case Token::BRACKET_L: { |
1057 | 4.13M | bool is_slice; |
1058 | 4.13M | AST *first = nullptr; |
1059 | 4.13M | Fodder second_fodder; |
1060 | 4.13M | AST *second = nullptr; |
1061 | 4.13M | Fodder third_fodder; |
1062 | 4.13M | AST *third = nullptr; |
1063 | | |
1064 | 4.13M | if (peek().kind == Token::BRACKET_R) |
1065 | 5 | throw unexpected(pop(), "parsing index"); |
1066 | | |
1067 | 4.13M | if (peek().data != ":" && peek().data != "::") { |
1068 | 4.07M | first = parse(MAX_PRECEDENCE, current_depth + 1); |
1069 | 4.07M | } |
1070 | | |
1071 | 4.13M | if (peek().kind == Token::OPERATOR && peek().data == "::") { |
1072 | | // Handle :: |
1073 | 2.69k | is_slice = true; |
1074 | 2.69k | Token joined = pop(); |
1075 | 2.69k | second_fodder = joined.fodder; |
1076 | | |
1077 | 2.69k | if (peek().kind != Token::BRACKET_R) |
1078 | 2.00k | third = parse(MAX_PRECEDENCE, current_depth + 1); |
1079 | | |
1080 | 4.13M | } else if (peek().kind != Token::BRACKET_R) { |
1081 | 445k | is_slice = true; |
1082 | 445k | Token delim = pop(); |
1083 | 445k | if (delim.data != ":") |
1084 | 370 | throw unexpected(delim, "parsing slice"); |
1085 | | |
1086 | 444k | second_fodder = delim.fodder; |
1087 | | |
1088 | 444k | if (peek().data != ":" && peek().kind != Token::BRACKET_R) |
1089 | 197k | second = parse(MAX_PRECEDENCE, current_depth + 1); |
1090 | | |
1091 | 444k | if (peek().kind != Token::BRACKET_R) { |
1092 | 26.7k | Token delim = pop(); |
1093 | 26.7k | if (delim.data != ":") |
1094 | 111 | throw unexpected(delim, "parsing slice"); |
1095 | | |
1096 | 26.6k | third_fodder = delim.fodder; |
1097 | | |
1098 | 26.6k | if (peek().kind != Token::BRACKET_R) |
1099 | 26.1k | third = parse(MAX_PRECEDENCE, current_depth + 1); |
1100 | 26.6k | } |
1101 | 3.68M | } else { |
1102 | 3.68M | is_slice = false; |
1103 | 3.68M | } |
1104 | 4.13M | Token end = popExpect(Token::BRACKET_R); |
1105 | 4.13M | lhs = alloc->make<Index>(span(begin, end), |
1106 | 4.13M | EMPTY_FODDER, |
1107 | 4.13M | lhs, |
1108 | 4.13M | op.fodder, |
1109 | 4.13M | is_slice, |
1110 | 4.13M | first, |
1111 | 4.13M | second_fodder, |
1112 | 4.13M | second, |
1113 | 4.13M | third_fodder, |
1114 | 4.13M | third, |
1115 | 4.13M | end.fodder); |
1116 | 4.13M | break; |
1117 | 4.13M | } |
1118 | 17.2M | case Token::DOT: { |
1119 | 17.2M | Token field_id = popExpect(Token::IDENTIFIER); |
1120 | 17.2M | const Identifier *id = alloc->makeIdentifier(field_id.data32()); |
1121 | 17.2M | lhs = alloc->make<Index>(span(begin, field_id), |
1122 | 17.2M | EMPTY_FODDER, |
1123 | 17.2M | lhs, |
1124 | 17.2M | op.fodder, |
1125 | 17.2M | field_id.fodder, |
1126 | 17.2M | id); |
1127 | 17.2M | break; |
1128 | 4.13M | } |
1129 | 19.5M | case Token::PAREN_L: { |
1130 | 19.5M | ArgParams args; |
1131 | 19.5M | bool got_comma; |
1132 | 19.5M | Token end = parseArgs(args, "function argument", got_comma, current_depth); |
1133 | 19.5M | bool got_named = false; |
1134 | 32.6M | for (const auto& arg : args) { |
1135 | 32.6M | if (arg.id != nullptr) { |
1136 | 76.7k | got_named = true; |
1137 | 32.5M | } else { |
1138 | 32.5M | if (got_named) { |
1139 | 10 | throw StaticError(arg.expr->location, "Positional argument after a named argument is not allowed"); |
1140 | 10 | } |
1141 | 32.5M | } |
1142 | 32.6M | } |
1143 | 19.5M | bool tailstrict = false; |
1144 | 19.5M | Fodder tailstrict_fodder; |
1145 | 19.5M | if (peek().kind == Token::TAILSTRICT) { |
1146 | 962k | Token tailstrict_token = pop(); |
1147 | 962k | tailstrict_fodder = tailstrict_token.fodder; |
1148 | 962k | tailstrict = true; |
1149 | 962k | } |
1150 | 19.5M | lhs = alloc->make<Apply>(span(begin, end), |
1151 | 19.5M | EMPTY_FODDER, |
1152 | 19.5M | lhs, |
1153 | 19.5M | op.fodder, |
1154 | 19.5M | args, |
1155 | 19.5M | got_comma, |
1156 | 19.5M | end.fodder, |
1157 | 19.5M | tailstrict_fodder, |
1158 | 19.5M | tailstrict); |
1159 | 19.5M | break; |
1160 | 19.5M | } |
1161 | 281k | case Token::BRACE_L: { |
1162 | 281k | AST *obj; |
1163 | 281k | Token end = parseObjectRemainder(obj, op, current_depth + 1); |
1164 | 281k | lhs = alloc->make<ApplyBrace>(span(begin, end), EMPTY_FODDER, lhs, obj); |
1165 | 281k | break; |
1166 | 19.5M | } |
1167 | | |
1168 | 6.88k | case Token::IN: { |
1169 | 6.88k | if (peek().kind == Token::SUPER) { |
1170 | 578 | Token super = pop(); |
1171 | 578 | lhs = alloc->make<InSuper>( |
1172 | 578 | span(begin, super), EMPTY_FODDER, lhs, op.fodder, super.fodder); |
1173 | 6.31k | } else { |
1174 | 6.31k | AST *rhs = parse(op_precedence, current_depth + 1); |
1175 | 6.31k | lhs = alloc->make<Binary>( |
1176 | 6.31k | span(begin, rhs), EMPTY_FODDER, lhs, op.fodder, bop, rhs); |
1177 | 6.31k | } |
1178 | 6.88k | break; |
1179 | 19.5M | } |
1180 | | |
1181 | 20.8M | case Token::OPERATOR: { |
1182 | 20.8M | AST *rhs = parse(op_precedence, current_depth + 1); |
1183 | 20.8M | lhs = alloc->make<Binary>( |
1184 | 20.8M | span(begin, rhs), EMPTY_FODDER, lhs, op.fodder, bop, rhs); |
1185 | 20.8M | break; |
1186 | 19.5M | } |
1187 | | |
1188 | 0 | default: { |
1189 | 0 | std::cerr << "Should not be here." << std::endl; |
1190 | 0 | abort(); |
1191 | 19.5M | } |
1192 | 62.0M | } |
1193 | 62.0M | } |
1194 | | |
1195 | | // (1 & ((1 + (1 * 1)) + 1)) & 1 |
1196 | | // |
1197 | | // |
1198 | | |
1199 | | /* |
1200 | | // Allocate this on the heap to control stack growth. |
1201 | | std::unique_ptr<Token> begin_(new Token(peek())); |
1202 | | const Token &begin = *begin_; |
1203 | | */ |
1204 | 111M | } |
1205 | | }; |
1206 | | |
1207 | | } // namespace |
1208 | | |
1209 | | AST *jsonnet_parse(Allocator *alloc, Tokens &tokens) |
1210 | 46.1k | { |
1211 | 46.1k | Parser parser(tokens, alloc); |
1212 | 46.1k | unsigned parse_depth = 0; |
1213 | 46.1k | AST *expr = parser.parse(MAX_PRECEDENCE, parse_depth); |
1214 | 46.1k | if (tokens.front().kind != Token::END_OF_FILE) { |
1215 | 671 | std::stringstream ss; |
1216 | 671 | ss << "did not expect: " << tokens.front(); |
1217 | 671 | throw StaticError(tokens.front().location, ss.str()); |
1218 | 671 | } |
1219 | | |
1220 | 45.4k | return expr; |
1221 | 46.1k | } |
1222 | | |
1223 | | } // namespace jsonnet::internal |