/src/Python-3.8.3/Python/ast.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * This file includes functions to transform a concrete syntax tree (CST) to |
3 | | * an abstract syntax tree (AST). The main function is PyAST_FromNode(). |
4 | | * |
5 | | */ |
6 | | #include "Python.h" |
7 | | #include "Python-ast.h" |
8 | | #include "node.h" |
9 | | #include "ast.h" |
10 | | #include "token.h" |
11 | | #include "pythonrun.h" |
12 | | |
13 | | #include <assert.h> |
14 | | #include <stdbool.h> |
15 | | |
16 | 0 | #define MAXLEVEL 200 /* Max parentheses level */ |
17 | | |
18 | | static int validate_stmts(asdl_seq *); |
19 | | static int validate_exprs(asdl_seq *, expr_context_ty, int); |
20 | | static int validate_nonempty_seq(asdl_seq *, const char *, const char *); |
21 | | static int validate_stmt(stmt_ty); |
22 | | static int validate_expr(expr_ty, expr_context_ty); |
23 | | |
24 | | static int |
25 | | validate_comprehension(asdl_seq *gens) |
26 | 0 | { |
27 | 0 | Py_ssize_t i; |
28 | 0 | if (!asdl_seq_LEN(gens)) { |
29 | 0 | PyErr_SetString(PyExc_ValueError, "comprehension with no generators"); |
30 | 0 | return 0; |
31 | 0 | } |
32 | 0 | for (i = 0; i < asdl_seq_LEN(gens); i++) { |
33 | 0 | comprehension_ty comp = asdl_seq_GET(gens, i); |
34 | 0 | if (!validate_expr(comp->target, Store) || |
35 | 0 | !validate_expr(comp->iter, Load) || |
36 | 0 | !validate_exprs(comp->ifs, Load, 0)) |
37 | 0 | return 0; |
38 | 0 | } |
39 | 0 | return 1; |
40 | 0 | } |
41 | | |
42 | | static int |
43 | | validate_slice(slice_ty slice) |
44 | 0 | { |
45 | 0 | switch (slice->kind) { |
46 | 0 | case Slice_kind: |
47 | 0 | return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) && |
48 | 0 | (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) && |
49 | 0 | (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load)); |
50 | 0 | case ExtSlice_kind: { |
51 | 0 | Py_ssize_t i; |
52 | 0 | if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice")) |
53 | 0 | return 0; |
54 | 0 | for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++) |
55 | 0 | if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i))) |
56 | 0 | return 0; |
57 | 0 | return 1; |
58 | 0 | } |
59 | 0 | case Index_kind: |
60 | 0 | return validate_expr(slice->v.Index.value, Load); |
61 | 0 | default: |
62 | 0 | PyErr_SetString(PyExc_SystemError, "unknown slice node"); |
63 | 0 | return 0; |
64 | 0 | } |
65 | 0 | } |
66 | | |
67 | | static int |
68 | | validate_keywords(asdl_seq *keywords) |
69 | 0 | { |
70 | 0 | Py_ssize_t i; |
71 | 0 | for (i = 0; i < asdl_seq_LEN(keywords); i++) |
72 | 0 | if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load)) |
73 | 0 | return 0; |
74 | 0 | return 1; |
75 | 0 | } |
76 | | |
77 | | static int |
78 | | validate_args(asdl_seq *args) |
79 | 0 | { |
80 | 0 | Py_ssize_t i; |
81 | 0 | for (i = 0; i < asdl_seq_LEN(args); i++) { |
82 | 0 | arg_ty arg = asdl_seq_GET(args, i); |
83 | 0 | if (arg->annotation && !validate_expr(arg->annotation, Load)) |
84 | 0 | return 0; |
85 | 0 | } |
86 | 0 | return 1; |
87 | 0 | } |
88 | | |
89 | | static const char * |
90 | | expr_context_name(expr_context_ty ctx) |
91 | 0 | { |
92 | 0 | switch (ctx) { |
93 | 0 | case Load: |
94 | 0 | return "Load"; |
95 | 0 | case Store: |
96 | 0 | return "Store"; |
97 | 0 | case Del: |
98 | 0 | return "Del"; |
99 | 0 | case AugLoad: |
100 | 0 | return "AugLoad"; |
101 | 0 | case AugStore: |
102 | 0 | return "AugStore"; |
103 | 0 | case Param: |
104 | 0 | return "Param"; |
105 | 0 | default: |
106 | 0 | Py_UNREACHABLE(); |
107 | 0 | } |
108 | 0 | } |
109 | | |
110 | | static int |
111 | | validate_arguments(arguments_ty args) |
112 | 0 | { |
113 | 0 | if (!validate_args(args->posonlyargs) || !validate_args(args->args)) { |
114 | 0 | return 0; |
115 | 0 | } |
116 | 0 | if (args->vararg && args->vararg->annotation |
117 | 0 | && !validate_expr(args->vararg->annotation, Load)) { |
118 | 0 | return 0; |
119 | 0 | } |
120 | 0 | if (!validate_args(args->kwonlyargs)) |
121 | 0 | return 0; |
122 | 0 | if (args->kwarg && args->kwarg->annotation |
123 | 0 | && !validate_expr(args->kwarg->annotation, Load)) { |
124 | 0 | return 0; |
125 | 0 | } |
126 | 0 | if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) { |
127 | 0 | PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments"); |
128 | 0 | return 0; |
129 | 0 | } |
130 | 0 | if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) { |
131 | 0 | PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as " |
132 | 0 | "kw_defaults on arguments"); |
133 | 0 | return 0; |
134 | 0 | } |
135 | 0 | return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1); |
136 | 0 | } |
137 | | |
138 | | static int |
139 | | validate_constant(PyObject *value) |
140 | 0 | { |
141 | 0 | if (value == Py_None || value == Py_Ellipsis) |
142 | 0 | return 1; |
143 | | |
144 | 0 | if (PyLong_CheckExact(value) |
145 | 0 | || PyFloat_CheckExact(value) |
146 | 0 | || PyComplex_CheckExact(value) |
147 | 0 | || PyBool_Check(value) |
148 | 0 | || PyUnicode_CheckExact(value) |
149 | 0 | || PyBytes_CheckExact(value)) |
150 | 0 | return 1; |
151 | | |
152 | 0 | if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) { |
153 | 0 | PyObject *it; |
154 | |
|
155 | 0 | it = PyObject_GetIter(value); |
156 | 0 | if (it == NULL) |
157 | 0 | return 0; |
158 | | |
159 | 0 | while (1) { |
160 | 0 | PyObject *item = PyIter_Next(it); |
161 | 0 | if (item == NULL) { |
162 | 0 | if (PyErr_Occurred()) { |
163 | 0 | Py_DECREF(it); |
164 | 0 | return 0; |
165 | 0 | } |
166 | 0 | break; |
167 | 0 | } |
168 | | |
169 | 0 | if (!validate_constant(item)) { |
170 | 0 | Py_DECREF(it); |
171 | 0 | Py_DECREF(item); |
172 | 0 | return 0; |
173 | 0 | } |
174 | 0 | Py_DECREF(item); |
175 | 0 | } |
176 | | |
177 | 0 | Py_DECREF(it); |
178 | 0 | return 1; |
179 | 0 | } |
180 | | |
181 | 0 | return 0; |
182 | 0 | } |
183 | | |
184 | | static int |
185 | | validate_expr(expr_ty exp, expr_context_ty ctx) |
186 | 0 | { |
187 | 0 | int check_ctx = 1; |
188 | 0 | expr_context_ty actual_ctx; |
189 | | |
190 | | /* First check expression context. */ |
191 | 0 | switch (exp->kind) { |
192 | 0 | case Attribute_kind: |
193 | 0 | actual_ctx = exp->v.Attribute.ctx; |
194 | 0 | break; |
195 | 0 | case Subscript_kind: |
196 | 0 | actual_ctx = exp->v.Subscript.ctx; |
197 | 0 | break; |
198 | 0 | case Starred_kind: |
199 | 0 | actual_ctx = exp->v.Starred.ctx; |
200 | 0 | break; |
201 | 0 | case Name_kind: |
202 | 0 | actual_ctx = exp->v.Name.ctx; |
203 | 0 | break; |
204 | 0 | case List_kind: |
205 | 0 | actual_ctx = exp->v.List.ctx; |
206 | 0 | break; |
207 | 0 | case Tuple_kind: |
208 | 0 | actual_ctx = exp->v.Tuple.ctx; |
209 | 0 | break; |
210 | 0 | default: |
211 | 0 | if (ctx != Load) { |
212 | 0 | PyErr_Format(PyExc_ValueError, "expression which can't be " |
213 | 0 | "assigned to in %s context", expr_context_name(ctx)); |
214 | 0 | return 0; |
215 | 0 | } |
216 | 0 | check_ctx = 0; |
217 | | /* set actual_ctx to prevent gcc warning */ |
218 | 0 | actual_ctx = 0; |
219 | 0 | } |
220 | 0 | if (check_ctx && actual_ctx != ctx) { |
221 | 0 | PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead", |
222 | 0 | expr_context_name(ctx), expr_context_name(actual_ctx)); |
223 | 0 | return 0; |
224 | 0 | } |
225 | | |
226 | | /* Now validate expression. */ |
227 | 0 | switch (exp->kind) { |
228 | 0 | case BoolOp_kind: |
229 | 0 | if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) { |
230 | 0 | PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values"); |
231 | 0 | return 0; |
232 | 0 | } |
233 | 0 | return validate_exprs(exp->v.BoolOp.values, Load, 0); |
234 | 0 | case BinOp_kind: |
235 | 0 | return validate_expr(exp->v.BinOp.left, Load) && |
236 | 0 | validate_expr(exp->v.BinOp.right, Load); |
237 | 0 | case UnaryOp_kind: |
238 | 0 | return validate_expr(exp->v.UnaryOp.operand, Load); |
239 | 0 | case Lambda_kind: |
240 | 0 | return validate_arguments(exp->v.Lambda.args) && |
241 | 0 | validate_expr(exp->v.Lambda.body, Load); |
242 | 0 | case IfExp_kind: |
243 | 0 | return validate_expr(exp->v.IfExp.test, Load) && |
244 | 0 | validate_expr(exp->v.IfExp.body, Load) && |
245 | 0 | validate_expr(exp->v.IfExp.orelse, Load); |
246 | 0 | case Dict_kind: |
247 | 0 | if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) { |
248 | 0 | PyErr_SetString(PyExc_ValueError, |
249 | 0 | "Dict doesn't have the same number of keys as values"); |
250 | 0 | return 0; |
251 | 0 | } |
252 | | /* null_ok=1 for keys expressions to allow dict unpacking to work in |
253 | | dict literals, i.e. ``{**{a:b}}`` */ |
254 | 0 | return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) && |
255 | 0 | validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0); |
256 | 0 | case Set_kind: |
257 | 0 | return validate_exprs(exp->v.Set.elts, Load, 0); |
258 | 0 | #define COMP(NAME) \ |
259 | 0 | case NAME ## _kind: \ |
260 | 0 | return validate_comprehension(exp->v.NAME.generators) && \ |
261 | 0 | validate_expr(exp->v.NAME.elt, Load); |
262 | 0 | COMP(ListComp) |
263 | 0 | COMP(SetComp) |
264 | 0 | COMP(GeneratorExp) |
265 | 0 | #undef COMP |
266 | 0 | case DictComp_kind: |
267 | 0 | return validate_comprehension(exp->v.DictComp.generators) && |
268 | 0 | validate_expr(exp->v.DictComp.key, Load) && |
269 | 0 | validate_expr(exp->v.DictComp.value, Load); |
270 | 0 | case Yield_kind: |
271 | 0 | return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load); |
272 | 0 | case YieldFrom_kind: |
273 | 0 | return validate_expr(exp->v.YieldFrom.value, Load); |
274 | 0 | case Await_kind: |
275 | 0 | return validate_expr(exp->v.Await.value, Load); |
276 | 0 | case Compare_kind: |
277 | 0 | if (!asdl_seq_LEN(exp->v.Compare.comparators)) { |
278 | 0 | PyErr_SetString(PyExc_ValueError, "Compare with no comparators"); |
279 | 0 | return 0; |
280 | 0 | } |
281 | 0 | if (asdl_seq_LEN(exp->v.Compare.comparators) != |
282 | 0 | asdl_seq_LEN(exp->v.Compare.ops)) { |
283 | 0 | PyErr_SetString(PyExc_ValueError, "Compare has a different number " |
284 | 0 | "of comparators and operands"); |
285 | 0 | return 0; |
286 | 0 | } |
287 | 0 | return validate_exprs(exp->v.Compare.comparators, Load, 0) && |
288 | 0 | validate_expr(exp->v.Compare.left, Load); |
289 | 0 | case Call_kind: |
290 | 0 | return validate_expr(exp->v.Call.func, Load) && |
291 | 0 | validate_exprs(exp->v.Call.args, Load, 0) && |
292 | 0 | validate_keywords(exp->v.Call.keywords); |
293 | 0 | case Constant_kind: |
294 | 0 | if (!validate_constant(exp->v.Constant.value)) { |
295 | 0 | PyErr_Format(PyExc_TypeError, |
296 | 0 | "got an invalid type in Constant: %s", |
297 | 0 | Py_TYPE(exp->v.Constant.value)->tp_name); |
298 | 0 | return 0; |
299 | 0 | } |
300 | 0 | return 1; |
301 | 0 | case JoinedStr_kind: |
302 | 0 | return validate_exprs(exp->v.JoinedStr.values, Load, 0); |
303 | 0 | case FormattedValue_kind: |
304 | 0 | if (validate_expr(exp->v.FormattedValue.value, Load) == 0) |
305 | 0 | return 0; |
306 | 0 | if (exp->v.FormattedValue.format_spec) |
307 | 0 | return validate_expr(exp->v.FormattedValue.format_spec, Load); |
308 | 0 | return 1; |
309 | 0 | case Attribute_kind: |
310 | 0 | return validate_expr(exp->v.Attribute.value, Load); |
311 | 0 | case Subscript_kind: |
312 | 0 | return validate_slice(exp->v.Subscript.slice) && |
313 | 0 | validate_expr(exp->v.Subscript.value, Load); |
314 | 0 | case Starred_kind: |
315 | 0 | return validate_expr(exp->v.Starred.value, ctx); |
316 | 0 | case List_kind: |
317 | 0 | return validate_exprs(exp->v.List.elts, ctx, 0); |
318 | 0 | case Tuple_kind: |
319 | 0 | return validate_exprs(exp->v.Tuple.elts, ctx, 0); |
320 | 0 | case NamedExpr_kind: |
321 | 0 | return validate_expr(exp->v.NamedExpr.value, Load); |
322 | | /* This last case doesn't have any checking. */ |
323 | 0 | case Name_kind: |
324 | 0 | return 1; |
325 | 0 | } |
326 | 0 | PyErr_SetString(PyExc_SystemError, "unexpected expression"); |
327 | 0 | return 0; |
328 | 0 | } |
329 | | |
330 | | static int |
331 | | validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner) |
332 | 0 | { |
333 | 0 | if (asdl_seq_LEN(seq)) |
334 | 0 | return 1; |
335 | 0 | PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner); |
336 | 0 | return 0; |
337 | 0 | } |
338 | | |
339 | | static int |
340 | | validate_assignlist(asdl_seq *targets, expr_context_ty ctx) |
341 | 0 | { |
342 | 0 | return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") && |
343 | 0 | validate_exprs(targets, ctx, 0); |
344 | 0 | } |
345 | | |
346 | | static int |
347 | | validate_body(asdl_seq *body, const char *owner) |
348 | 0 | { |
349 | 0 | return validate_nonempty_seq(body, "body", owner) && validate_stmts(body); |
350 | 0 | } |
351 | | |
352 | | static int |
353 | | validate_stmt(stmt_ty stmt) |
354 | 0 | { |
355 | 0 | Py_ssize_t i; |
356 | 0 | switch (stmt->kind) { |
357 | 0 | case FunctionDef_kind: |
358 | 0 | return validate_body(stmt->v.FunctionDef.body, "FunctionDef") && |
359 | 0 | validate_arguments(stmt->v.FunctionDef.args) && |
360 | 0 | validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) && |
361 | 0 | (!stmt->v.FunctionDef.returns || |
362 | 0 | validate_expr(stmt->v.FunctionDef.returns, Load)); |
363 | 0 | case ClassDef_kind: |
364 | 0 | return validate_body(stmt->v.ClassDef.body, "ClassDef") && |
365 | 0 | validate_exprs(stmt->v.ClassDef.bases, Load, 0) && |
366 | 0 | validate_keywords(stmt->v.ClassDef.keywords) && |
367 | 0 | validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0); |
368 | 0 | case Return_kind: |
369 | 0 | return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load); |
370 | 0 | case Delete_kind: |
371 | 0 | return validate_assignlist(stmt->v.Delete.targets, Del); |
372 | 0 | case Assign_kind: |
373 | 0 | return validate_assignlist(stmt->v.Assign.targets, Store) && |
374 | 0 | validate_expr(stmt->v.Assign.value, Load); |
375 | 0 | case AugAssign_kind: |
376 | 0 | return validate_expr(stmt->v.AugAssign.target, Store) && |
377 | 0 | validate_expr(stmt->v.AugAssign.value, Load); |
378 | 0 | case AnnAssign_kind: |
379 | 0 | if (stmt->v.AnnAssign.target->kind != Name_kind && |
380 | 0 | stmt->v.AnnAssign.simple) { |
381 | 0 | PyErr_SetString(PyExc_TypeError, |
382 | 0 | "AnnAssign with simple non-Name target"); |
383 | 0 | return 0; |
384 | 0 | } |
385 | 0 | return validate_expr(stmt->v.AnnAssign.target, Store) && |
386 | 0 | (!stmt->v.AnnAssign.value || |
387 | 0 | validate_expr(stmt->v.AnnAssign.value, Load)) && |
388 | 0 | validate_expr(stmt->v.AnnAssign.annotation, Load); |
389 | 0 | case For_kind: |
390 | 0 | return validate_expr(stmt->v.For.target, Store) && |
391 | 0 | validate_expr(stmt->v.For.iter, Load) && |
392 | 0 | validate_body(stmt->v.For.body, "For") && |
393 | 0 | validate_stmts(stmt->v.For.orelse); |
394 | 0 | case AsyncFor_kind: |
395 | 0 | return validate_expr(stmt->v.AsyncFor.target, Store) && |
396 | 0 | validate_expr(stmt->v.AsyncFor.iter, Load) && |
397 | 0 | validate_body(stmt->v.AsyncFor.body, "AsyncFor") && |
398 | 0 | validate_stmts(stmt->v.AsyncFor.orelse); |
399 | 0 | case While_kind: |
400 | 0 | return validate_expr(stmt->v.While.test, Load) && |
401 | 0 | validate_body(stmt->v.While.body, "While") && |
402 | 0 | validate_stmts(stmt->v.While.orelse); |
403 | 0 | case If_kind: |
404 | 0 | return validate_expr(stmt->v.If.test, Load) && |
405 | 0 | validate_body(stmt->v.If.body, "If") && |
406 | 0 | validate_stmts(stmt->v.If.orelse); |
407 | 0 | case With_kind: |
408 | 0 | if (!validate_nonempty_seq(stmt->v.With.items, "items", "With")) |
409 | 0 | return 0; |
410 | 0 | for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) { |
411 | 0 | withitem_ty item = asdl_seq_GET(stmt->v.With.items, i); |
412 | 0 | if (!validate_expr(item->context_expr, Load) || |
413 | 0 | (item->optional_vars && !validate_expr(item->optional_vars, Store))) |
414 | 0 | return 0; |
415 | 0 | } |
416 | 0 | return validate_body(stmt->v.With.body, "With"); |
417 | 0 | case AsyncWith_kind: |
418 | 0 | if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith")) |
419 | 0 | return 0; |
420 | 0 | for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) { |
421 | 0 | withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i); |
422 | 0 | if (!validate_expr(item->context_expr, Load) || |
423 | 0 | (item->optional_vars && !validate_expr(item->optional_vars, Store))) |
424 | 0 | return 0; |
425 | 0 | } |
426 | 0 | return validate_body(stmt->v.AsyncWith.body, "AsyncWith"); |
427 | 0 | case Raise_kind: |
428 | 0 | if (stmt->v.Raise.exc) { |
429 | 0 | return validate_expr(stmt->v.Raise.exc, Load) && |
430 | 0 | (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load)); |
431 | 0 | } |
432 | 0 | if (stmt->v.Raise.cause) { |
433 | 0 | PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception"); |
434 | 0 | return 0; |
435 | 0 | } |
436 | 0 | return 1; |
437 | 0 | case Try_kind: |
438 | 0 | if (!validate_body(stmt->v.Try.body, "Try")) |
439 | 0 | return 0; |
440 | 0 | if (!asdl_seq_LEN(stmt->v.Try.handlers) && |
441 | 0 | !asdl_seq_LEN(stmt->v.Try.finalbody)) { |
442 | 0 | PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody"); |
443 | 0 | return 0; |
444 | 0 | } |
445 | 0 | if (!asdl_seq_LEN(stmt->v.Try.handlers) && |
446 | 0 | asdl_seq_LEN(stmt->v.Try.orelse)) { |
447 | 0 | PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers"); |
448 | 0 | return 0; |
449 | 0 | } |
450 | 0 | for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) { |
451 | 0 | excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i); |
452 | 0 | if ((handler->v.ExceptHandler.type && |
453 | 0 | !validate_expr(handler->v.ExceptHandler.type, Load)) || |
454 | 0 | !validate_body(handler->v.ExceptHandler.body, "ExceptHandler")) |
455 | 0 | return 0; |
456 | 0 | } |
457 | 0 | return (!asdl_seq_LEN(stmt->v.Try.finalbody) || |
458 | 0 | validate_stmts(stmt->v.Try.finalbody)) && |
459 | 0 | (!asdl_seq_LEN(stmt->v.Try.orelse) || |
460 | 0 | validate_stmts(stmt->v.Try.orelse)); |
461 | 0 | case Assert_kind: |
462 | 0 | return validate_expr(stmt->v.Assert.test, Load) && |
463 | 0 | (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load)); |
464 | 0 | case Import_kind: |
465 | 0 | return validate_nonempty_seq(stmt->v.Import.names, "names", "Import"); |
466 | 0 | case ImportFrom_kind: |
467 | 0 | if (stmt->v.ImportFrom.level < 0) { |
468 | 0 | PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level"); |
469 | 0 | return 0; |
470 | 0 | } |
471 | 0 | return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom"); |
472 | 0 | case Global_kind: |
473 | 0 | return validate_nonempty_seq(stmt->v.Global.names, "names", "Global"); |
474 | 0 | case Nonlocal_kind: |
475 | 0 | return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal"); |
476 | 0 | case Expr_kind: |
477 | 0 | return validate_expr(stmt->v.Expr.value, Load); |
478 | 0 | case AsyncFunctionDef_kind: |
479 | 0 | return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") && |
480 | 0 | validate_arguments(stmt->v.AsyncFunctionDef.args) && |
481 | 0 | validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) && |
482 | 0 | (!stmt->v.AsyncFunctionDef.returns || |
483 | 0 | validate_expr(stmt->v.AsyncFunctionDef.returns, Load)); |
484 | 0 | case Pass_kind: |
485 | 0 | case Break_kind: |
486 | 0 | case Continue_kind: |
487 | 0 | return 1; |
488 | 0 | default: |
489 | 0 | PyErr_SetString(PyExc_SystemError, "unexpected statement"); |
490 | 0 | return 0; |
491 | 0 | } |
492 | 0 | } |
493 | | |
494 | | static int |
495 | | validate_stmts(asdl_seq *seq) |
496 | 0 | { |
497 | 0 | Py_ssize_t i; |
498 | 0 | for (i = 0; i < asdl_seq_LEN(seq); i++) { |
499 | 0 | stmt_ty stmt = asdl_seq_GET(seq, i); |
500 | 0 | if (stmt) { |
501 | 0 | if (!validate_stmt(stmt)) |
502 | 0 | return 0; |
503 | 0 | } |
504 | 0 | else { |
505 | 0 | PyErr_SetString(PyExc_ValueError, |
506 | 0 | "None disallowed in statement list"); |
507 | 0 | return 0; |
508 | 0 | } |
509 | 0 | } |
510 | 0 | return 1; |
511 | 0 | } |
512 | | |
513 | | static int |
514 | | validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok) |
515 | 0 | { |
516 | 0 | Py_ssize_t i; |
517 | 0 | for (i = 0; i < asdl_seq_LEN(exprs); i++) { |
518 | 0 | expr_ty expr = asdl_seq_GET(exprs, i); |
519 | 0 | if (expr) { |
520 | 0 | if (!validate_expr(expr, ctx)) |
521 | 0 | return 0; |
522 | 0 | } |
523 | 0 | else if (!null_ok) { |
524 | 0 | PyErr_SetString(PyExc_ValueError, |
525 | 0 | "None disallowed in expression list"); |
526 | 0 | return 0; |
527 | 0 | } |
528 | |
|
529 | 0 | } |
530 | 0 | return 1; |
531 | 0 | } |
532 | | |
533 | | int |
534 | | PyAST_Validate(mod_ty mod) |
535 | 0 | { |
536 | 0 | int res = 0; |
537 | |
|
538 | 0 | switch (mod->kind) { |
539 | 0 | case Module_kind: |
540 | 0 | res = validate_stmts(mod->v.Module.body); |
541 | 0 | break; |
542 | 0 | case Interactive_kind: |
543 | 0 | res = validate_stmts(mod->v.Interactive.body); |
544 | 0 | break; |
545 | 0 | case Expression_kind: |
546 | 0 | res = validate_expr(mod->v.Expression.body, Load); |
547 | 0 | break; |
548 | 0 | case Suite_kind: |
549 | 0 | PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler"); |
550 | 0 | break; |
551 | 0 | default: |
552 | 0 | PyErr_SetString(PyExc_SystemError, "impossible module node"); |
553 | 0 | res = 0; |
554 | 0 | break; |
555 | 0 | } |
556 | 0 | return res; |
557 | 0 | } |
558 | | |
559 | | /* This is done here, so defines like "test" don't interfere with AST use above. */ |
560 | | #include "grammar.h" |
561 | | #include "parsetok.h" |
562 | | #include "graminit.h" |
563 | | |
564 | | /* Per-conversion state passed to the CST-to-AST helpers in this file. */ |
565 | | struct compiling { |
566 | | PyArena *c_arena; /* Arena used for allocations and for registering created objects. */ |
567 | | PyObject *c_filename; /* Name of the file being converted (borrowed reference). */ |
568 | | PyObject *c_normalize; /* unicodedata.normalize; NULL until init_normalization() fetches it. */ |
569 | | int c_feature_version; /* Latest minor version of Python for allowed features */ |
570 | | }; |
571 | | |
572 | | static asdl_seq *seq_for_testlist(struct compiling *, const node *); |
573 | | static expr_ty ast_for_expr(struct compiling *, const node *); |
574 | | static stmt_ty ast_for_stmt(struct compiling *, const node *); |
575 | | static asdl_seq *ast_for_suite(struct compiling *c, const node *n); |
576 | | static asdl_seq *ast_for_exprlist(struct compiling *, const node *, |
577 | | expr_context_ty); |
578 | | static expr_ty ast_for_testlist(struct compiling *, const node *); |
579 | | static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *); |
580 | | |
581 | | static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool); |
582 | | static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool); |
583 | | |
584 | | /* Note different signature for ast_for_call */ |
585 | | static expr_ty ast_for_call(struct compiling *, const node *, expr_ty, |
586 | | const node *, const node *, const node *); |
587 | | |
588 | | static PyObject *parsenumber(struct compiling *, const char *); |
589 | | static expr_ty parsestrplus(struct compiling *, const node *n); |
590 | | static void get_last_end_pos(asdl_seq *, int *, int *); |
591 | | |
592 | 0 | #define COMP_GENEXP 0 |
593 | 0 | #define COMP_LISTCOMP 1 |
594 | 0 | #define COMP_SETCOMP 2 |
595 | | |
596 | | static int |
597 | | init_normalization(struct compiling *c) |
598 | 0 | { |
599 | 0 | PyObject *m = PyImport_ImportModuleNoBlock("unicodedata"); |
600 | 0 | if (!m) |
601 | 0 | return 0; |
602 | 0 | c->c_normalize = PyObject_GetAttrString(m, "normalize"); |
603 | 0 | Py_DECREF(m); |
604 | 0 | if (!c->c_normalize) |
605 | 0 | return 0; |
606 | 0 | return 1; |
607 | 0 | } |
608 | | |
609 | | static identifier |
610 | | new_identifier(const char *n, struct compiling *c) |
611 | 390 | { |
612 | 390 | PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); |
613 | 390 | if (!id) |
614 | 0 | return NULL; |
615 | | /* PyUnicode_DecodeUTF8 should always return a ready string. */ |
616 | 390 | assert(PyUnicode_IS_READY(id)); |
617 | | /* Check whether there are non-ASCII characters in the |
618 | | identifier; if so, normalize to NFKC. */ |
619 | 390 | if (!PyUnicode_IS_ASCII(id)) { |
620 | 0 | PyObject *id2; |
621 | 0 | _Py_IDENTIFIER(NFKC); |
622 | 0 | if (!c->c_normalize && !init_normalization(c)) { |
623 | 0 | Py_DECREF(id); |
624 | 0 | return NULL; |
625 | 0 | } |
626 | 0 | PyObject *form = _PyUnicode_FromId(&PyId_NFKC); |
627 | 0 | if (form == NULL) { |
628 | 0 | Py_DECREF(id); |
629 | 0 | return NULL; |
630 | 0 | } |
631 | 0 | PyObject *args[2] = {form, id}; |
632 | 0 | id2 = _PyObject_FastCall(c->c_normalize, args, 2); |
633 | 0 | Py_DECREF(id); |
634 | 0 | if (!id2) |
635 | 0 | return NULL; |
636 | 0 | if (!PyUnicode_Check(id2)) { |
637 | 0 | PyErr_Format(PyExc_TypeError, |
638 | 0 | "unicodedata.normalize() must return a string, not " |
639 | 0 | "%.200s", |
640 | 0 | Py_TYPE(id2)->tp_name); |
641 | 0 | Py_DECREF(id2); |
642 | 0 | return NULL; |
643 | 0 | } |
644 | 0 | id = id2; |
645 | 0 | } |
646 | 390 | PyUnicode_InternInPlace(&id); |
647 | 390 | if (PyArena_AddPyObject(c->c_arena, id) < 0) { |
648 | 0 | Py_DECREF(id); |
649 | 0 | return NULL; |
650 | 0 | } |
651 | 390 | return id; |
652 | 390 | } |
653 | | |
654 | 95 | #define NEW_IDENTIFIER(n) new_identifier(STR(n), c) |
655 | | |
656 | | static int |
657 | | ast_error(struct compiling *c, const node *n, const char *errmsg, ...) |
658 | 0 | { |
659 | 0 | PyObject *value, *errstr, *loc, *tmp; |
660 | 0 | va_list va; |
661 | |
|
662 | 0 | va_start(va, errmsg); |
663 | 0 | errstr = PyUnicode_FromFormatV(errmsg, va); |
664 | 0 | va_end(va); |
665 | 0 | if (!errstr) { |
666 | 0 | return 0; |
667 | 0 | } |
668 | 0 | loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n)); |
669 | 0 | if (!loc) { |
670 | 0 | Py_INCREF(Py_None); |
671 | 0 | loc = Py_None; |
672 | 0 | } |
673 | 0 | tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc); |
674 | 0 | if (!tmp) { |
675 | 0 | Py_DECREF(errstr); |
676 | 0 | return 0; |
677 | 0 | } |
678 | 0 | value = PyTuple_Pack(2, errstr, tmp); |
679 | 0 | Py_DECREF(errstr); |
680 | 0 | Py_DECREF(tmp); |
681 | 0 | if (value) { |
682 | 0 | PyErr_SetObject(PyExc_SyntaxError, value); |
683 | 0 | Py_DECREF(value); |
684 | 0 | } |
685 | 0 | return 0; |
686 | 0 | } |
687 | | |
688 | | /* num_stmts() returns number of contained statements. |
689 | | |
690 | | Use this routine to determine how big a sequence is needed for |
691 | | the statements in a parse tree. Its raison d'etre is this bit of |
692 | | grammar: |
693 | | |
694 | | stmt: simple_stmt | compound_stmt |
695 | | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE |
696 | | |
697 | | A simple_stmt can contain multiple small_stmt elements joined |
698 | | by semicolons. If the arg is a simple_stmt, the number of |
699 | | small_stmt elements is returned. |
700 | | */ |
701 | | |
702 | | static string |
703 | | new_type_comment(const char *s, struct compiling *c) |
704 | 0 | { |
705 | 0 | PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL); |
706 | 0 | if (res == NULL) |
707 | 0 | return NULL; |
708 | 0 | if (PyArena_AddPyObject(c->c_arena, res) < 0) { |
709 | 0 | Py_DECREF(res); |
710 | 0 | return NULL; |
711 | 0 | } |
712 | 0 | return res; |
713 | 0 | } |
714 | 0 | #define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c) |
715 | | |
716 | | static int |
717 | | num_stmts(const node *n) |
718 | 622 | { |
719 | 622 | int i, l; |
720 | 622 | node *ch; |
721 | | |
722 | 622 | switch (TYPE(n)) { |
723 | 0 | case single_input: |
724 | 0 | if (TYPE(CHILD(n, 0)) == NEWLINE) |
725 | 0 | return 0; |
726 | 0 | else |
727 | 0 | return num_stmts(CHILD(n, 0)); |
728 | 16 | case file_input: |
729 | 16 | l = 0; |
730 | 66 | for (i = 0; i < NCH(n); i++) { |
731 | 50 | ch = CHILD(n, i); |
732 | 50 | if (TYPE(ch) == stmt) |
733 | 18 | l += num_stmts(ch); |
734 | 50 | } |
735 | 16 | return l; |
736 | 272 | case stmt: |
737 | 272 | return num_stmts(CHILD(n, 0)); |
738 | 72 | case compound_stmt: |
739 | 72 | return 1; |
740 | 202 | case simple_stmt: |
741 | 202 | return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */ |
742 | 60 | case suite: |
743 | 60 | case func_body_suite: |
744 | | /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */ |
745 | | /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */ |
746 | 60 | if (NCH(n) == 1) |
747 | 2 | return num_stmts(CHILD(n, 0)); |
748 | 58 | else { |
749 | 58 | i = 2; |
750 | 58 | l = 0; |
751 | 58 | if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) |
752 | 0 | i += 2; |
753 | 176 | for (; i < (NCH(n) - 1); i++) |
754 | 118 | l += num_stmts(CHILD(n, i)); |
755 | 58 | return l; |
756 | 58 | } |
757 | 0 | default: { |
758 | 0 | char buf[128]; |
759 | |
|
760 | 0 | sprintf(buf, "Non-statement found: %d %d", |
761 | 0 | TYPE(n), NCH(n)); |
762 | 0 | Py_FatalError(buf); |
763 | 60 | } |
764 | 622 | } |
765 | 622 | Py_UNREACHABLE(); |
766 | 622 | } |
767 | | |
768 | | /* Transform the CST rooted at node * to the appropriate AST |
769 | | */ |
770 | | |
771 | | mod_ty |
772 | | PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags, |
773 | | PyObject *filename, PyArena *arena) |
774 | 16 | { |
775 | 16 | int i, j, k, num; |
776 | 16 | asdl_seq *stmts = NULL; |
777 | 16 | asdl_seq *type_ignores = NULL; |
778 | 16 | stmt_ty s; |
779 | 16 | node *ch; |
780 | 16 | struct compiling c; |
781 | 16 | mod_ty res = NULL; |
782 | 16 | asdl_seq *argtypes = NULL; |
783 | 16 | expr_ty ret, arg; |
784 | | |
785 | 16 | c.c_arena = arena; |
786 | | /* borrowed reference */ |
787 | 16 | c.c_filename = filename; |
788 | 16 | c.c_normalize = NULL; |
789 | 16 | c.c_feature_version = flags ? flags->cf_feature_version : PY_MINOR_VERSION; |
790 | | |
791 | 16 | if (TYPE(n) == encoding_decl) |
792 | 2 | n = CHILD(n, 0); |
793 | | |
794 | 16 | k = 0; |
795 | 16 | switch (TYPE(n)) { |
796 | 16 | case file_input: |
797 | 16 | stmts = _Py_asdl_seq_new(num_stmts(n), arena); |
798 | 16 | if (!stmts) |
799 | 0 | goto out; |
800 | 50 | for (i = 0; i < NCH(n) - 1; i++) { |
801 | 34 | ch = CHILD(n, i); |
802 | 34 | if (TYPE(ch) == NEWLINE) |
803 | 16 | continue; |
804 | 18 | REQ(ch, stmt); |
805 | 18 | num = num_stmts(ch); |
806 | 18 | if (num == 1) { |
807 | 18 | s = ast_for_stmt(&c, ch); |
808 | 18 | if (!s) |
809 | 0 | goto out; |
810 | 18 | asdl_seq_SET(stmts, k++, s); |
811 | 18 | } |
812 | 0 | else { |
813 | 0 | ch = CHILD(ch, 0); |
814 | 0 | REQ(ch, simple_stmt); |
815 | 0 | for (j = 0; j < num; j++) { |
816 | 0 | s = ast_for_stmt(&c, CHILD(ch, j * 2)); |
817 | 0 | if (!s) |
818 | 0 | goto out; |
819 | 0 | asdl_seq_SET(stmts, k++, s); |
820 | 0 | } |
821 | 0 | } |
822 | 18 | } |
823 | | |
824 | | /* Type ignores are stored under the ENDMARKER in file_input. */ |
825 | 16 | ch = CHILD(n, NCH(n) - 1); |
826 | 16 | REQ(ch, ENDMARKER); |
827 | 16 | num = NCH(ch); |
828 | 16 | type_ignores = _Py_asdl_seq_new(num, arena); |
829 | 16 | if (!type_ignores) |
830 | 0 | goto out; |
831 | | |
832 | 16 | for (i = 0; i < num; i++) { |
833 | 0 | string type_comment = new_type_comment(STR(CHILD(ch, i)), &c); |
834 | 0 | if (!type_comment) |
835 | 0 | goto out; |
836 | 0 | type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena); |
837 | 0 | if (!ti) |
838 | 0 | goto out; |
839 | 0 | asdl_seq_SET(type_ignores, i, ti); |
840 | 0 | } |
841 | | |
842 | 16 | res = Module(stmts, type_ignores, arena); |
843 | 16 | break; |
844 | 0 | case eval_input: { |
845 | 0 | expr_ty testlist_ast; |
846 | | |
847 | | /* XXX Why not comp_for here? */ |
848 | 0 | testlist_ast = ast_for_testlist(&c, CHILD(n, 0)); |
849 | 0 | if (!testlist_ast) |
850 | 0 | goto out; |
851 | 0 | res = Expression(testlist_ast, arena); |
852 | 0 | break; |
853 | 0 | } |
854 | 0 | case single_input: |
855 | 0 | if (TYPE(CHILD(n, 0)) == NEWLINE) { |
856 | 0 | stmts = _Py_asdl_seq_new(1, arena); |
857 | 0 | if (!stmts) |
858 | 0 | goto out; |
859 | 0 | asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset, |
860 | 0 | n->n_end_lineno, n->n_end_col_offset, |
861 | 0 | arena)); |
862 | 0 | if (!asdl_seq_GET(stmts, 0)) |
863 | 0 | goto out; |
864 | 0 | res = Interactive(stmts, arena); |
865 | 0 | } |
866 | 0 | else { |
867 | 0 | n = CHILD(n, 0); |
868 | 0 | num = num_stmts(n); |
869 | 0 | stmts = _Py_asdl_seq_new(num, arena); |
870 | 0 | if (!stmts) |
871 | 0 | goto out; |
872 | 0 | if (num == 1) { |
873 | 0 | s = ast_for_stmt(&c, n); |
874 | 0 | if (!s) |
875 | 0 | goto out; |
876 | 0 | asdl_seq_SET(stmts, 0, s); |
877 | 0 | } |
878 | 0 | else { |
879 | | /* Only a simple_stmt can contain multiple statements. */ |
880 | 0 | REQ(n, simple_stmt); |
881 | 0 | for (i = 0; i < NCH(n); i += 2) { |
882 | 0 | if (TYPE(CHILD(n, i)) == NEWLINE) |
883 | 0 | break; |
884 | 0 | s = ast_for_stmt(&c, CHILD(n, i)); |
885 | 0 | if (!s) |
886 | 0 | goto out; |
887 | 0 | asdl_seq_SET(stmts, i / 2, s); |
888 | 0 | } |
889 | 0 | } |
890 | | |
891 | 0 | res = Interactive(stmts, arena); |
892 | 0 | } |
893 | 0 | break; |
894 | 0 | case func_type_input: |
895 | 0 | n = CHILD(n, 0); |
896 | 0 | REQ(n, func_type); |
897 | |
|
898 | 0 | if (TYPE(CHILD(n, 1)) == typelist) { |
899 | 0 | ch = CHILD(n, 1); |
900 | | /* this is overly permissive -- we don't pay any attention to |
901 | | * stars on the args -- just parse them into an ordered list */ |
902 | 0 | num = 0; |
903 | 0 | for (i = 0; i < NCH(ch); i++) { |
904 | 0 | if (TYPE(CHILD(ch, i)) == test) { |
905 | 0 | num++; |
906 | 0 | } |
907 | 0 | } |
908 | |
|
909 | 0 | argtypes = _Py_asdl_seq_new(num, arena); |
910 | 0 | if (!argtypes) |
911 | 0 | goto out; |
912 | | |
913 | 0 | j = 0; |
914 | 0 | for (i = 0; i < NCH(ch); i++) { |
915 | 0 | if (TYPE(CHILD(ch, i)) == test) { |
916 | 0 | arg = ast_for_expr(&c, CHILD(ch, i)); |
917 | 0 | if (!arg) |
918 | 0 | goto out; |
919 | 0 | asdl_seq_SET(argtypes, j++, arg); |
920 | 0 | } |
921 | 0 | } |
922 | 0 | } |
923 | 0 | else { |
924 | 0 | argtypes = _Py_asdl_seq_new(0, arena); |
925 | 0 | if (!argtypes) |
926 | 0 | goto out; |
927 | 0 | } |
928 | | |
929 | 0 | ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1)); |
930 | 0 | if (!ret) |
931 | 0 | goto out; |
932 | 0 | res = FunctionType(argtypes, ret, arena); |
933 | 0 | break; |
934 | 0 | default: |
935 | 0 | PyErr_Format(PyExc_SystemError, |
936 | 0 | "invalid node %d for PyAST_FromNode", TYPE(n)); |
937 | 0 | goto out; |
938 | 16 | } |
939 | 16 | out: |
940 | 16 | if (c.c_normalize) { |
941 | 0 | Py_DECREF(c.c_normalize); |
942 | 0 | } |
943 | 16 | return res; |
944 | 16 | } |
945 | | |
946 | | mod_ty |
947 | | PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str, |
948 | | PyArena *arena) |
949 | 0 | { |
950 | 0 | mod_ty mod; |
951 | 0 | PyObject *filename; |
952 | 0 | filename = PyUnicode_DecodeFSDefault(filename_str); |
953 | 0 | if (filename == NULL) |
954 | 0 | return NULL; |
955 | 0 | mod = PyAST_FromNodeObject(n, flags, filename, arena); |
956 | 0 | Py_DECREF(filename); |
957 | 0 | return mod; |
958 | |
|
959 | 0 | } |
960 | | |
961 | | /* Return the AST repr. of the operator represented as syntax (|, ^, etc.) |
962 | | */ |
963 | | |
964 | | static operator_ty |
965 | | get_operator(struct compiling *c, const node *n) |
966 | 12 | { |
967 | 12 | switch (TYPE(n)) { |
968 | 0 | case VBAR: |
969 | 0 | return BitOr; |
970 | 0 | case CIRCUMFLEX: |
971 | 0 | return BitXor; |
972 | 0 | case AMPER: |
973 | 0 | return BitAnd; |
974 | 0 | case LEFTSHIFT: |
975 | 0 | return LShift; |
976 | 0 | case RIGHTSHIFT: |
977 | 0 | return RShift; |
978 | 2 | case PLUS: |
979 | 2 | return Add; |
980 | 4 | case MINUS: |
981 | 4 | return Sub; |
982 | 2 | case STAR: |
983 | 2 | return Mult; |
984 | 0 | case AT: |
985 | 0 | if (c->c_feature_version < 5) { |
986 | 0 | ast_error(c, n, |
987 | 0 | "The '@' operator is only supported in Python 3.5 and greater"); |
988 | 0 | return (operator_ty)0; |
989 | 0 | } |
990 | 0 | return MatMult; |
991 | 0 | case SLASH: |
992 | 0 | return Div; |
993 | 0 | case DOUBLESLASH: |
994 | 0 | return FloorDiv; |
995 | 4 | case PERCENT: |
996 | 4 | return Mod; |
997 | 0 | default: |
998 | 0 | return (operator_ty)0; |
999 | 12 | } |
1000 | 12 | } |
1001 | | |
1002 | | static const char * const FORBIDDEN[] = { |
1003 | | "None", |
1004 | | "True", |
1005 | | "False", |
1006 | | "__debug__", |
1007 | | NULL, |
1008 | | }; |
1009 | | |
1010 | | static int |
1011 | | forbidden_name(struct compiling *c, identifier name, const node *n, |
1012 | | int full_checks) |
1013 | 91 | { |
1014 | 91 | assert(PyUnicode_Check(name)); |
1015 | 91 | const char * const *p = FORBIDDEN; |
1016 | 91 | if (!full_checks) { |
1017 | | /* In most cases, the parser will protect True, False, and None |
1018 | | from being assign to. */ |
1019 | 91 | p += 3; |
1020 | 91 | } |
1021 | 182 | for (; *p; p++) { |
1022 | 91 | if (_PyUnicode_EqualToASCIIString(name, *p)) { |
1023 | 0 | ast_error(c, n, "cannot assign to %U", name); |
1024 | 0 | return 1; |
1025 | 0 | } |
1026 | 91 | } |
1027 | 91 | return 0; |
1028 | 91 | } |
1029 | | |
1030 | | static expr_ty |
1031 | | copy_location(expr_ty e, const node *n, const node *end) |
1032 | 12 | { |
1033 | 12 | if (e) { |
1034 | 12 | e->lineno = LINENO(n); |
1035 | 12 | e->col_offset = n->n_col_offset; |
1036 | 12 | e->end_lineno = end->n_end_lineno; |
1037 | 12 | e->end_col_offset = end->n_end_col_offset; |
1038 | 12 | } |
1039 | 12 | return e; |
1040 | 12 | } |
1041 | | |
1042 | | static const char * |
1043 | | get_expr_name(expr_ty e) |
1044 | 0 | { |
1045 | 0 | switch (e->kind) { |
1046 | 0 | case Attribute_kind: |
1047 | 0 | return "attribute"; |
1048 | 0 | case Subscript_kind: |
1049 | 0 | return "subscript"; |
1050 | 0 | case Starred_kind: |
1051 | 0 | return "starred"; |
1052 | 0 | case Name_kind: |
1053 | 0 | return "name"; |
1054 | 0 | case List_kind: |
1055 | 0 | return "list"; |
1056 | 0 | case Tuple_kind: |
1057 | 0 | return "tuple"; |
1058 | 0 | case Lambda_kind: |
1059 | 0 | return "lambda"; |
1060 | 0 | case Call_kind: |
1061 | 0 | return "function call"; |
1062 | 0 | case BoolOp_kind: |
1063 | 0 | case BinOp_kind: |
1064 | 0 | case UnaryOp_kind: |
1065 | 0 | return "operator"; |
1066 | 0 | case GeneratorExp_kind: |
1067 | 0 | return "generator expression"; |
1068 | 0 | case Yield_kind: |
1069 | 0 | case YieldFrom_kind: |
1070 | 0 | return "yield expression"; |
1071 | 0 | case Await_kind: |
1072 | 0 | return "await expression"; |
1073 | 0 | case ListComp_kind: |
1074 | 0 | return "list comprehension"; |
1075 | 0 | case SetComp_kind: |
1076 | 0 | return "set comprehension"; |
1077 | 0 | case DictComp_kind: |
1078 | 0 | return "dict comprehension"; |
1079 | 0 | case Dict_kind: |
1080 | 0 | return "dict display"; |
1081 | 0 | case Set_kind: |
1082 | 0 | return "set display"; |
1083 | 0 | case JoinedStr_kind: |
1084 | 0 | case FormattedValue_kind: |
1085 | 0 | return "f-string expression"; |
1086 | 0 | case Constant_kind: { |
1087 | 0 | PyObject *value = e->v.Constant.value; |
1088 | 0 | if (value == Py_None) { |
1089 | 0 | return "None"; |
1090 | 0 | } |
1091 | 0 | if (value == Py_False) { |
1092 | 0 | return "False"; |
1093 | 0 | } |
1094 | 0 | if (value == Py_True) { |
1095 | 0 | return "True"; |
1096 | 0 | } |
1097 | 0 | if (value == Py_Ellipsis) { |
1098 | 0 | return "Ellipsis"; |
1099 | 0 | } |
1100 | 0 | return "literal"; |
1101 | 0 | } |
1102 | 0 | case Compare_kind: |
1103 | 0 | return "comparison"; |
1104 | 0 | case IfExp_kind: |
1105 | 0 | return "conditional expression"; |
1106 | 0 | case NamedExpr_kind: |
1107 | 0 | return "named expression"; |
1108 | 0 | default: |
1109 | 0 | PyErr_Format(PyExc_SystemError, |
1110 | 0 | "unexpected expression in assignment %d (line %d)", |
1111 | 0 | e->kind, e->lineno); |
1112 | 0 | return NULL; |
1113 | 0 | } |
1114 | 0 | } |
1115 | | |
1116 | | /* Set the context ctx for expr_ty e, recursively traversing e. |
1117 | | |
1118 | | Only sets context for expr kinds that "can appear in assignment context" |
1119 | | (according to ../Parser/Python.asdl). For other expr kinds, it sets |
1120 | | an appropriate syntax error and returns false. |
1121 | | */ |
1122 | | |
1123 | | static int |
1124 | | set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n) |
1125 | 68 | { |
1126 | 68 | asdl_seq *s = NULL; |
1127 | | |
1128 | | /* The ast defines augmented store and load contexts, but the |
1129 | | implementation here doesn't actually use them. The code may be |
1130 | | a little more complex than necessary as a result. It also means |
1131 | | that expressions in an augmented assignment have a Store context. |
1132 | | Consider restructuring so that augmented assignment uses |
1133 | | set_context(), too. |
1134 | | */ |
1135 | 68 | assert(ctx != AugStore && ctx != AugLoad); |
1136 | | |
1137 | 68 | switch (e->kind) { |
1138 | 0 | case Attribute_kind: |
1139 | 0 | e->v.Attribute.ctx = ctx; |
1140 | 0 | if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1)) |
1141 | 0 | return 0; |
1142 | 0 | break; |
1143 | 4 | case Subscript_kind: |
1144 | 4 | e->v.Subscript.ctx = ctx; |
1145 | 4 | break; |
1146 | 0 | case Starred_kind: |
1147 | 0 | e->v.Starred.ctx = ctx; |
1148 | 0 | if (!set_context(c, e->v.Starred.value, ctx, n)) |
1149 | 0 | return 0; |
1150 | 0 | break; |
1151 | 56 | case Name_kind: |
1152 | 56 | if (ctx == Store) { |
1153 | 56 | if (forbidden_name(c, e->v.Name.id, n, 0)) |
1154 | 0 | return 0; /* forbidden_name() calls ast_error() */ |
1155 | 56 | } |
1156 | 56 | e->v.Name.ctx = ctx; |
1157 | 56 | break; |
1158 | 0 | case List_kind: |
1159 | 0 | e->v.List.ctx = ctx; |
1160 | 0 | s = e->v.List.elts; |
1161 | 0 | break; |
1162 | 8 | case Tuple_kind: |
1163 | 8 | e->v.Tuple.ctx = ctx; |
1164 | 8 | s = e->v.Tuple.elts; |
1165 | 8 | break; |
1166 | 0 | default: { |
1167 | 0 | const char *expr_name = get_expr_name(e); |
1168 | 0 | if (expr_name != NULL) { |
1169 | 0 | ast_error(c, n, "cannot %s %s", |
1170 | 0 | ctx == Store ? "assign to" : "delete", |
1171 | 0 | expr_name); |
1172 | 0 | } |
1173 | 0 | return 0; |
1174 | 56 | } |
1175 | 68 | } |
1176 | | |
1177 | | /* If the LHS is a list or tuple, we need to set the assignment |
1178 | | context for all the contained elements. |
1179 | | */ |
1180 | 68 | if (s) { |
1181 | 8 | Py_ssize_t i; |
1182 | | |
1183 | 26 | for (i = 0; i < asdl_seq_LEN(s); i++) { |
1184 | 18 | if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n)) |
1185 | 0 | return 0; |
1186 | 18 | } |
1187 | 8 | } |
1188 | 68 | return 1; |
1189 | 68 | } |
1190 | | |
1191 | | static operator_ty |
1192 | | ast_for_augassign(struct compiling *c, const node *n) |
1193 | 0 | { |
1194 | 0 | REQ(n, augassign); |
1195 | 0 | n = CHILD(n, 0); |
1196 | 0 | switch (STR(n)[0]) { |
1197 | 0 | case '+': |
1198 | 0 | return Add; |
1199 | 0 | case '-': |
1200 | 0 | return Sub; |
1201 | 0 | case '/': |
1202 | 0 | if (STR(n)[1] == '/') |
1203 | 0 | return FloorDiv; |
1204 | 0 | else |
1205 | 0 | return Div; |
1206 | 0 | case '%': |
1207 | 0 | return Mod; |
1208 | 0 | case '<': |
1209 | 0 | return LShift; |
1210 | 0 | case '>': |
1211 | 0 | return RShift; |
1212 | 0 | case '&': |
1213 | 0 | return BitAnd; |
1214 | 0 | case '^': |
1215 | 0 | return BitXor; |
1216 | 0 | case '|': |
1217 | 0 | return BitOr; |
1218 | 0 | case '*': |
1219 | 0 | if (STR(n)[1] == '*') |
1220 | 0 | return Pow; |
1221 | 0 | else |
1222 | 0 | return Mult; |
1223 | 0 | case '@': |
1224 | 0 | if (c->c_feature_version < 5) { |
1225 | 0 | ast_error(c, n, |
1226 | 0 | "The '@' operator is only supported in Python 3.5 and greater"); |
1227 | 0 | return (operator_ty)0; |
1228 | 0 | } |
1229 | 0 | return MatMult; |
1230 | 0 | default: |
1231 | 0 | PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n)); |
1232 | 0 | return (operator_ty)0; |
1233 | 0 | } |
1234 | 0 | } |
1235 | | |
1236 | | static cmpop_ty |
1237 | | ast_for_comp_op(struct compiling *c, const node *n) |
1238 | 24 | { |
1239 | | /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is' |
1240 | | |'is' 'not' |
1241 | | */ |
1242 | 24 | REQ(n, comp_op); |
1243 | 24 | if (NCH(n) == 1) { |
1244 | 24 | n = CHILD(n, 0); |
1245 | 24 | switch (TYPE(n)) { |
1246 | 0 | case LESS: |
1247 | 0 | return Lt; |
1248 | 0 | case GREATER: |
1249 | 0 | return Gt; |
1250 | 8 | case EQEQUAL: /* == */ |
1251 | 8 | return Eq; |
1252 | 0 | case LESSEQUAL: |
1253 | 0 | return LtE; |
1254 | 0 | case GREATEREQUAL: |
1255 | 0 | return GtE; |
1256 | 6 | case NOTEQUAL: |
1257 | 6 | return NotEq; |
1258 | 10 | case NAME: |
1259 | 10 | if (strcmp(STR(n), "in") == 0) |
1260 | 10 | return In; |
1261 | 0 | if (strcmp(STR(n), "is") == 0) |
1262 | 0 | return Is; |
1263 | | /* fall through */ |
1264 | 0 | default: |
1265 | 0 | PyErr_Format(PyExc_SystemError, "invalid comp_op: %s", |
1266 | 0 | STR(n)); |
1267 | 0 | return (cmpop_ty)0; |
1268 | 24 | } |
1269 | 24 | } |
1270 | 0 | else if (NCH(n) == 2) { |
1271 | | /* handle "not in" and "is not" */ |
1272 | 0 | switch (TYPE(CHILD(n, 0))) { |
1273 | 0 | case NAME: |
1274 | 0 | if (strcmp(STR(CHILD(n, 1)), "in") == 0) |
1275 | 0 | return NotIn; |
1276 | 0 | if (strcmp(STR(CHILD(n, 0)), "is") == 0) |
1277 | 0 | return IsNot; |
1278 | | /* fall through */ |
1279 | 0 | default: |
1280 | 0 | PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s", |
1281 | 0 | STR(CHILD(n, 0)), STR(CHILD(n, 1))); |
1282 | 0 | return (cmpop_ty)0; |
1283 | 0 | } |
1284 | 0 | } |
1285 | 0 | PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children", |
1286 | 0 | NCH(n)); |
1287 | 0 | return (cmpop_ty)0; |
1288 | 24 | } |
1289 | | |
1290 | | static asdl_seq * |
1291 | | seq_for_testlist(struct compiling *c, const node *n) |
1292 | 26 | { |
1293 | | /* testlist: test (',' test)* [','] |
1294 | | testlist_star_expr: test|star_expr (',' test|star_expr)* [','] |
1295 | | */ |
1296 | 26 | asdl_seq *seq; |
1297 | 26 | expr_ty expression; |
1298 | 26 | int i; |
1299 | 26 | assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp); |
1300 | | |
1301 | 26 | seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); |
1302 | 26 | if (!seq) |
1303 | 0 | return NULL; |
1304 | | |
1305 | 89 | for (i = 0; i < NCH(n); i += 2) { |
1306 | 63 | const node *ch = CHILD(n, i); |
1307 | 63 | assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test); |
1308 | | |
1309 | 63 | expression = ast_for_expr(c, ch); |
1310 | 63 | if (!expression) |
1311 | 0 | return NULL; |
1312 | | |
1313 | 63 | assert(i / 2 < seq->size); |
1314 | 63 | asdl_seq_SET(seq, i / 2, expression); |
1315 | 63 | } |
1316 | 26 | return seq; |
1317 | 26 | } |
1318 | | |
1319 | | static arg_ty |
1320 | | ast_for_arg(struct compiling *c, const node *n) |
1321 | 17 | { |
1322 | 17 | identifier name; |
1323 | 17 | expr_ty annotation = NULL; |
1324 | 17 | node *ch; |
1325 | 17 | arg_ty ret; |
1326 | | |
1327 | 17 | assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef); |
1328 | 17 | ch = CHILD(n, 0); |
1329 | 17 | name = NEW_IDENTIFIER(ch); |
1330 | 17 | if (!name) |
1331 | 0 | return NULL; |
1332 | 17 | if (forbidden_name(c, name, ch, 0)) |
1333 | 0 | return NULL; |
1334 | | |
1335 | 17 | if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) { |
1336 | 0 | annotation = ast_for_expr(c, CHILD(n, 2)); |
1337 | 0 | if (!annotation) |
1338 | 0 | return NULL; |
1339 | 0 | } |
1340 | | |
1341 | 17 | ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset, |
1342 | 17 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
1343 | 17 | if (!ret) |
1344 | 0 | return NULL; |
1345 | 17 | return ret; |
1346 | 17 | } |
1347 | | |
1348 | | /* returns -1 if failed to handle keyword only arguments |
1349 | | returns new position to keep processing if successful |
1350 | | (',' tfpdef ['=' test])* |
1351 | | ^^^ |
1352 | | start pointing here |
1353 | | */ |
1354 | | static int |
1355 | | handle_keywordonly_args(struct compiling *c, const node *n, int start, |
1356 | | asdl_seq *kwonlyargs, asdl_seq *kwdefaults) |
1357 | 0 | { |
1358 | 0 | PyObject *argname; |
1359 | 0 | node *ch; |
1360 | 0 | expr_ty expression, annotation; |
1361 | 0 | arg_ty arg = NULL; |
1362 | 0 | int i = start; |
1363 | 0 | int j = 0; /* index for kwdefaults and kwonlyargs */ |
1364 | |
|
1365 | 0 | if (kwonlyargs == NULL) { |
1366 | 0 | ast_error(c, CHILD(n, start), "named arguments must follow bare *"); |
1367 | 0 | return -1; |
1368 | 0 | } |
1369 | 0 | assert(kwdefaults != NULL); |
1370 | 0 | while (i < NCH(n)) { |
1371 | 0 | ch = CHILD(n, i); |
1372 | 0 | switch (TYPE(ch)) { |
1373 | 0 | case vfpdef: |
1374 | 0 | case tfpdef: |
1375 | 0 | if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) { |
1376 | 0 | expression = ast_for_expr(c, CHILD(n, i + 2)); |
1377 | 0 | if (!expression) |
1378 | 0 | goto error; |
1379 | 0 | asdl_seq_SET(kwdefaults, j, expression); |
1380 | 0 | i += 2; /* '=' and test */ |
1381 | 0 | } |
1382 | 0 | else { /* setting NULL if no default value exists */ |
1383 | 0 | asdl_seq_SET(kwdefaults, j, NULL); |
1384 | 0 | } |
1385 | 0 | if (NCH(ch) == 3) { |
1386 | | /* ch is NAME ':' test */ |
1387 | 0 | annotation = ast_for_expr(c, CHILD(ch, 2)); |
1388 | 0 | if (!annotation) |
1389 | 0 | goto error; |
1390 | 0 | } |
1391 | 0 | else { |
1392 | 0 | annotation = NULL; |
1393 | 0 | } |
1394 | 0 | ch = CHILD(ch, 0); |
1395 | 0 | argname = NEW_IDENTIFIER(ch); |
1396 | 0 | if (!argname) |
1397 | 0 | goto error; |
1398 | 0 | if (forbidden_name(c, argname, ch, 0)) |
1399 | 0 | goto error; |
1400 | 0 | arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset, |
1401 | 0 | ch->n_end_lineno, ch->n_end_col_offset, |
1402 | 0 | c->c_arena); |
1403 | 0 | if (!arg) |
1404 | 0 | goto error; |
1405 | 0 | asdl_seq_SET(kwonlyargs, j++, arg); |
1406 | 0 | i += 1; /* the name */ |
1407 | 0 | if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA) |
1408 | 0 | i += 1; /* the comma, if present */ |
1409 | 0 | break; |
1410 | 0 | case TYPE_COMMENT: |
1411 | | /* arg will be equal to the last argument processed */ |
1412 | 0 | arg->type_comment = NEW_TYPE_COMMENT(ch); |
1413 | 0 | if (!arg->type_comment) |
1414 | 0 | goto error; |
1415 | 0 | i += 1; |
1416 | 0 | break; |
1417 | 0 | case DOUBLESTAR: |
1418 | 0 | return i; |
1419 | 0 | default: |
1420 | 0 | ast_error(c, ch, "unexpected node"); |
1421 | 0 | goto error; |
1422 | 0 | } |
1423 | 0 | } |
1424 | 0 | return i; |
1425 | 0 | error: |
1426 | 0 | return -1; |
1427 | 0 | } |
1428 | | |
1429 | | /* Create AST for argument list. */ |
1430 | | |
1431 | | static arguments_ty |
1432 | | ast_for_arguments(struct compiling *c, const node *n) |
1433 | 6 | { |
1434 | | /* This function handles both typedargslist (function definition) |
1435 | | and varargslist (lambda definition). |
1436 | | |
1437 | | parameters: '(' [typedargslist] ')' |
1438 | | |
1439 | | The following definition for typedarglist is equivalent to this set of rules: |
1440 | | |
1441 | | arguments = argument (',' [TYPE_COMMENT] argument)* |
1442 | | argument = tfpdef ['=' test] |
1443 | | kwargs = '**' tfpdef [','] [TYPE_COMMENT] |
1444 | | args = '*' [tfpdef] |
1445 | | kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [',' |
1446 | | [TYPE_COMMENT] [kwargs]]) |
1447 | | args_kwonly_kwargs = args kwonly_kwargs | kwargs |
1448 | | poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [',' |
1449 | | [TYPE_COMMENT] [args_kwonly_kwargs]]) |
1450 | | typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs |
1451 | | typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT] |
1452 | | typedargslist_no_posonly]])|(typedargslist_no_posonly)" |
1453 | | |
1454 | | typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* |
1455 | | ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ',' |
1456 | | [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*' |
1457 | | [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' |
1458 | | [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [','] |
1459 | | [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* |
1460 | | (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | |
1461 | | '**' tfpdef [','] [TYPE_COMMENT]]] ) | (tfpdef ['=' test] (',' |
1462 | | [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*' |
1463 | | [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' |
1464 | | [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [','] |
1465 | | [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* |
1466 | | (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | |
1467 | | '**' tfpdef [','] [TYPE_COMMENT])) |
1468 | | |
1469 | | tfpdef: NAME [':' test] |
1470 | | |
1471 | | The following definition for varargslist is equivalent to this set of rules: |
1472 | | |
1473 | | arguments = argument (',' argument )* |
1474 | | argument = vfpdef ['=' test] |
1475 | | kwargs = '**' vfpdef [','] |
1476 | | args = '*' [vfpdef] |
1477 | | kwonly_kwargs = (',' argument )* [',' [kwargs]] |
1478 | | args_kwonly_kwargs = args kwonly_kwargs | kwargs |
1479 | | poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] |
1480 | | vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs |
1481 | | varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] | |
1482 | | (vararglist_no_posonly) |
1483 | | |
1484 | | varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' |
1485 | | test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' |
1486 | | ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* |
1487 | | [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef |
1488 | | ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] |
1489 | | | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef |
1490 | | [',']]] | '**' vfpdef [',']) |
1491 | | |
1492 | | vfpdef: NAME |
1493 | | |
1494 | | */ |
1495 | 6 | int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0; |
1496 | 6 | int nposdefaults = 0, found_default = 0; |
1497 | 6 | asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults; |
1498 | 6 | arg_ty vararg = NULL, kwarg = NULL; |
1499 | 6 | arg_ty arg = NULL; |
1500 | 6 | node *ch; |
1501 | | |
1502 | 6 | if (TYPE(n) == parameters) { |
1503 | 4 | if (NCH(n) == 2) /* () as argument list */ |
1504 | 0 | return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena); |
1505 | 4 | n = CHILD(n, 1); |
1506 | 4 | } |
1507 | 6 | assert(TYPE(n) == typedargslist || TYPE(n) == varargslist); |
1508 | | |
1509 | | /* First count the number of positional args & defaults. The |
1510 | | variable i is the loop index for this for loop and the next. |
1511 | | The next loop picks up where the first leaves off. |
1512 | | */ |
1513 | 34 | for (i = 0; i < NCH(n); i++) { |
1514 | 28 | ch = CHILD(n, i); |
1515 | 28 | if (TYPE(ch) == STAR) { |
1516 | | /* skip star */ |
1517 | 0 | i++; |
1518 | 0 | if (i < NCH(n) && /* skip argument following star */ |
1519 | 0 | (TYPE(CHILD(n, i)) == tfpdef || |
1520 | 0 | TYPE(CHILD(n, i)) == vfpdef)) { |
1521 | 0 | i++; |
1522 | 0 | } |
1523 | 0 | break; |
1524 | 0 | } |
1525 | 28 | if (TYPE(ch) == DOUBLESTAR) break; |
1526 | 28 | if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++; |
1527 | 28 | if (TYPE(ch) == EQUAL) nposdefaults++; |
1528 | 28 | if (TYPE(ch) == SLASH ) { |
1529 | 0 | nposonlyargs = nposargs; |
1530 | 0 | nposargs = 0; |
1531 | 0 | } |
1532 | 28 | } |
1533 | | /* count the number of keyword only args & |
1534 | | defaults for keyword only args */ |
1535 | 6 | for ( ; i < NCH(n); ++i) { |
1536 | 0 | ch = CHILD(n, i); |
1537 | 0 | if (TYPE(ch) == DOUBLESTAR) break; |
1538 | 0 | if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++; |
1539 | 0 | } |
1540 | 6 | posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL); |
1541 | 6 | if (!posonlyargs && nposonlyargs) { |
1542 | 0 | return NULL; |
1543 | 0 | } |
1544 | 6 | posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL); |
1545 | 6 | if (!posargs && nposargs) |
1546 | 0 | return NULL; |
1547 | 6 | kwonlyargs = (nkwonlyargs ? |
1548 | 0 | _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL); |
1549 | 6 | if (!kwonlyargs && nkwonlyargs) |
1550 | 0 | return NULL; |
1551 | 6 | posdefaults = (nposdefaults ? |
1552 | 0 | _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL); |
1553 | 6 | if (!posdefaults && nposdefaults) |
1554 | 0 | return NULL; |
1555 | | /* The length of kwonlyargs and kwdefaults are same |
1556 | | since we set NULL as default for keyword only argument w/o default |
1557 | | - we have sequence data structure, but no dictionary */ |
1558 | 6 | kwdefaults = (nkwonlyargs ? |
1559 | 0 | _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL); |
1560 | 6 | if (!kwdefaults && nkwonlyargs) |
1561 | 0 | return NULL; |
1562 | | |
1563 | | /* tfpdef: NAME [':' test] |
1564 | | vfpdef: NAME |
1565 | | */ |
1566 | 6 | i = 0; |
1567 | 6 | j = 0; /* index for defaults */ |
1568 | 6 | k = 0; /* index for args */ |
1569 | 6 | l = 0; /* index for posonlyargs */ |
1570 | 23 | while (i < NCH(n)) { |
1571 | 17 | ch = CHILD(n, i); |
1572 | 17 | switch (TYPE(ch)) { |
1573 | 15 | case tfpdef: |
1574 | 17 | case vfpdef: |
1575 | | /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is |
1576 | | anything other than EQUAL or a comma? */ |
1577 | | /* XXX Should NCH(n) check be made a separate check? */ |
1578 | 17 | if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) { |
1579 | 0 | expr_ty expression = ast_for_expr(c, CHILD(n, i + 2)); |
1580 | 0 | if (!expression) |
1581 | 0 | return NULL; |
1582 | 0 | assert(posdefaults != NULL); |
1583 | 0 | asdl_seq_SET(posdefaults, j++, expression); |
1584 | 0 | i += 2; |
1585 | 0 | found_default = 1; |
1586 | 0 | } |
1587 | 17 | else if (found_default) { |
1588 | 0 | ast_error(c, n, |
1589 | 0 | "non-default argument follows default argument"); |
1590 | 0 | return NULL; |
1591 | 0 | } |
1592 | 17 | arg = ast_for_arg(c, ch); |
1593 | 17 | if (!arg) |
1594 | 0 | return NULL; |
1595 | 17 | if (l < nposonlyargs) { |
1596 | 0 | asdl_seq_SET(posonlyargs, l++, arg); |
1597 | 17 | } else { |
1598 | 17 | asdl_seq_SET(posargs, k++, arg); |
1599 | 17 | } |
1600 | 17 | i += 1; /* the name */ |
1601 | 17 | if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA) |
1602 | 11 | i += 1; /* the comma, if present */ |
1603 | 17 | break; |
1604 | 0 | case SLASH: |
1605 | | /* Advance the slash and the comma. If there are more names |
1606 | | * after the slash there will be a comma so we are advancing |
1607 | | * the correct number of nodes. If the slash is the last item, |
1608 | | * we will be advancing an extra token but then * i > NCH(n) |
1609 | | * and the enclosing while will finish correctly. */ |
1610 | 0 | i += 2; |
1611 | 0 | break; |
1612 | 0 | case STAR: |
1613 | 0 | if (i+1 >= NCH(n) || |
1614 | 0 | (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA |
1615 | 0 | || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) { |
1616 | 0 | ast_error(c, CHILD(n, i), |
1617 | 0 | "named arguments must follow bare *"); |
1618 | 0 | return NULL; |
1619 | 0 | } |
1620 | 0 | ch = CHILD(n, i+1); /* tfpdef or COMMA */ |
1621 | 0 | if (TYPE(ch) == COMMA) { |
1622 | 0 | int res = 0; |
1623 | 0 | i += 2; /* now follows keyword only arguments */ |
1624 | |
|
1625 | 0 | if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) { |
1626 | 0 | ast_error(c, CHILD(n, i), |
1627 | 0 | "bare * has associated type comment"); |
1628 | 0 | return NULL; |
1629 | 0 | } |
1630 | | |
1631 | 0 | res = handle_keywordonly_args(c, n, i, |
1632 | 0 | kwonlyargs, kwdefaults); |
1633 | 0 | if (res == -1) return NULL; |
1634 | 0 | i = res; /* res has new position to process */ |
1635 | 0 | } |
1636 | 0 | else { |
1637 | 0 | vararg = ast_for_arg(c, ch); |
1638 | 0 | if (!vararg) |
1639 | 0 | return NULL; |
1640 | | |
1641 | 0 | i += 2; /* the star and the name */ |
1642 | 0 | if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA) |
1643 | 0 | i += 1; /* the comma, if present */ |
1644 | |
|
1645 | 0 | if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) { |
1646 | 0 | vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i)); |
1647 | 0 | if (!vararg->type_comment) |
1648 | 0 | return NULL; |
1649 | 0 | i += 1; |
1650 | 0 | } |
1651 | | |
1652 | 0 | if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef |
1653 | 0 | || TYPE(CHILD(n, i)) == vfpdef)) { |
1654 | 0 | int res = 0; |
1655 | 0 | res = handle_keywordonly_args(c, n, i, |
1656 | 0 | kwonlyargs, kwdefaults); |
1657 | 0 | if (res == -1) return NULL; |
1658 | 0 | i = res; /* res has new position to process */ |
1659 | 0 | } |
1660 | 0 | } |
1661 | 0 | break; |
1662 | 0 | case DOUBLESTAR: |
1663 | 0 | ch = CHILD(n, i+1); /* tfpdef */ |
1664 | 0 | assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef); |
1665 | 0 | kwarg = ast_for_arg(c, ch); |
1666 | 0 | if (!kwarg) |
1667 | 0 | return NULL; |
1668 | 0 | i += 2; /* the double star and the name */ |
1669 | 0 | if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA) |
1670 | 0 | i += 1; /* the comma, if present */ |
1671 | 0 | break; |
1672 | 0 | case TYPE_COMMENT: |
1673 | 0 | assert(i); |
1674 | |
|
1675 | 0 | if (kwarg) |
1676 | 0 | arg = kwarg; |
1677 | | |
1678 | | /* arg will be equal to the last argument processed */ |
1679 | 0 | arg->type_comment = NEW_TYPE_COMMENT(ch); |
1680 | 0 | if (!arg->type_comment) |
1681 | 0 | return NULL; |
1682 | 0 | i += 1; |
1683 | 0 | break; |
1684 | 0 | default: |
1685 | 0 | PyErr_Format(PyExc_SystemError, |
1686 | 0 | "unexpected node in varargslist: %d @ %d", |
1687 | 0 | TYPE(ch), i); |
1688 | 0 | return NULL; |
1689 | 17 | } |
1690 | 17 | } |
1691 | 6 | return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena); |
1692 | 6 | } |
1693 | | |
1694 | | static expr_ty |
1695 | | ast_for_dotted_name(struct compiling *c, const node *n) |
1696 | 0 | { |
1697 | 0 | expr_ty e; |
1698 | 0 | identifier id; |
1699 | 0 | int lineno, col_offset; |
1700 | 0 | int i; |
1701 | 0 | node *ch; |
1702 | |
|
1703 | 0 | REQ(n, dotted_name); |
1704 | |
|
1705 | 0 | lineno = LINENO(n); |
1706 | 0 | col_offset = n->n_col_offset; |
1707 | |
|
1708 | 0 | ch = CHILD(n, 0); |
1709 | 0 | id = NEW_IDENTIFIER(ch); |
1710 | 0 | if (!id) |
1711 | 0 | return NULL; |
1712 | 0 | e = Name(id, Load, lineno, col_offset, |
1713 | 0 | ch->n_end_lineno, ch->n_end_col_offset, c->c_arena); |
1714 | 0 | if (!e) |
1715 | 0 | return NULL; |
1716 | | |
1717 | 0 | for (i = 2; i < NCH(n); i+=2) { |
1718 | 0 | const node *child = CHILD(n, i); |
1719 | 0 | id = NEW_IDENTIFIER(child); |
1720 | 0 | if (!id) |
1721 | 0 | return NULL; |
1722 | 0 | e = Attribute(e, id, Load, lineno, col_offset, |
1723 | 0 | child->n_end_lineno, child->n_end_col_offset, c->c_arena); |
1724 | 0 | if (!e) |
1725 | 0 | return NULL; |
1726 | 0 | } |
1727 | | |
1728 | 0 | return e; |
1729 | 0 | } |
1730 | | |
1731 | | static expr_ty |
1732 | | ast_for_decorator(struct compiling *c, const node *n) |
1733 | 0 | { |
1734 | | /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */ |
1735 | 0 | expr_ty d = NULL; |
1736 | 0 | expr_ty name_expr; |
1737 | |
|
1738 | 0 | REQ(n, decorator); |
1739 | 0 | REQ(CHILD(n, 0), AT); |
1740 | 0 | REQ(RCHILD(n, -1), NEWLINE); |
1741 | |
|
1742 | 0 | name_expr = ast_for_dotted_name(c, CHILD(n, 1)); |
1743 | 0 | if (!name_expr) |
1744 | 0 | return NULL; |
1745 | | |
1746 | 0 | if (NCH(n) == 3) { /* No arguments */ |
1747 | 0 | d = name_expr; |
1748 | 0 | name_expr = NULL; |
1749 | 0 | } |
1750 | 0 | else if (NCH(n) == 5) { /* Call with no arguments */ |
1751 | 0 | d = Call(name_expr, NULL, NULL, |
1752 | 0 | name_expr->lineno, name_expr->col_offset, |
1753 | 0 | CHILD(n, 3)->n_end_lineno, CHILD(n, 3)->n_end_col_offset, |
1754 | 0 | c->c_arena); |
1755 | 0 | if (!d) |
1756 | 0 | return NULL; |
1757 | 0 | name_expr = NULL; |
1758 | 0 | } |
1759 | 0 | else { |
1760 | 0 | d = ast_for_call(c, CHILD(n, 3), name_expr, |
1761 | 0 | CHILD(n, 1), CHILD(n, 2), CHILD(n, 4)); |
1762 | 0 | if (!d) |
1763 | 0 | return NULL; |
1764 | 0 | name_expr = NULL; |
1765 | 0 | } |
1766 | | |
1767 | 0 | return d; |
1768 | 0 | } |
1769 | | |
1770 | | static asdl_seq* |
1771 | | ast_for_decorators(struct compiling *c, const node *n) |
1772 | 0 | { |
1773 | 0 | asdl_seq* decorator_seq; |
1774 | 0 | expr_ty d; |
1775 | 0 | int i; |
1776 | |
|
1777 | 0 | REQ(n, decorators); |
1778 | 0 | decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena); |
1779 | 0 | if (!decorator_seq) |
1780 | 0 | return NULL; |
1781 | | |
1782 | 0 | for (i = 0; i < NCH(n); i++) { |
1783 | 0 | d = ast_for_decorator(c, CHILD(n, i)); |
1784 | 0 | if (!d) |
1785 | 0 | return NULL; |
1786 | 0 | asdl_seq_SET(decorator_seq, i, d); |
1787 | 0 | } |
1788 | 0 | return decorator_seq; |
1789 | 0 | } |
1790 | | |
1791 | | static stmt_ty |
1792 | | ast_for_funcdef_impl(struct compiling *c, const node *n0, |
1793 | | asdl_seq *decorator_seq, bool is_async) |
1794 | 4 | { |
1795 | | /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */ |
1796 | 4 | const node * const n = is_async ? CHILD(n0, 1) : n0; |
1797 | 4 | identifier name; |
1798 | 4 | arguments_ty args; |
1799 | 4 | asdl_seq *body; |
1800 | 4 | expr_ty returns = NULL; |
1801 | 4 | int name_i = 1; |
1802 | 4 | int end_lineno, end_col_offset; |
1803 | 4 | node *tc; |
1804 | 4 | string type_comment = NULL; |
1805 | | |
1806 | 4 | if (is_async && c->c_feature_version < 5) { |
1807 | 0 | ast_error(c, n, |
1808 | 0 | "Async functions are only supported in Python 3.5 and greater"); |
1809 | 0 | return NULL; |
1810 | 0 | } |
1811 | | |
1812 | 4 | REQ(n, funcdef); |
1813 | | |
1814 | 4 | name = NEW_IDENTIFIER(CHILD(n, name_i)); |
1815 | 4 | if (!name) |
1816 | 0 | return NULL; |
1817 | 4 | if (forbidden_name(c, name, CHILD(n, name_i), 0)) |
1818 | 0 | return NULL; |
1819 | 4 | args = ast_for_arguments(c, CHILD(n, name_i + 1)); |
1820 | 4 | if (!args) |
1821 | 0 | return NULL; |
1822 | 4 | if (TYPE(CHILD(n, name_i+2)) == RARROW) { |
1823 | 0 | returns = ast_for_expr(c, CHILD(n, name_i + 3)); |
1824 | 0 | if (!returns) |
1825 | 0 | return NULL; |
1826 | 0 | name_i += 2; |
1827 | 0 | } |
1828 | 4 | if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) { |
1829 | 0 | type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3)); |
1830 | 0 | if (!type_comment) |
1831 | 0 | return NULL; |
1832 | 0 | name_i += 1; |
1833 | 0 | } |
1834 | 4 | body = ast_for_suite(c, CHILD(n, name_i + 3)); |
1835 | 4 | if (!body) |
1836 | 0 | return NULL; |
1837 | 4 | get_last_end_pos(body, &end_lineno, &end_col_offset); |
1838 | | |
1839 | 4 | if (NCH(CHILD(n, name_i + 3)) > 1) { |
1840 | | /* Check if the suite has a type comment in it. */ |
1841 | 2 | tc = CHILD(CHILD(n, name_i + 3), 1); |
1842 | | |
1843 | 2 | if (TYPE(tc) == TYPE_COMMENT) { |
1844 | 0 | if (type_comment != NULL) { |
1845 | 0 | ast_error(c, n, "Cannot have two type comments on def"); |
1846 | 0 | return NULL; |
1847 | 0 | } |
1848 | 0 | type_comment = NEW_TYPE_COMMENT(tc); |
1849 | 0 | if (!type_comment) |
1850 | 0 | return NULL; |
1851 | 0 | } |
1852 | 2 | } |
1853 | | |
1854 | 4 | if (is_async) |
1855 | 0 | return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment, |
1856 | 4 | LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena); |
1857 | 4 | else |
1858 | 4 | return FunctionDef(name, args, body, decorator_seq, returns, type_comment, |
1859 | 4 | LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena); |
1860 | 4 | } |
1861 | | |
1862 | | static stmt_ty |
1863 | | ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) |
1864 | 0 | { |
1865 | | /* async_funcdef: ASYNC funcdef */ |
1866 | 0 | REQ(n, async_funcdef); |
1867 | 0 | REQ(CHILD(n, 0), ASYNC); |
1868 | 0 | REQ(CHILD(n, 1), funcdef); |
1869 | |
|
1870 | 0 | return ast_for_funcdef_impl(c, n, decorator_seq, |
1871 | 0 | true /* is_async */); |
1872 | 0 | } |
1873 | | |
1874 | | static stmt_ty |
1875 | | ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) |
1876 | 4 | { |
1877 | | /* funcdef: 'def' NAME parameters ['->' test] ':' suite */ |
1878 | 4 | return ast_for_funcdef_impl(c, n, decorator_seq, |
1879 | 4 | false /* is_async */); |
1880 | 4 | } |
1881 | | |
1882 | | |
1883 | | static stmt_ty |
1884 | | ast_for_async_stmt(struct compiling *c, const node *n) |
1885 | 0 | { |
1886 | | /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */ |
1887 | 0 | REQ(n, async_stmt); |
1888 | 0 | REQ(CHILD(n, 0), ASYNC); |
1889 | |
|
1890 | 0 | switch (TYPE(CHILD(n, 1))) { |
1891 | 0 | case funcdef: |
1892 | 0 | return ast_for_funcdef_impl(c, n, NULL, |
1893 | 0 | true /* is_async */); |
1894 | 0 | case with_stmt: |
1895 | 0 | return ast_for_with_stmt(c, n, |
1896 | 0 | true /* is_async */); |
1897 | | |
1898 | 0 | case for_stmt: |
1899 | 0 | return ast_for_for_stmt(c, n, |
1900 | 0 | true /* is_async */); |
1901 | | |
1902 | 0 | default: |
1903 | 0 | PyErr_Format(PyExc_SystemError, |
1904 | 0 | "invalid async stament: %s", |
1905 | 0 | STR(CHILD(n, 1))); |
1906 | 0 | return NULL; |
1907 | 0 | } |
1908 | 0 | } |
1909 | | |
1910 | | static stmt_ty |
1911 | | ast_for_decorated(struct compiling *c, const node *n) |
1912 | 0 | { |
1913 | | /* decorated: decorators (classdef | funcdef | async_funcdef) */ |
1914 | 0 | stmt_ty thing = NULL; |
1915 | 0 | asdl_seq *decorator_seq = NULL; |
1916 | |
|
1917 | 0 | REQ(n, decorated); |
1918 | |
|
1919 | 0 | decorator_seq = ast_for_decorators(c, CHILD(n, 0)); |
1920 | 0 | if (!decorator_seq) |
1921 | 0 | return NULL; |
1922 | | |
1923 | 0 | assert(TYPE(CHILD(n, 1)) == funcdef || |
1924 | 0 | TYPE(CHILD(n, 1)) == async_funcdef || |
1925 | 0 | TYPE(CHILD(n, 1)) == classdef); |
1926 | |
|
1927 | 0 | if (TYPE(CHILD(n, 1)) == funcdef) { |
1928 | 0 | thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq); |
1929 | 0 | } else if (TYPE(CHILD(n, 1)) == classdef) { |
1930 | 0 | thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq); |
1931 | 0 | } else if (TYPE(CHILD(n, 1)) == async_funcdef) { |
1932 | 0 | thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq); |
1933 | 0 | } |
1934 | 0 | return thing; |
1935 | 0 | } |
1936 | | |
1937 | | static expr_ty |
1938 | | ast_for_namedexpr(struct compiling *c, const node *n) |
1939 | 0 | { |
1940 | | /* namedexpr_test: test [':=' test] |
1941 | | argument: ( test [comp_for] | |
1942 | | test ':=' test | |
1943 | | test '=' test | |
1944 | | '**' test | |
1945 | | '*' test ) |
1946 | | */ |
1947 | 0 | expr_ty target, value; |
1948 | |
|
1949 | 0 | target = ast_for_expr(c, CHILD(n, 0)); |
1950 | 0 | if (!target) |
1951 | 0 | return NULL; |
1952 | | |
1953 | 0 | value = ast_for_expr(c, CHILD(n, 2)); |
1954 | 0 | if (!value) |
1955 | 0 | return NULL; |
1956 | | |
1957 | 0 | if (target->kind != Name_kind) { |
1958 | 0 | const char *expr_name = get_expr_name(target); |
1959 | 0 | if (expr_name != NULL) { |
1960 | 0 | ast_error(c, n, "cannot use assignment expressions with %s", expr_name); |
1961 | 0 | } |
1962 | 0 | return NULL; |
1963 | 0 | } |
1964 | | |
1965 | 0 | if (!set_context(c, target, Store, n)) |
1966 | 0 | return NULL; |
1967 | | |
1968 | 0 | return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno, |
1969 | 0 | n->n_end_col_offset, c->c_arena); |
1970 | 0 | } |
1971 | | |
1972 | | static expr_ty |
1973 | | ast_for_lambdef(struct compiling *c, const node *n) |
1974 | 2 | { |
1975 | | /* lambdef: 'lambda' [varargslist] ':' test |
1976 | | lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */ |
1977 | 2 | arguments_ty args; |
1978 | 2 | expr_ty expression; |
1979 | | |
1980 | 2 | if (NCH(n) == 3) { |
1981 | 0 | args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena); |
1982 | 0 | if (!args) |
1983 | 0 | return NULL; |
1984 | 0 | expression = ast_for_expr(c, CHILD(n, 2)); |
1985 | 0 | if (!expression) |
1986 | 0 | return NULL; |
1987 | 0 | } |
1988 | 2 | else { |
1989 | 2 | args = ast_for_arguments(c, CHILD(n, 1)); |
1990 | 2 | if (!args) |
1991 | 0 | return NULL; |
1992 | 2 | expression = ast_for_expr(c, CHILD(n, 3)); |
1993 | 2 | if (!expression) |
1994 | 0 | return NULL; |
1995 | 2 | } |
1996 | | |
1997 | 2 | return Lambda(args, expression, LINENO(n), n->n_col_offset, |
1998 | 2 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
1999 | 2 | } |
2000 | | |
2001 | | static expr_ty |
2002 | | ast_for_ifexpr(struct compiling *c, const node *n) |
2003 | 0 | { |
2004 | | /* test: or_test 'if' or_test 'else' test */ |
2005 | 0 | expr_ty expression, body, orelse; |
2006 | |
|
2007 | 0 | assert(NCH(n) == 5); |
2008 | 0 | body = ast_for_expr(c, CHILD(n, 0)); |
2009 | 0 | if (!body) |
2010 | 0 | return NULL; |
2011 | 0 | expression = ast_for_expr(c, CHILD(n, 2)); |
2012 | 0 | if (!expression) |
2013 | 0 | return NULL; |
2014 | 0 | orelse = ast_for_expr(c, CHILD(n, 4)); |
2015 | 0 | if (!orelse) |
2016 | 0 | return NULL; |
2017 | 0 | return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset, |
2018 | 0 | n->n_end_lineno, n->n_end_col_offset, |
2019 | 0 | c->c_arena); |
2020 | 0 | } |
2021 | | |
2022 | | /* |
2023 | | Count the number of 'for' loops in a comprehension. |
2024 | | |
2025 | | Helper for ast_for_comprehension(). |
2026 | | */ |
2027 | | |
2028 | | static int |
2029 | | count_comp_fors(struct compiling *c, const node *n) |
2030 | 0 | { |
2031 | 0 | int n_fors = 0; |
2032 | |
|
2033 | 0 | count_comp_for: |
2034 | 0 | n_fors++; |
2035 | 0 | REQ(n, comp_for); |
2036 | 0 | if (NCH(n) == 2) { |
2037 | 0 | REQ(CHILD(n, 0), ASYNC); |
2038 | 0 | n = CHILD(n, 1); |
2039 | 0 | } |
2040 | 0 | else if (NCH(n) == 1) { |
2041 | 0 | n = CHILD(n, 0); |
2042 | 0 | } |
2043 | 0 | else { |
2044 | 0 | goto error; |
2045 | 0 | } |
2046 | 0 | if (NCH(n) == (5)) { |
2047 | 0 | n = CHILD(n, 4); |
2048 | 0 | } |
2049 | 0 | else { |
2050 | 0 | return n_fors; |
2051 | 0 | } |
2052 | 0 | count_comp_iter: |
2053 | 0 | REQ(n, comp_iter); |
2054 | 0 | n = CHILD(n, 0); |
2055 | 0 | if (TYPE(n) == comp_for) |
2056 | 0 | goto count_comp_for; |
2057 | 0 | else if (TYPE(n) == comp_if) { |
2058 | 0 | if (NCH(n) == 3) { |
2059 | 0 | n = CHILD(n, 2); |
2060 | 0 | goto count_comp_iter; |
2061 | 0 | } |
2062 | 0 | else |
2063 | 0 | return n_fors; |
2064 | 0 | } |
2065 | | |
2066 | 0 | error: |
2067 | | /* Should never be reached */ |
2068 | 0 | PyErr_SetString(PyExc_SystemError, |
2069 | 0 | "logic error in count_comp_fors"); |
2070 | 0 | return -1; |
2071 | 0 | } |
2072 | | |
2073 | | /* Count the number of 'if' statements in a comprehension. |
2074 | | |
2075 | | Helper for ast_for_comprehension(). |
2076 | | */ |
2077 | | |
2078 | | static int |
2079 | | count_comp_ifs(struct compiling *c, const node *n) |
2080 | 0 | { |
2081 | 0 | int n_ifs = 0; |
2082 | |
|
2083 | 0 | while (1) { |
2084 | 0 | REQ(n, comp_iter); |
2085 | 0 | if (TYPE(CHILD(n, 0)) == comp_for) |
2086 | 0 | return n_ifs; |
2087 | 0 | n = CHILD(n, 0); |
2088 | 0 | REQ(n, comp_if); |
2089 | 0 | n_ifs++; |
2090 | 0 | if (NCH(n) == 2) |
2091 | 0 | return n_ifs; |
2092 | 0 | n = CHILD(n, 2); |
2093 | 0 | } |
2094 | 0 | } |
2095 | | |
2096 | | static asdl_seq * |
2097 | | ast_for_comprehension(struct compiling *c, const node *n) |
2098 | 0 | { |
2099 | 0 | int i, n_fors; |
2100 | 0 | asdl_seq *comps; |
2101 | |
|
2102 | 0 | n_fors = count_comp_fors(c, n); |
2103 | 0 | if (n_fors == -1) |
2104 | 0 | return NULL; |
2105 | | |
2106 | 0 | comps = _Py_asdl_seq_new(n_fors, c->c_arena); |
2107 | 0 | if (!comps) |
2108 | 0 | return NULL; |
2109 | | |
2110 | 0 | for (i = 0; i < n_fors; i++) { |
2111 | 0 | comprehension_ty comp; |
2112 | 0 | asdl_seq *t; |
2113 | 0 | expr_ty expression, first; |
2114 | 0 | node *for_ch; |
2115 | 0 | node *sync_n; |
2116 | 0 | int is_async = 0; |
2117 | |
|
2118 | 0 | REQ(n, comp_for); |
2119 | |
|
2120 | 0 | if (NCH(n) == 2) { |
2121 | 0 | is_async = 1; |
2122 | 0 | REQ(CHILD(n, 0), ASYNC); |
2123 | 0 | sync_n = CHILD(n, 1); |
2124 | 0 | } |
2125 | 0 | else { |
2126 | 0 | sync_n = CHILD(n, 0); |
2127 | 0 | } |
2128 | 0 | REQ(sync_n, sync_comp_for); |
2129 | | |
2130 | | /* Async comprehensions only allowed in Python 3.6 and greater */ |
2131 | 0 | if (is_async && c->c_feature_version < 6) { |
2132 | 0 | ast_error(c, n, |
2133 | 0 | "Async comprehensions are only supported in Python 3.6 and greater"); |
2134 | 0 | return NULL; |
2135 | 0 | } |
2136 | | |
2137 | 0 | for_ch = CHILD(sync_n, 1); |
2138 | 0 | t = ast_for_exprlist(c, for_ch, Store); |
2139 | 0 | if (!t) |
2140 | 0 | return NULL; |
2141 | 0 | expression = ast_for_expr(c, CHILD(sync_n, 3)); |
2142 | 0 | if (!expression) |
2143 | 0 | return NULL; |
2144 | | |
2145 | | /* Check the # of children rather than the length of t, since |
2146 | | (x for x, in ...) has 1 element in t, but still requires a Tuple. */ |
2147 | 0 | first = (expr_ty)asdl_seq_GET(t, 0); |
2148 | 0 | if (NCH(for_ch) == 1) |
2149 | 0 | comp = comprehension(first, expression, NULL, |
2150 | 0 | is_async, c->c_arena); |
2151 | 0 | else |
2152 | 0 | comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset, |
2153 | 0 | for_ch->n_end_lineno, for_ch->n_end_col_offset, |
2154 | 0 | c->c_arena), |
2155 | 0 | expression, NULL, is_async, c->c_arena); |
2156 | 0 | if (!comp) |
2157 | 0 | return NULL; |
2158 | | |
2159 | 0 | if (NCH(sync_n) == 5) { |
2160 | 0 | int j, n_ifs; |
2161 | 0 | asdl_seq *ifs; |
2162 | |
|
2163 | 0 | n = CHILD(sync_n, 4); |
2164 | 0 | n_ifs = count_comp_ifs(c, n); |
2165 | 0 | if (n_ifs == -1) |
2166 | 0 | return NULL; |
2167 | | |
2168 | 0 | ifs = _Py_asdl_seq_new(n_ifs, c->c_arena); |
2169 | 0 | if (!ifs) |
2170 | 0 | return NULL; |
2171 | | |
2172 | 0 | for (j = 0; j < n_ifs; j++) { |
2173 | 0 | REQ(n, comp_iter); |
2174 | 0 | n = CHILD(n, 0); |
2175 | 0 | REQ(n, comp_if); |
2176 | |
|
2177 | 0 | expression = ast_for_expr(c, CHILD(n, 1)); |
2178 | 0 | if (!expression) |
2179 | 0 | return NULL; |
2180 | 0 | asdl_seq_SET(ifs, j, expression); |
2181 | 0 | if (NCH(n) == 3) |
2182 | 0 | n = CHILD(n, 2); |
2183 | 0 | } |
2184 | | /* on exit, must guarantee that n is a comp_for */ |
2185 | 0 | if (TYPE(n) == comp_iter) |
2186 | 0 | n = CHILD(n, 0); |
2187 | 0 | comp->ifs = ifs; |
2188 | 0 | } |
2189 | 0 | asdl_seq_SET(comps, i, comp); |
2190 | 0 | } |
2191 | 0 | return comps; |
2192 | 0 | } |
2193 | | |
2194 | | static expr_ty |
2195 | | ast_for_itercomp(struct compiling *c, const node *n, int type) |
2196 | 0 | { |
2197 | | /* testlist_comp: (test|star_expr) |
2198 | | * ( comp_for | (',' (test|star_expr))* [','] ) */ |
2199 | 0 | expr_ty elt; |
2200 | 0 | asdl_seq *comps; |
2201 | 0 | node *ch; |
2202 | |
|
2203 | 0 | assert(NCH(n) > 1); |
2204 | |
|
2205 | 0 | ch = CHILD(n, 0); |
2206 | 0 | elt = ast_for_expr(c, ch); |
2207 | 0 | if (!elt) |
2208 | 0 | return NULL; |
2209 | 0 | if (elt->kind == Starred_kind) { |
2210 | 0 | ast_error(c, ch, "iterable unpacking cannot be used in comprehension"); |
2211 | 0 | return NULL; |
2212 | 0 | } |
2213 | | |
2214 | 0 | comps = ast_for_comprehension(c, CHILD(n, 1)); |
2215 | 0 | if (!comps) |
2216 | 0 | return NULL; |
2217 | | |
2218 | 0 | if (type == COMP_GENEXP) |
2219 | 0 | return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, |
2220 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2221 | 0 | else if (type == COMP_LISTCOMP) |
2222 | 0 | return ListComp(elt, comps, LINENO(n), n->n_col_offset, |
2223 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2224 | 0 | else if (type == COMP_SETCOMP) |
2225 | 0 | return SetComp(elt, comps, LINENO(n), n->n_col_offset, |
2226 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2227 | 0 | else |
2228 | | /* Should never happen */ |
2229 | 0 | return NULL; |
2230 | 0 | } |
2231 | | |
2232 | | /* Fills in the key, value pair corresponding to the dict element. In case |
2233 | | * of an unpacking, key is NULL. *i is advanced by the number of ast |
2234 | | * elements. Iff successful, nonzero is returned. |
2235 | | */ |
2236 | | static int |
2237 | | ast_for_dictelement(struct compiling *c, const node *n, int *i, |
2238 | | expr_ty *key, expr_ty *value) |
2239 | 0 | { |
2240 | 0 | expr_ty expression; |
2241 | 0 | if (TYPE(CHILD(n, *i)) == DOUBLESTAR) { |
2242 | 0 | assert(NCH(n) - *i >= 2); |
2243 | |
|
2244 | 0 | expression = ast_for_expr(c, CHILD(n, *i + 1)); |
2245 | 0 | if (!expression) |
2246 | 0 | return 0; |
2247 | 0 | *key = NULL; |
2248 | 0 | *value = expression; |
2249 | |
|
2250 | 0 | *i += 2; |
2251 | 0 | } |
2252 | 0 | else { |
2253 | 0 | assert(NCH(n) - *i >= 3); |
2254 | |
|
2255 | 0 | expression = ast_for_expr(c, CHILD(n, *i)); |
2256 | 0 | if (!expression) |
2257 | 0 | return 0; |
2258 | 0 | *key = expression; |
2259 | |
|
2260 | 0 | REQ(CHILD(n, *i + 1), COLON); |
2261 | |
|
2262 | 0 | expression = ast_for_expr(c, CHILD(n, *i + 2)); |
2263 | 0 | if (!expression) |
2264 | 0 | return 0; |
2265 | 0 | *value = expression; |
2266 | |
|
2267 | 0 | *i += 3; |
2268 | 0 | } |
2269 | 0 | return 1; |
2270 | 0 | } |
2271 | | |
2272 | | static expr_ty |
2273 | | ast_for_dictcomp(struct compiling *c, const node *n) |
2274 | 0 | { |
2275 | 0 | expr_ty key, value; |
2276 | 0 | asdl_seq *comps; |
2277 | 0 | int i = 0; |
2278 | |
|
2279 | 0 | if (!ast_for_dictelement(c, n, &i, &key, &value)) |
2280 | 0 | return NULL; |
2281 | 0 | assert(key); |
2282 | 0 | assert(NCH(n) - i >= 1); |
2283 | |
|
2284 | 0 | comps = ast_for_comprehension(c, CHILD(n, i)); |
2285 | 0 | if (!comps) |
2286 | 0 | return NULL; |
2287 | | |
2288 | 0 | return DictComp(key, value, comps, LINENO(n), n->n_col_offset, |
2289 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2290 | 0 | } |
2291 | | |
2292 | | static expr_ty |
2293 | | ast_for_dictdisplay(struct compiling *c, const node *n) |
2294 | 0 | { |
2295 | 0 | int i; |
2296 | 0 | int j; |
2297 | 0 | int size; |
2298 | 0 | asdl_seq *keys, *values; |
2299 | |
|
2300 | 0 | size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */ |
2301 | 0 | keys = _Py_asdl_seq_new(size, c->c_arena); |
2302 | 0 | if (!keys) |
2303 | 0 | return NULL; |
2304 | | |
2305 | 0 | values = _Py_asdl_seq_new(size, c->c_arena); |
2306 | 0 | if (!values) |
2307 | 0 | return NULL; |
2308 | | |
2309 | 0 | j = 0; |
2310 | 0 | for (i = 0; i < NCH(n); i++) { |
2311 | 0 | expr_ty key, value; |
2312 | |
|
2313 | 0 | if (!ast_for_dictelement(c, n, &i, &key, &value)) |
2314 | 0 | return NULL; |
2315 | 0 | asdl_seq_SET(keys, j, key); |
2316 | 0 | asdl_seq_SET(values, j, value); |
2317 | |
|
2318 | 0 | j++; |
2319 | 0 | } |
2320 | 0 | keys->size = j; |
2321 | 0 | values->size = j; |
2322 | 0 | return Dict(keys, values, LINENO(n), n->n_col_offset, |
2323 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2324 | 0 | } |
2325 | | |
2326 | | static expr_ty |
2327 | | ast_for_genexp(struct compiling *c, const node *n) |
2328 | 0 | { |
2329 | 0 | assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument)); |
2330 | 0 | return ast_for_itercomp(c, n, COMP_GENEXP); |
2331 | 0 | } |
2332 | | |
2333 | | static expr_ty |
2334 | | ast_for_listcomp(struct compiling *c, const node *n) |
2335 | 0 | { |
2336 | 0 | assert(TYPE(n) == (testlist_comp)); |
2337 | 0 | return ast_for_itercomp(c, n, COMP_LISTCOMP); |
2338 | 0 | } |
2339 | | |
2340 | | static expr_ty |
2341 | | ast_for_setcomp(struct compiling *c, const node *n) |
2342 | 0 | { |
2343 | 0 | assert(TYPE(n) == (dictorsetmaker)); |
2344 | 0 | return ast_for_itercomp(c, n, COMP_SETCOMP); |
2345 | 0 | } |
2346 | | |
2347 | | static expr_ty |
2348 | | ast_for_setdisplay(struct compiling *c, const node *n) |
2349 | 0 | { |
2350 | 0 | int i; |
2351 | 0 | int size; |
2352 | 0 | asdl_seq *elts; |
2353 | |
|
2354 | 0 | assert(TYPE(n) == (dictorsetmaker)); |
2355 | 0 | size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */ |
2356 | 0 | elts = _Py_asdl_seq_new(size, c->c_arena); |
2357 | 0 | if (!elts) |
2358 | 0 | return NULL; |
2359 | 0 | for (i = 0; i < NCH(n); i += 2) { |
2360 | 0 | expr_ty expression; |
2361 | 0 | expression = ast_for_expr(c, CHILD(n, i)); |
2362 | 0 | if (!expression) |
2363 | 0 | return NULL; |
2364 | 0 | asdl_seq_SET(elts, i / 2, expression); |
2365 | 0 | } |
2366 | 0 | return Set(elts, LINENO(n), n->n_col_offset, |
2367 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2368 | 0 | } |
2369 | | |
2370 | | static expr_ty |
2371 | | ast_for_atom(struct compiling *c, const node *n) |
2372 | 381 | { |
2373 | | /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' |
2374 | | | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+ |
2375 | | | '...' | 'None' | 'True' | 'False' |
2376 | | */ |
2377 | 381 | node *ch = CHILD(n, 0); |
2378 | | |
2379 | 381 | switch (TYPE(ch)) { |
2380 | 303 | case NAME: { |
2381 | 303 | PyObject *name; |
2382 | 303 | const char *s = STR(ch); |
2383 | 303 | size_t len = strlen(s); |
2384 | 303 | if (len >= 4 && len <= 5) { |
2385 | 126 | if (!strcmp(s, "None")) |
2386 | 2 | return Constant(Py_None, NULL, LINENO(n), n->n_col_offset, |
2387 | 126 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2388 | 124 | if (!strcmp(s, "True")) |
2389 | 4 | return Constant(Py_True, NULL, LINENO(n), n->n_col_offset, |
2390 | 124 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2391 | 120 | if (!strcmp(s, "False")) |
2392 | 2 | return Constant(Py_False, NULL, LINENO(n), n->n_col_offset, |
2393 | 120 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2394 | 120 | } |
2395 | 295 | name = new_identifier(s, c); |
2396 | 295 | if (!name) |
2397 | 0 | return NULL; |
2398 | | /* All names start in Load context, but may later be changed. */ |
2399 | 295 | return Name(name, Load, LINENO(n), n->n_col_offset, |
2400 | 295 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2401 | 295 | } |
2402 | 24 | case STRING: { |
2403 | 24 | expr_ty str = parsestrplus(c, n); |
2404 | 24 | if (!str) { |
2405 | 0 | const char *errtype = NULL; |
2406 | 0 | if (PyErr_ExceptionMatches(PyExc_UnicodeError)) |
2407 | 0 | errtype = "unicode error"; |
2408 | 0 | else if (PyErr_ExceptionMatches(PyExc_ValueError)) |
2409 | 0 | errtype = "value error"; |
2410 | 0 | if (errtype) { |
2411 | 0 | PyObject *type, *value, *tback, *errstr; |
2412 | 0 | PyErr_Fetch(&type, &value, &tback); |
2413 | 0 | errstr = PyObject_Str(value); |
2414 | 0 | if (errstr) { |
2415 | 0 | ast_error(c, n, "(%s) %U", errtype, errstr); |
2416 | 0 | Py_DECREF(errstr); |
2417 | 0 | } |
2418 | 0 | else { |
2419 | 0 | PyErr_Clear(); |
2420 | 0 | ast_error(c, n, "(%s) unknown error", errtype); |
2421 | 0 | } |
2422 | 0 | Py_DECREF(type); |
2423 | 0 | Py_XDECREF(value); |
2424 | 0 | Py_XDECREF(tback); |
2425 | 0 | } |
2426 | 0 | return NULL; |
2427 | 0 | } |
2428 | 24 | return str; |
2429 | 24 | } |
2430 | 30 | case NUMBER: { |
2431 | 30 | PyObject *pynum; |
2432 | | /* Underscores in numeric literals are only allowed in Python 3.6 or greater */ |
2433 | | /* Check for underscores here rather than in parse_number so we can report a line number on error */ |
2434 | 30 | if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) { |
2435 | 0 | ast_error(c, ch, |
2436 | 0 | "Underscores in numeric literals are only supported in Python 3.6 and greater"); |
2437 | 0 | return NULL; |
2438 | 0 | } |
2439 | 30 | pynum = parsenumber(c, STR(ch)); |
2440 | 30 | if (!pynum) |
2441 | 0 | return NULL; |
2442 | | |
2443 | 30 | if (PyArena_AddPyObject(c->c_arena, pynum) < 0) { |
2444 | 0 | Py_DECREF(pynum); |
2445 | 0 | return NULL; |
2446 | 0 | } |
2447 | 30 | return Constant(pynum, NULL, LINENO(n), n->n_col_offset, |
2448 | 30 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2449 | 30 | } |
2450 | 0 | case ELLIPSIS: /* Ellipsis */ |
2451 | 0 | return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset, |
2452 | 30 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2453 | 14 | case LPAR: /* some parenthesized expressions */ |
2454 | 14 | ch = CHILD(n, 1); |
2455 | | |
2456 | 14 | if (TYPE(ch) == RPAR) |
2457 | 0 | return Tuple(NULL, Load, LINENO(n), n->n_col_offset, |
2458 | 14 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2459 | | |
2460 | 14 | if (TYPE(ch) == yield_expr) |
2461 | 0 | return ast_for_expr(c, ch); |
2462 | | |
2463 | | /* testlist_comp: test ( comp_for | (',' test)* [','] ) */ |
2464 | 14 | if (NCH(ch) == 1) { |
2465 | 2 | return ast_for_testlist(c, ch); |
2466 | 2 | } |
2467 | | |
2468 | 12 | if (TYPE(CHILD(ch, 1)) == comp_for) { |
2469 | 0 | return copy_location(ast_for_genexp(c, ch), n, n); |
2470 | 0 | } |
2471 | 12 | else { |
2472 | 12 | return copy_location(ast_for_testlist(c, ch), n, n); |
2473 | 12 | } |
2474 | 4 | case LSQB: /* list (or list comprehension) */ |
2475 | 4 | ch = CHILD(n, 1); |
2476 | | |
2477 | 4 | if (TYPE(ch) == RSQB) |
2478 | 0 | return List(NULL, Load, LINENO(n), n->n_col_offset, |
2479 | 4 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2480 | | |
2481 | 4 | REQ(ch, testlist_comp); |
2482 | 4 | if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { |
2483 | 4 | asdl_seq *elts = seq_for_testlist(c, ch); |
2484 | 4 | if (!elts) |
2485 | 0 | return NULL; |
2486 | | |
2487 | 4 | return List(elts, Load, LINENO(n), n->n_col_offset, |
2488 | 4 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2489 | 4 | } |
2490 | 0 | else { |
2491 | 0 | return copy_location(ast_for_listcomp(c, ch), n, n); |
2492 | 0 | } |
2493 | 6 | case LBRACE: { |
2494 | | /* dictorsetmaker: ( ((test ':' test | '**' test) |
2495 | | * (comp_for | (',' (test ':' test | '**' test))* [','])) | |
2496 | | * ((test | '*' test) |
2497 | | * (comp_for | (',' (test | '*' test))* [','])) ) */ |
2498 | 6 | expr_ty res; |
2499 | 6 | ch = CHILD(n, 1); |
2500 | 6 | if (TYPE(ch) == RBRACE) { |
2501 | | /* It's an empty dict. */ |
2502 | 6 | return Dict(NULL, NULL, LINENO(n), n->n_col_offset, |
2503 | 6 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2504 | 6 | } |
2505 | 0 | else { |
2506 | 0 | int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR); |
2507 | 0 | if (NCH(ch) == 1 || |
2508 | 0 | (NCH(ch) > 1 && |
2509 | 0 | TYPE(CHILD(ch, 1)) == COMMA)) { |
2510 | | /* It's a set display. */ |
2511 | 0 | res = ast_for_setdisplay(c, ch); |
2512 | 0 | } |
2513 | 0 | else if (NCH(ch) > 1 && |
2514 | 0 | TYPE(CHILD(ch, 1)) == comp_for) { |
2515 | | /* It's a set comprehension. */ |
2516 | 0 | res = ast_for_setcomp(c, ch); |
2517 | 0 | } |
2518 | 0 | else if (NCH(ch) > 3 - is_dict && |
2519 | 0 | TYPE(CHILD(ch, 3 - is_dict)) == comp_for) { |
2520 | | /* It's a dictionary comprehension. */ |
2521 | 0 | if (is_dict) { |
2522 | 0 | ast_error(c, n, |
2523 | 0 | "dict unpacking cannot be used in dict comprehension"); |
2524 | 0 | return NULL; |
2525 | 0 | } |
2526 | 0 | res = ast_for_dictcomp(c, ch); |
2527 | 0 | } |
2528 | 0 | else { |
2529 | | /* It's a dictionary display. */ |
2530 | 0 | res = ast_for_dictdisplay(c, ch); |
2531 | 0 | } |
2532 | 0 | return copy_location(res, n, n); |
2533 | 0 | } |
2534 | 6 | } |
2535 | 0 | default: |
2536 | 0 | PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch)); |
2537 | 0 | return NULL; |
2538 | 381 | } |
2539 | 381 | } |
2540 | | |
2541 | | static slice_ty |
2542 | | ast_for_slice(struct compiling *c, const node *n) |
2543 | 14 | { |
2544 | 14 | node *ch; |
2545 | 14 | expr_ty lower = NULL, upper = NULL, step = NULL; |
2546 | | |
2547 | 14 | REQ(n, subscript); |
2548 | | |
2549 | | /* |
2550 | | subscript: test | [test] ':' [test] [sliceop] |
2551 | | sliceop: ':' [test] |
2552 | | */ |
2553 | 14 | ch = CHILD(n, 0); |
2554 | 14 | if (NCH(n) == 1 && TYPE(ch) == test) { |
2555 | | /* 'step' variable hold no significance in terms of being used over |
2556 | | other vars */ |
2557 | 12 | step = ast_for_expr(c, ch); |
2558 | 12 | if (!step) |
2559 | 0 | return NULL; |
2560 | | |
2561 | 12 | return Index(step, c->c_arena); |
2562 | 12 | } |
2563 | | |
2564 | 2 | if (TYPE(ch) == test) { |
2565 | 0 | lower = ast_for_expr(c, ch); |
2566 | 0 | if (!lower) |
2567 | 0 | return NULL; |
2568 | 0 | } |
2569 | | |
2570 | | /* If there's an upper bound it's in the second or third position. */ |
2571 | 2 | if (TYPE(ch) == COLON) { |
2572 | 2 | if (NCH(n) > 1) { |
2573 | 2 | node *n2 = CHILD(n, 1); |
2574 | | |
2575 | 2 | if (TYPE(n2) == test) { |
2576 | 2 | upper = ast_for_expr(c, n2); |
2577 | 2 | if (!upper) |
2578 | 0 | return NULL; |
2579 | 2 | } |
2580 | 2 | } |
2581 | 2 | } else if (NCH(n) > 2) { |
2582 | 0 | node *n2 = CHILD(n, 2); |
2583 | |
|
2584 | 0 | if (TYPE(n2) == test) { |
2585 | 0 | upper = ast_for_expr(c, n2); |
2586 | 0 | if (!upper) |
2587 | 0 | return NULL; |
2588 | 0 | } |
2589 | 0 | } |
2590 | | |
2591 | 2 | ch = CHILD(n, NCH(n) - 1); |
2592 | 2 | if (TYPE(ch) == sliceop) { |
2593 | 0 | if (NCH(ch) != 1) { |
2594 | 0 | ch = CHILD(ch, 1); |
2595 | 0 | if (TYPE(ch) == test) { |
2596 | 0 | step = ast_for_expr(c, ch); |
2597 | 0 | if (!step) |
2598 | 0 | return NULL; |
2599 | 0 | } |
2600 | 0 | } |
2601 | 0 | } |
2602 | | |
2603 | 2 | return Slice(lower, upper, step, c->c_arena); |
2604 | 2 | } |
2605 | | |
2606 | | static expr_ty |
2607 | | ast_for_binop(struct compiling *c, const node *n) |
2608 | 12 | { |
2609 | | /* Must account for a sequence of expressions. |
2610 | | How should A op B op C by represented? |
2611 | | BinOp(BinOp(A, op, B), op, C). |
2612 | | */ |
2613 | | |
2614 | 12 | int i, nops; |
2615 | 12 | expr_ty expr1, expr2, result; |
2616 | 12 | operator_ty newoperator; |
2617 | | |
2618 | 12 | expr1 = ast_for_expr(c, CHILD(n, 0)); |
2619 | 12 | if (!expr1) |
2620 | 0 | return NULL; |
2621 | | |
2622 | 12 | expr2 = ast_for_expr(c, CHILD(n, 2)); |
2623 | 12 | if (!expr2) |
2624 | 0 | return NULL; |
2625 | | |
2626 | 12 | newoperator = get_operator(c, CHILD(n, 1)); |
2627 | 12 | if (!newoperator) |
2628 | 0 | return NULL; |
2629 | | |
2630 | 12 | result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, |
2631 | 12 | CHILD(n, 2)->n_end_lineno, CHILD(n, 2)->n_end_col_offset, |
2632 | 12 | c->c_arena); |
2633 | 12 | if (!result) |
2634 | 0 | return NULL; |
2635 | | |
2636 | 12 | nops = (NCH(n) - 1) / 2; |
2637 | 12 | for (i = 1; i < nops; i++) { |
2638 | 0 | expr_ty tmp_result, tmp; |
2639 | 0 | const node* next_oper = CHILD(n, i * 2 + 1); |
2640 | |
|
2641 | 0 | newoperator = get_operator(c, next_oper); |
2642 | 0 | if (!newoperator) |
2643 | 0 | return NULL; |
2644 | | |
2645 | 0 | tmp = ast_for_expr(c, CHILD(n, i * 2 + 2)); |
2646 | 0 | if (!tmp) |
2647 | 0 | return NULL; |
2648 | | |
2649 | 0 | tmp_result = BinOp(result, newoperator, tmp, |
2650 | 0 | LINENO(n), n->n_col_offset, |
2651 | 0 | CHILD(n, i * 2 + 2)->n_end_lineno, |
2652 | 0 | CHILD(n, i * 2 + 2)->n_end_col_offset, |
2653 | 0 | c->c_arena); |
2654 | 0 | if (!tmp_result) |
2655 | 0 | return NULL; |
2656 | 0 | result = tmp_result; |
2657 | 0 | } |
2658 | 12 | return result; |
2659 | 12 | } |
2660 | | |
2661 | | static expr_ty |
2662 | | ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr, const node *start) |
2663 | 154 | { |
2664 | | /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME |
2665 | | subscriptlist: subscript (',' subscript)* [','] |
2666 | | subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] |
2667 | | */ |
2668 | 154 | const node *n_copy = n; |
2669 | 154 | REQ(n, trailer); |
2670 | 154 | if (TYPE(CHILD(n, 0)) == LPAR) { |
2671 | 80 | if (NCH(n) == 2) |
2672 | 14 | return Call(left_expr, NULL, NULL, LINENO(start), start->n_col_offset, |
2673 | 80 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2674 | 66 | else |
2675 | 66 | return ast_for_call(c, CHILD(n, 1), left_expr, |
2676 | 66 | start, CHILD(n, 0), CHILD(n, 2)); |
2677 | 80 | } |
2678 | 74 | else if (TYPE(CHILD(n, 0)) == DOT) { |
2679 | 60 | PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1)); |
2680 | 60 | if (!attr_id) |
2681 | 0 | return NULL; |
2682 | 60 | return Attribute(left_expr, attr_id, Load, |
2683 | 60 | LINENO(start), start->n_col_offset, |
2684 | 60 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2685 | 60 | } |
2686 | 14 | else { |
2687 | 14 | REQ(CHILD(n, 0), LSQB); |
2688 | 14 | REQ(CHILD(n, 2), RSQB); |
2689 | 14 | n = CHILD(n, 1); |
2690 | 14 | if (NCH(n) == 1) { |
2691 | 14 | slice_ty slc = ast_for_slice(c, CHILD(n, 0)); |
2692 | 14 | if (!slc) |
2693 | 0 | return NULL; |
2694 | 14 | return Subscript(left_expr, slc, Load, LINENO(start), start->n_col_offset, |
2695 | 14 | n_copy->n_end_lineno, n_copy->n_end_col_offset, |
2696 | 14 | c->c_arena); |
2697 | 14 | } |
2698 | 0 | else { |
2699 | | /* The grammar is ambiguous here. The ambiguity is resolved |
2700 | | by treating the sequence as a tuple literal if there are |
2701 | | no slice features. |
2702 | | */ |
2703 | 0 | Py_ssize_t j; |
2704 | 0 | slice_ty slc; |
2705 | 0 | expr_ty e; |
2706 | 0 | int simple = 1; |
2707 | 0 | asdl_seq *slices, *elts; |
2708 | 0 | slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); |
2709 | 0 | if (!slices) |
2710 | 0 | return NULL; |
2711 | 0 | for (j = 0; j < NCH(n); j += 2) { |
2712 | 0 | slc = ast_for_slice(c, CHILD(n, j)); |
2713 | 0 | if (!slc) |
2714 | 0 | return NULL; |
2715 | 0 | if (slc->kind != Index_kind) |
2716 | 0 | simple = 0; |
2717 | 0 | asdl_seq_SET(slices, j / 2, slc); |
2718 | 0 | } |
2719 | 0 | if (!simple) { |
2720 | 0 | return Subscript(left_expr, ExtSlice(slices, c->c_arena), |
2721 | 0 | Load, LINENO(start), start->n_col_offset, |
2722 | 0 | n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena); |
2723 | 0 | } |
2724 | | /* extract Index values and put them in a Tuple */ |
2725 | 0 | elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena); |
2726 | 0 | if (!elts) |
2727 | 0 | return NULL; |
2728 | 0 | for (j = 0; j < asdl_seq_LEN(slices); ++j) { |
2729 | 0 | slc = (slice_ty)asdl_seq_GET(slices, j); |
2730 | 0 | assert(slc->kind == Index_kind && slc->v.Index.value); |
2731 | 0 | asdl_seq_SET(elts, j, slc->v.Index.value); |
2732 | 0 | } |
2733 | 0 | e = Tuple(elts, Load, LINENO(n), n->n_col_offset, |
2734 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2735 | 0 | if (!e) |
2736 | 0 | return NULL; |
2737 | 0 | return Subscript(left_expr, Index(e, c->c_arena), |
2738 | 0 | Load, LINENO(start), start->n_col_offset, |
2739 | 0 | n_copy->n_end_lineno, n_copy->n_end_col_offset, c->c_arena); |
2740 | 0 | } |
2741 | 14 | } |
2742 | 154 | } |
2743 | | |
2744 | | static expr_ty |
2745 | | ast_for_factor(struct compiling *c, const node *n) |
2746 | 0 | { |
2747 | 0 | expr_ty expression; |
2748 | |
|
2749 | 0 | expression = ast_for_expr(c, CHILD(n, 1)); |
2750 | 0 | if (!expression) |
2751 | 0 | return NULL; |
2752 | | |
2753 | 0 | switch (TYPE(CHILD(n, 0))) { |
2754 | 0 | case PLUS: |
2755 | 0 | return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset, |
2756 | 0 | n->n_end_lineno, n->n_end_col_offset, |
2757 | 0 | c->c_arena); |
2758 | 0 | case MINUS: |
2759 | 0 | return UnaryOp(USub, expression, LINENO(n), n->n_col_offset, |
2760 | 0 | n->n_end_lineno, n->n_end_col_offset, |
2761 | 0 | c->c_arena); |
2762 | 0 | case TILDE: |
2763 | 0 | return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset, |
2764 | 0 | n->n_end_lineno, n->n_end_col_offset, |
2765 | 0 | c->c_arena); |
2766 | 0 | } |
2767 | 0 | PyErr_Format(PyExc_SystemError, "unhandled factor: %d", |
2768 | 0 | TYPE(CHILD(n, 0))); |
2769 | 0 | return NULL; |
2770 | 0 | } |
2771 | | |
2772 | | static expr_ty |
2773 | | ast_for_atom_expr(struct compiling *c, const node *n) |
2774 | 381 | { |
2775 | 381 | int i, nch, start = 0; |
2776 | 381 | expr_ty e; |
2777 | | |
2778 | 381 | REQ(n, atom_expr); |
2779 | 381 | nch = NCH(n); |
2780 | | |
2781 | 381 | if (TYPE(CHILD(n, 0)) == AWAIT) { |
2782 | 0 | if (c->c_feature_version < 5) { |
2783 | 0 | ast_error(c, n, |
2784 | 0 | "Await expressions are only supported in Python 3.5 and greater"); |
2785 | 0 | return NULL; |
2786 | 0 | } |
2787 | 0 | start = 1; |
2788 | 0 | assert(nch > 1); |
2789 | 0 | } |
2790 | | |
2791 | 381 | e = ast_for_atom(c, CHILD(n, start)); |
2792 | 381 | if (!e) |
2793 | 0 | return NULL; |
2794 | 381 | if (nch == 1) |
2795 | 277 | return e; |
2796 | 104 | if (start && nch == 2) { |
2797 | 0 | return Await(e, LINENO(n), n->n_col_offset, |
2798 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2799 | 0 | } |
2800 | | |
2801 | 258 | for (i = start + 1; i < nch; i++) { |
2802 | 154 | node *ch = CHILD(n, i); |
2803 | 154 | if (TYPE(ch) != trailer) |
2804 | 0 | break; |
2805 | 154 | e = ast_for_trailer(c, ch, e, CHILD(n, start)); |
2806 | 154 | if (!e) |
2807 | 0 | return NULL; |
2808 | 154 | } |
2809 | | |
2810 | 104 | if (start) { |
2811 | | /* there was an 'await' */ |
2812 | 0 | return Await(e, LINENO(n), n->n_col_offset, |
2813 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2814 | 0 | } |
2815 | 104 | else { |
2816 | 104 | return e; |
2817 | 104 | } |
2818 | 104 | } |
2819 | | |
2820 | | static expr_ty |
2821 | | ast_for_power(struct compiling *c, const node *n) |
2822 | 381 | { |
2823 | | /* power: atom trailer* ('**' factor)* |
2824 | | */ |
2825 | 381 | expr_ty e; |
2826 | 381 | REQ(n, power); |
2827 | 381 | e = ast_for_atom_expr(c, CHILD(n, 0)); |
2828 | 381 | if (!e) |
2829 | 0 | return NULL; |
2830 | 381 | if (NCH(n) == 1) |
2831 | 381 | return e; |
2832 | 0 | if (TYPE(CHILD(n, NCH(n) - 1)) == factor) { |
2833 | 0 | expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1)); |
2834 | 0 | if (!f) |
2835 | 0 | return NULL; |
2836 | 0 | e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, |
2837 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2838 | 0 | } |
2839 | 0 | return e; |
2840 | 0 | } |
2841 | | |
2842 | | static expr_ty |
2843 | | ast_for_starred(struct compiling *c, const node *n) |
2844 | 0 | { |
2845 | 0 | expr_ty tmp; |
2846 | 0 | REQ(n, star_expr); |
2847 | |
|
2848 | 0 | tmp = ast_for_expr(c, CHILD(n, 1)); |
2849 | 0 | if (!tmp) |
2850 | 0 | return NULL; |
2851 | | |
2852 | | /* The Load context is changed later. */ |
2853 | 0 | return Starred(tmp, Load, LINENO(n), n->n_col_offset, |
2854 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2855 | 0 | } |
2856 | | |
2857 | | |
2858 | | /* Do not name a variable 'expr'! Will cause a compile error. |
2859 | | */ |
2860 | | |
2861 | | static expr_ty |
2862 | | ast_for_expr(struct compiling *c, const node *n) |
2863 | 419 | { |
2864 | | /* handle the full range of simple expressions |
2865 | | namedexpr_test: test [':=' test] |
2866 | | test: or_test ['if' or_test 'else' test] | lambdef |
2867 | | test_nocond: or_test | lambdef_nocond |
2868 | | or_test: and_test ('or' and_test)* |
2869 | | and_test: not_test ('and' not_test)* |
2870 | | not_test: 'not' not_test | comparison |
2871 | | comparison: expr (comp_op expr)* |
2872 | | expr: xor_expr ('|' xor_expr)* |
2873 | | xor_expr: and_expr ('^' and_expr)* |
2874 | | and_expr: shift_expr ('&' shift_expr)* |
2875 | | shift_expr: arith_expr (('<<'|'>>') arith_expr)* |
2876 | | arith_expr: term (('+'|'-') term)* |
2877 | | term: factor (('*'|'@'|'/'|'%'|'//') factor)* |
2878 | | factor: ('+'|'-'|'~') factor | power |
2879 | | power: atom_expr ['**' factor] |
2880 | | atom_expr: [AWAIT] atom trailer* |
2881 | | yield_expr: 'yield' [yield_arg] |
2882 | | */ |
2883 | | |
2884 | 419 | asdl_seq *seq; |
2885 | 419 | int i; |
2886 | | |
2887 | 4.69k | loop: |
2888 | 4.69k | switch (TYPE(n)) { |
2889 | 51 | case namedexpr_test: |
2890 | 51 | if (NCH(n) == 3) |
2891 | 0 | return ast_for_namedexpr(c, n); |
2892 | | /* Fallthrough */ |
2893 | 384 | case test: |
2894 | 384 | case test_nocond: |
2895 | 384 | if (TYPE(CHILD(n, 0)) == lambdef || |
2896 | 384 | TYPE(CHILD(n, 0)) == lambdef_nocond) |
2897 | 2 | return ast_for_lambdef(c, CHILD(n, 0)); |
2898 | 382 | else if (NCH(n) > 1) |
2899 | 0 | return ast_for_ifexpr(c, n); |
2900 | | /* Fallthrough */ |
2901 | 713 | case or_test: |
2902 | 1.04k | case and_test: |
2903 | 1.04k | if (NCH(n) == 1) { |
2904 | 1.04k | n = CHILD(n, 0); |
2905 | 1.04k | goto loop; |
2906 | 1.04k | } |
2907 | 0 | seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); |
2908 | 0 | if (!seq) |
2909 | 0 | return NULL; |
2910 | 0 | for (i = 0; i < NCH(n); i += 2) { |
2911 | 0 | expr_ty e = ast_for_expr(c, CHILD(n, i)); |
2912 | 0 | if (!e) |
2913 | 0 | return NULL; |
2914 | 0 | asdl_seq_SET(seq, i / 2, e); |
2915 | 0 | } |
2916 | 0 | if (!strcmp(STR(CHILD(n, 1)), "and")) |
2917 | 0 | return BoolOp(And, seq, LINENO(n), n->n_col_offset, |
2918 | 0 | n->n_end_lineno, n->n_end_col_offset, |
2919 | 0 | c->c_arena); |
2920 | 0 | assert(!strcmp(STR(CHILD(n, 1)), "or")); |
2921 | 0 | return BoolOp(Or, seq, LINENO(n), n->n_col_offset, |
2922 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2923 | 331 | case not_test: |
2924 | 331 | if (NCH(n) == 1) { |
2925 | 331 | n = CHILD(n, 0); |
2926 | 331 | goto loop; |
2927 | 331 | } |
2928 | 0 | else { |
2929 | 0 | expr_ty expression = ast_for_expr(c, CHILD(n, 1)); |
2930 | 0 | if (!expression) |
2931 | 0 | return NULL; |
2932 | | |
2933 | 0 | return UnaryOp(Not, expression, LINENO(n), n->n_col_offset, |
2934 | 0 | n->n_end_lineno, n->n_end_col_offset, |
2935 | 0 | c->c_arena); |
2936 | 0 | } |
2937 | 331 | case comparison: |
2938 | 331 | if (NCH(n) == 1) { |
2939 | 307 | n = CHILD(n, 0); |
2940 | 307 | goto loop; |
2941 | 307 | } |
2942 | 24 | else { |
2943 | 24 | expr_ty expression; |
2944 | 24 | asdl_int_seq *ops; |
2945 | 24 | asdl_seq *cmps; |
2946 | 24 | ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena); |
2947 | 24 | if (!ops) |
2948 | 0 | return NULL; |
2949 | 24 | cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); |
2950 | 24 | if (!cmps) { |
2951 | 0 | return NULL; |
2952 | 0 | } |
2953 | 48 | for (i = 1; i < NCH(n); i += 2) { |
2954 | 24 | cmpop_ty newoperator; |
2955 | | |
2956 | 24 | newoperator = ast_for_comp_op(c, CHILD(n, i)); |
2957 | 24 | if (!newoperator) { |
2958 | 0 | return NULL; |
2959 | 0 | } |
2960 | | |
2961 | 24 | expression = ast_for_expr(c, CHILD(n, i + 1)); |
2962 | 24 | if (!expression) { |
2963 | 0 | return NULL; |
2964 | 0 | } |
2965 | | |
2966 | 24 | asdl_seq_SET(ops, i / 2, newoperator); |
2967 | 24 | asdl_seq_SET(cmps, i / 2, expression); |
2968 | 24 | } |
2969 | 24 | expression = ast_for_expr(c, CHILD(n, 0)); |
2970 | 24 | if (!expression) { |
2971 | 0 | return NULL; |
2972 | 0 | } |
2973 | | |
2974 | 24 | return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset, |
2975 | 24 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
2976 | 24 | } |
2977 | | |
2978 | 0 | case star_expr: |
2979 | 0 | return ast_for_starred(c, n); |
2980 | | /* The next five cases all handle BinOps. The main body of code |
2981 | | is the same in each case, but the switch turned inside out to |
2982 | | reuse the code for each type of operator. |
2983 | | */ |
2984 | 369 | case expr: |
2985 | 738 | case xor_expr: |
2986 | 1.10k | case and_expr: |
2987 | 1.47k | case shift_expr: |
2988 | 1.84k | case arith_expr: |
2989 | 2.22k | case term: |
2990 | 2.22k | if (NCH(n) == 1) { |
2991 | 2.20k | n = CHILD(n, 0); |
2992 | 2.20k | goto loop; |
2993 | 2.20k | } |
2994 | 12 | return ast_for_binop(c, n); |
2995 | 0 | case yield_expr: { |
2996 | 0 | node *an = NULL; |
2997 | 0 | node *en = NULL; |
2998 | 0 | int is_from = 0; |
2999 | 0 | expr_ty exp = NULL; |
3000 | 0 | if (NCH(n) > 1) |
3001 | 0 | an = CHILD(n, 1); /* yield_arg */ |
3002 | 0 | if (an) { |
3003 | 0 | en = CHILD(an, NCH(an) - 1); |
3004 | 0 | if (NCH(an) == 2) { |
3005 | 0 | is_from = 1; |
3006 | 0 | exp = ast_for_expr(c, en); |
3007 | 0 | } |
3008 | 0 | else |
3009 | 0 | exp = ast_for_testlist(c, en); |
3010 | 0 | if (!exp) |
3011 | 0 | return NULL; |
3012 | 0 | } |
3013 | 0 | if (is_from) |
3014 | 0 | return YieldFrom(exp, LINENO(n), n->n_col_offset, |
3015 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3016 | 0 | return Yield(exp, LINENO(n), n->n_col_offset, |
3017 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3018 | 0 | } |
3019 | 381 | case factor: |
3020 | 381 | if (NCH(n) == 1) { |
3021 | 381 | n = CHILD(n, 0); |
3022 | 381 | goto loop; |
3023 | 381 | } |
3024 | 0 | return ast_for_factor(c, n); |
3025 | 381 | case power: |
3026 | 381 | return ast_for_power(c, n); |
3027 | 0 | default: |
3028 | 0 | PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n)); |
3029 | 0 | return NULL; |
3030 | 4.69k | } |
3031 | | /* should never get here unless if error is set */ |
3032 | 0 | return NULL; |
3033 | 4.69k | } |
3034 | | |
3035 | | static expr_ty |
3036 | | ast_for_call(struct compiling *c, const node *n, expr_ty func, |
3037 | | const node *start, const node *maybegenbeg, const node *closepar) |
3038 | 66 | { |
3039 | | /* |
3040 | | arglist: argument (',' argument)* [','] |
3041 | | argument: ( test [comp_for] | '*' test | test '=' test | '**' test ) |
3042 | | */ |
3043 | | |
3044 | 66 | int i, nargs, nkeywords; |
3045 | 66 | int ndoublestars; |
3046 | 66 | asdl_seq *args; |
3047 | 66 | asdl_seq *keywords; |
3048 | | |
3049 | 66 | REQ(n, arglist); |
3050 | | |
3051 | 66 | nargs = 0; |
3052 | 66 | nkeywords = 0; |
3053 | 204 | for (i = 0; i < NCH(n); i++) { |
3054 | 138 | node *ch = CHILD(n, i); |
3055 | 138 | if (TYPE(ch) == argument) { |
3056 | 102 | if (NCH(ch) == 1) |
3057 | 102 | nargs++; |
3058 | 0 | else if (TYPE(CHILD(ch, 1)) == comp_for) { |
3059 | 0 | nargs++; |
3060 | 0 | if (!maybegenbeg) { |
3061 | 0 | ast_error(c, ch, "invalid syntax"); |
3062 | 0 | return NULL; |
3063 | 0 | } |
3064 | 0 | if (NCH(n) > 1) { |
3065 | 0 | ast_error(c, ch, "Generator expression must be parenthesized"); |
3066 | 0 | return NULL; |
3067 | 0 | } |
3068 | 0 | } |
3069 | 0 | else if (TYPE(CHILD(ch, 0)) == STAR) |
3070 | 0 | nargs++; |
3071 | 0 | else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) { |
3072 | 0 | nargs++; |
3073 | 0 | } |
3074 | 0 | else |
3075 | | /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */ |
3076 | 0 | nkeywords++; |
3077 | 102 | } |
3078 | 138 | } |
3079 | | |
3080 | 66 | args = _Py_asdl_seq_new(nargs, c->c_arena); |
3081 | 66 | if (!args) |
3082 | 0 | return NULL; |
3083 | 66 | keywords = _Py_asdl_seq_new(nkeywords, c->c_arena); |
3084 | 66 | if (!keywords) |
3085 | 0 | return NULL; |
3086 | | |
3087 | 66 | nargs = 0; /* positional arguments + iterable argument unpackings */ |
3088 | 66 | nkeywords = 0; /* keyword arguments + keyword argument unpackings */ |
3089 | 66 | ndoublestars = 0; /* just keyword argument unpackings */ |
3090 | 204 | for (i = 0; i < NCH(n); i++) { |
3091 | 138 | node *ch = CHILD(n, i); |
3092 | 138 | if (TYPE(ch) == argument) { |
3093 | 102 | expr_ty e; |
3094 | 102 | node *chch = CHILD(ch, 0); |
3095 | 102 | if (NCH(ch) == 1) { |
3096 | | /* a positional argument */ |
3097 | 102 | if (nkeywords) { |
3098 | 0 | if (ndoublestars) { |
3099 | 0 | ast_error(c, chch, |
3100 | 0 | "positional argument follows " |
3101 | 0 | "keyword argument unpacking"); |
3102 | 0 | } |
3103 | 0 | else { |
3104 | 0 | ast_error(c, chch, |
3105 | 0 | "positional argument follows " |
3106 | 0 | "keyword argument"); |
3107 | 0 | } |
3108 | 0 | return NULL; |
3109 | 0 | } |
3110 | 102 | e = ast_for_expr(c, chch); |
3111 | 102 | if (!e) |
3112 | 0 | return NULL; |
3113 | 102 | asdl_seq_SET(args, nargs++, e); |
3114 | 102 | } |
3115 | 0 | else if (TYPE(chch) == STAR) { |
3116 | | /* an iterable argument unpacking */ |
3117 | 0 | expr_ty starred; |
3118 | 0 | if (ndoublestars) { |
3119 | 0 | ast_error(c, chch, |
3120 | 0 | "iterable argument unpacking follows " |
3121 | 0 | "keyword argument unpacking"); |
3122 | 0 | return NULL; |
3123 | 0 | } |
3124 | 0 | e = ast_for_expr(c, CHILD(ch, 1)); |
3125 | 0 | if (!e) |
3126 | 0 | return NULL; |
3127 | 0 | starred = Starred(e, Load, LINENO(chch), |
3128 | 0 | chch->n_col_offset, |
3129 | 0 | e->end_lineno, e->end_col_offset, |
3130 | 0 | c->c_arena); |
3131 | 0 | if (!starred) |
3132 | 0 | return NULL; |
3133 | 0 | asdl_seq_SET(args, nargs++, starred); |
3134 | |
|
3135 | 0 | } |
3136 | 0 | else if (TYPE(chch) == DOUBLESTAR) { |
3137 | | /* a keyword argument unpacking */ |
3138 | 0 | keyword_ty kw; |
3139 | 0 | i++; |
3140 | 0 | e = ast_for_expr(c, CHILD(ch, 1)); |
3141 | 0 | if (!e) |
3142 | 0 | return NULL; |
3143 | 0 | kw = keyword(NULL, e, c->c_arena); |
3144 | 0 | asdl_seq_SET(keywords, nkeywords++, kw); |
3145 | 0 | ndoublestars++; |
3146 | 0 | } |
3147 | 0 | else if (TYPE(CHILD(ch, 1)) == comp_for) { |
3148 | | /* the lone generator expression */ |
3149 | 0 | e = copy_location(ast_for_genexp(c, ch), maybegenbeg, closepar); |
3150 | 0 | if (!e) |
3151 | 0 | return NULL; |
3152 | 0 | asdl_seq_SET(args, nargs++, e); |
3153 | 0 | } |
3154 | 0 | else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) { |
3155 | | /* treat colon equal as positional argument */ |
3156 | 0 | if (nkeywords) { |
3157 | 0 | if (ndoublestars) { |
3158 | 0 | ast_error(c, chch, |
3159 | 0 | "positional argument follows " |
3160 | 0 | "keyword argument unpacking"); |
3161 | 0 | } |
3162 | 0 | else { |
3163 | 0 | ast_error(c, chch, |
3164 | 0 | "positional argument follows " |
3165 | 0 | "keyword argument"); |
3166 | 0 | } |
3167 | 0 | return NULL; |
3168 | 0 | } |
3169 | 0 | e = ast_for_namedexpr(c, ch); |
3170 | 0 | if (!e) |
3171 | 0 | return NULL; |
3172 | 0 | asdl_seq_SET(args, nargs++, e); |
3173 | 0 | } |
3174 | 0 | else { |
3175 | | /* a keyword argument */ |
3176 | 0 | keyword_ty kw; |
3177 | 0 | identifier key, tmp; |
3178 | 0 | int k; |
3179 | | |
3180 | | // To remain LL(1), the grammar accepts any test (basically, any |
3181 | | // expression) in the keyword slot of a call site. So, we need |
3182 | | // to manually enforce that the keyword is a NAME here. |
3183 | 0 | static const int name_tree[] = { |
3184 | 0 | test, |
3185 | 0 | or_test, |
3186 | 0 | and_test, |
3187 | 0 | not_test, |
3188 | 0 | comparison, |
3189 | 0 | expr, |
3190 | 0 | xor_expr, |
3191 | 0 | and_expr, |
3192 | 0 | shift_expr, |
3193 | 0 | arith_expr, |
3194 | 0 | term, |
3195 | 0 | factor, |
3196 | 0 | power, |
3197 | 0 | atom_expr, |
3198 | 0 | atom, |
3199 | 0 | 0, |
3200 | 0 | }; |
3201 | 0 | node *expr_node = chch; |
3202 | 0 | for (int i = 0; name_tree[i]; i++) { |
3203 | 0 | if (TYPE(expr_node) != name_tree[i]) |
3204 | 0 | break; |
3205 | 0 | if (NCH(expr_node) != 1) |
3206 | 0 | break; |
3207 | 0 | expr_node = CHILD(expr_node, 0); |
3208 | 0 | } |
3209 | 0 | if (TYPE(expr_node) != NAME) { |
3210 | 0 | ast_error(c, chch, |
3211 | 0 | "expression cannot contain assignment, " |
3212 | 0 | "perhaps you meant \"==\"?"); |
3213 | 0 | return NULL; |
3214 | 0 | } |
3215 | 0 | key = new_identifier(STR(expr_node), c); |
3216 | 0 | if (key == NULL) { |
3217 | 0 | return NULL; |
3218 | 0 | } |
3219 | 0 | if (forbidden_name(c, key, chch, 1)) { |
3220 | 0 | return NULL; |
3221 | 0 | } |
3222 | 0 | for (k = 0; k < nkeywords; k++) { |
3223 | 0 | tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg; |
3224 | 0 | if (tmp && !PyUnicode_Compare(tmp, key)) { |
3225 | 0 | ast_error(c, chch, |
3226 | 0 | "keyword argument repeated"); |
3227 | 0 | return NULL; |
3228 | 0 | } |
3229 | 0 | } |
3230 | 0 | e = ast_for_expr(c, CHILD(ch, 2)); |
3231 | 0 | if (!e) |
3232 | 0 | return NULL; |
3233 | 0 | kw = keyword(key, e, c->c_arena); |
3234 | 0 | if (!kw) |
3235 | 0 | return NULL; |
3236 | 0 | asdl_seq_SET(keywords, nkeywords++, kw); |
3237 | 0 | } |
3238 | 102 | } |
3239 | 138 | } |
3240 | | |
3241 | 66 | return Call(func, args, keywords, LINENO(start), start->n_col_offset, |
3242 | 66 | closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena); |
3243 | 66 | } |
3244 | | |
3245 | | static expr_ty |
3246 | | ast_for_testlist(struct compiling *c, const node* n) |
3247 | 134 | { |
3248 | | /* testlist_comp: test (comp_for | (',' test)* [',']) */ |
3249 | | /* testlist: test (',' test)* [','] */ |
3250 | 134 | assert(NCH(n) > 0); |
3251 | 134 | if (TYPE(n) == testlist_comp) { |
3252 | 14 | if (NCH(n) > 1) |
3253 | 12 | assert(TYPE(CHILD(n, 1)) != comp_for); |
3254 | 14 | } |
3255 | 120 | else { |
3256 | 120 | assert(TYPE(n) == testlist || |
3257 | 120 | TYPE(n) == testlist_star_expr); |
3258 | 120 | } |
3259 | 134 | if (NCH(n) == 1) |
3260 | 112 | return ast_for_expr(c, CHILD(n, 0)); |
3261 | 22 | else { |
3262 | 22 | asdl_seq *tmp = seq_for_testlist(c, n); |
3263 | 22 | if (!tmp) |
3264 | 0 | return NULL; |
3265 | 22 | return Tuple(tmp, Load, LINENO(n), n->n_col_offset, |
3266 | 22 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3267 | 22 | } |
3268 | 134 | } |
3269 | | |
3270 | | static stmt_ty |
3271 | | ast_for_expr_stmt(struct compiling *c, const node *n) |
3272 | 70 | { |
3273 | 70 | REQ(n, expr_stmt); |
3274 | | /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | |
3275 | | [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] ) |
3276 | | annassign: ':' test ['=' (yield_expr|testlist)] |
3277 | | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] |
3278 | | augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | |
3279 | | '<<=' | '>>=' | '**=' | '//=') |
3280 | | test: ... here starts the operator precedence dance |
3281 | | */ |
3282 | 70 | int num = NCH(n); |
3283 | | |
3284 | 70 | if (num == 1) { |
3285 | 36 | expr_ty e = ast_for_testlist(c, CHILD(n, 0)); |
3286 | 36 | if (!e) |
3287 | 0 | return NULL; |
3288 | | |
3289 | 36 | return Expr(e, LINENO(n), n->n_col_offset, |
3290 | 36 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3291 | 36 | } |
3292 | 34 | else if (TYPE(CHILD(n, 1)) == augassign) { |
3293 | 0 | expr_ty expr1, expr2; |
3294 | 0 | operator_ty newoperator; |
3295 | 0 | node *ch = CHILD(n, 0); |
3296 | |
|
3297 | 0 | expr1 = ast_for_testlist(c, ch); |
3298 | 0 | if (!expr1) |
3299 | 0 | return NULL; |
3300 | 0 | if(!set_context(c, expr1, Store, ch)) |
3301 | 0 | return NULL; |
3302 | | /* set_context checks that most expressions are not the left side. |
3303 | | Augmented assignments can only have a name, a subscript, or an |
3304 | | attribute on the left, though, so we have to explicitly check for |
3305 | | those. */ |
3306 | 0 | switch (expr1->kind) { |
3307 | 0 | case Name_kind: |
3308 | 0 | case Attribute_kind: |
3309 | 0 | case Subscript_kind: |
3310 | 0 | break; |
3311 | 0 | default: |
3312 | 0 | ast_error(c, ch, "illegal expression for augmented assignment"); |
3313 | 0 | return NULL; |
3314 | 0 | } |
3315 | | |
3316 | 0 | ch = CHILD(n, 2); |
3317 | 0 | if (TYPE(ch) == testlist) |
3318 | 0 | expr2 = ast_for_testlist(c, ch); |
3319 | 0 | else |
3320 | 0 | expr2 = ast_for_expr(c, ch); |
3321 | 0 | if (!expr2) |
3322 | 0 | return NULL; |
3323 | | |
3324 | 0 | newoperator = ast_for_augassign(c, CHILD(n, 1)); |
3325 | 0 | if (!newoperator) |
3326 | 0 | return NULL; |
3327 | | |
3328 | 0 | return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, |
3329 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3330 | 0 | } |
3331 | 34 | else if (TYPE(CHILD(n, 1)) == annassign) { |
3332 | 0 | expr_ty expr1, expr2, expr3; |
3333 | 0 | node *ch = CHILD(n, 0); |
3334 | 0 | node *deep, *ann = CHILD(n, 1); |
3335 | 0 | int simple = 1; |
3336 | | |
3337 | | /* AnnAssigns are only allowed in Python 3.6 or greater */ |
3338 | 0 | if (c->c_feature_version < 6) { |
3339 | 0 | ast_error(c, ch, |
3340 | 0 | "Variable annotation syntax is only supported in Python 3.6 and greater"); |
3341 | 0 | return NULL; |
3342 | 0 | } |
3343 | | |
3344 | | /* we keep track of parens to qualify (x) as expression not name */ |
3345 | 0 | deep = ch; |
3346 | 0 | while (NCH(deep) == 1) { |
3347 | 0 | deep = CHILD(deep, 0); |
3348 | 0 | } |
3349 | 0 | if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) { |
3350 | 0 | simple = 0; |
3351 | 0 | } |
3352 | 0 | expr1 = ast_for_testlist(c, ch); |
3353 | 0 | if (!expr1) { |
3354 | 0 | return NULL; |
3355 | 0 | } |
3356 | 0 | switch (expr1->kind) { |
3357 | 0 | case Name_kind: |
3358 | 0 | if (forbidden_name(c, expr1->v.Name.id, n, 0)) { |
3359 | 0 | return NULL; |
3360 | 0 | } |
3361 | 0 | expr1->v.Name.ctx = Store; |
3362 | 0 | break; |
3363 | 0 | case Attribute_kind: |
3364 | 0 | if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) { |
3365 | 0 | return NULL; |
3366 | 0 | } |
3367 | 0 | expr1->v.Attribute.ctx = Store; |
3368 | 0 | break; |
3369 | 0 | case Subscript_kind: |
3370 | 0 | expr1->v.Subscript.ctx = Store; |
3371 | 0 | break; |
3372 | 0 | case List_kind: |
3373 | 0 | ast_error(c, ch, |
3374 | 0 | "only single target (not list) can be annotated"); |
3375 | 0 | return NULL; |
3376 | 0 | case Tuple_kind: |
3377 | 0 | ast_error(c, ch, |
3378 | 0 | "only single target (not tuple) can be annotated"); |
3379 | 0 | return NULL; |
3380 | 0 | default: |
3381 | 0 | ast_error(c, ch, |
3382 | 0 | "illegal target for annotation"); |
3383 | 0 | return NULL; |
3384 | 0 | } |
3385 | | |
3386 | 0 | if (expr1->kind != Name_kind) { |
3387 | 0 | simple = 0; |
3388 | 0 | } |
3389 | 0 | ch = CHILD(ann, 1); |
3390 | 0 | expr2 = ast_for_expr(c, ch); |
3391 | 0 | if (!expr2) { |
3392 | 0 | return NULL; |
3393 | 0 | } |
3394 | 0 | if (NCH(ann) == 2) { |
3395 | 0 | return AnnAssign(expr1, expr2, NULL, simple, |
3396 | 0 | LINENO(n), n->n_col_offset, |
3397 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3398 | 0 | } |
3399 | 0 | else { |
3400 | 0 | ch = CHILD(ann, 3); |
3401 | 0 | if (TYPE(ch) == testlist_star_expr) { |
3402 | 0 | expr3 = ast_for_testlist(c, ch); |
3403 | 0 | } |
3404 | 0 | else { |
3405 | 0 | expr3 = ast_for_expr(c, ch); |
3406 | 0 | } |
3407 | 0 | if (!expr3) { |
3408 | 0 | return NULL; |
3409 | 0 | } |
3410 | 0 | return AnnAssign(expr1, expr2, expr3, simple, |
3411 | 0 | LINENO(n), n->n_col_offset, |
3412 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3413 | 0 | } |
3414 | 0 | } |
3415 | 34 | else { |
3416 | 34 | int i, nch_minus_type, has_type_comment; |
3417 | 34 | asdl_seq *targets; |
3418 | 34 | node *value; |
3419 | 34 | expr_ty expression; |
3420 | 34 | string type_comment; |
3421 | | |
3422 | | /* a normal assignment */ |
3423 | 34 | REQ(CHILD(n, 1), EQUAL); |
3424 | | |
3425 | 34 | has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT; |
3426 | 34 | nch_minus_type = num - has_type_comment; |
3427 | | |
3428 | 34 | targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena); |
3429 | 34 | if (!targets) |
3430 | 0 | return NULL; |
3431 | 70 | for (i = 0; i < nch_minus_type - 2; i += 2) { |
3432 | 36 | expr_ty e; |
3433 | 36 | node *ch = CHILD(n, i); |
3434 | 36 | if (TYPE(ch) == yield_expr) { |
3435 | 0 | ast_error(c, ch, "assignment to yield expression not possible"); |
3436 | 0 | return NULL; |
3437 | 0 | } |
3438 | 36 | e = ast_for_testlist(c, ch); |
3439 | 36 | if (!e) |
3440 | 0 | return NULL; |
3441 | | |
3442 | | /* set context to assign */ |
3443 | 36 | if (!set_context(c, e, Store, CHILD(n, i))) |
3444 | 0 | return NULL; |
3445 | | |
3446 | 36 | asdl_seq_SET(targets, i / 2, e); |
3447 | 36 | } |
3448 | 34 | value = CHILD(n, nch_minus_type - 1); |
3449 | 34 | if (TYPE(value) == testlist_star_expr) |
3450 | 34 | expression = ast_for_testlist(c, value); |
3451 | 0 | else |
3452 | 0 | expression = ast_for_expr(c, value); |
3453 | 34 | if (!expression) |
3454 | 0 | return NULL; |
3455 | 34 | if (has_type_comment) { |
3456 | 0 | type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type)); |
3457 | 0 | if (!type_comment) |
3458 | 0 | return NULL; |
3459 | 0 | } |
3460 | 34 | else |
3461 | 34 | type_comment = NULL; |
3462 | 34 | return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset, |
3463 | 34 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3464 | 34 | } |
3465 | 70 | } |
3466 | | |
3467 | | |
3468 | | static asdl_seq * |
3469 | | ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context) |
3470 | 10 | { |
3471 | 10 | asdl_seq *seq; |
3472 | 10 | int i; |
3473 | 10 | expr_ty e; |
3474 | | |
3475 | 10 | REQ(n, exprlist); |
3476 | | |
3477 | 10 | seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); |
3478 | 10 | if (!seq) |
3479 | 0 | return NULL; |
3480 | 24 | for (i = 0; i < NCH(n); i += 2) { |
3481 | 14 | e = ast_for_expr(c, CHILD(n, i)); |
3482 | 14 | if (!e) |
3483 | 0 | return NULL; |
3484 | 14 | asdl_seq_SET(seq, i / 2, e); |
3485 | 14 | if (context && !set_context(c, e, context, CHILD(n, i))) |
3486 | 0 | return NULL; |
3487 | 14 | } |
3488 | 10 | return seq; |
3489 | 10 | } |
3490 | | |
3491 | | static stmt_ty |
3492 | | ast_for_del_stmt(struct compiling *c, const node *n) |
3493 | 2 | { |
3494 | 2 | asdl_seq *expr_list; |
3495 | | |
3496 | | /* del_stmt: 'del' exprlist */ |
3497 | 2 | REQ(n, del_stmt); |
3498 | | |
3499 | 2 | expr_list = ast_for_exprlist(c, CHILD(n, 1), Del); |
3500 | 2 | if (!expr_list) |
3501 | 0 | return NULL; |
3502 | 2 | return Delete(expr_list, LINENO(n), n->n_col_offset, |
3503 | 2 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3504 | 2 | } |
3505 | | |
3506 | | static stmt_ty |
3507 | | ast_for_flow_stmt(struct compiling *c, const node *n) |
3508 | 8 | { |
3509 | | /* |
3510 | | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt |
3511 | | | yield_stmt |
3512 | | break_stmt: 'break' |
3513 | | continue_stmt: 'continue' |
3514 | | return_stmt: 'return' [testlist] |
3515 | | yield_stmt: yield_expr |
3516 | | yield_expr: 'yield' testlist | 'yield' 'from' test |
3517 | | raise_stmt: 'raise' [test [',' test [',' test]]] |
3518 | | */ |
3519 | 8 | node *ch; |
3520 | | |
3521 | 8 | REQ(n, flow_stmt); |
3522 | 8 | ch = CHILD(n, 0); |
3523 | 8 | switch (TYPE(ch)) { |
3524 | 0 | case break_stmt: |
3525 | 0 | return Break(LINENO(n), n->n_col_offset, |
3526 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3527 | 0 | case continue_stmt: |
3528 | 0 | return Continue(LINENO(n), n->n_col_offset, |
3529 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3530 | 0 | case yield_stmt: { /* will reduce to yield_expr */ |
3531 | 0 | expr_ty exp = ast_for_expr(c, CHILD(ch, 0)); |
3532 | 0 | if (!exp) |
3533 | 0 | return NULL; |
3534 | 0 | return Expr(exp, LINENO(n), n->n_col_offset, |
3535 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3536 | 0 | } |
3537 | 6 | case return_stmt: |
3538 | 6 | if (NCH(ch) == 1) |
3539 | 0 | return Return(NULL, LINENO(n), n->n_col_offset, |
3540 | 6 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3541 | 6 | else { |
3542 | 6 | expr_ty expression = ast_for_testlist(c, CHILD(ch, 1)); |
3543 | 6 | if (!expression) |
3544 | 0 | return NULL; |
3545 | 6 | return Return(expression, LINENO(n), n->n_col_offset, |
3546 | 6 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3547 | 6 | } |
3548 | 2 | case raise_stmt: |
3549 | 2 | if (NCH(ch) == 1) |
3550 | 2 | return Raise(NULL, NULL, LINENO(n), n->n_col_offset, |
3551 | 2 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3552 | 0 | else if (NCH(ch) >= 2) { |
3553 | 0 | expr_ty cause = NULL; |
3554 | 0 | expr_ty expression = ast_for_expr(c, CHILD(ch, 1)); |
3555 | 0 | if (!expression) |
3556 | 0 | return NULL; |
3557 | 0 | if (NCH(ch) == 4) { |
3558 | 0 | cause = ast_for_expr(c, CHILD(ch, 3)); |
3559 | 0 | if (!cause) |
3560 | 0 | return NULL; |
3561 | 0 | } |
3562 | 0 | return Raise(expression, cause, LINENO(n), n->n_col_offset, |
3563 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3564 | 0 | } |
3565 | | /* fall through */ |
3566 | 0 | default: |
3567 | 0 | PyErr_Format(PyExc_SystemError, |
3568 | 0 | "unexpected flow_stmt: %d", TYPE(ch)); |
3569 | 0 | return NULL; |
3570 | 8 | } |
3571 | 8 | } |
3572 | | |
3573 | | static alias_ty |
3574 | | alias_for_import_name(struct compiling *c, const node *n, int store) |
3575 | 0 | { |
3576 | | /* |
3577 | | import_as_name: NAME ['as' NAME] |
3578 | | dotted_as_name: dotted_name ['as' NAME] |
3579 | | dotted_name: NAME ('.' NAME)* |
3580 | | */ |
3581 | 0 | identifier str, name; |
3582 | |
|
3583 | 0 | loop: |
3584 | 0 | switch (TYPE(n)) { |
3585 | 0 | case import_as_name: { |
3586 | 0 | node *name_node = CHILD(n, 0); |
3587 | 0 | str = NULL; |
3588 | 0 | name = NEW_IDENTIFIER(name_node); |
3589 | 0 | if (!name) |
3590 | 0 | return NULL; |
3591 | 0 | if (NCH(n) == 3) { |
3592 | 0 | node *str_node = CHILD(n, 2); |
3593 | 0 | str = NEW_IDENTIFIER(str_node); |
3594 | 0 | if (!str) |
3595 | 0 | return NULL; |
3596 | 0 | if (store && forbidden_name(c, str, str_node, 0)) |
3597 | 0 | return NULL; |
3598 | 0 | } |
3599 | 0 | else { |
3600 | 0 | if (forbidden_name(c, name, name_node, 0)) |
3601 | 0 | return NULL; |
3602 | 0 | } |
3603 | 0 | return alias(name, str, c->c_arena); |
3604 | 0 | } |
3605 | 0 | case dotted_as_name: |
3606 | 0 | if (NCH(n) == 1) { |
3607 | 0 | n = CHILD(n, 0); |
3608 | 0 | goto loop; |
3609 | 0 | } |
3610 | 0 | else { |
3611 | 0 | node *asname_node = CHILD(n, 2); |
3612 | 0 | alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0); |
3613 | 0 | if (!a) |
3614 | 0 | return NULL; |
3615 | 0 | assert(!a->asname); |
3616 | 0 | a->asname = NEW_IDENTIFIER(asname_node); |
3617 | 0 | if (!a->asname) |
3618 | 0 | return NULL; |
3619 | 0 | if (forbidden_name(c, a->asname, asname_node, 0)) |
3620 | 0 | return NULL; |
3621 | 0 | return a; |
3622 | 0 | } |
3623 | 0 | case dotted_name: |
3624 | 0 | if (NCH(n) == 1) { |
3625 | 0 | node *name_node = CHILD(n, 0); |
3626 | 0 | name = NEW_IDENTIFIER(name_node); |
3627 | 0 | if (!name) |
3628 | 0 | return NULL; |
3629 | 0 | if (store && forbidden_name(c, name, name_node, 0)) |
3630 | 0 | return NULL; |
3631 | 0 | return alias(name, NULL, c->c_arena); |
3632 | 0 | } |
3633 | 0 | else { |
3634 | | /* Create a string of the form "a.b.c" */ |
3635 | 0 | int i; |
3636 | 0 | size_t len; |
3637 | 0 | char *s; |
3638 | 0 | PyObject *uni; |
3639 | |
|
3640 | 0 | len = 0; |
3641 | 0 | for (i = 0; i < NCH(n); i += 2) |
3642 | | /* length of string plus one for the dot */ |
3643 | 0 | len += strlen(STR(CHILD(n, i))) + 1; |
3644 | 0 | len--; /* the last name doesn't have a dot */ |
3645 | 0 | str = PyBytes_FromStringAndSize(NULL, len); |
3646 | 0 | if (!str) |
3647 | 0 | return NULL; |
3648 | 0 | s = PyBytes_AS_STRING(str); |
3649 | 0 | if (!s) |
3650 | 0 | return NULL; |
3651 | 0 | for (i = 0; i < NCH(n); i += 2) { |
3652 | 0 | char *sch = STR(CHILD(n, i)); |
3653 | 0 | strcpy(s, STR(CHILD(n, i))); |
3654 | 0 | s += strlen(sch); |
3655 | 0 | *s++ = '.'; |
3656 | 0 | } |
3657 | 0 | --s; |
3658 | 0 | *s = '\0'; |
3659 | 0 | uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), |
3660 | 0 | PyBytes_GET_SIZE(str), |
3661 | 0 | NULL); |
3662 | 0 | Py_DECREF(str); |
3663 | 0 | if (!uni) |
3664 | 0 | return NULL; |
3665 | 0 | str = uni; |
3666 | 0 | PyUnicode_InternInPlace(&str); |
3667 | 0 | if (PyArena_AddPyObject(c->c_arena, str) < 0) { |
3668 | 0 | Py_DECREF(str); |
3669 | 0 | return NULL; |
3670 | 0 | } |
3671 | 0 | return alias(str, NULL, c->c_arena); |
3672 | 0 | } |
3673 | 0 | case STAR: |
3674 | 0 | str = PyUnicode_InternFromString("*"); |
3675 | 0 | if (!str) |
3676 | 0 | return NULL; |
3677 | 0 | if (PyArena_AddPyObject(c->c_arena, str) < 0) { |
3678 | 0 | Py_DECREF(str); |
3679 | 0 | return NULL; |
3680 | 0 | } |
3681 | 0 | return alias(str, NULL, c->c_arena); |
3682 | 0 | default: |
3683 | 0 | PyErr_Format(PyExc_SystemError, |
3684 | 0 | "unexpected import name: %d", TYPE(n)); |
3685 | 0 | return NULL; |
3686 | 0 | } |
3687 | | |
3688 | 0 | PyErr_SetString(PyExc_SystemError, "unhandled import name condition"); |
3689 | 0 | return NULL; |
3690 | 0 | } |
3691 | | |
3692 | | static stmt_ty |
3693 | | ast_for_import_stmt(struct compiling *c, const node *n) |
3694 | 0 | { |
3695 | | /* |
3696 | | import_stmt: import_name | import_from |
3697 | | import_name: 'import' dotted_as_names |
3698 | | import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+) |
3699 | | 'import' ('*' | '(' import_as_names ')' | import_as_names) |
3700 | | */ |
3701 | 0 | int lineno; |
3702 | 0 | int col_offset; |
3703 | 0 | int i; |
3704 | 0 | asdl_seq *aliases; |
3705 | |
|
3706 | 0 | REQ(n, import_stmt); |
3707 | 0 | lineno = LINENO(n); |
3708 | 0 | col_offset = n->n_col_offset; |
3709 | 0 | n = CHILD(n, 0); |
3710 | 0 | if (TYPE(n) == import_name) { |
3711 | 0 | n = CHILD(n, 1); |
3712 | 0 | REQ(n, dotted_as_names); |
3713 | 0 | aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); |
3714 | 0 | if (!aliases) |
3715 | 0 | return NULL; |
3716 | 0 | for (i = 0; i < NCH(n); i += 2) { |
3717 | 0 | alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1); |
3718 | 0 | if (!import_alias) |
3719 | 0 | return NULL; |
3720 | 0 | asdl_seq_SET(aliases, i / 2, import_alias); |
3721 | 0 | } |
3722 | | // Even though n is modified above, the end position is not changed |
3723 | 0 | return Import(aliases, lineno, col_offset, |
3724 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3725 | 0 | } |
3726 | 0 | else if (TYPE(n) == import_from) { |
3727 | 0 | int n_children; |
3728 | 0 | int idx, ndots = 0; |
3729 | 0 | const node *n_copy = n; |
3730 | 0 | alias_ty mod = NULL; |
3731 | 0 | identifier modname = NULL; |
3732 | | |
3733 | | /* Count the number of dots (for relative imports) and check for the |
3734 | | optional module name */ |
3735 | 0 | for (idx = 1; idx < NCH(n); idx++) { |
3736 | 0 | if (TYPE(CHILD(n, idx)) == dotted_name) { |
3737 | 0 | mod = alias_for_import_name(c, CHILD(n, idx), 0); |
3738 | 0 | if (!mod) |
3739 | 0 | return NULL; |
3740 | 0 | idx++; |
3741 | 0 | break; |
3742 | 0 | } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) { |
3743 | | /* three consecutive dots are tokenized as one ELLIPSIS */ |
3744 | 0 | ndots += 3; |
3745 | 0 | continue; |
3746 | 0 | } else if (TYPE(CHILD(n, idx)) != DOT) { |
3747 | 0 | break; |
3748 | 0 | } |
3749 | 0 | ndots++; |
3750 | 0 | } |
3751 | 0 | idx++; /* skip over the 'import' keyword */ |
3752 | 0 | switch (TYPE(CHILD(n, idx))) { |
3753 | 0 | case STAR: |
3754 | | /* from ... import * */ |
3755 | 0 | n = CHILD(n, idx); |
3756 | 0 | n_children = 1; |
3757 | 0 | break; |
3758 | 0 | case LPAR: |
3759 | | /* from ... import (x, y, z) */ |
3760 | 0 | n = CHILD(n, idx + 1); |
3761 | 0 | n_children = NCH(n); |
3762 | 0 | break; |
3763 | 0 | case import_as_names: |
3764 | | /* from ... import x, y, z */ |
3765 | 0 | n = CHILD(n, idx); |
3766 | 0 | n_children = NCH(n); |
3767 | 0 | if (n_children % 2 == 0) { |
3768 | 0 | ast_error(c, n, |
3769 | 0 | "trailing comma not allowed without" |
3770 | 0 | " surrounding parentheses"); |
3771 | 0 | return NULL; |
3772 | 0 | } |
3773 | 0 | break; |
3774 | 0 | default: |
3775 | 0 | ast_error(c, n, "Unexpected node-type in from-import"); |
3776 | 0 | return NULL; |
3777 | 0 | } |
3778 | | |
3779 | 0 | aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena); |
3780 | 0 | if (!aliases) |
3781 | 0 | return NULL; |
3782 | | |
3783 | | /* handle "from ... import *" special b/c there's no children */ |
3784 | 0 | if (TYPE(n) == STAR) { |
3785 | 0 | alias_ty import_alias = alias_for_import_name(c, n, 1); |
3786 | 0 | if (!import_alias) |
3787 | 0 | return NULL; |
3788 | 0 | asdl_seq_SET(aliases, 0, import_alias); |
3789 | 0 | } |
3790 | 0 | else { |
3791 | 0 | for (i = 0; i < NCH(n); i += 2) { |
3792 | 0 | alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1); |
3793 | 0 | if (!import_alias) |
3794 | 0 | return NULL; |
3795 | 0 | asdl_seq_SET(aliases, i / 2, import_alias); |
3796 | 0 | } |
3797 | 0 | } |
3798 | 0 | if (mod != NULL) |
3799 | 0 | modname = mod->name; |
3800 | 0 | return ImportFrom(modname, aliases, ndots, lineno, col_offset, |
3801 | 0 | n_copy->n_end_lineno, n_copy->n_end_col_offset, |
3802 | 0 | c->c_arena); |
3803 | 0 | } |
3804 | 0 | PyErr_Format(PyExc_SystemError, |
3805 | 0 | "unknown import statement: starts with command '%s'", |
3806 | 0 | STR(CHILD(n, 0))); |
3807 | 0 | return NULL; |
3808 | 0 | } |
3809 | | |
3810 | | static stmt_ty |
3811 | | ast_for_global_stmt(struct compiling *c, const node *n) |
3812 | 0 | { |
3813 | | /* global_stmt: 'global' NAME (',' NAME)* */ |
3814 | 0 | identifier name; |
3815 | 0 | asdl_seq *s; |
3816 | 0 | int i; |
3817 | |
|
3818 | 0 | REQ(n, global_stmt); |
3819 | 0 | s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); |
3820 | 0 | if (!s) |
3821 | 0 | return NULL; |
3822 | 0 | for (i = 1; i < NCH(n); i += 2) { |
3823 | 0 | name = NEW_IDENTIFIER(CHILD(n, i)); |
3824 | 0 | if (!name) |
3825 | 0 | return NULL; |
3826 | 0 | asdl_seq_SET(s, i / 2, name); |
3827 | 0 | } |
3828 | 0 | return Global(s, LINENO(n), n->n_col_offset, |
3829 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3830 | 0 | } |
3831 | | |
3832 | | static stmt_ty |
3833 | | ast_for_nonlocal_stmt(struct compiling *c, const node *n) |
3834 | 0 | { |
3835 | | /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */ |
3836 | 0 | identifier name; |
3837 | 0 | asdl_seq *s; |
3838 | 0 | int i; |
3839 | |
|
3840 | 0 | REQ(n, nonlocal_stmt); |
3841 | 0 | s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); |
3842 | 0 | if (!s) |
3843 | 0 | return NULL; |
3844 | 0 | for (i = 1; i < NCH(n); i += 2) { |
3845 | 0 | name = NEW_IDENTIFIER(CHILD(n, i)); |
3846 | 0 | if (!name) |
3847 | 0 | return NULL; |
3848 | 0 | asdl_seq_SET(s, i / 2, name); |
3849 | 0 | } |
3850 | 0 | return Nonlocal(s, LINENO(n), n->n_col_offset, |
3851 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3852 | 0 | } |
3853 | | |
3854 | | static stmt_ty |
3855 | | ast_for_assert_stmt(struct compiling *c, const node *n) |
3856 | 6 | { |
3857 | | /* assert_stmt: 'assert' test [',' test] */ |
3858 | 6 | REQ(n, assert_stmt); |
3859 | 6 | if (NCH(n) == 2) { |
3860 | 0 | expr_ty expression = ast_for_expr(c, CHILD(n, 1)); |
3861 | 0 | if (!expression) |
3862 | 0 | return NULL; |
3863 | 0 | return Assert(expression, NULL, LINENO(n), n->n_col_offset, |
3864 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3865 | 0 | } |
3866 | 6 | else if (NCH(n) == 4) { |
3867 | 6 | expr_ty expr1, expr2; |
3868 | | |
3869 | 6 | expr1 = ast_for_expr(c, CHILD(n, 1)); |
3870 | 6 | if (!expr1) |
3871 | 0 | return NULL; |
3872 | 6 | expr2 = ast_for_expr(c, CHILD(n, 3)); |
3873 | 6 | if (!expr2) |
3874 | 0 | return NULL; |
3875 | | |
3876 | 6 | return Assert(expr1, expr2, LINENO(n), n->n_col_offset, |
3877 | 6 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
3878 | 6 | } |
3879 | 0 | PyErr_Format(PyExc_SystemError, |
3880 | 0 | "improper number of parts to 'assert' statement: %d", |
3881 | 0 | NCH(n)); |
3882 | 0 | return NULL; |
3883 | 6 | } |
3884 | | |
3885 | | static asdl_seq * |
3886 | | ast_for_suite(struct compiling *c, const node *n) |
3887 | 60 | { |
3888 | | /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */ |
3889 | 60 | asdl_seq *seq; |
3890 | 60 | stmt_ty s; |
3891 | 60 | int i, total, num, end, pos = 0; |
3892 | 60 | node *ch; |
3893 | | |
3894 | 60 | if (TYPE(n) != func_body_suite) { |
3895 | 60 | REQ(n, suite); |
3896 | 60 | } |
3897 | | |
3898 | 60 | total = num_stmts(n); |
3899 | 60 | seq = _Py_asdl_seq_new(total, c->c_arena); |
3900 | 60 | if (!seq) |
3901 | 0 | return NULL; |
3902 | 60 | if (TYPE(CHILD(n, 0)) == simple_stmt) { |
3903 | 2 | n = CHILD(n, 0); |
3904 | | /* simple_stmt always ends with a NEWLINE, |
3905 | | and may have a trailing SEMI |
3906 | | */ |
3907 | 2 | end = NCH(n) - 1; |
3908 | 2 | if (TYPE(CHILD(n, end - 1)) == SEMI) |
3909 | 0 | end--; |
3910 | | /* loop by 2 to skip semi-colons */ |
3911 | 4 | for (i = 0; i < end; i += 2) { |
3912 | 2 | ch = CHILD(n, i); |
3913 | 2 | s = ast_for_stmt(c, ch); |
3914 | 2 | if (!s) |
3915 | 0 | return NULL; |
3916 | 2 | asdl_seq_SET(seq, pos++, s); |
3917 | 2 | } |
3918 | 2 | } |
3919 | 58 | else { |
3920 | 58 | i = 2; |
3921 | 58 | if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) { |
3922 | 0 | i += 2; |
3923 | 0 | REQ(CHILD(n, 2), NEWLINE); |
3924 | 0 | } |
3925 | | |
3926 | 176 | for (; i < (NCH(n) - 1); i++) { |
3927 | 118 | ch = CHILD(n, i); |
3928 | 118 | REQ(ch, stmt); |
3929 | 118 | num = num_stmts(ch); |
3930 | 118 | if (num == 1) { |
3931 | | /* small_stmt or compound_stmt with only one child */ |
3932 | 118 | s = ast_for_stmt(c, ch); |
3933 | 118 | if (!s) |
3934 | 0 | return NULL; |
3935 | 118 | asdl_seq_SET(seq, pos++, s); |
3936 | 118 | } |
3937 | 0 | else { |
3938 | 0 | int j; |
3939 | 0 | ch = CHILD(ch, 0); |
3940 | 0 | REQ(ch, simple_stmt); |
3941 | 0 | for (j = 0; j < NCH(ch); j += 2) { |
3942 | | /* statement terminates with a semi-colon ';' */ |
3943 | 0 | if (NCH(CHILD(ch, j)) == 0) { |
3944 | 0 | assert((j + 1) == NCH(ch)); |
3945 | 0 | break; |
3946 | 0 | } |
3947 | 0 | s = ast_for_stmt(c, CHILD(ch, j)); |
3948 | 0 | if (!s) |
3949 | 0 | return NULL; |
3950 | 0 | asdl_seq_SET(seq, pos++, s); |
3951 | 0 | } |
3952 | 0 | } |
3953 | 118 | } |
3954 | 58 | } |
3955 | 60 | assert(pos == seq->size); |
3956 | 60 | return seq; |
3957 | 60 | } |
3958 | | |
3959 | | static void |
3960 | | get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset) |
3961 | 40 | { |
3962 | 40 | Py_ssize_t tot = asdl_seq_LEN(s); |
3963 | | // There must be no empty suites. |
3964 | 40 | assert(tot > 0); |
3965 | 40 | stmt_ty last = asdl_seq_GET(s, tot - 1); |
3966 | 40 | *end_lineno = last->end_lineno; |
3967 | 40 | *end_col_offset = last->end_col_offset; |
3968 | 40 | } |
3969 | | |
3970 | | static stmt_ty |
3971 | | ast_for_if_stmt(struct compiling *c, const node *n) |
3972 | 6 | { |
3973 | | /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)* |
3974 | | ['else' ':' suite] |
3975 | | */ |
3976 | 6 | char *s; |
3977 | 6 | int end_lineno, end_col_offset; |
3978 | | |
3979 | 6 | REQ(n, if_stmt); |
3980 | | |
3981 | 6 | if (NCH(n) == 4) { |
3982 | 4 | expr_ty expression; |
3983 | 4 | asdl_seq *suite_seq; |
3984 | | |
3985 | 4 | expression = ast_for_expr(c, CHILD(n, 1)); |
3986 | 4 | if (!expression) |
3987 | 0 | return NULL; |
3988 | 4 | suite_seq = ast_for_suite(c, CHILD(n, 3)); |
3989 | 4 | if (!suite_seq) |
3990 | 0 | return NULL; |
3991 | 4 | get_last_end_pos(suite_seq, &end_lineno, &end_col_offset); |
3992 | | |
3993 | 4 | return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, |
3994 | 4 | end_lineno, end_col_offset, c->c_arena); |
3995 | 4 | } |
3996 | | |
3997 | 2 | s = STR(CHILD(n, 4)); |
3998 | | /* s[2], the third character in the string, will be |
3999 | | 's' for el_s_e, or |
4000 | | 'i' for el_i_f |
4001 | | */ |
4002 | 2 | if (s[2] == 's') { |
4003 | 0 | expr_ty expression; |
4004 | 0 | asdl_seq *seq1, *seq2; |
4005 | |
|
4006 | 0 | expression = ast_for_expr(c, CHILD(n, 1)); |
4007 | 0 | if (!expression) |
4008 | 0 | return NULL; |
4009 | 0 | seq1 = ast_for_suite(c, CHILD(n, 3)); |
4010 | 0 | if (!seq1) |
4011 | 0 | return NULL; |
4012 | 0 | seq2 = ast_for_suite(c, CHILD(n, 6)); |
4013 | 0 | if (!seq2) |
4014 | 0 | return NULL; |
4015 | 0 | get_last_end_pos(seq2, &end_lineno, &end_col_offset); |
4016 | |
|
4017 | 0 | return If(expression, seq1, seq2, LINENO(n), n->n_col_offset, |
4018 | 0 | end_lineno, end_col_offset, c->c_arena); |
4019 | 0 | } |
4020 | 2 | else if (s[2] == 'i') { |
4021 | 2 | int i, n_elif, has_else = 0; |
4022 | 2 | expr_ty expression; |
4023 | 2 | asdl_seq *suite_seq; |
4024 | 2 | asdl_seq *orelse = NULL; |
4025 | 2 | n_elif = NCH(n) - 4; |
4026 | | /* must reference the child n_elif+1 since 'else' token is third, |
4027 | | not fourth, child from the end. */ |
4028 | 2 | if (TYPE(CHILD(n, (n_elif + 1))) == NAME |
4029 | 2 | && STR(CHILD(n, (n_elif + 1)))[2] == 's') { |
4030 | 2 | has_else = 1; |
4031 | 2 | n_elif -= 3; |
4032 | 2 | } |
4033 | 2 | n_elif /= 4; |
4034 | | |
4035 | 2 | if (has_else) { |
4036 | 2 | asdl_seq *suite_seq2; |
4037 | | |
4038 | 2 | orelse = _Py_asdl_seq_new(1, c->c_arena); |
4039 | 2 | if (!orelse) |
4040 | 0 | return NULL; |
4041 | 2 | expression = ast_for_expr(c, CHILD(n, NCH(n) - 6)); |
4042 | 2 | if (!expression) |
4043 | 0 | return NULL; |
4044 | 2 | suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4)); |
4045 | 2 | if (!suite_seq) |
4046 | 0 | return NULL; |
4047 | 2 | suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1)); |
4048 | 2 | if (!suite_seq2) |
4049 | 0 | return NULL; |
4050 | 2 | get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset); |
4051 | | |
4052 | 2 | asdl_seq_SET(orelse, 0, |
4053 | 2 | If(expression, suite_seq, suite_seq2, |
4054 | 2 | LINENO(CHILD(n, NCH(n) - 7)), |
4055 | 2 | CHILD(n, NCH(n) - 7)->n_col_offset, |
4056 | 2 | end_lineno, end_col_offset, c->c_arena)); |
4057 | | /* the just-created orelse handled the last elif */ |
4058 | 2 | n_elif--; |
4059 | 2 | } |
4060 | | |
4061 | 2 | for (i = 0; i < n_elif; i++) { |
4062 | 0 | int off = 5 + (n_elif - i - 1) * 4; |
4063 | 0 | asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena); |
4064 | 0 | if (!newobj) |
4065 | 0 | return NULL; |
4066 | 0 | expression = ast_for_expr(c, CHILD(n, off)); |
4067 | 0 | if (!expression) |
4068 | 0 | return NULL; |
4069 | 0 | suite_seq = ast_for_suite(c, CHILD(n, off + 2)); |
4070 | 0 | if (!suite_seq) |
4071 | 0 | return NULL; |
4072 | | |
4073 | 0 | if (orelse != NULL) { |
4074 | 0 | get_last_end_pos(orelse, &end_lineno, &end_col_offset); |
4075 | 0 | } else { |
4076 | 0 | get_last_end_pos(suite_seq, &end_lineno, &end_col_offset); |
4077 | 0 | } |
4078 | 0 | asdl_seq_SET(newobj, 0, |
4079 | 0 | If(expression, suite_seq, orelse, |
4080 | 0 | LINENO(CHILD(n, off - 1)), |
4081 | 0 | CHILD(n, off - 1)->n_col_offset, |
4082 | 0 | end_lineno, end_col_offset, c->c_arena)); |
4083 | 0 | orelse = newobj; |
4084 | 0 | } |
4085 | 2 | expression = ast_for_expr(c, CHILD(n, 1)); |
4086 | 2 | if (!expression) |
4087 | 0 | return NULL; |
4088 | 2 | suite_seq = ast_for_suite(c, CHILD(n, 3)); |
4089 | 2 | if (!suite_seq) |
4090 | 0 | return NULL; |
4091 | 2 | get_last_end_pos(orelse, &end_lineno, &end_col_offset); |
4092 | 2 | return If(expression, suite_seq, orelse, |
4093 | 2 | LINENO(n), n->n_col_offset, |
4094 | 2 | end_lineno, end_col_offset, c->c_arena); |
4095 | 2 | } |
4096 | | |
4097 | 0 | PyErr_Format(PyExc_SystemError, |
4098 | 0 | "unexpected token in 'if' statement: %s", s); |
4099 | 0 | return NULL; |
4100 | 2 | } |
4101 | | |
4102 | | static stmt_ty |
4103 | | ast_for_while_stmt(struct compiling *c, const node *n) |
4104 | 0 | { |
4105 | | /* while_stmt: 'while' test ':' suite ['else' ':' suite] */ |
4106 | 0 | REQ(n, while_stmt); |
4107 | 0 | int end_lineno, end_col_offset; |
4108 | |
|
4109 | 0 | if (NCH(n) == 4) { |
4110 | 0 | expr_ty expression; |
4111 | 0 | asdl_seq *suite_seq; |
4112 | |
|
4113 | 0 | expression = ast_for_expr(c, CHILD(n, 1)); |
4114 | 0 | if (!expression) |
4115 | 0 | return NULL; |
4116 | 0 | suite_seq = ast_for_suite(c, CHILD(n, 3)); |
4117 | 0 | if (!suite_seq) |
4118 | 0 | return NULL; |
4119 | 0 | get_last_end_pos(suite_seq, &end_lineno, &end_col_offset); |
4120 | 0 | return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, |
4121 | 0 | end_lineno, end_col_offset, c->c_arena); |
4122 | 0 | } |
4123 | 0 | else if (NCH(n) == 7) { |
4124 | 0 | expr_ty expression; |
4125 | 0 | asdl_seq *seq1, *seq2; |
4126 | |
|
4127 | 0 | expression = ast_for_expr(c, CHILD(n, 1)); |
4128 | 0 | if (!expression) |
4129 | 0 | return NULL; |
4130 | 0 | seq1 = ast_for_suite(c, CHILD(n, 3)); |
4131 | 0 | if (!seq1) |
4132 | 0 | return NULL; |
4133 | 0 | seq2 = ast_for_suite(c, CHILD(n, 6)); |
4134 | 0 | if (!seq2) |
4135 | 0 | return NULL; |
4136 | 0 | get_last_end_pos(seq2, &end_lineno, &end_col_offset); |
4137 | |
|
4138 | 0 | return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, |
4139 | 0 | end_lineno, end_col_offset, c->c_arena); |
4140 | 0 | } |
4141 | | |
4142 | 0 | PyErr_Format(PyExc_SystemError, |
4143 | 0 | "wrong number of tokens for 'while' statement: %d", |
4144 | 0 | NCH(n)); |
4145 | 0 | return NULL; |
4146 | 0 | } |
4147 | | |
4148 | | static stmt_ty |
4149 | | ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async) |
4150 | 8 | { |
4151 | 8 | const node * const n = is_async ? CHILD(n0, 1) : n0; |
4152 | 8 | asdl_seq *_target, *seq = NULL, *suite_seq; |
4153 | 8 | expr_ty expression; |
4154 | 8 | expr_ty target, first; |
4155 | 8 | const node *node_target; |
4156 | 8 | int end_lineno, end_col_offset; |
4157 | 8 | int has_type_comment; |
4158 | 8 | string type_comment; |
4159 | | |
4160 | 8 | if (is_async && c->c_feature_version < 5) { |
4161 | 0 | ast_error(c, n, |
4162 | 0 | "Async for loops are only supported in Python 3.5 and greater"); |
4163 | 0 | return NULL; |
4164 | 0 | } |
4165 | | |
4166 | | /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */ |
4167 | 8 | REQ(n, for_stmt); |
4168 | | |
4169 | 8 | has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT; |
4170 | | |
4171 | 8 | if (NCH(n) == 9 + has_type_comment) { |
4172 | 0 | seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment)); |
4173 | 0 | if (!seq) |
4174 | 0 | return NULL; |
4175 | 0 | } |
4176 | | |
4177 | 8 | node_target = CHILD(n, 1); |
4178 | 8 | _target = ast_for_exprlist(c, node_target, Store); |
4179 | 8 | if (!_target) |
4180 | 0 | return NULL; |
4181 | | /* Check the # of children rather than the length of _target, since |
4182 | | for x, in ... has 1 element in _target, but still requires a Tuple. */ |
4183 | 8 | first = (expr_ty)asdl_seq_GET(_target, 0); |
4184 | 8 | if (NCH(node_target) == 1) |
4185 | 6 | target = first; |
4186 | 2 | else |
4187 | 2 | target = Tuple(_target, Store, first->lineno, first->col_offset, |
4188 | 8 | node_target->n_end_lineno, node_target->n_end_col_offset, |
4189 | 8 | c->c_arena); |
4190 | | |
4191 | 8 | expression = ast_for_testlist(c, CHILD(n, 3)); |
4192 | 8 | if (!expression) |
4193 | 0 | return NULL; |
4194 | 8 | suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment)); |
4195 | 8 | if (!suite_seq) |
4196 | 0 | return NULL; |
4197 | | |
4198 | 8 | if (seq != NULL) { |
4199 | 0 | get_last_end_pos(seq, &end_lineno, &end_col_offset); |
4200 | 8 | } else { |
4201 | 8 | get_last_end_pos(suite_seq, &end_lineno, &end_col_offset); |
4202 | 8 | } |
4203 | | |
4204 | 8 | if (has_type_comment) { |
4205 | 0 | type_comment = NEW_TYPE_COMMENT(CHILD(n, 5)); |
4206 | 0 | if (!type_comment) |
4207 | 0 | return NULL; |
4208 | 0 | } |
4209 | 8 | else |
4210 | 8 | type_comment = NULL; |
4211 | | |
4212 | 8 | if (is_async) |
4213 | 0 | return AsyncFor(target, expression, suite_seq, seq, type_comment, |
4214 | 8 | LINENO(n0), n0->n_col_offset, |
4215 | 8 | end_lineno, end_col_offset, c->c_arena); |
4216 | 8 | else |
4217 | 8 | return For(target, expression, suite_seq, seq, type_comment, |
4218 | 8 | LINENO(n), n->n_col_offset, |
4219 | 8 | end_lineno, end_col_offset, c->c_arena); |
4220 | 8 | } |
4221 | | |
4222 | | static excepthandler_ty |
4223 | | ast_for_except_clause(struct compiling *c, const node *exc, node *body) |
4224 | 20 | { |
4225 | | /* except_clause: 'except' [test ['as' test]] */ |
4226 | 20 | int end_lineno, end_col_offset; |
4227 | 20 | REQ(exc, except_clause); |
4228 | 20 | REQ(body, suite); |
4229 | | |
4230 | 20 | if (NCH(exc) == 1) { |
4231 | 0 | asdl_seq *suite_seq = ast_for_suite(c, body); |
4232 | 0 | if (!suite_seq) |
4233 | 0 | return NULL; |
4234 | 0 | get_last_end_pos(suite_seq, &end_lineno, &end_col_offset); |
4235 | |
|
4236 | 0 | return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc), |
4237 | 0 | exc->n_col_offset, |
4238 | 0 | end_lineno, end_col_offset, c->c_arena); |
4239 | 0 | } |
4240 | 20 | else if (NCH(exc) == 2) { |
4241 | 6 | expr_ty expression; |
4242 | 6 | asdl_seq *suite_seq; |
4243 | | |
4244 | 6 | expression = ast_for_expr(c, CHILD(exc, 1)); |
4245 | 6 | if (!expression) |
4246 | 0 | return NULL; |
4247 | 6 | suite_seq = ast_for_suite(c, body); |
4248 | 6 | if (!suite_seq) |
4249 | 0 | return NULL; |
4250 | 6 | get_last_end_pos(suite_seq, &end_lineno, &end_col_offset); |
4251 | | |
4252 | 6 | return ExceptHandler(expression, NULL, suite_seq, LINENO(exc), |
4253 | 6 | exc->n_col_offset, |
4254 | 6 | end_lineno, end_col_offset, c->c_arena); |
4255 | 6 | } |
4256 | 14 | else if (NCH(exc) == 4) { |
4257 | 14 | asdl_seq *suite_seq; |
4258 | 14 | expr_ty expression; |
4259 | 14 | identifier e = NEW_IDENTIFIER(CHILD(exc, 3)); |
4260 | 14 | if (!e) |
4261 | 0 | return NULL; |
4262 | 14 | if (forbidden_name(c, e, CHILD(exc, 3), 0)) |
4263 | 0 | return NULL; |
4264 | 14 | expression = ast_for_expr(c, CHILD(exc, 1)); |
4265 | 14 | if (!expression) |
4266 | 0 | return NULL; |
4267 | 14 | suite_seq = ast_for_suite(c, body); |
4268 | 14 | if (!suite_seq) |
4269 | 0 | return NULL; |
4270 | 14 | get_last_end_pos(suite_seq, &end_lineno, &end_col_offset); |
4271 | | |
4272 | 14 | return ExceptHandler(expression, e, suite_seq, LINENO(exc), |
4273 | 14 | exc->n_col_offset, |
4274 | 14 | end_lineno, end_col_offset, c->c_arena); |
4275 | 14 | } |
4276 | | |
4277 | 0 | PyErr_Format(PyExc_SystemError, |
4278 | 0 | "wrong number of children for 'except' clause: %d", |
4279 | 0 | NCH(exc)); |
4280 | 0 | return NULL; |
4281 | 20 | } |
4282 | | |
4283 | | static stmt_ty |
4284 | | ast_for_try_stmt(struct compiling *c, const node *n) |
4285 | 18 | { |
4286 | 18 | const int nch = NCH(n); |
4287 | 18 | int end_lineno, end_col_offset, n_except = (nch - 3)/3; |
4288 | 18 | asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL; |
4289 | 18 | excepthandler_ty last_handler; |
4290 | | |
4291 | 18 | REQ(n, try_stmt); |
4292 | | |
4293 | 18 | body = ast_for_suite(c, CHILD(n, 2)); |
4294 | 18 | if (body == NULL) |
4295 | 0 | return NULL; |
4296 | | |
4297 | 18 | if (TYPE(CHILD(n, nch - 3)) == NAME) { |
4298 | 0 | if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) { |
4299 | 0 | if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) { |
4300 | | /* we can assume it's an "else", |
4301 | | because nch >= 9 for try-else-finally and |
4302 | | it would otherwise have a type of except_clause */ |
4303 | 0 | orelse = ast_for_suite(c, CHILD(n, nch - 4)); |
4304 | 0 | if (orelse == NULL) |
4305 | 0 | return NULL; |
4306 | 0 | n_except--; |
4307 | 0 | } |
4308 | | |
4309 | 0 | finally = ast_for_suite(c, CHILD(n, nch - 1)); |
4310 | 0 | if (finally == NULL) |
4311 | 0 | return NULL; |
4312 | 0 | n_except--; |
4313 | 0 | } |
4314 | 0 | else { |
4315 | | /* we can assume it's an "else", |
4316 | | otherwise it would have a type of except_clause */ |
4317 | 0 | orelse = ast_for_suite(c, CHILD(n, nch - 1)); |
4318 | 0 | if (orelse == NULL) |
4319 | 0 | return NULL; |
4320 | 0 | n_except--; |
4321 | 0 | } |
4322 | 0 | } |
4323 | 18 | else if (TYPE(CHILD(n, nch - 3)) != except_clause) { |
4324 | 0 | ast_error(c, n, "malformed 'try' statement"); |
4325 | 0 | return NULL; |
4326 | 0 | } |
4327 | | |
4328 | 18 | if (n_except > 0) { |
4329 | 18 | int i; |
4330 | | /* process except statements to create a try ... except */ |
4331 | 18 | handlers = _Py_asdl_seq_new(n_except, c->c_arena); |
4332 | 18 | if (handlers == NULL) |
4333 | 0 | return NULL; |
4334 | | |
4335 | 38 | for (i = 0; i < n_except; i++) { |
4336 | 20 | excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3), |
4337 | 20 | CHILD(n, 5 + i * 3)); |
4338 | 20 | if (!e) |
4339 | 0 | return NULL; |
4340 | 20 | asdl_seq_SET(handlers, i, e); |
4341 | 20 | } |
4342 | 18 | } |
4343 | | |
4344 | 18 | assert(finally != NULL || asdl_seq_LEN(handlers)); |
4345 | 18 | if (finally != NULL) { |
4346 | | // finally is always last |
4347 | 0 | get_last_end_pos(finally, &end_lineno, &end_col_offset); |
4348 | 18 | } else if (orelse != NULL) { |
4349 | | // otherwise else is last |
4350 | 0 | get_last_end_pos(orelse, &end_lineno, &end_col_offset); |
4351 | 18 | } else { |
4352 | | // inline the get_last_end_pos logic due to layout mismatch |
4353 | 18 | last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1); |
4354 | 18 | end_lineno = last_handler->end_lineno; |
4355 | 18 | end_col_offset = last_handler->end_col_offset; |
4356 | 18 | } |
4357 | 18 | return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset, |
4358 | 18 | end_lineno, end_col_offset, c->c_arena); |
4359 | 18 | } |
4360 | | |
4361 | | /* with_item: test ['as' expr] */ |
4362 | | static withitem_ty |
4363 | | ast_for_with_item(struct compiling *c, const node *n) |
4364 | 0 | { |
4365 | 0 | expr_ty context_expr, optional_vars = NULL; |
4366 | |
|
4367 | 0 | REQ(n, with_item); |
4368 | 0 | context_expr = ast_for_expr(c, CHILD(n, 0)); |
4369 | 0 | if (!context_expr) |
4370 | 0 | return NULL; |
4371 | 0 | if (NCH(n) == 3) { |
4372 | 0 | optional_vars = ast_for_expr(c, CHILD(n, 2)); |
4373 | |
|
4374 | 0 | if (!optional_vars) { |
4375 | 0 | return NULL; |
4376 | 0 | } |
4377 | 0 | if (!set_context(c, optional_vars, Store, n)) { |
4378 | 0 | return NULL; |
4379 | 0 | } |
4380 | 0 | } |
4381 | | |
4382 | 0 | return withitem(context_expr, optional_vars, c->c_arena); |
4383 | 0 | } |
4384 | | |
4385 | | /* with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite */ |
4386 | | static stmt_ty |
4387 | | ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async) |
4388 | 0 | { |
4389 | 0 | const node * const n = is_async ? CHILD(n0, 1) : n0; |
4390 | 0 | int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset; |
4391 | 0 | asdl_seq *items, *body; |
4392 | 0 | string type_comment; |
4393 | |
|
4394 | 0 | if (is_async && c->c_feature_version < 5) { |
4395 | 0 | ast_error(c, n, |
4396 | 0 | "Async with statements are only supported in Python 3.5 and greater"); |
4397 | 0 | return NULL; |
4398 | 0 | } |
4399 | | |
4400 | 0 | REQ(n, with_stmt); |
4401 | |
|
4402 | 0 | has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT; |
4403 | 0 | nch_minus_type = NCH(n) - has_type_comment; |
4404 | |
|
4405 | 0 | n_items = (nch_minus_type - 2) / 2; |
4406 | 0 | items = _Py_asdl_seq_new(n_items, c->c_arena); |
4407 | 0 | if (!items) |
4408 | 0 | return NULL; |
4409 | 0 | for (i = 1; i < nch_minus_type - 2; i += 2) { |
4410 | 0 | withitem_ty item = ast_for_with_item(c, CHILD(n, i)); |
4411 | 0 | if (!item) |
4412 | 0 | return NULL; |
4413 | 0 | asdl_seq_SET(items, (i - 1) / 2, item); |
4414 | 0 | } |
4415 | | |
4416 | 0 | body = ast_for_suite(c, CHILD(n, NCH(n) - 1)); |
4417 | 0 | if (!body) |
4418 | 0 | return NULL; |
4419 | 0 | get_last_end_pos(body, &end_lineno, &end_col_offset); |
4420 | |
|
4421 | 0 | if (has_type_comment) { |
4422 | 0 | type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2)); |
4423 | 0 | if (!type_comment) |
4424 | 0 | return NULL; |
4425 | 0 | } |
4426 | 0 | else |
4427 | 0 | type_comment = NULL; |
4428 | | |
4429 | 0 | if (is_async) |
4430 | 0 | return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset, |
4431 | 0 | end_lineno, end_col_offset, c->c_arena); |
4432 | 0 | else |
4433 | 0 | return With(items, body, type_comment, LINENO(n), n->n_col_offset, |
4434 | 0 | end_lineno, end_col_offset, c->c_arena); |
4435 | 0 | } |
4436 | | |
4437 | | static stmt_ty |
4438 | | ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) |
4439 | 0 | { |
4440 | | /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */ |
4441 | 0 | PyObject *classname; |
4442 | 0 | asdl_seq *s; |
4443 | 0 | expr_ty call; |
4444 | 0 | int end_lineno, end_col_offset; |
4445 | |
|
4446 | 0 | REQ(n, classdef); |
4447 | |
|
4448 | 0 | if (NCH(n) == 4) { /* class NAME ':' suite */ |
4449 | 0 | s = ast_for_suite(c, CHILD(n, 3)); |
4450 | 0 | if (!s) |
4451 | 0 | return NULL; |
4452 | 0 | get_last_end_pos(s, &end_lineno, &end_col_offset); |
4453 | |
|
4454 | 0 | classname = NEW_IDENTIFIER(CHILD(n, 1)); |
4455 | 0 | if (!classname) |
4456 | 0 | return NULL; |
4457 | 0 | if (forbidden_name(c, classname, CHILD(n, 3), 0)) |
4458 | 0 | return NULL; |
4459 | 0 | return ClassDef(classname, NULL, NULL, s, decorator_seq, |
4460 | 0 | LINENO(n), n->n_col_offset, |
4461 | 0 | end_lineno, end_col_offset, c->c_arena); |
4462 | 0 | } |
4463 | | |
4464 | 0 | if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */ |
4465 | 0 | s = ast_for_suite(c, CHILD(n, 5)); |
4466 | 0 | if (!s) |
4467 | 0 | return NULL; |
4468 | 0 | get_last_end_pos(s, &end_lineno, &end_col_offset); |
4469 | |
|
4470 | 0 | classname = NEW_IDENTIFIER(CHILD(n, 1)); |
4471 | 0 | if (!classname) |
4472 | 0 | return NULL; |
4473 | 0 | if (forbidden_name(c, classname, CHILD(n, 3), 0)) |
4474 | 0 | return NULL; |
4475 | 0 | return ClassDef(classname, NULL, NULL, s, decorator_seq, |
4476 | 0 | LINENO(n), n->n_col_offset, |
4477 | 0 | end_lineno, end_col_offset, c->c_arena); |
4478 | 0 | } |
4479 | | |
4480 | | /* class NAME '(' arglist ')' ':' suite */ |
4481 | | /* build up a fake Call node so we can extract its pieces */ |
4482 | 0 | { |
4483 | 0 | PyObject *dummy_name; |
4484 | 0 | expr_ty dummy; |
4485 | 0 | dummy_name = NEW_IDENTIFIER(CHILD(n, 1)); |
4486 | 0 | if (!dummy_name) |
4487 | 0 | return NULL; |
4488 | 0 | dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset, |
4489 | 0 | CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset, |
4490 | 0 | c->c_arena); |
4491 | 0 | call = ast_for_call(c, CHILD(n, 3), dummy, |
4492 | 0 | CHILD(n, 1), NULL, CHILD(n, 4)); |
4493 | 0 | if (!call) |
4494 | 0 | return NULL; |
4495 | 0 | } |
4496 | 0 | s = ast_for_suite(c, CHILD(n, 6)); |
4497 | 0 | if (!s) |
4498 | 0 | return NULL; |
4499 | 0 | get_last_end_pos(s, &end_lineno, &end_col_offset); |
4500 | |
|
4501 | 0 | classname = NEW_IDENTIFIER(CHILD(n, 1)); |
4502 | 0 | if (!classname) |
4503 | 0 | return NULL; |
4504 | 0 | if (forbidden_name(c, classname, CHILD(n, 1), 0)) |
4505 | 0 | return NULL; |
4506 | | |
4507 | 0 | return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s, |
4508 | 0 | decorator_seq, LINENO(n), n->n_col_offset, |
4509 | 0 | end_lineno, end_col_offset, c->c_arena); |
4510 | 0 | } |
4511 | | |
4512 | | static stmt_ty |
4513 | | ast_for_stmt(struct compiling *c, const node *n) |
4514 | 138 | { |
4515 | 138 | if (TYPE(n) == stmt) { |
4516 | 136 | assert(NCH(n) == 1); |
4517 | 136 | n = CHILD(n, 0); |
4518 | 136 | } |
4519 | 138 | if (TYPE(n) == simple_stmt) { |
4520 | 100 | assert(num_stmts(n) == 1); |
4521 | 100 | n = CHILD(n, 0); |
4522 | 100 | } |
4523 | 138 | if (TYPE(n) == small_stmt) { |
4524 | 102 | n = CHILD(n, 0); |
4525 | | /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt |
4526 | | | import_stmt | global_stmt | nonlocal_stmt | assert_stmt |
4527 | | */ |
4528 | 102 | switch (TYPE(n)) { |
4529 | 70 | case expr_stmt: |
4530 | 70 | return ast_for_expr_stmt(c, n); |
4531 | 2 | case del_stmt: |
4532 | 2 | return ast_for_del_stmt(c, n); |
4533 | 16 | case pass_stmt: |
4534 | 16 | return Pass(LINENO(n), n->n_col_offset, |
4535 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
4536 | 8 | case flow_stmt: |
4537 | 8 | return ast_for_flow_stmt(c, n); |
4538 | 0 | case import_stmt: |
4539 | 0 | return ast_for_import_stmt(c, n); |
4540 | 0 | case global_stmt: |
4541 | 0 | return ast_for_global_stmt(c, n); |
4542 | 0 | case nonlocal_stmt: |
4543 | 0 | return ast_for_nonlocal_stmt(c, n); |
4544 | 6 | case assert_stmt: |
4545 | 6 | return ast_for_assert_stmt(c, n); |
4546 | 0 | default: |
4547 | 0 | PyErr_Format(PyExc_SystemError, |
4548 | 0 | "unhandled small_stmt: TYPE=%d NCH=%d\n", |
4549 | 0 | TYPE(n), NCH(n)); |
4550 | 0 | return NULL; |
4551 | 102 | } |
4552 | 102 | } |
4553 | 36 | else { |
4554 | | /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt |
4555 | | | funcdef | classdef | decorated | async_stmt |
4556 | | */ |
4557 | 36 | node *ch = CHILD(n, 0); |
4558 | 36 | REQ(n, compound_stmt); |
4559 | 36 | switch (TYPE(ch)) { |
4560 | 6 | case if_stmt: |
4561 | 6 | return ast_for_if_stmt(c, ch); |
4562 | 0 | case while_stmt: |
4563 | 0 | return ast_for_while_stmt(c, ch); |
4564 | 8 | case for_stmt: |
4565 | 8 | return ast_for_for_stmt(c, ch, 0); |
4566 | 18 | case try_stmt: |
4567 | 18 | return ast_for_try_stmt(c, ch); |
4568 | 0 | case with_stmt: |
4569 | 0 | return ast_for_with_stmt(c, ch, 0); |
4570 | 4 | case funcdef: |
4571 | 4 | return ast_for_funcdef(c, ch, NULL); |
4572 | 0 | case classdef: |
4573 | 0 | return ast_for_classdef(c, ch, NULL); |
4574 | 0 | case decorated: |
4575 | 0 | return ast_for_decorated(c, ch); |
4576 | 0 | case async_stmt: |
4577 | 0 | return ast_for_async_stmt(c, ch); |
4578 | 0 | default: |
4579 | 0 | PyErr_Format(PyExc_SystemError, |
4580 | 0 | "unhandled compound_stmt: TYPE=%d NCH=%d\n", |
4581 | 0 | TYPE(n), NCH(n)); |
4582 | 0 | return NULL; |
4583 | 36 | } |
4584 | 36 | } |
4585 | 138 | } |
4586 | | |
4587 | | static PyObject * |
4588 | | parsenumber_raw(struct compiling *c, const char *s) |
4589 | 30 | { |
4590 | 30 | const char *end; |
4591 | 30 | long x; |
4592 | 30 | double dx; |
4593 | 30 | Py_complex compl; |
4594 | 30 | int imflag; |
4595 | | |
4596 | 30 | assert(s != NULL); |
4597 | 30 | errno = 0; |
4598 | 30 | end = s + strlen(s) - 1; |
4599 | 30 | imflag = *end == 'j' || *end == 'J'; |
4600 | 30 | if (s[0] == '0') { |
4601 | 14 | x = (long) PyOS_strtoul(s, (char **)&end, 0); |
4602 | 14 | if (x < 0 && errno == 0) { |
4603 | 0 | return PyLong_FromString(s, (char **)0, 0); |
4604 | 0 | } |
4605 | 14 | } |
4606 | 16 | else |
4607 | 16 | x = PyOS_strtol(s, (char **)&end, 0); |
4608 | 30 | if (*end == '\0') { |
4609 | 28 | if (errno != 0) |
4610 | 0 | return PyLong_FromString(s, (char **)0, 0); |
4611 | 28 | return PyLong_FromLong(x); |
4612 | 28 | } |
4613 | | /* XXX Huge floats may silently fail */ |
4614 | 2 | if (imflag) { |
4615 | 0 | compl.real = 0.; |
4616 | 0 | compl.imag = PyOS_string_to_double(s, (char **)&end, NULL); |
4617 | 0 | if (compl.imag == -1.0 && PyErr_Occurred()) |
4618 | 0 | return NULL; |
4619 | 0 | return PyComplex_FromCComplex(compl); |
4620 | 0 | } |
4621 | 2 | else |
4622 | 2 | { |
4623 | 2 | dx = PyOS_string_to_double(s, NULL, NULL); |
4624 | 2 | if (dx == -1.0 && PyErr_Occurred()) |
4625 | 0 | return NULL; |
4626 | 2 | return PyFloat_FromDouble(dx); |
4627 | 2 | } |
4628 | 2 | } |
4629 | | |
4630 | | static PyObject * |
4631 | | parsenumber(struct compiling *c, const char *s) |
4632 | 30 | { |
4633 | 30 | char *dup, *end; |
4634 | 30 | PyObject *res = NULL; |
4635 | | |
4636 | 30 | assert(s != NULL); |
4637 | | |
4638 | 30 | if (strchr(s, '_') == NULL) { |
4639 | 30 | return parsenumber_raw(c, s); |
4640 | 30 | } |
4641 | | /* Create a duplicate without underscores. */ |
4642 | 0 | dup = PyMem_Malloc(strlen(s) + 1); |
4643 | 0 | if (dup == NULL) { |
4644 | 0 | return PyErr_NoMemory(); |
4645 | 0 | } |
4646 | 0 | end = dup; |
4647 | 0 | for (; *s; s++) { |
4648 | 0 | if (*s != '_') { |
4649 | 0 | *end++ = *s; |
4650 | 0 | } |
4651 | 0 | } |
4652 | 0 | *end = '\0'; |
4653 | 0 | res = parsenumber_raw(c, dup); |
4654 | 0 | PyMem_Free(dup); |
4655 | 0 | return res; |
4656 | 0 | } |
4657 | | |
4658 | | static PyObject * |
4659 | | decode_utf8(struct compiling *c, const char **sPtr, const char *end) |
4660 | 0 | { |
4661 | 0 | const char *s, *t; |
4662 | 0 | t = s = *sPtr; |
4663 | | /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */ |
4664 | 0 | while (s < end && (*s & 0x80)) s++; |
4665 | 0 | *sPtr = s; |
4666 | 0 | return PyUnicode_DecodeUTF8(t, s - t, NULL); |
4667 | 0 | } |
4668 | | |
4669 | | static int |
4670 | | warn_invalid_escape_sequence(struct compiling *c, const node *n, |
4671 | | unsigned char first_invalid_escape_char) |
4672 | 0 | { |
4673 | 0 | PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c", |
4674 | 0 | first_invalid_escape_char); |
4675 | 0 | if (msg == NULL) { |
4676 | 0 | return -1; |
4677 | 0 | } |
4678 | 0 | if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, |
4679 | 0 | c->c_filename, LINENO(n), |
4680 | 0 | NULL, NULL) < 0) |
4681 | 0 | { |
4682 | 0 | if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) { |
4683 | | /* Replace the DeprecationWarning exception with a SyntaxError |
4684 | | to get a more accurate error report */ |
4685 | 0 | PyErr_Clear(); |
4686 | 0 | ast_error(c, n, "%U", msg); |
4687 | 0 | } |
4688 | 0 | Py_DECREF(msg); |
4689 | 0 | return -1; |
4690 | 0 | } |
4691 | 0 | Py_DECREF(msg); |
4692 | 0 | return 0; |
4693 | 0 | } |
4694 | | |
4695 | | static PyObject * |
4696 | | decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s, |
4697 | | size_t len) |
4698 | 4 | { |
4699 | 4 | PyObject *v, *u; |
4700 | 4 | char *buf; |
4701 | 4 | char *p; |
4702 | 4 | const char *end; |
4703 | | |
4704 | | /* check for integer overflow */ |
4705 | 4 | if (len > SIZE_MAX / 6) |
4706 | 0 | return NULL; |
4707 | | /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 |
4708 | | "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */ |
4709 | 4 | u = PyBytes_FromStringAndSize((char *)NULL, len * 6); |
4710 | 4 | if (u == NULL) |
4711 | 0 | return NULL; |
4712 | 4 | p = buf = PyBytes_AsString(u); |
4713 | 4 | end = s + len; |
4714 | 8 | while (s < end) { |
4715 | 4 | if (*s == '\\') { |
4716 | 4 | *p++ = *s++; |
4717 | 4 | if (s >= end || *s & 0x80) { |
4718 | 0 | strcpy(p, "u005c"); |
4719 | 0 | p += 5; |
4720 | 0 | if (s >= end) |
4721 | 0 | break; |
4722 | 0 | } |
4723 | 4 | } |
4724 | 4 | if (*s & 0x80) { /* XXX inefficient */ |
4725 | 0 | PyObject *w; |
4726 | 0 | int kind; |
4727 | 0 | void *data; |
4728 | 0 | Py_ssize_t len, i; |
4729 | 0 | w = decode_utf8(c, &s, end); |
4730 | 0 | if (w == NULL) { |
4731 | 0 | Py_DECREF(u); |
4732 | 0 | return NULL; |
4733 | 0 | } |
4734 | 0 | kind = PyUnicode_KIND(w); |
4735 | 0 | data = PyUnicode_DATA(w); |
4736 | 0 | len = PyUnicode_GET_LENGTH(w); |
4737 | 0 | for (i = 0; i < len; i++) { |
4738 | 0 | Py_UCS4 chr = PyUnicode_READ(kind, data, i); |
4739 | 0 | sprintf(p, "\\U%08x", chr); |
4740 | 0 | p += 10; |
4741 | 0 | } |
4742 | | /* Should be impossible to overflow */ |
4743 | 0 | assert(p - buf <= PyBytes_GET_SIZE(u)); |
4744 | 0 | Py_DECREF(w); |
4745 | 4 | } else { |
4746 | 4 | *p++ = *s++; |
4747 | 4 | } |
4748 | 4 | } |
4749 | 4 | len = p - buf; |
4750 | 4 | s = buf; |
4751 | | |
4752 | 4 | const char *first_invalid_escape; |
4753 | 4 | v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape); |
4754 | | |
4755 | 4 | if (v != NULL && first_invalid_escape != NULL) { |
4756 | 0 | if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) { |
4757 | | /* We have not decref u before because first_invalid_escape points |
4758 | | inside u. */ |
4759 | 0 | Py_XDECREF(u); |
4760 | 0 | Py_DECREF(v); |
4761 | 0 | return NULL; |
4762 | 0 | } |
4763 | 0 | } |
4764 | 4 | Py_XDECREF(u); |
4765 | 4 | return v; |
4766 | 4 | } |
4767 | | |
4768 | | static PyObject * |
4769 | | decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s, |
4770 | | size_t len) |
4771 | 2 | { |
4772 | 2 | const char *first_invalid_escape; |
4773 | 2 | PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL, |
4774 | 2 | &first_invalid_escape); |
4775 | 2 | if (result == NULL) |
4776 | 0 | return NULL; |
4777 | | |
4778 | 2 | if (first_invalid_escape != NULL) { |
4779 | 0 | if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) { |
4780 | 0 | Py_DECREF(result); |
4781 | 0 | return NULL; |
4782 | 0 | } |
4783 | 0 | } |
4784 | 2 | return result; |
4785 | 2 | } |
4786 | | |
4787 | | /* Shift locations for the given node and all its children by adding `lineno` |
4788 | | and `col_offset` to existing locations. */ |
4789 | | static void fstring_shift_node_locations(node *n, int lineno, int col_offset) |
4790 | 0 | { |
4791 | 0 | n->n_col_offset = n->n_col_offset + col_offset; |
4792 | 0 | n->n_end_col_offset = n->n_end_col_offset + col_offset; |
4793 | 0 | for (int i = 0; i < NCH(n); ++i) { |
4794 | 0 | if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) { |
4795 | | /* Shifting column offsets unnecessary if there's been newlines. */ |
4796 | 0 | col_offset = 0; |
4797 | 0 | } |
4798 | 0 | fstring_shift_node_locations(CHILD(n, i), lineno, col_offset); |
4799 | 0 | } |
4800 | 0 | n->n_lineno = n->n_lineno + lineno; |
4801 | 0 | n->n_end_lineno = n->n_end_lineno + lineno; |
4802 | 0 | } |
4803 | | |
4804 | | /* Fix locations for the given node and its children. |
4805 | | |
4806 | | `parent` is the enclosing node. |
4807 | | `n` is the node which locations are going to be fixed relative to parent. |
4808 | | `expr_str` is the child node's string representation, including braces. |
4809 | | */ |
4810 | | static void |
4811 | | fstring_fix_node_location(const node *parent, node *n, char *expr_str) |
4812 | 0 | { |
4813 | 0 | char *substr = NULL; |
4814 | 0 | char *start; |
4815 | 0 | int lines = LINENO(parent) - 1; |
4816 | 0 | int cols = parent->n_col_offset; |
4817 | | /* Find the full fstring to fix location information in `n`. */ |
4818 | 0 | while (parent && parent->n_type != STRING) |
4819 | 0 | parent = parent->n_child; |
4820 | 0 | if (parent && parent->n_str) { |
4821 | 0 | substr = strstr(parent->n_str, expr_str); |
4822 | 0 | if (substr) { |
4823 | 0 | start = substr; |
4824 | 0 | while (start > parent->n_str) { |
4825 | 0 | if (start[0] == '\n') |
4826 | 0 | break; |
4827 | 0 | start--; |
4828 | 0 | } |
4829 | 0 | cols += (int)(substr - start); |
4830 | | /* adjust the start based on the number of newlines encountered |
4831 | | before the f-string expression */ |
4832 | 0 | for (char* p = parent->n_str; p < substr; p++) { |
4833 | 0 | if (*p == '\n') { |
4834 | 0 | lines++; |
4835 | 0 | } |
4836 | 0 | } |
4837 | 0 | } |
4838 | 0 | } |
4839 | 0 | fstring_shift_node_locations(n, lines, cols); |
4840 | 0 | } |
4841 | | |
4842 | | /* Compile this expression in to an expr_ty. Add parens around the |
4843 | | expression, in order to allow leading spaces in the expression. */ |
4844 | | static expr_ty |
4845 | | fstring_compile_expr(const char *expr_start, const char *expr_end, |
4846 | | struct compiling *c, const node *n) |
4847 | | |
4848 | 0 | { |
4849 | 0 | node *mod_n; |
4850 | 0 | mod_ty mod; |
4851 | 0 | char *str; |
4852 | 0 | Py_ssize_t len; |
4853 | 0 | const char *s; |
4854 | |
|
4855 | 0 | assert(expr_end >= expr_start); |
4856 | 0 | assert(*(expr_start-1) == '{'); |
4857 | 0 | assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' || |
4858 | 0 | *expr_end == '='); |
4859 | | |
4860 | | /* If the substring is all whitespace, it's an error. We need to catch this |
4861 | | here, and not when we call PyParser_SimpleParseStringFlagsFilename, |
4862 | | because turning the expression '' in to '()' would go from being invalid |
4863 | | to valid. */ |
4864 | 0 | for (s = expr_start; s != expr_end; s++) { |
4865 | 0 | char c = *s; |
4866 | | /* The Python parser ignores only the following whitespace |
4867 | | characters (\r already is converted to \n). */ |
4868 | 0 | if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) { |
4869 | 0 | break; |
4870 | 0 | } |
4871 | 0 | } |
4872 | 0 | if (s == expr_end) { |
4873 | 0 | ast_error(c, n, "f-string: empty expression not allowed"); |
4874 | 0 | return NULL; |
4875 | 0 | } |
4876 | | |
4877 | 0 | len = expr_end - expr_start; |
4878 | | /* Allocate 3 extra bytes: open paren, close paren, null byte. */ |
4879 | 0 | str = PyMem_RawMalloc(len + 3); |
4880 | 0 | if (str == NULL) { |
4881 | 0 | PyErr_NoMemory(); |
4882 | 0 | return NULL; |
4883 | 0 | } |
4884 | | |
4885 | 0 | str[0] = '('; |
4886 | 0 | memcpy(str+1, expr_start, len); |
4887 | 0 | str[len+1] = ')'; |
4888 | 0 | str[len+2] = 0; |
4889 | |
|
4890 | 0 | PyCompilerFlags cf = _PyCompilerFlags_INIT; |
4891 | 0 | cf.cf_flags = PyCF_ONLY_AST; |
4892 | 0 | mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>", |
4893 | 0 | Py_eval_input, 0); |
4894 | 0 | if (!mod_n) { |
4895 | 0 | PyMem_RawFree(str); |
4896 | 0 | return NULL; |
4897 | 0 | } |
4898 | | /* Reuse str to find the correct column offset. */ |
4899 | 0 | str[0] = '{'; |
4900 | 0 | str[len+1] = '}'; |
4901 | 0 | fstring_fix_node_location(n, mod_n, str); |
4902 | 0 | mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena); |
4903 | 0 | PyMem_RawFree(str); |
4904 | 0 | PyNode_Free(mod_n); |
4905 | 0 | if (!mod) |
4906 | 0 | return NULL; |
4907 | 0 | return mod->v.Expression.body; |
4908 | 0 | } |
4909 | | |
4910 | | /* Return -1 on error. |
4911 | | |
4912 | | Return 0 if we reached the end of the literal. |
4913 | | |
4914 | | Return 1 if we haven't reached the end of the literal, but we want |
4915 | | the caller to process the literal up to this point. Used for |
4916 | | doubled braces. |
4917 | | */ |
4918 | | static int |
4919 | | fstring_find_literal(const char **str, const char *end, int raw, |
4920 | | PyObject **literal, int recurse_lvl, |
4921 | | struct compiling *c, const node *n) |
4922 | 0 | { |
4923 | | /* Get any literal string. It ends when we hit an un-doubled left |
4924 | | brace (which isn't part of a unicode name escape such as |
4925 | | "\N{EULER CONSTANT}"), or the end of the string. */ |
4926 | |
|
4927 | 0 | const char *s = *str; |
4928 | 0 | const char *literal_start = s; |
4929 | 0 | int result = 0; |
4930 | |
|
4931 | 0 | assert(*literal == NULL); |
4932 | 0 | while (s < end) { |
4933 | 0 | char ch = *s++; |
4934 | 0 | if (!raw && ch == '\\' && s < end) { |
4935 | 0 | ch = *s++; |
4936 | 0 | if (ch == 'N') { |
4937 | 0 | if (s < end && *s++ == '{') { |
4938 | 0 | while (s < end && *s++ != '}') { |
4939 | 0 | } |
4940 | 0 | continue; |
4941 | 0 | } |
4942 | 0 | break; |
4943 | 0 | } |
4944 | 0 | if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) { |
4945 | 0 | return -1; |
4946 | 0 | } |
4947 | 0 | } |
4948 | 0 | if (ch == '{' || ch == '}') { |
4949 | | /* Check for doubled braces, but only at the top level. If |
4950 | | we checked at every level, then f'{0:{3}}' would fail |
4951 | | with the two closing braces. */ |
4952 | 0 | if (recurse_lvl == 0) { |
4953 | 0 | if (s < end && *s == ch) { |
4954 | | /* We're going to tell the caller that the literal ends |
4955 | | here, but that they should continue scanning. But also |
4956 | | skip over the second brace when we resume scanning. */ |
4957 | 0 | *str = s + 1; |
4958 | 0 | result = 1; |
4959 | 0 | goto done; |
4960 | 0 | } |
4961 | | |
4962 | | /* Where a single '{' is the start of a new expression, a |
4963 | | single '}' is not allowed. */ |
4964 | 0 | if (ch == '}') { |
4965 | 0 | *str = s - 1; |
4966 | 0 | ast_error(c, n, "f-string: single '}' is not allowed"); |
4967 | 0 | return -1; |
4968 | 0 | } |
4969 | 0 | } |
4970 | | /* We're either at a '{', which means we're starting another |
4971 | | expression; or a '}', which means we're at the end of this |
4972 | | f-string (for a nested format_spec). */ |
4973 | 0 | s--; |
4974 | 0 | break; |
4975 | 0 | } |
4976 | 0 | } |
4977 | 0 | *str = s; |
4978 | 0 | assert(s <= end); |
4979 | 0 | assert(s == end || *s == '{' || *s == '}'); |
4980 | 0 | done: |
4981 | 0 | if (literal_start != s) { |
4982 | 0 | if (raw) |
4983 | 0 | *literal = PyUnicode_DecodeUTF8Stateful(literal_start, |
4984 | 0 | s - literal_start, |
4985 | 0 | NULL, NULL); |
4986 | 0 | else |
4987 | 0 | *literal = decode_unicode_with_escapes(c, n, literal_start, |
4988 | 0 | s - literal_start); |
4989 | 0 | if (!*literal) |
4990 | 0 | return -1; |
4991 | 0 | } |
4992 | 0 | return result; |
4993 | 0 | } |
4994 | | |
4995 | | /* Forward declaration because parsing is recursive: fstring_find_expr()
   below calls fstring_parse() to parse the format spec of a replacement
   field, one recursion level deeper. */
4996 | | static expr_ty
4997 | | fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
4998 | | struct compiling *c, const node *n);
4999 | | |
5000 | | /* Parse the f-string at *str, ending at end. We know *str starts an
5001 | | expression (so it must be a '{'). Builds the FormattedValue node, which
5002 | | includes the expression, conversion character, format_spec expression, and
5003 | | optionally the text of the expression (if = is used).
5004 | |
5005 | | Note that I don't do a perfect job here: I don't make sure that a
5006 | | closing brace doesn't match an opening paren, for example. It
5007 | | doesn't need to error on all invalid expressions, just correctly
5008 | | find the end of all valid ones. Any errors inside the expression
5009 | | will be caught when we parse it later.
5010 | |
5011 | | *expression is set to the expression. For an '=' "debug" expression,
5012 | | *expr_text is set to the debug text (the original text of the expression,
5013 | | including the '=' and any whitespace around it, as a string object). If
5014 | | not a debug expression, *expr_text set to NULL. */
/* Returns 0 on success, with *str advanced just past the closing '}' and
   the FormattedValue node stored in *expression.  Returns -1 on error; in
   that case any debug text already stored in *expr_text has been released
   with Py_XDECREF (the pointer itself is not reset to NULL).

   `recurse_lvl` is the current nesting depth: a value of 2 or more is
   rejected, and the format spec is parsed with recurse_lvl+1.  `c` and `n`
   are the compiling context and the node used for error reporting and for
   the location of the created node. */
5015 | | static int
5016 | | fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
5017 | | PyObject **expr_text, expr_ty *expression,
5018 | | struct compiling *c, const node *n)
5019 | 0 | {
5020 | | /* Return -1 on error, else 0. */
5021 |

5022 | 0 | const char *expr_start;
5023 | 0 | const char *expr_end;
5024 | 0 | expr_ty simple_expression;
5025 | 0 | expr_ty format_spec = NULL; /* Optional format specifier. */
5026 | 0 | int conversion = -1; /* The conversion char. Use default if not
5027 | | specified, or !r if using = and no format
5028 | | spec. */
5029 | |
5030 | | /* 0 if we're not in a string, else the quote char we're trying to
5031 | | match (single or double quote). */
5032 | 0 | char quote_char = 0;
5033 | |
5034 | | /* If we're inside a string, 1=normal, 3=triple-quoted. */
5035 | 0 | int string_type = 0;
5036 | |
5037 | | /* Keep track of nesting level for braces/parens/brackets in
5038 | | expressions. */
5039 | 0 | Py_ssize_t nested_depth = 0;
/* parenstack[i] holds the opening bracket character seen at depth i, so
   that each closing bracket can be checked against its opener. */
5040 | 0 | char parenstack[MAXLEVEL];
5041 |

5042 | 0 | *expr_text = NULL;
5043 | |
5044 | | /* Can only nest one level deep. */
5045 | 0 | if (recurse_lvl >= 2) {
5046 | 0 | ast_error(c, n, "f-string: expressions nested too deeply");
5047 | 0 | goto error;
5048 | 0 | }
5049 | |
5050 | | /* The first char must be a left brace, or we wouldn't have gotten
5051 | | here. Skip over it. */
5052 | 0 | assert(**str == '{');
5053 | 0 | *str += 1;
5054 |

5055 | 0 | expr_start = *str;
/* Scan forward to find where the expression text ends, tracking string
   literals and bracket nesting so that '!', ':', '}' and '=' inside them
   do not terminate the expression. */
5056 | 0 | for (; *str < end; (*str)++) {
5057 | 0 | char ch;
5058 | |
5059 | | /* Loop invariants. */
5060 | 0 | assert(nested_depth >= 0);
5061 | 0 | assert(*str >= expr_start && *str < end);
5062 | 0 | if (quote_char)
5063 | 0 | assert(string_type == 1 || string_type == 3);
5064 | 0 | else
5065 | 0 | assert(string_type == 0);
5066 |

5067 | 0 | ch = **str;
5068 | | /* Nowhere inside an expression is a backslash allowed. */
5069 | 0 | if (ch == '\\') {
5070 | | /* Error: can't include a backslash character, inside
5071 | | parens or strings or not. */
5072 | 0 | ast_error(c, n,
5073 | 0 | "f-string expression part "
5074 | 0 | "cannot include a backslash");
5075 | 0 | goto error;
5076 | 0 | }
5077 | 0 | if (quote_char) {
5078 | | /* We're inside a string. See if we're at the end. */
5079 | | /* This code needs to implement the same non-error logic
5080 | | as tok_get from tokenizer.c, at the letter_quote
5081 | | label. To actually share that code would be a
5082 | | nightmare. But, it's unlikely to change and is small,
5083 | | so duplicate it here. Note we don't need to catch all
5084 | | of the errors, since they'll be caught when parsing the
5085 | | expression. We just need to match the non-error
5086 | | cases. Thus we can ignore \n in single-quoted strings,
5087 | | for example. Or non-terminated strings. */
5088 | 0 | if (ch == quote_char) {
5089 | | /* Does this match the string_type (single or triple
5090 | | quoted)? */
5091 | 0 | if (string_type == 3) {
5092 | 0 | if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
5093 | | /* We're at the end of a triple quoted string. */
5094 | 0 | *str += 2;
5095 | 0 | string_type = 0;
5096 | 0 | quote_char = 0;
5097 | 0 | continue;
5098 | 0 | }
5099 | 0 | } else {
5100 | | /* We're at the end of a normal string. */
5101 | 0 | quote_char = 0;
5102 | 0 | string_type = 0;
5103 | 0 | continue;
5104 | 0 | }
5105 | 0 | }
5106 | 0 | } else if (ch == '\'' || ch == '"') {
5107 | | /* Is this a triple quoted string? */
5108 | 0 | if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
5109 | 0 | string_type = 3;
5110 | 0 | *str += 2;
5111 | 0 | } else {
5112 | | /* Start of a normal string. */
5113 | 0 | string_type = 1;
5114 | 0 | }
5115 | | /* Start looking for the end of the string. */
5116 | 0 | quote_char = ch;
5117 | 0 | } else if (ch == '[' || ch == '{' || ch == '(') {
5118 | 0 | if (nested_depth >= MAXLEVEL) {
5119 | 0 | ast_error(c, n, "f-string: too many nested parenthesis");
5120 | 0 | goto error;
5121 | 0 | }
5122 | 0 | parenstack[nested_depth] = ch;
5123 | 0 | nested_depth++;
5124 | 0 | } else if (ch == '#') {
5125 | | /* Error: can't include a comment character, inside parens
5126 | | or not. */
5127 | 0 | ast_error(c, n, "f-string expression part cannot include '#'");
5128 | 0 | goto error;
5129 | 0 | } else if (nested_depth == 0 &&
5130 | 0 | (ch == '!' || ch == ':' || ch == '}' ||
5131 | 0 | ch == '=' || ch == '>' || ch == '<')) {
5132 | | /* See if there's a next character. */
5133 | 0 | if (*str+1 < end) {
5134 | 0 | char next = *(*str+1);
5135 | |
5136 | | /* For "!=". since '=' is not an allowed conversion character,
5137 | | nothing is lost in this test. */
5138 | 0 | if ((ch == '!' && next == '=') || /* != */
5139 | 0 | (ch == '=' && next == '=') || /* == */
5140 | 0 | (ch == '<' && next == '=') || /* <= */
5141 | 0 | (ch == '>' && next == '=') /* >= */
5142 | 0 | ) {
5143 | 0 | *str += 1;
5144 | 0 | continue;
5145 | 0 | }
5146 | | /* Don't get out of the loop for these, if they're single
5147 | | chars (not part of 2-char tokens). If by themselves, they
5148 | | don't end an expression (unlike say '!'). */
5149 | 0 | if (ch == '>' || ch == '<') {
5150 | 0 | continue;
5151 | 0 | }
5152 | 0 | }
5153 | |
5154 | | /* Normal way out of this loop. */
5155 | 0 | break;
5156 | 0 | } else if (ch == ']' || ch == '}' || ch == ')') {
5157 | 0 | if (!nested_depth) {
5158 | 0 | ast_error(c, n, "f-string: unmatched '%c'", ch);
5159 | 0 | goto error;
5160 | 0 | }
5161 | 0 | nested_depth--;
5162 | 0 | int opening = parenstack[nested_depth];
5163 | 0 | if (!((opening == '(' && ch == ')') ||
5164 | 0 | (opening == '[' && ch == ']') ||
5165 | 0 | (opening == '{' && ch == '}')))
5166 | 0 | {
5167 | 0 | ast_error(c, n,
5168 | 0 | "f-string: closing parenthesis '%c' "
5169 | 0 | "does not match opening parenthesis '%c'",
5170 | 0 | ch, opening);
5171 | 0 | goto error;
5172 | 0 | }
5173 | 0 | } else {
5174 | | /* Just consume this char and loop around. */
5175 | 0 | }
5176 | 0 | }
5177 | 0 | expr_end = *str;
5178 | | /* If we leave this loop in a string or with mismatched parens, we
5179 | | don't care. We'll get a syntax error when compiling the
5180 | | expression. But, we can produce a better error message, so
5181 | | let's just do that.*/
5182 | 0 | if (quote_char) {
5183 | 0 | ast_error(c, n, "f-string: unterminated string");
5184 | 0 | goto error;
5185 | 0 | }
5186 | 0 | if (nested_depth) {
5187 | 0 | int opening = parenstack[nested_depth - 1];
5188 | 0 | ast_error(c, n, "f-string: unmatched '%c'", opening);
5189 | 0 | goto error;
5190 | 0 | }
5191 | |
5192 | 0 | if (*str >= end)
5193 | 0 | goto unexpected_end_of_string;
5194 | |
5195 | | /* Compile the expression as soon as possible, so we show errors
5196 | | related to the expression before errors related to the
5197 | | conversion or format_spec. */
5198 | 0 | simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
5199 | 0 | if (!simple_expression)
5200 | 0 | goto error;
5201 | |
5202 | | /* Check for =, which puts the text value of the expression in
5203 | | expr_text. */
5204 | 0 | if (**str == '=') {
5205 | 0 | *str += 1;
5206 | |
5207 | | /* Skip over ASCII whitespace. No need to test for end of string
5208 | | here, since we know there's at least a trailing quote somewhere
5209 | | ahead. */
5210 | 0 | while (Py_ISSPACE(**str)) {
5211 | 0 | *str += 1;
5212 | 0 | }
5213 | |
5214 | | /* Set *expr_text to the text of the expression. */
5215 | 0 | *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
5216 | 0 | if (!*expr_text) {
5217 | 0 | goto error;
5218 | 0 | }
5219 | 0 | }
5220 | |
5221 | | /* Check for a conversion char, if present. */
5222 | 0 | if (**str == '!') {
5223 | 0 | *str += 1;
5224 | 0 | if (*str >= end)
5225 | 0 | goto unexpected_end_of_string;
5226 | |
5227 | 0 | conversion = **str;
5228 | 0 | *str += 1;
5229 | |
5230 | | /* Validate the conversion. */
5231 | 0 | if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
5232 | 0 | ast_error(c, n,
5233 | 0 | "f-string: invalid conversion character: "
5234 | 0 | "expected 's', 'r', or 'a'");
5235 | 0 | goto error;
5236 | 0 | }
5237 |

5238 | 0 | }
5239 | |
5240 | | /* Check for the format spec, if present. */
5241 | 0 | if (*str >= end)
5242 | 0 | goto unexpected_end_of_string;
5243 | 0 | if (**str == ':') {
5244 | 0 | *str += 1;
5245 | 0 | if (*str >= end)
5246 | 0 | goto unexpected_end_of_string;
5247 | |
5248 | | /* Parse the format spec. */
5249 | 0 | format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
5250 | 0 | if (!format_spec)
5251 | 0 | goto error;
5252 | 0 | }
5253 | |
5254 | 0 | if (*str >= end || **str != '}')
5255 | 0 | goto unexpected_end_of_string;
5256 | |
5257 | | /* We're at a right brace. Consume it. */
5258 | 0 | assert(*str < end);
5259 | 0 | assert(**str == '}');
5260 | 0 | *str += 1;
5261 | |
5262 | | /* If we're in = mode (detected by non-NULL expr_text), and have no format
5263 | | spec and no explicit conversion, set the conversion to 'r'. */
5264 | 0 | if (*expr_text && format_spec == NULL && conversion == -1) {
5265 | 0 | conversion = 'r';
5266 | 0 | }
5267 | |
5268 | | /* And now create the FormattedValue node that represents this
5269 | | entire expression with the conversion and format spec. */
5270 | 0 | *expression = FormattedValue(simple_expression, conversion,
5271 | 0 | format_spec, LINENO(n),
5272 | 0 | n->n_col_offset, n->n_end_lineno,
5273 | 0 | n->n_end_col_offset, c->c_arena);
5274 | 0 | if (!*expression)
5275 | 0 | goto error;
5276 | |
5277 | 0 | return 0;
5278 | |
5279 | 0 | unexpected_end_of_string:
5280 | 0 | ast_error(c, n, "f-string: expecting '}'");
5281 | | /* Falls through to error. */
5282 |

5283 | 0 | error:
/* Drop the debug text if one was created; *expr_text keeps its old value. */
5284 | 0 | Py_XDECREF(*expr_text);
5285 | 0 | return -1;
5286 |

5287 | 0 | }
5288 | | |
5289 | | /* Return -1 on error. |
5290 | | |
5291 | | Return 0 if we have a literal (possible zero length) and an |
5292 | | expression (zero length if at the end of the string. |
5293 | | |
5294 | | Return 1 if we have a literal, but no expression, and we want the |
5295 | | caller to call us again. This is used to deal with doubled |
5296 | | braces. |
5297 | | |
5298 | | When called multiple times on the string 'a{{b{0}c', this function |
5299 | | will return: |
5300 | | |
5301 | | 1. the literal 'a{' with no expression, and a return value |
5302 | | of 1. Despite the fact that there's no expression, the return |
5303 | | value of 1 means we're not finished yet. |
5304 | | |
5305 | | 2. the literal 'b' and the expression '0', with a return value of |
5306 | | 0. The fact that there's an expression means we're not finished. |
5307 | | |
5308 | | 3. literal 'c' with no expression and a return value of 0. The |
5309 | | combination of the return value of 0 with no expression means |
5310 | | we're finished. |
5311 | | */ |
5312 | | static int |
5313 | | fstring_find_literal_and_expr(const char **str, const char *end, int raw, |
5314 | | int recurse_lvl, PyObject **literal, |
5315 | | PyObject **expr_text, expr_ty *expression, |
5316 | | struct compiling *c, const node *n) |
5317 | 0 | { |
5318 | 0 | int result; |
5319 | |
|
5320 | 0 | assert(*literal == NULL && *expression == NULL); |
5321 | | |
5322 | | /* Get any literal string. */ |
5323 | 0 | result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n); |
5324 | 0 | if (result < 0) |
5325 | 0 | goto error; |
5326 | | |
5327 | 0 | assert(result == 0 || result == 1); |
5328 | |
|
5329 | 0 | if (result == 1) |
5330 | | /* We have a literal, but don't look at the expression. */ |
5331 | 0 | return 1; |
5332 | | |
5333 | 0 | if (*str >= end || **str == '}') |
5334 | | /* We're at the end of the string or the end of a nested |
5335 | | f-string: no expression. The top-level error case where we |
5336 | | expect to be at the end of the string but we're at a '}' is |
5337 | | handled later. */ |
5338 | 0 | return 0; |
5339 | | |
5340 | | /* We must now be at the start of an expression, on a '{'. */ |
5341 | 0 | assert(**str == '{'); |
5342 | |
|
5343 | 0 | if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text, |
5344 | 0 | expression, c, n) < 0) |
5345 | 0 | goto error; |
5346 | | |
5347 | 0 | return 0; |
5348 | | |
5349 | 0 | error: |
5350 | 0 | Py_CLEAR(*literal); |
5351 | 0 | return -1; |
5352 | 0 | } |
5353 | | |
5354 | 24 | #define EXPRLIST_N_CACHED 64 |
5355 | | |
5356 | | typedef struct { |
5357 | | /* Incrementally build an array of expr_ty, to be used in an |
5358 | | asdl_seq. Cache some small but reasonably sized number of |
5359 | | expr_ty's, and then after that start dynamically allocating, |
5360 | | doubling the number allocated each time. Note that the f-string |
5361 | | f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one |
5362 | | Constant for the literal 'a'. So you add expr_ty's about twice as |
5363 | | fast as you add expressions in an f-string. */ |
5364 | | |
5365 | | Py_ssize_t allocated; /* Number we've allocated. */ |
5366 | | Py_ssize_t size; /* Number we've used. */ |
5367 | | expr_ty *p; /* Pointer to the memory we're actually |
5368 | | using. Will point to 'data' until we |
5369 | | start dynamically allocating. */ |
5370 | | expr_ty data[EXPRLIST_N_CACHED]; |
5371 | | } ExprList; |
5372 | | |
5373 | | #ifdef NDEBUG |
5374 | | #define ExprList_check_invariants(l) |
5375 | | #else |
5376 | | static void |
5377 | | ExprList_check_invariants(ExprList *l) |
5378 | | { |
5379 | | /* Check our invariants. Make sure this object is "live", and |
5380 | | hasn't been deallocated. */ |
5381 | | assert(l->size >= 0); |
5382 | | assert(l->p != NULL); |
5383 | | if (l->size <= EXPRLIST_N_CACHED) |
5384 | | assert(l->data == l->p); |
5385 | | } |
5386 | | #endif |
5387 | | |
5388 | | static void |
5389 | | ExprList_Init(ExprList *l) |
5390 | 24 | { |
5391 | 24 | l->allocated = EXPRLIST_N_CACHED; |
5392 | 24 | l->size = 0; |
5393 | | |
5394 | | /* Until we start allocating dynamically, p points to data. */ |
5395 | 24 | l->p = l->data; |
5396 | | |
5397 | 24 | ExprList_check_invariants(l); |
5398 | 24 | } |
5399 | | |
5400 | | static int |
5401 | | ExprList_Append(ExprList *l, expr_ty exp) |
5402 | 0 | { |
5403 | 0 | ExprList_check_invariants(l); |
5404 | 0 | if (l->size >= l->allocated) { |
5405 | | /* We need to alloc (or realloc) the memory. */ |
5406 | 0 | Py_ssize_t new_size = l->allocated * 2; |
5407 | | |
5408 | | /* See if we've ever allocated anything dynamically. */ |
5409 | 0 | if (l->p == l->data) { |
5410 | 0 | Py_ssize_t i; |
5411 | | /* We're still using the cached data. Switch to |
5412 | | alloc-ing. */ |
5413 | 0 | l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size); |
5414 | 0 | if (!l->p) |
5415 | 0 | return -1; |
5416 | | /* Copy the cached data into the new buffer. */ |
5417 | 0 | for (i = 0; i < l->size; i++) |
5418 | 0 | l->p[i] = l->data[i]; |
5419 | 0 | } else { |
5420 | | /* Just realloc. */ |
5421 | 0 | expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size); |
5422 | 0 | if (!tmp) { |
5423 | 0 | PyMem_RawFree(l->p); |
5424 | 0 | l->p = NULL; |
5425 | 0 | return -1; |
5426 | 0 | } |
5427 | 0 | l->p = tmp; |
5428 | 0 | } |
5429 | | |
5430 | 0 | l->allocated = new_size; |
5431 | 0 | assert(l->allocated == 2 * l->size); |
5432 | 0 | } |
5433 | | |
5434 | 0 | l->p[l->size++] = exp; |
5435 | |
|
5436 | 0 | ExprList_check_invariants(l); |
5437 | 0 | return 0; |
5438 | 0 | } |
5439 | | |
5440 | | static void |
5441 | | ExprList_Dealloc(ExprList *l) |
5442 | 0 | { |
5443 | 0 | ExprList_check_invariants(l); |
5444 | | |
5445 | | /* If there's been an error, or we've never dynamically allocated, |
5446 | | do nothing. */ |
5447 | 0 | if (!l->p || l->p == l->data) { |
5448 | | /* Do nothing. */ |
5449 | 0 | } else { |
5450 | | /* We have dynamically allocated. Free the memory. */ |
5451 | 0 | PyMem_RawFree(l->p); |
5452 | 0 | } |
5453 | 0 | l->p = NULL; |
5454 | 0 | l->size = -1; |
5455 | 0 | } |
5456 | | |
5457 | | static asdl_seq * |
5458 | | ExprList_Finish(ExprList *l, PyArena *arena) |
5459 | 0 | { |
5460 | 0 | asdl_seq *seq; |
5461 | |
|
5462 | 0 | ExprList_check_invariants(l); |
5463 | | |
5464 | | /* Allocate the asdl_seq and copy the expressions in to it. */ |
5465 | 0 | seq = _Py_asdl_seq_new(l->size, arena); |
5466 | 0 | if (seq) { |
5467 | 0 | Py_ssize_t i; |
5468 | 0 | for (i = 0; i < l->size; i++) |
5469 | 0 | asdl_seq_SET(seq, i, l->p[i]); |
5470 | 0 | } |
5471 | 0 | ExprList_Dealloc(l); |
5472 | 0 | return seq; |
5473 | 0 | } |
5474 | | |
5475 | | /* The FstringParser is designed to add a mix of strings and |
5476 | | f-strings, and concat them together as needed. Ultimately, it |
5477 | | generates an expr_ty. */ |
5478 | | typedef struct { |
5479 | | PyObject *last_str; |
5480 | | ExprList expr_list; |
5481 | | int fmode; |
5482 | | } FstringParser; |
5483 | | |
5484 | | #ifdef NDEBUG |
5485 | | #define FstringParser_check_invariants(state) |
5486 | | #else |
5487 | | static void |
5488 | | FstringParser_check_invariants(FstringParser *state) |
5489 | | { |
5490 | | if (state->last_str) |
5491 | | assert(PyUnicode_CheckExact(state->last_str)); |
5492 | | ExprList_check_invariants(&state->expr_list); |
5493 | | } |
5494 | | #endif |
5495 | | |
5496 | | static void |
5497 | | FstringParser_Init(FstringParser *state) |
5498 | 24 | { |
5499 | 24 | state->last_str = NULL; |
5500 | 24 | state->fmode = 0; |
5501 | 24 | ExprList_Init(&state->expr_list); |
5502 | 24 | FstringParser_check_invariants(state); |
5503 | 24 | } |
5504 | | |
5505 | | static void |
5506 | | FstringParser_Dealloc(FstringParser *state) |
5507 | 0 | { |
5508 | 0 | FstringParser_check_invariants(state); |
5509 | |
|
5510 | 0 | Py_XDECREF(state->last_str); |
5511 | 0 | ExprList_Dealloc(&state->expr_list); |
5512 | 0 | } |
5513 | | |
5514 | | /* Constants for the following */ |
5515 | | static PyObject *u_kind; |
5516 | | |
5517 | | /* Compute 'kind' field for string Constant (either 'u' or None) */ |
5518 | | static PyObject * |
5519 | | make_kind(struct compiling *c, const node *n) |
5520 | 18 | { |
5521 | 18 | char *s = NULL; |
5522 | 18 | PyObject *kind = NULL; |
5523 | | |
5524 | | /* Find the first string literal, if any */ |
5525 | 36 | while (TYPE(n) != STRING) { |
5526 | 18 | if (NCH(n) == 0) |
5527 | 0 | return NULL; |
5528 | 18 | n = CHILD(n, 0); |
5529 | 18 | } |
5530 | 18 | REQ(n, STRING); |
5531 | | |
5532 | | /* If it starts with 'u', return a PyUnicode "u" string */ |
5533 | 18 | s = STR(n); |
5534 | 18 | if (s && *s == 'u') { |
5535 | 0 | if (!u_kind) { |
5536 | 0 | u_kind = PyUnicode_InternFromString("u"); |
5537 | 0 | if (!u_kind) |
5538 | 0 | return NULL; |
5539 | 0 | } |
5540 | 0 | kind = u_kind; |
5541 | 0 | if (PyArena_AddPyObject(c->c_arena, kind) < 0) { |
5542 | 0 | return NULL; |
5543 | 0 | } |
5544 | 0 | Py_INCREF(kind); |
5545 | 0 | } |
5546 | 18 | return kind; |
5547 | 18 | } |
5548 | | |
5549 | | /* Make a Constant node, but decref the PyUnicode object being added. */ |
5550 | | static expr_ty |
5551 | | make_str_node_and_del(PyObject **str, struct compiling *c, const node* n) |
5552 | 18 | { |
5553 | 18 | PyObject *s = *str; |
5554 | 18 | PyObject *kind = NULL; |
5555 | 18 | *str = NULL; |
5556 | 18 | assert(PyUnicode_CheckExact(s)); |
5557 | 18 | if (PyArena_AddPyObject(c->c_arena, s) < 0) { |
5558 | 0 | Py_DECREF(s); |
5559 | 0 | return NULL; |
5560 | 0 | } |
5561 | 18 | kind = make_kind(c, n); |
5562 | 18 | if (kind == NULL && PyErr_Occurred()) |
5563 | 0 | return NULL; |
5564 | 18 | return Constant(s, kind, LINENO(n), n->n_col_offset, |
5565 | 18 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
5566 | 18 | } |
5567 | | |
5568 | | /* Add a non-f-string (that is, a regular literal string). str is |
5569 | | decref'd. */ |
5570 | | static int |
5571 | | FstringParser_ConcatAndDel(FstringParser *state, PyObject *str) |
5572 | 18 | { |
5573 | 18 | FstringParser_check_invariants(state); |
5574 | | |
5575 | 18 | assert(PyUnicode_CheckExact(str)); |
5576 | | |
5577 | 18 | if (PyUnicode_GET_LENGTH(str) == 0) { |
5578 | 0 | Py_DECREF(str); |
5579 | 0 | return 0; |
5580 | 0 | } |
5581 | | |
5582 | 18 | if (!state->last_str) { |
5583 | | /* We didn't have a string before, so just remember this one. */ |
5584 | 18 | state->last_str = str; |
5585 | 18 | } else { |
5586 | | /* Concatenate this with the previous string. */ |
5587 | 0 | PyUnicode_AppendAndDel(&state->last_str, str); |
5588 | 0 | if (!state->last_str) |
5589 | 0 | return -1; |
5590 | 0 | } |
5591 | 18 | FstringParser_check_invariants(state); |
5592 | 18 | return 0; |
5593 | 18 | } |
5594 | | |
5595 | | /* Parse an f-string. The f-string is in *str to end, with no |
5596 | | 'f' or quotes. */ |
5597 | | static int |
5598 | | FstringParser_ConcatFstring(FstringParser *state, const char **str, |
5599 | | const char *end, int raw, int recurse_lvl, |
5600 | | struct compiling *c, const node *n) |
5601 | 0 | { |
5602 | 0 | FstringParser_check_invariants(state); |
5603 | 0 | state->fmode = 1; |
5604 | | |
5605 | | /* Parse the f-string. */ |
5606 | 0 | while (1) { |
5607 | 0 | PyObject *literal = NULL; |
5608 | 0 | PyObject *expr_text = NULL; |
5609 | 0 | expr_ty expression = NULL; |
5610 | | |
5611 | | /* If there's a zero length literal in front of the |
5612 | | expression, literal will be NULL. If we're at the end of |
5613 | | the f-string, expression will be NULL (unless result == 1, |
5614 | | see below). */ |
5615 | 0 | int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl, |
5616 | 0 | &literal, &expr_text, |
5617 | 0 | &expression, c, n); |
5618 | 0 | if (result < 0) |
5619 | 0 | return -1; |
5620 | | |
5621 | | /* Add the literal, if any. */ |
5622 | 0 | if (literal && FstringParser_ConcatAndDel(state, literal) < 0) { |
5623 | 0 | Py_XDECREF(expr_text); |
5624 | 0 | return -1; |
5625 | 0 | } |
5626 | | /* Add the expr_text, if any. */ |
5627 | 0 | if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) { |
5628 | 0 | return -1; |
5629 | 0 | } |
5630 | | |
5631 | | /* We've dealt with the literal and expr_text, their ownership has |
5632 | | been transferred to the state object. Don't look at them again. */ |
5633 | | |
5634 | | /* See if we should just loop around to get the next literal |
5635 | | and expression, while ignoring the expression this |
5636 | | time. This is used for un-doubling braces, as an |
5637 | | optimization. */ |
5638 | 0 | if (result == 1) |
5639 | 0 | continue; |
5640 | | |
5641 | 0 | if (!expression) |
5642 | | /* We're done with this f-string. */ |
5643 | 0 | break; |
5644 | | |
5645 | | /* We know we have an expression. Convert any existing string |
5646 | | to a Constant node. */ |
5647 | 0 | if (!state->last_str) { |
5648 | | /* Do nothing. No previous literal. */ |
5649 | 0 | } else { |
5650 | | /* Convert the existing last_str literal to a Constant node. */ |
5651 | 0 | expr_ty str = make_str_node_and_del(&state->last_str, c, n); |
5652 | 0 | if (!str || ExprList_Append(&state->expr_list, str) < 0) |
5653 | 0 | return -1; |
5654 | 0 | } |
5655 | | |
5656 | 0 | if (ExprList_Append(&state->expr_list, expression) < 0) |
5657 | 0 | return -1; |
5658 | 0 | } |
5659 | | |
5660 | | /* If recurse_lvl is zero, then we must be at the end of the |
5661 | | string. Otherwise, we must be at a right brace. */ |
5662 | | |
5663 | 0 | if (recurse_lvl == 0 && *str < end-1) { |
5664 | 0 | ast_error(c, n, "f-string: unexpected end of string"); |
5665 | 0 | return -1; |
5666 | 0 | } |
5667 | 0 | if (recurse_lvl != 0 && **str != '}') { |
5668 | 0 | ast_error(c, n, "f-string: expecting '}'"); |
5669 | 0 | return -1; |
5670 | 0 | } |
5671 | | |
5672 | 0 | FstringParser_check_invariants(state); |
5673 | 0 | return 0; |
5674 | 0 | } |
5675 | | |
5676 | | /* Convert the partial state reflected in last_str and expr_list to an |
5677 | | expr_ty. The expr_ty can be a Constant, or a JoinedStr. */ |
5678 | | static expr_ty |
5679 | | FstringParser_Finish(FstringParser *state, struct compiling *c, |
5680 | | const node *n) |
5681 | 18 | { |
5682 | 18 | asdl_seq *seq; |
5683 | | |
5684 | 18 | FstringParser_check_invariants(state); |
5685 | | |
5686 | | /* If we're just a constant string with no expressions, return |
5687 | | that. */ |
5688 | 18 | if (!state->fmode) { |
5689 | 18 | assert(!state->expr_list.size); |
5690 | 18 | if (!state->last_str) { |
5691 | | /* Create a zero length string. */ |
5692 | 0 | state->last_str = PyUnicode_FromStringAndSize(NULL, 0); |
5693 | 0 | if (!state->last_str) |
5694 | 0 | goto error; |
5695 | 0 | } |
5696 | 18 | return make_str_node_and_del(&state->last_str, c, n); |
5697 | 18 | } |
5698 | | |
5699 | | /* Create a Constant node out of last_str, if needed. It will be the |
5700 | | last node in our expression list. */ |
5701 | 0 | if (state->last_str) { |
5702 | 0 | expr_ty str = make_str_node_and_del(&state->last_str, c, n); |
5703 | 0 | if (!str || ExprList_Append(&state->expr_list, str) < 0) |
5704 | 0 | goto error; |
5705 | 0 | } |
5706 | | /* This has already been freed. */ |
5707 | 0 | assert(state->last_str == NULL); |
5708 | |
|
5709 | 0 | seq = ExprList_Finish(&state->expr_list, c->c_arena); |
5710 | 0 | if (!seq) |
5711 | 0 | goto error; |
5712 | | |
5713 | 0 | return JoinedStr(seq, LINENO(n), n->n_col_offset, |
5714 | 0 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
5715 | | |
5716 | 0 | error: |
5717 | 0 | FstringParser_Dealloc(state); |
5718 | 0 | return NULL; |
5719 | 0 | } |
5720 | | |
5721 | | /* Given an f-string (with no 'f' or quotes) that's in *str and ends |
5722 | | at end, parse it into an expr_ty. Return NULL on error. Adjust |
5723 | | str to point past the parsed portion. */ |
5724 | | static expr_ty |
5725 | | fstring_parse(const char **str, const char *end, int raw, int recurse_lvl, |
5726 | | struct compiling *c, const node *n) |
5727 | 0 | { |
5728 | 0 | FstringParser state; |
5729 | |
|
5730 | 0 | FstringParser_Init(&state); |
5731 | 0 | if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl, |
5732 | 0 | c, n) < 0) { |
5733 | 0 | FstringParser_Dealloc(&state); |
5734 | 0 | return NULL; |
5735 | 0 | } |
5736 | | |
5737 | 0 | return FstringParser_Finish(&state, c, n); |
5738 | 0 | } |
5739 | | |
5740 | | /* n is a Python string literal, including the bracketing quote |
5741 | | characters, and r, b, u, &/or f prefixes (if any), and embedded |
5742 | | escape sequences (if any). parsestr parses it, and sets *result to |
5743 | | the decoded Python object. If the string is an f-string, *result is |
5744 | | NULL and *fstr and *fstrlen are set to the unparsed string contents |
5745 | | and their length. Return 0 if no errors occurred, -1 otherwise. |
5746 | | */ |
5747 | | static int |
5748 | | parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode, |
5749 | | PyObject **result, const char **fstr, Py_ssize_t *fstrlen) |
5750 | 24 | { |
5751 | 24 | size_t len; |
5752 | 24 | const char *s = STR(n); |
5753 | 24 | int quote = Py_CHARMASK(*s); |
5754 | 24 | int fmode = 0; |
5755 | 24 | *bytesmode = 0; |
5756 | 24 | *rawmode = 0; |
5757 | 24 | *result = NULL; |
5758 | 24 | *fstr = NULL; |
5759 | 24 | if (Py_ISALPHA(quote)) { |
5760 | 12 | while (!*bytesmode || !*rawmode) { |
5761 | 12 | if (quote == 'b' || quote == 'B') { |
5762 | 6 | quote = *++s; |
5763 | 6 | *bytesmode = 1; |
5764 | 6 | } |
5765 | 6 | else if (quote == 'u' || quote == 'U') { |
5766 | 0 | quote = *++s; |
5767 | 0 | } |
5768 | 6 | else if (quote == 'r' || quote == 'R') { |
5769 | 0 | quote = *++s; |
5770 | 0 | *rawmode = 1; |
5771 | 0 | } |
5772 | 6 | else if (quote == 'f' || quote == 'F') { |
5773 | 0 | quote = *++s; |
5774 | 0 | fmode = 1; |
5775 | 0 | } |
5776 | 6 | else { |
5777 | 6 | break; |
5778 | 6 | } |
5779 | 12 | } |
5780 | 6 | } |
5781 | | |
5782 | | /* fstrings are only allowed in Python 3.6 and greater */ |
5783 | 24 | if (fmode && c->c_feature_version < 6) { |
5784 | 0 | ast_error(c, n, "Format strings are only supported in Python 3.6 and greater"); |
5785 | 0 | return -1; |
5786 | 0 | } |
5787 | | |
5788 | 24 | if (fmode && *bytesmode) { |
5789 | 0 | PyErr_BadInternalCall(); |
5790 | 0 | return -1; |
5791 | 0 | } |
5792 | 24 | if (quote != '\'' && quote != '\"') { |
5793 | 0 | PyErr_BadInternalCall(); |
5794 | 0 | return -1; |
5795 | 0 | } |
5796 | | /* Skip the leading quote char. */ |
5797 | 24 | s++; |
5798 | 24 | len = strlen(s); |
5799 | 24 | if (len > INT_MAX) { |
5800 | 0 | PyErr_SetString(PyExc_OverflowError, |
5801 | 0 | "string to parse is too long"); |
5802 | 0 | return -1; |
5803 | 0 | } |
5804 | 24 | if (s[--len] != quote) { |
5805 | | /* Last quote char must match the first. */ |
5806 | 0 | PyErr_BadInternalCall(); |
5807 | 0 | return -1; |
5808 | 0 | } |
5809 | 24 | if (len >= 4 && s[0] == quote && s[1] == quote) { |
5810 | | /* A triple quoted string. We've already skipped one quote at |
5811 | | the start and one at the end of the string. Now skip the |
5812 | | two at the start. */ |
5813 | 0 | s += 2; |
5814 | 0 | len -= 2; |
5815 | | /* And check that the last two match. */ |
5816 | 0 | if (s[--len] != quote || s[--len] != quote) { |
5817 | 0 | PyErr_BadInternalCall(); |
5818 | 0 | return -1; |
5819 | 0 | } |
5820 | 0 | } |
5821 | | |
5822 | 24 | if (fmode) { |
5823 | | /* Just return the bytes. The caller will parse the resulting |
5824 | | string. */ |
5825 | 0 | *fstr = s; |
5826 | 0 | *fstrlen = len; |
5827 | 0 | return 0; |
5828 | 0 | } |
5829 | | |
5830 | | /* Not an f-string. */ |
5831 | | /* Avoid invoking escape decoding routines if possible. */ |
5832 | 24 | *rawmode = *rawmode || strchr(s, '\\') == NULL; |
5833 | 24 | if (*bytesmode) { |
5834 | | /* Disallow non-ASCII characters. */ |
5835 | 6 | const char *ch; |
5836 | 36 | for (ch = s; *ch; ch++) { |
5837 | 30 | if (Py_CHARMASK(*ch) >= 0x80) { |
5838 | 0 | ast_error(c, n, |
5839 | 0 | "bytes can only contain ASCII " |
5840 | 0 | "literal characters."); |
5841 | 0 | return -1; |
5842 | 0 | } |
5843 | 30 | } |
5844 | 6 | if (*rawmode) |
5845 | 4 | *result = PyBytes_FromStringAndSize(s, len); |
5846 | 2 | else |
5847 | 2 | *result = decode_bytes_with_escapes(c, n, s, len); |
5848 | 18 | } else { |
5849 | 18 | if (*rawmode) |
5850 | 14 | *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); |
5851 | 4 | else |
5852 | 4 | *result = decode_unicode_with_escapes(c, n, s, len); |
5853 | 18 | } |
5854 | 24 | return *result == NULL ? -1 : 0; |
5855 | 24 | } |
5856 | | |
5857 | | /* Accepts a STRING+ atom, and produces an expr_ty node. Run through |
5858 | | each STRING atom, and process it as needed. For bytes, just |
5859 | | concatenate them together, and the result will be a Constant node. For |
5860 | | normal strings and f-strings, concatenate them together. The result |
5861 | | will be a Constant node if there were no f-strings, or a JoinedStr |
5862 | | node if at least one f-string was involved (FstringParser_Finish |
5863 | | never returns a bare FormattedValue, even for a lone f-string |
5864 | | expression with no surrounding literals). */ |
5865 | | static expr_ty |
5866 | | parsestrplus(struct compiling *c, const node *n) |
5867 | 24 | { |
5868 | 24 | int bytesmode = 0; |
5869 | 24 | PyObject *bytes_str = NULL; |
5870 | 24 | int i; |
5871 | | |
5872 | 24 | FstringParser state; |
5873 | 24 | FstringParser_Init(&state); |
5874 | | |
5875 | 48 | for (i = 0; i < NCH(n); i++) { |
5876 | 24 | int this_bytesmode; |
5877 | 24 | int this_rawmode; |
5878 | 24 | PyObject *s; |
5879 | 24 | const char *fstr; |
5880 | 24 | Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */ |
5881 | | |
5882 | 24 | REQ(CHILD(n, i), STRING); |
5883 | 24 | if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s, |
5884 | 24 | &fstr, &fstrlen) != 0) |
5885 | 0 | goto error; |
5886 | | |
5887 | | /* Check that we're not mixing bytes with unicode. */ |
5888 | 24 | if (i != 0 && bytesmode != this_bytesmode) { |
5889 | 0 | ast_error(c, n, "cannot mix bytes and nonbytes literals"); |
5890 | | /* s is NULL if the current string part is an f-string. */ |
5891 | 0 | Py_XDECREF(s); |
5892 | 0 | goto error; |
5893 | 0 | } |
5894 | 24 | bytesmode = this_bytesmode; |
5895 | | |
5896 | 24 | if (fstr != NULL) { |
5897 | 0 | int result; |
5898 | 0 | assert(s == NULL && !bytesmode); |
5899 | | /* This is an f-string. Parse and concatenate it. */ |
5900 | 0 | result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen, |
5901 | 0 | this_rawmode, 0, c, n); |
5902 | 0 | if (result < 0) |
5903 | 0 | goto error; |
5904 | 24 | } else { |
5905 | | /* A string or byte string. */ |
5906 | 24 | assert(s != NULL && fstr == NULL); |
5907 | | |
5908 | 24 | assert(bytesmode ? PyBytes_CheckExact(s) : |
5909 | 24 | PyUnicode_CheckExact(s)); |
5910 | | |
5911 | 24 | if (bytesmode) { |
5912 | | /* For bytes, concat as we go. */ |
5913 | 6 | if (i == 0) { |
5914 | | /* First time, just remember this value. */ |
5915 | 6 | bytes_str = s; |
5916 | 6 | } else { |
5917 | 0 | PyBytes_ConcatAndDel(&bytes_str, s); |
5918 | 0 | if (!bytes_str) |
5919 | 0 | goto error; |
5920 | 0 | } |
5921 | 18 | } else { |
5922 | | /* This is a regular string. Concatenate it. */ |
5923 | 18 | if (FstringParser_ConcatAndDel(&state, s) < 0) |
5924 | 0 | goto error; |
5925 | 18 | } |
5926 | 24 | } |
5927 | 24 | } |
5928 | 24 | if (bytesmode) { |
5929 | | /* Just return the bytes object and we're done. */ |
5930 | 6 | if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0) |
5931 | 0 | goto error; |
5932 | 6 | return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset, |
5933 | 6 | n->n_end_lineno, n->n_end_col_offset, c->c_arena); |
5934 | 6 | } |
5935 | | |
5936 | | /* We're not a bytes string, bytes_str should never have been set. */ |
5937 | 18 | assert(bytes_str == NULL); |
5938 | | |
5939 | 18 | return FstringParser_Finish(&state, c, n); |
5940 | | |
5941 | 0 | error: |
5942 | 0 | Py_XDECREF(bytes_str); |
5943 | 0 | FstringParser_Dealloc(&state); |
5944 | 0 | return NULL; |
5945 | 24 | } |
5946 | | |
5947 | | PyObject * |
5948 | | _PyAST_GetDocString(asdl_seq *body) |
5949 | 76 | { |
5950 | 76 | if (!asdl_seq_LEN(body)) { |
5951 | 0 | return NULL; |
5952 | 0 | } |
5953 | 76 | stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0); |
5954 | 76 | if (st->kind != Expr_kind) { |
5955 | 76 | return NULL; |
5956 | 76 | } |
5957 | 0 | expr_ty e = st->v.Expr.value; |
5958 | 0 | if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) { |
5959 | 0 | return e->v.Constant.value; |
5960 | 0 | } |
5961 | 0 | return NULL; |
5962 | 0 | } |