Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/action_helpers.c
Line
Count
Source
1
#include <Python.h>
2
#include "pycore_pystate.h"         // _PyInterpreterState_GET()
3
#include "pycore_runtime.h"         // _PyRuntime
4
#include "pycore_unicodeobject.h"   // _PyUnicode_InternImmortal()
5
6
#include "pegen.h"
7
#include "string_parser.h"          // _PyPegen_decode_string()
8
9
10
void *
11
_PyPegen_dummy_name(Parser *p, ...)
12
55.1k
{
13
55.1k
    return &_PyRuntime.parser.dummy_name;
14
55.1k
}
15
16
/* Creates a single-element asdl_seq* whose only item is `a` */
17
asdl_seq *
18
_PyPegen_singleton_seq(Parser *p, void *a)
19
111k
{
20
111k
    assert(a != NULL);
21
111k
    asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena);
22
111k
    if (!seq) {
23
0
        return NULL;
24
0
    }
25
111k
    asdl_seq_SET_UNTYPED(seq, 0, a);
26
111k
    return seq;
27
111k
}
28
29
/* Creates a copy of `seq` with `a` prepended to it (a NULL `seq` yields a single-element sequence) */
30
asdl_seq *
31
_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
32
254k
{
33
254k
    assert(a != NULL);
34
254k
    if (!seq) {
35
6.84k
        return _PyPegen_singleton_seq(p, a);
36
6.84k
    }
37
38
247k
    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
39
247k
    if (!new_seq) {
40
0
        return NULL;
41
0
    }
42
43
247k
    asdl_seq_SET_UNTYPED(new_seq, 0, a);
44
468k
    for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
45
220k
        asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1));
46
220k
    }
47
247k
    return new_seq;
48
247k
}
49
50
/* Creates a copy of `seq` with `a` appended to it (a NULL `seq` yields a single-element sequence) */
51
asdl_seq *
52
_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
53
0
{
54
0
    assert(a != NULL);
55
0
    if (!seq) {
56
0
        return _PyPegen_singleton_seq(p, a);
57
0
    }
58
59
0
    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
60
0
    if (!new_seq) {
61
0
        return NULL;
62
0
    }
63
64
0
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
65
0
        asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i));
66
0
    }
67
0
    asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a);
68
0
    return new_seq;
69
0
}
70
71
static Py_ssize_t
72
_get_flattened_seq_size(asdl_seq *seqs)
73
17.9k
{
74
17.9k
    Py_ssize_t size = 0;
75
86.3k
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
76
68.3k
        asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
77
68.3k
        size += asdl_seq_LEN(inner_seq);
78
68.3k
    }
79
17.9k
    return size;
80
17.9k
}
81
82
/* Flattens an asdl_seq* of asdl_seq*s */
83
asdl_seq *
84
_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
85
17.9k
{
86
17.9k
    Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
87
17.9k
    assert(flattened_seq_size > 0);
88
89
17.9k
    asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
90
17.9k
    if (!flattened_seq) {
91
0
        return NULL;
92
0
    }
93
94
17.9k
    int flattened_seq_idx = 0;
95
86.3k
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
96
68.3k
        asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
97
142k
        for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
98
73.7k
            asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
99
73.7k
        }
100
68.3k
    }
101
17.9k
    assert(flattened_seq_idx == flattened_seq_size);
102
103
17.9k
    return flattened_seq;
104
17.9k
}
105
106
void *
107
_PyPegen_seq_last_item(asdl_seq *seq)
108
595
{
109
595
    Py_ssize_t len = asdl_seq_LEN(seq);
110
595
    return asdl_seq_GET_UNTYPED(seq, len - 1);
111
595
}
112
113
void *
114
_PyPegen_seq_first_item(asdl_seq *seq)
115
550
{
116
550
    return asdl_seq_GET_UNTYPED(seq, 0);
117
550
}
118
119
/* Creates a new name of the form <first_name>.<second_name> */
120
expr_ty
121
_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
122
1.24k
{
123
1.24k
    assert(first_name != NULL && second_name != NULL);
124
1.24k
    PyObject *uni = PyUnicode_FromFormat("%U.%U",
125
1.24k
            first_name->v.Name.id, second_name->v.Name.id);
126
1.24k
    if (!uni) {
127
0
        return NULL;
128
0
    }
129
1.24k
    PyInterpreterState *interp = _PyInterpreterState_GET();
130
1.24k
    _PyUnicode_InternImmortal(interp, &uni);
131
1.24k
    if (_PyArena_AddPyObject(p->arena, uni) < 0) {
132
0
        Py_DECREF(uni);
133
0
        return NULL;
134
0
    }
135
136
1.24k
    return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
137
1.24k
}
138
139
/* Counts the total number of dots in seq's tokens (a DOT counts as 1, an ELLIPSIS as 3) */
140
int
141
_PyPegen_seq_count_dots(asdl_seq *seq)
142
2.61k
{
143
2.61k
    int number_of_dots = 0;
144
4.19k
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
145
1.58k
        Token *current_expr = asdl_seq_GET_UNTYPED(seq, i);
146
1.58k
        switch (current_expr->type) {
147
79
            case ELLIPSIS:
148
79
                number_of_dots += 3;
149
79
                break;
150
1.50k
            case DOT:
151
1.50k
                number_of_dots += 1;
152
1.50k
                break;
153
0
            default:
154
0
                Py_UNREACHABLE();
155
1.58k
        }
156
1.58k
    }
157
158
2.61k
    return number_of_dots;
159
2.61k
}
160
161
/* Creates an alias with '*' as the identifier name */
162
alias_ty
163
_PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno,
164
364
                        int end_col_offset, PyArena *arena) {
165
364
    PyObject *str = PyUnicode_InternFromString("*");
166
364
    if (!str) {
167
0
        return NULL;
168
0
    }
169
364
    if (_PyArena_AddPyObject(p->arena, str) < 0) {
170
0
        Py_DECREF(str);
171
0
        return NULL;
172
0
    }
173
364
    return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
174
364
}
175
176
/* Creates a new asdl_seq* with the identifiers of all the names in seq */
177
asdl_identifier_seq *
178
_PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq)
179
1.42k
{
180
1.42k
    Py_ssize_t len = asdl_seq_LEN(seq);
181
1.42k
    assert(len > 0);
182
183
1.42k
    asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena);
184
1.42k
    if (!new_seq) {
185
0
        return NULL;
186
0
    }
187
4.00k
    for (Py_ssize_t i = 0; i < len; i++) {
188
2.58k
        expr_ty e = asdl_seq_GET(seq, i);
189
2.58k
        asdl_seq_SET(new_seq, i, e->v.Name.id);
190
2.58k
    }
191
1.42k
    return new_seq;
192
1.42k
}
193
194
/* Constructs a CmpopExprPair */
195
CmpopExprPair *
196
_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
197
11.6k
{
198
11.6k
    assert(expr != NULL);
199
11.6k
    CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
200
11.6k
    if (!a) {
201
0
        return NULL;
202
0
    }
203
11.6k
    a->cmpop = cmpop;
204
11.6k
    a->expr = expr;
205
11.6k
    return a;
206
11.6k
}
207
208
asdl_int_seq *
209
_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
210
4.87k
{
211
4.87k
    Py_ssize_t len = asdl_seq_LEN(seq);
212
4.87k
    assert(len > 0);
213
214
4.87k
    asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
215
4.87k
    if (!new_seq) {
216
0
        return NULL;
217
0
    }
218
15.9k
    for (Py_ssize_t i = 0; i < len; i++) {
219
11.0k
        CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
220
11.0k
        asdl_seq_SET(new_seq, i, pair->cmpop);
221
11.0k
    }
222
4.87k
    return new_seq;
223
4.87k
}
224
225
asdl_expr_seq *
226
_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
227
4.87k
{
228
4.87k
    Py_ssize_t len = asdl_seq_LEN(seq);
229
4.87k
    assert(len > 0);
230
231
4.87k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
232
4.87k
    if (!new_seq) {
233
0
        return NULL;
234
0
    }
235
15.9k
    for (Py_ssize_t i = 0; i < len; i++) {
236
11.0k
        CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
237
11.0k
        asdl_seq_SET(new_seq, i, pair->expr);
238
11.0k
    }
239
4.87k
    return new_seq;
240
4.87k
}
241
242
/* Creates an asdl_expr_seq* whose elements are those of seq changed to have ctx as context; an empty seq yields NULL */
243
static asdl_expr_seq *
244
_set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx)
245
2.73k
{
246
2.73k
    Py_ssize_t len = asdl_seq_LEN(seq);
247
2.73k
    if (len == 0) {
248
1.47k
        return NULL;
249
1.47k
    }
250
251
1.25k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
252
1.25k
    if (!new_seq) {
253
0
        return NULL;
254
0
    }
255
3.99k
    for (Py_ssize_t i = 0; i < len; i++) {
256
2.73k
        expr_ty e = asdl_seq_GET(seq, i);
257
2.73k
        asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
258
2.73k
    }
259
1.25k
    return new_seq;
260
1.25k
}
261
262
static expr_ty
263
_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
264
147k
{
265
147k
    return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
266
147k
}
267
268
static expr_ty
269
_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
270
2.21k
{
271
2.21k
    return _PyAST_Tuple(
272
2.21k
            _set_seq_context(p, e->v.Tuple.elts, ctx),
273
2.21k
            ctx,
274
2.21k
            EXTRA_EXPR(e, e));
275
2.21k
}
276
277
static expr_ty
278
_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
279
520
{
280
520
    return _PyAST_List(
281
520
            _set_seq_context(p, e->v.List.elts, ctx),
282
520
            ctx,
283
520
            EXTRA_EXPR(e, e));
284
520
}
285
286
static expr_ty
287
_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
288
138
{
289
138
    return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice,
290
138
                            ctx, EXTRA_EXPR(e, e));
291
138
}
292
293
static expr_ty
294
_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
295
47
{
296
47
    return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr,
297
47
                            ctx, EXTRA_EXPR(e, e));
298
47
}
299
300
static expr_ty
301
_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
302
235
{
303
235
    return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx),
304
235
                          ctx, EXTRA_EXPR(e, e));
305
235
}
306
307
/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
308
expr_ty
309
_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
310
150k
{
311
150k
    assert(expr != NULL);
312
313
150k
    expr_ty new = NULL;
314
150k
    switch (expr->kind) {
315
147k
        case Name_kind:
316
147k
            new = _set_name_context(p, expr, ctx);
317
147k
            break;
318
2.21k
        case Tuple_kind:
319
2.21k
            new = _set_tuple_context(p, expr, ctx);
320
2.21k
            break;
321
520
        case List_kind:
322
520
            new = _set_list_context(p, expr, ctx);
323
520
            break;
324
138
        case Subscript_kind:
325
138
            new = _set_subscript_context(p, expr, ctx);
326
138
            break;
327
47
        case Attribute_kind:
328
47
            new = _set_attribute_context(p, expr, ctx);
329
47
            break;
330
235
        case Starred_kind:
331
235
            new = _set_starred_context(p, expr, ctx);
332
235
            break;
333
0
        default:
334
0
            new = expr;
335
150k
    }
336
150k
    return new;
337
150k
}
338
339
/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
340
KeyValuePair *
341
_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
342
29.7k
{
343
29.7k
    KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair));
344
29.7k
    if (!a) {
345
0
        return NULL;
346
0
    }
347
29.7k
    a->key = key;
348
29.7k
    a->value = value;
349
29.7k
    return a;
350
29.7k
}
351
352
/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
353
asdl_expr_seq *
354
_PyPegen_get_keys(Parser *p, asdl_seq *seq)
355
4.37k
{
356
4.37k
    Py_ssize_t len = asdl_seq_LEN(seq);
357
4.37k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
358
4.37k
    if (!new_seq) {
359
0
        return NULL;
360
0
    }
361
26.1k
    for (Py_ssize_t i = 0; i < len; i++) {
362
21.7k
        KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
363
21.7k
        asdl_seq_SET(new_seq, i, pair->key);
364
21.7k
    }
365
4.37k
    return new_seq;
366
4.37k
}
367
368
/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
369
asdl_expr_seq *
370
_PyPegen_get_values(Parser *p, asdl_seq *seq)
371
4.37k
{
372
4.37k
    Py_ssize_t len = asdl_seq_LEN(seq);
373
4.37k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
374
4.37k
    if (!new_seq) {
375
0
        return NULL;
376
0
    }
377
26.1k
    for (Py_ssize_t i = 0; i < len; i++) {
378
21.7k
        KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
379
21.7k
        asdl_seq_SET(new_seq, i, pair->value);
380
21.7k
    }
381
4.37k
    return new_seq;
382
4.37k
}
383
384
/* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */
385
KeyPatternPair *
386
_PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
387
7.73k
{
388
7.73k
    KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair));
389
7.73k
    if (!a) {
390
0
        return NULL;
391
0
    }
392
7.73k
    a->key = key;
393
7.73k
    a->pattern = pattern;
394
7.73k
    return a;
395
7.73k
}
396
397
/* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */
398
asdl_expr_seq *
399
_PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq)
400
1.29k
{
401
1.29k
    Py_ssize_t len = asdl_seq_LEN(seq);
402
1.29k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
403
1.29k
    if (!new_seq) {
404
0
        return NULL;
405
0
    }
406
3.37k
    for (Py_ssize_t i = 0; i < len; i++) {
407
2.08k
        KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
408
2.08k
        asdl_seq_SET(new_seq, i, pair->key);
409
2.08k
    }
410
1.29k
    return new_seq;
411
1.29k
}
412
413
/* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */
414
asdl_pattern_seq *
415
_PyPegen_get_patterns(Parser *p, asdl_seq *seq)
416
1.29k
{
417
1.29k
    Py_ssize_t len = asdl_seq_LEN(seq);
418
1.29k
    asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena);
419
1.29k
    if (!new_seq) {
420
0
        return NULL;
421
0
    }
422
3.37k
    for (Py_ssize_t i = 0; i < len; i++) {
423
2.08k
        KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
424
2.08k
        asdl_seq_SET(new_seq, i, pair->pattern);
425
2.08k
    }
426
1.29k
    return new_seq;
427
1.29k
}
428
429
/* Constructs a NameDefaultPair */
430
NameDefaultPair *
431
_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
432
56.4k
{
433
56.4k
    NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
434
56.4k
    if (!a) {
435
0
        return NULL;
436
0
    }
437
56.4k
    a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
438
56.4k
    a->value = value;
439
56.4k
    return a;
440
56.4k
}
441
442
/* Constructs a SlashWithDefault */
443
SlashWithDefault *
444
_PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults)
445
5.03k
{
446
5.03k
    SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
447
5.03k
    if (!a) {
448
0
        return NULL;
449
0
    }
450
5.03k
    a->plain_names = plain_names;
451
5.03k
    a->names_with_defaults = names_with_defaults;
452
5.03k
    return a;
453
5.03k
}
454
455
/* Constructs a StarEtc */
456
StarEtc *
457
_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
458
4.19k
{
459
4.19k
    StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc));
460
4.19k
    if (!a) {
461
0
        return NULL;
462
0
    }
463
4.19k
    a->vararg = vararg;
464
4.19k
    a->kwonlyargs = kwonlyargs;
465
4.19k
    a->kwarg = kwarg;
466
4.19k
    return a;
467
4.19k
}
468
469
asdl_seq *
470
_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
471
10.1k
{
472
10.1k
    Py_ssize_t first_len = asdl_seq_LEN(a);
473
10.1k
    Py_ssize_t second_len = asdl_seq_LEN(b);
474
10.1k
    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
475
10.1k
    if (!new_seq) {
476
0
        return NULL;
477
0
    }
478
479
10.1k
    int k = 0;
480
31.4k
    for (Py_ssize_t i = 0; i < first_len; i++) {
481
21.2k
        asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i));
482
21.2k
    }
483
16.2k
    for (Py_ssize_t i = 0; i < second_len; i++) {
484
6.01k
        asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i));
485
6.01k
    }
486
487
10.1k
    return new_seq;
488
10.1k
}
489
490
static asdl_arg_seq*
491
_get_names(Parser *p, asdl_seq *names_with_defaults)
492
14.1k
{
493
14.1k
    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
494
14.1k
    asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena);
495
14.1k
    if (!seq) {
496
0
        return NULL;
497
0
    }
498
25.5k
    for (Py_ssize_t i = 0; i < len; i++) {
499
11.3k
        NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
500
11.3k
        asdl_seq_SET(seq, i, pair->arg);
501
11.3k
    }
502
14.1k
    return seq;
503
14.1k
}
504
505
static asdl_expr_seq *
506
_get_defaults(Parser *p, asdl_seq *names_with_defaults)
507
14.1k
{
508
14.1k
    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
509
14.1k
    asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena);
510
14.1k
    if (!seq) {
511
0
        return NULL;
512
0
    }
513
25.5k
    for (Py_ssize_t i = 0; i < len; i++) {
514
11.3k
        NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
515
11.3k
        asdl_seq_SET(seq, i, pair->value);
516
11.3k
    }
517
14.1k
    return seq;
518
14.1k
}
519
520
static int
521
_make_posonlyargs(Parser *p,
522
                  asdl_arg_seq *slash_without_default,
523
                  SlashWithDefault *slash_with_default,
524
13.1k
                  asdl_arg_seq **posonlyargs) {
525
13.1k
    if (slash_without_default != NULL) {
526
1.21k
        *posonlyargs = slash_without_default;
527
1.21k
    }
528
11.8k
    else if (slash_with_default != NULL) {
529
1.65k
        asdl_arg_seq *slash_with_default_names =
530
1.65k
                _get_names(p, slash_with_default->names_with_defaults);
531
1.65k
        if (!slash_with_default_names) {
532
0
            return -1;
533
0
        }
534
1.65k
        *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences(
535
1.65k
                p,
536
1.65k
                (asdl_seq*)slash_with_default->plain_names,
537
1.65k
                (asdl_seq*)slash_with_default_names);
538
1.65k
    }
539
10.2k
    else {
540
10.2k
        *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
541
10.2k
    }
542
13.1k
    return *posonlyargs == NULL ? -1 : 0;
543
13.1k
}
544
545
static int
546
_make_posargs(Parser *p,
547
              asdl_arg_seq *plain_names,
548
              asdl_seq *names_with_default,
549
13.1k
              asdl_arg_seq **posargs) {
550
551
13.1k
    if (names_with_default != NULL) {
552
9.40k
        if (plain_names != NULL) {
553
6.43k
            asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default);
554
6.43k
            if (!names_with_default_names) {
555
0
                return -1;
556
0
            }
557
6.43k
            *posargs = (asdl_arg_seq*)_PyPegen_join_sequences(
558
6.43k
                    p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names);
559
6.43k
        }
560
2.96k
        else {
561
2.96k
            *posargs = _get_names(p, names_with_default);
562
2.96k
        }
563
9.40k
    }
564
3.71k
    else {
565
3.71k
        if (plain_names != NULL) {
566
            // With the current grammar, we never get here.
567
            // If that has changed, remove the assert, and test thoroughly.
568
0
            assert(0);
569
0
            *posargs = plain_names;
570
0
        }
571
3.71k
        else {
572
3.71k
            *posargs = _Py_asdl_arg_seq_new(0, p->arena);
573
3.71k
        }
574
3.71k
    }
575
13.1k
    return *posargs == NULL ? -1 : 0;
576
13.1k
}
577
578
static int
579
_make_posdefaults(Parser *p,
580
                  SlashWithDefault *slash_with_default,
581
                  asdl_seq *names_with_default,
582
13.1k
                  asdl_expr_seq **posdefaults) {
583
13.1k
    if (slash_with_default != NULL && names_with_default != NULL) {
584
1.65k
        asdl_expr_seq *slash_with_default_values =
585
1.65k
                _get_defaults(p, slash_with_default->names_with_defaults);
586
1.65k
        if (!slash_with_default_values) {
587
0
            return -1;
588
0
        }
589
1.65k
        asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default);
590
1.65k
        if (!names_with_default_values) {
591
0
            return -1;
592
0
        }
593
1.65k
        *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences(
594
1.65k
                p,
595
1.65k
                (asdl_seq*)slash_with_default_values,
596
1.65k
                (asdl_seq*)names_with_default_values);
597
1.65k
    }
598
11.4k
    else if (slash_with_default == NULL && names_with_default != NULL) {
599
7.74k
        *posdefaults = _get_defaults(p, names_with_default);
600
7.74k
    }
601
3.71k
    else if (slash_with_default != NULL && names_with_default == NULL) {
602
0
        *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
603
0
    }
604
3.71k
    else {
605
3.71k
        *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
606
3.71k
    }
607
13.1k
    return *posdefaults == NULL ? -1 : 0;
608
13.1k
}
609
610
static int
611
_make_kwargs(Parser *p, StarEtc *star_etc,
612
             asdl_arg_seq **kwonlyargs,
613
13.1k
             asdl_expr_seq **kwdefaults) {
614
13.1k
    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
615
3.10k
        *kwonlyargs = _get_names(p, star_etc->kwonlyargs);
616
3.10k
    }
617
10.0k
    else {
618
10.0k
        *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
619
10.0k
    }
620
621
13.1k
    if (*kwonlyargs == NULL) {
622
0
        return -1;
623
0
    }
624
625
13.1k
    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
626
3.10k
        *kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
627
3.10k
    }
628
10.0k
    else {
629
10.0k
        *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
630
10.0k
    }
631
632
13.1k
    if (*kwdefaults == NULL) {
633
0
        return -1;
634
0
    }
635
636
13.1k
    return 0;
637
13.1k
}
638
639
/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
640
arguments_ty
641
_PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default,
642
                        SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names,
643
                        asdl_seq *names_with_default, StarEtc *star_etc)
644
13.1k
{
645
13.1k
    asdl_arg_seq *posonlyargs;
646
13.1k
    if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) {
647
0
        return NULL;
648
0
    }
649
650
13.1k
    asdl_arg_seq *posargs;
651
13.1k
    if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) {
652
0
        return NULL;
653
0
    }
654
655
13.1k
    asdl_expr_seq *posdefaults;
656
13.1k
    if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) {
657
0
        return NULL;
658
0
    }
659
660
13.1k
    arg_ty vararg = NULL;
661
13.1k
    if (star_etc != NULL && star_etc->vararg != NULL) {
662
2.21k
        vararg = star_etc->vararg;
663
2.21k
    }
664
665
13.1k
    asdl_arg_seq *kwonlyargs;
666
13.1k
    asdl_expr_seq *kwdefaults;
667
13.1k
    if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) {
668
0
        return NULL;
669
0
    }
670
671
13.1k
    arg_ty kwarg = NULL;
672
13.1k
    if (star_etc != NULL && star_etc->kwarg != NULL) {
673
1.18k
        kwarg = star_etc->kwarg;
674
1.18k
    }
675
676
13.1k
    return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
677
13.1k
                            kwdefaults, kwarg, posdefaults, p->arena);
678
13.1k
}
679
680
681
/* Constructs an empty arguments_ty object, which is used when a function accepts no
682
 * arguments. */
683
arguments_ty
684
_PyPegen_empty_arguments(Parser *p)
685
2.32k
{
686
2.32k
    asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
687
2.32k
    if (!posonlyargs) {
688
0
        return NULL;
689
0
    }
690
2.32k
    asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena);
691
2.32k
    if (!posargs) {
692
0
        return NULL;
693
0
    }
694
2.32k
    asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
695
2.32k
    if (!posdefaults) {
696
0
        return NULL;
697
0
    }
698
2.32k
    asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
699
2.32k
    if (!kwonlyargs) {
700
0
        return NULL;
701
0
    }
702
2.32k
    asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
703
2.32k
    if (!kwdefaults) {
704
0
        return NULL;
705
0
    }
706
707
2.32k
    return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
708
2.32k
                            kwdefaults, NULL, posdefaults, p->arena);
709
2.32k
}
710
711
/* Encapsulates the value of an operator_ty into an AugOperator struct */
712
AugOperator *
713
_PyPegen_augoperator(Parser *p, operator_ty kind)
714
2.77k
{
715
2.77k
    AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator));
716
2.77k
    if (!a) {
717
0
        return NULL;
718
0
    }
719
2.77k
    a->kind = kind;
720
2.77k
    return a;
721
2.77k
}
722
723
/* Construct a FunctionDef (or AsyncFunctionDef) equivalent to function_def, but with decorators */
724
stmt_ty
725
_PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def)
726
267
{
727
267
    assert(function_def != NULL);
728
267
    if (function_def->kind == AsyncFunctionDef_kind) {
729
29
        return _PyAST_AsyncFunctionDef(
730
29
            function_def->v.AsyncFunctionDef.name,
731
29
            function_def->v.AsyncFunctionDef.args,
732
29
            function_def->v.AsyncFunctionDef.body, decorators,
733
29
            function_def->v.AsyncFunctionDef.returns,
734
29
            function_def->v.AsyncFunctionDef.type_comment,
735
29
            function_def->v.AsyncFunctionDef.type_params,
736
29
            function_def->lineno, function_def->col_offset,
737
29
            function_def->end_lineno, function_def->end_col_offset, p->arena);
738
29
    }
739
740
238
    return _PyAST_FunctionDef(
741
238
        function_def->v.FunctionDef.name,
742
238
        function_def->v.FunctionDef.args,
743
238
        function_def->v.FunctionDef.body, decorators,
744
238
        function_def->v.FunctionDef.returns,
745
238
        function_def->v.FunctionDef.type_comment,
746
238
        function_def->v.FunctionDef.type_params,
747
238
        function_def->lineno, function_def->col_offset,
748
238
        function_def->end_lineno, function_def->end_col_offset, p->arena);
749
267
}
750
751
/* Construct a ClassDef equivalent to class_def, but with decorators */
752
stmt_ty
753
_PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def)
754
343
{
755
343
    assert(class_def != NULL);
756
343
    return _PyAST_ClassDef(
757
343
        class_def->v.ClassDef.name,
758
343
        class_def->v.ClassDef.bases, class_def->v.ClassDef.keywords,
759
343
        class_def->v.ClassDef.body, decorators,
760
343
        class_def->v.ClassDef.type_params,
761
343
        class_def->lineno, class_def->col_offset, class_def->end_lineno,
762
343
        class_def->end_col_offset, p->arena);
763
343
}
764
765
/* Construct a KeywordOrStarred */
766
KeywordOrStarred *
767
_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
768
32.0k
{
769
32.0k
    KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
770
32.0k
    if (!a) {
771
0
        return NULL;
772
0
    }
773
32.0k
    a->element = element;
774
32.0k
    a->is_keyword = is_keyword;
775
32.0k
    return a;
776
32.0k
}
777
778
/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
779
static int
780
_seq_number_of_starred_exprs(asdl_seq *seq)
781
16.0k
{
782
16.0k
    int n = 0;
783
52.1k
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
784
36.0k
        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i);
785
36.0k
        if (!k->is_keyword) {
786
1.85k
            n++;
787
1.85k
        }
788
36.0k
    }
789
16.0k
    return n;
790
16.0k
}
791
792
/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s (NULL if there are none) */
793
asdl_expr_seq *
794
_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
795
8.01k
{
796
8.01k
    int new_len = _seq_number_of_starred_exprs(kwargs);
797
8.01k
    if (new_len == 0) {
798
7.70k
        return NULL;
799
7.70k
    }
800
305
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena);
801
305
    if (!new_seq) {
802
0
        return NULL;
803
0
    }
804
805
305
    int idx = 0;
806
1.85k
    for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
807
1.55k
        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
808
1.55k
        if (!k->is_keyword) {
809
927
            asdl_seq_SET(new_seq, idx++, k->element);
810
927
        }
811
1.55k
    }
812
305
    return new_seq;
813
305
}
814
815
/* Return a new asdl_seq* with only the keywords in kwargs (NULL if kwargs holds no keywords) */
816
asdl_keyword_seq*
817
_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
818
8.01k
{
819
8.01k
    Py_ssize_t len = asdl_seq_LEN(kwargs);
820
8.01k
    Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
821
8.01k
    if (new_len == 0) {
822
0
        return NULL;
823
0
    }
824
8.01k
    asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena);
825
8.01k
    if (!new_seq) {
826
0
        return NULL;
827
0
    }
828
829
8.01k
    int idx = 0;
830
26.0k
    for (Py_ssize_t i = 0; i < len; i++) {
831
18.0k
        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
832
18.0k
        if (k->is_keyword) {
833
17.1k
            asdl_seq_SET(new_seq, idx++, k->element);
834
17.1k
        }
835
18.0k
    }
836
8.01k
    return new_seq;
837
8.01k
}
838
839
expr_ty
840
_PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
841
359
{
842
359
    if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) {
843
5
        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
844
5
        return NULL;
845
5
    }
846
354
    return exp;
847
359
}
848
849
expr_ty
850
_PyPegen_ensure_real(Parser *p, expr_ty exp)
851
612
{
852
612
    if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) {
853
2
        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
854
2
        return NULL;
855
2
    }
856
610
    return exp;
857
612
}
858
859
mod_ty
860
5.43k
_PyPegen_make_module(Parser *p, asdl_stmt_seq *a) {
861
5.43k
    asdl_type_ignore_seq *type_ignores = NULL;
862
5.43k
    Py_ssize_t num = p->type_ignore_comments.num_items;
863
5.43k
    if (num > 0) {
864
        // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
865
0
        type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
866
0
        if (type_ignores == NULL) {
867
0
            return NULL;
868
0
        }
869
0
        for (Py_ssize_t i = 0; i < num; i++) {
870
0
            PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
871
0
            if (tag == NULL) {
872
0
                return NULL;
873
0
            }
874
0
            type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
875
0
                                                  tag, p->arena);
876
0
            if (ti == NULL) {
877
0
                return NULL;
878
0
            }
879
0
            asdl_seq_SET(type_ignores, i, ti);
880
0
        }
881
0
    }
882
5.43k
    return _PyAST_Module(a, type_ignores, p->arena);
883
5.43k
}
884
885
PyObject *
886
_PyPegen_new_type_comment(Parser *p, const char *s)
887
0
{
888
0
    PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
889
0
    if (res == NULL) {
890
0
        return NULL;
891
0
    }
892
0
    if (_PyArena_AddPyObject(p->arena, res) < 0) {
893
0
        Py_DECREF(res);
894
0
        return NULL;
895
0
    }
896
0
    return res;
897
0
}
898
899
arg_ty
900
_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
901
109k
{
902
109k
    if (tc == NULL) {
903
109k
        return a;
904
109k
    }
905
0
    const char *bytes = PyBytes_AsString(tc->bytes);
906
0
    if (bytes == NULL) {
907
0
        return NULL;
908
0
    }
909
0
    PyObject *tco = _PyPegen_new_type_comment(p, bytes);
910
0
    if (tco == NULL) {
911
0
        return NULL;
912
0
    }
913
0
    return _PyAST_arg(a->arg, a->annotation, tco,
914
0
                      a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
915
0
                      p->arena);
916
0
}
917
918
/* Checks if the NOTEQUAL token is valid given the current parser flags.
919
0 indicates success and nonzero indicates failure (an exception may be set). */
920
int
921
1.01k
_PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
922
1.01k
    assert(t->bytes != NULL);
923
1.01k
    assert(t->type == NOTEQUAL);
924
925
1.01k
    const char* tok_str = PyBytes_AS_STRING(t->bytes);
926
1.01k
    if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) {
927
1
        RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
928
1
        return -1;
929
1
    }
930
1.01k
    if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
931
1.01k
        return strcmp(tok_str, "!=");
932
1.01k
    }
933
0
    return 0;
934
1.01k
}
935
936
int
937
7.23k
_PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
938
7.23k
    if (name->kind != Name_kind) {
939
2.06k
        return 0;
940
2.06k
    }
941
5.16k
    const char* candidates[2] = {"print", "exec"};
942
15.2k
    for (int i=0; i<2; i++) {
943
10.3k
        if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) {
944
204
            return 1;
945
204
        }
946
10.3k
    }
947
4.95k
    return 0;
948
5.16k
}
949
950
void *
951
_PyPegen_raise_error_for_missing_comma(Parser *p, expr_ty a, expr_ty b)
952
3.97k
{
953
    // Don't raise for legacy statements like "print x" or "exec x"
954
3.97k
    if (_PyPegen_check_legacy_stmt(p, a)) {
955
194
        return NULL;
956
194
    }
957
    // Only raise inside parentheses/brackets (level > 0)
958
3.77k
    if (p->tokens[p->mark - 1]->level == 0) {
959
3.64k
        return NULL;
960
3.64k
    }
961
    // For multi-line expressions (like string concatenations), point to the
962
    // last line instead of the first for a more helpful error message.
963
    // Use a->col_offset as the starting column since all strings in the
964
    // concatenation typically share the same indentation.
965
131
    if (a->end_lineno > a->lineno) {
966
4
        return RAISE_ERROR_KNOWN_LOCATION(
967
4
            p, PyExc_SyntaxError, a->end_lineno, a->col_offset,
968
4
            a->end_lineno, a->end_col_offset,
969
4
            "invalid syntax. Perhaps you forgot a comma?"
970
4
        );
971
4
    }
972
127
    return RAISE_ERROR_KNOWN_LOCATION(
973
127
        p, PyExc_SyntaxError, a->lineno, a->col_offset,
974
127
        b->end_lineno, b->end_col_offset,
975
127
        "invalid syntax. Perhaps you forgot a comma?"
976
127
    );
977
131
}
978
979
static ResultTokenWithMetadata *
980
result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
981
6.21k
{
982
6.21k
    ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));
983
6.21k
    if (res == NULL) {
984
0
        return NULL;
985
0
    }
986
6.21k
    res->metadata = metadata;
987
6.21k
    res->result = result;
988
6.21k
    return res;
989
6.21k
}
990
991
ResultTokenWithMetadata *
992
_PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
993
1.39k
{
994
1.39k
    if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
995
2
        return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
996
2
            conv_token, conv,
997
2
            "%c-string: conversion type must come right after the exclamation mark",
998
2
            TOK_GET_STRING_PREFIX(p->tok)
999
2
        );
1000
2
    }
1001
1002
1.39k
    Py_UCS4 first = PyUnicode_READ_CHAR(conv->v.Name.id, 0);
1003
1.39k
    if (PyUnicode_GET_LENGTH(conv->v.Name.id) > 1 ||
1004
1.39k
            !(first == 's' || first == 'r' || first == 'a')) {
1005
7
        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conv,
1006
7
                                            "%c-string: invalid conversion character %R: expected 's', 'r', or 'a'",
1007
7
                                            TOK_GET_STRING_PREFIX(p->tok),
1008
7
                                            conv->v.Name.id);
1009
7
        return NULL;
1010
7
    }
1011
1012
1.38k
    return result_token_with_metadata(p, conv, conv_token->metadata);
1013
1.39k
}
1014
1015
ResultTokenWithMetadata *
1016
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
1017
                                int end_lineno, int end_col_offset, PyArena *arena)
1018
4.82k
{
1019
4.82k
    if (!spec) {
1020
0
        return NULL;
1021
0
    }
1022
1023
    // This is needed to keep compatibility with 3.11, where an empty format
1024
    // spec is parsed as an *empty* JoinedStr node, instead of having an empty
1025
    // constant in it.
1026
4.82k
    Py_ssize_t n_items = asdl_seq_LEN(spec);
1027
4.82k
    Py_ssize_t non_empty_count = 0;
1028
13.5k
    for (Py_ssize_t i = 0; i < n_items; i++) {
1029
8.69k
        expr_ty item = asdl_seq_GET(spec, i);
1030
8.69k
        non_empty_count += !(item->kind == Constant_kind &&
1031
8.69k
                             PyUnicode_CheckExact(item->v.Constant.value) &&
1032
5.90k
                             PyUnicode_GET_LENGTH(item->v.Constant.value) == 0);
1033
8.69k
    }
1034
4.82k
    if (non_empty_count != n_items) {
1035
1.40k
        asdl_expr_seq *resized_spec =
1036
1.40k
            _Py_asdl_expr_seq_new(non_empty_count, p->arena);
1037
1.40k
        if (resized_spec == NULL) {
1038
0
            return NULL;
1039
0
        }
1040
1.40k
        Py_ssize_t j = 0;
1041
4.09k
        for (Py_ssize_t i = 0; i < n_items; i++) {
1042
2.68k
            expr_ty item = asdl_seq_GET(spec, i);
1043
2.68k
            if (item->kind == Constant_kind &&
1044
2.68k
                PyUnicode_CheckExact(item->v.Constant.value) &&
1045
2.03k
                PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
1046
1.40k
                continue;
1047
1.40k
            }
1048
1.27k
            asdl_seq_SET(resized_spec, j++, item);
1049
1.27k
        }
1050
1.40k
        assert(j == non_empty_count);
1051
1.40k
        spec = resized_spec;
1052
1.40k
    }
1053
4.82k
    expr_ty res;
1054
4.82k
    Py_ssize_t n = asdl_seq_LEN(spec);
1055
4.82k
    if (n == 0 || (n == 1 && asdl_seq_GET(spec, 0)->kind == Constant_kind)) {
1056
4.25k
        res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
1057
4.25k
                                    end_col_offset, p->arena);
1058
4.25k
    } else {
1059
571
        res = _PyPegen_concatenate_strings(p, spec,
1060
571
                             lineno, col_offset, end_lineno,
1061
571
                             end_col_offset, arena);
1062
571
    }
1063
4.82k
    if (!res) {
1064
0
        return NULL;
1065
0
    }
1066
4.82k
    return result_token_with_metadata(p, res, colon->metadata);
1067
4.82k
}
1068
1069
const char *
1070
_PyPegen_get_expr_name(expr_ty e)
1071
158
{
1072
158
    assert(e != NULL);
1073
158
    switch (e->kind) {
1074
1
        case Attribute_kind:
1075
1
            return "attribute";
1076
0
        case Subscript_kind:
1077
0
            return "subscript";
1078
1
        case Starred_kind:
1079
1
            return "starred";
1080
5
        case Name_kind:
1081
5
            return "name";
1082
1
        case List_kind:
1083
1
            return "list";
1084
3
        case Tuple_kind:
1085
3
            return "tuple";
1086
2
        case Lambda_kind:
1087
2
            return "lambda";
1088
12
        case Call_kind:
1089
12
            return "function call";
1090
3
        case BoolOp_kind:
1091
20
        case BinOp_kind:
1092
29
        case UnaryOp_kind:
1093
29
            return "expression";
1094
0
        case GeneratorExp_kind:
1095
0
            return "generator expression";
1096
1
        case Yield_kind:
1097
2
        case YieldFrom_kind:
1098
2
            return "yield expression";
1099
1
        case Await_kind:
1100
1
            return "await expression";
1101
1
        case ListComp_kind:
1102
1
            return "list comprehension";
1103
2
        case SetComp_kind:
1104
2
            return "set comprehension";
1105
1
        case DictComp_kind:
1106
1
            return "dict comprehension";
1107
1
        case Dict_kind:
1108
1
            return "dict literal";
1109
0
        case Set_kind:
1110
0
            return "set display";
1111
5
        case JoinedStr_kind:
1112
5
        case FormattedValue_kind:
1113
5
            return "f-string expression";
1114
6
        case TemplateStr_kind:
1115
6
        case Interpolation_kind:
1116
6
            return "t-string expression";
1117
69
        case Constant_kind: {
1118
69
            PyObject *value = e->v.Constant.value;
1119
69
            if (value == Py_None) {
1120
1
                return "None";
1121
1
            }
1122
68
            if (value == Py_False) {
1123
1
                return "False";
1124
1
            }
1125
67
            if (value == Py_True) {
1126
4
                return "True";
1127
4
            }
1128
63
            if (value == Py_Ellipsis) {
1129
1
                return "ellipsis";
1130
1
            }
1131
62
            return "literal";
1132
63
        }
1133
13
        case Compare_kind:
1134
13
            return "comparison";
1135
2
        case IfExp_kind:
1136
2
            return "conditional expression";
1137
1
        case NamedExpr_kind:
1138
1
            return "named expression";
1139
0
        default:
1140
0
            PyErr_Format(PyExc_SystemError,
1141
0
                         "unexpected expression in assignment %d (line %d)",
1142
0
                         e->kind, e->lineno);
1143
0
            return NULL;
1144
158
    }
1145
158
}
1146
1147
expr_ty
1148
20
_PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
1149
20
    if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) {
1150
16
        return comprehension->iter;
1151
16
    }
1152
4
    return PyPegen_last_item(comprehension->ifs, expr_ty);
1153
20
}
1154
1155
/* Build a Call node (with a dummy callee) from positional arguments `a` and
 * the mixed sequence `b`.
 *
 * When `b` is NULL the call gets `a` as its arguments and no keywords.
 * Otherwise the starred expressions extracted from `b` are appended after
 * the items of `a`, and what remains of `b` once the starred expressions
 * are removed becomes the keywords.  Returns NULL if the combined argument
 * sequence cannot be allocated. */
expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
                     int lineno, int col_offset, int end_lineno,
                     int end_col_offset, PyArena *arena) {
    if (b == NULL) {
        return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
                           end_lineno, end_col_offset, arena);
    }

    asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
    asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);

    Py_ssize_t n_plain = asdl_seq_LEN(a);
    Py_ssize_t n_starred = starreds ? asdl_seq_LEN(starreds) : 0;

    asdl_expr_seq *args = _Py_asdl_expr_seq_new(n_plain + n_starred, arena);
    if (args == NULL) {
        return NULL;
    }

    for (Py_ssize_t k = 0; k < n_plain; k++) {
        asdl_seq_SET(args, k, asdl_seq_GET(a, k));
    }
    for (Py_ssize_t k = 0; k < n_starred; k++) {
        asdl_seq_SET(args, n_plain + k, asdl_seq_GET(starreds, k));
    }

    return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno,
                       col_offset, end_lineno, end_col_offset, arena);
}
1190
1191
// AST Error reporting helpers
1192
1193
expr_ty
1194
_PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
1195
6.25k
{
1196
6.25k
    if (e == NULL) {
1197
0
        return NULL;
1198
0
    }
1199
1200
6.25k
#define VISIT_CONTAINER(CONTAINER, TYPE) do { \
1201
1.37k
        Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\
1202
4.51k
        for (Py_ssize_t i = 0; i < len; i++) {\
1203
3.27k
            expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\
1204
3.27k
            expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\
1205
3.27k
            if (child != NULL) {\
1206
134
                return child;\
1207
134
            }\
1208
3.27k
        }\
1209
1.37k
    } while (0)
1210
1211
    // We only need to visit List and Tuple nodes recursively as those
1212
    // are the only ones that can contain valid names in targets when
1213
    // they are parsed as expressions. Any other kind of expression
1214
    // that is a container (like Sets or Dicts) is directly invalid and
1215
    // we don't need to visit it recursively.
1216
1217
6.25k
    switch (e->kind) {
1218
415
        case List_kind:
1219
415
            VISIT_CONTAINER(e, List);
1220
381
            return NULL;
1221
955
        case Tuple_kind:
1222
955
            VISIT_CONTAINER(e, Tuple);
1223
855
            return NULL;
1224
1.31k
        case Starred_kind:
1225
1.31k
            if (targets_type == DEL_TARGETS) {
1226
1
                return e;
1227
1
            }
1228
1.31k
            return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
1229
685
        case Compare_kind:
1230
            // This is needed, because the `a in b` in `for a in b` gets parsed
1231
            // as a comparison, and so we need to search the left side of the comparison
1232
            // for invalid targets.
1233
685
            if (targets_type == FOR_TARGETS) {
1234
677
                cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0);
1235
677
                if (cmpop == In) {
1236
42
                    return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
1237
42
                }
1238
635
                return NULL;
1239
677
            }
1240
8
            return e;
1241
2.23k
        case Name_kind:
1242
2.52k
        case Subscript_kind:
1243
2.77k
        case Attribute_kind:
1244
2.77k
            return NULL;
1245
105
        default:
1246
105
            return e;
1247
6.25k
    }
1248
6.25k
}
1249
1250
35
/* Raise the SyntaxError for a positional argument that follows keyword
 * arguments in the call `e`.  The message mentions "keyword argument
 * unpacking" when at least one keyword of the call has no name (i.e. it is
 * a `**mapping` unpacking). */
void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
    asdl_keyword_seq *keywords = e->v.Call.keywords;
    Py_ssize_t n_keywords = asdl_seq_LEN(keywords);

    int saw_unpacking = 0;
    for (Py_ssize_t k = 0; k < n_keywords && !saw_unpacking; k++) {
        keyword_ty kw = asdl_seq_GET(keywords, k);
        saw_unpacking = (kw->arg == NULL);
    }

    const char *msg = saw_unpacking
        ? "positional argument follows keyword argument unpacking"
        : "positional argument follows keyword argument";

    return RAISE_SYNTAX_ERROR(msg);
}
1268
1269
void *
1270
_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions)
1271
224
{
1272
    /* The rule that calls this function is 'args for_if_clauses'.
1273
       For the input f(L, x for x in y), L and x are in args and
1274
       the for is parsed as a for_if_clause. We have to check if
1275
       len <= 1, so that input like dict((a, b) for a, b in x)
1276
       gets successfully parsed and then we pass the last
1277
       argument (x in the above example) as the location of the
1278
       error */
1279
224
    Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
1280
224
    if (len <= 1) {
1281
221
        return NULL;
1282
221
    }
1283
1284
3
    comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty);
1285
1286
3
    return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
1287
224
        (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
1288
224
        _PyPegen_get_last_comprehension_item(last_comprehension),
1289
224
        "Generator expression must be parenthesized"
1290
224
    );
1291
224
}
1292
1293
// Fstring stuff
1294
1295
static expr_ty
1296
25.4k
_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* token) {
1297
25.4k
    assert(PyUnicode_CheckExact(constant->v.Constant.value));
1298
1299
25.4k
    const char* bstr = PyUnicode_AsUTF8(constant->v.Constant.value);
1300
25.4k
    if (bstr == NULL) {
1301
0
        return NULL;
1302
0
    }
1303
1304
25.4k
    size_t len;
1305
25.4k
    if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) {
1306
0
        len = 1;
1307
25.4k
    } else {
1308
25.4k
        len = strlen(bstr);
1309
25.4k
    }
1310
1311
25.4k
    is_raw = is_raw || strchr(bstr, '\\') == NULL;
1312
25.4k
    PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, token);
1313
25.4k
    if (str == NULL) {
1314
7
        _Pypegen_raise_decode_error(p);
1315
7
        return NULL;
1316
7
    }
1317
25.4k
    if (_PyArena_AddPyObject(p->arena, str) < 0) {
1318
0
        Py_DECREF(str);
1319
0
        return NULL;
1320
0
    }
1321
25.4k
    return _PyAST_Constant(str, NULL, constant->lineno, constant->col_offset,
1322
25.4k
                           constant->end_lineno, constant->end_col_offset,
1323
25.4k
                           p->arena);
1324
25.4k
}
1325
1326
static asdl_expr_seq *
1327
_get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b, enum string_kind_t string_kind)
1328
17.2k
{
1329
17.2k
    Py_ssize_t n_items = asdl_seq_LEN(raw_expressions);
1330
17.2k
    Py_ssize_t total_items = n_items;
1331
63.6k
    for (Py_ssize_t i = 0; i < n_items; i++) {
1332
46.3k
        expr_ty item = asdl_seq_GET(raw_expressions, i);
1333
46.3k
        if (item->kind == JoinedStr_kind) {
1334
5.73k
            total_items += asdl_seq_LEN(item->v.JoinedStr.values) - 1;
1335
5.73k
        }
1336
46.3k
    }
1337
1338
17.2k
    const char* quote_str = PyBytes_AsString(a->bytes);
1339
17.2k
    if (quote_str == NULL) {
1340
0
        return NULL;
1341
0
    }
1342
17.2k
    int is_raw = strpbrk(quote_str, "rR") != NULL;
1343
1344
17.2k
    asdl_expr_seq *seq = _Py_asdl_expr_seq_new(total_items, p->arena);
1345
17.2k
    if (seq == NULL) {
1346
0
        return NULL;
1347
0
    }
1348
1349
17.2k
    Py_ssize_t index = 0;
1350
63.6k
    for (Py_ssize_t i = 0; i < n_items; i++) {
1351
46.3k
        expr_ty item = asdl_seq_GET(raw_expressions, i);
1352
1353
        // This should correspond to a JoinedStr node of two elements
1354
        // created _PyPegen_formatted_value. This situation can only be the result of
1355
        // a (f|t)-string debug expression where the first element is a constant with the text and the second
1356
        // a formatted value with the expression.
1357
46.3k
        if (item->kind == JoinedStr_kind) {
1358
5.73k
            asdl_expr_seq *values = item->v.JoinedStr.values;
1359
5.73k
            if (asdl_seq_LEN(values) != 2) {
1360
0
                PyErr_Format(PyExc_SystemError,
1361
0
                             string_kind == TSTRING
1362
0
                             ? "unexpected TemplateStr node without debug data in t-string at line %d"
1363
0
                             : "unexpected JoinedStr node without debug data in f-string at line %d",
1364
0
                             item->lineno);
1365
0
                return NULL;
1366
0
            }
1367
1368
5.73k
            expr_ty first = asdl_seq_GET(values, 0);
1369
5.73k
            assert(first->kind == Constant_kind);
1370
5.73k
            asdl_seq_SET(seq, index++, first);
1371
1372
5.73k
            expr_ty second = asdl_seq_GET(values, 1);
1373
5.73k
            assert((string_kind == TSTRING && second->kind == Interpolation_kind) || second->kind == FormattedValue_kind);
1374
5.73k
            asdl_seq_SET(seq, index++, second);
1375
1376
5.73k
            continue;
1377
5.73k
        }
1378
1379
40.6k
        if (item->kind == Constant_kind) {
1380
25.4k
            item = _PyPegen_decode_fstring_part(p, is_raw, item, b);
1381
25.4k
            if (item == NULL) {
1382
7
                return NULL;
1383
7
            }
1384
1385
            /* Tokenizer emits string parts even when the underlying string
1386
            might become an empty value (e.g. FSTRING_MIDDLE with the value \\n)
1387
            so we need to check for them and simplify it here. */
1388
25.4k
            if (PyUnicode_CheckExact(item->v.Constant.value)
1389
25.4k
                && PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
1390
94
                continue;
1391
94
            }
1392
25.4k
        }
1393
40.5k
        asdl_seq_SET(seq, index++, item);
1394
40.5k
    }
1395
1396
17.2k
    asdl_expr_seq *resized_exprs;
1397
17.2k
    if (index != total_items) {
1398
94
        resized_exprs = _Py_asdl_expr_seq_new(index, p->arena);
1399
94
        if (resized_exprs == NULL) {
1400
0
            return NULL;
1401
0
        }
1402
152
        for (Py_ssize_t i = 0; i < index; i++) {
1403
58
            asdl_seq_SET(resized_exprs, i, asdl_seq_GET(seq, i));
1404
58
        }
1405
94
    }
1406
17.1k
    else {
1407
17.1k
        resized_exprs = seq;
1408
17.1k
    }
1409
17.2k
    return resized_exprs;
1410
17.2k
}
1411
1412
expr_ty
1413
3.98k
_PyPegen_template_str(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b) {
1414
1415
3.98k
    asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, TSTRING);
1416
3.98k
    return _PyAST_TemplateStr(resized_exprs, a->lineno, a->col_offset,
1417
3.98k
                              b->end_lineno, b->end_col_offset,
1418
3.98k
                              p->arena);
1419
3.98k
}
1420
1421
expr_ty
1422
13.2k
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
1423
1424
13.2k
    asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, FSTRING);
1425
13.2k
    return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset,
1426
13.2k
                            b->end_lineno, b->end_col_offset,
1427
13.2k
                            p->arena);
1428
13.2k
}
1429
1430
6.38k
/* Build a Constant node holding the decoded text of token `tok`.
 *
 * When the tokenizer is currently inside an f-string, the raw flag of the
 * current tokenizer mode decides whether the text is decoded as raw (this
 * matters for format specs).  Returns NULL on failure. */
expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
    char *text = NULL;
    Py_ssize_t text_len = 0;
    if (PyBytes_AsStringAndSize(tok->bytes, &text, &text_len) < 0) {
        return NULL;
    }

    int is_raw = 0;
    if (INSIDE_FSTRING(p->tok)) {
        tokenizer_mode *current_mode = TOK_GET_MODE(p->tok);
        is_raw = current_mode->raw;
    }

    PyObject *decoded = _PyPegen_decode_string(p, is_raw, text, text_len, tok);
    if (!decoded) {
        return NULL;
    }
    if (_PyArena_AddPyObject(p->arena, decoded) < 0) {
        Py_DECREF(decoded);
        return NULL;
    }
    return _PyAST_Constant(decoded, NULL,
                           tok->lineno, tok->col_offset,
                           tok->end_lineno, tok->end_col_offset,
                           p->arena);
}
1456
1457
29.9k
/* Build a Constant node whose value is the text of token `tok`, converted
 * to str with PyUnicode_FromString() and without any further decoding.
 * Returns NULL on failure. */
expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok) {
    char *text = PyBytes_AsString(tok->bytes);
    if (!text) {
        return NULL;
    }

    PyObject *value = PyUnicode_FromString(text);
    if (!value) {
        return NULL;
    }
    if (_PyArena_AddPyObject(p->arena, value) < 0) {
        Py_DECREF(value);
        return NULL;
    }

    return _PyAST_Constant(value, NULL,
                           tok->lineno, tok->col_offset,
                           tok->end_lineno, tok->end_col_offset,
                           p->arena);
}
1474
1475
46.5k
/* Build a Constant node from the string literal token `tok`.
 *
 * The literal is parsed with _PyPegen_parse_string(); a parse failure is
 * reported through _Pypegen_raise_decode_error().  When the token text
 * starts with 'u' the node's kind is set to the identifier "u", otherwise
 * the kind is left NULL.  Returns NULL on failure. */
expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
    char *literal = PyBytes_AsString(tok->bytes);
    if (!literal) {
        return NULL;
    }

    PyObject *value = _PyPegen_parse_string(p, tok);
    if (!value) {
        _Pypegen_raise_decode_error(p);
        return NULL;
    }
    if (_PyArena_AddPyObject(p->arena, value) < 0) {
        Py_DECREF(value);
        return NULL;
    }

    PyObject *kind = NULL;
    /* `literal` is known to be non-NULL at this point. */
    if (literal[0] == 'u') {
        kind = _PyPegen_new_identifier(p, "u");
        if (!kind) {
            return NULL;
        }
    }

    return _PyAST_Constant(value, kind, tok->lineno, tok->col_offset,
                           tok->end_lineno, tok->end_col_offset, p->arena);
}
1498
1499
static int
1500
_get_interpolation_conversion(Parser *p, Token *debug, ResultTokenWithMetadata *conversion,
1501
                              ResultTokenWithMetadata *format)
1502
27.0k
{
1503
27.0k
    if (conversion != NULL) {
1504
1.36k
        expr_ty conversion_expr = (expr_ty) conversion->result;
1505
1.36k
        assert(conversion_expr->kind == Name_kind);
1506
1.36k
        Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);
1507
1.36k
        return Py_SAFE_DOWNCAST(first, Py_UCS4, int);
1508
1.36k
    }
1509
25.6k
    else if (debug && !format) {
1510
        /* If no conversion is specified, use !r for debug expressions */
1511
5.70k
        return (int)'r';
1512
5.70k
    }
1513
19.9k
    return -1;
1514
27.0k
}
1515
1516
/* Return a new str equal to `exprstr` with every trailing whitespace
 * character and '=' character removed. */
static PyObject *
_strip_interpolation_expr(PyObject *exprstr)
{
    Py_ssize_t keep = PyUnicode_GET_LENGTH(exprstr);

    while (keep > 0) {
        Py_UCS4 last = PyUnicode_READ_CHAR(exprstr, keep - 1);
        if (!_PyUnicode_IsWhitespace(last) && last != '=') {
            break;
        }
        keep--;
    }

    return PyUnicode_Substring(exprstr, 0, keep);
}
1533
1534
/* Build the node for one replacement field of a t-string.
 *
 * The Interpolation node records the expression, its source text (the
 * metadata of the token that ends the expression, with trailing whitespace
 * and '=' stripped), the conversion and the optional format spec.
 *
 * Without `debug` the Interpolation node itself is returned.  For a debug
 * expression ("{expr=}") the result is a two-element JoinedStr: a Constant
 * with the debug text followed by the Interpolation node.
 *
 * Returns NULL on failure.  Unlike earlier versions, a failed Interpolation
 * node or a failed allocation of the two-element sequence is reported as
 * NULL instead of being wrapped (or dereferenced) in the debug path. */
expr_ty _PyPegen_interpolation(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
                                 ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
                                 int end_lineno, int end_col_offset, PyArena *arena) {

    int conversion_val = _get_interpolation_conversion(p, debug, conversion, format);

    /* Find the non whitespace token after the "=" */
    int debug_end_line, debug_end_offset;
    PyObject *debug_metadata;
    constant exprstr;

    if (conversion) {
        debug_end_line = ((expr_ty) conversion->result)->lineno;
        debug_end_offset = ((expr_ty) conversion->result)->col_offset;
        debug_metadata = exprstr = conversion->metadata;
    }
    else if (format) {
        debug_end_line = ((expr_ty) format->result)->lineno;
        debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
        debug_metadata = exprstr = format->metadata;
    }
    else {
        debug_end_line = end_lineno;
        debug_end_offset = end_col_offset;
        debug_metadata = exprstr = closing_brace->metadata;
    }

    assert(exprstr != NULL);
    PyObject *final_exprstr = _strip_interpolation_expr(exprstr);
    if (!final_exprstr || _PyArena_AddPyObject(arena, final_exprstr) < 0) {
        Py_XDECREF(final_exprstr);
        return NULL;
    }

    expr_ty interpolation = _PyAST_Interpolation(
        expression, final_exprstr, conversion_val, format ? (expr_ty) format->result : NULL,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );
    if (!interpolation) {
        /* Do not wrap a failed node into the debug JoinedStr below. */
        return NULL;
    }

    if (!debug) {
        return interpolation;
    }

    expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
                                            debug_end_offset - 1, p->arena);
    if (!debug_text) {
        return NULL;
    }

    asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
    if (!values) {
        /* Allocation failed: asdl_seq_SET() would dereference NULL. */
        return NULL;
    }
    asdl_seq_SET(values, 0, debug_text);
    asdl_seq_SET(values, 1, interpolation);
    return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
}
1589
1590
/* Build the node for one replacement field of an f-string.
 *
 * Without `debug` the FormattedValue node itself is returned.  For a debug
 * expression ("{expr=}") the result is a two-element JoinedStr: a Constant
 * with the debug text (the metadata of the token that ends the expression)
 * followed by the FormattedValue node.
 *
 * Returns NULL on failure.  Unlike earlier versions, a failed
 * FormattedValue node or a failed allocation of the two-element sequence is
 * reported as NULL instead of being wrapped (or dereferenced) in the debug
 * path. */
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
                                 ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
                                 int end_lineno, int end_col_offset, PyArena *arena) {
    int conversion_val = _get_interpolation_conversion(p, debug, conversion, format);

    expr_ty formatted_value = _PyAST_FormattedValue(
        expression, conversion_val, format ? (expr_ty) format->result : NULL,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );
    if (!formatted_value) {
        /* Do not wrap a failed node into the debug JoinedStr below. */
        return NULL;
    }

    if (!debug) {
        return formatted_value;
    }

    /* Find the non whitespace token after the "=" */
    int debug_end_line, debug_end_offset;
    PyObject *debug_metadata;

    if (conversion) {
        debug_end_line = ((expr_ty) conversion->result)->lineno;
        debug_end_offset = ((expr_ty) conversion->result)->col_offset;
        debug_metadata = conversion->metadata;
    }
    else if (format) {
        debug_end_line = ((expr_ty) format->result)->lineno;
        debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
        debug_metadata = format->metadata;
    }
    else {
        debug_end_line = end_lineno;
        debug_end_offset = end_col_offset;
        debug_metadata = closing_brace->metadata;
    }
    expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
                                            debug_end_offset - 1, p->arena);
    if (!debug_text) {
        return NULL;
    }

    asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
    if (!values) {
        /* Allocation failed: asdl_seq_SET() would dereference NULL. */
        return NULL;
    }
    asdl_seq_SET(values, 0, debug_text);
    asdl_seq_SET(values, 1, formatted_value);
    return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
}
1635
1636
static expr_ty
1637
_build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno,
1638
                        int col_offset, int end_lineno, int end_col_offset,
1639
                        PyArena *arena)
1640
639
{
1641
639
    Py_ssize_t len = asdl_seq_LEN(strings);
1642
639
    assert(len > 0);
1643
1644
    /* Bytes literals never get a kind, but just for consistency
1645
        since they are represented as Constant nodes, we'll mirror
1646
        the same behavior as unicode strings for determining the
1647
        kind. */
1648
639
    PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
1649
1650
639
    Py_ssize_t total = 0;
1651
2.69k
    for (Py_ssize_t i = 0; i < len; i++) {
1652
2.05k
        expr_ty elem = asdl_seq_GET(strings, i);
1653
2.05k
        PyObject *bytes = elem->v.Constant.value;
1654
2.05k
        Py_ssize_t part = PyBytes_GET_SIZE(bytes);
1655
2.05k
        if (part > PY_SSIZE_T_MAX - total) {
1656
0
            PyErr_NoMemory();
1657
0
            return NULL;
1658
0
        }
1659
2.05k
        total += part;
1660
2.05k
    }
1661
1662
639
    PyBytesWriter *writer = PyBytesWriter_Create(total);
1663
639
    if (writer == NULL) {
1664
0
        return NULL;
1665
0
    }
1666
639
    char *out = PyBytesWriter_GetData(writer);
1667
1668
2.69k
    for (Py_ssize_t i = 0; i < len; i++) {
1669
2.05k
        expr_ty elem = asdl_seq_GET(strings, i);
1670
2.05k
        PyObject *bytes = elem->v.Constant.value;
1671
2.05k
        Py_ssize_t part = PyBytes_GET_SIZE(bytes);
1672
2.05k
        if (part > 0) {
1673
337
            memcpy(out, PyBytes_AS_STRING(bytes), part);
1674
337
            out += part;
1675
337
        }
1676
2.05k
    }
1677
1678
639
    PyObject *res = PyBytesWriter_Finish(writer);
1679
639
    if (res == NULL) {
1680
0
        return NULL;
1681
0
    }
1682
639
    if (_PyArena_AddPyObject(arena, res) < 0) {
1683
0
        Py_DECREF(res);
1684
0
        return NULL;
1685
0
    }
1686
639
    return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
1687
639
}
1688
1689
static expr_ty
1690
_build_concatenated_unicode(Parser *p, asdl_expr_seq *strings, int lineno,
1691
                        int col_offset, int end_lineno, int end_col_offset,
1692
                        PyArena *arena)
1693
1.18k
{
1694
1.18k
    Py_ssize_t len = asdl_seq_LEN(strings);
1695
1.18k
    assert(len > 1);
1696
1697
1.18k
    expr_ty first = asdl_seq_GET(strings, 0);
1698
1699
    /* When a string is getting concatenated, the kind of the string
1700
        is determined by the first string in the concatenation
1701
        sequence.
1702
1703
        u"abc" "def" -> u"abcdef"
1704
        "abc" u"abc" ->  "abcabc" */
1705
1.18k
    PyObject *kind = first->v.Constant.kind;
1706
1707
1.18k
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
1708
1.18k
    if (writer == NULL) {
1709
0
        return NULL;
1710
0
    }
1711
1712
21.7k
    for (Py_ssize_t i = 0; i < len; i++) {
1713
20.5k
        expr_ty current_elem = asdl_seq_GET(strings, i);
1714
20.5k
        assert(current_elem->kind == Constant_kind);
1715
1716
20.5k
        if (PyUnicodeWriter_WriteStr(writer,
1717
20.5k
                                     current_elem->v.Constant.value)) {
1718
0
            PyUnicodeWriter_Discard(writer);
1719
0
            return NULL;
1720
0
        }
1721
20.5k
    }
1722
1723
1.18k
    PyObject *final = PyUnicodeWriter_Finish(writer);
1724
1.18k
    if (final == NULL) {
1725
0
        return NULL;
1726
0
    }
1727
1.18k
    if (_PyArena_AddPyObject(p->arena, final) < 0) {
1728
0
        Py_DECREF(final);
1729
0
        return NULL;
1730
0
    }
1731
1.18k
    return _PyAST_Constant(final, kind, lineno, col_offset,
1732
1.18k
                           end_lineno, end_col_offset, arena);
1733
1.18k
}
1734
1735
static asdl_expr_seq *
1736
_build_concatenated_str(Parser *p, asdl_expr_seq *strings,
1737
                               int lineno, int col_offset, int end_lineno,
1738
                               int end_col_offset, PyArena *arena)
1739
11.2k
{
1740
11.2k
    Py_ssize_t len = asdl_seq_LEN(strings);
1741
11.2k
    assert(len > 0);
1742
1743
11.2k
    Py_ssize_t n_flattened_elements = 0;
1744
34.1k
    for (Py_ssize_t i = 0; i < len; i++) {
1745
22.9k
        expr_ty elem = asdl_seq_GET(strings, i);
1746
22.9k
        switch(elem->kind) {
1747
11.3k
            case JoinedStr_kind:
1748
11.3k
                n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
1749
11.3k
                break;
1750
3.81k
            case TemplateStr_kind:
1751
3.81k
                n_flattened_elements += asdl_seq_LEN(elem->v.TemplateStr.values);
1752
3.81k
                break;
1753
7.72k
            default:
1754
7.72k
                n_flattened_elements++;
1755
7.72k
                break;
1756
22.9k
        }
1757
22.9k
    }
1758
1759
1760
11.2k
    asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena);
1761
11.2k
    if (flattened == NULL) {
1762
0
        return NULL;
1763
0
    }
1764
1765
    /* build flattened list */
1766
11.2k
    Py_ssize_t current_pos = 0;
1767
34.1k
    for (Py_ssize_t i = 0; i < len; i++) {
1768
22.9k
        expr_ty elem = asdl_seq_GET(strings, i);
1769
22.9k
        switch(elem->kind) {
1770
11.3k
            case JoinedStr_kind:
1771
52.7k
                for (Py_ssize_t j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
1772
41.3k
                    expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
1773
41.3k
                    if (subvalue == NULL) {
1774
0
                        return NULL;
1775
0
                    }
1776
41.3k
                    asdl_seq_SET(flattened, current_pos++, subvalue);
1777
41.3k
                }
1778
11.3k
                break;
1779
11.3k
            case TemplateStr_kind:
1780
10.2k
                for (Py_ssize_t j = 0; j < asdl_seq_LEN(elem->v.TemplateStr.values); j++) {
1781
6.39k
                    expr_ty subvalue = asdl_seq_GET(elem->v.TemplateStr.values, j);
1782
6.39k
                    if (subvalue == NULL) {
1783
0
                        return NULL;
1784
0
                    }
1785
6.39k
                    asdl_seq_SET(flattened, current_pos++, subvalue);
1786
6.39k
                }
1787
3.81k
                break;
1788
7.72k
            default:
1789
7.72k
                asdl_seq_SET(flattened, current_pos++, elem);
1790
7.72k
                break;
1791
22.9k
        }
1792
22.9k
    }
1793
1794
    /* calculate folded element count */
1795
11.2k
    Py_ssize_t n_elements = 0;
1796
11.2k
    int prev_is_constant = 0;
1797
66.7k
    for (Py_ssize_t i = 0; i < n_flattened_elements; i++) {
1798
55.4k
        expr_ty elem = asdl_seq_GET(flattened, i);
1799
1800
        /* The concatenation of a FormattedValue and an empty Constant should
1801
           lead to the FormattedValue itself. Thus, we will not take any empty
1802
           constants into account, just as in `_PyPegen_joined_str` */
1803
55.4k
        if (elem->kind == Constant_kind &&
1804
55.4k
            PyUnicode_CheckExact(elem->v.Constant.value) &&
1805
32.8k
            PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0)
1806
902
            continue;
1807
1808
54.5k
        if (!prev_is_constant || elem->kind != Constant_kind) {
1809
46.1k
            n_elements++;
1810
46.1k
        }
1811
54.5k
        prev_is_constant = elem->kind == Constant_kind;
1812
54.5k
    }
1813
1814
11.2k
    asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena);
1815
11.2k
    if (values == NULL) {
1816
0
        return NULL;
1817
0
    }
1818
1819
    /* build folded list */
1820
11.2k
    current_pos = 0;
1821
57.6k
    for (Py_ssize_t i = 0; i < n_flattened_elements; i++) {
1822
46.3k
        expr_ty elem = asdl_seq_GET(flattened, i);
1823
1824
        /* if the current elem and the following are constants,
1825
           fold them and all consequent constants */
1826
46.3k
        if (elem->kind == Constant_kind) {
1827
23.7k
            if (i + 1 < n_flattened_elements &&
1828
19.4k
                asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) {
1829
3.49k
                expr_ty first_elem = elem;
1830
1831
                /* When a string is getting concatenated, the kind of the string
1832
                   is determined by the first string in the concatenation
1833
                   sequence.
1834
1835
                   u"abc" "def" -> u"abcdef"
1836
                   "abc" u"abc" ->  "abcabc" */
1837
3.49k
                PyObject *kind = elem->v.Constant.kind;
1838
1839
3.49k
                PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
1840
3.49k
                if (writer == NULL) {
1841
0
                    return NULL;
1842
0
                }
1843
3.49k
                expr_ty last_elem = elem;
1844
3.49k
                Py_ssize_t j;
1845
16.1k
                for (j = i; j < n_flattened_elements; j++) {
1846
14.9k
                    expr_ty current_elem = asdl_seq_GET(flattened, j);
1847
14.9k
                    if (current_elem->kind == Constant_kind) {
1848
12.6k
                        if (PyUnicodeWriter_WriteStr(writer,
1849
12.6k
                                                     current_elem->v.Constant.value)) {
1850
0
                            PyUnicodeWriter_Discard(writer);
1851
0
                            return NULL;
1852
0
                        }
1853
12.6k
                        last_elem = current_elem;
1854
12.6k
                    } else {
1855
2.29k
                        break;
1856
2.29k
                    }
1857
14.9k
                }
1858
3.49k
                i = j - 1;
1859
1860
3.49k
                PyObject *concat_str = PyUnicodeWriter_Finish(writer);
1861
3.49k
                if (concat_str == NULL) {
1862
0
                    return NULL;
1863
0
                }
1864
3.49k
                if (_PyArena_AddPyObject(p->arena, concat_str) < 0) {
1865
0
                    Py_DECREF(concat_str);
1866
0
                    return NULL;
1867
0
                }
1868
3.49k
                elem = _PyAST_Constant(concat_str, kind, first_elem->lineno,
1869
3.49k
                                       first_elem->col_offset,
1870
3.49k
                                       last_elem->end_lineno,
1871
3.49k
                                       last_elem->end_col_offset, p->arena);
1872
3.49k
                if (elem == NULL) {
1873
0
                    return NULL;
1874
0
                }
1875
3.49k
            }
1876
1877
            /* Drop all empty contanst strings */
1878
23.7k
            if (PyUnicode_CheckExact(elem->v.Constant.value) &&
1879
23.7k
                PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) {
1880
256
                continue;
1881
256
            }
1882
23.7k
        }
1883
1884
46.1k
        asdl_seq_SET(values, current_pos++, elem);
1885
46.1k
    }
1886
1887
11.2k
    assert(current_pos == n_elements);
1888
11.2k
    return values;
1889
11.2k
}
1890
1891
static expr_ty
1892
_build_concatenated_joined_str(Parser *p, asdl_expr_seq *strings,
1893
                               int lineno, int col_offset, int end_lineno,
1894
                               int end_col_offset, PyArena *arena)
1895
8.56k
{
1896
8.56k
    asdl_expr_seq *values = _build_concatenated_str(p, strings, lineno,
1897
8.56k
        col_offset, end_lineno, end_col_offset, arena);
1898
8.56k
    return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
1899
8.56k
}
1900
1901
expr_ty
1902
_PyPegen_concatenate_tstrings(Parser *p, asdl_expr_seq *strings,
1903
                               int lineno, int col_offset, int end_lineno,
1904
                               int end_col_offset, PyArena *arena)
1905
2.69k
{
1906
2.69k
    asdl_expr_seq *values = _build_concatenated_str(p, strings, lineno,
1907
2.69k
        col_offset, end_lineno, end_col_offset, arena);
1908
2.69k
    return _PyAST_TemplateStr(values, lineno, col_offset, end_lineno,
1909
2.69k
        end_col_offset, arena);
1910
2.69k
}
1911
1912
expr_ty
1913
_PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
1914
                             int lineno, int col_offset, int end_lineno,
1915
                             int end_col_offset, PyArena *arena)
1916
25.7k
{
1917
25.7k
    Py_ssize_t len = asdl_seq_LEN(strings);
1918
25.7k
    assert(len > 0);
1919
1920
25.7k
    int f_string_found = 0;
1921
25.7k
    int unicode_string_found = 0;
1922
25.7k
    int bytes_found = 0;
1923
1924
25.7k
    Py_ssize_t i = 0;
1925
83.0k
    for (i = 0; i < len; i++) {
1926
57.2k
        expr_ty elem = asdl_seq_GET(strings, i);
1927
57.2k
        switch(elem->kind) {
1928
43.1k
            case Constant_kind:
1929
43.1k
                if (PyBytes_CheckExact(elem->v.Constant.value)) {
1930
3.87k
                    bytes_found = 1;
1931
39.3k
                } else {
1932
39.3k
                    unicode_string_found = 1;
1933
39.3k
                }
1934
43.1k
                break;
1935
11.3k
            case JoinedStr_kind:
1936
11.3k
                f_string_found = 1;
1937
11.3k
                break;
1938
0
            case TemplateStr_kind:
1939
                // python.gram handles this; we should never get here
1940
0
                assert(0);
1941
0
                break;
1942
2.73k
            default:
1943
2.73k
                f_string_found = 1;
1944
2.73k
                break;
1945
57.2k
        }
1946
57.2k
    }
1947
1948
    // Cannot mix unicode and bytes
1949
25.7k
    if ((unicode_string_found || f_string_found) && bytes_found) {
1950
7
        RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
1951
7
        return NULL;
1952
7
    }
1953
1954
    // If it's only bytes or only unicode string, do a simple concat
1955
25.7k
    if (!f_string_found) {
1956
17.2k
        if (len == 1) {
1957
15.4k
            return asdl_seq_GET(strings, 0);
1958
15.4k
        }
1959
1.81k
        else if (bytes_found) {
1960
639
            return _build_concatenated_bytes(p, strings, lineno, col_offset,
1961
639
                end_lineno, end_col_offset, arena);
1962
639
        }
1963
1.18k
        else {
1964
1.18k
            return _build_concatenated_unicode(p, strings, lineno, col_offset,
1965
1.18k
                end_lineno, end_col_offset, arena);
1966
1.18k
        }
1967
17.2k
    }
1968
1969
8.56k
    return _build_concatenated_joined_str(p, strings, lineno,
1970
8.56k
        col_offset, end_lineno, end_col_offset, arena);
1971
25.7k
}
1972
1973
stmt_ty
1974
_PyPegen_checked_future_import(Parser *p, identifier module, asdl_alias_seq * names,
1975
                               int level, expr_ty lazy_token, int lineno,
1976
                               int col_offset, int end_lineno, int end_col_offset,
1977
1.79k
                               PyArena *arena) {
1978
1.79k
    if (level == 0 && PyUnicode_CompareWithASCIIString(module, "__future__") == 0) {
1979
586
        if (lazy_token) {
1980
1
            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(lazy_token,
1981
1
                "lazy from __future__ import is not allowed");
1982
1
            return NULL;
1983
1
        }
1984
1.41k
        for (Py_ssize_t i = 0; i < asdl_seq_LEN(names); i++) {
1985
827
            alias_ty alias = asdl_seq_GET(names, i);
1986
827
            if (PyUnicode_CompareWithASCIIString(alias->name, "barry_as_FLUFL") == 0) {
1987
183
                p->flags |= PyPARSE_BARRY_AS_BDFL;
1988
183
            }
1989
827
        }
1990
585
    }
1991
1.79k
    return _PyAST_ImportFrom(module, names, level, lazy_token ? 1 : 0, lineno,
1992
1.79k
                             col_offset, end_lineno, end_col_offset, arena);
1993
1.79k
}
1994
1995
asdl_stmt_seq*
1996
24.1k
_PyPegen_register_stmts(Parser *p, asdl_stmt_seq* stmts) {
1997
24.1k
    if (!p->call_invalid_rules) {
1998
18.0k
        return stmts;
1999
18.0k
    }
2000
6.11k
    Py_ssize_t len = asdl_seq_LEN(stmts);
2001
6.11k
    if (len == 0) {
2002
0
        return stmts;
2003
0
    }
2004
6.11k
    stmt_ty last_stmt = asdl_seq_GET(stmts, len - 1);
2005
6.11k
    if (p->last_stmt_location.lineno > last_stmt->lineno) {
2006
142
        return stmts;
2007
142
    }
2008
5.97k
    p->last_stmt_location.lineno = last_stmt->lineno;
2009
5.97k
    p->last_stmt_location.col_offset = last_stmt->col_offset;
2010
5.97k
    p->last_stmt_location.end_lineno = last_stmt->end_lineno;
2011
5.97k
    p->last_stmt_location.end_col_offset = last_stmt->end_col_offset;
2012
5.97k
    return stmts;
2013
6.11k
}