Coverage Report

Created: 2026-06-09 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/action_helpers.c
Line
Count
Source
1
#include <Python.h>
2
#include "pycore_pystate.h"         // _PyInterpreterState_GET()
3
#include "pycore_runtime.h"         // _PyRuntime
4
#include "pycore_unicodeobject.h"   // _PyUnicode_InternImmortal()
5
6
#include "pegen.h"
7
#include "string_parser.h"          // _PyPegen_decode_string()
8
9
10
void *
11
_PyPegen_dummy_name(Parser *p, ...)
12
62.0k
{
13
62.0k
    return &_PyRuntime.parser.dummy_name;
14
62.0k
}
15
16
/* Creates a single-element asdl_seq* that contains a */
17
asdl_seq *
18
_PyPegen_singleton_seq(Parser *p, void *a)
19
137k
{
20
137k
    assert(a != NULL);
21
137k
    asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena);
22
137k
    if (!seq) {
23
0
        return NULL;
24
0
    }
25
137k
    asdl_seq_SET_UNTYPED(seq, 0, a);
26
137k
    return seq;
27
137k
}
28
29
/* Creates a copy of seq and prepends a to it */
30
asdl_seq *
31
_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
32
324k
{
33
324k
    assert(a != NULL);
34
324k
    if (!seq) {
35
9.67k
        return _PyPegen_singleton_seq(p, a);
36
9.67k
    }
37
38
315k
    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
39
315k
    if (!new_seq) {
40
0
        return NULL;
41
0
    }
42
43
315k
    asdl_seq_SET_UNTYPED(new_seq, 0, a);
44
577k
    for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
45
261k
        asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1));
46
261k
    }
47
315k
    return new_seq;
48
315k
}
49
50
/* Creates a copy of seq and appends a to it */
51
asdl_seq *
52
_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
53
0
{
54
0
    assert(a != NULL);
55
0
    if (!seq) {
56
0
        return _PyPegen_singleton_seq(p, a);
57
0
    }
58
59
0
    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
60
0
    if (!new_seq) {
61
0
        return NULL;
62
0
    }
63
64
0
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
65
0
        asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i));
66
0
    }
67
0
    asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a);
68
0
    return new_seq;
69
0
}
70
71
static Py_ssize_t
72
_get_flattened_seq_size(asdl_seq *seqs)
73
20.0k
{
74
20.0k
    Py_ssize_t size = 0;
75
105k
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
76
85.8k
        asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
77
85.8k
        size += asdl_seq_LEN(inner_seq);
78
85.8k
    }
79
20.0k
    return size;
80
20.0k
}
81
82
/* Flattens an asdl_seq* of asdl_seq*s */
83
asdl_seq *
84
_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
85
20.0k
{
86
20.0k
    Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
87
20.0k
    assert(flattened_seq_size > 0);
88
89
20.0k
    asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
90
20.0k
    if (!flattened_seq) {
91
0
        return NULL;
92
0
    }
93
94
20.0k
    int flattened_seq_idx = 0;
95
105k
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
96
85.8k
        asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
97
179k
        for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
98
93.4k
            asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
99
93.4k
        }
100
85.8k
    }
101
20.0k
    assert(flattened_seq_idx == flattened_seq_size);
102
103
20.0k
    return flattened_seq;
104
20.0k
}
105
106
void *
107
_PyPegen_seq_last_item(asdl_seq *seq)
108
104
{
109
104
    Py_ssize_t len = asdl_seq_LEN(seq);
110
104
    return asdl_seq_GET_UNTYPED(seq, len - 1);
111
104
}
112
113
void *
114
_PyPegen_seq_first_item(asdl_seq *seq)
115
50
{
116
50
    return asdl_seq_GET_UNTYPED(seq, 0);
117
50
}
118
119
/* Creates a new name of the form <first_name>.<second_name> */
120
expr_ty
121
_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
122
8.59k
{
123
8.59k
    assert(first_name != NULL && second_name != NULL);
124
8.59k
    PyObject *uni = PyUnicode_FromFormat("%U.%U",
125
8.59k
            first_name->v.Name.id, second_name->v.Name.id);
126
8.59k
    if (!uni) {
127
0
        return NULL;
128
0
    }
129
8.59k
    PyInterpreterState *interp = _PyInterpreterState_GET();
130
8.59k
    _PyUnicode_InternImmortal(interp, &uni);
131
8.59k
    if (_PyArena_AddPyObject(p->arena, uni) < 0) {
132
0
        Py_DECREF(uni);
133
0
        return NULL;
134
0
    }
135
136
8.59k
    return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
137
8.59k
}
138
139
/* Counts the total number of dots in seq's tokens */
140
int
141
_PyPegen_seq_count_dots(asdl_seq *seq)
142
2.66k
{
143
2.66k
    int number_of_dots = 0;
144
5.60k
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
145
2.94k
        Token *current_expr = asdl_seq_GET_UNTYPED(seq, i);
146
2.94k
        switch (current_expr->type) {
147
988
            case ELLIPSIS:
148
988
                number_of_dots += 3;
149
988
                break;
150
1.96k
            case DOT:
151
1.96k
                number_of_dots += 1;
152
1.96k
                break;
153
0
            default:
154
0
                Py_UNREACHABLE();
155
2.94k
        }
156
2.94k
    }
157
158
2.66k
    return number_of_dots;
159
2.66k
}
160
161
/* Creates an alias with '*' as the identifier name */
162
alias_ty
163
_PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno,
164
206
                        int end_col_offset, PyArena *arena) {
165
206
    PyObject *str = PyUnicode_InternFromString("*");
166
206
    if (!str) {
167
0
        return NULL;
168
0
    }
169
206
    if (_PyArena_AddPyObject(p->arena, str) < 0) {
170
0
        Py_DECREF(str);
171
0
        return NULL;
172
0
    }
173
206
    return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
174
206
}
175
176
/* Creates a new asdl_seq* with the identifiers of all the names in seq */
177
asdl_identifier_seq *
178
_PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq)
179
1.48k
{
180
1.48k
    Py_ssize_t len = asdl_seq_LEN(seq);
181
1.48k
    assert(len > 0);
182
183
1.48k
    asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena);
184
1.48k
    if (!new_seq) {
185
0
        return NULL;
186
0
    }
187
4.49k
    for (Py_ssize_t i = 0; i < len; i++) {
188
3.01k
        expr_ty e = asdl_seq_GET(seq, i);
189
3.01k
        asdl_seq_SET(new_seq, i, e->v.Name.id);
190
3.01k
    }
191
1.48k
    return new_seq;
192
1.48k
}
193
194
/* Constructs a CmpopExprPair */
195
CmpopExprPair *
196
_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
197
15.9k
{
198
15.9k
    assert(expr != NULL);
199
15.9k
    CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
200
15.9k
    if (!a) {
201
0
        return NULL;
202
0
    }
203
15.9k
    a->cmpop = cmpop;
204
15.9k
    a->expr = expr;
205
15.9k
    return a;
206
15.9k
}
207
208
asdl_int_seq *
209
_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
210
5.94k
{
211
5.94k
    Py_ssize_t len = asdl_seq_LEN(seq);
212
5.94k
    assert(len > 0);
213
214
5.94k
    asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
215
5.94k
    if (!new_seq) {
216
0
        return NULL;
217
0
    }
218
21.2k
    for (Py_ssize_t i = 0; i < len; i++) {
219
15.3k
        CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
220
15.3k
        asdl_seq_SET(new_seq, i, pair->cmpop);
221
15.3k
    }
222
5.94k
    return new_seq;
223
5.94k
}
224
225
asdl_expr_seq *
226
_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
227
5.94k
{
228
5.94k
    Py_ssize_t len = asdl_seq_LEN(seq);
229
5.94k
    assert(len > 0);
230
231
5.94k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
232
5.94k
    if (!new_seq) {
233
0
        return NULL;
234
0
    }
235
21.2k
    for (Py_ssize_t i = 0; i < len; i++) {
236
15.3k
        CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
237
15.3k
        asdl_seq_SET(new_seq, i, pair->expr);
238
15.3k
    }
239
5.94k
    return new_seq;
240
5.94k
}
241
242
/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
243
static asdl_expr_seq *
244
_set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx)
245
4.58k
{
246
4.58k
    Py_ssize_t len = asdl_seq_LEN(seq);
247
4.58k
    if (len == 0) {
248
2.43k
        return NULL;
249
2.43k
    }
250
251
2.15k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
252
2.15k
    if (!new_seq) {
253
0
        return NULL;
254
0
    }
255
6.13k
    for (Py_ssize_t i = 0; i < len; i++) {
256
3.98k
        expr_ty e = asdl_seq_GET(seq, i);
257
3.98k
        asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
258
3.98k
    }
259
2.15k
    return new_seq;
260
2.15k
}
261
262
static expr_ty
263
_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
264
187k
{
265
187k
    return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
266
187k
}
267
268
static expr_ty
269
_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
270
4.10k
{
271
4.10k
    return _PyAST_Tuple(
272
4.10k
            _set_seq_context(p, e->v.Tuple.elts, ctx),
273
4.10k
            ctx,
274
4.10k
            EXTRA_EXPR(e, e));
275
4.10k
}
276
277
static expr_ty
278
_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
279
486
{
280
486
    return _PyAST_List(
281
486
            _set_seq_context(p, e->v.List.elts, ctx),
282
486
            ctx,
283
486
            EXTRA_EXPR(e, e));
284
486
}
285
286
static expr_ty
287
_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
288
320
{
289
320
    return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice,
290
320
                            ctx, EXTRA_EXPR(e, e));
291
320
}
292
293
static expr_ty
294
_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
295
217
{
296
217
    return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr,
297
217
                            ctx, EXTRA_EXPR(e, e));
298
217
}
299
300
static expr_ty
301
_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
302
286
{
303
286
    return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx),
304
286
                          ctx, EXTRA_EXPR(e, e));
305
286
}
306
307
/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
308
expr_ty
309
_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
310
192k
{
311
192k
    assert(expr != NULL);
312
313
192k
    expr_ty new = NULL;
314
192k
    switch (expr->kind) {
315
187k
        case Name_kind:
316
187k
            new = _set_name_context(p, expr, ctx);
317
187k
            break;
318
4.10k
        case Tuple_kind:
319
4.10k
            new = _set_tuple_context(p, expr, ctx);
320
4.10k
            break;
321
486
        case List_kind:
322
486
            new = _set_list_context(p, expr, ctx);
323
486
            break;
324
320
        case Subscript_kind:
325
320
            new = _set_subscript_context(p, expr, ctx);
326
320
            break;
327
217
        case Attribute_kind:
328
217
            new = _set_attribute_context(p, expr, ctx);
329
217
            break;
330
286
        case Starred_kind:
331
286
            new = _set_starred_context(p, expr, ctx);
332
286
            break;
333
0
        default:
334
0
            new = expr;
335
192k
    }
336
192k
    return new;
337
192k
}
338
339
/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
340
KeyValuePair *
341
_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
342
32.2k
{
343
32.2k
    KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair));
344
32.2k
    if (!a) {
345
0
        return NULL;
346
0
    }
347
32.2k
    a->key = key;
348
32.2k
    a->value = value;
349
32.2k
    return a;
350
32.2k
}
351
352
/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
353
asdl_expr_seq *
354
_PyPegen_get_keys(Parser *p, asdl_seq *seq)
355
4.50k
{
356
4.50k
    Py_ssize_t len = asdl_seq_LEN(seq);
357
4.50k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
358
4.50k
    if (!new_seq) {
359
0
        return NULL;
360
0
    }
361
25.5k
    for (Py_ssize_t i = 0; i < len; i++) {
362
21.0k
        KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
363
21.0k
        asdl_seq_SET(new_seq, i, pair->key);
364
21.0k
    }
365
4.50k
    return new_seq;
366
4.50k
}
367
368
/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
369
asdl_expr_seq *
370
_PyPegen_get_values(Parser *p, asdl_seq *seq)
371
4.50k
{
372
4.50k
    Py_ssize_t len = asdl_seq_LEN(seq);
373
4.50k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
374
4.50k
    if (!new_seq) {
375
0
        return NULL;
376
0
    }
377
25.5k
    for (Py_ssize_t i = 0; i < len; i++) {
378
21.0k
        KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
379
21.0k
        asdl_seq_SET(new_seq, i, pair->value);
380
21.0k
    }
381
4.50k
    return new_seq;
382
4.50k
}
383
384
/* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */
385
KeyPatternPair *
386
_PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
387
12.7k
{
388
12.7k
    KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair));
389
12.7k
    if (!a) {
390
0
        return NULL;
391
0
    }
392
12.7k
    a->key = key;
393
12.7k
    a->pattern = pattern;
394
12.7k
    return a;
395
12.7k
}
396
397
/* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */
398
asdl_expr_seq *
399
_PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq)
400
1.31k
{
401
1.31k
    Py_ssize_t len = asdl_seq_LEN(seq);
402
1.31k
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
403
1.31k
    if (!new_seq) {
404
0
        return NULL;
405
0
    }
406
3.32k
    for (Py_ssize_t i = 0; i < len; i++) {
407
2.01k
        KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
408
2.01k
        asdl_seq_SET(new_seq, i, pair->key);
409
2.01k
    }
410
1.31k
    return new_seq;
411
1.31k
}
412
413
/* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */
414
asdl_pattern_seq *
415
_PyPegen_get_patterns(Parser *p, asdl_seq *seq)
416
1.31k
{
417
1.31k
    Py_ssize_t len = asdl_seq_LEN(seq);
418
1.31k
    asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena);
419
1.31k
    if (!new_seq) {
420
0
        return NULL;
421
0
    }
422
3.32k
    for (Py_ssize_t i = 0; i < len; i++) {
423
2.01k
        KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
424
2.01k
        asdl_seq_SET(new_seq, i, pair->pattern);
425
2.01k
    }
426
1.31k
    return new_seq;
427
1.31k
}
428
429
/* Constructs a NameDefaultPair */
430
NameDefaultPair *
431
_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
432
545k
{
433
545k
    NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
434
545k
    if (!a) {
435
0
        return NULL;
436
0
    }
437
545k
    a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
438
545k
    if (!a->arg) {
439
0
        return NULL;
440
0
    }
441
545k
    a->value = value;
442
545k
    return a;
443
545k
}
444
445
/* Constructs a SlashWithDefault */
446
SlashWithDefault *
447
_PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults)
448
4.94k
{
449
4.94k
    SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
450
4.94k
    if (!a) {
451
0
        return NULL;
452
0
    }
453
4.94k
    a->plain_names = plain_names;
454
4.94k
    a->names_with_defaults = names_with_defaults;
455
4.94k
    return a;
456
4.94k
}
457
458
/* Constructs a StarEtc */
459
StarEtc *
460
_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
461
4.47k
{
462
4.47k
    StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc));
463
4.47k
    if (!a) {
464
0
        return NULL;
465
0
    }
466
4.47k
    a->vararg = vararg;
467
4.47k
    a->kwonlyargs = kwonlyargs;
468
4.47k
    a->kwarg = kwarg;
469
4.47k
    return a;
470
4.47k
}
471
472
asdl_seq *
473
_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
474
337k
{
475
337k
    Py_ssize_t first_len = asdl_seq_LEN(a);
476
337k
    Py_ssize_t second_len = asdl_seq_LEN(b);
477
337k
    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
478
337k
    if (!new_seq) {
479
0
        return NULL;
480
0
    }
481
482
337k
    int k = 0;
483
690k
    for (Py_ssize_t i = 0; i < first_len; i++) {
484
352k
        asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i));
485
352k
    }
486
347k
    for (Py_ssize_t i = 0; i < second_len; i++) {
487
9.54k
        asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i));
488
9.54k
    }
489
490
337k
    return new_seq;
491
337k
}
492
493
static asdl_arg_seq*
494
_get_names(Parser *p, asdl_seq *names_with_defaults)
495
341k
{
496
341k
    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
497
341k
    asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena);
498
341k
    if (!seq) {
499
0
        return NULL;
500
0
    }
501
351k
    for (Py_ssize_t i = 0; i < len; i++) {
502
10.2k
        NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
503
10.2k
        asdl_seq_SET(seq, i, pair->arg);
504
10.2k
    }
505
341k
    return seq;
506
341k
}
507
508
static asdl_expr_seq *
509
_get_defaults(Parser *p, asdl_seq *names_with_defaults)
510
341k
{
511
341k
    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
512
341k
    asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena);
513
341k
    if (!seq) {
514
0
        return NULL;
515
0
    }
516
351k
    for (Py_ssize_t i = 0; i < len; i++) {
517
10.2k
        NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
518
10.2k
        asdl_seq_SET(seq, i, pair->value);
519
10.2k
    }
520
341k
    return seq;
521
341k
}
522
523
static int
524
_make_posonlyargs(Parser *p,
525
                  asdl_arg_seq *slash_without_default,
526
                  SlashWithDefault *slash_with_default,
527
340k
                  asdl_arg_seq **posonlyargs) {
528
340k
    if (slash_without_default != NULL) {
529
1.29k
        *posonlyargs = slash_without_default;
530
1.29k
    }
531
339k
    else if (slash_with_default != NULL) {
532
1.62k
        asdl_arg_seq *slash_with_default_names =
533
1.62k
                _get_names(p, slash_with_default->names_with_defaults);
534
1.62k
        if (!slash_with_default_names) {
535
0
            return -1;
536
0
        }
537
1.62k
        *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences(
538
1.62k
                p,
539
1.62k
                (asdl_seq*)slash_with_default->plain_names,
540
1.62k
                (asdl_seq*)slash_with_default_names);
541
1.62k
    }
542
337k
    else {
543
337k
        *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
544
337k
    }
545
340k
    return *posonlyargs == NULL ? -1 : 0;
546
340k
}
547
548
static int
549
_make_posargs(Parser *p,
550
              asdl_arg_seq *plain_names,
551
              asdl_seq *names_with_default,
552
340k
              asdl_arg_seq **posargs) {
553
554
340k
    if (names_with_default != NULL) {
555
336k
        if (plain_names != NULL) {
556
333k
            asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default);
557
333k
            if (!names_with_default_names) {
558
0
                return -1;
559
0
            }
560
333k
            *posargs = (asdl_arg_seq*)_PyPegen_join_sequences(
561
333k
                    p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names);
562
333k
        }
563
2.95k
        else {
564
2.95k
            *posargs = _get_names(p, names_with_default);
565
2.95k
        }
566
336k
    }
567
3.68k
    else {
568
3.68k
        if (plain_names != NULL) {
569
            // With the current grammar, we never get here.
570
            // If that has changed, remove the assert, and test thoroughly.
571
0
            assert(0);
572
0
            *posargs = plain_names;
573
0
        }
574
3.68k
        else {
575
3.68k
            *posargs = _Py_asdl_arg_seq_new(0, p->arena);
576
3.68k
        }
577
3.68k
    }
578
340k
    return *posargs == NULL ? -1 : 0;
579
340k
}
580
581
static int
582
_make_posdefaults(Parser *p,
583
                  SlashWithDefault *slash_with_default,
584
                  asdl_seq *names_with_default,
585
340k
                  asdl_expr_seq **posdefaults) {
586
340k
    if (slash_with_default != NULL && names_with_default != NULL) {
587
1.62k
        asdl_expr_seq *slash_with_default_values =
588
1.62k
                _get_defaults(p, slash_with_default->names_with_defaults);
589
1.62k
        if (!slash_with_default_values) {
590
0
            return -1;
591
0
        }
592
1.62k
        asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default);
593
1.62k
        if (!names_with_default_values) {
594
0
            return -1;
595
0
        }
596
1.62k
        *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences(
597
1.62k
                p,
598
1.62k
                (asdl_seq*)slash_with_default_values,
599
1.62k
                (asdl_seq*)names_with_default_values);
600
1.62k
    }
601
338k
    else if (slash_with_default == NULL && names_with_default != NULL) {
602
335k
        *posdefaults = _get_defaults(p, names_with_default);
603
335k
    }
604
3.68k
    else if (slash_with_default != NULL && names_with_default == NULL) {
605
0
        *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
606
0
    }
607
3.68k
    else {
608
3.68k
        *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
609
3.68k
    }
610
340k
    return *posdefaults == NULL ? -1 : 0;
611
340k
}
612
613
static int
614
_make_kwargs(Parser *p, StarEtc *star_etc,
615
             asdl_arg_seq **kwonlyargs,
616
340k
             asdl_expr_seq **kwdefaults) {
617
340k
    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
618
3.16k
        *kwonlyargs = _get_names(p, star_etc->kwonlyargs);
619
3.16k
    }
620
337k
    else {
621
337k
        *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
622
337k
    }
623
624
340k
    if (*kwonlyargs == NULL) {
625
0
        return -1;
626
0
    }
627
628
340k
    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
629
3.16k
        *kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
630
3.16k
    }
631
337k
    else {
632
337k
        *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
633
337k
    }
634
635
340k
    if (*kwdefaults == NULL) {
636
0
        return -1;
637
0
    }
638
639
340k
    return 0;
640
340k
}
641
642
/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
643
arguments_ty
644
_PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default,
645
                        SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names,
646
                        asdl_seq *names_with_default, StarEtc *star_etc)
647
340k
{
648
340k
    asdl_arg_seq *posonlyargs;
649
340k
    if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) {
650
0
        return NULL;
651
0
    }
652
653
340k
    asdl_arg_seq *posargs;
654
340k
    if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) {
655
0
        return NULL;
656
0
    }
657
658
340k
    asdl_expr_seq *posdefaults;
659
340k
    if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) {
660
0
        return NULL;
661
0
    }
662
663
340k
    arg_ty vararg = NULL;
664
340k
    if (star_etc != NULL && star_etc->vararg != NULL) {
665
2.13k
        vararg = star_etc->vararg;
666
2.13k
    }
667
668
340k
    asdl_arg_seq *kwonlyargs;
669
340k
    asdl_expr_seq *kwdefaults;
670
340k
    if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) {
671
0
        return NULL;
672
0
    }
673
674
340k
    arg_ty kwarg = NULL;
675
340k
    if (star_etc != NULL && star_etc->kwarg != NULL) {
676
1.40k
        kwarg = star_etc->kwarg;
677
1.40k
    }
678
679
340k
    return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
680
340k
                            kwdefaults, kwarg, posdefaults, p->arena);
681
340k
}
682
683
684
/* Constructs an empty arguments_ty object, that gets used when a function accepts no
685
 * arguments. */
686
arguments_ty
687
_PyPegen_empty_arguments(Parser *p)
688
2.22k
{
689
2.22k
    asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
690
2.22k
    if (!posonlyargs) {
691
0
        return NULL;
692
0
    }
693
2.22k
    asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena);
694
2.22k
    if (!posargs) {
695
0
        return NULL;
696
0
    }
697
2.22k
    asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
698
2.22k
    if (!posdefaults) {
699
0
        return NULL;
700
0
    }
701
2.22k
    asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
702
2.22k
    if (!kwonlyargs) {
703
0
        return NULL;
704
0
    }
705
2.22k
    asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
706
2.22k
    if (!kwdefaults) {
707
0
        return NULL;
708
0
    }
709
710
2.22k
    return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
711
2.22k
                            kwdefaults, NULL, posdefaults, p->arena);
712
2.22k
}
713
714
/* Encapsulates the value of an operator_ty into an AugOperator struct */
715
AugOperator *
716
_PyPegen_augoperator(Parser *p, operator_ty kind)
717
2.75k
{
718
2.75k
    AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator));
719
2.75k
    if (!a) {
720
0
        return NULL;
721
0
    }
722
2.75k
    a->kind = kind;
723
2.75k
    return a;
724
2.75k
}
725
726
/* Construct a FunctionDef equivalent to function_def, but with decorators */
727
stmt_ty
728
_PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def)
729
443
{
730
443
    assert(function_def != NULL);
731
443
    if (function_def->kind == AsyncFunctionDef_kind) {
732
128
        return _PyAST_AsyncFunctionDef(
733
128
            function_def->v.AsyncFunctionDef.name,
734
128
            function_def->v.AsyncFunctionDef.args,
735
128
            function_def->v.AsyncFunctionDef.body, decorators,
736
128
            function_def->v.AsyncFunctionDef.returns,
737
128
            function_def->v.AsyncFunctionDef.type_comment,
738
128
            function_def->v.AsyncFunctionDef.type_params,
739
128
            function_def->lineno, function_def->col_offset,
740
128
            function_def->end_lineno, function_def->end_col_offset, p->arena);
741
128
    }
742
743
315
    return _PyAST_FunctionDef(
744
315
        function_def->v.FunctionDef.name,
745
315
        function_def->v.FunctionDef.args,
746
315
        function_def->v.FunctionDef.body, decorators,
747
315
        function_def->v.FunctionDef.returns,
748
315
        function_def->v.FunctionDef.type_comment,
749
315
        function_def->v.FunctionDef.type_params,
750
315
        function_def->lineno, function_def->col_offset,
751
315
        function_def->end_lineno, function_def->end_col_offset, p->arena);
752
443
}
753
754
/* Construct a ClassDef equivalent to class_def, but with decorators */
755
stmt_ty
756
_PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def)
757
177
{
758
177
    assert(class_def != NULL);
759
177
    return _PyAST_ClassDef(
760
177
        class_def->v.ClassDef.name,
761
177
        class_def->v.ClassDef.bases, class_def->v.ClassDef.keywords,
762
177
        class_def->v.ClassDef.body, decorators,
763
177
        class_def->v.ClassDef.type_params,
764
177
        class_def->lineno, class_def->col_offset, class_def->end_lineno,
765
177
        class_def->end_col_offset, p->arena);
766
177
}
767
768
/* Construct a KeywordOrStarred */
769
KeywordOrStarred *
770
_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
771
36.6k
{
772
36.6k
    KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
773
36.6k
    if (!a) {
774
0
        return NULL;
775
0
    }
776
36.6k
    a->element = element;
777
36.6k
    a->is_keyword = is_keyword;
778
36.6k
    return a;
779
36.6k
}
780
781
/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
782
static int
783
_seq_number_of_starred_exprs(asdl_seq *seq)
784
19.2k
{
785
19.2k
    int n = 0;
786
60.3k
    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
787
41.1k
        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i);
788
41.1k
        if (!k->is_keyword) {
789
2.97k
            n++;
790
2.97k
        }
791
41.1k
    }
792
19.2k
    return n;
793
19.2k
}
794
795
/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
796
asdl_expr_seq *
797
_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
798
9.60k
{
799
9.60k
    int new_len = _seq_number_of_starred_exprs(kwargs);
800
9.60k
    if (new_len == 0) {
801
9.03k
        return NULL;
802
9.03k
    }
803
575
    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena);
804
575
    if (!new_seq) {
805
0
        return NULL;
806
0
    }
807
808
575
    int idx = 0;
809
3.19k
    for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
810
2.62k
        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
811
2.62k
        if (!k->is_keyword) {
812
1.48k
            asdl_seq_SET(new_seq, idx++, k->element);
813
1.48k
        }
814
2.62k
    }
815
575
    return new_seq;
816
575
}
817
818
/* Return a new asdl_seq* with only the keywords in kwargs */
819
asdl_keyword_seq*
820
_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
821
9.60k
{
822
9.60k
    Py_ssize_t len = asdl_seq_LEN(kwargs);
823
9.60k
    Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
824
9.60k
    if (new_len == 0) {
825
0
        return NULL;
826
0
    }
827
9.60k
    asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena);
828
9.60k
    if (!new_seq) {
829
0
        return NULL;
830
0
    }
831
832
9.60k
    int idx = 0;
833
30.1k
    for (Py_ssize_t i = 0; i < len; i++) {
834
20.5k
        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
835
20.5k
        if (k->is_keyword) {
836
19.0k
            asdl_seq_SET(new_seq, idx++, k->element);
837
19.0k
        }
838
20.5k
    }
839
9.60k
    return new_seq;
840
9.60k
}
841
842
expr_ty
843
_PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
844
980
{
845
980
    if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) {
846
5
        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
847
5
        return NULL;
848
5
    }
849
975
    return exp;
850
980
}
851
852
expr_ty
853
_PyPegen_ensure_real(Parser *p, expr_ty exp)
854
1.87k
{
855
1.87k
    if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) {
856
5
        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
857
5
        return NULL;
858
5
    }
859
1.86k
    return exp;
860
1.87k
}
861
862
mod_ty
863
7.32k
_PyPegen_make_module(Parser *p, asdl_stmt_seq *a) {
864
7.32k
    asdl_type_ignore_seq *type_ignores = NULL;
865
7.32k
    Py_ssize_t num = p->type_ignore_comments.num_items;
866
7.32k
    if (num > 0) {
867
        // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
868
0
        type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
869
0
        if (type_ignores == NULL) {
870
0
            return NULL;
871
0
        }
872
0
        for (Py_ssize_t i = 0; i < num; i++) {
873
0
            PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
874
0
            if (tag == NULL) {
875
0
                return NULL;
876
0
            }
877
0
            type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
878
0
                                                  tag, p->arena);
879
0
            if (ti == NULL) {
880
0
                return NULL;
881
0
            }
882
0
            asdl_seq_SET(type_ignores, i, ti);
883
0
        }
884
0
    }
885
7.32k
    return _PyAST_Module(a, type_ignores, p->arena);
886
7.32k
}
887
888
PyObject *
889
_PyPegen_new_type_comment(Parser *p, const char *s)
890
0
{
891
0
    PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
892
0
    if (res == NULL) {
893
0
        return NULL;
894
0
    }
895
0
    if (_PyArena_AddPyObject(p->arena, res) < 0) {
896
0
        Py_DECREF(res);
897
0
        return NULL;
898
0
    }
899
0
    return res;
900
0
}
901
902
arg_ty
903
_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
904
605k
{
905
605k
    if (tc == NULL) {
906
605k
        return a;
907
605k
    }
908
0
    const char *bytes = PyBytes_AsString(tc->bytes);
909
0
    if (bytes == NULL) {
910
0
        return NULL;
911
0
    }
912
0
    PyObject *tco = _PyPegen_new_type_comment(p, bytes);
913
0
    if (tco == NULL) {
914
0
        return NULL;
915
0
    }
916
0
    return _PyAST_arg(a->arg, a->annotation, tco,
917
0
                      a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
918
0
                      p->arena);
919
0
}
920
921
/* Checks if the NOTEQUAL token is valid given the current parser flags
922
0 indicates success and nonzero indicates failure (an exception may be set) */
923
int
924
1.05k
_PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
925
1.05k
    assert(t->bytes != NULL);
926
1.05k
    assert(t->type == NOTEQUAL);
927
928
1.05k
    const char* tok_str = PyBytes_AS_STRING(t->bytes);
929
1.05k
    if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) {
930
1
        RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
931
1
        return -1;
932
1
    }
933
1.05k
    if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
934
990
        return strcmp(tok_str, "!=");
935
990
    }
936
67
    return 0;
937
1.05k
}
938
939
int
940
8.43k
_PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
941
8.43k
    if (name->kind != Name_kind) {
942
2.32k
        return 0;
943
2.32k
    }
944
6.11k
    const char* candidates[2] = {"print", "exec"};
945
18.1k
    for (int i=0; i<2; i++) {
946
12.2k
        if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) {
947
233
            return 1;
948
233
        }
949
12.2k
    }
950
5.87k
    return 0;
951
6.11k
}
952
953
void *
954
_PyPegen_raise_error_for_missing_comma(Parser *p, expr_ty a, expr_ty b)
955
4.09k
{
956
    // Don't raise for legacy statements like "print x" or "exec x"
957
4.09k
    if (_PyPegen_check_legacy_stmt(p, a)) {
958
223
        return NULL;
959
223
    }
960
    // Only raise inside parentheses/brackets (level > 0)
961
3.87k
    if (p->tokens[p->mark - 1]->level == 0) {
962
3.70k
        return NULL;
963
3.70k
    }
964
    // For multi-line expressions (like string concatenations), point to the
965
    // last line instead of the first for a more helpful error message.
966
    // Use a->col_offset as the starting column since all strings in the
967
    // concatenation typically share the same indentation.
968
167
    if (a->end_lineno > a->lineno) {
969
4
        return RAISE_ERROR_KNOWN_LOCATION(
970
4
            p, PyExc_SyntaxError, a->end_lineno, a->col_offset,
971
4
            a->end_lineno, a->end_col_offset,
972
4
            "invalid syntax. Perhaps you forgot a comma?"
973
4
        );
974
4
    }
975
163
    return RAISE_ERROR_KNOWN_LOCATION(
976
163
        p, PyExc_SyntaxError, a->lineno, a->col_offset,
977
163
        b->end_lineno, b->end_col_offset,
978
163
        "invalid syntax. Perhaps you forgot a comma?"
979
163
    );
980
167
}
981
982
static ResultTokenWithMetadata *
983
result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
984
8.89k
{
985
8.89k
    ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));
986
8.89k
    if (res == NULL) {
987
0
        return NULL;
988
0
    }
989
8.89k
    res->metadata = metadata;
990
8.89k
    res->result = result;
991
8.89k
    return res;
992
8.89k
}
993
994
ResultTokenWithMetadata *
995
_PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
996
2.20k
{
997
2.20k
    if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
998
3
        return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
999
3
            conv_token, conv,
1000
3
            "%c-string: conversion type must come right after the exclamation mark",
1001
3
            TOK_GET_STRING_PREFIX(p->tok)
1002
3
        );
1003
3
    }
1004
1005
2.20k
    Py_UCS4 first = PyUnicode_READ_CHAR(conv->v.Name.id, 0);
1006
2.20k
    if (PyUnicode_GET_LENGTH(conv->v.Name.id) > 1 ||
1007
2.20k
            !(first == 's' || first == 'r' || first == 'a')) {
1008
18
        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conv,
1009
18
                                            "%c-string: invalid conversion character %R: expected 's', 'r', or 'a'",
1010
18
                                            TOK_GET_STRING_PREFIX(p->tok),
1011
18
                                            conv->v.Name.id);
1012
18
        return NULL;
1013
18
    }
1014
1015
2.18k
    return result_token_with_metadata(p, conv, conv_token->metadata);
1016
2.20k
}
1017
1018
ResultTokenWithMetadata *
1019
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
1020
                                int end_lineno, int end_col_offset, PyArena *arena)
1021
6.70k
{
1022
6.70k
    if (!spec) {
1023
0
        return NULL;
1024
0
    }
1025
1026
    // This is needed to keep compatibility with 3.11, where an empty format
1027
    // spec is parsed as an *empty* JoinedStr node, instead of having an empty
1028
    // constant in it.
1029
6.70k
    Py_ssize_t n_items = asdl_seq_LEN(spec);
1030
6.70k
    Py_ssize_t non_empty_count = 0;
1031
17.7k
    for (Py_ssize_t i = 0; i < n_items; i++) {
1032
11.0k
        expr_ty item = asdl_seq_GET(spec, i);
1033
11.0k
        non_empty_count += !(item->kind == Constant_kind &&
1034
11.0k
                             PyUnicode_CheckExact(item->v.Constant.value) &&
1035
7.66k
                             PyUnicode_GET_LENGTH(item->v.Constant.value) == 0);
1036
11.0k
    }
1037
6.70k
    if (non_empty_count != n_items) {
1038
2.30k
        asdl_expr_seq *resized_spec =
1039
2.30k
            _Py_asdl_expr_seq_new(non_empty_count, p->arena);
1040
2.30k
        if (resized_spec == NULL) {
1041
0
            return NULL;
1042
0
        }
1043
2.30k
        Py_ssize_t j = 0;
1044
5.60k
        for (Py_ssize_t i = 0; i < n_items; i++) {
1045
3.29k
            expr_ty item = asdl_seq_GET(spec, i);
1046
3.29k
            if (item->kind == Constant_kind &&
1047
3.29k
                PyUnicode_CheckExact(item->v.Constant.value) &&
1048
2.66k
                PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
1049
2.30k
                continue;
1050
2.30k
            }
1051
992
            asdl_seq_SET(resized_spec, j++, item);
1052
992
        }
1053
2.30k
        assert(j == non_empty_count);
1054
2.30k
        spec = resized_spec;
1055
2.30k
    }
1056
6.70k
    expr_ty res;
1057
6.70k
    Py_ssize_t n = asdl_seq_LEN(spec);
1058
6.70k
    if (n == 0 || (n == 1 && asdl_seq_GET(spec, 0)->kind == Constant_kind)) {
1059
6.01k
        res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
1060
6.01k
                                    end_col_offset, p->arena);
1061
6.01k
    } else {
1062
692
        res = _PyPegen_concatenate_strings(p, spec,
1063
692
                             lineno, col_offset, end_lineno,
1064
692
                             end_col_offset, arena);
1065
692
    }
1066
6.70k
    if (!res) {
1067
0
        return NULL;
1068
0
    }
1069
6.70k
    return result_token_with_metadata(p, res, colon->metadata);
1070
6.70k
}
1071
1072
const char *
1073
_PyPegen_get_expr_name(expr_ty e)
1074
203
{
1075
203
    assert(e != NULL);
1076
203
    switch (e->kind) {
1077
1
        case Attribute_kind:
1078
1
            return "attribute";
1079
1
        case Subscript_kind:
1080
1
            return "subscript";
1081
2
        case Starred_kind:
1082
2
            return "starred";
1083
8
        case Name_kind:
1084
8
            return "name";
1085
3
        case List_kind:
1086
3
            return "list";
1087
4
        case Tuple_kind:
1088
4
            return "tuple";
1089
3
        case Lambda_kind:
1090
3
            return "lambda";
1091
11
        case Call_kind:
1092
11
            return "function call";
1093
4
        case BoolOp_kind:
1094
26
        case BinOp_kind:
1095
38
        case UnaryOp_kind:
1096
38
            return "expression";
1097
1
        case GeneratorExp_kind:
1098
1
            return "generator expression";
1099
1
        case Yield_kind:
1100
1
        case YieldFrom_kind:
1101
1
            return "yield expression";
1102
1
        case Await_kind:
1103
1
            return "await expression";
1104
1
        case ListComp_kind:
1105
1
            return "list comprehension";
1106
1
        case SetComp_kind:
1107
1
            return "set comprehension";
1108
2
        case DictComp_kind:
1109
2
            return "dict comprehension";
1110
3
        case Dict_kind:
1111
3
            return "dict literal";
1112
1
        case Set_kind:
1113
1
            return "set display";
1114
10
        case JoinedStr_kind:
1115
10
        case FormattedValue_kind:
1116
10
            return "f-string expression";
1117
5
        case TemplateStr_kind:
1118
5
        case Interpolation_kind:
1119
5
            return "t-string expression";
1120
93
        case Constant_kind: {
1121
93
            PyObject *value = e->v.Constant.value;
1122
93
            if (value == Py_None) {
1123
1
                return "None";
1124
1
            }
1125
92
            if (value == Py_False) {
1126
1
                return "False";
1127
1
            }
1128
91
            if (value == Py_True) {
1129
2
                return "True";
1130
2
            }
1131
89
            if (value == Py_Ellipsis) {
1132
1
                return "ellipsis";
1133
1
            }
1134
88
            return "literal";
1135
89
        }
1136
11
        case Compare_kind:
1137
11
            return "comparison";
1138
1
        case IfExp_kind:
1139
1
            return "conditional expression";
1140
1
        case NamedExpr_kind:
1141
1
            return "named expression";
1142
0
        default:
1143
0
            PyErr_Format(PyExc_SystemError,
1144
0
                         "unexpected expression in assignment %d (line %d)",
1145
0
                         e->kind, e->lineno);
1146
0
            return NULL;
1147
203
    }
1148
203
}
1149
1150
expr_ty
1151
16
_PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
1152
16
    if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) {
1153
12
        return comprehension->iter;
1154
12
    }
1155
4
    return PyPegen_last_item(comprehension->ifs, expr_ty);
1156
16
}
1157
1158
/* Build a Call node (with a dummy callee) from positional arguments `a`
   and the mixed sequence `b`.
   When `b` is NULL the call simply uses `a` and no keywords. Otherwise the
   starred expressions extracted from `b` are appended after `a`, and what
   remains of `b` once the starred expressions are deleted becomes the
   keywords. Returns NULL if the merged argument sequence cannot be
   allocated. */
expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
                     int lineno, int col_offset, int end_lineno,
                     int end_col_offset, PyArena *arena) {
    if (b == NULL) {
        return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
                        end_lineno, end_col_offset, arena);
    }

    Py_ssize_t n_positional = asdl_seq_LEN(a);

    asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
    asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);

    Py_ssize_t n_starred = starreds ? asdl_seq_LEN(starreds) : 0;

    asdl_expr_seq *merged = _Py_asdl_expr_seq_new(n_positional + n_starred, arena);
    if (!merged) {
        return NULL;
    }

    /* Positional arguments first, starred expressions right after them. */
    for (Py_ssize_t k = 0; k < n_positional; k++) {
        asdl_seq_SET(merged, k, asdl_seq_GET(a, k));
    }
    for (Py_ssize_t k = 0; k < n_starred; k++) {
        asdl_seq_SET(merged, n_positional + k, asdl_seq_GET(starreds, k));
    }

    return _PyAST_Call(_PyPegen_dummy_name(p), merged, keywords, lineno,
                       col_offset, end_lineno, end_col_offset, arena);
}
1193
1194
// AST Error reporting helpers
1195
1196
expr_ty
1197
_PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
1198
7.12k
{
1199
7.12k
    if (e == NULL) {
1200
0
        return NULL;
1201
0
    }
1202
1203
7.12k
#define VISIT_CONTAINER(CONTAINER, TYPE) do { \
1204
1.91k
        Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\
1205
5.76k
        for (Py_ssize_t i = 0; i < len; i++) {\
1206
4.00k
            expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\
1207
4.00k
            expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\
1208
4.00k
            if (child != NULL) {\
1209
153
                return child;\
1210
153
            }\
1211
4.00k
        }\
1212
1.91k
    } while (0)
1213
1214
    // We only need to visit List and Tuple nodes recursively as those
1215
    // are the only ones that can contain valid names in targets when
1216
    // they are parsed as expressions. Any other kind of expression
1217
    // that is a container (like Sets or Dicts) is directly invalid and
1218
    // we don't need to visit it recursively.
1219
1220
7.12k
    switch (e->kind) {
1221
514
        case List_kind:
1222
514
            VISIT_CONTAINER(e, List);
1223
429
            return NULL;
1224
1.39k
        case Tuple_kind:
1225
1.39k
            VISIT_CONTAINER(e, Tuple);
1226
1.32k
            return NULL;
1227
1.18k
        case Starred_kind:
1228
1.18k
            if (targets_type == DEL_TARGETS) {
1229
1
                return e;
1230
1
            }
1231
1.18k
            return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
1232
1.11k
        case Compare_kind:
1233
            // This is needed, because the `a in b` in `for a in b` gets parsed
1234
            // as a comparison, and so we need to search the left side of the comparison
1235
            // for invalid targets.
1236
1.11k
            if (targets_type == FOR_TARGETS) {
1237
1.10k
                cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0);
1238
1.10k
                if (cmpop == In) {
1239
239
                    return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
1240
239
                }
1241
867
                return NULL;
1242
1.10k
            }
1243
9
            return e;
1244
2.15k
        case Name_kind:
1245
2.67k
        case Subscript_kind:
1246
2.78k
        case Attribute_kind:
1247
2.78k
            return NULL;
1248
135
        default:
1249
135
            return e;
1250
7.12k
    }
1251
7.12k
}
1252
1253
33
/* Raise the SyntaxError for a positional argument placed after keyword
   arguments in the Call node `e`. The message mentions "unpacking" when at
   least one keyword of the call has no name (keyword->arg is NULL). */
void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
    asdl_keyword_seq *keywords = e->v.Call.keywords;
    Py_ssize_t n_keywords = asdl_seq_LEN(keywords);

    int saw_unpacking = 0;
    for (Py_ssize_t k = 0; k < n_keywords && !saw_unpacking; k++) {
        keyword_ty kw = asdl_seq_GET(keywords, k);
        saw_unpacking = (kw->arg == NULL);
    }

    const char *msg = saw_unpacking
        ? "positional argument follows keyword argument unpacking"
        : "positional argument follows keyword argument";

    return RAISE_SYNTAX_ERROR(msg);
}
1271
1272
void *
1273
_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions)
1274
243
{
1275
    /* The rule that calls this function is 'args for_if_clauses'.
1276
       For the input f(L, x for x in y), L and x are in args and
1277
       the for is parsed as a for_if_clause. We have to check if
1278
       len <= 1, so that input like dict((a, b) for a, b in x)
1279
       gets successfully parsed and then we pass the last
1280
       argument (x in the above example) as the location of the
1281
       error */
1282
243
    Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
1283
243
    if (len <= 1) {
1284
239
        return NULL;
1285
239
    }
1286
1287
4
    comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty);
1288
1289
4
    return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
1290
243
        (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
1291
243
        _PyPegen_get_last_comprehension_item(last_comprehension),
1292
243
        "Generator expression must be parenthesized"
1293
243
    );
1294
243
}
1295
1296
// Fstring stuff
1297
1298
static expr_ty
1299
25.4k
_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* token) {
1300
25.4k
    assert(PyUnicode_CheckExact(constant->v.Constant.value));
1301
1302
25.4k
    const char* bstr = PyUnicode_AsUTF8(constant->v.Constant.value);
1303
25.4k
    if (bstr == NULL) {
1304
0
        return NULL;
1305
0
    }
1306
1307
25.4k
    size_t len;
1308
25.4k
    if (strcmp(bstr, "{{") == 0 || strcmp(bstr, "}}") == 0) {
1309
0
        len = 1;
1310
25.4k
    } else {
1311
25.4k
        len = strlen(bstr);
1312
25.4k
    }
1313
1314
25.4k
    is_raw = is_raw || strchr(bstr, '\\') == NULL;
1315
25.4k
    PyObject *str = _PyPegen_decode_string(p, is_raw, bstr, len, token);
1316
25.4k
    if (str == NULL) {
1317
15
        _Pypegen_raise_decode_error(p);
1318
15
        return NULL;
1319
15
    }
1320
25.3k
    if (_PyArena_AddPyObject(p->arena, str) < 0) {
1321
0
        Py_DECREF(str);
1322
0
        return NULL;
1323
0
    }
1324
25.3k
    return _PyAST_Constant(str, NULL, constant->lineno, constant->col_offset,
1325
25.3k
                           constant->end_lineno, constant->end_col_offset,
1326
25.3k
                           p->arena);
1327
25.3k
}
1328
1329
static asdl_expr_seq *
1330
_get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b, enum string_kind_t string_kind)
1331
19.3k
{
1332
19.3k
    Py_ssize_t n_items = asdl_seq_LEN(raw_expressions);
1333
19.3k
    Py_ssize_t total_items = n_items;
1334
67.7k
    for (Py_ssize_t i = 0; i < n_items; i++) {
1335
48.4k
        expr_ty item = asdl_seq_GET(raw_expressions, i);
1336
48.4k
        if (item->kind == JoinedStr_kind) {
1337
6.92k
            total_items += asdl_seq_LEN(item->v.JoinedStr.values) - 1;
1338
6.92k
        }
1339
48.4k
    }
1340
1341
19.3k
    const char* quote_str = PyBytes_AsString(a->bytes);
1342
19.3k
    if (quote_str == NULL) {
1343
0
        return NULL;
1344
0
    }
1345
19.3k
    int is_raw = strpbrk(quote_str, "rR") != NULL;
1346
1347
19.3k
    asdl_expr_seq *seq = _Py_asdl_expr_seq_new(total_items, p->arena);
1348
19.3k
    if (seq == NULL) {
1349
0
        return NULL;
1350
0
    }
1351
1352
19.3k
    Py_ssize_t index = 0;
1353
67.7k
    for (Py_ssize_t i = 0; i < n_items; i++) {
1354
48.4k
        expr_ty item = asdl_seq_GET(raw_expressions, i);
1355
1356
        // This should correspond to a JoinedStr node of two elements
1357
        // created _PyPegen_formatted_value. This situation can only be the result of
1358
        // a (f|t)-string debug expression where the first element is a constant with the text and the second
1359
        // a formatted value with the expression.
1360
48.4k
        if (item->kind == JoinedStr_kind) {
1361
6.92k
            asdl_expr_seq *values = item->v.JoinedStr.values;
1362
6.92k
            if (asdl_seq_LEN(values) != 2) {
1363
0
                PyErr_Format(PyExc_SystemError,
1364
0
                             string_kind == TSTRING
1365
0
                             ? "unexpected TemplateStr node without debug data in t-string at line %d"
1366
0
                             : "unexpected JoinedStr node without debug data in f-string at line %d",
1367
0
                             item->lineno);
1368
0
                return NULL;
1369
0
            }
1370
1371
6.92k
            expr_ty first = asdl_seq_GET(values, 0);
1372
6.92k
            assert(first->kind == Constant_kind);
1373
6.92k
            asdl_seq_SET(seq, index++, first);
1374
1375
6.92k
            expr_ty second = asdl_seq_GET(values, 1);
1376
6.92k
            assert((string_kind == TSTRING && second->kind == Interpolation_kind) || second->kind == FormattedValue_kind);
1377
6.92k
            asdl_seq_SET(seq, index++, second);
1378
1379
6.92k
            continue;
1380
6.92k
        }
1381
1382
41.5k
        if (item->kind == Constant_kind) {
1383
25.4k
            item = _PyPegen_decode_fstring_part(p, is_raw, item, b);
1384
25.4k
            if (item == NULL) {
1385
15
                return NULL;
1386
15
            }
1387
1388
            /* Tokenizer emits string parts even when the underlying string
1389
            might become an empty value (e.g. FSTRING_MIDDLE with the value \\n)
1390
            so we need to check for them and simplify it here. */
1391
25.3k
            if (PyUnicode_CheckExact(item->v.Constant.value)
1392
25.3k
                && PyUnicode_GET_LENGTH(item->v.Constant.value) == 0) {
1393
315
                continue;
1394
315
            }
1395
25.3k
        }
1396
41.2k
        asdl_seq_SET(seq, index++, item);
1397
41.2k
    }
1398
1399
19.2k
    asdl_expr_seq *resized_exprs;
1400
19.2k
    if (index != total_items) {
1401
315
        resized_exprs = _Py_asdl_expr_seq_new(index, p->arena);
1402
315
        if (resized_exprs == NULL) {
1403
0
            return NULL;
1404
0
        }
1405
911
        for (Py_ssize_t i = 0; i < index; i++) {
1406
596
            asdl_seq_SET(resized_exprs, i, asdl_seq_GET(seq, i));
1407
596
        }
1408
315
    }
1409
18.9k
    else {
1410
18.9k
        resized_exprs = seq;
1411
18.9k
    }
1412
19.2k
    return resized_exprs;
1413
19.2k
}
1414
1415
expr_ty
1416
5.01k
_PyPegen_template_str(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b) {
1417
1418
5.01k
    asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, TSTRING);
1419
5.01k
    return _PyAST_TemplateStr(resized_exprs, a->lineno, a->col_offset,
1420
5.01k
                              b->end_lineno, b->end_col_offset,
1421
5.01k
                              p->arena);
1422
5.01k
}
1423
1424
expr_ty
1425
14.2k
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
1426
1427
14.2k
    asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, FSTRING);
1428
14.2k
    return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset,
1429
14.2k
                            b->end_lineno, b->end_col_offset,
1430
14.2k
                            p->arena);
1431
14.2k
}
1432
1433
8.02k
/* Decode the bytes of token `tok` and return them as a Constant node
   located at the token's position. The bytes are decoded as raw when the
   tokenizer is currently inside an f-string whose mode is flagged raw.
   Returns NULL on failure. */
expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
    char *raw_bytes;
    Py_ssize_t raw_size;
    if (PyBytes_AsStringAndSize(tok->bytes, &raw_bytes, &raw_size) == -1) {
        return NULL;
    }

    // Raw f-strings require raw decoding (needed for format specs)
    int is_raw = 0;
    if (INSIDE_FSTRING(p->tok)) {
        tokenizer_mode *current_mode = TOK_GET_MODE(p->tok);
        is_raw = current_mode->raw;
    }

    PyObject *decoded = _PyPegen_decode_string(p, is_raw, raw_bytes, raw_size, tok);
    if (!decoded) {
        return NULL;
    }
    /* On success the arena holds the reference to `decoded`. */
    if (_PyArena_AddPyObject(p->arena, decoded) < 0) {
        Py_DECREF(decoded);
        return NULL;
    }
    return _PyAST_Constant(decoded, NULL, tok->lineno, tok->col_offset,
                           tok->end_lineno, tok->end_col_offset,
                           p->arena);
}
1459
1460
28.3k
/* Return a Constant node whose value is a str built from the bytes of
   token `tok` (via PyUnicode_FromString, without any escape processing),
   located at the token's position. Returns NULL on failure. */
expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok) {
    char *text = PyBytes_AsString(tok->bytes);
    if (!text) {
        return NULL;
    }

    PyObject *value = PyUnicode_FromString(text);
    if (!value) {
        return NULL;
    }
    /* On success the arena holds the reference to `value`. */
    if (_PyArena_AddPyObject(p->arena, value) < 0) {
        Py_DECREF(value);
        return NULL;
    }

    return _PyAST_Constant(value, NULL, tok->lineno, tok->col_offset,
                           tok->end_lineno, tok->end_col_offset,
                           p->arena);
}
1477
1478
50.3k
/* Parse the string literal held by token `tok` into a Constant node
   located at the token's position. The node's kind is the identifier "u"
   when the token text starts with 'u', and NULL otherwise.
   Returns NULL on failure; a parse failure is reported through
   _Pypegen_raise_decode_error(). */
expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
    char *literal = PyBytes_AsString(tok->bytes);
    if (!literal) {
        return NULL;
    }

    PyObject *value = _PyPegen_parse_string(p, tok);
    if (!value) {
        _Pypegen_raise_decode_error(p);
        return NULL;
    }
    /* On success the arena holds the reference to `value`. */
    if (_PyArena_AddPyObject(p->arena, value) < 0) {
        Py_DECREF(value);
        return NULL;
    }

    PyObject *kind = NULL;
    if (literal[0] == 'u') {
        kind = _PyPegen_new_identifier(p, "u");
        if (!kind) {
            return NULL;
        }
    }

    return _PyAST_Constant(value, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
}
1501
1502
static int
1503
_get_interpolation_conversion(Parser *p, Token *debug, ResultTokenWithMetadata *conversion,
1504
                              ResultTokenWithMetadata *format)
1505
28.5k
{
1506
28.5k
    if (conversion != NULL) {
1507
2.17k
        expr_ty conversion_expr = (expr_ty) conversion->result;
1508
2.17k
        assert(conversion_expr->kind == Name_kind);
1509
2.17k
        Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);
1510
2.17k
        return Py_SAFE_DOWNCAST(first, Py_UCS4, int);
1511
2.17k
    }
1512
26.3k
    else if (debug && !format) {
1513
        /* If no conversion is specified, use !r for debug expressions */
1514
6.75k
        return (int)'r';
1515
6.75k
    }
1516
19.5k
    return -1;
1517
28.5k
}
1518
1519
/* Return a new str equal to `exprstr` with every trailing whitespace
   character and '=' removed. Returns NULL if the substring cannot be
   created. */
static PyObject *
_strip_interpolation_expr(PyObject *exprstr)
{
    Py_ssize_t keep = PyUnicode_GET_LENGTH(exprstr);

    /* Shrink from the right while the last kept character is strippable. */
    while (keep > 0) {
        Py_UCS4 ch = PyUnicode_READ_CHAR(exprstr, keep - 1);
        if (ch != '=' && !_PyUnicode_IsWhitespace(ch)) {
            break;
        }
        keep--;
    }

    return PyUnicode_Substring(exprstr, 0, keep);
}
1536
1537
/* Build the node for a t-string replacement field.
   The expression source text is taken from the metadata of the conversion,
   of the format spec, or of the closing brace (first one present, in that
   order) and stored on the Interpolation node with trailing whitespace
   and '=' stripped.
   Without a `debug` token the Interpolation node is returned directly.
   With one, the result is a two-element JoinedStr: a Constant holding the
   unstripped metadata text, followed by the Interpolation node.
   Returns NULL on any failure. */
expr_ty _PyPegen_interpolation(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
                                 ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
                                 int end_lineno, int end_col_offset, PyArena *arena) {

    int conversion_val = _get_interpolation_conversion(p, debug, conversion, format);

    /* Find the non whitespace token after the "=" */
    int debug_end_line, debug_end_offset;
    PyObject *debug_metadata;
    constant exprstr;

    if (conversion) {
        debug_end_line = ((expr_ty) conversion->result)->lineno;
        debug_end_offset = ((expr_ty) conversion->result)->col_offset;
        debug_metadata = exprstr = conversion->metadata;
    }
    else if (format) {
        debug_end_line = ((expr_ty) format->result)->lineno;
        debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
        debug_metadata = exprstr = format->metadata;
    }
    else {
        debug_end_line = end_lineno;
        debug_end_offset = end_col_offset;
        debug_metadata = exprstr = closing_brace->metadata;
    }

    assert(exprstr != NULL);
    PyObject *final_exprstr = _strip_interpolation_expr(exprstr);
    if (!final_exprstr || _PyArena_AddPyObject(arena, final_exprstr) < 0) {
        Py_XDECREF(final_exprstr);
        return NULL;
    }

    expr_ty interpolation = _PyAST_Interpolation(
        expression, final_exprstr, conversion_val, format ? (expr_ty) format->result : NULL,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );
    if (!interpolation) {
        /* Never store a NULL node into the debug JoinedStr below. */
        return NULL;
    }

    if (!debug) {
        return interpolation;
    }

    expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
                                            debug_end_offset - 1, p->arena);
    if (!debug_text) {
        return NULL;
    }

    asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
    if (values == NULL) {
        /* Allocation failed: bail out before writing through the pointer. */
        return NULL;
    }
    asdl_seq_SET(values, 0, debug_text);
    asdl_seq_SET(values, 1, interpolation);
    return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
}
1592
1593
/* Build the node for an f-string replacement field.
   Without a `debug` token the FormattedValue node is returned directly.
   With one, the result is a two-element JoinedStr: a Constant holding the
   metadata text of the conversion, of the format spec, or of the closing
   brace (first one present, in that order), followed by the
   FormattedValue node.
   Returns NULL on any failure. */
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
                                 ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
                                 int end_lineno, int end_col_offset, PyArena *arena) {
    int conversion_val = _get_interpolation_conversion(p, debug, conversion, format);

    expr_ty formatted_value = _PyAST_FormattedValue(
        expression, conversion_val, format ? (expr_ty) format->result : NULL,
        lineno, col_offset, end_lineno,
        end_col_offset, arena
    );
    if (!formatted_value) {
        /* Never store a NULL node into the debug JoinedStr below. */
        return NULL;
    }

    if (!debug) {
        return formatted_value;
    }

    /* Find the non whitespace token after the "=" */
    int debug_end_line, debug_end_offset;
    PyObject *debug_metadata;

    if (conversion) {
        debug_end_line = ((expr_ty) conversion->result)->lineno;
        debug_end_offset = ((expr_ty) conversion->result)->col_offset;
        debug_metadata = conversion->metadata;
    }
    else if (format) {
        debug_end_line = ((expr_ty) format->result)->lineno;
        debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
        debug_metadata = format->metadata;
    }
    else {
        debug_end_line = end_lineno;
        debug_end_offset = end_col_offset;
        debug_metadata = closing_brace->metadata;
    }
    expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
                                            debug_end_offset - 1, p->arena);
    if (!debug_text) {
        return NULL;
    }

    asdl_expr_seq *values = _Py_asdl_expr_seq_new(2, arena);
    if (values == NULL) {
        /* Allocation failed: bail out before writing through the pointer. */
        return NULL;
    }
    asdl_seq_SET(values, 0, debug_text);
    asdl_seq_SET(values, 1, formatted_value);
    return _PyAST_JoinedStr(values, lineno, col_offset, debug_end_line, debug_end_offset, p->arena);
}
1638
1639
static expr_ty
1640
_build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno,
1641
                        int col_offset, int end_lineno, int end_col_offset,
1642
                        PyArena *arena)
1643
607
{
1644
607
    Py_ssize_t len = asdl_seq_LEN(strings);
1645
607
    assert(len > 0);
1646
1647
    /* Bytes literals never get a kind, but just for consistency
1648
        since they are represented as Constant nodes, we'll mirror
1649
        the same behavior as unicode strings for determining the
1650
        kind. */
1651
607
    PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
1652
1653
607
    Py_ssize_t total = 0;
1654
2.96k
    for (Py_ssize_t i = 0; i < len; i++) {
1655
2.36k
        expr_ty elem = asdl_seq_GET(strings, i);
1656
2.36k
        PyObject *bytes = elem->v.Constant.value;
1657
2.36k
        Py_ssize_t part = PyBytes_GET_SIZE(bytes);
1658
2.36k
        if (part > PY_SSIZE_T_MAX - total) {
1659
0
            PyErr_NoMemory();
1660
0
            return NULL;
1661
0
        }
1662
2.36k
        total += part;
1663
2.36k
    }
1664
1665
607
    PyBytesWriter *writer = PyBytesWriter_Create(total);
1666
607
    if (writer == NULL) {
1667
0
        return NULL;
1668
0
    }
1669
607
    char *out = PyBytesWriter_GetData(writer);
1670
1671
2.96k
    for (Py_ssize_t i = 0; i < len; i++) {
1672
2.36k
        expr_ty elem = asdl_seq_GET(strings, i);
1673
2.36k
        PyObject *bytes = elem->v.Constant.value;
1674
2.36k
        Py_ssize_t part = PyBytes_GET_SIZE(bytes);
1675
2.36k
        if (part > 0) {
1676
992
            memcpy(out, PyBytes_AS_STRING(bytes), part);
1677
992
            out += part;
1678
992
        }
1679
2.36k
    }
1680
1681
607
    PyObject *res = PyBytesWriter_Finish(writer);
1682
607
    if (res == NULL) {
1683
0
        return NULL;
1684
0
    }
1685
607
    if (_PyArena_AddPyObject(arena, res) < 0) {
1686
0
        Py_DECREF(res);
1687
0
        return NULL;
1688
0
    }
1689
607
    return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
1690
607
}
1691
1692
static expr_ty
1693
_build_concatenated_unicode(Parser *p, asdl_expr_seq *strings, int lineno,
1694
                        int col_offset, int end_lineno, int end_col_offset,
1695
                        PyArena *arena)
1696
1.55k
{
1697
1.55k
    Py_ssize_t len = asdl_seq_LEN(strings);
1698
1.55k
    assert(len > 1);
1699
1700
1.55k
    expr_ty first = asdl_seq_GET(strings, 0);
1701
1702
    /* When a string is getting concatenated, the kind of the string
1703
        is determined by the first string in the concatenation
1704
        sequence.
1705
1706
        u"abc" "def" -> u"abcdef"
1707
        "abc" u"abc" ->  "abcabc" */
1708
1.55k
    PyObject *kind = first->v.Constant.kind;
1709
1710
1.55k
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
1711
1.55k
    if (writer == NULL) {
1712
0
        return NULL;
1713
0
    }
1714
1715
22.8k
    for (Py_ssize_t i = 0; i < len; i++) {
1716
21.3k
        expr_ty current_elem = asdl_seq_GET(strings, i);
1717
21.3k
        assert(current_elem->kind == Constant_kind);
1718
1719
21.3k
        if (PyUnicodeWriter_WriteStr(writer,
1720
21.3k
                                     current_elem->v.Constant.value)) {
1721
0
            PyUnicodeWriter_Discard(writer);
1722
0
            return NULL;
1723
0
        }
1724
21.3k
    }
1725
1726
1.55k
    PyObject *final = PyUnicodeWriter_Finish(writer);
1727
1.55k
    if (final == NULL) {
1728
0
        return NULL;
1729
0
    }
1730
1.55k
    if (_PyArena_AddPyObject(p->arena, final) < 0) {
1731
0
        Py_DECREF(final);
1732
0
        return NULL;
1733
0
    }
1734
1.55k
    return _PyAST_Constant(final, kind, lineno, col_offset,
1735
1.55k
                           end_lineno, end_col_offset, arena);
1736
1.55k
}
1737
1738
static asdl_expr_seq *
1739
_build_concatenated_str(Parser *p, asdl_expr_seq *strings,
1740
                               int lineno, int col_offset, int end_lineno,
1741
                               int end_col_offset, PyArena *arena)
1742
13.9k
{
1743
13.9k
    Py_ssize_t len = asdl_seq_LEN(strings);
1744
13.9k
    assert(len > 0);
1745
1746
13.9k
    Py_ssize_t n_flattened_elements = 0;
1747
39.8k
    for (Py_ssize_t i = 0; i < len; i++) {
1748
25.9k
        expr_ty elem = asdl_seq_GET(strings, i);
1749
25.9k
        switch(elem->kind) {
1750
12.6k
            case JoinedStr_kind:
1751
12.6k
                n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
1752
12.6k
                break;
1753
4.64k
            case TemplateStr_kind:
1754
4.64k
                n_flattened_elements += asdl_seq_LEN(elem->v.TemplateStr.values);
1755
4.64k
                break;
1756
8.67k
            default:
1757
8.67k
                n_flattened_elements++;
1758
8.67k
                break;
1759
25.9k
        }
1760
25.9k
    }
1761
1762
1763
13.9k
    asdl_expr_seq* flattened = _Py_asdl_expr_seq_new(n_flattened_elements, p->arena);
1764
13.9k
    if (flattened == NULL) {
1765
0
        return NULL;
1766
0
    }
1767
1768
    /* build flattened list */
1769
13.9k
    Py_ssize_t current_pos = 0;
1770
39.8k
    for (Py_ssize_t i = 0; i < len; i++) {
1771
25.9k
        expr_ty elem = asdl_seq_GET(strings, i);
1772
25.9k
        switch(elem->kind) {
1773
12.6k
            case JoinedStr_kind:
1774
54.5k
                for (Py_ssize_t j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
1775
41.9k
                    expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
1776
41.9k
                    if (subvalue == NULL) {
1777
0
                        return NULL;
1778
0
                    }
1779
41.9k
                    asdl_seq_SET(flattened, current_pos++, subvalue);
1780
41.9k
                }
1781
12.6k
                break;
1782
12.6k
            case TemplateStr_kind:
1783
14.4k
                for (Py_ssize_t j = 0; j < asdl_seq_LEN(elem->v.TemplateStr.values); j++) {
1784
9.75k
                    expr_ty subvalue = asdl_seq_GET(elem->v.TemplateStr.values, j);
1785
9.75k
                    if (subvalue == NULL) {
1786
0
                        return NULL;
1787
0
                    }
1788
9.75k
                    asdl_seq_SET(flattened, current_pos++, subvalue);
1789
9.75k
                }
1790
4.64k
                break;
1791
8.67k
            default:
1792
8.67k
                asdl_seq_SET(flattened, current_pos++, elem);
1793
8.67k
                break;
1794
25.9k
        }
1795
25.9k
    }
1796
1797
    /* calculate folded element count */
1798
13.9k
    Py_ssize_t n_elements = 0;
1799
13.9k
    int prev_is_constant = 0;
1800
74.2k
    for (Py_ssize_t i = 0; i < n_flattened_elements; i++) {
1801
60.3k
        expr_ty elem = asdl_seq_GET(flattened, i);
1802
1803
        /* The concatenation of a FormattedValue and an empty Constant should
1804
           lead to the FormattedValue itself. Thus, we will not take any empty
1805
           constants into account, just as in `_PyPegen_joined_str` */
1806
60.3k
        if (elem->kind == Constant_kind &&
1807
60.3k
            PyUnicode_CheckExact(elem->v.Constant.value) &&
1808
34.8k
            PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0)
1809
1.17k
            continue;
1810
1811
59.1k
        if (!prev_is_constant || elem->kind != Constant_kind) {
1812
51.3k
            n_elements++;
1813
51.3k
        }
1814
59.1k
        prev_is_constant = elem->kind == Constant_kind;
1815
59.1k
    }
1816
1817
13.9k
    asdl_expr_seq* values = _Py_asdl_expr_seq_new(n_elements, p->arena);
1818
13.9k
    if (values == NULL) {
1819
0
        return NULL;
1820
0
    }
1821
1822
    /* build folded list */
1823
13.9k
    current_pos = 0;
1824
65.5k
    for (Py_ssize_t i = 0; i < n_flattened_elements; i++) {
1825
51.6k
        expr_ty elem = asdl_seq_GET(flattened, i);
1826
1827
        /* if the current elem and the following are constants,
1828
           fold them and all consequent constants */
1829
51.6k
        if (elem->kind == Constant_kind) {
1830
26.0k
            if (i + 1 < n_flattened_elements &&
1831
21.1k
                asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) {
1832
3.34k
                expr_ty first_elem = elem;
1833
1834
                /* When a string is getting concatenated, the kind of the string
1835
                   is determined by the first string in the concatenation
1836
                   sequence.
1837
1838
                   u"abc" "def" -> u"abcdef"
1839
                   "abc" u"abc" ->  "abcabc" */
1840
3.34k
                PyObject *kind = elem->v.Constant.kind;
1841
1842
3.34k
                PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
1843
3.34k
                if (writer == NULL) {
1844
0
                    return NULL;
1845
0
                }
1846
3.34k
                expr_ty last_elem = elem;
1847
3.34k
                Py_ssize_t j;
1848
15.4k
                for (j = i; j < n_flattened_elements; j++) {
1849
14.1k
                    expr_ty current_elem = asdl_seq_GET(flattened, j);
1850
14.1k
                    if (current_elem->kind == Constant_kind) {
1851
12.0k
                        if (PyUnicodeWriter_WriteStr(writer,
1852
12.0k
                                                     current_elem->v.Constant.value)) {
1853
0
                            PyUnicodeWriter_Discard(writer);
1854
0
                            return NULL;
1855
0
                        }
1856
12.0k
                        last_elem = current_elem;
1857
12.0k
                    } else {
1858
2.07k
                        break;
1859
2.07k
                    }
1860
14.1k
                }
1861
3.34k
                i = j - 1;
1862
1863
3.34k
                PyObject *concat_str = PyUnicodeWriter_Finish(writer);
1864
3.34k
                if (concat_str == NULL) {
1865
0
                    return NULL;
1866
0
                }
1867
3.34k
                if (_PyArena_AddPyObject(p->arena, concat_str) < 0) {
1868
0
                    Py_DECREF(concat_str);
1869
0
                    return NULL;
1870
0
                }
1871
3.34k
                elem = _PyAST_Constant(concat_str, kind, first_elem->lineno,
1872
3.34k
                                       first_elem->col_offset,
1873
3.34k
                                       last_elem->end_lineno,
1874
3.34k
                                       last_elem->end_col_offset, p->arena);
1875
3.34k
                if (elem == NULL) {
1876
0
                    return NULL;
1877
0
                }
1878
3.34k
            }
1879
1880
            /* Drop all empty contanst strings */
1881
26.0k
            if (PyUnicode_CheckExact(elem->v.Constant.value) &&
1882
26.0k
                PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) {
1883
299
                continue;
1884
299
            }
1885
26.0k
        }
1886
1887
51.3k
        asdl_seq_SET(values, current_pos++, elem);
1888
51.3k
    }
1889
1890
13.9k
    assert(current_pos == n_elements);
1891
13.9k
    return values;
1892
13.9k
}
1893
1894
static expr_ty
1895
_build_concatenated_joined_str(Parser *p, asdl_expr_seq *strings,
1896
                               int lineno, int col_offset, int end_lineno,
1897
                               int end_col_offset, PyArena *arena)
1898
10.1k
{
1899
10.1k
    asdl_expr_seq *values = _build_concatenated_str(p, strings, lineno,
1900
10.1k
        col_offset, end_lineno, end_col_offset, arena);
1901
10.1k
    return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
1902
10.1k
}
1903
1904
expr_ty
1905
_PyPegen_concatenate_tstrings(Parser *p, asdl_expr_seq *strings,
1906
                               int lineno, int col_offset, int end_lineno,
1907
                               int end_col_offset, PyArena *arena)
1908
3.73k
{
1909
3.73k
    asdl_expr_seq *values = _build_concatenated_str(p, strings, lineno,
1910
3.73k
        col_offset, end_lineno, end_col_offset, arena);
1911
3.73k
    return _PyAST_TemplateStr(values, lineno, col_offset, end_lineno,
1912
3.73k
        end_col_offset, arena);
1913
3.73k
}
1914
1915
expr_ty
1916
_PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
1917
                             int lineno, int col_offset, int end_lineno,
1918
                             int end_col_offset, PyArena *arena)
1919
28.4k
{
1920
28.4k
    Py_ssize_t len = asdl_seq_LEN(strings);
1921
28.4k
    assert(len > 0);
1922
1923
28.4k
    int f_string_found = 0;
1924
28.4k
    int unicode_string_found = 0;
1925
28.4k
    int bytes_found = 0;
1926
1927
28.4k
    Py_ssize_t i = 0;
1928
89.5k
    for (i = 0; i < len; i++) {
1929
61.1k
        expr_ty elem = asdl_seq_GET(strings, i);
1930
61.1k
        switch(elem->kind) {
1931
45.2k
            case Constant_kind:
1932
45.2k
                if (PyBytes_CheckExact(elem->v.Constant.value)) {
1933
3.73k
                    bytes_found = 1;
1934
41.4k
                } else {
1935
41.4k
                    unicode_string_found = 1;
1936
41.4k
                }
1937
45.2k
                break;
1938
12.6k
            case JoinedStr_kind:
1939
12.6k
                f_string_found = 1;
1940
12.6k
                break;
1941
0
            case TemplateStr_kind:
1942
                // python.gram handles this; we should never get here
1943
0
                assert(0);
1944
0
                break;
1945
3.27k
            default:
1946
3.27k
                f_string_found = 1;
1947
3.27k
                break;
1948
61.1k
        }
1949
61.1k
    }
1950
1951
    // Cannot mix unicode and bytes
1952
28.4k
    if ((unicode_string_found || f_string_found) && bytes_found) {
1953
6
        RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
1954
6
        return NULL;
1955
6
    }
1956
1957
    // If it's only bytes or only unicode string, do a simple concat
1958
28.4k
    if (!f_string_found) {
1959
18.2k
        if (len == 1) {
1960
16.1k
            return asdl_seq_GET(strings, 0);
1961
16.1k
        }
1962
2.16k
        else if (bytes_found) {
1963
607
            return _build_concatenated_bytes(p, strings, lineno, col_offset,
1964
607
                end_lineno, end_col_offset, arena);
1965
607
        }
1966
1.55k
        else {
1967
1.55k
            return _build_concatenated_unicode(p, strings, lineno, col_offset,
1968
1.55k
                end_lineno, end_col_offset, arena);
1969
1.55k
        }
1970
18.2k
    }
1971
1972
10.1k
    return _build_concatenated_joined_str(p, strings, lineno,
1973
10.1k
        col_offset, end_lineno, end_col_offset, arena);
1974
28.4k
}
1975
1976
stmt_ty
1977
_PyPegen_checked_future_import(Parser *p, identifier module, asdl_alias_seq * names,
1978
                               int level, expr_ty lazy_token, int lineno,
1979
                               int col_offset, int end_lineno, int end_col_offset,
1980
1.54k
                               PyArena *arena) {
1981
1.54k
    if (level == 0 && PyUnicode_CompareWithASCIIString(module, "__future__") == 0) {
1982
683
        if (lazy_token) {
1983
1
            RAISE_SYNTAX_ERROR_KNOWN_LOCATION(lazy_token,
1984
1
                "lazy from __future__ import is not allowed");
1985
1
            return NULL;
1986
1
        }
1987
1.65k
        for (Py_ssize_t i = 0; i < asdl_seq_LEN(names); i++) {
1988
976
            alias_ty alias = asdl_seq_GET(names, i);
1989
976
            if (PyUnicode_CompareWithASCIIString(alias->name, "barry_as_FLUFL") == 0) {
1990
291
                p->flags |= PyPARSE_BARRY_AS_BDFL;
1991
291
            }
1992
976
        }
1993
682
    }
1994
1.54k
    return _PyAST_ImportFrom(module, names, level, lazy_token ? 1 : 0, lineno,
1995
1.54k
                             col_offset, end_lineno, end_col_offset, arena);
1996
1.54k
}
1997
1998
asdl_stmt_seq*
1999
26.6k
_PyPegen_register_stmts(Parser *p, asdl_stmt_seq* stmts) {
2000
26.6k
    if (!p->call_invalid_rules) {
2001
19.6k
        return stmts;
2002
19.6k
    }
2003
7.03k
    Py_ssize_t len = asdl_seq_LEN(stmts);
2004
7.03k
    if (len == 0) {
2005
0
        return stmts;
2006
0
    }
2007
7.03k
    stmt_ty last_stmt = asdl_seq_GET(stmts, len - 1);
2008
7.03k
    if (p->last_stmt_location.lineno > last_stmt->lineno) {
2009
115
        return stmts;
2010
115
    }
2011
6.92k
    p->last_stmt_location.lineno = last_stmt->lineno;
2012
6.92k
    p->last_stmt_location.col_offset = last_stmt->col_offset;
2013
6.92k
    p->last_stmt_location.end_lineno = last_stmt->end_lineno;
2014
6.92k
    p->last_stmt_location.end_col_offset = last_stmt->end_col_offset;
2015
6.92k
    return stmts;
2016
7.03k
}