Coverage Report

Created: 2026-01-09 06:57

/src/cpython3/Python/flowgraph.c
Line | Count | Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
13.0M
#define SUCCESS 0
19
51.9k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
19.8M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
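
These macros support a uniform error-propagation idiom: every fallible helper returns 0 or -1, and callers bail out early. A minimal standalone sketch of the pattern (self-contained, not part of flowgraph.c; the SKETCH_* names are illustrative):

#include <stdio.h>

#define SKETCH_ERROR -1
#define SKETCH_SUCCESS 0
#define SKETCH_RETURN_IF_ERROR(X) \
    if ((X) == -1) {              \
        return SKETCH_ERROR;      \
    }

static int step(int fail) { return fail ? SKETCH_ERROR : SKETCH_SUCCESS; }

static int pipeline(int fail_first) {
    SKETCH_RETURN_IF_ERROR(step(fail_first)); /* early-out on failure */
    SKETCH_RETURN_IF_ERROR(step(0));
    return SKETCH_SUCCESS;
}

int main(void) {
    printf("%d %d\n", pipeline(0), pipeline(1)); /* prints: 0 -1 */
    return 0;
}
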
25
26
4.47M
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the blocks are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
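
The struct carries two independent orderings: b_list links every allocated block in reverse allocation order (used for bookkeeping such as freeing), while b_next links only fallthrough control flow. A minimal standalone sketch of walking both chains (illustrative names, not the CPython types):

#include <stdio.h>

typedef struct block {
    struct block *b_list;  /* next in allocation order (all blocks)   */
    struct block *b_next;  /* fallthrough successor (control flow)    */
    int id;
} block;

static void walk_alloc(const block *alloc_head) {
    for (const block *b = alloc_head; b != NULL; b = b->b_list) {
        printf("allocated: %d\n", b->id);  /* reaches unreachable blocks too */
    }
}

static void walk_flow(const block *entry) {
    for (const block *b = entry; b != NULL; b = b->b_next) {
        printf("flow: %d\n", b->id);       /* straight-line flow only */
    }
}

int main(void) {
    block b2 = { NULL, NULL, 2 };
    block b1 = { &b2, &b2, 1 };  /* b2 allocated first; b1 falls through to it */
    walk_alloc(&b1);
    walk_flow(&b1);
    return 0;
}
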
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
4.61M
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
4.61M
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
3.46k
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
18.2M
{
101
18.2M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
18.2M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
18.2M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
15.3M
{
108
15.3M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
15.3M
}
110
111
/* One arg */
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
1.05M
    do { \
114
1.05M
        assert(OPCODE_HAS_ARG(OP)); \
115
1.05M
        cfg_instr *_instr__ptr_ = (I); \
116
1.05M
        _instr__ptr_->i_opcode = (OP); \
117
1.05M
        _instr__ptr_->i_oparg = (ARG); \
118
1.05M
    } while (0);
119
120
/* No args */
121
#define INSTR_SET_OP0(I, OP) \
122
1.62M
    do { \
123
1.62M
        assert(!OPCODE_HAS_ARG(OP)); \
124
1.62M
        cfg_instr *_instr__ptr_ = (I); \
125
1.62M
        _instr__ptr_->i_opcode = (OP); \
126
1.62M
        _instr__ptr_->i_oparg = 0; \
127
1.62M
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
1.05M
    do { \
131
1.05M
        cfg_instr *_instr__ptr_ = (I); \
132
1.05M
        _instr__ptr_->i_loc = (LOC); \
133
1.05M
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
4.47M
{
144
4.47M
    assert(b != NULL);
145
4.47M
    _Py_c_array_t array = {
146
4.47M
        .array = (void*)b->b_instr,
147
4.47M
        .allocated_entries = b->b_ialloc,
148
4.47M
        .item_size = sizeof(cfg_instr),
149
4.47M
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
4.47M
    };
151
152
4.47M
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
4.47M
    b->b_instr = array.array;
154
4.47M
    b->b_ialloc = array.allocated_entries;
155
4.47M
    return b->b_iused++;
156
4.47M
}
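
basicblock_next_instr grows b_instr through _Py_CArray_EnsureCapacity. A minimal standalone sketch of the geometric-growth pattern that call is assumed to implement (the helper below is hypothetical, not the CPython API):

#include <stdlib.h>

/* Grow *array to hold at least `need` items; returns 0, or -1 on OOM. */
static int ensure_capacity(void **array, int *allocated,
                           size_t item_size, int initial, int need) {
    if (need <= *allocated) {
        return 0;
    }
    int new_alloc = (*allocated > 0) ? *allocated : initial;
    while (new_alloc < need) {
        new_alloc *= 2;  /* geometric growth => amortized O(1) appends */
    }
    void *p = realloc(*array, (size_t)new_alloc * item_size);
    if (p == NULL) {
        return -1;
    }
    *array = p;
    *allocated = new_alloc;
    return 0;
}
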
157
158
static cfg_instr *
159
14.5M
basicblock_last_instr(const basicblock *b) {
160
14.5M
    assert(b->b_iused >= 0);
161
14.5M
    if (b->b_iused > 0) {
162
13.6M
        assert(b->b_instr != NULL);
163
13.6M
        return &b->b_instr[b->b_iused - 1];
164
13.6M
    }
165
866k
    return NULL;
166
14.5M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
396k
{
175
396k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
396k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with the new block. */
181
396k
    b->b_list = g->g_block_list;
182
396k
    g->g_block_list = b;
183
396k
    b->b_label = NO_LABEL;
184
396k
    return b;
185
396k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
4.40M
{
190
4.40M
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
4.40M
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
4.40M
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
4.40M
    assert(0 <= oparg && oparg < (1 << 30));
194
195
4.40M
    int off = basicblock_next_instr(b);
196
4.40M
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
4.40M
    cfg_instr *i = &b->b_instr[off];
200
4.40M
    i->i_opcode = opcode;
201
4.40M
    i->i_oparg = oparg;
202
4.40M
    i->i_loc = loc;
203
    // memory is already zero initialized
204
4.40M
    assert(i->i_target == NULL);
205
4.40M
    assert(i->i_except == NULL);
206
207
4.40M
    return SUCCESS;
208
4.40M
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
8.73k
{
213
8.73k
    cfg_instr *last = basicblock_last_instr(b);
214
8.73k
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
8.73k
    RETURN_IF_ERROR(
219
8.73k
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
8.73k
    last = basicblock_last_instr(b);
221
8.73k
    assert(last && last->i_opcode == opcode);
222
8.73k
    last->i_target = target;
223
8.73k
    return SUCCESS;
224
8.73k
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
13.0k
{
229
40.6k
    for (int i = 0; i < from->b_iused; i++) {
230
27.6k
        int n = basicblock_next_instr(to);
231
27.6k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
27.6k
        to->b_instr[n] = from->b_instr[i];
235
27.6k
    }
236
13.0k
    return SUCCESS;
237
13.0k
}
238
239
static inline int
240
3.94M
basicblock_nofallthrough(const basicblock *b) {
241
3.94M
    cfg_instr *last = basicblock_last_instr(b);
242
3.94M
    return (last &&
243
3.78M
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
2.83M
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
3.94M
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
6.14M
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
2.32k
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
2.32k
    assert(BB_NO_FALLTHROUGH(block));
257
2.32k
    basicblock *result = cfg_builder_new_block(g);
258
2.32k
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
2.32k
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
2.32k
    return result;
265
2.32k
}
266
267
static int
268
39.3k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
39.3k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
728k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
689k
        block->b_instr[i] = block->b_instr[i-1];
272
689k
    }
273
39.3k
    block->b_instr[pos] = *instr;
274
39.3k
    return SUCCESS;
275
39.3k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
331k
{
343
331k
    assert(block != NULL);
344
331k
    g->g_curblock->b_next = block;
345
331k
    g->g_curblock = block;
346
331k
    return block;
347
331k
}
348
349
static inline int
350
881k
basicblock_exits_scope(const basicblock *b) {
351
881k
    cfg_instr *last = basicblock_last_instr(b);
352
881k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
881k
}
354
355
static inline int
356
661k
basicblock_has_eval_break(const basicblock *b) {
357
3.97M
    for (int i = 0; i < b->b_iused; i++) {
358
3.44M
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
129k
            return true;
360
129k
        }
361
3.44M
    }
362
532k
    return false;
363
661k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
4.46M
{
368
4.46M
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
4.46M
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
266k
        return true;
371
266k
    }
372
4.19M
    if (IS_LABEL(g->g_current_label)) {
373
65.2k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
65.2k
            return true;
375
65.2k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
65.2k
    }
382
4.13M
    return false;
383
4.19M
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
4.46M
{
388
4.46M
    if (cfg_builder_current_block_is_terminated(g)) {
389
331k
        basicblock *b = cfg_builder_new_block(g);
390
331k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
331k
        b->b_label = g->g_current_label;
394
331k
        g->g_current_label = NO_LABEL;
395
331k
        cfg_builder_use_next_block(g, b);
396
331k
    }
397
4.46M
    return SUCCESS;
398
4.46M
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
104k
{
404
104k
    assert(g->g_entryblock->b_iused > 0);
405
884k
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
780k
        assert(!_PyMem_IsPtrFreed(block));
407
780k
        if (block->b_instr != NULL) {
408
780k
            assert(block->b_ialloc > 0);
409
780k
            assert(block->b_iused >= 0);
410
780k
            assert(block->b_ialloc >= block->b_iused);
411
780k
        }
412
0
        else {
413
0
            assert (block->b_iused == 0);
414
0
            assert (block->b_ialloc == 0);
415
0
        }
416
780k
    }
417
104k
    return true;
418
104k
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
52.1k
{
424
52.1k
    g->g_block_list = NULL;
425
52.1k
    basicblock *block = cfg_builder_new_block(g);
426
52.1k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
52.1k
    g->g_curblock = g->g_entryblock = block;
430
52.1k
    g->g_current_label = NO_LABEL;
431
52.1k
    return SUCCESS;
432
52.1k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
52.1k
{
437
52.1k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
52.1k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
52.1k
    memset(g, 0, sizeof(cfg_builder));
443
52.1k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
52.1k
    return g;
448
52.1k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
52.1k
{
453
52.1k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
52.1k
    assert(cfg_builder_check(g));
457
52.1k
    basicblock *b = g->g_block_list;
458
448k
    while (b != NULL) {
459
396k
        if (b->b_instr) {
460
396k
            PyMem_Free((void *)b->b_instr);
461
396k
        }
462
396k
        basicblock *next = b->b_list;
463
396k
        PyMem_Free((void *)b);
464
396k
        b = next;
465
396k
    }
466
52.1k
    PyMem_Free(g);
467
52.1k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
52.1k
{
472
52.1k
    int nblocks = 0;
473
436k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
384k
        nblocks++;
475
384k
    }
476
52.1k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
52.1k
    return SUCCESS;
481
52.1k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
172k
{
486
172k
    g->g_current_label = lbl;
487
172k
    return cfg_builder_maybe_start_new_block(g);
488
172k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
4.29M
{
493
4.29M
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
4.29M
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
4.29M
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
1.21M
{
501
1.29M
    while (b && b->b_iused == 0) {
502
80.2k
        b = b->b_next;
503
80.2k
    }
504
1.21M
    return b;
505
1.21M
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
52.1k
no_redundant_nops(cfg_builder *g) {
514
52.1k
    if (remove_redundant_nops(g) != 0) {
515
0
        return false;
516
0
    }
517
52.1k
    return true;
518
52.1k
}
519
520
static bool
521
104k
no_redundant_jumps(cfg_builder *g) {
522
887k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
782k
        cfg_instr *last = basicblock_last_instr(b);
524
782k
        if (last != NULL) {
525
689k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
117k
                basicblock *next = next_nonempty_block(b->b_next);
527
117k
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
117k
                if (jump_target == next) {
529
0
                    assert(next);
530
0
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
0
                        assert(0);
532
0
                        return false;
533
0
                    }
534
0
                }
535
117k
            }
536
689k
        }
537
782k
    }
538
104k
    return true;
539
104k
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
396k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
396k
    cfg_instr *last = basicblock_last_instr(b);
547
396k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
300k
        return SUCCESS;
549
300k
    }
550
396k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
95.9k
    bool is_forward = last->i_target->b_visited == 0;
553
95.9k
    if (is_forward) {
554
94.7k
        RETURN_IF_ERROR(
555
94.7k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
94.7k
        return SUCCESS;
557
94.7k
    }
558
559
1.23k
    int reversed_opcode = 0;
560
1.23k
    switch(last->i_opcode) {
561
0
        case POP_JUMP_IF_NOT_NONE:
562
0
            reversed_opcode = POP_JUMP_IF_NONE;
563
0
            break;
564
0
        case POP_JUMP_IF_NONE:
565
0
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
0
            break;
567
1.22k
        case POP_JUMP_IF_FALSE:
568
1.22k
            reversed_opcode = POP_JUMP_IF_TRUE;
569
1.22k
            break;
570
10
        case POP_JUMP_IF_TRUE:
571
10
            reversed_opcode = POP_JUMP_IF_FALSE;
572
10
            break;
573
1.23k
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
1.23k
    basicblock *target = last->i_target;
579
1.23k
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
1.23k
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
1.23k
    RETURN_IF_ERROR(
584
1.23k
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
1.23k
    RETURN_IF_ERROR(
586
1.23k
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
1.23k
    backwards_jump->b_startdepth = target->b_startdepth;
588
1.23k
    last->i_opcode = reversed_opcode;
589
1.23k
    last->i_target = b->b_next;
590
591
1.23k
    backwards_jump->b_cold = b->b_cold;
592
1.23k
    backwards_jump->b_next = b->b_next;
593
1.23k
    b->b_next = backwards_jump;
594
1.23k
    return SUCCESS;
595
1.23k
}
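
In effect, the transformation keeps every conditional jump pointing forwards. A sketch of the rewrite, with hypothetical block labels:

/*
 * before:   B: ...; POP_JUMP_IF_FALSE T    T is behind B (backward jump)
 *           N: ...                         N is B's fallthrough
 *
 * after:    B: ...; POP_JUMP_IF_TRUE N     reversed condition, forward jump
 *           J: NOT_TAKEN; JUMP T           new block; B falls through here
 *           N: ...
 */
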
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
52.1k
{
601
52.1k
    basicblock *entryblock = g->g_entryblock;
602
447k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
395k
        b->b_visited = 0;
604
395k
    }
605
448k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
396k
        b->b_visited = 1;
607
396k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
396k
    }
609
52.1k
    return SUCCESS;
610
52.1k
}
611
612
static int
613
52.1k
check_cfg(cfg_builder *g) {
614
436k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if a jump or exit is not the last instruction in the block. */
616
4.67M
        for (int i = 0; i < b->b_iused; i++) {
617
4.29M
            int opcode = b->b_instr[i].i_opcode;
618
4.29M
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
4.29M
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
318k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
318k
            }
625
4.29M
        }
626
384k
    }
627
52.1k
    return SUCCESS;
628
52.1k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
188k
{
633
188k
    int lbl = -1;
634
1.71M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
1.52M
        if (b->b_label.id > lbl) {
636
607k
            lbl = b->b_label.id;
637
607k
        }
638
1.52M
    }
639
188k
    return lbl;
640
188k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
52.1k
{
646
52.1k
    int max_label = get_max_label(entryblock);
647
52.1k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
52.1k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
52.1k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
52.1k
    memset(label2block, 0, mapsize);
654
436k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
384k
        if (b->b_label.id >= 0) {
656
172k
            label2block[b->b_label.id] = b;
657
172k
        }
658
384k
    }
659
436k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
4.67M
        for (int i = 0; i < b->b_iused; i++) {
661
4.29M
            cfg_instr *instr = &b->b_instr[i];
662
4.29M
            assert(instr->i_target == NULL);
663
4.29M
            if (HAS_TARGET(instr->i_opcode)) {
664
245k
                int lbl = instr->i_oparg;
665
245k
                assert(lbl >= 0 && lbl <= max_label);
666
245k
                instr->i_target = label2block[lbl];
667
245k
                assert(instr->i_target != NULL);
668
245k
                assert(instr->i_target->b_label.id == lbl);
669
245k
            }
670
4.29M
        }
671
384k
    }
672
52.1k
    PyMem_Free(label2block);
673
52.1k
    return SUCCESS;
674
52.1k
}
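
After this pass, jumps are resolved through i_target pointers rather than label ids. A sketch with a hypothetical label id:

/*
 * before:  JUMP 3     i_oparg = 3 names a label, i_target == NULL
 * after:   JUMP 3     i_target = label2block[3], the block whose
 *                     b_label.id == 3; later passes use i_target
 */
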
675
676
static int
677
52.1k
mark_except_handlers(basicblock *entryblock) {
678
52.1k
#ifndef NDEBUG
679
436k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
384k
        assert(!b->b_except_handler);
681
384k
    }
682
52.1k
#endif
683
436k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
4.67M
        for (int i=0; i < b->b_iused; i++) {
685
4.29M
            cfg_instr *instr = &b->b_instr[i];
686
4.29M
            if (is_block_push(instr)) {
687
47.8k
                instr->i_target->b_except_handler = 1;
688
47.8k
            }
689
4.29M
        }
690
384k
    }
691
52.1k
    return SUCCESS;
692
52.1k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
47.6k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
47.6k
    assert(is_block_push(setup));
704
47.6k
    int opcode = setup->i_opcode;
705
47.6k
    basicblock * target = setup->i_target;
706
47.6k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
24.0k
        target->b_preserve_lasti = 1;
708
24.0k
    }
709
47.6k
    assert(stack->depth <= CO_MAXBLOCKS);
710
47.6k
    stack->handlers[++stack->depth] = target;
711
47.6k
    return target;
712
47.6k
}
713
714
static basicblock *
715
44.9k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
44.9k
    assert(stack->depth > 0);
717
44.9k
    return stack->handlers[--stack->depth];
718
44.9k
}
719
720
static basicblock *
721
351k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
351k
    return stack->handlers[stack->depth];
723
351k
}
724
725
static struct _PyCfgExceptStack *
726
52.1k
make_except_stack(void) {
727
52.1k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
52.1k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
52.1k
    new->depth = 0;
733
52.1k
    new->handlers[0] = NULL;
734
52.1k
    return new;
735
52.1k
}
736
737
static struct _PyCfgExceptStack *
738
121k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
121k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
121k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
121k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
121k
    return copy;
746
121k
}
747
748
static basicblock**
749
351k
make_cfg_traversal_stack(basicblock *entryblock) {
750
351k
    int nblocks = 0;
751
3.23M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
2.88M
        b->b_visited = 0;
753
2.88M
        nblocks++;
754
2.88M
    }
755
351k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
351k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
351k
    return stack;
760
351k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have a different stack effect when jumping to the target
765
   than when not jumping. The 'jump' parameter specifies the case:
766
767
   * 0 -- not jumping
768
   * 1 -- jumping
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
3.18M
{
779
3.18M
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
3.18M
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
3.18M
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
3.18M
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
3.18M
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
3.18M
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
47.6k
        effects->net = 0;
793
47.6k
        return 0;
794
47.6k
    }
795
3.13M
    effects->net = pushed - popped;
796
3.13M
    return 0;
797
3.18M
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
481k
{
802
481k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
481k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
359k
        assert(b->b_startdepth < 0);
808
359k
        b->b_startdepth = depth;
809
359k
        *(*sp)++ = b;
810
359k
    }
811
481k
    return SUCCESS;
812
481k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
52.1k
{
820
52.1k
    basicblock *entryblock = g->g_entryblock;
821
447k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
395k
        b->b_startdepth = INT_MIN;
823
395k
    }
824
52.1k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
52.1k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
52.1k
    int stackdepth = -1;
831
52.1k
    int maxdepth = 0;
832
52.1k
    basicblock **sp = stack;
833
52.1k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
411k
    while (sp != stack) {
837
359k
        basicblock *b = *--sp;
838
359k
        int depth = b->b_startdepth;
839
359k
        assert(depth >= 0);
840
359k
        basicblock *next = b->b_next;
841
3.16M
        for (int i = 0; i < b->b_iused; i++) {
842
2.95M
            cfg_instr *instr = &b->b_instr[i];
843
2.95M
            stack_effects effects;
844
2.95M
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
2.95M
            int new_depth = depth + effects.net;
851
2.95M
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow");
854
0
                goto error;
855
0
            }
856
2.95M
            maxdepth = Py_MAX(maxdepth, depth);
857
2.95M
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
223k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
223k
                int target_depth = depth + effects.net;
865
223k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
223k
                maxdepth = Py_MAX(maxdepth, depth);
867
223k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
223k
            }
871
2.95M
            depth = new_depth;
872
2.95M
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
2.95M
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
2.89M
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
153k
            {
876
                /* remaining code is dead */
877
153k
                next = NULL;
878
153k
                break;
879
153k
            }
880
2.95M
        }
881
359k
        if (next != NULL) {
882
205k
            assert(BB_HAS_FALLTHROUGH(b));
883
205k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
205k
        }
887
359k
    }
888
52.1k
    stackdepth = maxdepth;
889
52.1k
error:
890
52.1k
    PyMem_Free(stack);
891
52.1k
    return stackdepth;
892
52.1k
}
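
A worked example of the depth computation: with hypothetical per-instruction net effects (pushed - popped), a standalone sketch that mirrors the non-branching part of the loop above:

#include <stdio.h>

int main(void) {
    /* e.g. LOAD_CONST, LOAD_CONST, BINARY_OP, RETURN_VALUE */
    int net[] = { +1, +1, -1, -1 };  /* hypothetical stack effects */
    int depth = 0, maxdepth = 0;
    for (int i = 0; i < 4; i++) {
        if (depth > maxdepth) {
            maxdepth = depth;  /* track the high-water mark */
        }
        depth += net[i];
    }
    printf("maxdepth = %d\n", maxdepth);  /* prints: maxdepth = 2 */
    return 0;
}
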
893
894
static int
895
52.1k
label_exception_targets(basicblock *entryblock) {
896
52.1k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
52.1k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
52.1k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
52.1k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
52.1k
    except_stack->depth = 0;
907
52.1k
    todo_stack[0] = entryblock;
908
52.1k
    entryblock->b_visited = 1;
909
52.1k
    entryblock->b_exceptstack = except_stack;
910
52.1k
    basicblock **todo = &todo_stack[1];
911
52.1k
    basicblock *handler = NULL;
912
403k
    while (todo > todo_stack) {
913
351k
        todo--;
914
351k
        basicblock *b = todo[0];
915
351k
        assert(b->b_visited == 1);
916
351k
        except_stack = b->b_exceptstack;
917
351k
        assert(except_stack != NULL);
918
351k
        b->b_exceptstack = NULL;
919
351k
        handler = except_stack_top(except_stack);
920
351k
        int last_yield_except_depth = -1;
921
4.49M
        for (int i = 0; i < b->b_iused; i++) {
922
4.14M
            cfg_instr *instr = &b->b_instr[i];
923
4.14M
            if (is_block_push(instr)) {
924
47.6k
                if (!instr->i_target->b_visited) {
925
47.6k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
47.6k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
47.6k
                    instr->i_target->b_exceptstack = copy;
930
47.6k
                    todo[0] = instr->i_target;
931
47.6k
                    instr->i_target->b_visited = 1;
932
47.6k
                    todo++;
933
47.6k
                }
934
47.6k
                handler = push_except_block(except_stack, instr);
935
47.6k
            }
936
4.09M
            else if (instr->i_opcode == POP_BLOCK) {
937
44.9k
                handler = pop_except_block(except_stack);
938
44.9k
                INSTR_SET_OP0(instr, NOP);
939
44.9k
            }
940
4.05M
            else if (is_jump(instr)) {
941
196k
                instr->i_except = handler;
942
196k
                assert(i == b->b_iused -1);
943
196k
                if (!instr->i_target->b_visited) {
944
98.8k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
73.6k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
73.6k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
73.6k
                        instr->i_target->b_exceptstack = copy;
950
73.6k
                    }
951
25.1k
                    else {
952
25.1k
                        instr->i_target->b_exceptstack = except_stack;
953
25.1k
                        except_stack = NULL;
954
25.1k
                    }
955
98.8k
                    todo[0] = instr->i_target;
956
98.8k
                    instr->i_target->b_visited = 1;
957
98.8k
                    todo++;
958
98.8k
                }
959
196k
            }
960
3.85M
            else if (instr->i_opcode == YIELD_VALUE) {
961
19.0k
                instr->i_except = handler;
962
19.0k
                last_yield_except_depth = except_stack->depth;
963
19.0k
            }
964
3.83M
            else if (instr->i_opcode == RESUME) {
965
71.1k
                instr->i_except = handler;
966
71.1k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
19.0k
                    assert(last_yield_except_depth >= 0);
968
19.0k
                    if (last_yield_except_depth == 1) {
969
1.85k
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
1.85k
                    }
971
19.0k
                    last_yield_except_depth = -1;
972
19.0k
                }
973
71.1k
            }
974
3.76M
            else {
975
3.76M
                instr->i_except = handler;
976
3.76M
            }
977
4.14M
        }
978
351k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
979
153k
            assert(except_stack != NULL);
980
153k
            b->b_next->b_exceptstack = except_stack;
981
153k
            todo[0] = b->b_next;
982
153k
            b->b_next->b_visited = 1;
983
153k
            todo++;
984
153k
        }
985
198k
        else if (except_stack != NULL) {
986
173k
           PyMem_Free(except_stack);
987
173k
        }
988
351k
    }
989
#ifdef Py_DEBUG
990
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991
        assert(b->b_exceptstack == NULL);
992
    }
993
#endif
994
52.1k
    PyMem_Free(todo_stack);
995
52.1k
    return SUCCESS;
996
0
error:
997
0
    PyMem_Free(todo_stack);
998
0
    PyMem_Free(except_stack);
999
0
    return ERROR;
1000
52.1k
}
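
A sketch of how the simulated exception stack assigns i_except, with a hypothetical handler block H:

/*
 *   SETUP_FINALLY H    push H          subsequent instrs get i_except = H
 *   CALL ...           i_except = H
 *   POP_BLOCK          pop H (rewritten to NOP above)
 *   RETURN_VALUE       i_except = previous top (NULL if none)
 *
 * Each not-yet-visited successor receives a copy of the current stack,
 * so every path into a block carries the same handler context.
 */
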
1001
1002
/***** CFG optimizations *****/
1003
1004
static int
1005
104k
remove_unreachable(basicblock *entryblock) {
1006
875k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1007
770k
        b->b_predecessors = 0;
1008
770k
    }
1009
104k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1010
104k
    if (stack == NULL) {
1011
0
        return ERROR;
1012
0
    }
1013
104k
    basicblock **sp = stack;
1014
104k
    entryblock->b_predecessors = 1;
1015
104k
    *sp++ = entryblock;
1016
104k
    entryblock->b_visited = 1;
1017
795k
    while (sp > stack) {
1018
691k
        basicblock *b = *(--sp);
1019
691k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1020
380k
            if (!b->b_next->b_visited) {
1021
316k
                assert(b->b_next->b_predecessors == 0);
1022
316k
                *sp++ = b->b_next;
1023
316k
                b->b_next->b_visited = 1;
1024
316k
            }
1025
380k
            b->b_next->b_predecessors++;
1026
380k
        }
1027
7.81M
        for (int i = 0; i < b->b_iused; i++) {
1028
7.12M
            basicblock *target;
1029
7.12M
            cfg_instr *instr = &b->b_instr[i];
1030
7.12M
            if (is_jump(instr) || is_block_push(instr)) {
1031
463k
                target = instr->i_target;
1032
463k
                if (!target->b_visited) {
1033
269k
                    *sp++ = target;
1034
269k
                    target->b_visited = 1;
1035
269k
                }
1036
463k
                target->b_predecessors++;
1037
463k
            }
1038
7.12M
        }
1039
691k
    }
1040
104k
    PyMem_Free(stack);
1041
1042
    /* Delete unreachable instructions */
1043
875k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1044
770k
       if (b->b_predecessors == 0) {
1045
79.4k
            b->b_iused = 0;
1046
79.4k
            b->b_except_handler = 0;
1047
79.4k
       }
1048
770k
    }
1049
104k
    return SUCCESS;
1050
104k
}
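
The traversal is a depth-first walk over fallthrough edges, jump targets, and block pushes; blocks it never reaches keep b_predecessors == 0 and are emptied rather than unlinked, e.g.:

/*
 *   entry: ...; RETURN_VALUE
 *   dead:  ...                never pushed onto the stack above
 *                             => b_predecessors == 0, b_iused set to 0
 */
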
1051
1052
static int
1053
2.72M
basicblock_remove_redundant_nops(basicblock *bb) {
1054
    /* Remove NOPs when legal to do so. */
1055
2.72M
    int dest = 0;
1056
2.72M
    int prev_lineno = -1;
1057
22.7M
    for (int src = 0; src < bb->b_iused; src++) {
1058
20.0M
        int lineno = bb->b_instr[src].i_loc.lineno;
1059
20.0M
        if (bb->b_instr[src].i_opcode == NOP) {
1060
            /* Eliminate no-op if it doesn't have a line number */
1061
1.24M
            if (lineno < 0) {
1062
1.04M
                continue;
1063
1.04M
            }
1064
            /* or, if the previous instruction had the same line number. */
1065
207k
            if (prev_lineno == lineno) {
1066
126k
                continue;
1067
126k
            }
1068
            /* or, if the next instruction has the same line number or no line number */
1069
81.2k
            if (src < bb->b_iused - 1) {
1070
73.3k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1071
73.3k
                if (next_lineno == lineno) {
1072
52.6k
                    continue;
1073
52.6k
                }
1074
20.7k
                if (next_lineno < 0) {
1075
9.13k
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1076
9.13k
                    continue;
1077
9.13k
                }
1078
20.7k
            }
1079
7.83k
            else {
1080
7.83k
                basicblock *next = next_nonempty_block(bb->b_next);
1081
                /* or, if this is the last instruction in the block and the next block has the same line number */
1082
7.83k
                if (next) {
1083
7.83k
                    location next_loc = NO_LOCATION;
1084
12.5k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1085
12.5k
                        cfg_instr *instr = &next->b_instr[next_i];
1086
12.5k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1087
                            /* Skip over NOPs without a location, they will be removed */
1088
4.70k
                            continue;
1089
4.70k
                        }
1090
7.81k
                        next_loc = instr->i_loc;
1091
7.81k
                        break;
1092
12.5k
                    }
1093
7.83k
                    if (lineno == next_loc.lineno) {
1094
3.44k
                        continue;
1095
3.44k
                    }
1096
7.83k
                }
1097
7.83k
            }
1098
1099
81.2k
        }
1100
18.8M
        if (dest != src) {
1101
1.58M
            bb->b_instr[dest] = bb->b_instr[src];
1102
1.58M
        }
1103
18.8M
        dest++;
1104
18.8M
        prev_lineno = lineno;
1105
18.8M
    }
1106
2.72M
    assert(dest <= bb->b_iused);
1107
2.72M
    int num_removed = bb->b_iused - dest;
1108
2.72M
    bb->b_iused = dest;
1109
2.72M
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1110
2.72M
    return num_removed;
1111
2.72M
}
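
A sketch of the rules above, with hypothetical line numbers:

/*
 *   NOP   (lineno -1)   removed: no location to preserve
 *   NOP   (lineno 5)    removed if the previous or next instruction has
 *                       lineno 5; if the next has lineno -1, the NOP's
 *                       location is copied onto it first
 *   NOP   (lineno 7)    kept when it is the only carrier of line 7
 */
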
1112
1113
static int
1114
221k
remove_redundant_nops(cfg_builder *g) {
1115
221k
    int changes = 0;
1116
2.54M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1117
2.31M
        int change = basicblock_remove_redundant_nops(b);
1118
2.31M
        RETURN_IF_ERROR(change);
1119
2.31M
        changes += change;
1120
2.31M
    }
1121
221k
    return changes;
1122
221k
}
1123
1124
static int
1125
remove_redundant_nops_and_pairs(basicblock *entryblock)
1126
52.1k
{
1127
52.1k
    bool done = false;
1128
1129
105k
    while (! done) {
1130
53.4k
        done = true;
1131
53.4k
        cfg_instr *prev_instr = NULL;
1132
53.4k
        cfg_instr *instr = NULL;
1133
457k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1134
404k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1135
404k
            if (IS_LABEL(b->b_label)) {
1136
                /* this block is a jump target, forget instr */
1137
182k
                instr = NULL;
1138
182k
            }
1139
3.58M
            for (int i = 0; i < b->b_iused; i++) {
1140
3.18M
                prev_instr = instr;
1141
3.18M
                instr = &b->b_instr[i];
1142
3.18M
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1143
3.18M
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1144
3.18M
                int opcode = instr->i_opcode;
1145
3.18M
                bool is_redundant_pair = false;
1146
3.18M
                if (opcode == POP_TOP) {
1147
221k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1148
6.28k
                       is_redundant_pair = true;
1149
6.28k
                   }
1150
215k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1151
453
                       is_redundant_pair = true;
1152
453
                   }
1153
221k
                }
1154
3.18M
                if (is_redundant_pair) {
1155
6.74k
                    INSTR_SET_OP0(prev_instr, NOP);
1156
6.74k
                    INSTR_SET_OP0(instr, NOP);
1157
6.74k
                    done = false;
1158
6.74k
                }
1159
3.18M
            }
1160
404k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1161
294k
                instr = NULL;
1162
294k
            }
1163
404k
        }
1164
53.4k
    }
1165
52.1k
    return SUCCESS;
1166
52.1k
}
1167
1168
static int
1169
116k
remove_redundant_jumps(cfg_builder *g) {
1170
    /* If a non-empty block ends with a jump instruction, check if the next
1171
     * non-empty block reached through normal flow control is the target
1172
     * of that jump. If it is, then the jump instruction is redundant and
1173
     * can be deleted.
1174
     *
1175
     * Return the number of changes applied, or -1 on error.
1176
     */
1177
1178
116k
    int changes = 0;
1179
1.66M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1180
1.54M
        cfg_instr *last = basicblock_last_instr(b);
1181
1.54M
        if (last == NULL) {
1182
159k
            continue;
1183
159k
        }
1184
1.54M
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1185
1.38M
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1186
301k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1187
301k
            if (jump_target == NULL) {
1188
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1189
0
                return ERROR;
1190
0
            }
1191
301k
            basicblock *next = next_nonempty_block(b->b_next);
1192
301k
            if (jump_target == next) {
1193
10.8k
                changes++;
1194
10.8k
                INSTR_SET_OP0(last, NOP);
1195
10.8k
            }
1196
301k
        }
1197
1.38M
    }
1198
1199
116k
    return changes;
1200
116k
}
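
A sketch of the redundancy test, with hypothetical blocks:

/*
 *   B1: ...; JUMP L
 *   B2: (empty)
 *   B3 == L: ...
 *
 * next_nonempty_block(B1->b_next) == next_nonempty_block(L), so the JUMP
 * is rewritten to NOP and control reaches L by fallthrough.
 */
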
1201
1202
static inline bool
1203
349k
basicblock_has_no_lineno(basicblock *b) {
1204
440k
    for (int i = 0; i < b->b_iused; i++) {
1205
408k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1206
317k
            return false;
1207
317k
        }
1208
408k
    }
1209
31.2k
    return true;
1210
349k
}
1211
1212
/* Maximum size of basic block that should be copied in optimizer */
1213
8.79k
#define MAX_COPY_SIZE 4
1214
1215
/* If this block ends with an unconditional jump to a small exit block or
1216
 * a block that has no line numbers (and no fallthrough), then
1217
 * remove the jump and extend this block with the target.
1218
 * Returns 1 if extended, 0 if no change, and -1 on error.
1219
 */
1220
static int
1221
633k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1222
633k
    cfg_instr *last = basicblock_last_instr(bb);
1223
633k
    if (last == NULL) {
1224
0
        return 0;
1225
0
    }
1226
633k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1227
507k
        return 0;
1228
507k
    }
1229
125k
    basicblock *target = last->i_target;
1230
125k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1231
8.79k
                             target->b_iused <= MAX_COPY_SIZE);
1232
125k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1233
14.5k
                                     !BB_HAS_FALLTHROUGH(target));
1234
125k
    if (small_exit_block || no_lineno_no_fallthrough) {
1235
10.6k
        assert(is_jump(last));
1236
10.6k
        int removed_jump_opcode = last->i_opcode;
1237
10.6k
        INSTR_SET_OP0(last, NOP);
1238
10.6k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1239
10.6k
        if (no_lineno_no_fallthrough) {
1240
9.61k
            last = basicblock_last_instr(bb);
1241
9.61k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1242
3.84k
                removed_jump_opcode == JUMP)
1243
156
            {
1244
                /* Make sure we don't lose eval breaker checks */
1245
156
                last->i_opcode = JUMP;
1246
156
            }
1247
9.61k
        }
1248
10.6k
        target->b_predecessors--;
1249
10.6k
        return 1;
1250
10.6k
    }
1251
115k
    return 0;
1252
125k
}
1253
1254
static int
1255
52.1k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1256
52.1k
    bool changes;
1257
54.8k
    do {
1258
54.8k
        changes = false;
1259
687k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1260
633k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1261
633k
            RETURN_IF_ERROR(res);
1262
633k
            if (res) {
1263
10.6k
                changes = true;
1264
10.6k
            }
1265
633k
        }
1266
54.8k
    } while(changes); /* every change removes a jump, ensuring convergence */
1267
52.1k
    return changes;
1268
52.1k
}
1269
1270
// Attempt to eliminate jumps to jumps by updating inst to jump to
1271
// target->i_target using the provided opcode. Return whether or not the
1272
// optimization was successful.
1273
static bool
1274
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1275
7.49k
{
1276
7.49k
    assert(is_jump(inst));
1277
7.49k
    assert(is_jump(target));
1278
7.49k
    assert(inst == basicblock_last_instr(bb));
1279
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1280
    // changes (and we fall into an infinite loop):
1281
7.49k
    if (inst->i_target != target->i_target) {
1282
        /* Change inst to NOP and append a jump to target->i_target. The
1283
         * NOP will be removed later if it's not needed for the lineno.
1284
         */
1285
7.49k
        INSTR_SET_OP0(inst, NOP);
1286
1287
7.49k
        RETURN_IF_ERROR(
1288
7.49k
            basicblock_add_jump(
1289
7.49k
                bb, opcode, target->i_target, target->i_loc));
1290
1291
7.49k
        return true;
1292
7.49k
    }
1293
0
    return false;
1294
7.49k
}
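
A sketch of the threading step, with hypothetical blocks:

/*
 *   before:  B1: ...; JUMP B2        B2: JUMP B3
 *   after:   B1: ...; NOP; JUMP B3   (B2 is bypassed)
 *
 * The inst->i_target != target->i_target guard stops a jump that already
 * points at the final target from being "threaded" to itself forever.
 */
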
1295
1296
static int
1297
loads_const(int opcode)
1298
4.87M
{
1299
4.87M
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1300
4.87M
}
1301
1302
/* Returns new reference */
1303
static PyObject*
1304
get_const_value(int opcode, int oparg, PyObject *co_consts)
1305
2.20M
{
1306
2.20M
    PyObject *constant = NULL;
1307
2.20M
    assert(loads_const(opcode));
1308
2.20M
    if (opcode == LOAD_CONST) {
1309
1.83M
        constant = PyList_GET_ITEM(co_consts, oparg);
1310
1.83M
    }
1311
2.20M
    if (opcode == LOAD_SMALL_INT) {
1312
374k
        return PyLong_FromLong(oparg);
1313
374k
    }
1314
1315
1.83M
    if (constant == NULL) {
1316
0
        PyErr_SetString(PyExc_SystemError,
1317
0
                        "Internal error: failed to get value of a constant");
1318
0
        return NULL;
1319
0
    }
1320
1.83M
    return Py_NewRef(constant);
1321
1.83M
}
1322
1323
// Steals a reference to newconst.
1324
static int
1325
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1326
531k
{
1327
531k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1328
0
        Py_DECREF(newconst);
1329
0
        return -1;
1330
0
    }
1331
1332
531k
    Py_ssize_t index;
1333
43.2M
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1334
86.2M
        if (PyList_GET_ITEM(consts, index) == newconst) {
1335
415k
            break;
1336
415k
        }
1337
43.1M
    }
1338
531k
    if (index == PyList_GET_SIZE(consts)) {
1339
116k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1340
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1341
0
            Py_DECREF(newconst);
1342
0
            return -1;
1343
0
        }
1344
116k
        if (PyList_Append(consts, newconst)) {
1345
0
            Py_DECREF(newconst);
1346
0
            return -1;
1347
0
        }
1348
116k
    }
1349
531k
    Py_DECREF(newconst);
1350
531k
    return (int)index;
1351
531k
}
1352
1353
/*
1354
  Traverse the basic block's instructions backwards from index "start", skipping over NOPs.
1355
  Try to collect "size" consecutive constant-loading instructions into the array "instrs".
1356
  The caller must ensure that "instrs" has room for at least "size" instructions.
1357
1358
  Return a boolean indicating whether "size" such instructions were found.
1359
*/
1360
static bool
1361
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1362
910k
{
1363
910k
    assert(start < bb->b_iused);
1364
910k
    assert(size >= 0);
1365
910k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1366
1367
2.97M
    for (; start >= 0 && size > 0; start--) {
1368
2.33M
        cfg_instr *instr = &bb->b_instr[start];
1369
2.33M
        if (instr->i_opcode == NOP) {
1370
845k
            continue;
1371
845k
        }
1372
1.48M
        if (!loads_const(instr->i_opcode)) {
1373
271k
            return false;
1374
271k
        }
1375
1.21M
        instrs[--size] = instr;
1376
1.21M
    }
1377
1378
639k
    return size == 0;
1379
910k
}
1380
1381
/*
1382
  Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1383
  The caller must ensure "instrs" has at least "size" elements.
1384
*/
1385
static void
1386
nop_out(cfg_instr **instrs, int size)
1387
608k
{
1388
1.65M
    for (int i = 0; i < size; i++) {
1389
1.04M
        cfg_instr *instr = instrs[i];
1390
1.04M
        assert(instr->i_opcode != NOP);
1391
1.04M
        INSTR_SET_OP0(instr, NOP);
1392
1.04M
        INSTR_SET_LOC(instr, NO_LOCATION);
1393
1.04M
    }
1394
608k
}
1395
1396
/* Does not steal reference to "newconst".
1397
   Return 1 if the instruction was changed to LOAD_SMALL_INT.
1398
   Return 0 if the instruction could not be changed to LOAD_SMALL_INT.
1399
   Return -1 on error.
1400
*/
1401
static int
1402
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1403
                               PyObject *consts, PyObject *const_cache)
1404
1.62M
{
1405
1.62M
    if (PyLong_CheckExact(newconst)) {
1406
948k
        int overflow;
1407
948k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1408
948k
        if (val == -1 && PyErr_Occurred()) {
1409
0
            return -1;
1410
0
        }
1411
948k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1412
478k
            assert(_Py_IsImmortal(newconst));
1413
478k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1414
478k
            return 1;
1415
478k
        }
1416
948k
    }
1417
1.14M
    return 0;
1418
1.62M
}
1419
1420
1421
/* Steals reference to "newconst" */
1422
static int
1423
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1424
                      PyObject *consts, PyObject *const_cache)
1425
576k
{
1426
576k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1427
576k
    if (res < 0) {
1428
0
        Py_DECREF(newconst);
1429
0
        return ERROR;
1430
0
    }
1431
576k
    if (res > 0) {
1432
54.8k
        return SUCCESS;
1433
54.8k
    }
1434
521k
    int oparg = add_const(newconst, consts, const_cache);
1435
521k
    RETURN_IF_ERROR(oparg);
1436
521k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1437
521k
    return SUCCESS;
1438
521k
}
1439
1440
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1441
   with    LOAD_CONST (c1, c2, ... cn).
1442
   The consts table must still be in list form so that the
1443
   new constant (c1, c2, ... cn) can be appended.
1444
   Called with codestr pointing to the first LOAD_CONST.
1445
*/
1446
static int
1447
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1448
79.9k
{
1449
    /* Pre-conditions */
1450
79.9k
    assert(PyDict_CheckExact(const_cache));
1451
79.9k
    assert(PyList_CheckExact(consts));
1452
1453
79.9k
    cfg_instr *instr = &bb->b_instr[i];
1454
79.9k
    assert(instr->i_opcode == BUILD_TUPLE);
1455
1456
79.9k
    int seq_size = instr->i_oparg;
1457
79.9k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1458
23
        return SUCCESS;
1459
23
    }
1460
1461
79.9k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1462
79.9k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1463
        /* not a const sequence */
1464
38.8k
        return SUCCESS;
1465
38.8k
    }
1466
1467
41.0k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1468
41.0k
    if (const_tuple == NULL) {
1469
0
        return ERROR;
1470
0
    }
1471
1472
104k
    for (int i = 0; i < seq_size; i++) {
1473
63.8k
        cfg_instr *inst = const_instrs[i];
1474
63.8k
        assert(loads_const(inst->i_opcode));
1475
63.8k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1476
63.8k
        if (element == NULL) {
1477
0
            Py_DECREF(const_tuple);
1478
0
            return ERROR;
1479
0
        }
1480
63.8k
        PyTuple_SET_ITEM(const_tuple, i, element);
1481
63.8k
    }
1482
1483
41.0k
    nop_out(const_instrs, seq_size);
1484
41.0k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1485
41.0k
}
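
A sketch of the fold, with hypothetical constants:

/*
 *   before:  LOAD_CONST c1; LOAD_SMALL_INT 2; LOAD_CONST c3; BUILD_TUPLE 3
 *   after:   NOP; NOP; NOP; LOAD_CONST (c1, 2, c3)
 *
 * The NOPs keep instruction offsets stable here and are cleaned up later
 * by remove_redundant_nops.
 */
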
1486
1487
/* Replace:
1488
    BUILD_LIST 0
1489
    LOAD_CONST c1
1490
    LIST_APPEND 1
1491
    LOAD_CONST c2
1492
    LIST_APPEND 1
1493
    ...
1494
    LOAD_CONST cN
1495
    LIST_APPEND 1
1496
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1497
   with:
1498
    LOAD_CONST (c1, c2, ... cN)
1499
*/
1500
static int
1501
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1502
                                      PyObject *consts, PyObject *const_cache)
1503
7.58k
{
1504
7.58k
    assert(PyDict_CheckExact(const_cache));
1505
7.58k
    assert(PyList_CheckExact(consts));
1506
7.58k
    assert(i >= 0);
1507
7.58k
    assert(i < bb->b_iused);
1508
1509
7.58k
    cfg_instr *intrinsic = &bb->b_instr[i];
1510
7.58k
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1511
7.58k
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1512
1513
7.58k
    int consts_found = 0;
1514
7.58k
    bool expect_append = true;
1515
1516
43.0k
    for (int pos = i - 1; pos >= 0; pos--) {
1517
42.8k
        cfg_instr *instr = &bb->b_instr[pos];
1518
42.8k
        int opcode = instr->i_opcode;
1519
42.8k
        int oparg = instr->i_oparg;
1520
1521
42.8k
        if (opcode == NOP) {
1522
2.90k
            continue;
1523
2.90k
        }
1524
1525
39.9k
        if (opcode == BUILD_LIST && oparg == 0) {
1526
190
            if (!expect_append) {
1527
                /* Not a sequence start. */
1528
12
                return SUCCESS;
1529
12
            }
1530
1531
            /* Sequence start, we are done. */
1532
178
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1533
178
            if (newconst == NULL) {
1534
0
                return ERROR;
1535
0
            }
1536
1537
27.9k
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1538
27.7k
                instr = &bb->b_instr[newpos];
1539
27.7k
                if (instr->i_opcode == NOP) {
1540
901
                    continue;
1541
901
                }
1542
26.8k
                if (loads_const(instr->i_opcode)) {
1543
13.3k
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1544
13.3k
                    if (constant == NULL) {
1545
0
                        Py_DECREF(newconst);
1546
0
                        return ERROR;
1547
0
                    }
1548
13.3k
                    assert(consts_found > 0);
1549
13.3k
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1550
13.3k
                }
1551
26.8k
                nop_out(&instr, 1);
1552
26.8k
            }
1553
178
            assert(consts_found == 0);
1554
178
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1555
178
        }
1556
1557
39.7k
        if (expect_append) {
1558
21.9k
            if (opcode != LIST_APPEND || oparg != 1) {
1559
3.97k
                return SUCCESS;
1560
3.97k
            }
1561
21.9k
        }
1562
17.7k
        else {
1563
17.7k
            if (!loads_const(opcode)) {
1564
3.17k
                return SUCCESS;
1565
3.17k
            }
1566
14.5k
            consts_found++;
1567
14.5k
        }
1568
1569
32.5k
        expect_append = !expect_append;
1570
32.5k
    }
1571
1572
    /* Did not find sequence start. */
1573
244
    return SUCCESS;
1574
7.58k
}
1575
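Aside: the backwards scan above alternates between expecting a LIST_APPEND 1 and expecting a constant load until it reaches the BUILD_LIST 0 that started the sequence. A minimal sketch of that state machine, with toy opcode tags (NOP skipping and the actual folding are omitted):

#include <assert.h>
#include <stdbool.h>

enum { C_BUILD_LIST0, C_LOAD_CONST, C_LIST_APPEND1, C_OTHER };

/* Scan backwards from the CALL_INTRINSIC_1 at index i.  Returns the number
 * of foldable constants, or -1 if this is not a pure constant sequence. */
static int
toy_scan_list_to_tuple(const int *ops, int i)
{
    int consts_found = 0;
    bool expect_append = true;
    for (int pos = i - 1; pos >= 0; pos--) {
        if (ops[pos] == C_BUILD_LIST0) {
            return expect_append ? consts_found : -1;
        }
        if (expect_append ? ops[pos] != C_LIST_APPEND1
                          : ops[pos] != C_LOAD_CONST) {
            return -1;
        }
        consts_found += !expect_append;   /* count each matched constant */
        expect_append = !expect_append;
    }
    return -1;                            /* no sequence start found */
}

int
main(void)
{
    int ops[6] = {C_BUILD_LIST0, C_LOAD_CONST, C_LIST_APPEND1,
                  C_LOAD_CONST, C_LIST_APPEND1, C_OTHER /* the intrinsic */};
    assert(toy_scan_list_to_tuple(ops, 5) == 2);
    return 0;
}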
1576
50.9k
#define MIN_CONST_SEQUENCE_SIZE 3
1577
/*
1578
Optimize lists and sets for:
1579
    1. "for" loop, comprehension or "in"/"not in" tests:
1580
           Change literal list or set of constants into constant
1581
           tuple or frozenset respectively. Change list of
1582
           non-constants into tuple.
1583
    2. Constant literal lists/sets with length >= MIN_CONST_SEQUENCE_SIZE:
1584
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1585
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1586
           or BUILD_SET & SET_UPDATE respectively.
1587
*/
1588
static int
1589
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1590
                        PyObject *consts, PyObject *const_cache)
1591
25.4k
{
1592
25.4k
    assert(PyDict_CheckExact(const_cache));
1593
25.4k
    assert(PyList_CheckExact(consts));
1594
1595
25.4k
    cfg_instr *instr = &bb->b_instr[i];
1596
25.4k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1597
1598
25.4k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1599
25.4k
    int seq_size = instr->i_oparg;
1600
25.4k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1601
25.4k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1602
19.5k
    {
1603
19.5k
        return SUCCESS;
1604
19.5k
    }
1605
1606
5.94k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1607
5.94k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1608
559
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1609
            /* iterate over a tuple instead of list */
1610
30
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1611
30
        }
1612
559
        return SUCCESS;
1613
559
    }
1614
1615
5.38k
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1616
5.38k
    if (const_result == NULL) {
1617
0
        return ERROR;
1618
0
    }
1619
1620
49.2k
    for (int i = 0; i < seq_size; i++) {
1621
43.8k
        cfg_instr *inst = const_instrs[i];
1622
43.8k
        assert(loads_const(inst->i_opcode));
1623
43.8k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1624
43.8k
        if (element == NULL) {
1625
0
            Py_DECREF(const_result);
1626
0
            return ERROR;
1627
0
        }
1628
43.8k
        PyTuple_SET_ITEM(const_result, i, element);
1629
43.8k
    }
1630
1631
5.38k
    if (instr->i_opcode == BUILD_SET) {
1632
4.91k
        PyObject *frozenset = PyFrozenSet_New(const_result);
1633
4.91k
        if (frozenset == NULL) {
1634
0
            Py_DECREF(const_result);
1635
0
            return ERROR;
1636
0
        }
1637
4.91k
        Py_SETREF(const_result, frozenset);
1638
4.91k
    }
1639
1640
5.38k
    int index = add_const(const_result, consts, const_cache);
1641
5.38k
    RETURN_IF_ERROR(index);
1642
5.38k
    nop_out(const_instrs, seq_size);
1643
1644
5.38k
    if (contains_or_iter) {
1645
43
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1646
43
    }
1647
5.34k
    else {
1648
5.34k
        assert(i >= 2);
1649
5.34k
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1650
1651
5.34k
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1652
1653
5.34k
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1654
5.34k
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1655
5.34k
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1656
5.34k
    }
1657
5.38k
    return SUCCESS;
1658
5.38k
}
1659
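Aside: a sketch of the non-iteration rewrite above with toy opcodes. Assuming the n constant loads sit contiguously before the build instruction (the real pass locates them with get_const_loading_instrs, tolerating NOPs), slots i-2, i-1, and i are repurposed and the remaining loads become NOPs; the consts-table index for the folded tuple is hypothetical.

#include <assert.h>

typedef struct { int op, arg; } toy_instr;
enum { T_NOP, T_LOAD_CONST, T_BUILD_LIST, T_LIST_EXTEND };

/* instrs[i] is BUILD_LIST n with n >= MIN_CONST_SEQUENCE_SIZE (3), so
 * slots i-2 and i-1 are loads we are free to overwrite. */
static void
toy_rewrite_const_list(toy_instr *instrs, int i, int tuple_index)
{
    for (int k = i - instrs[i].arg; k < i - 2; k++) {
        instrs[k] = (toy_instr){T_NOP, 0};
    }
    instrs[i - 2] = (toy_instr){T_BUILD_LIST, 0};
    instrs[i - 1] = (toy_instr){T_LOAD_CONST, tuple_index};
    instrs[i]     = (toy_instr){T_LIST_EXTEND, 1};
}

int
main(void)
{
    toy_instr code[4] = {
        {T_LOAD_CONST, 0}, {T_LOAD_CONST, 1}, {T_LOAD_CONST, 2},
        {T_BUILD_LIST, 3},
    };
    toy_rewrite_const_list(code, 3, 9);  /* pretend the tuple is consts[9] */
    assert(code[0].op == T_NOP);
    assert(code[1].op == T_BUILD_LIST && code[1].arg == 0);
    assert(code[2].op == T_LOAD_CONST && code[2].arg == 9);
    assert(code[3].op == T_LIST_EXTEND && code[3].arg == 1);
    return 0;
}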
1660
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1661
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1662
 * Used to avoid creating constants which are slow to hash.
1663
 */
1664
static Py_ssize_t
1665
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1666
131k
{
1667
131k
    if (PyTuple_Check(obj)) {
1668
74.3k
        Py_ssize_t i;
1669
74.3k
        limit -= PyTuple_GET_SIZE(obj);
1670
180k
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1671
106k
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1672
106k
            if (limit < 0) {
1673
101
                return limit;
1674
101
            }
1675
106k
        }
1676
74.3k
    }
1677
131k
    return limit;
1678
131k
}
1679
1680
68.1k
#define MAX_INT_SIZE           128  /* bits */
1681
27.0k
#define MAX_COLLECTION_SIZE    256  /* items */
1682
11.6k
#define MAX_STR_SIZE          4096  /* characters */
1683
25.0k
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1684
1685
static PyObject *
1686
const_folding_safe_multiply(PyObject *v, PyObject *w)
1687
146k
{
1688
146k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1689
19.9k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1690
146k
    ) {
1691
16.4k
        int64_t vbits = _PyLong_NumBits(v);
1692
16.4k
        int64_t wbits = _PyLong_NumBits(w);
1693
16.4k
        assert(vbits >= 0);
1694
16.4k
        assert(wbits >= 0);
1695
16.4k
        if (vbits + wbits > MAX_INT_SIZE) {
1696
257
            return NULL;
1697
257
        }
1698
16.4k
    }
1699
129k
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1700
27.4k
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1701
27.4k
        if (size) {
1702
27.1k
            long n = PyLong_AsLong(v);
1703
27.1k
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1704
249
                return NULL;
1705
249
            }
1706
26.9k
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1707
23
                return NULL;
1708
23
            }
1709
26.9k
        }
1710
27.4k
    }
1711
102k
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1712
26.6k
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1713
26.6k
                                               PyBytes_GET_SIZE(w);
1714
26.6k
        if (size) {
1715
12.6k
            long n = PyLong_AsLong(v);
1716
12.6k
            if (n < 0 || n > MAX_STR_SIZE / size) {
1717
1.60k
                return NULL;
1718
1.60k
            }
1719
12.6k
        }
1720
26.6k
    }
1721
75.6k
    else if (PyLong_Check(w) &&
1722
64.2k
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1723
51.6k
    {
1724
51.6k
        return const_folding_safe_multiply(w, v);
1725
51.6k
    }
1726
1727
92.5k
    return PyNumber_Multiply(v, w);
1728
146k
}
1729
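Aside: the guard above relies on the fact that for nonzero integers, bits(v * w) <= bits(v) + bits(w), so rejecting folds whose summed bit lengths exceed MAX_INT_SIZE bounds any folded constant at 128 bits (const_folding_safe_lshift below uses the analogous bound bits(v << w) = bits(v) + w). A toy version of the check for machine-word operands:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TOY_MAX_INT_SIZE 128

static int
bit_length(uint64_t x)
{
    int n = 0;
    while (x) {
        n++;
        x >>= 1;
    }
    return n;
}

static bool
product_is_foldable(int vbits, int wbits)
{
    return vbits + wbits <= TOY_MAX_INT_SIZE;
}

int
main(void)
{
    assert(bit_length(1000) == 10);       /* 512 <= 1000 < 1024 */
    assert(product_is_foldable(10, 10));  /* 1000 * 1000 is safe to fold */
    assert(!product_is_foldable(80, 80)); /* result could need 160 bits */
    return 0;
}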
1730
static PyObject *
1731
const_folding_safe_power(PyObject *v, PyObject *w)
1732
62.7k
{
1733
62.7k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1734
38.5k
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1735
62.7k
    ) {
1736
26.6k
        int64_t vbits = _PyLong_NumBits(v);
1737
26.6k
        size_t wbits = PyLong_AsSize_t(w);
1738
26.6k
        assert(vbits >= 0);
1739
26.6k
        if (wbits == (size_t)-1) {
1740
280
            return NULL;
1741
280
        }
1742
26.4k
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1743
2.82k
            return NULL;
1744
2.82k
        }
1745
26.4k
    }
1746
1747
59.6k
    return PyNumber_Power(v, w, Py_None);
1748
62.7k
}
1749
1750
static PyObject *
1751
const_folding_safe_lshift(PyObject *v, PyObject *w)
1752
12.7k
{
1753
12.7k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1754
12.2k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1755
12.7k
    ) {
1756
9.71k
        int64_t vbits = _PyLong_NumBits(v);
1757
9.71k
        size_t wbits = PyLong_AsSize_t(w);
1758
9.71k
        assert(vbits >= 0);
1759
9.71k
        if (wbits == (size_t)-1) {
1760
602
            return NULL;
1761
602
        }
1762
9.11k
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1763
2.43k
            return NULL;
1764
2.43k
        }
1765
9.11k
    }
1766
1767
9.70k
    return PyNumber_Lshift(v, w);
1768
12.7k
}
1769
1770
static PyObject *
1771
const_folding_safe_mod(PyObject *v, PyObject *w)
1772
28.1k
{
1773
28.1k
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1774
1.52k
        return NULL;
1775
1.52k
    }
1776
1777
26.5k
    return PyNumber_Remainder(v, w);
1778
28.1k
}
1779
1780
static PyObject *
1781
eval_const_binop(PyObject *left, int op, PyObject *right)
1782
435k
{
1783
435k
    assert(left != NULL && right != NULL);
1784
435k
    assert(op >= 0 && op <= NB_OPARG_LAST);
1785
1786
435k
    PyObject *result = NULL;
1787
435k
    switch (op) {
1788
41.0k
        case NB_ADD:
1789
41.0k
            result = PyNumber_Add(left, right);
1790
41.0k
            break;
1791
44.8k
        case NB_SUBTRACT:
1792
44.8k
            result = PyNumber_Subtract(left, right);
1793
44.8k
            break;
1794
94.6k
        case NB_MULTIPLY:
1795
94.6k
            result = const_folding_safe_multiply(left, right);
1796
94.6k
            break;
1797
73.8k
        case NB_TRUE_DIVIDE:
1798
73.8k
            result = PyNumber_TrueDivide(left, right);
1799
73.8k
            break;
1800
24.0k
        case NB_FLOOR_DIVIDE:
1801
24.0k
            result = PyNumber_FloorDivide(left, right);
1802
24.0k
            break;
1803
28.1k
        case NB_REMAINDER:
1804
28.1k
            result = const_folding_safe_mod(left, right);
1805
28.1k
            break;
1806
62.7k
        case NB_POWER:
1807
62.7k
            result = const_folding_safe_power(left, right);
1808
62.7k
            break;
1809
12.7k
        case NB_LSHIFT:
1810
12.7k
            result = const_folding_safe_lshift(left, right);
1811
12.7k
            break;
1812
16.5k
        case NB_RSHIFT:
1813
16.5k
            result = PyNumber_Rshift(left, right);
1814
16.5k
            break;
1815
5.28k
        case NB_OR:
1816
5.28k
            result = PyNumber_Or(left, right);
1817
5.28k
            break;
1818
7.53k
        case NB_XOR:
1819
7.53k
            result = PyNumber_Xor(left, right);
1820
7.53k
            break;
1821
8.18k
        case NB_AND:
1822
8.18k
            result = PyNumber_And(left, right);
1823
8.18k
            break;
1824
15.6k
        case NB_SUBSCR:
1825
15.6k
            result = PyObject_GetItem(left, right);
1826
15.6k
            break;
1827
604
        case NB_MATRIX_MULTIPLY:
1828
            // No builtin constants implement matrix multiplication
1829
604
            break;
1830
0
        default:
1831
0
            Py_UNREACHABLE();
1832
435k
    }
1833
435k
    return result;
1834
435k
}
1835
1836
static int
1837
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1838
619k
{
1839
999k
    #define BINOP_OPERAND_COUNT 2
1840
619k
    assert(PyDict_CheckExact(const_cache));
1841
619k
    assert(PyList_CheckExact(consts));
1842
1843
619k
    cfg_instr *binop = &bb->b_instr[i];
1844
619k
    assert(binop->i_opcode == BINARY_OP);
1845
1846
619k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1847
619k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1848
        /* not a const sequence */
1849
183k
        return SUCCESS;
1850
183k
    }
1851
1852
435k
    cfg_instr *lhs_instr = operands_instrs[0];
1853
435k
    assert(loads_const(lhs_instr->i_opcode));
1854
435k
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1855
435k
    if (lhs == NULL) {
1856
0
        return ERROR;
1857
0
    }
1858
1859
435k
    cfg_instr *rhs_instr = operands_instrs[1];
1860
435k
    assert(loads_const(rhs_instr->i_opcode));
1861
435k
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1862
435k
    if (rhs == NULL) {
1863
0
        Py_DECREF(lhs);
1864
0
        return ERROR;
1865
0
    }
1866
1867
435k
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1868
435k
    Py_DECREF(lhs);
1869
435k
    Py_DECREF(rhs);
1870
435k
    if (newconst == NULL) {
1871
55.9k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1872
0
            return ERROR;
1873
0
        }
1874
55.9k
        PyErr_Clear();
1875
55.9k
        return SUCCESS;
1876
55.9k
    }
1877
1878
379k
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1879
379k
    return instr_make_load_const(binop, newconst, consts, const_cache);
1880
435k
}
1881
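Aside: note the fold-or-keep policy above: if evaluating the operation fails at compile time (e.g. a constant 1 / 0), the error is cleared and the original bytecode is left in place so it raises at run time; only KeyboardInterrupt aborts compilation. A toy stand-in for that policy, with a hypothetical evaluator:

#include <assert.h>
#include <stdbool.h>

typedef struct { long value; bool ok; } toy_result;

static toy_result
toy_eval_div(long lhs, long rhs)
{
    if (rhs == 0) {
        return (toy_result){0, false};  /* would raise ZeroDivisionError */
    }
    return (toy_result){lhs / rhs, true};
}

/* Returns true if folded; false means "keep the original instructions so
 * the error surfaces at run time" (mirrors PyErr_Clear(); return SUCCESS). */
static bool
toy_fold_div(long lhs, long rhs, long *folded)
{
    toy_result r = toy_eval_div(lhs, rhs);
    if (!r.ok) {
        return false;
    }
    *folded = r.value;
    return true;
}

int
main(void)
{
    long folded;
    assert(toy_fold_div(6, 3, &folded) && folded == 2);
    assert(!toy_fold_div(1, 0, &folded));   /* deferred to run time */
    return 0;
}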
1882
static PyObject *
1883
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1884
156k
{
1885
156k
    assert(operand != NULL);
1886
156k
    assert(
1887
156k
        opcode == UNARY_NEGATIVE ||
1888
156k
        opcode == UNARY_INVERT ||
1889
156k
        opcode == UNARY_NOT ||
1890
156k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1891
156k
    );
1892
156k
    PyObject *result;
1893
156k
    switch (opcode) {
1894
103k
        case UNARY_NEGATIVE:
1895
103k
            result = PyNumber_Negative(operand);
1896
103k
            break;
1897
16.2k
        case UNARY_INVERT:
1898
            // XXX: This should be removed once the ~bool deprecation expires.
1899
16.2k
            if (PyBool_Check(operand)) {
1900
67
                return NULL;
1901
67
            }
1902
16.1k
            result = PyNumber_Invert(operand);
1903
16.1k
            break;
1904
665
        case UNARY_NOT: {
1905
665
            int r = PyObject_IsTrue(operand);
1906
665
            if (r < 0) {
1907
0
                return NULL;
1908
0
            }
1909
665
            result = PyBool_FromLong(!r);
1910
665
            break;
1911
665
        }
1912
36.3k
        case CALL_INTRINSIC_1:
1913
36.3k
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1914
0
                Py_UNREACHABLE();
1915
0
            }
1916
36.3k
            result = PyNumber_Positive(operand);
1917
36.3k
            break;
1918
0
        default:
1919
0
            Py_UNREACHABLE();
1920
156k
    }
1921
156k
    return result;
1922
156k
}
1923
1924
static int
1925
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1926
205k
{
1927
360k
    #define UNARYOP_OPERAND_COUNT 1
1928
205k
    assert(PyDict_CheckExact(const_cache));
1929
205k
    assert(PyList_CheckExact(consts));
1930
205k
    cfg_instr *unaryop = &bb->b_instr[i];
1931
1932
205k
    cfg_instr *operand_instr;
1933
205k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1934
        /* not a const */
1935
48.7k
        return SUCCESS;
1936
48.7k
    }
1937
1938
205k
    assert(loads_const(operand_instr->i_opcode));
1939
156k
    PyObject *operand = get_const_value(
1940
156k
        operand_instr->i_opcode,
1941
156k
        operand_instr->i_oparg,
1942
156k
        consts
1943
156k
    );
1944
156k
    if (operand == NULL) {
1945
0
        return ERROR;
1946
0
    }
1947
1948
156k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1949
156k
    Py_DECREF(operand);
1950
156k
    if (newconst == NULL) {
1951
1.20k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1952
0
            return ERROR;
1953
0
        }
1954
1.20k
        PyErr_Clear();
1955
1.20k
        return SUCCESS;
1956
1.20k
    }
1957
1958
155k
    if (unaryop->i_opcode == UNARY_NOT) {
1959
665
        assert(PyBool_Check(newconst));
1960
665
    }
1961
155k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1962
155k
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1963
155k
}
1964
1965
139k
#define VISITED (-1)
1966
1967
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1968
// same effect.
1969
static int
1970
swaptimize(basicblock *block, int *ix)
1971
77.3k
{
1972
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1973
    // for this function. Make sure to blow away cached *.pyc files first!
1974
77.3k
    assert(*ix < block->b_iused);
1975
77.3k
    cfg_instr *instructions = &block->b_instr[*ix];
1976
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1977
    // maximum depth of the stack manipulations:
1978
77.3k
    assert(instructions[0].i_opcode == SWAP);
1979
77.3k
    int depth = instructions[0].i_oparg;
1980
77.3k
    int len = 0;
1981
77.3k
    int more = false;
1982
77.3k
    int limit = block->b_iused - *ix;
1983
92.7k
    while (++len < limit) {
1984
92.7k
        int opcode = instructions[len].i_opcode;
1985
92.7k
        if (opcode == SWAP) {
1986
14.3k
            depth = Py_MAX(depth, instructions[len].i_oparg);
1987
14.3k
            more = true;
1988
14.3k
        }
1989
78.4k
        else if (opcode != NOP) {
1990
77.3k
            break;
1991
77.3k
        }
1992
92.7k
    }
1993
    // It's already optimal if there's only one SWAP:
1994
77.3k
    if (!more) {
1995
66.7k
        return SUCCESS;
1996
66.7k
    }
1997
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1998
10.6k
    int *stack = PyMem_Malloc(depth * sizeof(int));
1999
10.6k
    if (stack == NULL) {
2000
0
        PyErr_NoMemory();
2001
0
        return ERROR;
2002
0
    }
2003
43.8k
    for (int i = 0; i < depth; i++) {
2004
33.1k
        stack[i] = i;
2005
33.1k
    }
2006
    // Simulate the combined effect of these instructions by "running" them on
2007
    // our "stack":
2008
36.6k
    for (int i = 0; i < len; i++) {
2009
26.0k
        if (instructions[i].i_opcode == SWAP) {
2010
24.9k
            int oparg = instructions[i].i_oparg;
2011
24.9k
            int top = stack[0];
2012
            // SWAPs are 1-indexed:
2013
24.9k
            stack[0] = stack[oparg - 1];
2014
24.9k
            stack[oparg - 1] = top;
2015
24.9k
        }
2016
26.0k
    }
2017
    // Now we can begin! Our approach here is based on a solution to a closely
2018
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2019
    // think of this algorithm as determining the steps needed to efficiently
2020
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2021
    // though, we can efficiently *shuffle* it! For this reason, we will be
2022
    // replacing instructions starting from the *end* of the run. Since the
2023
    // solution is optimal, we don't need to worry about running out of space:
2024
10.6k
    int current = len - 1;
2025
43.8k
    for (int i = 0; i < depth; i++) {
2026
        // Skip items that have already been visited, or just happen to be in
2027
        // the correct location:
2028
33.1k
        if (stack[i] == VISITED || stack[i] == i) {
2029
23.2k
            continue;
2030
23.2k
        }
2031
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2032
        // with other items; traversing the cycle and swapping each item with
2033
        // the next will put them all in the correct place. The weird
2034
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2035
        // can only swap from that position:
2036
9.91k
        int j = i;
2037
41.5k
        while (true) {
2038
            // Skip the actual swap if our item is zero, since swapping the top
2039
            // item with itself is pointless:
2040
41.5k
            if (j) {
2041
21.8k
                assert(0 <= current);
2042
                // SWAPs are 1-indexed:
2043
21.8k
                instructions[current].i_opcode = SWAP;
2044
21.8k
                instructions[current--].i_oparg = j + 1;
2045
21.8k
            }
2046
41.5k
            if (stack[j] == VISITED) {
2047
                // Completed the cycle:
2048
9.91k
                assert(j == i);
2049
9.91k
                break;
2050
9.91k
            }
2051
31.6k
            int next_j = stack[j];
2052
31.6k
            stack[j] = VISITED;
2053
31.6k
            j = next_j;
2054
31.6k
        }
2055
9.91k
    }
2056
    // NOP out any unused instructions:
2057
14.8k
    while (0 <= current) {
2058
4.23k
        INSTR_SET_OP0(&instructions[current--], NOP);
2059
4.23k
    }
2060
10.6k
    PyMem_Free(stack);
2061
10.6k
    *ix += len - 1;
2062
10.6k
    return SUCCESS;
2063
10.6k
}
2064
2065
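Aside: a standalone demonstration of the cycle-decomposition idea behind swaptimize. Given the permutation a SWAP run applies (perm[i] = index of the item that ends up in slot i), emitting one SWAP per cycle step, in reverse execution order, reproduces the shuffle with a minimal number of swaps. Toy code, not CPython's structs:

#include <assert.h>
#include <stdio.h>

#define TOY_VISITED (-1)

/* Emits SWAP opargs into ops[] in algorithm order; executing them in
 * *reverse* order on an identity stack reproduces perm.  Consumes perm. */
static int
minimal_swaps(int *perm, int n, int *ops)
{
    int nops = 0;
    for (int i = 0; i < n; i++) {
        if (perm[i] == TOY_VISITED || perm[i] == i) {
            continue;                 /* handled already, or in place */
        }
        int j = i;
        while (1) {
            if (j) {
                ops[nops++] = j + 1;  /* SWAPs are 1-indexed */
            }
            if (perm[j] == TOY_VISITED) {
                break;                /* closed the cycle */
            }
            int next = perm[j];
            perm[j] = TOY_VISITED;
            j = next;
        }
    }
    return nops;
}

int
main(void)
{
    /* SWAP(2); SWAP(3) shuffles [0, 1, 2] into [2, 0, 1]. */
    int perm[3] = {2, 0, 1};
    int ops[3], stack[3] = {0, 1, 2};
    int nops = minimal_swaps(perm, 3, ops);
    for (int k = nops - 1; k >= 0; k--) {   /* reverse = execution order */
        int top = stack[0];
        stack[0] = stack[ops[k] - 1];
        stack[ops[k] - 1] = top;
    }
    assert(nops == 2);
    assert(stack[0] == 2 && stack[1] == 0 && stack[2] == 1);
    printf("reproduced the shuffle in %d swaps\n", nops);
    return 0;
}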
2066
// This list is pretty small, since it's only okay to reorder opcodes that:
2067
// - can't affect control flow (like jumping or raising exceptions)
2068
// - can't invoke arbitrary code (besides finalizers)
2069
// - only touch the TOS (and pop it when finished)
2070
#define SWAPPABLE(opcode) \
2071
104k
    ((opcode) == STORE_FAST || \
2072
104k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2073
104k
     (opcode) == POP_TOP)
2074
2075
#define STORES_TO(instr) \
2076
2.92k
    (((instr).i_opcode == STORE_FAST || \
2077
2.92k
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2078
2.92k
     ? (instr).i_oparg : -1)
2079
2080
static int
2081
next_swappable_instruction(basicblock *block, int i, int lineno)
2082
107k
{
2083
107k
    while (++i < block->b_iused) {
2084
103k
        cfg_instr *instruction = &block->b_instr[i];
2085
103k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2086
            // Optimizing across this instruction could cause user-visible
2087
            // changes in the names bound between line tracing events!
2088
84
            return -1;
2089
84
        }
2090
103k
        if (instruction->i_opcode == NOP) {
2091
330
            continue;
2092
330
        }
2093
102k
        if (SWAPPABLE(instruction->i_opcode)) {
2094
32.0k
            return i;
2095
32.0k
        }
2096
70.7k
        return -1;
2097
102k
    }
2098
4.76k
    return -1;
2099
107k
}
2100
2101
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2102
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2103
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2104
static void
2105
apply_static_swaps(basicblock *block, int i)
2106
77.3k
{
2107
    // SWAPs are to our left, and potential swaperands are to our right:
2108
80.9k
    for (; 0 <= i; i--) {
2109
80.8k
        assert(i < block->b_iused);
2110
80.8k
        cfg_instr *swap = &block->b_instr[i];
2111
80.8k
        if (swap->i_opcode != SWAP) {
2112
4.10k
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2113
                // Nope, but we know how to handle these. Keep looking:
2114
2.74k
                continue;
2115
2.74k
            }
2116
            // We can't reason about what this instruction does. Bail:
2117
1.36k
            return;
2118
4.10k
        }
2119
76.6k
        int j = next_swappable_instruction(block, i, -1);
2120
76.6k
        if (j < 0) {
2121
60.6k
            return;
2122
60.6k
        }
2123
16.0k
        int k = j;
2124
16.0k
        int lineno = block->b_instr[j].i_loc.lineno;
2125
32.0k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2126
30.9k
            k = next_swappable_instruction(block, k, lineno);
2127
30.9k
            if (k < 0) {
2128
15.0k
                return;
2129
15.0k
            }
2130
30.9k
        }
2131
        // The reordering is not safe if the two instructions to be swapped
2132
        // store to the same location, or if any intervening instruction stores
2133
        // to the same location as either of them.
2134
1.05k
        int store_j = STORES_TO(block->b_instr[j]);
2135
1.05k
        int store_k = STORES_TO(block->b_instr[k]);
2136
1.05k
        if (store_j >= 0 || store_k >= 0) {
2137
1.03k
            if (store_j == store_k) {
2138
217
                return;
2139
217
            }
2140
1.64k
            for (int idx = j + 1; idx < k; idx++) {
2141
819
                int store_idx = STORES_TO(block->b_instr[idx]);
2142
819
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2143
0
                    return;
2144
0
                }
2145
819
            }
2146
821
        }
2147
2148
        // Success!
2149
835
        INSTR_SET_OP0(swap, NOP);
2150
835
        cfg_instr temp = block->b_instr[j];
2151
835
        block->b_instr[j] = block->b_instr[k];
2152
835
        block->b_instr[k] = temp;
2153
835
    }
2154
77.3k
}
2155
2156
static int
2157
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2158
386k
{
2159
386k
    assert(PyDict_CheckExact(const_cache));
2160
386k
    assert(PyList_CheckExact(consts));
2161
386k
    int opcode = 0;
2162
386k
    int oparg = 0;
2163
4.54M
    for (int i = 0; i < bb->b_iused; i++) {
2164
4.16M
        cfg_instr *inst = &bb->b_instr[i];
2165
4.16M
        if (inst->i_opcode == LOAD_CONST) {
2166
1.04M
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2167
1.04M
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2168
1.04M
            Py_DECREF(constant);
2169
1.04M
            if (res < 0) {
2170
0
                return ERROR;
2171
0
            }
2172
1.04M
        }
2173
4.16M
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2174
605k
                                      inst->i_opcode == COPY &&
2175
111
                                      inst->i_oparg == 1);
2176
4.16M
        if (! is_copy_of_load_const) {
2177
4.16M
            opcode = inst->i_opcode;
2178
4.16M
            oparg = inst->i_oparg;
2179
4.16M
        }
2180
4.16M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2181
4.16M
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2182
3.11M
            continue;
2183
3.11M
        }
2184
1.04M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2185
1.04M
        switch(nextop) {
2186
2.11k
            case POP_JUMP_IF_FALSE:
2187
3.61k
            case POP_JUMP_IF_TRUE:
2188
5.72k
            case JUMP_IF_FALSE:
2189
6.58k
            case JUMP_IF_TRUE:
2190
6.58k
            {
2191
                /* Remove LOAD_CONST const; conditional jump */
2192
6.58k
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2193
6.58k
                if (cnt == NULL) {
2194
0
                    return ERROR;
2195
0
                }
2196
6.58k
                int is_true = PyObject_IsTrue(cnt);
2197
6.58k
                Py_DECREF(cnt);
2198
6.58k
                if (is_true == -1) {
2199
0
                    return ERROR;
2200
0
                }
2201
6.58k
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2202
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2203
3.61k
                    INSTR_SET_OP0(inst, NOP);
2204
3.61k
                }
2205
6.58k
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2206
6.58k
                if (is_true == jump_if_true) {
2207
2.49k
                    bb->b_instr[i+1].i_opcode = JUMP;
2208
2.49k
                }
2209
4.09k
                else {
2210
4.09k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2211
4.09k
                }
2212
6.58k
                break;
2213
6.58k
            }
2214
6.58k
            case IS_OP:
2215
2.22k
            {
2216
                // Fold to POP_JUMP_IF_NONE:
2217
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2218
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2219
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2220
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2221
                // Fold to POP_JUMP_IF_NOT_NONE:
2222
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2223
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2224
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2225
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2226
2.22k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2227
2.22k
                if (cnt == NULL) {
2228
0
                    return ERROR;
2229
0
                }
2230
2.22k
                if (!Py_IsNone(cnt)) {
2231
1.12k
                    Py_DECREF(cnt);
2232
1.12k
                    break;
2233
1.12k
                }
2234
1.10k
                if (bb->b_iused <= i + 2) {
2235
0
                    break;
2236
0
                }
2237
1.10k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2238
1.10k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2239
                // Get rid of TO_BOOL regardless:
2240
1.10k
                if (jump_instr->i_opcode == TO_BOOL) {
2241
0
                    INSTR_SET_OP0(jump_instr, NOP);
2242
0
                    if (bb->b_iused <= i + 3) {
2243
0
                        break;
2244
0
                    }
2245
0
                    jump_instr = &bb->b_instr[i + 3];
2246
0
                }
2247
1.10k
                bool invert = is_instr->i_oparg;
2248
1.10k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2249
1.10k
                    invert = !invert;
2250
1.10k
                }
2251
0
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2252
0
                    break;
2253
0
                }
2254
1.10k
                INSTR_SET_OP0(inst, NOP);
2255
1.10k
                INSTR_SET_OP0(is_instr, NOP);
2256
1.10k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2257
1.10k
                                              : POP_JUMP_IF_NONE;
2258
1.10k
                break;
2259
1.10k
            }
2260
4.27k
            case TO_BOOL:
2261
4.27k
            {
2262
4.27k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2263
4.27k
                if (cnt == NULL) {
2264
0
                    return ERROR;
2265
0
                }
2266
4.27k
                int is_true = PyObject_IsTrue(cnt);
2267
4.27k
                Py_DECREF(cnt);
2268
4.27k
                if (is_true == -1) {
2269
0
                    return ERROR;
2270
0
                }
2271
4.27k
                cnt = PyBool_FromLong(is_true);
2272
4.27k
                int index = add_const(cnt, consts, const_cache);
2273
4.27k
                if (index < 0) {
2274
0
                    return ERROR;
2275
0
                }
2276
4.27k
                INSTR_SET_OP0(inst, NOP);
2277
4.27k
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2278
4.27k
                break;
2279
4.27k
            }
2280
1.04M
        }
2281
1.04M
    }
2282
386k
    return SUCCESS;
2283
386k
}
2284
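Aside: a sketch of the constant-condition fold above for the POP_JUMP variants, with hypothetical toy opcodes: the load becomes a NOP (the jump would have popped the constant anyway) and the jump becomes unconditional or disappears, depending on whether the constant matches the jump's sense.

#include <assert.h>

enum { NOP_I, LOAD_TRUE, LOAD_FALSE, POP_JUMP_IF_TRUE_I,
       POP_JUMP_IF_FALSE_I, JUMP_I };

typedef struct { int op; } toy_instr;

static void
fold_const_jump(toy_instr *load, toy_instr *jump)
{
    int is_true = (load->op == LOAD_TRUE);
    int jump_if_true = (jump->op == POP_JUMP_IF_TRUE_I);
    load->op = NOP_I;                 /* the popped constant is dead */
    jump->op = (is_true == jump_if_true) ? JUMP_I : NOP_I;
}

int
main(void)
{
    toy_instr code[2] = {{LOAD_TRUE}, {POP_JUMP_IF_TRUE_I}};
    fold_const_jump(&code[0], &code[1]);
    assert(code[0].op == NOP_I);
    assert(code[1].op == JUMP_I);     /* branch is always taken */
    return 0;
}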
2285
static int
2286
52.1k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2287
438k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2288
386k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2289
386k
    }
2290
52.1k
    return SUCCESS;
2291
52.1k
}
2292
2293
static int
2294
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2295
386k
{
2296
386k
    assert(PyDict_CheckExact(const_cache));
2297
386k
    assert(PyList_CheckExact(consts));
2298
386k
    cfg_instr nop;
2299
386k
    INSTR_SET_OP0(&nop, NOP);
2300
4.56M
    for (int i = 0; i < bb->b_iused; i++) {
2301
4.17M
        cfg_instr *inst = &bb->b_instr[i];
2302
4.17M
        cfg_instr *target;
2303
4.17M
        int opcode = inst->i_opcode;
2304
4.17M
        int oparg = inst->i_oparg;
2305
4.17M
        if (HAS_TARGET(opcode)) {
2306
240k
            assert(inst->i_target->b_iused > 0);
2307
240k
            target = &inst->i_target->b_instr[0];
2308
240k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2309
240k
        }
2310
3.93M
        else {
2311
3.93M
            target = &nop;
2312
3.93M
        }
2313
4.17M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2314
4.17M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2315
4.17M
        switch (opcode) {
2316
            /* Try to fold tuples of constants.
2317
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2318
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2319
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2320
81.0k
            case BUILD_TUPLE:
2321
81.0k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2322
1.18k
                    switch(oparg) {
2323
7
                        case 1:
2324
7
                            INSTR_SET_OP0(inst, NOP);
2325
7
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2326
7
                            continue;
2327
398
                        case 2:
2328
1.15k
                        case 3:
2329
1.15k
                            INSTR_SET_OP0(inst, NOP);
2330
1.15k
                            bb->b_instr[i+1].i_opcode = SWAP;
2331
1.15k
                            continue;
2332
1.18k
                    }
2333
1.18k
                }
2334
79.9k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2335
79.9k
                break;
2336
15.1k
            case BUILD_LIST:
2337
25.4k
            case BUILD_SET:
2338
25.4k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2339
25.4k
                break;
2340
1.45k
            case POP_JUMP_IF_NOT_NONE:
2341
2.91k
            case POP_JUMP_IF_NONE:
2342
2.91k
                switch (target->i_opcode) {
2343
0
                    case JUMP:
2344
0
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2345
2.91k
                }
2346
2.91k
                break;
2347
85.0k
            case POP_JUMP_IF_FALSE:
2348
85.0k
                switch (target->i_opcode) {
2349
1.42k
                    case JUMP:
2350
1.42k
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2351
85.0k
                }
2352
85.0k
                break;
2353
85.0k
            case POP_JUMP_IF_TRUE:
2354
10.1k
                switch (target->i_opcode) {
2355
10
                    case JUMP:
2356
10
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2357
10.1k
                }
2358
10.1k
                break;
2359
10.1k
            case JUMP_IF_FALSE:
2360
2.37k
                switch (target->i_opcode) {
2361
0
                    case JUMP:
2362
11
                    case JUMP_IF_FALSE:
2363
11
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2364
11
                        continue;
2365
591
                    case JUMP_IF_TRUE:
2366
                        // No need to check for loops here; a block's b_next
2367
                        // cannot point to itself.
2368
591
                        assert(inst->i_target != inst->i_target->b_next);
2369
591
                        inst->i_target = inst->i_target->b_next;
2370
591
                        i--;
2371
591
                        continue;
2372
2.37k
                }
2373
1.77k
                break;
2374
6.61k
            case JUMP_IF_TRUE:
2375
6.61k
                switch (target->i_opcode) {
2376
0
                    case JUMP:
2377
5.68k
                    case JUMP_IF_TRUE:
2378
5.68k
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2379
5.68k
                        continue;
2380
0
                    case JUMP_IF_FALSE:
2381
                        // No need to check for loops here; a block's b_next
2382
                        // cannot point to itself.
2383
0
                        assert(inst->i_target != inst->i_target->b_next);
2384
0
                        inst->i_target = inst->i_target->b_next;
2385
0
                        i--;
2386
0
                        continue;
2387
6.61k
                }
2388
934
                break;
2389
20.1k
            case JUMP:
2390
62.0k
            case JUMP_NO_INTERRUPT:
2391
62.0k
                switch (target->i_opcode) {
2392
335
                    case JUMP:
2393
335
                        i -= jump_thread(bb, inst, target, JUMP);
2394
335
                        continue;
2395
32
                    case JUMP_NO_INTERRUPT:
2396
32
                        i -= jump_thread(bb, inst, target, opcode);
2397
32
                        continue;
2398
62.0k
                }
2399
61.6k
                break;
2400
61.6k
            case FOR_ITER:
2401
4.65k
                if (target->i_opcode == JUMP) {
2402
                    /* This will not work now because the jump (at target) could
2403
                     * be forward or backward and FOR_ITER only jumps forward. We
2404
                     * can re-enable this if ever we implement a backward version
2405
                     * of FOR_ITER.
2406
                     */
2407
                    /*
2408
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2409
                    */
2410
0
                }
2411
4.65k
                break;
2412
82.7k
            case STORE_FAST:
2413
82.7k
                if (opcode == nextop &&
2414
27.2k
                    oparg == bb->b_instr[i+1].i_oparg &&
2415
1.87k
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2416
1.45k
                    bb->b_instr[i].i_opcode = POP_TOP;
2417
1.45k
                    bb->b_instr[i].i_oparg = 0;
2418
1.45k
                }
2419
82.7k
                break;
2420
91.6k
            case SWAP:
2421
91.6k
                if (oparg == 1) {
2422
0
                    INSTR_SET_OP0(inst, NOP);
2423
0
                }
2424
91.6k
                break;
2425
94.3k
            case LOAD_GLOBAL:
2426
94.3k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2427
1.73k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2428
1.73k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2429
1.73k
                }
2430
94.3k
                break;
2431
94.3k
            case COMPARE_OP:
2432
86.1k
                if (nextop == TO_BOOL) {
2433
15.8k
                    INSTR_SET_OP0(inst, NOP);
2434
15.8k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2435
15.8k
                    continue;
2436
15.8k
                }
2437
70.2k
                break;
2438
70.2k
            case CONTAINS_OP:
2439
21.7k
            case IS_OP:
2440
21.7k
                if (nextop == TO_BOOL) {
2441
3.27k
                    INSTR_SET_OP0(inst, NOP);
2442
3.27k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2443
3.27k
                    continue;
2444
3.27k
                }
2445
18.4k
                if (nextop == UNARY_NOT) {
2446
354
                    INSTR_SET_OP0(inst, NOP);
2447
354
                    int inverted = oparg ^ 1;
2448
354
                    assert(inverted == 0 || inverted == 1);
2449
354
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2450
354
                    continue;
2451
354
                }
2452
18.0k
                break;
2453
50.8k
            case TO_BOOL:
2454
50.8k
                if (nextop == TO_BOOL) {
2455
0
                    INSTR_SET_OP0(inst, NOP);
2456
0
                    continue;
2457
0
                }
2458
50.8k
                break;
2459
50.8k
            case UNARY_NOT:
2460
1.11k
                if (nextop == TO_BOOL) {
2461
112
                    INSTR_SET_OP0(inst, NOP);
2462
112
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2463
112
                    continue;
2464
112
                }
2465
1.00k
                if (nextop == UNARY_NOT) {
2466
112
                    INSTR_SET_OP0(inst, NOP);
2467
112
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2468
112
                    continue;
2469
112
                }
2470
894
                _Py_FALLTHROUGH;
2471
19.4k
            case UNARY_INVERT:
2472
138k
            case UNARY_NEGATIVE:
2473
138k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2474
138k
                break;
2475
105k
            case CALL_INTRINSIC_1:
2476
105k
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2477
7.62k
                    if (nextop == GET_ITER) {
2478
36
                        INSTR_SET_OP0(inst, NOP);
2479
36
                    }
2480
7.58k
                    else {
2481
7.58k
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2482
7.58k
                    }
2483
7.62k
                }
2484
97.8k
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2485
67.0k
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2486
67.0k
                }
2487
105k
                break;
2488
619k
            case BINARY_OP:
2489
619k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2490
619k
                break;
2491
4.17M
        }
2492
4.17M
    }
2493
2494
4.54M
    for (int i = 0; i < bb->b_iused; i++) {
2495
4.15M
        cfg_instr *inst = &bb->b_instr[i];
2496
4.15M
        if (inst->i_opcode == SWAP) {
2497
77.3k
            if (swaptimize(bb, &i) < 0) {
2498
0
                goto error;
2499
0
            }
2500
77.3k
            apply_static_swaps(bb, i);
2501
77.3k
        }
2502
4.15M
    }
2503
386k
    return SUCCESS;
2504
0
error:
2505
0
    return ERROR;
2506
386k
}
2507
2508
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2509
2510
static int
2511
remove_redundant_nops_and_jumps(cfg_builder *g)
2512
109k
{
2513
109k
    int removed_nops, removed_jumps;
2514
116k
    do {
2515
        /* Convergence is guaranteed because the number of
2516
         * redundant jumps and nops only decreases.
2517
         */
2518
116k
        removed_nops = remove_redundant_nops(g);
2519
116k
        RETURN_IF_ERROR(removed_nops);
2520
116k
        removed_jumps = remove_redundant_jumps(g);
2521
116k
        RETURN_IF_ERROR(removed_jumps);
2522
116k
    } while(removed_nops + removed_jumps > 0);
2523
109k
    return SUCCESS;
2524
109k
}
2525
2526
/* Perform optimizations on a control flow graph.
2527
   The consts object should still be in list form to allow new constants
2528
   to be appended.
2529
2530
   Code transformations that reduce code size initially fill the gaps with
2531
   NOPs.  Later those NOPs are removed.
2532
*/
2533
static int
2534
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2535
52.1k
{
2536
52.1k
    assert(PyDict_CheckExact(const_cache));
2537
52.1k
    RETURN_IF_ERROR(check_cfg(g));
2538
52.1k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2539
52.1k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2540
52.1k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2541
52.1k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2542
438k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2543
386k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2544
386k
    }
2545
52.1k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2546
52.1k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2547
52.1k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2548
52.1k
    assert(no_redundant_jumps(g));
2549
52.1k
    return SUCCESS;
2550
52.1k
}
2551
2552
static void
2553
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2554
30.0k
{
2555
30.0k
    int32_t line1 = inst1->i_loc.lineno;
2556
30.0k
    int32_t line2 = inst2->i_loc.lineno;
2557
    /* Skip if instructions are on different lines */
2558
30.0k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2559
560
        return;
2560
560
    }
2561
29.4k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2562
14.3k
        return;
2563
14.3k
    }
2564
15.1k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2565
15.1k
    INSTR_SET_OP0(inst2, NOP);
2566
15.1k
}
2567
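Aside: superinstructions pack both opargs into a single byte, which is why make_super_instruction refuses opargs of 16 or more. A tiny sketch of the packing and the unpack the interpreter performs (the same >> 4 / & 15 pattern appears below in optimize_load_fast's LOAD_FAST_LOAD_FAST handling):

#include <assert.h>

static int
pack_opargs(int a, int b)
{
    assert(0 <= a && a < 16 && 0 <= b && b < 16);   /* 4 bits each */
    return (a << 4) | b;
}

int
main(void)
{
    int packed = pack_opargs(3, 7);   /* e.g. STORE_FAST 3; STORE_FAST 7 */
    assert((packed >> 4) == 3);       /* first oparg */
    assert((packed & 15) == 7);       /* second oparg */
    return 0;
}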
2568
static int
2569
insert_superinstructions(cfg_builder *g)
2570
52.1k
{
2571
438k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2572
2573
3.35M
        for (int i = 0; i < b->b_iused; i++) {
2574
2.96M
            cfg_instr *inst = &b->b_instr[i];
2575
2.96M
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2576
2.96M
            switch(inst->i_opcode) {
2577
17.3k
                case LOAD_FAST:
2578
17.3k
                    if (nextop == LOAD_FAST) {
2579
1.73k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2580
1.73k
                    }
2581
17.3k
                    break;
2582
73.2k
                case STORE_FAST:
2583
73.2k
                    switch (nextop) {
2584
7.31k
                        case LOAD_FAST:
2585
7.31k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2586
7.31k
                            break;
2587
20.9k
                        case STORE_FAST:
2588
20.9k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2589
20.9k
                            break;
2590
73.2k
                    }
2591
73.2k
                    break;
2592
2.96M
            }
2593
2.96M
        }
2594
386k
    }
2595
52.1k
    int res = remove_redundant_nops(g);
2596
52.1k
    assert(no_redundant_nops(g));
2597
52.1k
    return res;
2598
52.1k
}
2599
2600
#define NOT_LOCAL -1
2601
88.0k
#define DUMMY_INSTR -1
2602
2603
typedef struct {
2604
    // Index of instruction that produced the reference or DUMMY_INSTR.
2605
    int instr;
2606
2607
    // The local to which the reference refers or NOT_LOCAL.
2608
    int local;
2609
} ref;
2610
2611
typedef struct {
2612
    ref *refs;
2613
    Py_ssize_t size;
2614
    Py_ssize_t capacity;
2615
} ref_stack;
2616
2617
static int
2618
ref_stack_push(ref_stack *stack, ref r)
2619
2.98M
{
2620
2.98M
    if (stack->size == stack->capacity) {
2621
52.4k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2622
52.4k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2623
52.4k
        if (refs == NULL) {
2624
0
            PyErr_NoMemory();
2625
0
            return -1;
2626
0
        }
2627
52.4k
        stack->refs = refs;
2628
52.4k
        stack->capacity = new_cap;
2629
52.4k
    }
2630
2.98M
    stack->refs[stack->size] = r;
2631
2.98M
    stack->size++;
2632
2.98M
    return 0;
2633
2.98M
}
2634
2635
static ref
2636
ref_stack_pop(ref_stack *stack)
2637
1.71M
{
2638
1.71M
    assert(stack->size > 0);
2639
1.71M
    stack->size--;
2640
1.71M
    ref r = stack->refs[stack->size];
2641
1.71M
    return r;
2642
1.71M
}
2643
2644
static void
2645
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2646
62.8k
{
2647
62.8k
    Py_ssize_t idx = stack->size - off;
2648
62.8k
    assert(idx >= 0 && idx < stack->size);
2649
62.8k
    ref tmp = stack->refs[idx];
2650
62.8k
    stack->refs[idx] = stack->refs[stack->size - 1];
2651
62.8k
    stack->refs[stack->size - 1] = tmp;
2652
62.8k
}
2653
2654
static ref
2655
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2656
5.39M
{
2657
5.39M
    assert(idx >= 0 && idx < stack->size);
2658
5.39M
    return stack->refs[idx];
2659
5.39M
}
2660
2661
static void
2662
ref_stack_clear(ref_stack *stack)
2663
226k
{
2664
226k
    stack->size = 0;
2665
226k
}
2666
2667
static void
2668
ref_stack_fini(ref_stack *stack)
2669
52.1k
{
2670
52.1k
    if (stack->refs != NULL) {
2671
52.1k
        PyMem_Free(stack->refs);
2672
52.1k
    }
2673
52.1k
    stack->refs = NULL;
2674
52.1k
    stack->capacity = 0;
2675
52.1k
    stack->size = 0;
2676
52.1k
}
2677
2678
typedef enum {
2679
    // The loaded reference is still on the stack when the local is killed
2680
    SUPPORT_KILLED  = 1,
2681
    // The loaded reference is stored into a local
2682
    STORED_AS_LOCAL = 2,
2683
    // The loaded reference is still on the stack at the end of the basic block
2684
    REF_UNCONSUMED  = 4,
2685
} LoadFastInstrFlag;
2686
2687
static void
2688
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2689
99.3k
{
2690
4.08M
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2691
3.98M
        ref r = ref_stack_at(refs, i);
2692
3.98M
        if (r.local == local) {
2693
986
            assert(r.instr >= 0);
2694
986
            instr_flags[r.instr] |= SUPPORT_KILLED;
2695
986
        }
2696
3.98M
    }
2697
99.3k
}
2698
2699
static void
2700
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2701
88.0k
{
2702
88.0k
    kill_local(instr_flags, refs, local);
2703
88.0k
    if (r.instr != DUMMY_INSTR) {
2704
73.8k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2705
73.8k
    }
2706
88.0k
}
2707
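Aside: a minimal toy of the flagging step in kill_local/store_local above: when a local is overwritten, any borrowed reference to it still on the simulated operand stack marks its producing instruction SUPPORT_KILLED, which later disqualifies that instruction from the borrow optimization. Standalone types, not the real ones:

#include <assert.h>
#include <stdint.h>

enum { TOY_SUPPORT_KILLED = 1 };

typedef struct { int instr; int local; } toy_ref;

static void
toy_kill_local(uint8_t *flags, const toy_ref *stack, int size, int local)
{
    for (int i = 0; i < size; i++) {
        if (stack[i].local == local) {
            flags[stack[i].instr] |= TOY_SUPPORT_KILLED;
        }
    }
}

int
main(void)
{
    uint8_t flags[1] = {0};
    /* Instruction 0 pushed a borrowed ref to local 3; local 3 is then
     * stored to while the ref is still on the stack. */
    toy_ref stack[1] = {{0, 3}};
    toy_kill_local(flags, stack, 1, 3);
    assert(flags[0] & TOY_SUPPORT_KILLED);   /* instr 0 must not borrow */
    return 0;
}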
2708
static void
2709
load_fast_push_block(basicblock ***sp, basicblock *target,
2710
                     Py_ssize_t start_depth)
2711
245k
{
2712
245k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2713
245k
    if (!target->b_visited) {
2714
174k
        target->b_visited = 1;
2715
174k
        *(*sp)++ = target;
2716
174k
    }
2717
245k
}
2718
2719
/*
2720
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2721
 * load borrowed references onto the operand stack.
2722
 *
2723
 * This is only safe when we can prove that the reference in the frame outlives
2724
 * the borrowed reference produced by the instruction. We make this tractable
2725
 * by enforcing the following lifetimes:
2726
 *
2727
 * 1. Borrowed references loaded onto the operand stack live until the end of
2728
 *    the instruction that consumes them from the stack. Any borrowed
2729
 *    references that would escape into the heap (e.g. into frame objects or
2730
 *    generators) are converted into new, strong references.
2731
 *
2732
 * 2. Locals live until they are either killed by an instruction
2733
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2734
 *    via `f_locals` is added to a tuple owned by the frame object.
2735
 *
2736
 * To simplify the problem of detecting which supporting references in the
2737
 * frame are killed by instructions that overwrite locals, we only allow
2738
 * borrowed references to be stored as a local in the frame if they were passed
2739
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2740
 * strong references.
2741
 *
2742
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2743
 * that meet the following criteria:
2744
 *
2745
 * 1. The produced reference must be consumed from the stack before the
2746
 *    supporting reference in the frame is killed.
2747
 *
2748
 * 2. The produced reference cannot be stored as a local.
2749
 *
2750
 * We use abstract interpretation to identify instructions that meet these
2751
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2752
 * stack of abstract references and note any instructions that violate the
2753
 * criteria above. Once we've processed all the instructions in a block, any
2754
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2755
 */
2756
static int
2757
optimize_load_fast(cfg_builder *g)
2758
52.1k
{
2759
52.1k
    int status;
2760
52.1k
    ref_stack refs = {0};
2761
52.1k
    int max_instrs = 0;
2762
52.1k
    basicblock *entryblock = g->g_entryblock;
2763
448k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2764
396k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2765
396k
    }
2766
52.1k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2767
52.1k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2768
52.1k
    if (instr_flags == NULL) {
2769
0
        PyErr_NoMemory();
2770
0
        return ERROR;
2771
0
    }
2772
52.1k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2773
52.1k
    if (blocks == NULL) {
2774
0
        status = ERROR;
2775
0
        goto done;
2776
0
    }
2777
52.1k
    basicblock **sp = blocks;
2778
52.1k
    *sp = entryblock;
2779
52.1k
    sp++;
2780
52.1k
    entryblock->b_startdepth = 0;
2781
52.1k
    entryblock->b_visited = 1;
2782
2783
52.1k
    #define PUSH_REF(instr, local)                \
2784
2.98M
        do {                                      \
2785
2.98M
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2786
0
                status = ERROR;                   \
2787
0
                goto done;                        \
2788
0
            }                                     \
2789
2.98M
        } while(0)
2790
2791
278k
    while (sp != blocks) {
2792
226k
        basicblock *block = *--sp;
2793
226k
        assert(block->b_startdepth > -1);
2794
2795
        // Reset per-block state.
2796
226k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2797
2798
        // Reset the stack of refs. We don't track references on the stack
2799
        // across basic blocks, but the bytecode will expect their
2800
        // presence. Add dummy references as necessary.
2801
226k
        ref_stack_clear(&refs);
2802
1.45M
        for (int i = 0; i < block->b_startdepth; i++) {
2803
1.23M
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2804
1.23M
        }
2805
2806
2.67M
        for (int i = 0; i < block->b_iused; i++) {
2807
2.44M
            cfg_instr *instr = &block->b_instr[i];
2808
2.44M
            int opcode = instr->i_opcode;
2809
2.44M
            int oparg = instr->i_oparg;
2810
2.44M
            assert(opcode != EXTENDED_ARG);
2811
2.44M
            switch (opcode) {
2812
                // Opcodes that load and store locals
2813
2.98k
                case DELETE_FAST: {
2814
2.98k
                    kill_local(instr_flags, &refs, oparg);
2815
2.98k
                    break;
2816
0
                }
2817
2818
38.3k
                case LOAD_FAST: {
2819
38.3k
                    PUSH_REF(i, oparg);
2820
38.3k
                    break;
2821
38.3k
                }
2822
2823
38.3k
                case LOAD_FAST_AND_CLEAR: {
2824
8.34k
                    kill_local(instr_flags, &refs, oparg);
2825
8.34k
                    PUSH_REF(i, oparg);
2826
8.34k
                    break;
2827
8.34k
                }
2828
2829
8.34k
                case LOAD_FAST_LOAD_FAST: {
2830
698
                    PUSH_REF(i, oparg >> 4);
2831
698
                    PUSH_REF(i, oparg & 15);
2832
698
                    break;
2833
698
                }
2834
2835
65.6k
                case STORE_FAST: {
2836
65.6k
                    ref r = ref_stack_pop(&refs);
2837
65.6k
                    store_local(instr_flags, &refs, oparg, r);
2838
65.6k
                    break;
2839
698
                }
2840
2841
6.37k
                case STORE_FAST_LOAD_FAST: {
2842
                    // STORE_FAST
2843
6.37k
                    ref r = ref_stack_pop(&refs);
2844
6.37k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2845
                    // LOAD_FAST
2846
6.37k
                    PUSH_REF(i, oparg & 15);
2847
6.37k
                    break;
2848
6.37k
                }
2849
2850
8.01k
                case STORE_FAST_STORE_FAST: {
2851
                    // STORE_FAST
2852
8.01k
                    ref r = ref_stack_pop(&refs);
2853
8.01k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2854
                    // STORE_FAST
2855
8.01k
                    r = ref_stack_pop(&refs);
2856
8.01k
                    store_local(instr_flags, &refs, oparg & 15, r);
2857
8.01k
                    break;
2858
6.37k
                }
2859
2860
                // Opcodes that shuffle values on the stack
2861
132k
                case COPY: {
2862
132k
                    assert(oparg > 0);
2863
132k
                    Py_ssize_t idx = refs.size - oparg;
2864
132k
                    ref r = ref_stack_at(&refs, idx);
2865
132k
                    PUSH_REF(r.instr, r.local);
2866
132k
                    break;
2867
132k
                }
2868
2869
132k
                case SWAP: {
2870
62.8k
                    assert(oparg >= 2);
2871
62.8k
                    ref_stack_swap_top(&refs, oparg);
2872
62.8k
                    break;
2873
62.8k
                }
2874
2875
                // We treat opcodes that do not consume all of their inputs on
2876
                // a case-by-case basis, as we have no generic way of knowing
2877
                // how many inputs should be left on the stack.
2878
2879
                // Opcodes that consume no inputs
2880
3.47k
                case FORMAT_SIMPLE:
2881
3.83k
                case GET_ANEXT:
2882
8.43k
                case GET_ITER:
2883
16.6k
                case GET_LEN:
2884
16.6k
                case GET_YIELD_FROM_ITER:
2885
22.7k
                case IMPORT_FROM:
2886
22.7k
                case MATCH_KEYS:
2887
22.7k
                case MATCH_MAPPING:
2888
24.5k
                case MATCH_SEQUENCE:
2889
24.5k
                case WITH_EXCEPT_START: {
2890
24.5k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2891
24.5k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2892
24.5k
                    int net_pushed = num_pushed - num_popped;
2893
24.5k
                    assert(net_pushed >= 0);
2894
45.6k
                    for (int j = 0; j < net_pushed; j++) {
2895
21.0k
                        PUSH_REF(i, NOT_LOCAL);
2896
21.0k
                    }
2897
24.5k
                    break;
2898
24.5k
                }
2899
2900
                // Opcodes that consume some inputs and push no new values
2901
24.5k
                case DICT_MERGE:
2902
3.74k
                case DICT_UPDATE:
2903
39.4k
                case LIST_APPEND:
2904
45.0k
                case LIST_EXTEND:
2905
45.7k
                case MAP_ADD:
2906
45.7k
                case RERAISE:
2907
52.5k
                case SET_ADD:
2908
57.5k
                case SET_UPDATE: {
2909
57.5k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2910
57.5k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2911
57.5k
                    int net_popped = num_popped - num_pushed;
2912
57.5k
                    assert(net_popped > 0);
2913
115k
                    for (int i = 0; i < net_popped; i++) {
2914
58.3k
                        ref_stack_pop(&refs);
2915
58.3k
                    }
2916
57.5k
                    break;
2917
57.5k
                }
2918
2919
8.60k
                case END_SEND:
2920
32.0k
                case SET_FUNCTION_ATTRIBUTE: {
2921
32.0k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2922
32.0k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2923
32.0k
                    ref tos = ref_stack_pop(&refs);
2924
32.0k
                    ref_stack_pop(&refs);
2925
32.0k
                    PUSH_REF(tos.instr, tos.local);
2926
32.0k
                    break;
2927
32.0k
                }
2928
2929
                // Opcodes that consume some inputs and push new values
2930
32.0k
                case CHECK_EXC_MATCH: {
2931
0
                    ref_stack_pop(&refs);
2932
0
                    PUSH_REF(i, NOT_LOCAL);
2933
0
                    break;
2934
0
                }
2935
2936
4.60k
                case FOR_ITER: {
2937
4.60k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2938
4.60k
                    PUSH_REF(i, NOT_LOCAL);
2939
4.60k
                    break;
2940
4.60k
                }
2941
2942
12.4k
                case LOAD_ATTR:
2943
12.6k
                case LOAD_SUPER_ATTR: {
2944
12.6k
                    ref self = ref_stack_pop(&refs);
2945
12.6k
                    if (opcode == LOAD_SUPER_ATTR) {
2946
229
                        ref_stack_pop(&refs);
2947
229
                        ref_stack_pop(&refs);
2948
229
                    }
2949
12.6k
                    PUSH_REF(i, NOT_LOCAL);
2950
12.6k
                    if (oparg & 1) {
2951
                        // A method call; conservatively assume that self is pushed
2952
                        // back onto the stack
2953
921
                        PUSH_REF(self.instr, self.local);
2954
921
                    }
2955
12.6k
                    break;
2956
12.6k
                }
2957
2958
15.7k
                case LOAD_SPECIAL:
2959
15.7k
                case PUSH_EXC_INFO: {
2960
15.7k
                    ref tos = ref_stack_pop(&refs);
2961
15.7k
                    PUSH_REF(i, NOT_LOCAL);
2962
15.7k
                    PUSH_REF(tos.instr, tos.local);
2963
15.7k
                    break;
2964
15.7k
                }
2965
2966
15.7k
                case SEND: {
2967
8.60k
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2968
8.60k
                    ref_stack_pop(&refs);
2969
8.60k
                    PUSH_REF(i, NOT_LOCAL);
2970
8.60k
                    break;
2971
8.60k
                }
2972
2973
                // Opcodes that consume all of their inputs
2974
1.96M
                default: {
2975
1.96M
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2976
1.96M
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2977
1.96M
                    if (HAS_TARGET(instr->i_opcode)) {
2978
94.6k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2979
94.6k
                    }
2980
1.96M
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2981
                        // Block push opcodes only affect the stack when jumping
2982
                        // to the target.
2983
3.43M
                        for (int j = 0; j < num_popped; j++) {
2984
1.46M
                            ref_stack_pop(&refs);
2985
1.46M
                        }
2986
3.42M
                        for (int j = 0; j < num_pushed; j++) {
2987
1.45M
                            PUSH_REF(i, NOT_LOCAL);
2988
1.45M
                        }
2989
1.96M
                    }
2990
1.96M
                    break;
2991
1.96M
                }
2992
2.44M
            }
2993
2.44M
        }
2994
2995
        // Push fallthrough block
2996
226k
        if (BB_HAS_FALLTHROUGH(block)) {
2997
137k
            assert(block->b_next != NULL);
2998
137k
            load_fast_push_block(&sp, block->b_next, refs.size);
2999
137k
        }
3000
3001
        // Mark instructions that produce values that are on the stack at the
3002
        // end of the basic block
3003
1.49M
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3004
1.26M
            ref r = ref_stack_at(&refs, i);
3005
1.26M
            if (r.instr != -1) {
3006
198k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3007
198k
            }
3008
1.26M
        }
3009
3010
        // Optimize instructions
3011
2.67M
        for (int i = 0; i < block->b_iused; i++) {
3012
2.44M
            if (!instr_flags[i]) {
3013
2.21M
                cfg_instr *instr = &block->b_instr[i];
3014
2.21M
                switch (instr->i_opcode) {
3015
35.0k
                    case LOAD_FAST:
3016
35.0k
                        instr->i_opcode = LOAD_FAST_BORROW;
3017
35.0k
                        break;
3018
671
                    case LOAD_FAST_LOAD_FAST:
3019
671
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3020
671
                        break;
3021
2.17M
                    default:
3022
2.17M
                        break;
3023
2.21M
                }
3024
2.21M
            }
3025
2.44M
        }
3026
226k
    }
3027
3028
52.1k
    #undef PUSH_REF
3029
3030
52.1k
    status = SUCCESS;
3031
3032
52.1k
done:
3033
52.1k
    ref_stack_fini(&refs);
3034
52.1k
    PyMem_Free(instr_flags);
3035
52.1k
    PyMem_Free(blocks);
3036
52.1k
    return status;
3037
52.1k
}
3038
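optimize_load_fast above is a per-block abstract interpretation: it replays each instruction's stack effect on a stack of (producing instruction, local) pairs, then flags any producer whose value is still on the abstract stack when the block ends (the REF_UNCONSUMED marking). A minimal standalone sketch of that bookkeeping, using invented toy opcodes and stack effects rather than CPython's:

#include <stdio.h>
#include <stdbool.h>

/* Toy opcodes: LOAD pushes one value, ADD pops two and pushes one,
 * STORE pops one.  These are illustrative, not CPython's opcodes. */
enum { OP_LOAD, OP_ADD, OP_STORE };
typedef struct { int opcode; } instr;

int main(void)
{
    instr block[] = {{OP_LOAD}, {OP_LOAD}, {OP_ADD}, {OP_LOAD}};
    int ninstrs = 4;
    int stack[16], sp = 0;              /* abstract stack of producer indices */
    bool unconsumed[16] = {false};

    for (int i = 0; i < ninstrs; i++) {
        switch (block[i].opcode) {
            case OP_LOAD:
                stack[sp++] = i;        /* remember who produced this slot */
                break;
            case OP_ADD:
                sp -= 2;
                stack[sp++] = i;
                break;
            case OP_STORE:
                sp -= 1;
                break;
        }
    }
    /* Anything still on the stack escapes the block and cannot borrow. */
    for (int j = 0; j < sp; j++) {
        unconsumed[stack[j]] = true;
    }
    for (int i = 0; i < ninstrs; i++) {
        printf("instr %d: %s\n", i, unconsumed[i] ? "escapes the block" : "consumed");
    }
    return 0;
}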
3039
// helper functions for add_checks_for_loads_of_uninitialized_variables
3040
static inline void
3041
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3042
974k
{
3043
    // Push b if the unsafe mask is giving us any new information.
3044
    // To avoid overflowing the stack, only allow each block once.
3045
    // Use b->b_visited=1 to mean that b is currently on the stack.
3046
974k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3047
974k
    if (b->b_unsafe_locals_mask != both) {
3048
67.8k
        b->b_unsafe_locals_mask = both;
3049
        // More work left to do.
3050
67.8k
        if (!b->b_visited) {
3051
            // not on the stack, so push it.
3052
67.3k
            *(*sp)++ = b;
3053
67.3k
            b->b_visited = 1;
3054
67.3k
        }
3055
67.8k
    }
3056
974k
}
3057
3058
static void
3059
scan_block_for_locals(basicblock *b, basicblock ***sp)
3060
275k
{
3061
    // bit i is set if local i is potentially uninitialized
3062
275k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3063
2.04M
    for (int i = 0; i < b->b_iused; i++) {
3064
1.76M
        cfg_instr *instr = &b->b_instr[i];
3065
1.76M
        assert(instr->i_opcode != EXTENDED_ARG);
3066
1.76M
        if (instr->i_except != NULL) {
3067
691k
            maybe_push(instr->i_except, unsafe_mask, sp);
3068
691k
        }
3069
1.76M
        if (instr->i_oparg >= 64) {
3070
166k
            continue;
3071
166k
        }
3072
1.76M
        assert(instr->i_oparg >= 0);
3073
1.59M
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3074
1.59M
        switch (instr->i_opcode) {
3075
5.72k
            case DELETE_FAST:
3076
24.7k
            case LOAD_FAST_AND_CLEAR:
3077
62.2k
            case STORE_FAST_MAYBE_NULL:
3078
62.2k
                unsafe_mask |= bit;
3079
62.2k
                break;
3080
155k
            case STORE_FAST:
3081
155k
                unsafe_mask &= ~bit;
3082
155k
                break;
3083
2.36k
            case LOAD_FAST_CHECK:
3084
                // If this doesn't raise, then the local is defined.
3085
2.36k
                unsafe_mask &= ~bit;
3086
2.36k
                break;
3087
38.7k
            case LOAD_FAST:
3088
38.7k
                if (unsafe_mask & bit) {
3089
2.33k
                    instr->i_opcode = LOAD_FAST_CHECK;
3090
2.33k
                }
3091
38.7k
                unsafe_mask &= ~bit;
3092
38.7k
                break;
3093
1.59M
        }
3094
1.59M
    }
3095
275k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3096
139k
        maybe_push(b->b_next, unsafe_mask, sp);
3097
139k
    }
3098
275k
    cfg_instr *last = basicblock_last_instr(b);
3099
275k
    if (last && is_jump(last)) {
3100
118k
        assert(last->i_target != NULL);
3101
118k
        maybe_push(last->i_target, unsafe_mask, sp);
3102
118k
    }
3103
275k
}
3104
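maybe_push and scan_block_for_locals together form a classic monotone dataflow fixed point: a block's mask only ever grows under bitwise OR, so a block re-enters the worklist only when it gains new bits, which bounds the total number of pushes and guarantees termination. A standalone sketch of that scheme (the two-block CFG and the masks are invented for illustration):

#include <stdint.h>
#include <stdio.h>

#define NBLOCKS 2
static uint64_t mask[NBLOCKS];          /* maybe-uninitialized bits per block */
static int on_stack[NBLOCKS];
static int worklist[NBLOCKS], top = 0;

static void maybe_push_block(int b, uint64_t incoming)
{
    uint64_t both = mask[b] | incoming;
    if (both != mask[b]) {              /* any new information? */
        mask[b] = both;
        if (!on_stack[b]) {             /* at most one queue entry per block */
            worklist[top++] = b;
            on_stack[b] = 1;
        }
    }
}

int main(void)
{
    maybe_push_block(0, 0x3);           /* locals 0 and 1 start maybe-unset */
    while (top > 0) {
        int b = worklist[--top];
        on_stack[b] = 0;
        /* pretend block 0 stores local 0, then falls through to block 1 */
        uint64_t out = (b == 0) ? (mask[b] & ~(uint64_t)1) : mask[b];
        if (b == 0) {
            maybe_push_block(1, out);
        }
    }
    printf("block 1 maybe-unset mask: %#llx\n",
           (unsigned long long)mask[1]);  /* 0x2: only local 1 remains */
    return 0;
}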
3105
static int
3106
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3107
74
{
3108
74
    assert(nlocals > 64);
3109
74
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3110
74
    if (states == NULL) {
3111
0
        PyErr_NoMemory();
3112
0
        return ERROR;
3113
0
    }
3114
74
    Py_ssize_t blocknum = 0;
3115
    // states[i - 64] == blocknum if local i is guaranteed to
3116
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3117
    // STORE_FAST within that basicblock (not followed by
3118
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3119
887
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3120
813
        blocknum++;
3121
33.5k
        for (int i = 0; i < b->b_iused; i++) {
3122
32.7k
            cfg_instr *instr = &b->b_instr[i];
3123
32.7k
            assert(instr->i_opcode != EXTENDED_ARG);
3124
32.7k
            int arg = instr->i_oparg;
3125
32.7k
            if (arg < 64) {
3126
26.5k
                continue;
3127
26.5k
            }
3128
32.7k
            assert(arg >= 0);
3129
6.18k
            switch (instr->i_opcode) {
3130
124
                case DELETE_FAST:
3131
429
                case LOAD_FAST_AND_CLEAR:
3132
1.03k
                case STORE_FAST_MAYBE_NULL:
3133
1.03k
                    states[arg - 64] = blocknum - 1;
3134
1.03k
                    break;
3135
2.96k
                case STORE_FAST:
3136
2.96k
                    states[arg - 64] = blocknum;
3137
2.96k
                    break;
3138
1.49k
                case LOAD_FAST:
3139
1.49k
                    if (states[arg - 64] != blocknum) {
3140
129
                        instr->i_opcode = LOAD_FAST_CHECK;
3141
129
                    }
3142
1.49k
                    states[arg - 64] = blocknum;
3143
1.49k
                    break;
3144
6.18k
                    Py_UNREACHABLE();
3145
6.18k
            }
3146
6.18k
        }
3147
813
    }
3148
74
    PyMem_Free(states);
3149
74
    return SUCCESS;
3150
74
}
3151
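The states array relies on a generation-counter trick: rather than clearing one entry per local at every block boundary, the code stamps entries with the current block number and treats any other value as stale ("not known initialized in this block"). A small illustration of the trick, with an invented block layout:

#include <stdio.h>
#include <string.h>

#define NLOCALS 4

int main(void)
{
    long states[NLOCALS];
    memset(states, 0, sizeof(states));
    long blocknum = 0;

    blocknum++;                          /* enter block 1 */
    states[2] = blocknum;                /* a store makes local 2 safe here */
    printf("block %ld: local 2 %s\n", blocknum,
           states[2] == blocknum ? "known-init" : "unknown");

    blocknum++;                          /* enter block 2: no clearing needed */
    printf("block %ld: local 2 %s\n", blocknum,
           states[2] == blocknum ? "known-init" : "unknown");
    return 0;
}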
3152
static int
3153
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3154
52.1k
{
3155
52.1k
    assert(PyList_CheckExact(consts));
3156
52.1k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3157
52.1k
    if (nconsts == 0) {
3158
205
        return SUCCESS;  /* nothing to do */
3159
205
    }
3160
3161
51.9k
    Py_ssize_t *index_map = NULL;
3162
51.9k
    Py_ssize_t *reverse_index_map = NULL;
3163
51.9k
    int err = ERROR;
3164
3165
51.9k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3166
51.9k
    if (index_map == NULL) {
3167
0
        goto end;
3168
0
    }
3169
388k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3170
337k
        index_map[i] = -1;
3171
337k
    }
3172
    // The first constant may be the docstring; always keep it.
3173
51.9k
    index_map[0] = 0;
3174
3175
    /* mark used consts */
3176
437k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3177
3.33M
        for (int i = 0; i < b->b_iused; i++) {
3178
2.94M
            int opcode = b->b_instr[i].i_opcode;
3179
2.94M
            if (OPCODE_HAS_CONST(opcode)) {
3180
441k
                int index = b->b_instr[i].i_oparg;
3181
441k
                index_map[index] = index;
3182
441k
            }
3183
2.94M
        }
3184
385k
    }
3185
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3186
    /* condense consts */
3187
51.9k
    Py_ssize_t n_used_consts = 0;
3188
440k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3189
388k
        if (index_map[i] != -1) {
3190
199k
            assert(index_map[i] == i);
3191
199k
            index_map[n_used_consts++] = index_map[i];
3192
199k
        }
3193
388k
    }
3194
51.9k
    if (n_used_consts == nconsts) {
3195
        /* nothing to do */
3196
24.9k
        err = SUCCESS;
3197
24.9k
        goto end;
3198
24.9k
    }
3199
3200
    /* move all used consts to the beginning of the consts list */
3201
51.9k
    assert(n_used_consts < nconsts);
3202
138k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3203
111k
        Py_ssize_t old_index = index_map[i];
3204
111k
        assert(i <= old_index && old_index < nconsts);
3205
111k
        if (i != old_index) {
3206
74.5k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3207
74.5k
            assert(value != NULL);
3208
74.5k
            PyList_SetItem(consts, i, Py_NewRef(value));
3209
74.5k
        }
3210
111k
    }
3211
3212
    /* truncate the consts list at its new size */
3213
27.0k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3214
0
        goto end;
3215
0
    }
3216
    /* adjust const indices in the bytecode */
3217
27.0k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3218
27.0k
    if (reverse_index_map == NULL) {
3219
0
        goto end;
3220
0
    }
3221
328k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3222
301k
        reverse_index_map[i] = -1;
3223
301k
    }
3224
138k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3225
111k
        assert(index_map[i] != -1);
3226
111k
        assert(reverse_index_map[index_map[i]] == -1);
3227
111k
        reverse_index_map[index_map[i]] = i;
3228
111k
    }
3229
3230
256k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3231
1.99M
        for (int i = 0; i < b->b_iused; i++) {
3232
1.76M
            int opcode = b->b_instr[i].i_opcode;
3233
1.76M
            if (OPCODE_HAS_CONST(opcode)) {
3234
301k
                int index = b->b_instr[i].i_oparg;
3235
301k
                assert(reverse_index_map[index] >= 0);
3236
301k
                assert(reverse_index_map[index] < n_used_consts);
3237
301k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3238
301k
            }
3239
1.76M
        }
3240
229k
    }
3241
3242
27.0k
    err = SUCCESS;
3243
51.9k
end:
3244
51.9k
    PyMem_Free(index_map);
3245
51.9k
    PyMem_Free(reverse_index_map);
3246
51.9k
    return err;
3247
27.0k
}
3248
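remove_unused_consts is a mark-compact pass over the consts list: index_map records which indices survive, the survivors are slid to the front, and reverse_index_map then rewrites every const-referencing oparg. The same two-map scheme in miniature (the "used" pattern below is arbitrary):

#include <stdio.h>

int main(void)
{
    int used[6] = {1, 0, 1, 0, 0, 1};    /* consts 0, 2, 5 are referenced */
    int index_map[6], reverse[6];
    int n_used = 0;

    for (int i = 0; i < 6; i++) {
        reverse[i] = -1;
        if (used[i]) {
            index_map[n_used++] = i;     /* old index of the n-th kept const */
        }
    }
    for (int i = 0; i < n_used; i++) {
        reverse[index_map[i]] = i;       /* old index -> new index */
    }
    for (int old = 0; old < 6; old++) {
        if (used[old]) {
            printf("const %d moves to slot %d\n", old, reverse[old]);
        }
    }
    return 0;
}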
3249
3250
3251
static int
3252
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3253
                                                int nlocals,
3254
                                                int nparams)
3255
52.1k
{
3256
52.1k
    if (nlocals == 0) {
3257
25.9k
        return SUCCESS;
3258
25.9k
    }
3259
26.2k
    if (nlocals > 64) {
3260
        // To avoid O(nlocals**2) compilation, locals beyond the first
3261
        // 64 are only analyzed one basicblock at a time: initialization
3262
        // info is not passed between basicblocks.
3263
74
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3264
0
            return ERROR;
3265
0
        }
3266
74
        nlocals = 64;
3267
74
    }
3268
26.2k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3269
26.2k
    if (stack == NULL) {
3270
0
        return ERROR;
3271
0
    }
3272
26.2k
    basicblock **sp = stack;
3273
3274
    // First origin of being uninitialized:
3275
    // The non-parameter locals in the entry block.
3276
26.2k
    uint64_t start_mask = 0;
3277
80.0k
    for (int i = nparams; i < nlocals; i++) {
3278
53.8k
        start_mask |= (uint64_t)1 << i;
3279
53.8k
    }
3280
26.2k
    maybe_push(entryblock, start_mask, &sp);
3281
3282
    // Second origin of being uninitialized:
3283
    // There could be DELETE_FAST somewhere, so
3284
    // be sure to scan each basicblock at least once.
3285
234k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
208k
        scan_block_for_locals(b, &sp);
3287
208k
    }
3288
    // Now propagate the uncertainty from the origins we found: Use
3289
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3290
93.5k
    while (sp > stack) {
3291
67.3k
        basicblock *b = *--sp;
3292
        // mark as no longer on stack
3293
67.3k
        b->b_visited = 0;
3294
67.3k
        scan_block_for_locals(b, &sp);
3295
67.3k
    }
3296
26.2k
    PyMem_Free(stack);
3297
26.2k
    return SUCCESS;
3298
26.2k
}
3299
3300
3301
static int
3302
32.1k
mark_warm(basicblock *entryblock) {
3303
32.1k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3304
32.1k
    if (stack == NULL) {
3305
0
        return ERROR;
3306
0
    }
3307
32.1k
    basicblock **sp = stack;
3308
3309
32.1k
    *sp++ = entryblock;
3310
32.1k
    entryblock->b_visited = 1;
3311
235k
    while (sp > stack) {
3312
203k
        basicblock *b = *(--sp);
3313
203k
        assert(!b->b_except_handler);
3314
203k
        b->b_warm = 1;
3315
203k
        basicblock *next = b->b_next;
3316
203k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3317
113k
            *sp++ = next;
3318
113k
            next->b_visited = 1;
3319
113k
        }
3320
1.77M
        for (int i=0; i < b->b_iused; i++) {
3321
1.56M
            cfg_instr *instr = &b->b_instr[i];
3322
1.56M
            if (is_jump(instr) && !instr->i_target->b_visited) {
3323
57.9k
                *sp++ = instr->i_target;
3324
57.9k
                instr->i_target->b_visited = 1;
3325
57.9k
            }
3326
1.56M
        }
3327
203k
    }
3328
32.1k
    PyMem_Free(stack);
3329
32.1k
    return SUCCESS;
3330
32.1k
}
3331
3332
static int
3333
32.1k
mark_cold(basicblock *entryblock) {
3334
398k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3335
366k
        assert(!b->b_cold && !b->b_warm);
3336
366k
    }
3337
32.1k
    if (mark_warm(entryblock) < 0) {
3338
0
        return ERROR;
3339
0
    }
3340
3341
32.1k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3342
32.1k
    if (stack == NULL) {
3343
0
        return ERROR;
3344
0
    }
3345
3346
32.1k
    basicblock **sp = stack;
3347
398k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3348
366k
        if (b->b_except_handler) {
3349
47.6k
            assert(!b->b_warm);
3350
47.6k
            *sp++ = b;
3351
47.6k
            b->b_visited = 1;
3352
47.6k
        }
3353
366k
    }
3354
3355
157k
    while (sp > stack) {
3356
125k
        basicblock *b = *(--sp);
3357
125k
        b->b_cold = 1;
3358
125k
        basicblock *next = b->b_next;
3359
125k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3360
68.5k
            if (!next->b_warm && !next->b_visited) {
3361
54.0k
                *sp++ = next;
3362
54.0k
                next->b_visited = 1;
3363
54.0k
            }
3364
68.5k
        }
3365
709k
        for (int i = 0; i < b->b_iused; i++) {
3366
584k
            cfg_instr *instr = &b->b_instr[i];
3367
584k
            if (is_jump(instr)) {
3368
61.9k
                assert(i == b->b_iused - 1);
3369
61.9k
                basicblock *target = b->b_instr[i].i_target;
3370
61.9k
                if (!target->b_warm && !target->b_visited) {
3371
23.3k
                    *sp++ = target;
3372
23.3k
                    target->b_visited = 1;
3373
23.3k
                }
3374
61.9k
            }
3375
584k
        }
3376
125k
    }
3377
32.1k
    PyMem_Free(stack);
3378
32.1k
    return SUCCESS;
3379
32.1k
}
3380
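mark_warm and mark_cold are two flood fills: the first follows normal control flow from the entry block and marks everything it reaches warm; the second starts from the exception handlers and refuses to cross into warm blocks, so only handler-only code ends up cold. A standalone sketch of the two-phase coloring on an invented four-node graph (block 0 falls through to 1; handler 2 falls through to 3):

#include <stdio.h>

#define N 4
static const int edge[N] = {1, -1, 3, -1};   /* single successor or -1 */

int main(void)
{
    int warm[N] = {0}, cold[N] = {0};
    int stack[N], sp = 0;

    stack[sp++] = 0;                         /* phase 1: from the entry */
    while (sp) {
        int b = stack[--sp];
        if (warm[b]) continue;
        warm[b] = 1;
        if (edge[b] >= 0) stack[sp++] = edge[b];
    }
    stack[sp++] = 2;                         /* phase 2: from the handler */
    while (sp) {
        int b = stack[--sp];
        if (cold[b] || warm[b]) continue;    /* never recolor warm blocks */
        cold[b] = 1;
        if (edge[b] >= 0) stack[sp++] = edge[b];
    }
    for (int b = 0; b < N; b++) {
        printf("block %d: %s\n", b,
               warm[b] ? "warm" : cold[b] ? "cold" : "unreached");
    }
    return 0;
}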
3381
3382
static int
3383
52.1k
push_cold_blocks_to_end(cfg_builder *g) {
3384
52.1k
    basicblock *entryblock = g->g_entryblock;
3385
52.1k
    if (entryblock->b_next == NULL) {
3386
        /* single basicblock, no need to reorder */
3387
20.0k
        return SUCCESS;
3388
20.0k
    }
3389
32.1k
    RETURN_IF_ERROR(mark_cold(entryblock));
3390
3391
32.1k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3392
3393
    /* If we have a cold block with fallthrough to a warm block, add */
3394
    /* an explicit jump instead of fallthrough */
3395
407k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3396
375k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3397
8.60k
            basicblock *explicit_jump = cfg_builder_new_block(g);
3398
8.60k
            if (explicit_jump == NULL) {
3399
0
                return ERROR;
3400
0
            }
3401
8.60k
            if (!IS_LABEL(b->b_next->b_label)) {
3402
0
                b->b_next->b_label.id = next_lbl++;
3403
0
            }
3404
8.60k
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3405
8.60k
                             NO_LOCATION);
3406
8.60k
            explicit_jump->b_cold = 1;
3407
8.60k
            explicit_jump->b_next = b->b_next;
3408
8.60k
            explicit_jump->b_predecessors = 1;
3409
8.60k
            b->b_next = explicit_jump;
3410
3411
            /* set target */
3412
8.60k
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3413
8.60k
            last->i_target = explicit_jump->b_next;
3414
8.60k
        }
3415
375k
    }
3416
3417
32.1k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3418
32.1k
    basicblock *cold_blocks = NULL;
3419
32.1k
    basicblock *cold_blocks_tail = NULL;
3420
3421
32.1k
    basicblock *b = entryblock;
3422
56.6k
    while(b->b_next) {
3423
56.6k
        assert(!b->b_cold);
3424
265k
        while (b->b_next && !b->b_next->b_cold) {
3425
209k
            b = b->b_next;
3426
209k
        }
3427
56.6k
        if (b->b_next == NULL) {
3428
            /* no more cold blocks */
3429
32.1k
            break;
3430
32.1k
        }
3431
3432
        /* b->b_next is the beginning of a cold streak */
3433
56.6k
        assert(!b->b_cold && b->b_next->b_cold);
3434
3435
24.4k
        basicblock *b_end = b->b_next;
3436
133k
        while (b_end->b_next && b_end->b_next->b_cold) {
3437
109k
            b_end = b_end->b_next;
3438
109k
        }
3439
3440
        /* b_end is the end of the cold streak */
3441
24.4k
        assert(b_end && b_end->b_cold);
3442
24.4k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3443
3444
24.4k
        if (cold_blocks == NULL) {
3445
4.99k
            cold_blocks = b->b_next;
3446
4.99k
        }
3447
19.4k
        else {
3448
19.4k
            cold_blocks_tail->b_next = b->b_next;
3449
19.4k
        }
3450
24.4k
        cold_blocks_tail = b_end;
3451
24.4k
        b->b_next = b_end->b_next;
3452
24.4k
        b_end->b_next = NULL;
3453
24.4k
    }
3454
32.1k
    assert(b != NULL && b->b_next == NULL);
3455
32.1k
    b->b_next = cold_blocks;
3456
3457
32.1k
    if (cold_blocks != NULL) {
3458
4.99k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3459
4.99k
    }
3460
32.1k
    return SUCCESS;
3461
32.1k
}
3462
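push_cold_blocks_to_end then performs stable list surgery: it walks the b_next chain, splices out each maximal run of cold blocks, and reattaches the accumulated runs after the last warm block, preserving relative order. The same splice on a plain singly linked list (node ids and flags are invented; as in the CFG, the head is assumed not cold):

#include <stdio.h>

typedef struct node { int id; int cold; struct node *next; } node;

int main(void)
{
    node d = {3, 1, NULL}, c = {2, 0, &d}, b = {1, 1, &c}, a = {0, 0, &b};
    node *head = &a;

    node *cold_head = NULL, *cold_tail = NULL;
    node *cur = head;
    while (cur->next) {
        while (cur->next && !cur->next->cold) {
            cur = cur->next;             /* skip the warm prefix */
        }
        if (!cur->next) break;
        node *run = cur->next, *end = run;
        while (end->next && end->next->cold) {
            end = end->next;             /* find the end of the cold run */
        }
        if (cold_head == NULL) cold_head = run;
        else cold_tail->next = run;      /* chain runs in original order */
        cold_tail = end;
        cur->next = end->next;           /* unlink the run */
        end->next = NULL;
    }
    cur->next = cold_head;               /* append all cold runs at the tail */
    for (node *p = head; p; p = p->next) {
        printf("%d%s ", p->id, p->cold ? "(cold)" : "");
    }
    printf("\n");                        /* prints: 0 2 1(cold) 3(cold) */
    return 0;
}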
3463
static int
3464
convert_pseudo_conditional_jumps(cfg_builder *g)
3465
52.1k
{
3466
52.1k
    basicblock *entryblock = g->g_entryblock;
3467
447k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3468
3.34M
        for (int i = 0; i < b->b_iused; i++) {
3469
2.95M
            cfg_instr *instr = &b->b_instr[i];
3470
2.95M
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3471
2.53k
                assert(i == b->b_iused - 1);
3472
2.53k
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3473
1.68k
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3474
2.53k
                location loc = instr->i_loc;
3475
2.53k
                basicblock *except = instr->i_except;
3476
2.53k
                cfg_instr copy = {
3477
2.53k
                            .i_opcode = COPY,
3478
2.53k
                            .i_oparg = 1,
3479
2.53k
                            .i_loc = loc,
3480
2.53k
                            .i_target = NULL,
3481
2.53k
                            .i_except = except,
3482
2.53k
                };
3483
2.53k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3484
2.53k
                cfg_instr to_bool = {
3485
2.53k
                            .i_opcode = TO_BOOL,
3486
2.53k
                            .i_oparg = 0,
3487
2.53k
                            .i_loc = loc,
3488
2.53k
                            .i_target = NULL,
3489
2.53k
                            .i_except = except,
3490
2.53k
                };
3491
2.53k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3492
2.53k
            }
3493
2.95M
        }
3494
395k
    }
3495
52.1k
    return SUCCESS;
3496
52.1k
}
3497
3498
static int
3499
convert_pseudo_ops(cfg_builder *g)
3500
52.1k
{
3501
52.1k
    basicblock *entryblock = g->g_entryblock;
3502
447k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3503
3.38M
        for (int i = 0; i < b->b_iused; i++) {
3504
2.99M
            cfg_instr *instr = &b->b_instr[i];
3505
2.99M
            if (is_block_push(instr)) {
3506
47.6k
                INSTR_SET_OP0(instr, NOP);
3507
47.6k
            }
3508
2.94M
            else if (instr->i_opcode == LOAD_CLOSURE) {
3509
21.7k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3510
21.7k
                instr->i_opcode = LOAD_FAST;
3511
21.7k
            }
3512
2.92M
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3513
18.9k
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3514
18.9k
                instr->i_opcode = STORE_FAST;
3515
18.9k
            }
3516
2.99M
        }
3517
395k
    }
3518
52.1k
    return remove_redundant_nops_and_jumps(g);
3519
52.1k
}
3520
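convert_pseudo_ops lowers compile-time-only opcodes in place, either to a NOP (block pushes) or to their runtime counterparts. A minimal sketch of that table-style lowering, with invented opcode numbers rather than CPython's:

#include <stdio.h>

enum { NOP = 0, LOAD = 1, STORE = 2,        /* "real" opcodes */
       P_SETUP = 100, P_LOAD = 101 };       /* pseudo opcodes */

static int lower(int op)
{
    switch (op) {
        case P_SETUP: return NOP;           /* only needed during compilation */
        case P_LOAD:  return LOAD;          /* same runtime behavior */
        default:      return op;
    }
}

int main(void)
{
    int code[] = {P_SETUP, P_LOAD, STORE};
    for (int i = 0; i < 3; i++) {
        code[i] = lower(code[i]);           /* rewrite in place */
        printf("instr %d -> opcode %d\n", i, code[i]);
    }
    return 0;
}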
3521
static inline bool
3522
755k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3523
755k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3524
223k
        return basicblock_has_no_lineno(b);
3525
223k
    }
3526
532k
    else {
3527
532k
        return false;
3528
532k
    }
3529
755k
}
3530
3531
3532
/* PEP 626 mandates that the f_lineno of a frame is correct
3533
 * after a frame terminates. It would be prohibitively expensive
3534
 * to continuously update the f_lineno field at runtime,
3535
 * so we make sure that all exiting instructions (raises and returns)
3536
 * have a valid line number, allowing us to compute f_lineno lazily.
3537
 * We can do this by duplicating the exit blocks without line number
3538
 * so that none have more than one predecessor. We can then safely
3539
 * copy the line number from the sole predecessor block.
3540
 */
3541
static int
3542
duplicate_exits_without_lineno(cfg_builder *g)
3543
104k
{
3544
104k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3545
3546
    /* Copy all exit blocks without line number that are targets of a jump.
3547
     */
3548
104k
    basicblock *entryblock = g->g_entryblock;
3549
885k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3550
781k
        cfg_instr *last = basicblock_last_instr(b);
3551
781k
        if (last == NULL) {
3552
81.9k
            continue;
3553
81.9k
        }
3554
699k
        if (is_jump(last)) {
3555
365k
            basicblock *target = next_nonempty_block(last->i_target);
3556
365k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3557
2.32k
                basicblock *new_target = copy_basicblock(g, target);
3558
2.32k
                if (new_target == NULL) {
3559
0
                    return ERROR;
3560
0
                }
3561
2.32k
                new_target->b_instr[0].i_loc = last->i_loc;
3562
2.32k
                last->i_target = new_target;
3563
2.32k
                target->b_predecessors--;
3564
2.32k
                new_target->b_predecessors = 1;
3565
2.32k
                new_target->b_next = target->b_next;
3566
2.32k
                new_target->b_label.id = next_lbl++;
3567
2.32k
                target->b_next = new_target;
3568
2.32k
            }
3569
365k
        }
3570
699k
    }
3571
3572
    /* Any remaining reachable exit blocks without line number can only be reached by
3573
     * fall through, and thus can only have a single predecessor */
3574
885k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3575
781k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3576
389k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3577
10.6k
                cfg_instr *last = basicblock_last_instr(b);
3578
10.6k
                assert(last != NULL);
3579
10.6k
                b->b_next->b_instr[0].i_loc = last->i_loc;
3580
10.6k
            }
3581
389k
        }
3582
781k
    }
3583
104k
    return SUCCESS;
3584
104k
}
3585
3586
3587
/* If an instruction has no line number, but its predecessor in the BB does,
3588
 * then copy the line number. If a successor block has no line number, and only
3589
 * one predecessor, then inherit the line number.
3590
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3591
 * Also reduces the size of the line number table,
3592
 * but has no impact on the generated line number events.
3593
 */
3594
3595
static inline void
3596
maybe_propagate_location(basicblock *b, int i, location loc)
3597
7.45M
{
3598
7.45M
    assert(b->b_iused > i);
3599
7.45M
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3600
478k
         b->b_instr[i].i_loc = loc;
3601
478k
    }
3602
7.45M
}
3603
3604
static void
3605
propagate_line_numbers(basicblock *entryblock)
3606
104k
{
3607
885k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3608
781k
        cfg_instr *last = basicblock_last_instr(b);
3609
781k
        if (last == NULL) {
3610
81.9k
            continue;
3611
81.9k
        }
3612
3613
699k
        location prev_location = NO_LOCATION;
3614
7.81M
        for (int i = 0; i < b->b_iused; i++) {
3615
7.11M
            maybe_propagate_location(b, i, prev_location);
3616
7.11M
            prev_location = b->b_instr[i].i_loc;
3617
7.11M
        }
3618
699k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3619
265k
            if (b->b_next->b_iused > 0) {
3620
264k
                maybe_propagate_location(b->b_next, 0, prev_location);
3621
264k
            }
3622
265k
        }
3623
699k
        if (is_jump(last)) {
3624
365k
            basicblock *target = last->i_target;
3625
366k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3626
286
                target = target->b_next;
3627
286
            }
3628
365k
            if (target->b_predecessors == 1) {
3629
80.1k
                maybe_propagate_location(target, 0, prev_location);
3630
80.1k
            }
3631
365k
        }
3632
699k
    }
3633
104k
}
3634
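Within a block, propagate_line_numbers is a simple forward copy: an instruction with no location inherits the location of its predecessor, which (together with the exit duplication above) is what guarantees every exit instruction a line number. A sketch of the in-block part, using -1 to stand in for NO_LOCATION and invented line numbers:

#include <stdio.h>

int main(void)
{
    int lineno[] = {10, -1, -1, 12, -1};
    int n = 5;
    int prev = -1;                        /* nothing known before the block */
    for (int i = 0; i < n; i++) {
        if (lineno[i] == -1) {
            lineno[i] = prev;             /* inherit from the predecessor */
        }
        prev = lineno[i];
    }
    for (int i = 0; i < n; i++) {
        printf("instr %d: line %d\n", i, lineno[i]);
    }
    return 0;                             /* prints 10 10 10 12 12 */
}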
3635
static int
3636
resolve_line_numbers(cfg_builder *g, int firstlineno)
3637
104k
{
3638
104k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3639
104k
    propagate_line_numbers(g->g_entryblock);
3640
104k
    return SUCCESS;
3641
104k
}
3642
3643
int
3644
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3645
                        int nlocals, int nparams, int firstlineno)
3646
52.1k
{
3647
52.1k
    assert(cfg_builder_check(g));
3648
    /** Preprocessing **/
3649
    /* Map labels to targets and mark exception handlers */
3650
52.1k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3651
52.1k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3652
52.1k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3653
3654
    /** Optimization **/
3655
52.1k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3656
52.1k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3657
52.1k
    RETURN_IF_ERROR(
3658
52.1k
        add_checks_for_loads_of_uninitialized_variables(
3659
52.1k
            g->g_entryblock, nlocals, nparams));
3660
52.1k
    RETURN_IF_ERROR(insert_superinstructions(g));
3661
3662
52.1k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3663
52.1k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3664
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3665
    // assert(all_exits_have_lineno(g->g_entryblock));
3666
52.1k
    return SUCCESS;
3667
52.1k
}
3668
3669
static int *
3670
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3671
52.1k
{
3672
52.1k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3673
52.1k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3674
52.1k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3675
3676
52.1k
    int noffsets = ncellvars + nfreevars;
3677
52.1k
    int *fixed = PyMem_New(int, noffsets);
3678
52.1k
    if (fixed == NULL) {
3679
0
        PyErr_NoMemory();
3680
0
        return NULL;
3681
0
    }
3682
83.6k
    for (int i = 0; i < noffsets; i++) {
3683
31.4k
        fixed[i] = nlocals + i;
3684
31.4k
    }
3685
3686
52.1k
    PyObject *varname, *cellindex;
3687
52.1k
    Py_ssize_t pos = 0;
3688
64.9k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3689
12.7k
        PyObject *varindex;
3690
12.7k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3691
0
            goto error;
3692
0
        }
3693
12.7k
        if (varindex == NULL) {
3694
12.0k
            continue;
3695
12.0k
        }
3696
3697
647
        int argoffset = PyLong_AsInt(varindex);
3698
647
        Py_DECREF(varindex);
3699
647
        if (argoffset == -1 && PyErr_Occurred()) {
3700
0
            goto error;
3701
0
        }
3702
3703
647
        int oldindex = PyLong_AsInt(cellindex);
3704
647
        if (oldindex == -1 && PyErr_Occurred()) {
3705
0
            goto error;
3706
0
        }
3707
647
        fixed[oldindex] = argoffset;
3708
647
    }
3709
52.1k
    return fixed;
3710
3711
0
error:
3712
0
    PyMem_Free(fixed);
3713
0
    return NULL;
3714
52.1k
}
3715
3716
#define IS_GENERATOR(CF) \
3717
52.1k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3718
3719
static int
3720
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3721
                           int *fixed, int nfreevars, int code_flags)
3722
52.1k
{
3723
52.1k
    assert(umd->u_firstlineno > 0);
3724
3725
    /* Add the generator prefix instructions. */
3726
52.1k
    if (IS_GENERATOR(code_flags)) {
3727
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3728
         * of 0. This is because RETURN_GENERATOR pushes an element
3729
         * with _PyFrame_StackPush before switching stacks.
3730
         */
3731
3732
3.46k
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3733
3.46k
        cfg_instr make_gen = {
3734
3.46k
            .i_opcode = RETURN_GENERATOR,
3735
3.46k
            .i_oparg = 0,
3736
3.46k
            .i_loc = loc,
3737
3.46k
            .i_target = NULL,
3738
3.46k
            .i_except = NULL,
3739
3.46k
        };
3740
3.46k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3741
3.46k
        cfg_instr pop_top = {
3742
3.46k
            .i_opcode = POP_TOP,
3743
3.46k
            .i_oparg = 0,
3744
3.46k
            .i_loc = loc,
3745
3.46k
            .i_target = NULL,
3746
3.46k
            .i_except = NULL,
3747
3.46k
        };
3748
3.46k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3749
3.46k
    }
3750
3751
    /* Set up cells for any variable that escapes, to be put in a closure. */
3752
52.1k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3753
52.1k
    if (ncellvars) {
3754
        // umd->u_cellvars has the cells out of order so we sort them
3755
        // before adding the MAKE_CELL instructions.  Note that we
3756
        // adjust for arg cells, which come first.
3757
10.7k
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3758
10.7k
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3759
10.7k
        if (sorted == NULL) {
3760
0
            PyErr_NoMemory();
3761
0
            return ERROR;
3762
0
        }
3763
23.4k
        for (int i = 0; i < ncellvars; i++) {
3764
12.7k
            sorted[fixed[i]] = i + 1;
3765
12.7k
        }
3766
39.2k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3767
28.5k
            int oldindex = sorted[i] - 1;
3768
28.5k
            if (oldindex == -1) {
3769
15.7k
                continue;
3770
15.7k
            }
3771
12.7k
            cfg_instr make_cell = {
3772
12.7k
                .i_opcode = MAKE_CELL,
3773
                // This will get fixed in offset_derefs().
3774
12.7k
                .i_oparg = oldindex,
3775
12.7k
                .i_loc = NO_LOCATION,
3776
12.7k
                .i_target = NULL,
3777
12.7k
                .i_except = NULL,
3778
12.7k
            };
3779
12.7k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3780
0
                PyMem_RawFree(sorted);
3781
0
                return ERROR;
3782
0
            }
3783
12.7k
            ncellsused += 1;
3784
12.7k
        }
3785
10.7k
        PyMem_RawFree(sorted);
3786
10.7k
    }
3787
3788
52.1k
    if (nfreevars) {
3789
14.5k
        cfg_instr copy_frees = {
3790
14.5k
            .i_opcode = COPY_FREE_VARS,
3791
14.5k
            .i_oparg = nfreevars,
3792
14.5k
            .i_loc = NO_LOCATION,
3793
14.5k
            .i_target = NULL,
3794
14.5k
            .i_except = NULL,
3795
14.5k
        };
3796
14.5k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3797
14.5k
    }
3798
3799
52.1k
    return SUCCESS;
3800
52.1k
}
3801
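The sorted array in insert_prefix_instructions is an inverse permutation with a +1 bias, so that zero (the calloc fill) can mean "no cell lives in this frame slot"; walking it in slot order then emits the MAKE_CELL instructions in frame layout order. A tiny illustration of the bias trick (the slot layout is invented):

#include <stdio.h>

int main(void)
{
    int fixed[2] = {3, 0};        /* cell 0 -> slot 3, cell 1 -> slot 0 */
    int nvars = 4;
    int sorted[4] = {0};

    for (int i = 0; i < 2; i++) {
        sorted[fixed[i]] = i + 1; /* +1 keeps 0 free as "empty" */
    }
    for (int slot = 0; slot < nvars; slot++) {
        int oldindex = sorted[slot] - 1;
        if (oldindex == -1) {
            continue;             /* no cell in this slot */
        }
        printf("emit MAKE_CELL for cell %d (slot %d)\n", oldindex, slot);
    }
    return 0;
}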
3802
static int
3803
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3804
52.1k
{
3805
52.1k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3806
52.1k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3807
52.1k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3808
52.1k
    int noffsets = ncellvars + nfreevars;
3809
3810
    // First deal with duplicates (arg cells).
3811
52.1k
    int numdropped = 0;
3812
83.6k
    for (int i = 0; i < noffsets ; i++) {
3813
31.4k
        if (fixedmap[i] == i + nlocals) {
3814
30.8k
            fixedmap[i] -= numdropped;
3815
30.8k
        }
3816
647
        else {
3817
            // It was a duplicate (cell/arg).
3818
647
            numdropped += 1;
3819
647
        }
3820
31.4k
    }
3821
3822
    // Then update offsets, either relative to locals or by cell2arg.
3823
447k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3824
3.38M
        for (int i = 0; i < b->b_iused; i++) {
3825
2.99M
            cfg_instr *inst = &b->b_instr[i];
3826
            // This is called before extended args are generated.
3827
2.99M
            assert(inst->i_opcode != EXTENDED_ARG);
3828
2.99M
            int oldoffset = inst->i_oparg;
3829
2.99M
            switch(inst->i_opcode) {
3830
12.7k
                case MAKE_CELL:
3831
34.4k
                case LOAD_CLOSURE:
3832
49.0k
                case LOAD_DEREF:
3833
58.9k
                case STORE_DEREF:
3834
58.9k
                case DELETE_DEREF:
3835
67.2k
                case LOAD_FROM_DICT_OR_DEREF:
3836
67.2k
                    assert(oldoffset >= 0);
3837
67.2k
                    assert(oldoffset < noffsets);
3838
67.2k
                    assert(fixedmap[oldoffset] >= 0);
3839
67.2k
                    inst->i_oparg = fixedmap[oldoffset];
3840
2.99M
            }
3841
2.99M
        }
3842
395k
    }
3843
3844
52.1k
    return numdropped;
3845
52.1k
}
3846
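fix_cell_offsets compacts the cell/free slots: a cell that aliases an argument keeps the argument's slot instead of its own, and every later ordinary cell shifts down by the number of duplicates dropped so far. The numdropped bookkeeping in isolation (three cells after three locals, with cell 1 aliasing argument local 1; the layout is invented):

#include <stdio.h>

int main(void)
{
    int nlocals = 3;
    int fixed[3] = {3, 1, 5};            /* cell 1 aliases argument slot 1 */
    int noffsets = 3, numdropped = 0;

    for (int i = 0; i < noffsets; i++) {
        if (fixed[i] == i + nlocals) {
            fixed[i] -= numdropped;      /* ordinary cell: close the gap */
        }
        else {
            numdropped += 1;             /* duplicate of an arg slot */
        }
    }
    for (int i = 0; i < noffsets; i++) {
        printf("cell %d -> frame slot %d\n", i, fixed[i]);
    }
    printf("dropped %d duplicate slot(s)\n", numdropped);
    return 0;                            /* slots 3, 1, 4; one dropped */
}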
3847
static int
3848
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3849
52.1k
{
3850
52.1k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3851
52.1k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3852
52.1k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3853
52.1k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3854
52.1k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3855
52.1k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3856
52.1k
    assert(INT_MAX - nlocals - ncellvars > 0);
3857
52.1k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3858
52.1k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3859
52.1k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3860
52.1k
    if (cellfixedoffsets == NULL) {
3861
0
        return ERROR;
3862
0
    }
3863
3864
    // This must be called before fix_cell_offsets().
3865
52.1k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3866
0
        PyMem_Free(cellfixedoffsets);
3867
0
        return ERROR;
3868
0
    }
3869
3870
52.1k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3871
52.1k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3872
52.1k
    cellfixedoffsets = NULL;
3873
52.1k
    if (numdropped < 0) {
3874
0
        return ERROR;
3875
0
    }
3876
3877
52.1k
    nlocalsplus -= numdropped;
3878
52.1k
    return nlocalsplus;
3879
52.1k
}
3880
3881
cfg_builder *
3882
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3883
52.1k
{
3884
52.1k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3885
0
        return NULL;
3886
0
    }
3887
52.1k
    cfg_builder *g = _PyCfgBuilder_New();
3888
52.1k
    if (g == NULL) {
3889
0
        return NULL;
3890
0
    }
3891
4.35M
    for (int i = 0; i < seq->s_used; i++) {
3892
4.30M
        seq->s_instrs[i].i_target = 0;
3893
4.30M
    }
3894
4.35M
    for (int i = 0; i < seq->s_used; i++) {
3895
4.30M
        _PyInstruction *instr = &seq->s_instrs[i];
3896
4.30M
        if (HAS_TARGET(instr->i_opcode)) {
3897
245k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3898
245k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3899
245k
        }
3900
4.30M
    }
3901
52.1k
    int offset = 0;
3902
4.35M
    for (int i = 0; i < seq->s_used; i++) {
3903
4.30M
        _PyInstruction *instr = &seq->s_instrs[i];
3904
4.30M
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3905
12.8k
            if (seq->s_annotations_code != NULL) {
3906
616
                assert(seq->s_annotations_code->s_labelmap_size == 0
3907
616
                    && seq->s_annotations_code->s_nested == NULL);
3908
2.46k
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3909
1.84k
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3910
1.84k
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3911
1.84k
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3912
0
                        goto error;
3913
0
                    }
3914
1.84k
                }
3915
616
                offset += seq->s_annotations_code->s_used - 1;
3916
616
            }
3917
12.1k
            else {
3918
12.1k
                offset -= 1;
3919
12.1k
            }
3920
12.8k
            continue;
3921
12.8k
        }
3922
4.29M
        if (instr->i_target) {
3923
172k
            jump_target_label lbl_ = {i + offset};
3924
172k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3925
0
                goto error;
3926
0
            }
3927
172k
        }
3928
4.29M
        int opcode = instr->i_opcode;
3929
4.29M
        int oparg = instr->i_oparg;
3930
4.29M
        if (HAS_TARGET(opcode)) {
3931
245k
            oparg += offset;
3932
245k
        }
3933
4.29M
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3934
0
            goto error;
3935
0
        }
3936
4.29M
    }
3937
52.1k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3938
0
        goto error;
3939
0
    }
3940
52.1k
    return g;
3941
0
error:
3942
0
    _PyCfgBuilder_Free(g);
3943
0
    return NULL;
3944
52.1k
}
3945
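_PyCfg_FromInstructionSequence recovers block structure in two passes: first mark every instruction that is the target of some jump, then walk the list again and start a new basic block (a label) at each marked instruction. A standalone sketch of the two-pass idea, with an invented jump table:

#include <stdio.h>

int main(void)
{
    /* jump_to[i] is the index instr i jumps to, or -1 for non-jumps */
    int jump_to[] = {-1, 3, -1, -1, 1};
    int n = 5;
    int is_target[5] = {0};

    for (int i = 0; i < n; i++) {        /* pass 1: mark jump targets */
        if (jump_to[i] >= 0) {
            is_target[jump_to[i]] = 1;
        }
    }
    int block = 0;
    for (int i = 0; i < n; i++) {        /* pass 2: open blocks at targets */
        if (is_target[i]) {
            block++;
        }
        printf("instr %d in block %d\n", i, block);
    }
    return 0;
}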
3946
int
3947
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3948
52.1k
{
3949
52.1k
    int lbl = 0;
3950
448k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3951
396k
        b->b_label = (jump_target_label){lbl};
3952
396k
        lbl += 1;
3953
396k
    }
3954
448k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3955
396k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3956
3.43M
        for (int i = 0; i < b->b_iused; i++) {
3957
3.04M
            cfg_instr *instr = &b->b_instr[i];
3958
3.04M
            if (HAS_TARGET(instr->i_opcode)) {
3959
                /* Set oparg to the label id (it will later be mapped to an offset) */
3960
178k
                instr->i_oparg = instr->i_target->b_label.id;
3961
178k
            }
3962
3.04M
            RETURN_IF_ERROR(
3963
3.04M
                _PyInstructionSequence_Addop(
3964
3.04M
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3965
3966
3.04M
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3967
3.04M
            if (instr->i_except != NULL) {
3968
832k
                hi->h_label = instr->i_except->b_label.id;
3969
832k
                hi->h_startdepth = instr->i_except->b_startdepth;
3970
832k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3971
832k
            }
3972
2.20M
            else {
3973
2.20M
                hi->h_label = -1;
3974
2.20M
            }
3975
3.04M
        }
3976
396k
    }
3977
52.1k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3978
0
        return ERROR;
3979
0
    }
3980
52.1k
    return SUCCESS;
3981
52.1k
}
3982
3983
3984
int
3985
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3986
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3987
                                     int *stackdepth, int *nlocalsplus,
3988
                                     _PyInstructionSequence *seq)
3989
52.1k
{
3990
52.1k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3991
3992
52.1k
    *stackdepth = calculate_stackdepth(g);
3993
52.1k
    if (*stackdepth < 0) {
3994
0
        return ERROR;
3995
0
    }
3996
3997
    /* prepare_localsplus adds instructions for generators that push
3998
     * and pop an item on the stack. This assertion makes sure there
3999
     * is space on the stack for that.
4000
     * It should always be true, because a generator must have at
4001
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
4002
 * which requires stack space.
4003
     */
4004
52.1k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
4005
4006
52.1k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
4007
52.1k
    if (*nlocalsplus < 0) {
4008
0
        return ERROR;
4009
0
    }
4010
4011
52.1k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4012
4013
    /* Order of basic blocks must have been determined by now */
4014
4015
52.1k
    RETURN_IF_ERROR(normalize_jumps(g));
4016
52.1k
    assert(no_redundant_jumps(g));
4017
4018
    /* Can't modify the bytecode after inserting instructions that produce
4019
     * borrowed references.
4020
     */
4021
52.1k
    RETURN_IF_ERROR(optimize_load_fast(g));
4022
4023
    /* Can't modify the bytecode after computing jump offsets. */
4024
52.1k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4025
0
        return ERROR;
4026
0
    }
4027
4028
52.1k
    return SUCCESS;
4029
52.1k
}
4030
4031
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4032
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4033
 */
4034
int
4035
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4036
0
{
4037
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4038
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4039
0
    return SUCCESS;
4040
0
}
4041
4042
/* Exported API functions */
4043
4044
int
4045
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4046
0
{
4047
0
    stack_effects effs;
4048
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4049
0
        return PY_INVALID_STACK_EFFECT;
4050
0
    }
4051
0
    return effs.net;
4052
0
}
4053
4054
int
4055
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4056
6.58k
{
4057
6.58k
    stack_effects effs;
4058
6.58k
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4059
0
        return PY_INVALID_STACK_EFFECT;
4060
0
    }
4061
6.58k
    return effs.net;
4062
6.58k
}
4063
4064
/* Access to compiler optimizations for unit tests.
4065
4066
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4067
 * a CFG, optimizes it and converts back to an instruction list.
4068
 */
4069
4070
static PyObject *
4071
cfg_to_instruction_sequence(cfg_builder *g)
4072
0
{
4073
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4074
0
    if (seq == NULL) {
4075
0
        return NULL;
4076
0
    }
4077
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4078
0
        PyInstructionSequence_Fini(seq);
4079
0
        return NULL;
4080
0
    }
4081
0
    return (PyObject*)seq;
4082
0
}
4083
4084
PyObject *
4085
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4086
0
{
4087
0
    if (!_PyInstructionSequence_Check(seq)) {
4088
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4089
0
        return NULL;
4090
0
    }
4091
0
    PyObject *const_cache = PyDict_New();
4092
0
    if (const_cache == NULL) {
4093
0
        return NULL;
4094
0
    }
4095
4096
0
    PyObject *res = NULL;
4097
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4098
0
    if (g == NULL) {
4099
0
        goto error;
4100
0
    }
4101
0
    int nparams = 0, firstlineno = 1;
4102
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4103
0
                                nparams, firstlineno) < 0) {
4104
0
        goto error;
4105
0
    }
4106
4107
0
    if (calculate_stackdepth(g) == ERROR) {
4108
0
        goto error;
4109
0
    }
4110
4111
0
    if (optimize_load_fast(g) != SUCCESS) {
4112
0
        goto error;
4113
0
    }
4114
4115
0
    res = cfg_to_instruction_sequence(g);
4116
0
error:
4117
0
    Py_DECREF(const_cache);
4118
0
    _PyCfgBuilder_Free(g);
4119
0
    return res;
4120
0
}