Coverage Report

Created: 2026-05-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
#include "pycore_hashtable.h"     // _Py_hashtable_t
10
11
#include "pycore_opcode_utils.h"
12
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
13
#include "pycore_pystate.h"         // _PyInterpreterState_GET()
14
#include "pycore_stackref.h"        // PyStackRef_AsPyObjectBorrow()
15
16
#include <stdbool.h>
17
18
19
#undef SUCCESS
20
#undef ERROR
21
708k
#define SUCCESS 0
22
5.35k
#define ERROR -1
23
24
#define RETURN_IF_ERROR(X)  \
25
1.06M
    if ((X) == -1) {        \
26
0
        return ERROR;       \
27
0
    }
28
29
227k
#define DEFAULT_BLOCK_SIZE 16
30
31
typedef _Py_SourceLocation location;
32
typedef _PyJumpTargetLabel jump_target_label;
33
34
typedef struct _PyCfgInstruction {
35
    int i_opcode;
36
    int i_oparg;
37
    _Py_SourceLocation i_loc;
38
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
39
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
40
} cfg_instr;
41
42
typedef struct _PyCfgBasicblock {
43
    /* Each basicblock in a compilation unit is linked via b_list in the
44
       reverse order that the block are allocated.  b_list points to the next
45
       block in this list, not to be confused with b_next, which is next by
46
       control flow. */
47
    struct _PyCfgBasicblock *b_list;
48
    /* The label of this block if it is a jump target, -1 otherwise */
49
    _PyJumpTargetLabel b_label;
50
    /* Exception stack at start of block, used by assembler to create the exception handling table */
51
    struct _PyCfgExceptStack *b_exceptstack;
52
    /* pointer to an array of instructions, initially NULL */
53
    cfg_instr *b_instr;
54
    /* If b_next is non-NULL, it is a pointer to the next
55
       block reached by normal control flow. */
56
    struct _PyCfgBasicblock *b_next;
57
    /* number of instructions used */
58
    int b_iused;
59
    /* length of instruction array (b_instr) */
60
    int b_ialloc;
61
    /* Used by add_checks_for_loads_of_unknown_variables */
62
    uint64_t b_unsafe_locals_mask;
63
    /* Number of predecessors that a block has. */
64
    int b_predecessors;
65
    /* depth of stack upon entry of block, computed by stackdepth() */
66
    int b_startdepth;
67
    /* Basic block is an exception handler that preserves lasti */
68
    unsigned b_preserve_lasti : 1;
69
    /* Used by compiler passes to mark whether they have visited a basic block. */
70
    unsigned b_visited : 1;
71
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
72
    unsigned b_except_handler : 1;
73
    /* b_cold is true if this block is not perf critical (like an exception handler) */
74
    unsigned b_cold : 1;
75
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
76
    unsigned b_warm : 1;
77
} basicblock;
78
79
80
struct _PyCfgBuilder {
81
    /* The entryblock, at which control flow begins. All blocks of the
82
       CFG are reachable through the b_next links */
83
    struct _PyCfgBasicblock *g_entryblock;
84
    /* Pointer to the most recently allocated block.  By following
85
       b_list links, you can reach all allocated blocks. */
86
    struct _PyCfgBasicblock *g_block_list;
87
    /* pointer to the block currently being constructed */
88
    struct _PyCfgBasicblock *g_curblock;
89
    /* label for the next instruction to be placed */
90
    _PyJumpTargetLabel g_current_label;
91
};
92
93
typedef struct _PyCfgBuilder cfg_builder;
94
95
233k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
96
233k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
97
98
#define LOCATION(LNO, END_LNO, COL, END_COL) \
99
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
100
101
static inline int
102
is_block_push(cfg_instr *i)
103
1.00M
{
104
1.00M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
105
1.00M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
106
1.00M
}
107
108
static inline int
109
is_jump(cfg_instr *i)
110
880k
{
111
880k
    return OPCODE_HAS_JUMP(i->i_opcode);
112
880k
}
113
114
/* One arg*/
115
#define INSTR_SET_OP1(I, OP, ARG) \
116
34.3k
    do { \
117
34.3k
        assert(OPCODE_HAS_ARG(OP)); \
118
34.3k
        cfg_instr *_instr__ptr_ = (I); \
119
34.3k
        _instr__ptr_->i_opcode = (OP); \
120
34.3k
        _instr__ptr_->i_oparg = (ARG); \
121
34.3k
    } while (0);
122
123
/* No args*/
124
#define INSTR_SET_OP0(I, OP) \
125
42.8k
    do { \
126
42.8k
        assert(!OPCODE_HAS_ARG(OP)); \
127
42.8k
        cfg_instr *_instr__ptr_ = (I); \
128
42.8k
        _instr__ptr_->i_opcode = (OP); \
129
42.8k
        _instr__ptr_->i_oparg = 0; \
130
42.8k
    } while (0);
131
132
#define INSTR_SET_LOC(I, LOC) \
133
3.93k
    do { \
134
3.93k
        cfg_instr *_instr__ptr_ = (I); \
135
3.93k
        _instr__ptr_->i_loc = (LOC); \
136
3.93k
    } while (0);
137
138
/***** Blocks *****/
139
140
/* Returns the offset of the next instruction in the current block's
141
   b_instr array.  Resizes the b_instr as necessary.
142
   Returns -1 on failure.
143
*/
144
static int
145
basicblock_next_instr(basicblock *b)
146
227k
{
147
227k
    assert(b != NULL);
148
227k
    _Py_c_array_t array = {
149
227k
        .array = (void*)b->b_instr,
150
227k
        .allocated_entries = b->b_ialloc,
151
227k
        .item_size = sizeof(cfg_instr),
152
227k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
153
227k
    };
154
155
227k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
156
227k
    b->b_instr = array.array;
157
227k
    b->b_ialloc = array.allocated_entries;
158
227k
    return b->b_iused++;
159
227k
}
160
161
static cfg_instr *
162
752k
basicblock_last_instr(const basicblock *b) {
163
752k
    assert(b->b_iused >= 0);
164
752k
    if (b->b_iused > 0) {
165
686k
        assert(b->b_instr != NULL);
166
686k
        return &b->b_instr[b->b_iused - 1];
167
686k
    }
168
65.8k
    return NULL;
169
752k
}
170
171
/* Allocate a new block and return a pointer to it.
172
   Returns NULL on error.
173
*/
174
175
static basicblock *
176
cfg_builder_new_block(cfg_builder *g)
177
24.2k
{
178
24.2k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
179
24.2k
    if (b == NULL) {
180
0
        PyErr_NoMemory();
181
0
        return NULL;
182
0
    }
183
    /* Extend the singly linked list of blocks with new block. */
184
24.2k
    b->b_list = g->g_block_list;
185
24.2k
    g->g_block_list = b;
186
24.2k
    b->b_label = NO_LABEL;
187
24.2k
    return b;
188
24.2k
}
189
190
static int
191
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
192
221k
{
193
221k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
194
221k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
195
221k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
196
221k
    assert(0 <= oparg && oparg < (1 << 30));
197
198
221k
    int off = basicblock_next_instr(b);
199
221k
    if (off < 0) {
200
0
        return ERROR;
201
0
    }
202
221k
    cfg_instr *i = &b->b_instr[off];
203
221k
    i->i_opcode = opcode;
204
221k
    i->i_oparg = oparg;
205
221k
    i->i_loc = loc;
206
    // memory is already zero initialized
207
221k
    assert(i->i_target == NULL);
208
221k
    assert(i->i_except == NULL);
209
210
221k
    return SUCCESS;
211
221k
}
212
213
static int
214
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
215
385
{
216
385
    cfg_instr *last = basicblock_last_instr(b);
217
385
    if (last && is_jump(last)) {
218
0
        return ERROR;
219
0
    }
220
221
385
    RETURN_IF_ERROR(
222
385
        basicblock_addop(b, opcode, target->b_label.id, loc));
223
385
    last = basicblock_last_instr(b);
224
385
    assert(last && last->i_opcode == opcode);
225
385
    last->i_target = target;
226
385
    return SUCCESS;
227
385
}
228
229
static inline int
230
basicblock_append_instructions(basicblock *to, basicblock *from)
231
1.52k
{
232
4.39k
    for (int i = 0; i < from->b_iused; i++) {
233
2.86k
        int n = basicblock_next_instr(to);
234
2.86k
        if (n < 0) {
235
0
            return ERROR;
236
0
        }
237
2.86k
        to->b_instr[n] = from->b_instr[i];
238
2.86k
    }
239
1.52k
    return SUCCESS;
240
1.52k
}
241
242
static inline int
243
228k
basicblock_nofallthrough(const basicblock *b) {
244
228k
    cfg_instr *last = basicblock_last_instr(b);
245
228k
    return (last &&
246
214k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
247
119k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
248
228k
}
249
250
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
251
373k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
252
253
static basicblock *
254
copy_basicblock(cfg_builder *g, basicblock *block)
255
381
{
256
    /* Cannot copy a block if it has a fallthrough, since
257
     * a block can only have one fallthrough predecessor.
258
     */
259
381
    assert(BB_NO_FALLTHROUGH(block));
260
381
    basicblock *result = cfg_builder_new_block(g);
261
381
    if (result == NULL) {
262
0
        return NULL;
263
0
    }
264
381
    if (basicblock_append_instructions(result, block) < 0) {
265
0
        return NULL;
266
0
    }
267
381
    return result;
268
381
}
269
270
static int
271
2.87k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
272
2.87k
    RETURN_IF_ERROR(basicblock_next_instr(block));
273
47.7k
    for (int i = block->b_iused - 1; i > pos; i--) {
274
44.9k
        block->b_instr[i] = block->b_instr[i-1];
275
44.9k
    }
276
2.87k
    block->b_instr[pos] = *instr;
277
2.87k
    return SUCCESS;
278
2.87k
}
279
280
/* For debugging purposes only */
281
#if 0
282
static void
283
dump_instr(cfg_instr *i)
284
{
285
    const char *jump = is_jump(i) ? "jump " : "";
286
287
    char arg[128];
288
289
    *arg = '\0';
290
    if (OPCODE_HAS_ARG(i->i_opcode)) {
291
        sprintf(arg, "arg: %d ", i->i_oparg);
292
    }
293
    if (HAS_TARGET(i->i_opcode)) {
294
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
295
    }
296
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
297
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
298
}
299
300
static inline int
301
basicblock_returns(const basicblock *b) {
302
    cfg_instr *last = basicblock_last_instr(b);
303
    return last && IS_RETURN_OPCODE(last->i_opcode);
304
}
305
306
static void
307
dump_basicblock(const basicblock *b, bool highlight)
308
{
309
    const char *b_return = basicblock_returns(b) ? "return " : "";
310
    if (highlight) {
311
        fprintf(stderr, ">>> ");
312
    }
313
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
314
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
315
        b->b_startdepth, b->b_predecessors, b_return);
316
    int depth = b->b_startdepth;
317
    if (b->b_instr) {
318
        int i;
319
        for (i = 0; i < b->b_iused; i++) {
320
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
321
            dump_instr(b->b_instr + i);
322
323
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
324
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
325
            depth += (pushed - popped);
326
        }
327
    }
328
}
329
330
void
331
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
332
{
333
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
334
        dump_basicblock(b, b == mark);
335
    }
336
}
337
338
#endif
339
340
341
/***** CFG construction and modification *****/
342
343
static basicblock *
344
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
345
18.1k
{
346
18.1k
    assert(block != NULL);
347
18.1k
    g->g_curblock->b_next = block;
348
18.1k
    g->g_curblock = block;
349
18.1k
    return block;
350
18.1k
}
351
352
static inline int
353
36.8k
basicblock_exits_scope(const basicblock *b) {
354
36.8k
    cfg_instr *last = basicblock_last_instr(b);
355
36.8k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
356
36.8k
}
357
358
static inline int
359
21.2k
basicblock_has_eval_break(const basicblock *b) {
360
109k
    for (int i = 0; i < b->b_iused; i++) {
361
96.5k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
362
8.25k
            return true;
363
8.25k
        }
364
96.5k
    }
365
12.9k
    return false;
366
21.2k
}
367
368
static bool
369
cfg_builder_current_block_is_terminated(cfg_builder *g)
370
224k
{
371
224k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
372
224k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
373
15.5k
        return true;
374
15.5k
    }
375
209k
    if (IS_LABEL(g->g_current_label)) {
376
2.58k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
377
2.58k
            return true;
378
2.58k
        }
379
0
        else {
380
            /* current block is empty, label it */
381
0
            g->g_curblock->b_label = g->g_current_label;
382
0
            g->g_current_label = NO_LABEL;
383
0
        }
384
2.58k
    }
385
206k
    return false;
386
209k
}
387
388
static int
389
cfg_builder_maybe_start_new_block(cfg_builder *g)
390
224k
{
391
224k
    if (cfg_builder_current_block_is_terminated(g)) {
392
18.1k
        basicblock *b = cfg_builder_new_block(g);
393
18.1k
        if (b == NULL) {
394
0
            return ERROR;
395
0
        }
396
18.1k
        b->b_label = g->g_current_label;
397
18.1k
        g->g_current_label = NO_LABEL;
398
18.1k
        cfg_builder_use_next_block(g, b);
399
18.1k
    }
400
224k
    return SUCCESS;
401
224k
}
402
403
#ifndef NDEBUG
404
static bool
405
cfg_builder_check(cfg_builder *g)
406
{
407
    assert(g->g_entryblock->b_iused > 0);
408
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
409
        assert(!_PyMem_IsPtrFreed(block));
410
        if (block->b_instr != NULL) {
411
            assert(block->b_ialloc > 0);
412
            assert(block->b_iused >= 0);
413
            assert(block->b_ialloc >= block->b_iused);
414
        }
415
        else {
416
            assert (block->b_iused == 0);
417
            assert (block->b_ialloc == 0);
418
        }
419
    }
420
    return true;
421
}
422
#endif
423
424
static int
425
init_cfg_builder(cfg_builder *g)
426
5.38k
{
427
5.38k
    g->g_block_list = NULL;
428
5.38k
    basicblock *block = cfg_builder_new_block(g);
429
5.38k
    if (block == NULL) {
430
0
        return ERROR;
431
0
    }
432
5.38k
    g->g_curblock = g->g_entryblock = block;
433
5.38k
    g->g_current_label = NO_LABEL;
434
5.38k
    return SUCCESS;
435
5.38k
}
436
437
cfg_builder *
438
_PyCfgBuilder_New(void)
439
5.38k
{
440
5.38k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
441
5.38k
    if (g == NULL) {
442
0
        PyErr_NoMemory();
443
0
        return NULL;
444
0
    }
445
5.38k
    memset(g, 0, sizeof(cfg_builder));
446
5.38k
    if (init_cfg_builder(g) < 0) {
447
0
        PyMem_Free(g);
448
0
        return NULL;
449
0
    }
450
5.38k
    return g;
451
5.38k
}
452
453
void
454
_PyCfgBuilder_Free(cfg_builder *g)
455
5.38k
{
456
5.38k
    if (g == NULL) {
457
0
        return;
458
0
    }
459
5.38k
    assert(cfg_builder_check(g));
460
5.38k
    basicblock *b = g->g_block_list;
461
29.6k
    while (b != NULL) {
462
24.2k
        if (b->b_instr) {
463
24.2k
            PyMem_Free((void *)b->b_instr);
464
24.2k
        }
465
24.2k
        basicblock *next = b->b_list;
466
24.2k
        PyMem_Free((void *)b);
467
24.2k
        b = next;
468
24.2k
    }
469
5.38k
    PyMem_Free(g);
470
5.38k
}
471
472
int
473
_PyCfgBuilder_CheckSize(cfg_builder *g)
474
5.38k
{
475
5.38k
    int nblocks = 0;
476
28.8k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
477
23.4k
        nblocks++;
478
23.4k
    }
479
5.38k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
480
0
        PyErr_NoMemory();
481
0
        return ERROR;
482
0
    }
483
5.38k
    return SUCCESS;
484
5.38k
}
485
486
int
487
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
488
8.56k
{
489
8.56k
    g->g_current_label = lbl;
490
8.56k
    return cfg_builder_maybe_start_new_block(g);
491
8.56k
}
492
493
int
494
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
495
216k
{
496
216k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
497
216k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
498
216k
}
499
500
501
static basicblock *
502
next_nonempty_block(basicblock *b)
503
29.6k
{
504
30.7k
    while (b && b->b_iused == 0) {
505
1.09k
        b = b->b_next;
506
1.09k
    }
507
29.6k
    return b;
508
29.6k
}
509
510
/***** debugging helpers *****/
511
512
#ifndef NDEBUG
513
static int remove_redundant_nops(cfg_builder *g);
514
515
static bool
516
no_redundant_nops(cfg_builder *g) {
517
    if (remove_redundant_nops(g) != 0) {
518
        return false;
519
    }
520
    return true;
521
}
522
523
static bool
524
no_redundant_jumps(cfg_builder *g) {
525
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
526
        cfg_instr *last = basicblock_last_instr(b);
527
        if (last != NULL) {
528
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
529
                basicblock *next = next_nonempty_block(b->b_next);
530
                basicblock *jump_target = next_nonempty_block(last->i_target);
531
                if (jump_target == next) {
532
                    assert(next);
533
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
534
                        assert(0);
535
                        return false;
536
                    }
537
                }
538
            }
539
        }
540
    }
541
    return true;
542
}
543
#endif
544
545
/***** CFG preprocessing (jump targets and exceptions) *****/
546
547
static int
548
24.2k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
549
24.2k
    cfg_instr *last = basicblock_last_instr(b);
550
24.2k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
551
18.9k
        return SUCCESS;
552
18.9k
    }
553
24.2k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
554
555
5.26k
    bool is_forward = last->i_target->b_visited == 0;
556
5.26k
    if (is_forward) {
557
5.06k
        RETURN_IF_ERROR(
558
5.06k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
559
5.06k
        return SUCCESS;
560
5.06k
    }
561
562
197
    int reversed_opcode = 0;
563
197
    switch(last->i_opcode) {
564
0
        case POP_JUMP_IF_NOT_NONE:
565
0
            reversed_opcode = POP_JUMP_IF_NONE;
566
0
            break;
567
16
        case POP_JUMP_IF_NONE:
568
16
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
569
16
            break;
570
142
        case POP_JUMP_IF_FALSE:
571
142
            reversed_opcode = POP_JUMP_IF_TRUE;
572
142
            break;
573
39
        case POP_JUMP_IF_TRUE:
574
39
            reversed_opcode = POP_JUMP_IF_FALSE;
575
39
            break;
576
197
    }
577
    /* transform 'conditional jump T' to
578
     * 'reversed_jump b_next' followed by 'jump_backwards T'
579
     */
580
581
197
    basicblock *target = last->i_target;
582
197
    basicblock *backwards_jump = cfg_builder_new_block(g);
583
197
    if (backwards_jump == NULL) {
584
0
        return ERROR;
585
0
    }
586
197
    RETURN_IF_ERROR(
587
197
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
588
197
    RETURN_IF_ERROR(
589
197
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
590
197
    backwards_jump->b_startdepth = target->b_startdepth;
591
197
    last->i_opcode = reversed_opcode;
592
197
    last->i_target = b->b_next;
593
594
197
    backwards_jump->b_cold = b->b_cold;
595
197
    backwards_jump->b_next = b->b_next;
596
197
    b->b_next = backwards_jump;
597
197
    return SUCCESS;
598
197
}
599
600
601
static int
602
normalize_jumps(cfg_builder *g)
603
5.38k
{
604
5.38k
    basicblock *entryblock = g->g_entryblock;
605
29.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
24.0k
        b->b_visited = 0;
607
24.0k
    }
608
29.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
609
24.2k
        b->b_visited = 1;
610
24.2k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
611
24.2k
    }
612
5.38k
    return SUCCESS;
613
5.38k
}
614
615
static int
616
5.38k
check_cfg(cfg_builder *g) {
617
28.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
618
        /* Raise SystemError if jump or exit is not last instruction in the block. */
619
239k
        for (int i = 0; i < b->b_iused; i++) {
620
216k
            int opcode = b->b_instr[i].i_opcode;
621
216k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
622
216k
            if (IS_TERMINATOR_OPCODE(opcode)) {
623
20.9k
                if (i != b->b_iused - 1) {
624
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
625
0
                    return ERROR;
626
0
                }
627
20.9k
            }
628
216k
        }
629
23.4k
    }
630
5.38k
    return SUCCESS;
631
5.38k
}
632
633
static int
634
get_max_label(basicblock *entryblock)
635
20.3k
{
636
20.3k
    int lbl = -1;
637
114k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
638
93.6k
        if (b->b_label.id > lbl) {
639
31.6k
            lbl = b->b_label.id;
640
31.6k
        }
641
93.6k
    }
642
20.3k
    return lbl;
643
20.3k
}
644
645
/* Calculate the actual jump target from the target_label */
646
static int
647
translate_jump_labels_to_targets(basicblock *entryblock)
648
5.38k
{
649
5.38k
    int max_label = get_max_label(entryblock);
650
5.38k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
651
5.38k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
652
5.38k
    if (!label2block) {
653
0
        PyErr_NoMemory();
654
0
        return ERROR;
655
0
    }
656
5.38k
    memset(label2block, 0, mapsize);
657
28.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
658
23.4k
        if (b->b_label.id >= 0) {
659
8.56k
            label2block[b->b_label.id] = b;
660
8.56k
        }
661
23.4k
    }
662
28.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
663
239k
        for (int i = 0; i < b->b_iused; i++) {
664
216k
            cfg_instr *instr = &b->b_instr[i];
665
216k
            assert(instr->i_target == NULL);
666
216k
            if (HAS_TARGET(instr->i_opcode)) {
667
10.5k
                int lbl = instr->i_oparg;
668
10.5k
                assert(lbl >= 0 && lbl <= max_label);
669
10.5k
                instr->i_target = label2block[lbl];
670
10.5k
                assert(instr->i_target != NULL);
671
10.5k
                assert(instr->i_target->b_label.id == lbl);
672
10.5k
            }
673
216k
        }
674
23.4k
    }
675
5.38k
    PyMem_Free(label2block);
676
5.38k
    return SUCCESS;
677
5.38k
}
678
679
static int
680
5.38k
mark_except_handlers(basicblock *entryblock) {
681
#ifndef NDEBUG
682
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
683
        assert(!b->b_except_handler);
684
    }
685
#endif
686
28.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
687
239k
        for (int i=0; i < b->b_iused; i++) {
688
216k
            cfg_instr *instr = &b->b_instr[i];
689
216k
            if (is_block_push(instr)) {
690
1.67k
                instr->i_target->b_except_handler = 1;
691
1.67k
            }
692
216k
        }
693
23.4k
    }
694
5.38k
    return SUCCESS;
695
5.38k
}
696
697
698
struct _PyCfgExceptStack {
699
    basicblock *handlers[CO_MAXBLOCKS+2];
700
    int depth;
701
};
702
703
704
static basicblock *
705
1.66k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
706
1.66k
    assert(is_block_push(setup));
707
1.66k
    int opcode = setup->i_opcode;
708
1.66k
    basicblock * target = setup->i_target;
709
1.66k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
710
1.02k
        target->b_preserve_lasti = 1;
711
1.02k
    }
712
1.66k
    assert(stack->depth <= CO_MAXBLOCKS);
713
1.66k
    stack->handlers[++stack->depth] = target;
714
1.66k
    return target;
715
1.66k
}
716
717
static basicblock *
718
1.37k
pop_except_block(struct _PyCfgExceptStack *stack) {
719
1.37k
    assert(stack->depth > 0);
720
1.37k
    return stack->handlers[--stack->depth];
721
1.37k
}
722
723
static basicblock *
724
19.5k
except_stack_top(struct _PyCfgExceptStack *stack) {
725
19.5k
    return stack->handlers[stack->depth];
726
19.5k
}
727
728
static struct _PyCfgExceptStack *
729
5.38k
make_except_stack(void) {
730
5.38k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
731
5.38k
    if (new == NULL) {
732
0
        PyErr_NoMemory();
733
0
        return NULL;
734
0
    }
735
5.38k
    new->depth = 0;
736
5.38k
    new->handlers[0] = NULL;
737
5.38k
    return new;
738
5.38k
}
739
740
static struct _PyCfgExceptStack *
741
6.65k
copy_except_stack(struct _PyCfgExceptStack *stack) {
742
6.65k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
743
6.65k
    if (copy == NULL) {
744
0
        PyErr_NoMemory();
745
0
        return NULL;
746
0
    }
747
6.65k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
748
6.65k
    return copy;
749
6.65k
}
750
751
static basicblock**
752
38.8k
make_cfg_traversal_stack(basicblock *entryblock) {
753
38.8k
    int nblocks = 0;
754
224k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
755
185k
        b->b_visited = 0;
756
185k
        nblocks++;
757
185k
    }
758
38.8k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
759
38.8k
    if (!stack) {
760
0
        PyErr_NoMemory();
761
0
    }
762
38.8k
    return stack;
763
38.8k
}
764
765
/* Compute the stack effects of opcode with argument oparg.
766
767
   Some opcodes have different stack effect when jump to the target and
768
   when not jump. The 'jump' parameter specifies the case:
769
770
   * 0 -- when not jump
771
   * 1 -- when jump
772
   * -1 -- maximal
773
 */
774
typedef struct {
775
    /* The stack effect of the instruction. */
776
    int net;
777
} stack_effects;
778
779
Py_LOCAL(int)
780
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
781
201k
{
782
201k
    if (opcode < 0) {
783
0
        return -1;
784
0
    }
785
201k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
786
        // Specialized instructions are not supported.
787
0
        return -1;
788
0
    }
789
201k
    int popped = _PyOpcode_num_popped(opcode, oparg);
790
201k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
791
201k
    if (popped < 0 || pushed < 0) {
792
0
        return -1;
793
0
    }
794
201k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
795
1.66k
        effects->net = 0;
796
1.66k
        return 0;
797
1.66k
    }
798
199k
    effects->net = pushed - popped;
799
199k
    return 0;
800
201k
}
801
802
Py_LOCAL_INLINE(int)
803
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
804
23.4k
{
805
23.4k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
806
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
807
0
        return ERROR;
808
0
    }
809
23.4k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
810
19.7k
        assert(b->b_startdepth < 0);
811
19.7k
        b->b_startdepth = depth;
812
19.7k
        *(*sp)++ = b;
813
19.7k
    }
814
23.4k
    return SUCCESS;
815
23.4k
}
816
817
/* Find the flow path that needs the largest stack.  We assume that
818
 * cycles in the flow graph have no net effect on the stack depth.
819
 */
820
static int
821
calculate_stackdepth(cfg_builder *g)
822
5.38k
{
823
5.38k
    basicblock *entryblock = g->g_entryblock;
824
29.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
825
24.0k
        b->b_startdepth = INT_MIN;
826
24.0k
    }
827
5.38k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
828
5.38k
    if (!stack) {
829
0
        return ERROR;
830
0
    }
831
832
833
5.38k
    int stackdepth = -1;
834
5.38k
    int maxdepth = 0;
835
5.38k
    basicblock **sp = stack;
836
5.38k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
837
0
        goto error;
838
0
    }
839
25.0k
    while (sp != stack) {
840
19.7k
        basicblock *b = *--sp;
841
19.7k
        int depth = b->b_startdepth;
842
19.7k
        assert(depth >= 0);
843
19.7k
        basicblock *next = b->b_next;
844
201k
        for (int i = 0; i < b->b_iused; i++) {
845
192k
            cfg_instr *instr = &b->b_instr[i];
846
192k
            stack_effects effects;
847
192k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
848
0
                PyErr_Format(PyExc_SystemError,
849
0
                             "Invalid stack effect for opcode=%d, arg=%i",
850
0
                             instr->i_opcode, instr->i_oparg);
851
0
                goto error;
852
0
            }
853
192k
            int new_depth = depth + effects.net;
854
192k
            if (new_depth < 0) {
855
0
                PyErr_Format(PyExc_ValueError,
856
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
857
0
                goto error;
858
0
            }
859
192k
            maxdepth = Py_MAX(maxdepth, depth);
860
192k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
861
9.19k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
862
0
                    PyErr_Format(PyExc_SystemError,
863
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
864
0
                                 instr->i_opcode, instr->i_oparg);
865
0
                    goto error;
866
0
                }
867
9.19k
                int target_depth = depth + effects.net;
868
9.19k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
869
9.19k
                maxdepth = Py_MAX(maxdepth, depth);
870
9.19k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
871
0
                    goto error;
872
0
                }
873
9.19k
            }
874
192k
            depth = new_depth;
875
192k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
876
192k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
877
190k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
878
10.8k
            {
879
                /* remaining code is dead */
880
10.8k
                next = NULL;
881
10.8k
                break;
882
10.8k
            }
883
192k
        }
884
19.7k
        if (next != NULL) {
885
8.85k
            assert(BB_HAS_FALLTHROUGH(b));
886
8.85k
            if (stackdepth_push(&sp, next, depth) < 0) {
887
0
                goto error;
888
0
            }
889
8.85k
        }
890
19.7k
    }
891
5.38k
    stackdepth = maxdepth;
892
5.38k
error:
893
5.38k
    PyMem_Free(stack);
894
5.38k
    return stackdepth;
895
5.38k
}
896
897
static int
898
5.38k
label_exception_targets(basicblock *entryblock) {
899
5.38k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
900
5.38k
    if (todo_stack == NULL) {
901
0
        return ERROR;
902
0
    }
903
5.38k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
904
5.38k
    if (except_stack == NULL) {
905
0
        PyMem_Free(todo_stack);
906
0
        PyErr_NoMemory();
907
0
        return ERROR;
908
0
    }
909
5.38k
    except_stack->depth = 0;
910
5.38k
    todo_stack[0] = entryblock;
911
5.38k
    entryblock->b_visited = 1;
912
5.38k
    entryblock->b_exceptstack = except_stack;
913
5.38k
    basicblock **todo = &todo_stack[1];
914
5.38k
    basicblock *handler = NULL;
915
24.8k
    while (todo > todo_stack) {
916
19.5k
        todo--;
917
19.5k
        basicblock *b = todo[0];
918
19.5k
        assert(b->b_visited == 1);
919
19.5k
        except_stack = b->b_exceptstack;
920
19.5k
        assert(except_stack != NULL);
921
19.5k
        b->b_exceptstack = NULL;
922
19.5k
        handler = except_stack_top(except_stack);
923
19.5k
        int last_yield_except_depth = -1;
924
227k
        for (int i = 0; i < b->b_iused; i++) {
925
207k
            cfg_instr *instr = &b->b_instr[i];
926
207k
            if (is_block_push(instr)) {
927
1.66k
                if (!instr->i_target->b_visited) {
928
1.66k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
929
1.66k
                    if (copy == NULL) {
930
0
                        goto error;
931
0
                    }
932
1.66k
                    instr->i_target->b_exceptstack = copy;
933
1.66k
                    todo[0] = instr->i_target;
934
1.66k
                    instr->i_target->b_visited = 1;
935
1.66k
                    todo++;
936
1.66k
                }
937
1.66k
                handler = push_except_block(except_stack, instr);
938
1.66k
            }
939
206k
            else if (instr->i_opcode == POP_BLOCK) {
940
1.37k
                handler = pop_except_block(except_stack);
941
1.37k
                INSTR_SET_OP0(instr, NOP);
942
1.37k
            }
943
204k
            else if (is_jump(instr)) {
944
8.35k
                instr->i_except = handler;
945
8.35k
                assert(i == b->b_iused -1);
946
8.35k
                if (!instr->i_target->b_visited) {
947
6.05k
                    if (BB_HAS_FALLTHROUGH(b)) {
948
4.99k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
949
4.99k
                        if (copy == NULL) {
950
0
                            goto error;
951
0
                        }
952
4.99k
                        instr->i_target->b_exceptstack = copy;
953
4.99k
                    }
954
1.06k
                    else {
955
1.06k
                        instr->i_target->b_exceptstack = except_stack;
956
1.06k
                        except_stack = NULL;
957
1.06k
                    }
958
6.05k
                    todo[0] = instr->i_target;
959
6.05k
                    instr->i_target->b_visited = 1;
960
6.05k
                    todo++;
961
6.05k
                }
962
8.35k
            }
963
196k
            else if (instr->i_opcode == YIELD_VALUE) {
964
308
                instr->i_except = handler;
965
308
                last_yield_except_depth = except_stack->depth;
966
308
            }
967
195k
            else if (instr->i_opcode == RESUME) {
968
5.74k
                instr->i_except = handler;
969
5.74k
                if (instr->i_oparg != RESUME_AT_FUNC_START && instr->i_oparg != RESUME_AT_GEN_EXPR_START) {
970
308
                    assert(last_yield_except_depth >= 0);
971
308
                    if (last_yield_except_depth == 1) {
972
137
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
973
137
                    }
974
308
                    last_yield_except_depth = -1;
975
308
                }
976
5.74k
            }
977
190k
            else if (instr->i_opcode == RETURN_GENERATOR) {
978
252
                instr->i_except = NULL;
979
252
            }
980
189k
            else {
981
189k
                instr->i_except = handler;
982
189k
            }
983
207k
        }
984
19.5k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
985
6.39k
            assert(except_stack != NULL);
986
6.39k
            b->b_next->b_exceptstack = except_stack;
987
6.39k
            todo[0] = b->b_next;
988
6.39k
            b->b_next->b_visited = 1;
989
6.39k
            todo++;
990
6.39k
        }
991
13.1k
        else if (except_stack != NULL) {
992
12.0k
           PyMem_Free(except_stack);
993
12.0k
        }
994
19.5k
    }
995
#ifdef Py_DEBUG
996
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
997
        assert(b->b_exceptstack == NULL);
998
    }
999
#endif
1000
5.38k
    PyMem_Free(todo_stack);
1001
5.38k
    return SUCCESS;
1002
0
error:
1003
0
    PyMem_Free(todo_stack);
1004
0
    PyMem_Free(except_stack);
1005
0
    return ERROR;
1006
5.38k
}
1007
1008
/***** CFG optimizations *****/
1009
1010
static int
1011
10.7k
remove_unreachable(basicblock *entryblock) {
1012
58.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1013
47.3k
        b->b_predecessors = 0;
1014
47.3k
    }
1015
10.7k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1016
10.7k
    if (stack == NULL) {
1017
0
        return ERROR;
1018
0
    }
1019
10.7k
    basicblock **sp = stack;
1020
10.7k
    entryblock->b_predecessors = 1;
1021
10.7k
    *sp++ = entryblock;
1022
10.7k
    entryblock->b_visited = 1;
1023
49.4k
    while (sp > stack) {
1024
38.6k
        basicblock *b = *(--sp);
1025
38.6k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1026
16.9k
            if (!b->b_next->b_visited) {
1027
14.0k
                assert(b->b_next->b_predecessors == 0);
1028
14.0k
                *sp++ = b->b_next;
1029
14.0k
                b->b_next->b_visited = 1;
1030
14.0k
            }
1031
16.9k
            b->b_next->b_predecessors++;
1032
16.9k
        }
1033
442k
        for (int i = 0; i < b->b_iused; i++) {
1034
403k
            basicblock *target;
1035
403k
            cfg_instr *instr = &b->b_instr[i];
1036
403k
            if (is_jump(instr) || is_block_push(instr)) {
1037
18.6k
                target = instr->i_target;
1038
18.6k
                if (!target->b_visited) {
1039
13.8k
                    *sp++ = target;
1040
13.8k
                    target->b_visited = 1;
1041
13.8k
                }
1042
18.6k
                target->b_predecessors++;
1043
18.6k
            }
1044
403k
        }
1045
38.6k
    }
1046
10.7k
    PyMem_Free(stack);
1047
1048
    /* Delete unreachable instructions */
1049
58.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1050
47.3k
       if (b->b_predecessors == 0) {
1051
8.73k
            b->b_iused = 0;
1052
8.73k
            b->b_except_handler = 0;
1053
8.73k
       }
1054
47.3k
    }
1055
10.7k
    return SUCCESS;
1056
10.7k
}
1057
1058
static int
1059
123k
basicblock_remove_redundant_nops(basicblock *bb) {
1060
    /* Remove NOPs when legal to do so. */
1061
123k
    int dest = 0;
1062
123k
    int prev_lineno = -1;
1063
1.06M
    for (int src = 0; src < bb->b_iused; src++) {
1064
945k
        int lineno = bb->b_instr[src].i_loc.lineno;
1065
945k
        if (bb->b_instr[src].i_opcode == NOP) {
1066
            /* Eliminate no-op if it doesn't have a line number */
1067
20.8k
            if (lineno < 0) {
1068
4.37k
                continue;
1069
4.37k
            }
1070
            /* or, if the previous instruction had the same line number. */
1071
16.4k
            if (prev_lineno == lineno) {
1072
13.2k
                continue;
1073
13.2k
            }
1074
            /* or, if the next instruction has same line number or no line number */
1075
3.26k
            if (src < bb->b_iused - 1) {
1076
3.00k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1077
3.00k
                if (next_lineno == lineno) {
1078
1.98k
                    continue;
1079
1.98k
                }
1080
1.01k
                if (next_lineno < 0) {
1081
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1082
0
                    continue;
1083
0
                }
1084
1.01k
            }
1085
267
            else {
1086
267
                basicblock *next = next_nonempty_block(bb->b_next);
1087
                /* or if last instruction in BB and next BB has same line number */
1088
267
                if (next) {
1089
267
                    location next_loc = NO_LOCATION;
1090
267
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1091
267
                        cfg_instr *instr = &next->b_instr[next_i];
1092
267
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1093
                            /* Skip over NOPs without a location, they will be removed */
1094
0
                            continue;
1095
0
                        }
1096
267
                        next_loc = instr->i_loc;
1097
267
                        break;
1098
267
                    }
1099
267
                    if (lineno == next_loc.lineno) {
1100
1
                        continue;
1101
1
                    }
1102
267
                }
1103
267
            }
1104
1105
3.26k
        }
1106
926k
        if (dest != src) {
1107
76.1k
            bb->b_instr[dest] = bb->b_instr[src];
1108
76.1k
        }
1109
926k
        dest++;
1110
926k
        prev_lineno = lineno;
1111
926k
    }
1112
123k
    assert(dest <= bb->b_iused);
1113
123k
    int num_removed = bb->b_iused - dest;
1114
123k
    bb->b_iused = dest;
1115
123k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1116
123k
    return num_removed;
1117
123k
}
1118
1119
static int
1120
17.6k
remove_redundant_nops(cfg_builder *g) {
1121
17.6k
    int changes = 0;
1122
117k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1123
100k
        int change = basicblock_remove_redundant_nops(b);
1124
100k
        RETURN_IF_ERROR(change);
1125
100k
        changes += change;
1126
100k
    }
1127
17.6k
    return changes;
1128
17.6k
}
1129
1130
static int loads_const(int opcode);
1131
1132
static int
1133
remove_redundant_nops_and_pairs(basicblock *entryblock)
1134
5.38k
{
1135
5.38k
    bool done = false;
1136
1137
10.7k
    while (! done) {
1138
5.38k
        done = true;
1139
5.38k
        cfg_instr *prev_instr = NULL;
1140
5.38k
        cfg_instr *instr = NULL;
1141
29.2k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1142
23.8k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1143
23.8k
            if (IS_LABEL(b->b_label)) {
1144
                /* this block is a jump target, forget instr */
1145
8.94k
                instr = NULL;
1146
8.94k
            }
1147
218k
            for (int i = 0; i < b->b_iused; i++) {
1148
195k
                prev_instr = instr;
1149
195k
                instr = &b->b_instr[i];
1150
195k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1151
195k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1152
195k
                int opcode = instr->i_opcode;
1153
195k
                bool is_redundant_pair = false;
1154
195k
                if (opcode == POP_TOP) {
1155
6.07k
                   if (loads_const(prev_opcode)) {
1156
0
                       is_redundant_pair = true;
1157
0
                   }
1158
6.07k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1159
0
                       is_redundant_pair = true;
1160
0
                   }
1161
6.07k
                }
1162
195k
                if (is_redundant_pair) {
1163
0
                    INSTR_SET_OP0(prev_instr, NOP);
1164
0
                    INSTR_SET_OP0(instr, NOP);
1165
0
                    done = false;
1166
0
                }
1167
195k
            }
1168
23.8k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1169
16.9k
                instr = NULL;
1170
16.9k
            }
1171
23.8k
        }
1172
5.38k
    }
1173
5.38k
    return SUCCESS;
1174
5.38k
}
1175
1176
static int
1177
12.2k
remove_redundant_jumps(cfg_builder *g) {
1178
    /* If a non-empty block ends with a jump instruction, check if the next
1179
     * non-empty block reached through normal flow control is the target
1180
     * of that jump. If it is, then the jump instruction is redundant and
1181
     * can be deleted.
1182
     *
1183
     * Return the number of changes applied, or -1 on error.
1184
     */
1185
1186
12.2k
    int changes = 0;
1187
88.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1188
76.1k
        cfg_instr *last = basicblock_last_instr(b);
1189
76.1k
        if (last == NULL) {
1190
12.2k
            continue;
1191
12.2k
        }
1192
76.1k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1193
63.8k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1194
7.02k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1195
7.02k
            if (jump_target == NULL) {
1196
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1197
0
                return ERROR;
1198
0
            }
1199
7.02k
            basicblock *next = next_nonempty_block(b->b_next);
1200
7.02k
            if (jump_target == next) {
1201
247
                changes++;
1202
247
                INSTR_SET_OP0(last, NOP);
1203
247
            }
1204
7.02k
        }
1205
63.8k
    }
1206
1207
12.2k
    return changes;
1208
12.2k
}
1209
1210
static inline bool
1211
23.8k
basicblock_has_no_lineno(basicblock *b) {
1212
29.8k
    for (int i = 0; i < b->b_iused; i++) {
1213
27.2k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1214
21.2k
            return false;
1215
21.2k
        }
1216
27.2k
    }
1217
2.60k
    return true;
1218
23.8k
}
1219
1220
/* Maximum size of basic block that should be copied in optimizer */
1221
1.20k
#define MAX_COPY_SIZE 4
1222
1223
/* If this block ends with an unconditional jump to a small exit block or
1224
 * a block that has no line numbers (and no fallthrough), then
1225
 * remove the jump and extend this block with the target.
1226
 * Returns 1 if extended, 0 if no change, and -1 on error.
1227
 */
1228
static int
1229
32.9k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1230
32.9k
    cfg_instr *last = basicblock_last_instr(bb);
1231
32.9k
    if (last == NULL) {
1232
0
        return 0;
1233
0
    }
1234
32.9k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1235
28.6k
        return 0;
1236
28.6k
    }
1237
4.31k
    basicblock *target = last->i_target;
1238
4.31k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1239
1.20k
                             target->b_iused <= MAX_COPY_SIZE);
1240
4.31k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1241
1.03k
                                     !BB_HAS_FALLTHROUGH(target));
1242
4.31k
    if (small_exit_block || no_lineno_no_fallthrough) {
1243
1.14k
        assert(is_jump(last));
1244
1.14k
        int removed_jump_opcode = last->i_opcode;
1245
1.14k
        INSTR_SET_OP0(last, NOP);
1246
1.14k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1247
1.14k
        if (no_lineno_no_fallthrough) {
1248
1.02k
            last = basicblock_last_instr(bb);
1249
1.02k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1250
308
                removed_jump_opcode == JUMP)
1251
26
            {
1252
                /* Make sure we don't lose eval breaker checks */
1253
26
                last->i_opcode = JUMP;
1254
26
            }
1255
1.02k
        }
1256
1.14k
        target->b_predecessors--;
1257
1.14k
        return 1;
1258
1.14k
    }
1259
3.16k
    return 0;
1260
4.31k
}
1261
1262
static int
1263
5.38k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1264
5.38k
    bool changes;
1265
5.91k
    do {
1266
5.91k
        changes = false;
1267
38.8k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1268
32.9k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1269
32.9k
            RETURN_IF_ERROR(res);
1270
32.9k
            if (res) {
1271
1.14k
                changes = true;
1272
1.14k
            }
1273
32.9k
        }
1274
5.91k
    } while(changes); /* every change removes a jump, ensuring convergence */
1275
5.38k
    return changes;
1276
5.38k
}
1277
1278
// Attempt to eliminate jumps to jumps by updating inst to jump to
1279
// target->i_target using the provided opcode. Return whether or not the
1280
// optimization was successful.
1281
static bool
1282
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1283
188
{
1284
188
    assert(is_jump(inst));
1285
188
    assert(is_jump(target));
1286
188
    assert(inst == basicblock_last_instr(bb));
1287
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1288
    // changes (and we fall into an infinite loop):
1289
188
    if (inst->i_target != target->i_target) {
1290
        /* Change inst to NOP and append a jump to target->i_target. The
1291
         * NOP will be removed later if it's not needed for the lineno.
1292
         */
1293
188
        INSTR_SET_OP0(inst, NOP);
1294
1295
188
        RETURN_IF_ERROR(
1296
188
            basicblock_add_jump(
1297
188
                bb, opcode, target->i_target, target->i_loc));
1298
1299
188
        return true;
1300
188
    }
1301
0
    return false;
1302
188
}
1303
1304
static int
1305
loads_const(int opcode)
1306
225k
{
1307
225k
    return OPCODE_HAS_CONST(opcode)
1308
195k
        || opcode == LOAD_SMALL_INT
1309
177k
        || opcode == LOAD_COMMON_CONSTANT;
1310
225k
}
1311
1312
/* Returns new reference */
1313
static PyObject*
1314
get_const_value(int opcode, int oparg, PyObject *co_consts)
1315
73.0k
{
1316
73.0k
    PyObject *constant = NULL;
1317
73.0k
    assert(loads_const(opcode));
1318
73.0k
    if (opcode == LOAD_CONST) {
1319
70.4k
        assert(PyList_Check(co_consts));
1320
70.4k
        Py_ssize_t n = PyList_GET_SIZE(co_consts);
1321
70.4k
        if (oparg < 0 || oparg >= n) {
1322
0
            PyErr_Format(PyExc_ValueError,
1323
0
                         "LOAD_CONST index %d is out of range for consts (len=%zd)",
1324
0
                         oparg, n);
1325
0
            return NULL;
1326
0
        }
1327
70.4k
        constant = PyList_GET_ITEM(co_consts, oparg);
1328
70.4k
    }
1329
73.0k
    if (opcode == LOAD_SMALL_INT) {
1330
1.93k
        return PyLong_FromLong(oparg);
1331
1.93k
    }
1332
71.0k
    if (opcode == LOAD_COMMON_CONSTANT) {
1333
603
        assert(oparg < NUM_COMMON_CONSTANTS);
1334
603
        return PyStackRef_AsPyObjectBorrow(
1335
603
            _PyInterpreterState_GET()->common_consts[oparg]);
1336
603
    }
1337
1338
70.4k
    if (constant == NULL) {
1339
0
        PyErr_SetString(PyExc_SystemError,
1340
0
                        "Internal error: failed to get value of a constant");
1341
0
        return NULL;
1342
0
    }
1343
70.4k
    return Py_NewRef(constant);
1344
70.4k
}
1345
1346
// Steals a reference to newconst.
1347
static int
1348
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
1349
          _Py_hashtable_t *consts_index)
1350
2.25k
{
1351
2.25k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1352
0
        Py_DECREF(newconst);
1353
0
        return -1;
1354
0
    }
1355
1356
2.25k
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
1357
2.25k
    if (entry != NULL) {
1358
1.53k
        Py_DECREF(newconst);
1359
1.53k
        return (int)(uintptr_t)entry->value;
1360
1.53k
    }
1361
1362
724
    Py_ssize_t index = PyList_GET_SIZE(consts);
1363
724
    if ((size_t)index >= (size_t)INT_MAX - 1) {
1364
0
        PyErr_SetString(PyExc_OverflowError, "too many constants");
1365
0
        Py_DECREF(newconst);
1366
0
        return -1;
1367
0
    }
1368
724
    if (PyList_Append(consts, newconst)) {
1369
0
        Py_DECREF(newconst);
1370
0
        return -1;
1371
0
    }
1372
1373
724
    if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
1374
0
        PyList_SetSlice(consts, index, index + 1, NULL);
1375
0
        Py_DECREF(newconst);
1376
0
        PyErr_NoMemory();
1377
0
        return -1;
1378
0
    }
1379
1380
724
    Py_DECREF(newconst);
1381
724
    return (int)index;
1382
724
}
1383
1384
/*
1385
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1386
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1387
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1388
1389
  Return boolean indicating whether "size" such instructions were found.
1390
*/
1391
static bool
1392
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1393
6.00k
{
1394
6.00k
    assert(start < bb->b_iused);
1395
6.00k
    assert(size >= 0);
1396
6.00k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1397
1398
10.8k
    for (; start >= 0 && size > 0; start--) {
1399
8.07k
        cfg_instr *instr = &bb->b_instr[start];
1400
8.07k
        if (instr->i_opcode == NOP) {
1401
50
            continue;
1402
50
        }
1403
8.02k
        if (!loads_const(instr->i_opcode)) {
1404
3.23k
            return false;
1405
3.23k
        }
1406
4.78k
        instrs[--size] = instr;
1407
4.78k
    }
1408
1409
2.76k
    return size == 0;
1410
6.00k
}
1411
1412
/*
1413
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1414
  Caller must make sure "instrs" has at least "size" elements.
1415
*/
1416
static void
1417
nop_out(cfg_instr **instrs, int size)
1418
2.76k
{
1419
6.68k
    for (int i = 0; i < size; i++) {
1420
3.92k
        cfg_instr *instr = instrs[i];
1421
3.92k
        assert(instr->i_opcode != NOP);
1422
3.92k
        INSTR_SET_OP0(instr, NOP);
1423
3.92k
        INSTR_SET_LOC(instr, NO_LOCATION);
1424
3.92k
    }
1425
2.76k
}
1426
1427
/* Does not steal reference to "newconst".
1428
   Return 1 if changed instruction to LOAD_SMALL_INT.
1429
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1430
   Return -1 on error.
1431
*/
1432
static int
1433
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1434
                               PyObject *consts, PyObject *const_cache)
1435
44.8k
{
1436
44.8k
    if (PyLong_CheckExact(newconst)) {
1437
20.5k
        int overflow;
1438
20.5k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1439
20.5k
        if (val == -1 && PyErr_Occurred()) {
1440
0
            return -1;
1441
0
        }
1442
20.5k
        if (!overflow && _PY_IS_SMALL_INT(val) && 0 <= val && val <= 255) {
1443
15.3k
            assert(_Py_IsImmortal(newconst));
1444
15.3k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1445
15.3k
            return 1;
1446
15.3k
        }
1447
20.5k
    }
1448
29.4k
    return 0;
1449
44.8k
}
1450
1451
/* Does not steal reference to "newconst".
1452
   Return 1 if changed instruction to LOAD_COMMON_CONSTANT.
1453
   Return 0 if could not change instruction to LOAD_COMMON_CONSTANT.
1454
   Return -1 on error.
1455
*/
1456
static int
1457
maybe_instr_make_load_common_const(cfg_instr *instr, PyObject *newconst)
1458
28.7k
{
1459
28.7k
    int oparg;
1460
28.7k
    if (newconst == Py_None) {
1461
6.10k
        oparg = CONSTANT_NONE;
1462
6.10k
    }
1463
22.6k
    else if (newconst == Py_True) {
1464
486
        oparg = CONSTANT_TRUE;
1465
486
    }
1466
22.1k
    else if (newconst == Py_False) {
1467
661
        oparg = CONSTANT_FALSE;
1468
661
    }
1469
21.5k
    else if (PyUnicode_CheckExact(newconst)
1470
8.99k
             && PyUnicode_GET_LENGTH(newconst) == 0) {
1471
130
        oparg = CONSTANT_EMPTY_STR;
1472
130
    }
1473
21.3k
    else if (PyTuple_CheckExact(newconst)
1474
2.58k
             && PyTuple_GET_SIZE(newconst) == 0) {
1475
694
        oparg = CONSTANT_EMPTY_TUPLE;
1476
694
    }
1477
20.6k
    else if (PyLong_CheckExact(newconst)) {
1478
5.22k
        int overflow;
1479
5.22k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1480
5.22k
        if (val == -1 && PyErr_Occurred()) {
1481
0
            return -1;
1482
0
        }
1483
5.22k
        if (overflow || val != -1) {
1484
4.73k
            return 0;
1485
4.73k
        }
1486
486
        oparg = CONSTANT_MINUS_ONE;
1487
486
    }
1488
15.4k
    else {
1489
15.4k
        return 0;
1490
15.4k
    }
1491
28.7k
    assert(_Py_IsImmortal(newconst));
1492
8.56k
    INSTR_SET_OP1(instr, LOAD_COMMON_CONSTANT, oparg);
1493
8.56k
    return 1;
1494
28.7k
}
1495
1496
/* Steals reference to "newconst" */
1497
static int
1498
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1499
                      PyObject *consts, PyObject *const_cache,
1500
                      _Py_hashtable_t *consts_index)
1501
2.61k
{
1502
2.61k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1503
2.61k
    if (res < 0) {
1504
0
        Py_DECREF(newconst);
1505
0
        return ERROR;
1506
0
    }
1507
2.61k
    if (res > 0) {
1508
0
        return SUCCESS;
1509
0
    }
1510
2.61k
    res = maybe_instr_make_load_common_const(instr, newconst);
1511
2.61k
    if (res < 0) {
1512
0
        Py_DECREF(newconst);
1513
0
        return ERROR;
1514
0
    }
1515
2.61k
    if (res > 0) {
1516
532
        return SUCCESS;
1517
532
    }
1518
2.08k
    int oparg = add_const(newconst, consts, const_cache, consts_index);
1519
2.08k
    RETURN_IF_ERROR(oparg);
1520
2.08k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1521
2.08k
    return SUCCESS;
1522
2.08k
}
1523
1524
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1525
   with    LOAD_CONST (c1, c2, ... cn).
1526
   The consts table must still be in list form so that the
1527
   new constant (c1, c2, ... cn) can be appended.
1528
   Called with codestr pointing to the first LOAD_CONST.
1529
*/
1530
static int
1531
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
1532
                        PyObject *const_cache, _Py_hashtable_t *consts_index)
1533
2.39k
{
1534
    /* Pre-conditions */
1535
2.39k
    assert(PyDict_CheckExact(const_cache));
1536
2.39k
    assert(PyList_CheckExact(consts));
1537
1538
2.39k
    cfg_instr *instr = &bb->b_instr[i];
1539
2.39k
    assert(instr->i_opcode == BUILD_TUPLE);
1540
1541
2.39k
    int seq_size = instr->i_oparg;
1542
2.39k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1543
0
        return SUCCESS;
1544
0
    }
1545
1546
2.39k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1547
2.39k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1548
        /* not a const sequence */
1549
1.62k
        return SUCCESS;
1550
1.62k
    }
1551
1552
765
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1553
765
    if (const_tuple == NULL) {
1554
0
        return ERROR;
1555
0
    }
1556
1557
1.85k
    for (int i = 0; i < seq_size; i++) {
1558
1.08k
        cfg_instr *inst = const_instrs[i];
1559
1.08k
        assert(loads_const(inst->i_opcode));
1560
1.08k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1561
1.08k
        if (element == NULL) {
1562
0
            Py_DECREF(const_tuple);
1563
0
            return ERROR;
1564
0
        }
1565
1.08k
        PyTuple_SET_ITEM(const_tuple, i, element);
1566
1.08k
    }
1567
1568
765
    nop_out(const_instrs, seq_size);
1569
765
    return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
1570
765
}
1571
1572
/* Replace:
1573
    BUILD_LIST 0
1574
    LOAD_CONST c1
1575
    LIST_APPEND 1
1576
    LOAD_CONST c2
1577
    LIST_APPEND 1
1578
    ...
1579
    LOAD_CONST cN
1580
    LIST_APPEND 1
1581
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1582
   with:
1583
    LOAD_CONST (c1, c2, ... cN)
1584
*/
1585
static int
1586
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1587
                                      PyObject *consts, PyObject *const_cache,
1588
                                      _Py_hashtable_t *consts_index)
1589
64
{
1590
64
    assert(PyDict_CheckExact(const_cache));
1591
64
    assert(PyList_CheckExact(consts));
1592
64
    assert(i >= 0);
1593
64
    assert(i < bb->b_iused);
1594
1595
64
    cfg_instr *intrinsic = &bb->b_instr[i];
1596
64
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1597
64
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1598
1599
64
    int consts_found = 0;
1600
64
    bool expect_append = true;
1601
1602
5.89k
    for (int pos = i - 1; pos >= 0; pos--) {
1603
5.89k
        cfg_instr *instr = &bb->b_instr[pos];
1604
5.89k
        int opcode = instr->i_opcode;
1605
5.89k
        int oparg = instr->i_oparg;
1606
1607
5.89k
        if (opcode == NOP) {
1608
1.72k
            continue;
1609
1.72k
        }
1610
1611
4.17k
        if (opcode == BUILD_LIST && oparg == 0) {
1612
0
            if (!expect_append) {
1613
                /* Not a sequence start. */
1614
0
                return SUCCESS;
1615
0
            }
1616
1617
            /* Sequence start, we are done. */
1618
0
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1619
0
            if (newconst == NULL) {
1620
0
                return ERROR;
1621
0
            }
1622
1623
0
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1624
0
                instr = &bb->b_instr[newpos];
1625
0
                if (instr->i_opcode == NOP) {
1626
0
                    continue;
1627
0
                }
1628
0
                if (loads_const(instr->i_opcode)) {
1629
0
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1630
0
                    if (constant == NULL) {
1631
0
                        Py_DECREF(newconst);
1632
0
                        return ERROR;
1633
0
                    }
1634
0
                    assert(consts_found > 0);
1635
0
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1636
0
                }
1637
0
                nop_out(&instr, 1);
1638
0
            }
1639
0
            assert(consts_found == 0);
1640
0
            return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
1641
0
        }
1642
1643
4.17k
        if (expect_append) {
1644
2.11k
            if (opcode != LIST_APPEND || oparg != 1) {
1645
62
                return SUCCESS;
1646
62
            }
1647
2.11k
        }
1648
2.05k
        else {
1649
2.05k
            if (!loads_const(opcode)) {
1650
2
                return SUCCESS;
1651
2
            }
1652
2.05k
            consts_found++;
1653
2.05k
        }
1654
1655
4.10k
        expect_append = !expect_append;
1656
4.10k
    }
1657
1658
    /* Did not find sequence start. */
1659
0
    return SUCCESS;
1660
64
}
1661
1662
1.01k
#define MIN_CONST_SEQUENCE_SIZE 3
1663
/*
1664
Optimize lists and sets for:
1665
    1. "for" loop, comprehension or "in"/"not in" tests:
1666
           Change literal list or set of constants into constant
1667
           tuple or frozenset respectively. Change list of
1668
           non-constants into tuple.
1669
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1670
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1671
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1672
           or BUILD_SET & SET_UPDATE respectively.
1673
*/
1674
static int
1675
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1676
                        PyObject *consts, PyObject *const_cache,
1677
                        _Py_hashtable_t *consts_index)
1678
505
{
1679
505
    assert(PyDict_CheckExact(const_cache));
1680
505
    assert(PyList_CheckExact(consts));
1681
1682
505
    cfg_instr *instr = &bb->b_instr[i];
1683
505
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1684
1685
505
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1686
505
    int seq_size = instr->i_oparg;
1687
505
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1688
505
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1689
334
    {
1690
334
        return SUCCESS;
1691
334
    }
1692
1693
171
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1694
171
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1695
25
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1696
            /* iterate over a tuple instead of list */
1697
13
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1698
13
        }
1699
25
        return SUCCESS;
1700
25
    }
1701
1702
146
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1703
146
    if (const_result == NULL) {
1704
0
        return ERROR;
1705
0
    }
1706
1707
1.09k
    for (int i = 0; i < seq_size; i++) {
1708
951
        cfg_instr *inst = const_instrs[i];
1709
951
        assert(loads_const(inst->i_opcode));
1710
951
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1711
951
        if (element == NULL) {
1712
0
            Py_DECREF(const_result);
1713
0
            return ERROR;
1714
0
        }
1715
951
        PyTuple_SET_ITEM(const_result, i, element);
1716
951
    }
1717
1718
146
    if (instr->i_opcode == BUILD_SET) {
1719
138
        PyObject *frozenset = PyFrozenSet_New(const_result);
1720
138
        if (frozenset == NULL) {
1721
0
            Py_DECREF(const_result);
1722
0
            return ERROR;
1723
0
        }
1724
138
        Py_SETREF(const_result, frozenset);
1725
138
    }
1726
1727
146
    int index = add_const(const_result, consts, const_cache, consts_index);
1728
146
    RETURN_IF_ERROR(index);
1729
146
    nop_out(const_instrs, seq_size);
1730
1731
146
    if (contains_or_iter) {
1732
132
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1733
132
    }
1734
14
    else {
1735
14
        assert(i >= 2);
1736
14
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1737
1738
14
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1739
1740
14
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1741
14
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1742
14
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1743
14
    }
1744
146
    return SUCCESS;
1745
146
}
1746
1747
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1748
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1749
 * Used to avoid creating constants which are slow to hash.
1750
 */
1751
static Py_ssize_t
1752
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1753
0
{
1754
0
    if (PyTuple_Check(obj)) {
1755
0
        Py_ssize_t i;
1756
0
        limit -= PyTuple_GET_SIZE(obj);
1757
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1758
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1759
0
            if (limit < 0) {
1760
0
                return limit;
1761
0
            }
1762
0
        }
1763
0
    }
1764
0
    return limit;
1765
0
}
1766
1767
70
#define MAX_INT_SIZE           128  /* bits */
1768
0
#define MAX_COLLECTION_SIZE    256  /* items */
1769
2
#define MAX_STR_SIZE          4096  /* characters */
1770
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1771
1772
static PyObject *
1773
const_folding_safe_multiply(PyObject *v, PyObject *w)
1774
12
{
1775
12
    if (PyLong_Check(v) && PyLong_Check(w) &&
1776
8
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1777
12
    ) {
1778
8
        int64_t vbits = _PyLong_NumBits(v);
1779
8
        int64_t wbits = _PyLong_NumBits(w);
1780
8
        assert(vbits >= 0);
1781
8
        assert(wbits >= 0);
1782
8
        if (vbits + wbits > MAX_INT_SIZE) {
1783
0
            return NULL;
1784
0
        }
1785
8
    }
1786
4
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1787
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1788
0
        if (size) {
1789
0
            long n = PyLong_AsLong(v);
1790
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1791
0
                return NULL;
1792
0
            }
1793
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1794
0
                return NULL;
1795
0
            }
1796
0
        }
1797
0
    }
1798
4
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1799
2
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1800
2
                                               PyBytes_GET_SIZE(w);
1801
2
        if (size) {
1802
2
            long n = PyLong_AsLong(v);
1803
2
            if (n < 0 || n > MAX_STR_SIZE / size) {
1804
0
                return NULL;
1805
0
            }
1806
2
        }
1807
2
    }
1808
2
    else if (PyLong_Check(w) &&
1809
2
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1810
2
    {
1811
2
        return const_folding_safe_multiply(w, v);
1812
2
    }
1813
1814
10
    return PyNumber_Multiply(v, w);
1815
12
}
1816
1817
static PyObject *
1818
const_folding_safe_power(PyObject *v, PyObject *w)
1819
2
{
1820
2
    if (PyLong_Check(v) && PyLong_Check(w) &&
1821
2
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1822
2
    ) {
1823
2
        int64_t vbits = _PyLong_NumBits(v);
1824
2
        size_t wbits = PyLong_AsSize_t(w);
1825
2
        assert(vbits >= 0);
1826
2
        if (wbits == (size_t)-1) {
1827
0
            return NULL;
1828
0
        }
1829
2
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1830
0
            return NULL;
1831
0
        }
1832
2
    }
1833
1834
2
    return PyNumber_Power(v, w, Py_None);
1835
2
}
1836
1837
static PyObject *
1838
const_folding_safe_lshift(PyObject *v, PyObject *w)
1839
20
{
1840
20
    if (PyLong_Check(v) && PyLong_Check(w) &&
1841
20
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1842
20
    ) {
1843
20
        int64_t vbits = _PyLong_NumBits(v);
1844
20
        size_t wbits = PyLong_AsSize_t(w);
1845
20
        assert(vbits >= 0);
1846
20
        if (wbits == (size_t)-1) {
1847
0
            return NULL;
1848
0
        }
1849
20
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1850
0
            return NULL;
1851
0
        }
1852
20
    }
1853
1854
20
    return PyNumber_Lshift(v, w);
1855
20
}
1856
1857
static PyObject *
1858
const_folding_safe_mod(PyObject *v, PyObject *w)
1859
0
{
1860
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1861
0
        return NULL;
1862
0
    }
1863
1864
0
    return PyNumber_Remainder(v, w);
1865
0
}
1866
1867
static PyObject *
1868
eval_const_binop(PyObject *left, int op, PyObject *right)
1869
34
{
1870
34
    assert(left != NULL && right != NULL);
1871
34
    assert(op >= 0 && op <= NB_OPARG_LAST);
1872
1873
34
    PyObject *result = NULL;
1874
34
    switch (op) {
1875
1
        case NB_ADD:
1876
1
            result = PyNumber_Add(left, right);
1877
1
            break;
1878
1
        case NB_SUBTRACT:
1879
1
            result = PyNumber_Subtract(left, right);
1880
1
            break;
1881
10
        case NB_MULTIPLY:
1882
10
            result = const_folding_safe_multiply(left, right);
1883
10
            break;
1884
0
        case NB_TRUE_DIVIDE:
1885
0
            result = PyNumber_TrueDivide(left, right);
1886
0
            break;
1887
0
        case NB_FLOOR_DIVIDE:
1888
0
            result = PyNumber_FloorDivide(left, right);
1889
0
            break;
1890
0
        case NB_REMAINDER:
1891
0
            result = const_folding_safe_mod(left, right);
1892
0
            break;
1893
2
        case NB_POWER:
1894
2
            result = const_folding_safe_power(left, right);
1895
2
            break;
1896
20
        case NB_LSHIFT:
1897
20
            result = const_folding_safe_lshift(left, right);
1898
20
            break;
1899
0
        case NB_RSHIFT:
1900
0
            result = PyNumber_Rshift(left, right);
1901
0
            break;
1902
0
        case NB_OR:
1903
0
            result = PyNumber_Or(left, right);
1904
0
            break;
1905
0
        case NB_XOR:
1906
0
            result = PyNumber_Xor(left, right);
1907
0
            break;
1908
0
        case NB_AND:
1909
0
            result = PyNumber_And(left, right);
1910
0
            break;
1911
0
        case NB_SUBSCR:
1912
0
            result = PyObject_GetItem(left, right);
1913
0
            break;
1914
0
        case NB_MATRIX_MULTIPLY:
1915
            // No builtin constants implement matrix multiplication
1916
0
            break;
1917
0
        default:
1918
0
            Py_UNREACHABLE();
1919
34
    }
1920
34
    return result;
1921
34
}
1922
1923
static int
1924
fold_const_binop(basicblock *bb, int i, PyObject *consts,
1925
                 PyObject *const_cache, _Py_hashtable_t *consts_index)
1926
1.60k
{
1927
1.63k
    #define BINOP_OPERAND_COUNT 2
1928
1.60k
    assert(PyDict_CheckExact(const_cache));
1929
1.60k
    assert(PyList_CheckExact(consts));
1930
1931
1.60k
    cfg_instr *binop = &bb->b_instr[i];
1932
1.60k
    assert(binop->i_opcode == BINARY_OP);
1933
1934
1.60k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1935
1.60k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1936
        /* not a const sequence */
1937
1.56k
        return SUCCESS;
1938
1.56k
    }
1939
1940
34
    cfg_instr *lhs_instr = operands_instrs[0];
1941
34
    assert(loads_const(lhs_instr->i_opcode));
1942
34
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1943
34
    if (lhs == NULL) {
1944
0
        return ERROR;
1945
0
    }
1946
1947
34
    cfg_instr *rhs_instr = operands_instrs[1];
1948
34
    assert(loads_const(rhs_instr->i_opcode));
1949
34
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1950
34
    if (rhs == NULL) {
1951
0
        Py_DECREF(lhs);
1952
0
        return ERROR;
1953
0
    }
1954
1955
34
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1956
34
    Py_DECREF(lhs);
1957
34
    Py_DECREF(rhs);
1958
34
    if (newconst == NULL) {
1959
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1960
0
            return ERROR;
1961
0
        }
1962
0
        PyErr_Clear();
1963
0
        return SUCCESS;
1964
0
    }
1965
1966
34
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1967
34
    return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
1968
34
}
1969
1970
static PyObject *
1971
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1972
1.81k
{
1973
1.81k
    assert(operand != NULL);
1974
1.81k
    assert(
1975
1.81k
        opcode == UNARY_NEGATIVE ||
1976
1.81k
        opcode == UNARY_INVERT ||
1977
1.81k
        opcode == UNARY_NOT ||
1978
1.81k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1979
1.81k
    );
1980
1.81k
    PyObject *result;
1981
1.81k
    switch (opcode) {
1982
1.81k
        case UNARY_NEGATIVE:
1983
1.81k
            result = PyNumber_Negative(operand);
1984
1.81k
            break;
1985
0
        case UNARY_INVERT:
1986
            // XXX: This should be removed once the ~bool depreciation expires.
1987
0
            if (PyBool_Check(operand)) {
1988
0
                return NULL;
1989
0
            }
1990
0
            result = PyNumber_Invert(operand);
1991
0
            break;
1992
0
        case UNARY_NOT: {
1993
0
            int r = PyObject_IsTrue(operand);
1994
0
            if (r < 0) {
1995
0
                return NULL;
1996
0
            }
1997
0
            result = PyBool_FromLong(!r);
1998
0
            break;
1999
0
        }
2000
0
        case CALL_INTRINSIC_1:
2001
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
2002
0
                Py_UNREACHABLE();
2003
0
            }
2004
0
            result = PyNumber_Positive(operand);
2005
0
            break;
2006
0
        default:
2007
0
            Py_UNREACHABLE();
2008
1.81k
    }
2009
1.81k
    return result;
2010
1.81k
}
2011
2012
static int
2013
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
2014
                   PyObject *const_cache, _Py_hashtable_t *consts_index)
2015
1.83k
{
2016
3.65k
    #define UNARYOP_OPERAND_COUNT 1
2017
1.83k
    assert(PyDict_CheckExact(const_cache));
2018
1.83k
    assert(PyList_CheckExact(consts));
2019
1.83k
    cfg_instr *unaryop = &bb->b_instr[i];
2020
2021
1.83k
    cfg_instr *operand_instr;
2022
1.83k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
2023
        /* not a const */
2024
22
        return SUCCESS;
2025
22
    }
2026
2027
1.83k
    assert(loads_const(operand_instr->i_opcode));
2028
1.81k
    PyObject *operand = get_const_value(
2029
1.81k
        operand_instr->i_opcode,
2030
1.81k
        operand_instr->i_oparg,
2031
1.81k
        consts
2032
1.81k
    );
2033
1.81k
    if (operand == NULL) {
2034
0
        return ERROR;
2035
0
    }
2036
2037
1.81k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
2038
1.81k
    Py_DECREF(operand);
2039
1.81k
    if (newconst == NULL) {
2040
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
2041
0
            return ERROR;
2042
0
        }
2043
0
        PyErr_Clear();
2044
0
        return SUCCESS;
2045
0
    }
2046
2047
1.81k
    if (unaryop->i_opcode == UNARY_NOT) {
2048
0
        assert(PyBool_Check(newconst));
2049
0
    }
2050
1.81k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
2051
1.81k
    return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
2052
1.81k
}
2053
2054
1.97k
#define VISITED (-1)
2055
2056
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
2057
// same effect.
2058
static int
2059
swaptimize(basicblock *block, int *ix)
2060
634
{
2061
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
2062
    // for this function. Make sure to blow away cached *.pyc files first!
2063
634
    assert(*ix < block->b_iused);
2064
634
    cfg_instr *instructions = &block->b_instr[*ix];
2065
    // Find the length of the current sequence of SWAPs and NOPs, and record the
2066
    // maximum depth of the stack manipulations:
2067
634
    assert(instructions[0].i_opcode == SWAP);
2068
634
    int depth = instructions[0].i_oparg;
2069
634
    int len = 0;
2070
634
    int more = false;
2071
634
    int limit = block->b_iused - *ix;
2072
808
    while (++len < limit) {
2073
808
        int opcode = instructions[len].i_opcode;
2074
808
        if (opcode == SWAP) {
2075
152
            depth = Py_MAX(depth, instructions[len].i_oparg);
2076
152
            more = true;
2077
152
        }
2078
656
        else if (opcode != NOP) {
2079
634
            break;
2080
634
        }
2081
808
    }
2082
    // It's already optimal if there's only one SWAP:
2083
634
    if (!more) {
2084
482
        return SUCCESS;
2085
482
    }
2086
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
2087
152
    int *stack = PyMem_Malloc(depth * sizeof(int));
2088
152
    if (stack == NULL) {
2089
0
        PyErr_NoMemory();
2090
0
        return ERROR;
2091
0
    }
2092
608
    for (int i = 0; i < depth; i++) {
2093
456
        stack[i] = i;
2094
456
    }
2095
    // Simulate the combined effect of these instructions by "running" them on
2096
    // our "stack":
2097
456
    for (int i = 0; i < len; i++) {
2098
304
        if (instructions[i].i_opcode == SWAP) {
2099
304
            int oparg = instructions[i].i_oparg;
2100
304
            int top = stack[0];
2101
            // SWAPs are 1-indexed:
2102
304
            stack[0] = stack[oparg - 1];
2103
304
            stack[oparg - 1] = top;
2104
304
        }
2105
304
    }
2106
    // Now we can begin! Our approach here is based on a solution to a closely
2107
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2108
    // think of this algorithm as determining the steps needed to efficiently
2109
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2110
    // though, we can efficiently *shuffle* it! For this reason, we will be
2111
    // replacing instructions starting from the *end* of the run. Since the
2112
    // solution is optimal, we don't need to worry about running out of space:
2113
152
    int current = len - 1;
2114
608
    for (int i = 0; i < depth; i++) {
2115
        // Skip items that have already been visited, or just happen to be in
2116
        // the correct location:
2117
456
        if (stack[i] == VISITED || stack[i] == i) {
2118
304
            continue;
2119
304
        }
2120
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2121
        // with other items; traversing the cycle and swapping each item with
2122
        // the next will put them all in the correct place. The weird
2123
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2124
        // can only swap from that position:
2125
152
        int j = i;
2126
608
        while (true) {
2127
            // Skip the actual swap if our item is zero, since swapping the top
2128
            // item with itself is pointless:
2129
608
            if (j) {
2130
304
                assert(0 <= current);
2131
                // SWAPs are 1-indexed:
2132
304
                instructions[current].i_opcode = SWAP;
2133
304
                instructions[current--].i_oparg = j + 1;
2134
304
            }
2135
608
            if (stack[j] == VISITED) {
2136
                // Completed the cycle:
2137
152
                assert(j == i);
2138
152
                break;
2139
152
            }
2140
456
            int next_j = stack[j];
2141
456
            stack[j] = VISITED;
2142
456
            j = next_j;
2143
456
        }
2144
152
    }
2145
    // NOP out any unused instructions:
2146
152
    while (0 <= current) {
2147
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2148
0
    }
2149
152
    PyMem_Free(stack);
2150
152
    *ix += len - 1;
2151
152
    return SUCCESS;
2152
152
}
2153
2154
2155
// This list is pretty small, since it's only okay to reorder opcodes that:
2156
// - can't affect control flow (like jumping or raising exceptions)
2157
// - can't invoke arbitrary code (besides finalizers)
2158
// - only touch the TOS (and pop it when finished)
2159
#define SWAPPABLE(opcode) \
2160
977
    ((opcode) == STORE_FAST || \
2161
977
     (opcode) == STORE_FAST_MAYBE_NULL || \
2162
977
     (opcode) == POP_TOP)
2163
2164
#define STORES_TO(instr) \
2165
182
    (((instr).i_opcode == STORE_FAST || \
2166
182
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2167
182
     ? (instr).i_oparg : -1)
2168
2169
static int
2170
next_swappable_instruction(basicblock *block, int i, int lineno)
2171
935
{
2172
957
    while (++i < block->b_iused) {
2173
952
        cfg_instr *instruction = &block->b_instr[i];
2174
952
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2175
            // Optimizing across this instruction could cause user-visible
2176
            // changes in the names bound between line tracing events!
2177
4
            return -1;
2178
4
        }
2179
948
        if (instruction->i_opcode == NOP) {
2180
22
            continue;
2181
22
        }
2182
926
        if (SWAPPABLE(instruction->i_opcode)) {
2183
385
            return i;
2184
385
        }
2185
541
        return -1;
2186
926
    }
2187
5
    return -1;
2188
935
}
2189
2190
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2191
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2192
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2193
static void
2194
apply_static_swaps(basicblock *block, int i)
2195
634
{
2196
    // SWAPs are to our left, and potential swaperands are to our right:
2197
769
    for (; 0 <= i; i--) {
2198
736
        assert(i < block->b_iused);
2199
736
        cfg_instr *swap = &block->b_instr[i];
2200
736
        if (swap->i_opcode != SWAP) {
2201
102
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2202
                // Nope, but we know how to handle these. Keep looking:
2203
51
                continue;
2204
51
            }
2205
            // We can't reason about what this instruction does. Bail:
2206
51
            return;
2207
102
        }
2208
634
        int j = next_swappable_instruction(block, i, -1);
2209
634
        if (j < 0) {
2210
373
            return;
2211
373
        }
2212
261
        int k = j;
2213
261
        int lineno = block->b_instr[j].i_loc.lineno;
2214
385
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2215
301
            k = next_swappable_instruction(block, k, lineno);
2216
301
            if (k < 0) {
2217
177
                return;
2218
177
            }
2219
301
        }
2220
        // The reordering is not safe if the two instructions to be swapped
2221
        // store to the same location, or if any intervening instruction stores
2222
        // to the same location as either of them.
2223
84
        int store_j = STORES_TO(block->b_instr[j]);
2224
84
        int store_k = STORES_TO(block->b_instr[k]);
2225
84
        if (store_j >= 0 || store_k >= 0) {
2226
84
            if (store_j == store_k) {
2227
0
                return;
2228
0
            }
2229
98
            for (int idx = j + 1; idx < k; idx++) {
2230
14
                int store_idx = STORES_TO(block->b_instr[idx]);
2231
14
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2232
0
                    return;
2233
0
                }
2234
14
            }
2235
84
        }
2236
2237
        // Success!
2238
84
        INSTR_SET_OP0(swap, NOP);
2239
84
        cfg_instr temp = block->b_instr[j];
2240
84
        block->b_instr[j] = block->b_instr[k];
2241
84
        block->b_instr[k] = temp;
2242
84
    }
2243
634
}
2244
2245
static int
2246
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
2247
                               PyObject *consts, _Py_hashtable_t *consts_index)
2248
23.8k
{
2249
23.8k
    assert(PyDict_CheckExact(const_cache));
2250
23.8k
    assert(PyList_CheckExact(consts));
2251
23.8k
    int opcode = 0;
2252
23.8k
    int oparg = 0;
2253
233k
    for (int i = 0; i < bb->b_iused; i++) {
2254
209k
        cfg_instr *inst = &bb->b_instr[i];
2255
209k
        if (inst->i_opcode == LOAD_CONST) {
2256
42.1k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2257
42.1k
            if (constant == NULL) {
2258
0
                return ERROR;
2259
0
            }
2260
42.1k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2261
42.1k
            Py_DECREF(constant);
2262
42.1k
            if (res < 0) {
2263
0
                return ERROR;
2264
0
            }
2265
42.1k
        }
2266
209k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2267
26.6k
                                      inst->i_opcode == COPY &&
2268
12
                                      inst->i_oparg == 1);
2269
209k
        if (! is_copy_of_load_const) {
2270
209k
            opcode = inst->i_opcode;
2271
209k
            oparg = inst->i_oparg;
2272
209k
        }
2273
209k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2274
209k
        if (!loads_const(opcode)) {
2275
166k
            continue;
2276
166k
        }
2277
42.3k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2278
42.3k
        switch(nextop) {
2279
28
            case POP_JUMP_IF_FALSE:
2280
28
            case POP_JUMP_IF_TRUE:
2281
28
            case JUMP_IF_FALSE:
2282
28
            case JUMP_IF_TRUE:
2283
28
            {
2284
                /* Remove LOAD_CONST const; conditional jump */
2285
28
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2286
28
                if (cnt == NULL) {
2287
0
                    return ERROR;
2288
0
                }
2289
28
                int is_true = PyObject_IsTrue(cnt);
2290
28
                Py_DECREF(cnt);
2291
28
                if (is_true == -1) {
2292
0
                    return ERROR;
2293
0
                }
2294
28
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2295
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2296
28
                    INSTR_SET_OP0(inst, NOP);
2297
28
                }
2298
28
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2299
28
                if (is_true == jump_if_true) {
2300
1
                    bb->b_instr[i+1].i_opcode = JUMP;
2301
1
                }
2302
27
                else {
2303
27
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2304
27
                }
2305
28
                break;
2306
28
            }
2307
711
            case IS_OP:
2308
711
            {
2309
                // Fold to POP_JUMP_IF_NONE:
2310
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2311
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2312
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2313
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2314
                // Fold to POP_JUMP_IF_NOT_NONE:
2315
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2316
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2317
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2318
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2319
711
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2320
711
                if (cnt == NULL) {
2321
0
                    return ERROR;
2322
0
                }
2323
711
                if (!Py_IsNone(cnt)) {
2324
34
                    Py_DECREF(cnt);
2325
34
                    break;
2326
34
                }
2327
677
                if (bb->b_iused <= i + 2) {
2328
1
                    break;
2329
1
                }
2330
676
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2331
676
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2332
                // Get rid of TO_BOOL regardless:
2333
676
                if (jump_instr->i_opcode == TO_BOOL) {
2334
661
                    INSTR_SET_OP0(jump_instr, NOP);
2335
661
                    if (bb->b_iused <= i + 3) {
2336
0
                        break;
2337
0
                    }
2338
661
                    jump_instr = &bb->b_instr[i + 3];
2339
661
                }
2340
676
                bool invert = is_instr->i_oparg;
2341
676
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2342
628
                    invert = !invert;
2343
628
                }
2344
48
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2345
15
                    break;
2346
15
                }
2347
661
                INSTR_SET_OP0(inst, NOP);
2348
661
                INSTR_SET_OP0(is_instr, NOP);
2349
661
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2350
661
                                              : POP_JUMP_IF_NONE;
2351
661
                break;
2352
676
            }
2353
28
            case TO_BOOL:
2354
28
            {
2355
28
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2356
28
                if (cnt == NULL) {
2357
0
                    return ERROR;
2358
0
                }
2359
28
                int is_true = PyObject_IsTrue(cnt);
2360
28
                Py_DECREF(cnt);
2361
28
                if (is_true == -1) {
2362
0
                    return ERROR;
2363
0
                }
2364
28
                cnt = PyBool_FromLong(is_true);
2365
28
                int index = add_const(cnt, consts, const_cache, consts_index);
2366
28
                if (index < 0) {
2367
0
                    return ERROR;
2368
0
                }
2369
28
                INSTR_SET_OP0(inst, NOP);
2370
28
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2371
28
                break;
2372
28
            }
2373
42.3k
        }
2374
42.3k
        if (inst->i_opcode == LOAD_CONST) {
2375
26.1k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2376
26.1k
            if (constant == NULL) {
2377
0
                return ERROR;
2378
0
            }
2379
26.1k
            int res = maybe_instr_make_load_common_const(inst, constant);
2380
26.1k
            Py_DECREF(constant);
2381
26.1k
            if (res < 0) {
2382
0
                return ERROR;
2383
0
            }
2384
26.1k
        }
2385
42.3k
    }
2386
23.8k
    return SUCCESS;
2387
23.8k
}
2388
2389
static int
2390
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
2391
5.38k
                    _Py_hashtable_t *consts_index) {
2392
29.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2393
23.8k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
2394
23.8k
    }
2395
5.38k
    return SUCCESS;
2396
5.38k
}
2397
2398
static int
2399
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
2400
                     _Py_hashtable_t *consts_index)
2401
23.8k
{
2402
23.8k
    assert(PyDict_CheckExact(const_cache));
2403
23.8k
    assert(PyList_CheckExact(consts));
2404
23.8k
    cfg_instr nop;
2405
23.8k
    INSTR_SET_OP0(&nop, NOP);
2406
233k
    for (int i = 0; i < bb->b_iused; i++) {
2407
209k
        cfg_instr *inst = &bb->b_instr[i];
2408
209k
        cfg_instr *target;
2409
209k
        int opcode = inst->i_opcode;
2410
209k
        int oparg = inst->i_oparg;
2411
209k
        if (HAS_TARGET(opcode)) {
2412
9.64k
            assert(inst->i_target->b_iused > 0);
2413
9.64k
            target = &inst->i_target->b_instr[0];
2414
9.64k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2415
9.64k
        }
2416
200k
        else {
2417
200k
            target = &nop;
2418
200k
        }
2419
209k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2420
209k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2421
209k
        switch (opcode) {
2422
            /* Try to fold tuples of constants.
2423
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2424
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2425
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2426
2.45k
            case BUILD_TUPLE:
2427
2.45k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2428
60
                    switch(oparg) {
2429
0
                        case 1:
2430
0
                            INSTR_SET_OP0(inst, NOP);
2431
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2432
0
                            continue;
2433
50
                        case 2:
2434
59
                        case 3:
2435
59
                            INSTR_SET_OP0(inst, NOP);
2436
59
                            bb->b_instr[i+1].i_opcode = SWAP;
2437
59
                            continue;
2438
60
                    }
2439
60
                }
2440
2.39k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
2441
2.39k
                break;
2442
343
            case BUILD_LIST:
2443
505
            case BUILD_SET:
2444
505
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
2445
505
                break;
2446
252
            case POP_JUMP_IF_NOT_NONE:
2447
677
            case POP_JUMP_IF_NONE:
2448
677
                switch (target->i_opcode) {
2449
16
                    case JUMP:
2450
16
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2451
677
                }
2452
677
                break;
2453
3.27k
            case POP_JUMP_IF_FALSE:
2454
3.27k
                switch (target->i_opcode) {
2455
138
                    case JUMP:
2456
138
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2457
3.27k
                }
2458
3.27k
                break;
2459
3.27k
            case POP_JUMP_IF_TRUE:
2460
919
                switch (target->i_opcode) {
2461
34
                    case JUMP:
2462
34
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2463
919
                }
2464
919
                break;
2465
919
            case JUMP_IF_FALSE:
2466
513
                switch (target->i_opcode) {
2467
0
                    case JUMP:
2468
0
                    case JUMP_IF_FALSE:
2469
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2470
0
                        continue;
2471
2
                    case JUMP_IF_TRUE:
2472
                        // No need to check for loops here, a block's b_next
2473
                        // cannot point to itself.
2474
2
                        assert(inst->i_target != inst->i_target->b_next);
2475
2
                        inst->i_target = inst->i_target->b_next;
2476
2
                        i--;
2477
2
                        continue;
2478
513
                }
2479
511
                break;
2480
511
            case JUMP_IF_TRUE:
2481
66
                switch (target->i_opcode) {
2482
0
                    case JUMP:
2483
0
                    case JUMP_IF_TRUE:
2484
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2485
0
                        continue;
2486
0
                    case JUMP_IF_FALSE:
2487
                        // No need to check for loops here, a block's b_next
2488
                        // cannot point to itself.
2489
0
                        assert(inst->i_target != inst->i_target->b_next);
2490
0
                        inst->i_target = inst->i_target->b_next;
2491
0
                        i--;
2492
0
                        continue;
2493
66
                }
2494
66
                break;
2495
886
            case JUMP:
2496
1.89k
            case JUMP_NO_INTERRUPT:
2497
1.89k
                switch (target->i_opcode) {
2498
0
                    case JUMP:
2499
0
                        i -= jump_thread(bb, inst, target, JUMP);
2500
0
                        continue;
2501
0
                    case JUMP_NO_INTERRUPT:
2502
0
                        i -= jump_thread(bb, inst, target, opcode);
2503
0
                        continue;
2504
1.89k
                }
2505
1.89k
                break;
2506
1.89k
            case FOR_ITER:
2507
465
                if (target->i_opcode == JUMP) {
2508
                    /* This will not work now because the jump (at target) could
2509
                     * be forward or backward and FOR_ITER only jumps forward. We
2510
                     * can re-enable this if ever we implement a backward version
2511
                     * of FOR_ITER.
2512
                     */
2513
                    /*
2514
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2515
                    */
2516
0
                }
2517
465
                break;
2518
4.81k
            case STORE_FAST:
2519
4.81k
                if (opcode == nextop &&
2520
398
                    oparg == bb->b_instr[i+1].i_oparg &&
2521
19
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2522
19
                    bb->b_instr[i].i_opcode = POP_TOP;
2523
19
                    bb->b_instr[i].i_oparg = 0;
2524
19
                }
2525
4.81k
                break;
2526
786
            case SWAP:
2527
786
                if (oparg == 1) {
2528
0
                    INSTR_SET_OP0(inst, NOP);
2529
0
                }
2530
786
                break;
2531
7.81k
            case LOAD_GLOBAL:
2532
7.81k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2533
3.03k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2534
3.03k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2535
3.03k
                }
2536
7.81k
                break;
2537
2.28k
            case COMPARE_OP:
2538
2.28k
                if (nextop == TO_BOOL) {
2539
769
                    INSTR_SET_OP0(inst, NOP);
2540
769
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2541
769
                    continue;
2542
769
                }
2543
1.51k
                break;
2544
1.51k
            case CONTAINS_OP:
2545
1.64k
            case IS_OP:
2546
1.64k
                if (nextop == TO_BOOL) {
2547
796
                    INSTR_SET_OP0(inst, NOP);
2548
796
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2549
796
                    continue;
2550
796
                }
2551
853
                if (nextop == UNARY_NOT) {
2552
0
                    INSTR_SET_OP0(inst, NOP);
2553
0
                    int inverted = oparg ^ 1;
2554
0
                    assert(inverted == 0 || inverted == 1);
2555
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2556
0
                    continue;
2557
0
                }
2558
853
                break;
2559
1.95k
            case TO_BOOL:
2560
1.95k
                if (nextop == TO_BOOL) {
2561
0
                    INSTR_SET_OP0(inst, NOP);
2562
0
                    continue;
2563
0
                }
2564
1.95k
                break;
2565
1.95k
            case UNARY_NOT:
2566
17
                if (nextop == TO_BOOL) {
2567
0
                    INSTR_SET_OP0(inst, NOP);
2568
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2569
0
                    continue;
2570
0
                }
2571
17
                if (nextop == UNARY_NOT) {
2572
0
                    INSTR_SET_OP0(inst, NOP);
2573
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2574
0
                    continue;
2575
0
                }
2576
17
                _Py_FALLTHROUGH;
2577
19
            case UNARY_INVERT:
2578
1.83k
            case UNARY_NEGATIVE:
2579
1.83k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
2580
1.83k
                break;
2581
339
            case CALL_INTRINSIC_1:
2582
339
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2583
64
                    if (nextop == GET_ITER) {
2584
0
                        INSTR_SET_OP0(inst, NOP);
2585
0
                    }
2586
64
                    else {
2587
64
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
2588
64
                    }
2589
64
                }
2590
275
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2591
0
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
2592
0
                }
2593
339
                break;
2594
1.60k
            case BINARY_OP:
2595
1.60k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
2596
1.60k
                break;
2597
209k
        }
2598
209k
    }
2599
2600
233k
    for (int i = 0; i < bb->b_iused; i++) {
2601
209k
        cfg_instr *inst = &bb->b_instr[i];
2602
209k
        if (inst->i_opcode == SWAP) {
2603
634
            if (swaptimize(bb, &i) < 0) {
2604
0
                goto error;
2605
0
            }
2606
634
            apply_static_swaps(bb, i);
2607
634
        }
2608
209k
    }
2609
23.8k
    return SUCCESS;
2610
0
error:
2611
0
    return ERROR;
2612
23.8k
}
2613
2614
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2615
2616
static int
2617
remove_redundant_nops_and_jumps(cfg_builder *g)
2618
11.3k
{
2619
11.3k
    int removed_nops, removed_jumps;
2620
12.2k
    do {
2621
        /* Convergence is guaranteed because the number of
2622
         * redundant jumps and nops only decreases.
2623
         */
2624
12.2k
        removed_nops = remove_redundant_nops(g);
2625
12.2k
        RETURN_IF_ERROR(removed_nops);
2626
12.2k
        removed_jumps = remove_redundant_jumps(g);
2627
12.2k
        RETURN_IF_ERROR(removed_jumps);
2628
12.2k
    } while(removed_nops + removed_jumps > 0);
2629
11.3k
    return SUCCESS;
2630
11.3k
}
2631
2632
/* Perform optimizations on a control flow graph.
2633
   The consts object should still be in list form to allow new constants
2634
   to be appended.
2635
2636
   Code trasnformations that reduce code size initially fill the gaps with
2637
   NOPs.  Later those NOPs are removed.
2638
*/
2639
static int
2640
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
2641
             _Py_hashtable_t *consts_index, int firstlineno)
2642
5.38k
{
2643
5.38k
    assert(PyDict_CheckExact(const_cache));
2644
5.38k
    RETURN_IF_ERROR(check_cfg(g));
2645
5.38k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2646
5.38k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2647
5.38k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2648
5.38k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
2649
29.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2650
23.8k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
2651
23.8k
    }
2652
5.38k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2653
5.38k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2654
5.38k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2655
5.38k
    assert(no_redundant_jumps(g));
2656
5.38k
    return SUCCESS;
2657
5.38k
}
2658
2659
static void
2660
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2661
5.70k
{
2662
5.70k
    int32_t line1 = inst1->i_loc.lineno;
2663
5.70k
    int32_t line2 = inst2->i_loc.lineno;
2664
    /* Skip if instructions are on different lines */
2665
5.70k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2666
1.90k
        return;
2667
1.90k
    }
2668
3.80k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2669
207
        return;
2670
207
    }
2671
3.59k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2672
3.59k
    INSTR_SET_OP0(inst2, NOP);
2673
3.59k
}
2674
2675
static int
2676
insert_superinstructions(cfg_builder *g)
2677
5.38k
{
2678
29.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2679
2680
218k
        for (int i = 0; i < b->b_iused; i++) {
2681
194k
            cfg_instr *inst = &b->b_instr[i];
2682
194k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2683
194k
            switch(inst->i_opcode) {
2684
22.0k
                case LOAD_FAST:
2685
22.0k
                    if (nextop == LOAD_FAST) {
2686
3.53k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2687
3.53k
                    }
2688
22.0k
                    break;
2689
4.54k
                case STORE_FAST:
2690
4.54k
                    switch (nextop) {
2691
1.85k
                        case LOAD_FAST:
2692
1.85k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2693
1.85k
                            break;
2694
312
                        case STORE_FAST:
2695
312
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2696
312
                            break;
2697
4.54k
                    }
2698
4.54k
                    break;
2699
194k
            }
2700
194k
        }
2701
23.8k
    }
2702
5.38k
    int res = remove_redundant_nops(g);
2703
5.38k
    assert(no_redundant_nops(g));
2704
5.38k
    return res;
2705
5.38k
}
2706
2707
#define NOT_LOCAL -1
2708
4.39k
#define DUMMY_INSTR -1
2709
2710
typedef struct {
2711
    // Index of instruction that produced the reference or DUMMY_INSTR.
2712
    int instr;
2713
2714
    // The local to which the reference refers or NOT_LOCAL.
2715
    int local;
2716
} ref;
2717
2718
typedef struct {
2719
    ref *refs;
2720
    Py_ssize_t size;
2721
    Py_ssize_t capacity;
2722
} ref_stack;
2723
2724
static int
2725
ref_stack_push(ref_stack *stack, ref r)
2726
154k
{
2727
154k
    if (stack->size == stack->capacity) {
2728
5.38k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2729
5.38k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2730
5.38k
        if (refs == NULL) {
2731
0
            PyErr_NoMemory();
2732
0
            return -1;
2733
0
        }
2734
5.38k
        stack->refs = refs;
2735
5.38k
        stack->capacity = new_cap;
2736
5.38k
    }
2737
154k
    stack->refs[stack->size] = r;
2738
154k
    stack->size++;
2739
154k
    return 0;
2740
154k
}
2741
2742
static ref
2743
ref_stack_pop(ref_stack *stack)
2744
137k
{
2745
137k
    assert(stack->size > 0);
2746
137k
    stack->size--;
2747
137k
    ref r = stack->refs[stack->size];
2748
137k
    return r;
2749
137k
}
2750
2751
static void
2752
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2753
545
{
2754
545
    Py_ssize_t idx = stack->size - off;
2755
545
    assert(idx >= 0 && idx < stack->size);
2756
545
    ref tmp = stack->refs[idx];
2757
545
    stack->refs[idx] = stack->refs[stack->size - 1];
2758
545
    stack->refs[stack->size - 1] = tmp;
2759
545
}
2760
2761
static ref
2762
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2763
20.9k
{
2764
20.9k
    assert(idx >= 0 && idx < stack->size);
2765
20.9k
    return stack->refs[idx];
2766
20.9k
}
2767
2768
static void
2769
ref_stack_clear(ref_stack *stack)
2770
16.6k
{
2771
16.6k
    stack->size = 0;
2772
16.6k
}
2773
2774
static void
2775
ref_stack_fini(ref_stack *stack)
2776
5.38k
{
2777
5.38k
    if (stack->refs != NULL) {
2778
5.38k
        PyMem_Free(stack->refs);
2779
5.38k
    }
2780
5.38k
    stack->refs = NULL;
2781
5.38k
    stack->capacity = 0;
2782
5.38k
    stack->size = 0;
2783
5.38k
}
2784
2785
typedef enum {
2786
    // The loaded reference is still on the stack when the local is killed
2787
    SUPPORT_KILLED  = 1,
2788
    // The loaded reference is stored into a local
2789
    STORED_AS_LOCAL = 2,
2790
    // The loaded reference is still on the stack at the end of the basic block
2791
    REF_UNCONSUMED  = 4,
2792
} LoadFastInstrFlag;
2793
2794
static void
2795
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2796
4.47k
{
2797
7.80k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2798
3.32k
        ref r = ref_stack_at(refs, i);
2799
3.32k
        if (r.local == local) {
2800
6
            assert(r.instr >= 0);
2801
6
            instr_flags[r.instr] |= SUPPORT_KILLED;
2802
6
        }
2803
3.32k
    }
2804
4.47k
}
2805
2806
static void
2807
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2808
4.39k
{
2809
4.39k
    kill_local(instr_flags, refs, local);
2810
4.39k
    if (r.instr != DUMMY_INSTR) {
2811
3.86k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2812
3.86k
    }
2813
4.39k
}
2814
2815
static void
2816
load_fast_push_block(basicblock ***sp, basicblock *target,
2817
                     Py_ssize_t start_depth)
2818
14.5k
{
2819
14.5k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2820
14.5k
    if (!target->b_visited) {
2821
11.2k
        target->b_visited = 1;
2822
11.2k
        *(*sp)++ = target;
2823
11.2k
    }
2824
14.5k
}
2825
2826
/*
2827
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2828
 * load borrowed references onto the operand stack.
2829
 *
2830
 * This is only safe when we can prove that the reference in the frame outlives
2831
 * the borrowed reference produced by the instruction. We make this tractable
2832
 * by enforcing the following lifetimes:
2833
 *
2834
 * 1. Borrowed references loaded onto the operand stack live until the end of
2835
 *    the instruction that consumes them from the stack. Any borrowed
2836
 *    references that would escape into the heap (e.g. into frame objects or
2837
 *    generators) are converted into new, strong references.
2838
 *
2839
 * 2. Locals live until they are either killed by an instruction
2840
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2841
 *    via `f_locals` is added to a tuple owned by the frame object.
2842
 *
2843
 * To simplify the problem of detecting which supporting references in the
2844
 * frame are killed by instructions that overwrite locals, we only allow
2845
 * borrowed references to be stored as a local in the frame if they were passed
2846
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2847
 * strong references.
2848
 *
2849
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2850
 * that meet the following criteria:
2851
 *
2852
 * 1. The produced reference must be consumed from the stack before the
2853
 *    supporting reference in the frame is killed.
2854
 *
2855
 * 2. The produced reference cannot be stored as a local.
2856
 *
2857
 * We use abstract interpretation to identify instructions that meet these
2858
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2859
 * stack of abstract references and note any instructions that violate the
2860
 * criteria above. Once we've processed all the instructions in a block, any
2861
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2862
 */
2863
static int
2864
optimize_load_fast(cfg_builder *g)
2865
5.38k
{
2866
5.38k
    int status;
2867
5.38k
    ref_stack refs = {0};
2868
5.38k
    int max_instrs = 0;
2869
5.38k
    basicblock *entryblock = g->g_entryblock;
2870
29.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2871
24.2k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2872
24.2k
    }
2873
5.38k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2874
5.38k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2875
5.38k
    if (instr_flags == NULL) {
2876
0
        PyErr_NoMemory();
2877
0
        return ERROR;
2878
0
    }
2879
5.38k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2880
5.38k
    if (blocks == NULL) {
2881
0
        status = ERROR;
2882
0
        goto done;
2883
0
    }
2884
5.38k
    basicblock **sp = blocks;
2885
5.38k
    *sp = entryblock;
2886
5.38k
    sp++;
2887
5.38k
    entryblock->b_startdepth = 0;
2888
5.38k
    entryblock->b_visited = 1;
2889
2890
5.38k
    #define PUSH_REF(instr, local)                \
2891
154k
        do {                                      \
2892
154k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2893
0
                status = ERROR;                   \
2894
0
                goto done;                        \
2895
0
            }                                     \
2896
154k
        } while(0)
2897
2898
22.0k
    while (sp != blocks) {
2899
16.6k
        basicblock *block = *--sp;
2900
16.6k
        assert(block->b_startdepth > -1);
2901
2902
        // Reset per-block state.
2903
16.6k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2904
2905
        // Reset the stack of refs. We don't track references on the stack
2906
        // across basic blocks, but the bytecode will expect their
2907
        // presence. Add dummy references as necessary.
2908
16.6k
        ref_stack_clear(&refs);
2909
27.2k
        for (int i = 0; i < block->b_startdepth; i++) {
2910
10.6k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2911
10.6k
        }
2912
2913
201k
        for (int i = 0; i < block->b_iused; i++) {
2914
184k
            cfg_instr *instr = &block->b_instr[i];
2915
184k
            int opcode = instr->i_opcode;
2916
184k
            int oparg = instr->i_oparg;
2917
184k
            assert(opcode != EXTENDED_ARG);
2918
184k
            switch (opcode) {
2919
                // Opcodes that load and store locals
2920
7
                case DELETE_FAST: {
2921
7
                    kill_local(instr_flags, &refs, oparg);
2922
7
                    break;
2923
0
                }
2924
2925
19.3k
                case LOAD_FAST: {
2926
19.3k
                    PUSH_REF(i, oparg);
2927
19.3k
                    break;
2928
19.3k
                }
2929
2930
19.3k
                case LOAD_FAST_AND_CLEAR: {
2931
72
                    kill_local(instr_flags, &refs, oparg);
2932
72
                    PUSH_REF(i, oparg);
2933
72
                    break;
2934
72
                }
2935
2936
3.19k
                case LOAD_FAST_LOAD_FAST: {
2937
3.19k
                    PUSH_REF(i, oparg >> 4);
2938
3.19k
                    PUSH_REF(i, oparg & 15);
2939
3.19k
                    break;
2940
3.19k
                }
2941
2942
3.85k
                case STORE_FAST: {
2943
3.85k
                    ref r = ref_stack_pop(&refs);
2944
3.85k
                    store_local(instr_flags, &refs, oparg, r);
2945
3.85k
                    break;
2946
3.19k
                }
2947
2948
45
                case STORE_FAST_LOAD_FAST: {
2949
                    // STORE_FAST
2950
45
                    ref r = ref_stack_pop(&refs);
2951
45
                    store_local(instr_flags, &refs, oparg >> 4, r);
2952
                    // LOAD_FAST
2953
45
                    PUSH_REF(i, oparg & 15);
2954
45
                    break;
2955
45
                }
2956
2957
247
                case STORE_FAST_STORE_FAST: {
2958
                    // STORE_FAST
2959
247
                    ref r = ref_stack_pop(&refs);
2960
247
                    store_local(instr_flags, &refs, oparg >> 4, r);
2961
                    // STORE_FAST
2962
247
                    r = ref_stack_pop(&refs);
2963
247
                    store_local(instr_flags, &refs, oparg & 15, r);
2964
247
                    break;
2965
45
                }
2966
2967
                // Opcodes that shuffle values on the stack
2968
1.07k
                case COPY: {
2969
1.07k
                    assert(oparg > 0);
2970
1.07k
                    Py_ssize_t idx = refs.size - oparg;
2971
1.07k
                    ref r = ref_stack_at(&refs, idx);
2972
1.07k
                    PUSH_REF(r.instr, r.local);
2973
1.07k
                    break;
2974
1.07k
                }
2975
2976
1.07k
                case SWAP: {
2977
545
                    assert(oparg >= 2);
2978
545
                    ref_stack_swap_top(&refs, oparg);
2979
545
                    break;
2980
1.07k
                }
2981
2982
                // We treat opcodes that do not consume all of their inputs on
2983
                // a case by case basis, as we have no generic way of knowing
2984
                // how many inputs should be left on the stack.
2985
2986
                // Opcodes that consume no inputs
2987
1.45k
                case FORMAT_SIMPLE:
2988
1.45k
                case GET_ANEXT:
2989
1.90k
                case GET_ITER:
2990
1.90k
                case GET_LEN:
2991
2.35k
                case IMPORT_FROM:
2992
2.35k
                case MATCH_KEYS:
2993
2.35k
                case MATCH_MAPPING:
2994
2.35k
                case MATCH_SEQUENCE:
2995
2.35k
                case WITH_EXCEPT_START: {
2996
2.35k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2997
2.35k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2998
2.35k
                    int net_pushed = num_pushed - num_popped;
2999
2.35k
                    assert(net_pushed >= 0);
3000
3.24k
                    for (int j = 0; j < net_pushed; j++) {
3001
898
                        PUSH_REF(i, NOT_LOCAL);
3002
898
                    }
3003
2.35k
                    break;
3004
2.35k
                }
3005
3006
                // Opcodes that consume some inputs and push no new values
3007
2.35k
                case DICT_MERGE:
3008
469
                case DICT_UPDATE:
3009
2.74k
                case LIST_APPEND:
3010
2.81k
                case LIST_EXTEND:
3011
9.19k
                case MAP_ADD:
3012
9.19k
                case RERAISE:
3013
9.26k
                case SET_ADD:
3014
9.27k
                case SET_UPDATE: {
3015
9.27k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
3016
9.27k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
3017
9.27k
                    int net_popped = num_popped - num_pushed;
3018
9.27k
                    assert(net_popped > 0);
3019
24.9k
                    for (int i = 0; i < net_popped; i++) {
3020
15.6k
                        ref_stack_pop(&refs);
3021
15.6k
                    }
3022
9.27k
                    break;
3023
9.26k
                }
3024
3025
145
                case END_SEND: {
3026
145
                    assert(_PyOpcode_num_popped(opcode, oparg) == 3);
3027
145
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
3028
145
                    ref tos = ref_stack_pop(&refs);
3029
145
                    ref_stack_pop(&refs);
3030
145
                    ref_stack_pop(&refs);
3031
145
                    PUSH_REF(tos.instr, tos.local);
3032
145
                    break;
3033
145
                }
3034
3035
1.66k
                case SET_FUNCTION_ATTRIBUTE: {
3036
1.66k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
3037
1.66k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
3038
1.66k
                    ref tos = ref_stack_pop(&refs);
3039
1.66k
                    ref_stack_pop(&refs);
3040
1.66k
                    PUSH_REF(tos.instr, tos.local);
3041
1.66k
                    break;
3042
1.66k
                }
3043
3044
                // Opcodes that consume some inputs and push new values
3045
1.66k
                case CHECK_EXC_MATCH: {
3046
0
                    ref_stack_pop(&refs);
3047
0
                    PUSH_REF(i, NOT_LOCAL);
3048
0
                    break;
3049
0
                }
3050
3051
453
                case FOR_ITER: {
3052
453
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
3053
453
                    PUSH_REF(i, NOT_LOCAL);
3054
453
                    break;
3055
453
                }
3056
3057
14.0k
                case LOAD_ATTR:
3058
14.2k
                case LOAD_SUPER_ATTR: {
3059
14.2k
                    ref self = ref_stack_pop(&refs);
3060
14.2k
                    if (opcode == LOAD_SUPER_ATTR) {
3061
191
                        ref_stack_pop(&refs);
3062
191
                        ref_stack_pop(&refs);
3063
191
                    }
3064
14.2k
                    PUSH_REF(i, NOT_LOCAL);
3065
14.2k
                    if (oparg & 1) {
3066
                        // A method call; conservatively assume that self is pushed
3067
                        // back onto the stack
3068
4.26k
                        PUSH_REF(self.instr, self.local);
3069
4.26k
                    }
3070
14.2k
                    break;
3071
14.2k
                }
3072
3073
14.2k
                case LOAD_SPECIAL:
3074
246
                case PUSH_EXC_INFO: {
3075
246
                    ref tos = ref_stack_pop(&refs);
3076
246
                    PUSH_REF(i, NOT_LOCAL);
3077
246
                    PUSH_REF(tos.instr, tos.local);
3078
246
                    break;
3079
246
                }
3080
3081
246
                case SEND: {
3082
145
                    load_fast_push_block(&sp, instr->i_target, refs.size);
3083
145
                    ref_stack_pop(&refs);
3084
145
                    PUSH_REF(i, NOT_LOCAL);
3085
145
                    break;
3086
145
                }
3087
3088
                // Opcodes that consume all of their inputs
3089
128k
                default: {
3090
128k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
3091
128k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
3092
128k
                    if (HAS_TARGET(instr->i_opcode)) {
3093
6.06k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
3094
6.06k
                    }
3095
128k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
3096
                        // Block push opcodes only affect the stack when jumping
3097
                        // to the target.
3098
227k
                        for (int j = 0; j < num_popped; j++) {
3099
99.0k
                            ref_stack_pop(&refs);
3100
99.0k
                        }
3101
222k
                        for (int j = 0; j < num_pushed; j++) {
3102
94.5k
                            PUSH_REF(i, NOT_LOCAL);
3103
94.5k
                        }
3104
128k
                    }
3105
128k
                    break;
3106
128k
                }
3107
184k
            }
3108
184k
        }
3109
3110
        // Push fallthrough block
3111
16.6k
        if (BB_HAS_FALLTHROUGH(block)) {
3112
7.92k
            assert(block->b_next != NULL);
3113
7.92k
            load_fast_push_block(&sp, block->b_next, refs.size);
3114
7.92k
        }
3115
3116
        // Mark instructions that produce values that are on the stack at the
3117
        // end of the basic block
3118
33.1k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3119
16.5k
            ref r = ref_stack_at(&refs, i);
3120
16.5k
            if (r.instr != -1) {
3121
9.97k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3122
9.97k
            }
3123
16.5k
        }
3124
3125
        // Optimize instructions
3126
201k
        for (int i = 0; i < block->b_iused; i++) {
3127
184k
            if (!instr_flags[i]) {
3128
171k
                cfg_instr *instr = &block->b_instr[i];
3129
171k
                switch (instr->i_opcode) {
3130
18.9k
                    case LOAD_FAST:
3131
18.9k
                        instr->i_opcode = LOAD_FAST_BORROW;
3132
18.9k
                        break;
3133
3.18k
                    case LOAD_FAST_LOAD_FAST:
3134
3.18k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3135
3.18k
                        break;
3136
149k
                    default:
3137
149k
                        break;
3138
171k
                }
3139
171k
            }
3140
184k
        }
3141
16.6k
    }
3142
3143
5.38k
    #undef PUSH_REF
3144
3145
5.38k
    status = SUCCESS;
3146
3147
5.38k
done:
3148
5.38k
    ref_stack_fini(&refs);
3149
5.38k
    PyMem_Free(instr_flags);
3150
5.38k
    PyMem_Free(blocks);
3151
5.38k
    return status;
3152
5.38k
}
3153
3154
// helper functions for add_checks_for_loads_of_unknown_variables
3155
static inline void
3156
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3157
79.9k
{
3158
    // Push b if the unsafe mask is giving us any new information.
3159
    // To avoid overflowing the stack, only allow each block once.
3160
    // Use b->b_visited=1 to mean that b is currently on the stack.
3161
79.9k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3162
79.9k
    if (b->b_unsafe_locals_mask != both) {
3163
9.44k
        b->b_unsafe_locals_mask = both;
3164
        // More work left to do.
3165
9.44k
        if (!b->b_visited) {
3166
            // not on the stack, so push it.
3167
9.37k
            *(*sp)++ = b;
3168
9.37k
            b->b_visited = 1;
3169
9.37k
        }
3170
9.44k
    }
3171
79.9k
}
3172
3173
static void
3174
scan_block_for_locals(basicblock *b, basicblock ***sp)
3175
30.4k
{
3176
    // bit i is set if local i is potentially uninitialized
3177
30.4k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3178
225k
    for (int i = 0; i < b->b_iused; i++) {
3179
195k
        cfg_instr *instr = &b->b_instr[i];
3180
195k
        assert(instr->i_opcode != EXTENDED_ARG);
3181
195k
        if (instr->i_except != NULL) {
3182
49.6k
            maybe_push(instr->i_except, unsafe_mask, sp);
3183
49.6k
        }
3184
195k
        if (instr->i_oparg >= 64) {
3185
9.98k
            continue;
3186
9.98k
        }
3187
195k
        assert(instr->i_oparg >= 0);
3188
185k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3189
185k
        switch (instr->i_opcode) {
3190
399
            case DELETE_FAST:
3191
543
            case LOAD_FAST_AND_CLEAR:
3192
831
            case STORE_FAST_MAYBE_NULL:
3193
831
                unsafe_mask |= bit;
3194
831
                break;
3195
9.27k
            case STORE_FAST:
3196
9.27k
                unsafe_mask &= ~bit;
3197
9.27k
                break;
3198
43
            case LOAD_FAST_CHECK:
3199
                // If this doesn't raise, then the local is defined.
3200
43
                unsafe_mask &= ~bit;
3201
43
                break;
3202
37.7k
            case LOAD_FAST:
3203
37.7k
                if (unsafe_mask & bit) {
3204
43
                    instr->i_opcode = LOAD_FAST_CHECK;
3205
43
                }
3206
37.7k
                unsafe_mask &= ~bit;
3207
37.7k
                break;
3208
185k
        }
3209
185k
    }
3210
30.4k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3211
14.4k
        maybe_push(b->b_next, unsafe_mask, sp);
3212
14.4k
    }
3213
30.4k
    cfg_instr *last = basicblock_last_instr(b);
3214
30.4k
    if (last && is_jump(last)) {
3215
12.2k
        assert(last->i_target != NULL);
3216
12.2k
        maybe_push(last->i_target, unsafe_mask, sp);
3217
12.2k
    }
3218
30.4k
}
3219
3220
static int
3221
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3222
0
{
3223
0
    assert(nlocals > 64);
3224
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3225
0
    if (states == NULL) {
3226
0
        PyErr_NoMemory();
3227
0
        return ERROR;
3228
0
    }
3229
0
    Py_ssize_t blocknum = 0;
3230
    // state[i - 64] == blocknum if local i is guaranteed to
3231
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3232
    // STORE_FAST within that basicblock (not followed by
3233
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3234
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3235
0
        blocknum++;
3236
0
        for (int i = 0; i < b->b_iused; i++) {
3237
0
            cfg_instr *instr = &b->b_instr[i];
3238
0
            assert(instr->i_opcode != EXTENDED_ARG);
3239
0
            int arg = instr->i_oparg;
3240
0
            if (arg < 64) {
3241
0
                continue;
3242
0
            }
3243
0
            assert(arg >= 0);
3244
0
            switch (instr->i_opcode) {
3245
0
                case DELETE_FAST:
3246
0
                case LOAD_FAST_AND_CLEAR:
3247
0
                case STORE_FAST_MAYBE_NULL:
3248
0
                    states[arg - 64] = blocknum - 1;
3249
0
                    break;
3250
0
                case STORE_FAST:
3251
0
                    states[arg - 64] = blocknum;
3252
0
                    break;
3253
0
                case LOAD_FAST:
3254
0
                    if (states[arg - 64] != blocknum) {
3255
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3256
0
                    }
3257
0
                    states[arg - 64] = blocknum;
3258
0
                    break;
3259
0
                    Py_UNREACHABLE();
3260
0
            }
3261
0
        }
3262
0
    }
3263
0
    PyMem_Free(states);
3264
0
    return SUCCESS;
3265
0
}
3266
3267
static int
3268
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3269
5.38k
{
3270
5.38k
    assert(PyList_CheckExact(consts));
3271
5.38k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3272
5.38k
    if (nconsts == 0) {
3273
32
        return SUCCESS;  /* nothing to do */
3274
32
    }
3275
3276
5.35k
    Py_ssize_t *index_map = NULL;
3277
5.35k
    Py_ssize_t *reverse_index_map = NULL;
3278
5.35k
    int err = ERROR;
3279
3280
5.35k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3281
5.35k
    if (index_map == NULL) {
3282
0
        goto end;
3283
0
    }
3284
30.6k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3285
25.2k
        index_map[i] = -1;
3286
25.2k
    }
3287
    // The first constant may be docstring; keep it always.
3288
5.35k
    index_map[0] = 0;
3289
3290
    /* mark used consts */
3291
29.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3292
218k
        for (int i = 0; i < b->b_iused; i++) {
3293
194k
            int opcode = b->b_instr[i].i_opcode;
3294
194k
            if (OPCODE_HAS_CONST(opcode)) {
3295
18.9k
                int index = b->b_instr[i].i_oparg;
3296
18.9k
                index_map[index] = index;
3297
18.9k
            }
3298
194k
        }
3299
23.8k
    }
3300
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3301
    /* condense consts */
3302
5.35k
    Py_ssize_t n_used_consts = 0;
3303
35.9k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3304
30.6k
        if (index_map[i] != -1) {
3305
18.3k
            assert(index_map[i] == i);
3306
18.3k
            index_map[n_used_consts++] = index_map[i];
3307
18.3k
        }
3308
30.6k
    }
3309
5.35k
    if (n_used_consts == nconsts) {
3310
        /* nothing to do */
3311
1.40k
        err = SUCCESS;
3312
1.40k
        goto end;
3313
1.40k
    }
3314
3315
    /* move all used consts to the beginning of the consts list */
3316
5.35k
    assert(n_used_consts < nconsts);
3317
20.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3318
16.7k
        Py_ssize_t old_index = index_map[i];
3319
16.7k
        assert(i <= old_index && old_index < nconsts);
3320
16.7k
        if (i != old_index) {
3321
9.01k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3322
9.01k
            assert(value != NULL);
3323
9.01k
            PyList_SetItem(consts, i, Py_NewRef(value));
3324
9.01k
        }
3325
16.7k
    }
3326
3327
    /* truncate the consts list at its new size */
3328
3.94k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3329
0
        goto end;
3330
0
    }
3331
    /* adjust const indices in the bytecode */
3332
3.94k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3333
3.94k
    if (reverse_index_map == NULL) {
3334
0
        goto end;
3335
0
    }
3336
32.9k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3337
29.0k
        reverse_index_map[i] = -1;
3338
29.0k
    }
3339
20.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3340
16.7k
        assert(index_map[i] != -1);
3341
16.7k
        assert(reverse_index_map[index_map[i]] == -1);
3342
16.7k
        reverse_index_map[index_map[i]] = i;
3343
16.7k
    }
3344
3345
23.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3346
193k
        for (int i = 0; i < b->b_iused; i++) {
3347
173k
            int opcode = b->b_instr[i].i_opcode;
3348
173k
            if (OPCODE_HAS_CONST(opcode)) {
3349
18.5k
                int index = b->b_instr[i].i_oparg;
3350
18.5k
                assert(reverse_index_map[index] >= 0);
3351
18.5k
                assert(reverse_index_map[index] < n_used_consts);
3352
18.5k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3353
18.5k
            }
3354
173k
        }
3355
19.5k
    }
3356
3357
3.94k
    err = SUCCESS;
3358
5.35k
end:
3359
5.35k
    PyMem_Free(index_map);
3360
5.35k
    PyMem_Free(reverse_index_map);
3361
5.35k
    return err;
3362
3.94k
}
3363
3364
3365
3366
static int
3367
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3368
                                                int nlocals,
3369
                                                int nparams)
3370
5.38k
{
3371
5.38k
    if (nlocals == 0) {
3372
1.76k
        return SUCCESS;
3373
1.76k
    }
3374
3.61k
    if (nlocals > 64) {
3375
        // To avoid O(nlocals**2) compilation, locals beyond the first
3376
        // 64 are only analyzed one basicblock at a time: initialization
3377
        // info is not passed between basicblocks.
3378
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3379
0
            return ERROR;
3380
0
        }
3381
0
        nlocals = 64;
3382
0
    }
3383
3.61k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3384
3.61k
    if (stack == NULL) {
3385
0
        return ERROR;
3386
0
    }
3387
3.61k
    basicblock **sp = stack;
3388
3389
    // First origin of being uninitialized:
3390
    // The non-parameter locals in the entry block.
3391
3.61k
    uint64_t start_mask = 0;
3392
6.88k
    for (int i = nparams; i < nlocals; i++) {
3393
3.26k
        start_mask |= (uint64_t)1 << i;
3394
3.26k
    }
3395
3.61k
    maybe_push(entryblock, start_mask, &sp);
3396
3397
    // Second origin of being uninitialized:
3398
    // There could be DELETE_FAST somewhere, so
3399
    // be sure to scan each basicblock at least once.
3400
24.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3401
21.0k
        scan_block_for_locals(b, &sp);
3402
21.0k
    }
3403
    // Now propagate the uncertainty from the origins we found: Use
3404
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3405
12.9k
    while (sp > stack) {
3406
9.37k
        basicblock *b = *--sp;
3407
        // mark as no longer on stack
3408
9.37k
        b->b_visited = 0;
3409
9.37k
        scan_block_for_locals(b, &sp);
3410
9.37k
    }
3411
3.61k
    PyMem_Free(stack);
3412
3.61k
    return SUCCESS;
3413
3.61k
}
3414
3415
3416
static int
3417
4.18k
mark_warm(basicblock *entryblock) {
3418
4.18k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3419
4.18k
    if (stack == NULL) {
3420
0
        return ERROR;
3421
0
    }
3422
4.18k
    basicblock **sp = stack;
3423
3424
4.18k
    *sp++ = entryblock;
3425
4.18k
    entryblock->b_visited = 1;
3426
19.3k
    while (sp > stack) {
3427
15.1k
        basicblock *b = *(--sp);
3428
15.1k
        assert(!b->b_except_handler);
3429
15.1k
        b->b_warm = 1;
3430
15.1k
        basicblock *next = b->b_next;
3431
15.1k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3432
6.45k
            *sp++ = next;
3433
6.45k
            next->b_visited = 1;
3434
6.45k
        }
3435
149k
        for (int i=0; i < b->b_iused; i++) {
3436
133k
            cfg_instr *instr = &b->b_instr[i];
3437
133k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3438
4.50k
                *sp++ = instr->i_target;
3439
4.50k
                instr->i_target->b_visited = 1;
3440
4.50k
            }
3441
133k
        }
3442
15.1k
    }
3443
4.18k
    PyMem_Free(stack);
3444
4.18k
    return SUCCESS;
3445
4.18k
}
3446
3447
static int
3448
4.18k
mark_cold(basicblock *entryblock) {
3449
26.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3450
22.6k
        assert(!b->b_cold && !b->b_warm);
3451
22.6k
    }
3452
4.18k
    if (mark_warm(entryblock) < 0) {
3453
0
        return ERROR;
3454
0
    }
3455
3456
4.18k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3457
4.18k
    if (stack == NULL) {
3458
0
        return ERROR;
3459
0
    }
3460
3461
4.18k
    basicblock **sp = stack;
3462
26.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3463
22.6k
        if (b->b_except_handler) {
3464
1.66k
            assert(!b->b_warm);
3465
1.66k
            *sp++ = b;
3466
1.66k
            b->b_visited = 1;
3467
1.66k
        }
3468
22.6k
    }
3469
3470
7.26k
    while (sp > stack) {
3471
3.08k
        basicblock *b = *(--sp);
3472
3.08k
        b->b_cold = 1;
3473
3.08k
        basicblock *next = b->b_next;
3474
3.08k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3475
929
            if (!next->b_warm && !next->b_visited) {
3476
750
                *sp++ = next;
3477
750
                next->b_visited = 1;
3478
750
            }
3479
929
        }
3480
16.1k
        for (int i = 0; i < b->b_iused; i++) {
3481
13.0k
            cfg_instr *instr = &b->b_instr[i];
3482
13.0k
            if (is_jump(instr)) {
3483
912
                assert(i == b->b_iused - 1);
3484
912
                basicblock *target = b->b_instr[i].i_target;
3485
912
                if (!target->b_warm && !target->b_visited) {
3486
671
                    *sp++ = target;
3487
671
                    target->b_visited = 1;
3488
671
                }
3489
912
            }
3490
13.0k
        }
3491
3.08k
    }
3492
4.18k
    PyMem_Free(stack);
3493
4.18k
    return SUCCESS;
3494
4.18k
}
3495
3496
3497
static int
3498
5.38k
push_cold_blocks_to_end(cfg_builder *g) {
3499
5.38k
    basicblock *entryblock = g->g_entryblock;
3500
5.38k
    if (entryblock->b_next == NULL) {
3501
        /* single basicblock, no need to reorder */
3502
1.20k
        return SUCCESS;
3503
1.20k
    }
3504
4.18k
    RETURN_IF_ERROR(mark_cold(entryblock));
3505
3506
4.18k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3507
3508
    /* If we have a cold block with fallthrough to a warm block, add */
3509
    /* an explicit jump instead of fallthrough */
3510
27.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3511
22.8k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3512
145
            basicblock *explicit_jump = cfg_builder_new_block(g);
3513
145
            if (explicit_jump == NULL) {
3514
0
                return ERROR;
3515
0
            }
3516
145
            if (!IS_LABEL(b->b_next->b_label)) {
3517
0
                b->b_next->b_label.id = next_lbl++;
3518
0
            }
3519
145
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3520
145
                             NO_LOCATION);
3521
145
            explicit_jump->b_cold = 1;
3522
145
            explicit_jump->b_next = b->b_next;
3523
145
            explicit_jump->b_predecessors = 1;
3524
145
            b->b_next = explicit_jump;
3525
3526
            /* set target */
3527
145
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3528
145
            last->i_target = explicit_jump->b_next;
3529
145
        }
3530
22.8k
    }
3531
3532
4.18k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3533
4.18k
    basicblock *cold_blocks = NULL;
3534
4.18k
    basicblock *cold_blocks_tail = NULL;
3535
3536
4.18k
    basicblock *b = entryblock;
3537
5.42k
    while(b->b_next) {
3538
5.42k
        assert(!b->b_cold);
3539
20.8k
        while (b->b_next && !b->b_next->b_cold) {
3540
15.4k
            b = b->b_next;
3541
15.4k
        }
3542
5.42k
        if (b->b_next == NULL) {
3543
            /* no more cold blocks */
3544
4.18k
            break;
3545
4.18k
        }
3546
3547
        /* b->b_next is the beginning of a cold streak */
3548
5.42k
        assert(!b->b_cold && b->b_next->b_cold);
3549
3550
1.24k
        basicblock *b_end = b->b_next;
3551
3.23k
        while (b_end->b_next && b_end->b_next->b_cold) {
3552
1.98k
            b_end = b_end->b_next;
3553
1.98k
        }
3554
3555
        /* b_end is the end of the cold streak */
3556
1.24k
        assert(b_end && b_end->b_cold);
3557
1.24k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3558
3559
1.24k
        if (cold_blocks == NULL) {
3560
594
            cold_blocks = b->b_next;
3561
594
        }
3562
649
        else {
3563
649
            cold_blocks_tail->b_next = b->b_next;
3564
649
        }
3565
1.24k
        cold_blocks_tail = b_end;
3566
1.24k
        b->b_next = b_end->b_next;
3567
1.24k
        b_end->b_next = NULL;
3568
1.24k
    }
3569
4.18k
    assert(b != NULL && b->b_next == NULL);
3570
4.18k
    b->b_next = cold_blocks;
3571
3572
4.18k
    if (cold_blocks != NULL) {
3573
594
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3574
594
    }
3575
4.18k
    return SUCCESS;
3576
4.18k
}
3577
3578
static int
3579
convert_pseudo_conditional_jumps(cfg_builder *g)
3580
5.38k
{
3581
5.38k
    basicblock *entryblock = g->g_entryblock;
3582
29.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3583
215k
        for (int i = 0; i < b->b_iused; i++) {
3584
191k
            cfg_instr *instr = &b->b_instr[i];
3585
191k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3586
577
                assert(i == b->b_iused - 1);
3587
577
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3588
511
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3589
577
                location loc = instr->i_loc;
3590
577
                basicblock *except = instr->i_except;
3591
577
                cfg_instr copy = {
3592
577
                            .i_opcode = COPY,
3593
577
                            .i_oparg = 1,
3594
577
                            .i_loc = loc,
3595
577
                            .i_target = NULL,
3596
577
                            .i_except = except,
3597
577
                };
3598
577
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3599
577
                cfg_instr to_bool = {
3600
577
                            .i_opcode = TO_BOOL,
3601
577
                            .i_oparg = 0,
3602
577
                            .i_loc = loc,
3603
577
                            .i_target = NULL,
3604
577
                            .i_except = except,
3605
577
                };
3606
577
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3607
577
            }
3608
191k
        }
3609
24.0k
    }
3610
5.38k
    return SUCCESS;
3611
5.38k
}
3612
3613
static int
3614
convert_pseudo_ops(cfg_builder *g)
3615
5.38k
{
3616
5.38k
    basicblock *entryblock = g->g_entryblock;
3617
29.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3618
218k
        for (int i = 0; i < b->b_iused; i++) {
3619
194k
            cfg_instr *instr = &b->b_instr[i];
3620
194k
            if (is_block_push(instr)) {
3621
1.66k
                INSTR_SET_OP0(instr, NOP);
3622
1.66k
            }
3623
192k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3624
1.38k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3625
1.38k
                instr->i_opcode = LOAD_FAST;
3626
1.38k
            }
3627
190k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3628
144
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3629
144
                instr->i_opcode = STORE_FAST;
3630
144
            }
3631
194k
        }
3632
24.0k
    }
3633
5.38k
    return remove_redundant_nops_and_jumps(g);
3634
5.38k
}
3635
3636
static inline bool
3637
32.5k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3638
32.5k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3639
19.5k
        return basicblock_has_no_lineno(b);
3640
19.5k
    }
3641
12.9k
    else {
3642
12.9k
        return false;
3643
12.9k
    }
3644
32.5k
}
3645
3646
3647
/* PEP 626 mandates that the f_lineno of a frame is correct
3648
 * after a frame terminates. It would be prohibitively expensive
3649
 * to continuously update the f_lineno field at runtime,
3650
 * so we make sure that all exiting instruction (raises and returns)
3651
 * have a valid line number, allowing us to compute f_lineno lazily.
3652
 * We can do this by duplicating the exit blocks without line number
3653
 * so that none have more than one predecessor. We can then safely
3654
 * copy the line number from the sole predecessor block.
3655
 */
3656
static int
3657
duplicate_exits_without_lineno(cfg_builder *g)
3658
10.7k
{
3659
10.7k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3660
3661
    /* Copy all exit blocks without line number that are targets of a jump.
3662
     */
3663
10.7k
    basicblock *entryblock = g->g_entryblock;
3664
58.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3665
47.9k
        cfg_instr *last = basicblock_last_instr(b);
3666
47.9k
        if (last == NULL) {
3667
8.74k
            continue;
3668
8.74k
        }
3669
39.1k
        if (is_jump(last)) {
3670
15.3k
            basicblock *target = next_nonempty_block(last->i_target);
3671
15.3k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3672
381
                basicblock *new_target = copy_basicblock(g, target);
3673
381
                if (new_target == NULL) {
3674
0
                    return ERROR;
3675
0
                }
3676
381
                new_target->b_instr[0].i_loc = last->i_loc;
3677
381
                last->i_target = new_target;
3678
381
                target->b_predecessors--;
3679
381
                new_target->b_predecessors = 1;
3680
381
                new_target->b_next = target->b_next;
3681
381
                new_target->b_label.id = next_lbl++;
3682
381
                target->b_next = new_target;
3683
381
            }
3684
15.3k
        }
3685
39.1k
    }
3686
3687
    /* Any remaining reachable exit blocks without line number can only be reached by
3688
     * fall through, and thus can only have a single predecessor */
3689
58.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3690
47.9k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3691
17.2k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3692
523
                cfg_instr *last = basicblock_last_instr(b);
3693
523
                assert(last != NULL);
3694
523
                b->b_next->b_instr[0].i_loc = last->i_loc;
3695
523
            }
3696
17.2k
        }
3697
47.9k
    }
3698
10.7k
    return SUCCESS;
3699
10.7k
}
3700
3701
3702
/* If an instruction has no line number, but it's predecessor in the BB does,
3703
 * then copy the line number. If a successor block has no line number, and only
3704
 * one predecessor, then inherit the line number.
3705
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3706
 * Also reduces the size of the line number table,
3707
 * but has no impact on the generated line number events.
3708
 */
3709
3710
static inline void
3711
maybe_propagate_location(basicblock *b, int i, location loc)
3712
420k
{
3713
420k
    assert(b->b_iused > i);
3714
420k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3715
26.5k
         b->b_instr[i].i_loc = loc;
3716
26.5k
    }
3717
420k
}
3718
3719
static void
3720
propagate_line_numbers(basicblock *entryblock)
3721
10.7k
{
3722
58.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3723
47.9k
        cfg_instr *last = basicblock_last_instr(b);
3724
47.9k
        if (last == NULL) {
3725
8.74k
            continue;
3726
8.74k
        }
3727
3728
39.1k
        location prev_location = NO_LOCATION;
3729
439k
        for (int i = 0; i < b->b_iused; i++) {
3730
400k
            maybe_propagate_location(b, i, prev_location);
3731
400k
            prev_location = b->b_instr[i].i_loc;
3732
400k
        }
3733
39.1k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3734
12.5k
            if (b->b_next->b_iused > 0) {
3735
12.5k
                maybe_propagate_location(b->b_next, 0, prev_location);
3736
12.5k
            }
3737
12.5k
        }
3738
39.1k
        if (is_jump(last)) {
3739
15.3k
            basicblock *target = last->i_target;
3740
15.3k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3741
1
                target = target->b_next;
3742
1
            }
3743
15.3k
            if (target->b_predecessors == 1) {
3744
7.09k
                maybe_propagate_location(target, 0, prev_location);
3745
7.09k
            }
3746
15.3k
        }
3747
39.1k
    }
3748
10.7k
}
3749
3750
static int
3751
resolve_line_numbers(cfg_builder *g, int firstlineno)
3752
10.7k
{
3753
10.7k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3754
10.7k
    propagate_line_numbers(g->g_entryblock);
3755
10.7k
    return SUCCESS;
3756
10.7k
}
3757
3758
int
3759
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3760
                        int nlocals, int nparams, int firstlineno)
3761
5.38k
{
3762
5.38k
    assert(cfg_builder_check(g));
3763
    /** Preprocessing **/
3764
    /* Map labels to targets and mark exception handlers */
3765
5.38k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3766
5.38k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3767
5.38k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3768
3769
    /** Optimization **/
3770
3771
5.38k
    _Py_hashtable_t *consts_index = _Py_hashtable_new(
3772
5.38k
        _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
3773
5.38k
    if (consts_index == NULL) {
3774
0
        PyErr_NoMemory();
3775
0
        return ERROR;
3776
0
    }
3777
3778
35.2k
    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
3779
29.8k
        PyObject *item = PyList_GET_ITEM(consts, i);
3780
29.8k
        if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
3781
0
            continue;
3782
0
        }
3783
29.8k
        if (_Py_hashtable_set(consts_index, (void *)item,
3784
29.8k
                              (void *)(uintptr_t)i) < 0) {
3785
0
            _Py_hashtable_destroy(consts_index);
3786
0
            PyErr_NoMemory();
3787
0
            return ERROR;
3788
0
        }
3789
29.8k
    }
3790
3791
5.38k
    int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
3792
3793
5.38k
    _Py_hashtable_destroy(consts_index);
3794
3795
5.38k
    RETURN_IF_ERROR(ret);
3796
3797
5.38k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3798
5.38k
    RETURN_IF_ERROR(
3799
5.38k
        add_checks_for_loads_of_uninitialized_variables(
3800
5.38k
            g->g_entryblock, nlocals, nparams));
3801
5.38k
    RETURN_IF_ERROR(insert_superinstructions(g));
3802
3803
5.38k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3804
5.38k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3805
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3806
    // assert(all_exits_have_lineno(g->g_entryblock));
3807
5.38k
    return SUCCESS;
3808
5.38k
}
3809
3810
static int *
3811
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3812
5.38k
{
3813
5.38k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3814
5.38k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3815
5.38k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3816
3817
5.38k
    int noffsets = ncellvars + nfreevars;
3818
5.38k
    int *fixed = PyMem_New(int, noffsets);
3819
5.38k
    if (fixed == NULL) {
3820
0
        PyErr_NoMemory();
3821
0
        return NULL;
3822
0
    }
3823
7.36k
    for (int i = 0; i < noffsets; i++) {
3824
1.98k
        fixed[i] = nlocals + i;
3825
1.98k
    }
3826
3827
5.38k
    PyObject *varname, *cellindex;
3828
5.38k
    Py_ssize_t pos = 0;
3829
6.45k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3830
1.07k
        PyObject *varindex;
3831
1.07k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3832
0
            goto error;
3833
0
        }
3834
1.07k
        if (varindex == NULL) {
3835
555
            continue;
3836
555
        }
3837
3838
516
        int argoffset = PyLong_AsInt(varindex);
3839
516
        Py_DECREF(varindex);
3840
516
        if (argoffset == -1 && PyErr_Occurred()) {
3841
0
            goto error;
3842
0
        }
3843
3844
516
        int oldindex = PyLong_AsInt(cellindex);
3845
516
        if (oldindex == -1 && PyErr_Occurred()) {
3846
0
            goto error;
3847
0
        }
3848
516
        fixed[oldindex] = argoffset;
3849
516
    }
3850
5.38k
    return fixed;
3851
3852
0
error:
3853
0
    PyMem_Free(fixed);
3854
0
    return NULL;
3855
5.38k
}
3856
3857
#define IS_GENERATOR(CF) \
3858
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3859
3860
static int
3861
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3862
                           int *fixed, int nfreevars)
3863
5.38k
{
3864
5.38k
    assert(umd->u_firstlineno > 0);
3865
3866
    /* Set up cells for any variable that escapes, to be put in a closure. */
3867
5.38k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3868
5.38k
    if (ncellvars) {
3869
        // umd->u_cellvars has the cells out of order so we sort them
3870
        // before adding the MAKE_CELL instructions.  Note that we
3871
        // adjust for arg cells, which come first.
3872
769
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3873
769
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3874
769
        if (sorted == NULL) {
3875
0
            PyErr_NoMemory();
3876
0
            return ERROR;
3877
0
        }
3878
1.84k
        for (int i = 0; i < ncellvars; i++) {
3879
1.07k
            sorted[fixed[i]] = i + 1;
3880
1.07k
        }
3881
2.38k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3882
1.61k
            int oldindex = sorted[i] - 1;
3883
1.61k
            if (oldindex == -1) {
3884
546
                continue;
3885
546
            }
3886
1.07k
            cfg_instr make_cell = {
3887
1.07k
                .i_opcode = MAKE_CELL,
3888
                // This will get fixed in offset_derefs().
3889
1.07k
                .i_oparg = oldindex,
3890
1.07k
                .i_loc = NO_LOCATION,
3891
1.07k
                .i_target = NULL,
3892
1.07k
                .i_except = NULL,
3893
1.07k
            };
3894
1.07k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3895
0
                PyMem_RawFree(sorted);
3896
0
                return ERROR;
3897
0
            }
3898
1.07k
            ncellsused += 1;
3899
1.07k
        }
3900
769
        PyMem_RawFree(sorted);
3901
769
    }
3902
3903
5.38k
    if (nfreevars) {
3904
645
        cfg_instr copy_frees = {
3905
645
            .i_opcode = COPY_FREE_VARS,
3906
645
            .i_oparg = nfreevars,
3907
645
            .i_loc = NO_LOCATION,
3908
645
            .i_target = NULL,
3909
645
            .i_except = NULL,
3910
645
        };
3911
645
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3912
645
    }
3913
3914
5.38k
    return SUCCESS;
3915
5.38k
}
3916
3917
static int
3918
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3919
5.38k
{
3920
5.38k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3921
5.38k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3922
5.38k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3923
5.38k
    int noffsets = ncellvars + nfreevars;
3924
3925
    // First deal with duplicates (arg cells).
3926
5.38k
    int numdropped = 0;
3927
7.36k
    for (int i = 0; i < noffsets ; i++) {
3928
1.98k
        if (fixedmap[i] == i + nlocals) {
3929
1.47k
            fixedmap[i] -= numdropped;
3930
1.47k
        }
3931
516
        else {
3932
            // It was a duplicate (cell/arg).
3933
516
            numdropped += 1;
3934
516
        }
3935
1.98k
    }
3936
3937
    // Then update offsets, either relative to locals or by cell2arg.
3938
29.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3939
218k
        for (int i = 0; i < b->b_iused; i++) {
3940
194k
            cfg_instr *inst = &b->b_instr[i];
3941
            // This is called before extended args are generated.
3942
194k
            assert(inst->i_opcode != EXTENDED_ARG);
3943
194k
            int oldoffset = inst->i_oparg;
3944
194k
            switch(inst->i_opcode) {
3945
1.07k
                case MAKE_CELL:
3946
2.45k
                case LOAD_CLOSURE:
3947
4.64k
                case LOAD_DEREF:
3948
5.19k
                case STORE_DEREF:
3949
5.19k
                case DELETE_DEREF:
3950
5.19k
                case LOAD_FROM_DICT_OR_DEREF:
3951
5.19k
                    assert(oldoffset >= 0);
3952
5.19k
                    assert(oldoffset < noffsets);
3953
5.19k
                    assert(fixedmap[oldoffset] >= 0);
3954
5.19k
                    inst->i_oparg = fixedmap[oldoffset];
3955
194k
            }
3956
194k
        }
3957
24.0k
    }
3958
3959
5.38k
    return numdropped;
3960
5.38k
}
3961
3962
static int
3963
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
3964
5.38k
{
3965
5.38k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3966
5.38k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3967
5.38k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3968
5.38k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3969
5.38k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3970
5.38k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3971
5.38k
    assert(INT_MAX - nlocals - ncellvars > 0);
3972
5.38k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3973
5.38k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3974
5.38k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3975
5.38k
    if (cellfixedoffsets == NULL) {
3976
0
        return ERROR;
3977
0
    }
3978
3979
    // This must be called before fix_cell_offsets().
3980
5.38k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
3981
0
        PyMem_Free(cellfixedoffsets);
3982
0
        return ERROR;
3983
0
    }
3984
3985
5.38k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3986
5.38k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3987
5.38k
    cellfixedoffsets = NULL;
3988
5.38k
    if (numdropped < 0) {
3989
0
        return ERROR;
3990
0
    }
3991
3992
5.38k
    nlocalsplus -= numdropped;
3993
5.38k
    return nlocalsplus;
3994
5.38k
}
3995
3996
cfg_builder *
3997
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3998
5.38k
{
3999
5.38k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
4000
0
        return NULL;
4001
0
    }
4002
5.38k
    cfg_builder *g = _PyCfgBuilder_New();
4003
5.38k
    if (g == NULL) {
4004
0
        return NULL;
4005
0
    }
4006
222k
    for (int i = 0; i < seq->s_used; i++) {
4007
216k
        seq->s_instrs[i].i_target = 0;
4008
216k
    }
4009
222k
    for (int i = 0; i < seq->s_used; i++) {
4010
216k
        _PyInstruction *instr = &seq->s_instrs[i];
4011
216k
        if (HAS_TARGET(instr->i_opcode)) {
4012
10.5k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
4013
10.5k
            seq->s_instrs[instr->i_oparg].i_target = 1;
4014
10.5k
        }
4015
216k
    }
4016
5.38k
    int offset = 0;
4017
222k
    for (int i = 0; i < seq->s_used; i++) {
4018
216k
        _PyInstruction *instr = &seq->s_instrs[i];
4019
216k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
4020
798
            if (seq->s_annotations_code != NULL) {
4021
1
                assert(seq->s_annotations_code->s_labelmap_size == 0
4022
1
                    && seq->s_annotations_code->s_nested == NULL);
4023
4
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
4024
3
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
4025
3
                    assert(!HAS_TARGET(ann_instr->i_opcode));
4026
3
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
4027
0
                        goto error;
4028
0
                    }
4029
3
                }
4030
1
                offset += seq->s_annotations_code->s_used - 1;
4031
1
            }
4032
797
            else {
4033
797
                offset -= 1;
4034
797
            }
4035
798
            continue;
4036
798
        }
4037
216k
        if (instr->i_target) {
4038
8.56k
            jump_target_label lbl_ = {i + offset};
4039
8.56k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
4040
0
                goto error;
4041
0
            }
4042
8.56k
        }
4043
216k
        int opcode = instr->i_opcode;
4044
216k
        int oparg = instr->i_oparg;
4045
216k
        if (HAS_TARGET(opcode)) {
4046
10.5k
            oparg += offset;
4047
10.5k
        }
4048
216k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
4049
0
            goto error;
4050
0
        }
4051
216k
    }
4052
5.38k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
4053
0
        goto error;
4054
0
    }
4055
5.38k
    return g;
4056
0
error:
4057
0
    _PyCfgBuilder_Free(g);
4058
0
    return NULL;
4059
5.38k
}
4060
4061
int
4062
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
4063
5.38k
{
4064
5.38k
    int lbl = 0;
4065
29.6k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
4066
24.2k
        b->b_label = (jump_target_label){lbl};
4067
24.2k
        lbl += 1;
4068
24.2k
    }
4069
29.6k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
4070
24.2k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
4071
222k
        for (int i = 0; i < b->b_iused; i++) {
4072
198k
            cfg_instr *instr = &b->b_instr[i];
4073
198k
            if (HAS_TARGET(instr->i_opcode)) {
4074
                /* Set oparg to the label id (it will later be mapped to an offset) */
4075
7.72k
                instr->i_oparg = instr->i_target->b_label.id;
4076
7.72k
            }
4077
198k
            RETURN_IF_ERROR(
4078
198k
                _PyInstructionSequence_Addop(
4079
198k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
4080
4081
198k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
4082
198k
            if (instr->i_except != NULL) {
4083
27.5k
                hi->h_label = instr->i_except->b_label.id;
4084
27.5k
                hi->h_startdepth = instr->i_except->b_startdepth;
4085
27.5k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
4086
27.5k
            }
4087
170k
            else {
4088
170k
                hi->h_label = -1;
4089
170k
            }
4090
198k
        }
4091
24.2k
    }
4092
5.38k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
4093
0
        return ERROR;
4094
0
    }
4095
5.38k
    return SUCCESS;
4096
5.38k
}
4097
4098
4099
int
4100
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
4101
                                     _PyCompile_CodeUnitMetadata *umd,
4102
                                     int *stackdepth, int *nlocalsplus,
4103
                                     _PyInstructionSequence *seq)
4104
5.38k
{
4105
5.38k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
4106
4107
5.38k
    *stackdepth = calculate_stackdepth(g);
4108
5.38k
    if (*stackdepth < 0) {
4109
0
        return ERROR;
4110
0
    }
4111
4112
5.38k
    *nlocalsplus = prepare_localsplus(umd, g);
4113
5.38k
    if (*nlocalsplus < 0) {
4114
0
        return ERROR;
4115
0
    }
4116
4117
5.38k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4118
4119
    /* Order of basic blocks must have been determined by now */
4120
4121
5.38k
    RETURN_IF_ERROR(normalize_jumps(g));
4122
5.38k
    assert(no_redundant_jumps(g));
4123
4124
    /* Can't modify the bytecode after inserting instructions that produce
4125
     * borrowed references.
4126
     */
4127
5.38k
    RETURN_IF_ERROR(optimize_load_fast(g));
4128
4129
    /* Can't modify the bytecode after computing jump offsets. */
4130
5.38k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4131
0
        return ERROR;
4132
0
    }
4133
4134
5.38k
    return SUCCESS;
4135
5.38k
}
4136
4137
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4138
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4139
 */
4140
int
4141
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4142
0
{
4143
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4144
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4145
0
    return SUCCESS;
4146
0
}
4147
4148
/* Exported API functions */
4149
4150
int
4151
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4152
0
{
4153
0
    stack_effects effs;
4154
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4155
0
        return PY_INVALID_STACK_EFFECT;
4156
0
    }
4157
0
    return effs.net;
4158
0
}
4159
4160
int
4161
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4162
28
{
4163
28
    stack_effects effs;
4164
28
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4165
0
        return PY_INVALID_STACK_EFFECT;
4166
0
    }
4167
28
    return effs.net;
4168
28
}
4169
4170
/* Access to compiler optimizations for unit tests.
4171
4172
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4173
 * a CFG, optimizes it and converts back to an instruction list.
4174
 */
4175
4176
static PyObject *
4177
cfg_to_instruction_sequence(cfg_builder *g)
4178
0
{
4179
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4180
0
    if (seq == NULL) {
4181
0
        return NULL;
4182
0
    }
4183
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4184
0
        PyInstructionSequence_Fini(seq);
4185
0
        return NULL;
4186
0
    }
4187
0
    return (PyObject*)seq;
4188
0
}
4189
4190
PyObject *
4191
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4192
0
{
4193
0
    if (!_PyInstructionSequence_Check(seq)) {
4194
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4195
0
        return NULL;
4196
0
    }
4197
0
    if (!PyList_Check(consts)) {
4198
0
        PyErr_SetString(PyExc_TypeError, "consts must be a list");
4199
0
        return NULL;
4200
0
    }
4201
0
    PyObject *const_cache = PyDict_New();
4202
0
    if (const_cache == NULL) {
4203
0
        return NULL;
4204
0
    }
4205
4206
0
    PyObject *res = NULL;
4207
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4208
0
    if (g == NULL) {
4209
0
        goto error;
4210
0
    }
4211
0
    int nparams = 0, firstlineno = 1;
4212
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4213
0
                                nparams, firstlineno) < 0) {
4214
0
        goto error;
4215
0
    }
4216
4217
0
    if (calculate_stackdepth(g) == ERROR) {
4218
0
        goto error;
4219
0
    }
4220
4221
0
    if (optimize_load_fast(g) != SUCCESS) {
4222
0
        goto error;
4223
0
    }
4224
4225
0
    res = cfg_to_instruction_sequence(g);
4226
0
error:
4227
0
    Py_DECREF(const_cache);
4228
0
    _PyCfgBuilder_Free(g);
4229
0
    return res;
4230
0
}