Coverage Report

Created: 2026-06-09 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
#include "pycore_hashtable.h"     // _Py_hashtable_t
10
11
#include "pycore_opcode_utils.h"
12
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
13
#include "pycore_pystate.h"         // _PyInterpreterState_GET()
14
#include "pycore_stackref.h"        // PyStackRef_AsPyObjectBorrow()
15
16
#include <stdbool.h>
17
18
19
#undef SUCCESS
20
#undef ERROR
21
704k
#define SUCCESS 0
22
5.31k
#define ERROR -1
23
24
#define RETURN_IF_ERROR(X)  \
25
1.06M
    if ((X) == -1) {        \
26
0
        return ERROR;       \
27
0
    }
28
29
226k
#define DEFAULT_BLOCK_SIZE 16
30
31
typedef _Py_SourceLocation location;
32
typedef _PyJumpTargetLabel jump_target_label;
33
34
typedef struct _PyCfgInstruction {
35
    int i_opcode;
36
    int i_oparg;
37
    _Py_SourceLocation i_loc;
38
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
39
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
40
} cfg_instr;
41
42
typedef struct _PyCfgBasicblock {
43
    /* Each basicblock in a compilation unit is linked via b_list in the
44
       reverse order that the block are allocated.  b_list points to the next
45
       block in this list, not to be confused with b_next, which is next by
46
       control flow. */
47
    struct _PyCfgBasicblock *b_list;
48
    /* The label of this block if it is a jump target, -1 otherwise */
49
    _PyJumpTargetLabel b_label;
50
    /* Exception stack at start of block, used by assembler to create the exception handling table */
51
    struct _PyCfgExceptStack *b_exceptstack;
52
    /* pointer to an array of instructions, initially NULL */
53
    cfg_instr *b_instr;
54
    /* If b_next is non-NULL, it is a pointer to the next
55
       block reached by normal control flow. */
56
    struct _PyCfgBasicblock *b_next;
57
    /* number of instructions used */
58
    int b_iused;
59
    /* length of instruction array (b_instr) */
60
    int b_ialloc;
61
    /* Used by add_checks_for_loads_of_unknown_variables */
62
    uint64_t b_unsafe_locals_mask;
63
    /* Number of predecessors that a block has. */
64
    int b_predecessors;
65
    /* depth of stack upon entry of block, computed by stackdepth() */
66
    int b_startdepth;
67
    /* Basic block is an exception handler that preserves lasti */
68
    unsigned b_preserve_lasti : 1;
69
    /* Used by compiler passes to mark whether they have visited a basic block. */
70
    unsigned b_visited : 1;
71
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
72
    unsigned b_except_handler : 1;
73
    /* b_cold is true if this block is not perf critical (like an exception handler) */
74
    unsigned b_cold : 1;
75
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
76
    unsigned b_warm : 1;
77
} basicblock;
78
79
80
struct _PyCfgBuilder {
81
    /* The entryblock, at which control flow begins. All blocks of the
82
       CFG are reachable through the b_next links */
83
    struct _PyCfgBasicblock *g_entryblock;
84
    /* Pointer to the most recently allocated block.  By following
85
       b_list links, you can reach all allocated blocks. */
86
    struct _PyCfgBasicblock *g_block_list;
87
    /* pointer to the block currently being constructed */
88
    struct _PyCfgBasicblock *g_curblock;
89
    /* label for the next instruction to be placed */
90
    _PyJumpTargetLabel g_current_label;
91
};
92
93
typedef struct _PyCfgBuilder cfg_builder;
94
95
231k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
96
231k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
97
98
#define LOCATION(LNO, END_LNO, COL, END_COL) \
99
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
100
101
static inline int
102
is_block_push(cfg_instr *i)
103
1.00M
{
104
1.00M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
105
1.00M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
106
1.00M
}
107
108
static inline int
109
is_jump(cfg_instr *i)
110
875k
{
111
875k
    return OPCODE_HAS_JUMP(i->i_opcode);
112
875k
}
113
114
/* One arg*/
115
#define INSTR_SET_OP1(I, OP, ARG) \
116
34.1k
    do { \
117
34.1k
        assert(OPCODE_HAS_ARG(OP)); \
118
34.1k
        cfg_instr *_instr__ptr_ = (I); \
119
34.1k
        _instr__ptr_->i_opcode = (OP); \
120
34.1k
        _instr__ptr_->i_oparg = (ARG); \
121
34.1k
    } while (0);
122
123
/* No args*/
124
#define INSTR_SET_OP0(I, OP) \
125
42.6k
    do { \
126
42.6k
        assert(!OPCODE_HAS_ARG(OP)); \
127
42.6k
        cfg_instr *_instr__ptr_ = (I); \
128
42.6k
        _instr__ptr_->i_opcode = (OP); \
129
42.6k
        _instr__ptr_->i_oparg = 0; \
130
42.6k
    } while (0);
131
132
#define INSTR_SET_LOC(I, LOC) \
133
3.86k
    do { \
134
3.86k
        cfg_instr *_instr__ptr_ = (I); \
135
3.86k
        _instr__ptr_->i_loc = (LOC); \
136
3.86k
    } while (0);
137
138
/***** Blocks *****/
139
140
/* Returns the offset of the next instruction in the current block's
141
   b_instr array.  Resizes the b_instr as necessary.
142
   Returns -1 on failure.
143
*/
144
static int
145
basicblock_next_instr(basicblock *b)
146
226k
{
147
226k
    assert(b != NULL);
148
226k
    _Py_c_array_t array = {
149
226k
        .array = (void*)b->b_instr,
150
226k
        .allocated_entries = b->b_ialloc,
151
226k
        .item_size = sizeof(cfg_instr),
152
226k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
153
226k
    };
154
155
226k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
156
226k
    b->b_instr = array.array;
157
226k
    b->b_ialloc = array.allocated_entries;
158
226k
    return b->b_iused++;
159
226k
}
160
161
static cfg_instr *
162
749k
basicblock_last_instr(const basicblock *b) {
163
749k
    assert(b->b_iused >= 0);
164
749k
    if (b->b_iused > 0) {
165
683k
        assert(b->b_instr != NULL);
166
683k
        return &b->b_instr[b->b_iused - 1];
167
683k
    }
168
65.4k
    return NULL;
169
749k
}
170
171
/* Allocate a new block and return a pointer to it.
172
   Returns NULL on error.
173
*/
174
175
static basicblock *
176
cfg_builder_new_block(cfg_builder *g)
177
24.1k
{
178
24.1k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
179
24.1k
    if (b == NULL) {
180
0
        PyErr_NoMemory();
181
0
        return NULL;
182
0
    }
183
    /* Extend the singly linked list of blocks with new block. */
184
24.1k
    b->b_list = g->g_block_list;
185
24.1k
    g->g_block_list = b;
186
24.1k
    b->b_label = NO_LABEL;
187
24.1k
    return b;
188
24.1k
}
189
190
static int
191
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
192
220k
{
193
220k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
194
220k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
195
220k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
196
220k
    assert(0 <= oparg && oparg < (1 << 30));
197
198
220k
    int off = basicblock_next_instr(b);
199
220k
    if (off < 0) {
200
0
        return ERROR;
201
0
    }
202
220k
    cfg_instr *i = &b->b_instr[off];
203
220k
    i->i_opcode = opcode;
204
220k
    i->i_oparg = oparg;
205
220k
    i->i_loc = loc;
206
    // memory is already zero initialized
207
220k
    assert(i->i_target == NULL);
208
220k
    assert(i->i_except == NULL);
209
210
220k
    return SUCCESS;
211
220k
}
212
213
static int
214
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
215
383
{
216
383
    cfg_instr *last = basicblock_last_instr(b);
217
383
    if (last && is_jump(last)) {
218
0
        return ERROR;
219
0
    }
220
221
383
    RETURN_IF_ERROR(
222
383
        basicblock_addop(b, opcode, target->b_label.id, loc));
223
383
    last = basicblock_last_instr(b);
224
383
    assert(last && last->i_opcode == opcode);
225
383
    last->i_target = target;
226
383
    return SUCCESS;
227
383
}
228
229
static inline int
230
basicblock_append_instructions(basicblock *to, basicblock *from)
231
1.52k
{
232
4.39k
    for (int i = 0; i < from->b_iused; i++) {
233
2.86k
        int n = basicblock_next_instr(to);
234
2.86k
        if (n < 0) {
235
0
            return ERROR;
236
0
        }
237
2.86k
        to->b_instr[n] = from->b_instr[i];
238
2.86k
    }
239
1.52k
    return SUCCESS;
240
1.52k
}
241
242
static inline int
243
228k
basicblock_nofallthrough(const basicblock *b) {
244
228k
    cfg_instr *last = basicblock_last_instr(b);
245
228k
    return (last &&
246
214k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
247
118k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
248
228k
}
249
250
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
251
372k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
252
253
static basicblock *
254
copy_basicblock(cfg_builder *g, basicblock *block)
255
381
{
256
    /* Cannot copy a block if it has a fallthrough, since
257
     * a block can only have one fallthrough predecessor.
258
     */
259
381
    assert(BB_NO_FALLTHROUGH(block));
260
381
    basicblock *result = cfg_builder_new_block(g);
261
381
    if (result == NULL) {
262
0
        return NULL;
263
0
    }
264
381
    if (basicblock_append_instructions(result, block) < 0) {
265
0
        return NULL;
266
0
    }
267
381
    return result;
268
381
}
269
270
static int
271
2.86k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
272
2.86k
    RETURN_IF_ERROR(basicblock_next_instr(block));
273
47.6k
    for (int i = block->b_iused - 1; i > pos; i--) {
274
44.7k
        block->b_instr[i] = block->b_instr[i-1];
275
44.7k
    }
276
2.86k
    block->b_instr[pos] = *instr;
277
2.86k
    return SUCCESS;
278
2.86k
}
279
280
/* For debugging purposes only */
281
#if 0
282
static void
283
dump_instr(cfg_instr *i)
284
{
285
    const char *jump = is_jump(i) ? "jump " : "";
286
287
    char arg[128];
288
289
    *arg = '\0';
290
    if (OPCODE_HAS_ARG(i->i_opcode)) {
291
        sprintf(arg, "arg: %d ", i->i_oparg);
292
    }
293
    if (HAS_TARGET(i->i_opcode)) {
294
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
295
    }
296
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
297
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
298
}
299
300
static inline int
301
basicblock_returns(const basicblock *b) {
302
    cfg_instr *last = basicblock_last_instr(b);
303
    return last && IS_RETURN_OPCODE(last->i_opcode);
304
}
305
306
static void
307
dump_basicblock(const basicblock *b, bool highlight)
308
{
309
    const char *b_return = basicblock_returns(b) ? "return " : "";
310
    if (highlight) {
311
        fprintf(stderr, ">>> ");
312
    }
313
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
314
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
315
        b->b_startdepth, b->b_predecessors, b_return);
316
    int depth = b->b_startdepth;
317
    if (b->b_instr) {
318
        int i;
319
        for (i = 0; i < b->b_iused; i++) {
320
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
321
            dump_instr(b->b_instr + i);
322
323
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
324
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
325
            depth += (pushed - popped);
326
        }
327
    }
328
}
329
330
void
331
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
332
{
333
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
334
        dump_basicblock(b, b == mark);
335
    }
336
}
337
338
#endif
339
340
341
/***** CFG construction and modification *****/
342
343
static basicblock *
344
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
345
18.0k
{
346
18.0k
    assert(block != NULL);
347
18.0k
    g->g_curblock->b_next = block;
348
18.0k
    g->g_curblock = block;
349
18.0k
    return block;
350
18.0k
}
351
352
static inline int
353
36.7k
basicblock_exits_scope(const basicblock *b) {
354
36.7k
    cfg_instr *last = basicblock_last_instr(b);
355
36.7k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
356
36.7k
}
357
358
static inline int
359
21.2k
basicblock_has_eval_break(const basicblock *b) {
360
109k
    for (int i = 0; i < b->b_iused; i++) {
361
96.3k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
362
8.23k
            return true;
363
8.23k
        }
364
96.3k
    }
365
12.9k
    return false;
366
21.2k
}
367
368
static bool
369
cfg_builder_current_block_is_terminated(cfg_builder *g)
370
223k
{
371
223k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
372
223k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
373
15.4k
        return true;
374
15.4k
    }
375
207k
    if (IS_LABEL(g->g_current_label)) {
376
2.58k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
377
2.58k
            return true;
378
2.58k
        }
379
0
        else {
380
            /* current block is empty, label it */
381
0
            g->g_curblock->b_label = g->g_current_label;
382
0
            g->g_current_label = NO_LABEL;
383
0
        }
384
2.58k
    }
385
205k
    return false;
386
207k
}
387
388
static int
389
cfg_builder_maybe_start_new_block(cfg_builder *g)
390
223k
{
391
223k
    if (cfg_builder_current_block_is_terminated(g)) {
392
18.0k
        basicblock *b = cfg_builder_new_block(g);
393
18.0k
        if (b == NULL) {
394
0
            return ERROR;
395
0
        }
396
18.0k
        b->b_label = g->g_current_label;
397
18.0k
        g->g_current_label = NO_LABEL;
398
18.0k
        cfg_builder_use_next_block(g, b);
399
18.0k
    }
400
223k
    return SUCCESS;
401
223k
}
402
403
#ifndef NDEBUG
404
static bool
405
cfg_builder_check(cfg_builder *g)
406
{
407
    assert(g->g_entryblock->b_iused > 0);
408
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
409
        assert(!_PyMem_IsPtrFreed(block));
410
        if (block->b_instr != NULL) {
411
            assert(block->b_ialloc > 0);
412
            assert(block->b_iused >= 0);
413
            assert(block->b_ialloc >= block->b_iused);
414
        }
415
        else {
416
            assert (block->b_iused == 0);
417
            assert (block->b_ialloc == 0);
418
        }
419
    }
420
    return true;
421
}
422
#endif
423
424
static int
425
init_cfg_builder(cfg_builder *g)
426
5.34k
{
427
5.34k
    g->g_block_list = NULL;
428
5.34k
    basicblock *block = cfg_builder_new_block(g);
429
5.34k
    if (block == NULL) {
430
0
        return ERROR;
431
0
    }
432
5.34k
    g->g_curblock = g->g_entryblock = block;
433
5.34k
    g->g_current_label = NO_LABEL;
434
5.34k
    return SUCCESS;
435
5.34k
}
436
437
cfg_builder *
438
_PyCfgBuilder_New(void)
439
5.34k
{
440
5.34k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
441
5.34k
    if (g == NULL) {
442
0
        PyErr_NoMemory();
443
0
        return NULL;
444
0
    }
445
5.34k
    memset(g, 0, sizeof(cfg_builder));
446
5.34k
    if (init_cfg_builder(g) < 0) {
447
0
        PyMem_Free(g);
448
0
        return NULL;
449
0
    }
450
5.34k
    return g;
451
5.34k
}
452
453
void
454
_PyCfgBuilder_Free(cfg_builder *g)
455
5.34k
{
456
5.34k
    if (g == NULL) {
457
0
        return;
458
0
    }
459
5.34k
    assert(cfg_builder_check(g));
460
5.34k
    basicblock *b = g->g_block_list;
461
29.4k
    while (b != NULL) {
462
24.1k
        if (b->b_instr) {
463
24.1k
            PyMem_Free((void *)b->b_instr);
464
24.1k
        }
465
24.1k
        basicblock *next = b->b_list;
466
24.1k
        PyMem_Free((void *)b);
467
24.1k
        b = next;
468
24.1k
    }
469
5.34k
    PyMem_Free(g);
470
5.34k
}
471
472
int
473
_PyCfgBuilder_CheckSize(cfg_builder *g)
474
5.34k
{
475
5.34k
    int nblocks = 0;
476
28.7k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
477
23.4k
        nblocks++;
478
23.4k
    }
479
5.34k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
480
0
        PyErr_NoMemory();
481
0
        return ERROR;
482
0
    }
483
5.34k
    return SUCCESS;
484
5.34k
}
485
486
int
487
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
488
8.54k
{
489
8.54k
    g->g_current_label = lbl;
490
8.54k
    return cfg_builder_maybe_start_new_block(g);
491
8.54k
}
492
493
int
494
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
495
214k
{
496
214k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
497
214k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
498
214k
}
499
500
501
static basicblock *
502
next_nonempty_block(basicblock *b)
503
29.5k
{
504
30.6k
    while (b && b->b_iused == 0) {
505
1.08k
        b = b->b_next;
506
1.08k
    }
507
29.5k
    return b;
508
29.5k
}
509
510
/***** debugging helpers *****/
511
512
#ifndef NDEBUG
513
static int remove_redundant_nops(cfg_builder *g);
514
515
static bool
516
no_redundant_nops(cfg_builder *g) {
517
    if (remove_redundant_nops(g) != 0) {
518
        return false;
519
    }
520
    return true;
521
}
522
523
static bool
524
no_redundant_jumps(cfg_builder *g) {
525
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
526
        cfg_instr *last = basicblock_last_instr(b);
527
        if (last != NULL) {
528
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
529
                basicblock *next = next_nonempty_block(b->b_next);
530
                basicblock *jump_target = next_nonempty_block(last->i_target);
531
                if (jump_target == next) {
532
                    assert(next);
533
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
534
                        assert(0);
535
                        return false;
536
                    }
537
                }
538
            }
539
        }
540
    }
541
    return true;
542
}
543
#endif
544
545
/***** CFG preprocessing (jump targets and exceptions) *****/
546
547
static int
548
24.1k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
549
24.1k
    cfg_instr *last = basicblock_last_instr(b);
550
24.1k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
551
18.8k
        return SUCCESS;
552
18.8k
    }
553
24.1k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
554
555
5.25k
    bool is_forward = last->i_target->b_visited == 0;
556
5.25k
    if (is_forward) {
557
5.05k
        RETURN_IF_ERROR(
558
5.05k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
559
5.05k
        return SUCCESS;
560
5.05k
    }
561
562
196
    int reversed_opcode = 0;
563
196
    switch(last->i_opcode) {
564
0
        case POP_JUMP_IF_NOT_NONE:
565
0
            reversed_opcode = POP_JUMP_IF_NONE;
566
0
            break;
567
16
        case POP_JUMP_IF_NONE:
568
16
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
569
16
            break;
570
141
        case POP_JUMP_IF_FALSE:
571
141
            reversed_opcode = POP_JUMP_IF_TRUE;
572
141
            break;
573
39
        case POP_JUMP_IF_TRUE:
574
39
            reversed_opcode = POP_JUMP_IF_FALSE;
575
39
            break;
576
196
    }
577
    /* transform 'conditional jump T' to
578
     * 'reversed_jump b_next' followed by 'jump_backwards T'
579
     */
580
581
196
    basicblock *target = last->i_target;
582
196
    basicblock *backwards_jump = cfg_builder_new_block(g);
583
196
    if (backwards_jump == NULL) {
584
0
        return ERROR;
585
0
    }
586
196
    RETURN_IF_ERROR(
587
196
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
588
196
    RETURN_IF_ERROR(
589
196
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
590
196
    backwards_jump->b_startdepth = target->b_startdepth;
591
196
    last->i_opcode = reversed_opcode;
592
196
    last->i_target = b->b_next;
593
594
196
    backwards_jump->b_cold = b->b_cold;
595
196
    backwards_jump->b_next = b->b_next;
596
196
    b->b_next = backwards_jump;
597
196
    return SUCCESS;
598
196
}
599
600
601
static int
602
normalize_jumps(cfg_builder *g)
603
5.34k
{
604
5.34k
    basicblock *entryblock = g->g_entryblock;
605
29.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
23.9k
        b->b_visited = 0;
607
23.9k
    }
608
29.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
609
24.1k
        b->b_visited = 1;
610
24.1k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
611
24.1k
    }
612
5.34k
    return SUCCESS;
613
5.34k
}
614
615
static int
616
5.34k
check_cfg(cfg_builder *g) {
617
28.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
618
        /* Raise SystemError if jump or exit is not last instruction in the block. */
619
238k
        for (int i = 0; i < b->b_iused; i++) {
620
214k
            int opcode = b->b_instr[i].i_opcode;
621
214k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
622
214k
            if (IS_TERMINATOR_OPCODE(opcode)) {
623
20.8k
                if (i != b->b_iused - 1) {
624
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
625
0
                    return ERROR;
626
0
                }
627
20.8k
            }
628
214k
        }
629
23.4k
    }
630
5.34k
    return SUCCESS;
631
5.34k
}
632
633
static int
634
get_max_label(basicblock *entryblock)
635
20.1k
{
636
20.1k
    int lbl = -1;
637
113k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
638
93.3k
        if (b->b_label.id > lbl) {
639
31.5k
            lbl = b->b_label.id;
640
31.5k
        }
641
93.3k
    }
642
20.1k
    return lbl;
643
20.1k
}
644
645
/* Calculate the actual jump target from the target_label */
646
static int
647
translate_jump_labels_to_targets(basicblock *entryblock)
648
5.34k
{
649
5.34k
    int max_label = get_max_label(entryblock);
650
5.34k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
651
5.34k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
652
5.34k
    if (!label2block) {
653
0
        PyErr_NoMemory();
654
0
        return ERROR;
655
0
    }
656
5.34k
    memset(label2block, 0, mapsize);
657
28.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
658
23.4k
        if (b->b_label.id >= 0) {
659
8.54k
            label2block[b->b_label.id] = b;
660
8.54k
        }
661
23.4k
    }
662
28.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
663
238k
        for (int i = 0; i < b->b_iused; i++) {
664
214k
            cfg_instr *instr = &b->b_instr[i];
665
214k
            assert(instr->i_target == NULL);
666
214k
            if (HAS_TARGET(instr->i_opcode)) {
667
10.4k
                int lbl = instr->i_oparg;
668
10.4k
                assert(lbl >= 0 && lbl <= max_label);
669
10.4k
                instr->i_target = label2block[lbl];
670
10.4k
                assert(instr->i_target != NULL);
671
10.4k
                assert(instr->i_target->b_label.id == lbl);
672
10.4k
            }
673
214k
        }
674
23.4k
    }
675
5.34k
    PyMem_Free(label2block);
676
5.34k
    return SUCCESS;
677
5.34k
}
678
679
static int
680
5.34k
mark_except_handlers(basicblock *entryblock) {
681
#ifndef NDEBUG
682
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
683
        assert(!b->b_except_handler);
684
    }
685
#endif
686
28.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
687
238k
        for (int i=0; i < b->b_iused; i++) {
688
214k
            cfg_instr *instr = &b->b_instr[i];
689
214k
            if (is_block_push(instr)) {
690
1.67k
                instr->i_target->b_except_handler = 1;
691
1.67k
            }
692
214k
        }
693
23.4k
    }
694
5.34k
    return SUCCESS;
695
5.34k
}
696
697
698
struct _PyCfgExceptStack {
699
    basicblock *handlers[CO_MAXBLOCKS+2];
700
    int depth;
701
};
702
703
704
static basicblock *
705
1.66k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
706
1.66k
    assert(is_block_push(setup));
707
1.66k
    int opcode = setup->i_opcode;
708
1.66k
    basicblock * target = setup->i_target;
709
1.66k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
710
1.02k
        target->b_preserve_lasti = 1;
711
1.02k
    }
712
1.66k
    assert(stack->depth <= CO_MAXBLOCKS);
713
1.66k
    stack->handlers[++stack->depth] = target;
714
1.66k
    return target;
715
1.66k
}
716
717
static basicblock *
718
1.37k
pop_except_block(struct _PyCfgExceptStack *stack) {
719
1.37k
    assert(stack->depth > 0);
720
1.37k
    return stack->handlers[--stack->depth];
721
1.37k
}
722
723
static basicblock *
724
19.4k
except_stack_top(struct _PyCfgExceptStack *stack) {
725
19.4k
    return stack->handlers[stack->depth];
726
19.4k
}
727
728
static struct _PyCfgExceptStack *
729
5.34k
make_except_stack(void) {
730
5.34k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
731
5.34k
    if (new == NULL) {
732
0
        PyErr_NoMemory();
733
0
        return NULL;
734
0
    }
735
5.34k
    new->depth = 0;
736
5.34k
    new->handlers[0] = NULL;
737
5.34k
    return new;
738
5.34k
}
739
740
static struct _PyCfgExceptStack *
741
6.64k
copy_except_stack(struct _PyCfgExceptStack *stack) {
742
6.64k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
743
6.64k
    if (copy == NULL) {
744
0
        PyErr_NoMemory();
745
0
        return NULL;
746
0
    }
747
6.64k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
748
6.64k
    return copy;
749
6.64k
}
750
751
static basicblock**
752
38.6k
make_cfg_traversal_stack(basicblock *entryblock) {
753
38.6k
    int nblocks = 0;
754
223k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
755
184k
        b->b_visited = 0;
756
184k
        nblocks++;
757
184k
    }
758
38.6k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
759
38.6k
    if (!stack) {
760
0
        PyErr_NoMemory();
761
0
    }
762
38.6k
    return stack;
763
38.6k
}
764
765
/* Compute the stack effects of opcode with argument oparg.
766
767
   Some opcodes have different stack effect when jump to the target and
768
   when not jump. The 'jump' parameter specifies the case:
769
770
   * 0 -- when not jump
771
   * 1 -- when jump
772
   * -1 -- maximal
773
 */
774
typedef struct {
775
    /* The stack effect of the instruction. */
776
    int net;
777
} stack_effects;
778
779
Py_LOCAL(int)
780
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
781
200k
{
782
200k
    if (opcode < 0) {
783
0
        return -1;
784
0
    }
785
200k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
786
        // Specialized instructions are not supported.
787
0
        return -1;
788
0
    }
789
200k
    int popped = _PyOpcode_num_popped(opcode, oparg);
790
200k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
791
200k
    if (popped < 0 || pushed < 0) {
792
0
        return -1;
793
0
    }
794
200k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
795
1.66k
        effects->net = 0;
796
1.66k
        return 0;
797
1.66k
    }
798
198k
    effects->net = pushed - popped;
799
198k
    return 0;
800
200k
}
801
802
Py_LOCAL_INLINE(int)
803
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
804
23.3k
{
805
23.3k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
806
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
807
0
        return ERROR;
808
0
    }
809
23.3k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
810
19.6k
        assert(b->b_startdepth < 0);
811
19.6k
        b->b_startdepth = depth;
812
19.6k
        *(*sp)++ = b;
813
19.6k
    }
814
23.3k
    return SUCCESS;
815
23.3k
}
816
817
/* Find the flow path that needs the largest stack.  We assume that
818
 * cycles in the flow graph have no net effect on the stack depth.
819
 */
820
static int
821
calculate_stackdepth(cfg_builder *g)
822
5.34k
{
823
5.34k
    basicblock *entryblock = g->g_entryblock;
824
29.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
825
23.9k
        b->b_startdepth = INT_MIN;
826
23.9k
    }
827
5.34k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
828
5.34k
    if (!stack) {
829
0
        return ERROR;
830
0
    }
831
832
833
5.34k
    int stackdepth = -1;
834
5.34k
    int maxdepth = 0;
835
5.34k
    basicblock **sp = stack;
836
5.34k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
837
0
        goto error;
838
0
    }
839
24.9k
    while (sp != stack) {
840
19.6k
        basicblock *b = *--sp;
841
19.6k
        int depth = b->b_startdepth;
842
19.6k
        assert(depth >= 0);
843
19.6k
        basicblock *next = b->b_next;
844
200k
        for (int i = 0; i < b->b_iused; i++) {
845
191k
            cfg_instr *instr = &b->b_instr[i];
846
191k
            stack_effects effects;
847
191k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
848
0
                PyErr_Format(PyExc_SystemError,
849
0
                             "Invalid stack effect for opcode=%d, arg=%i",
850
0
                             instr->i_opcode, instr->i_oparg);
851
0
                goto error;
852
0
            }
853
191k
            int new_depth = depth + effects.net;
854
191k
            if (new_depth < 0) {
855
0
                PyErr_Format(PyExc_ValueError,
856
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
857
0
                goto error;
858
0
            }
859
191k
            maxdepth = Py_MAX(maxdepth, depth);
860
191k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
861
9.17k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
862
0
                    PyErr_Format(PyExc_SystemError,
863
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
864
0
                                 instr->i_opcode, instr->i_oparg);
865
0
                    goto error;
866
0
                }
867
9.17k
                int target_depth = depth + effects.net;
868
9.17k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
869
9.17k
                maxdepth = Py_MAX(maxdepth, depth);
870
9.17k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
871
0
                    goto error;
872
0
                }
873
9.17k
            }
874
191k
            depth = new_depth;
875
191k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
876
191k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
877
189k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
878
10.7k
            {
879
                /* remaining code is dead */
880
10.7k
                next = NULL;
881
10.7k
                break;
882
10.7k
            }
883
191k
        }
884
19.6k
        if (next != NULL) {
885
8.84k
            assert(BB_HAS_FALLTHROUGH(b));
886
8.84k
            if (stackdepth_push(&sp, next, depth) < 0) {
887
0
                goto error;
888
0
            }
889
8.84k
        }
890
19.6k
    }
891
5.34k
    stackdepth = maxdepth;
892
5.34k
error:
893
5.34k
    PyMem_Free(stack);
894
5.34k
    return stackdepth;
895
5.34k
}
896
897
static int
898
5.34k
label_exception_targets(basicblock *entryblock) {
899
5.34k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
900
5.34k
    if (todo_stack == NULL) {
901
0
        return ERROR;
902
0
    }
903
5.34k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
904
5.34k
    if (except_stack == NULL) {
905
0
        PyMem_Free(todo_stack);
906
0
        PyErr_NoMemory();
907
0
        return ERROR;
908
0
    }
909
5.34k
    except_stack->depth = 0;
910
5.34k
    todo_stack[0] = entryblock;
911
5.34k
    entryblock->b_visited = 1;
912
5.34k
    entryblock->b_exceptstack = except_stack;
913
5.34k
    basicblock **todo = &todo_stack[1];
914
5.34k
    basicblock *handler = NULL;
915
24.7k
    while (todo > todo_stack) {
916
19.4k
        todo--;
917
19.4k
        basicblock *b = todo[0];
918
19.4k
        assert(b->b_visited == 1);
919
19.4k
        except_stack = b->b_exceptstack;
920
19.4k
        assert(except_stack != NULL);
921
19.4k
        b->b_exceptstack = NULL;
922
19.4k
        handler = except_stack_top(except_stack);
923
19.4k
        int last_yield_except_depth = -1;
924
225k
        for (int i = 0; i < b->b_iused; i++) {
925
206k
            cfg_instr *instr = &b->b_instr[i];
926
206k
            if (is_block_push(instr)) {
927
1.66k
                if (!instr->i_target->b_visited) {
928
1.66k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
929
1.66k
                    if (copy == NULL) {
930
0
                        goto error;
931
0
                    }
932
1.66k
                    instr->i_target->b_exceptstack = copy;
933
1.66k
                    todo[0] = instr->i_target;
934
1.66k
                    instr->i_target->b_visited = 1;
935
1.66k
                    todo++;
936
1.66k
                }
937
1.66k
                handler = push_except_block(except_stack, instr);
938
1.66k
            }
939
204k
            else if (instr->i_opcode == POP_BLOCK) {
940
1.37k
                handler = pop_except_block(except_stack);
941
1.37k
                INSTR_SET_OP0(instr, NOP);
942
1.37k
            }
943
203k
            else if (is_jump(instr)) {
944
8.34k
                instr->i_except = handler;
945
8.34k
                assert(i == b->b_iused -1);
946
8.34k
                if (!instr->i_target->b_visited) {
947
6.04k
                    if (BB_HAS_FALLTHROUGH(b)) {
948
4.98k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
949
4.98k
                        if (copy == NULL) {
950
0
                            goto error;
951
0
                        }
952
4.98k
                        instr->i_target->b_exceptstack = copy;
953
4.98k
                    }
954
1.06k
                    else {
955
1.06k
                        instr->i_target->b_exceptstack = except_stack;
956
1.06k
                        except_stack = NULL;
957
1.06k
                    }
958
6.04k
                    todo[0] = instr->i_target;
959
6.04k
                    instr->i_target->b_visited = 1;
960
6.04k
                    todo++;
961
6.04k
                }
962
8.34k
            }
963
195k
            else if (instr->i_opcode == YIELD_VALUE) {
964
308
                instr->i_except = handler;
965
308
                last_yield_except_depth = except_stack->depth;
966
308
            }
967
194k
            else if (instr->i_opcode == RESUME) {
968
5.71k
                instr->i_except = handler;
969
5.71k
                if (instr->i_oparg != RESUME_AT_FUNC_START && instr->i_oparg != RESUME_AT_GEN_EXPR_START) {
970
308
                    assert(last_yield_except_depth >= 0);
971
308
                    if (last_yield_except_depth == 1) {
972
137
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
973
137
                    }
974
308
                    last_yield_except_depth = -1;
975
308
                }
976
5.71k
            }
977
189k
            else if (instr->i_opcode == RETURN_GENERATOR) {
978
252
                instr->i_except = NULL;
979
252
            }
980
188k
            else {
981
188k
                instr->i_except = handler;
982
188k
            }
983
206k
        }
984
19.4k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
985
6.38k
            assert(except_stack != NULL);
986
6.38k
            b->b_next->b_exceptstack = except_stack;
987
6.38k
            todo[0] = b->b_next;
988
6.38k
            b->b_next->b_visited = 1;
989
6.38k
            todo++;
990
6.38k
        }
991
13.0k
        else if (except_stack != NULL) {
992
11.9k
           PyMem_Free(except_stack);
993
11.9k
        }
994
19.4k
    }
995
#ifdef Py_DEBUG
996
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
997
        assert(b->b_exceptstack == NULL);
998
    }
999
#endif
1000
5.34k
    PyMem_Free(todo_stack);
1001
5.34k
    return SUCCESS;
1002
0
error:
1003
0
    PyMem_Free(todo_stack);
1004
0
    PyMem_Free(except_stack);
1005
0
    return ERROR;
1006
5.34k
}
1007
1008
/***** CFG optimizations *****/
1009
1010
static int
1011
10.6k
remove_unreachable(basicblock *entryblock) {
1012
57.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1013
47.1k
        b->b_predecessors = 0;
1014
47.1k
    }
1015
10.6k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1016
10.6k
    if (stack == NULL) {
1017
0
        return ERROR;
1018
0
    }
1019
10.6k
    basicblock **sp = stack;
1020
10.6k
    entryblock->b_predecessors = 1;
1021
10.6k
    *sp++ = entryblock;
1022
10.6k
    entryblock->b_visited = 1;
1023
49.2k
    while (sp > stack) {
1024
38.5k
        basicblock *b = *(--sp);
1025
38.5k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1026
16.9k
            if (!b->b_next->b_visited) {
1027
14.0k
                assert(b->b_next->b_predecessors == 0);
1028
14.0k
                *sp++ = b->b_next;
1029
14.0k
                b->b_next->b_visited = 1;
1030
14.0k
            }
1031
16.9k
            b->b_next->b_predecessors++;
1032
16.9k
        }
1033
439k
        for (int i = 0; i < b->b_iused; i++) {
1034
401k
            basicblock *target;
1035
401k
            cfg_instr *instr = &b->b_instr[i];
1036
401k
            if (is_jump(instr) || is_block_push(instr)) {
1037
18.6k
                target = instr->i_target;
1038
18.6k
                if (!target->b_visited) {
1039
13.8k
                    *sp++ = target;
1040
13.8k
                    target->b_visited = 1;
1041
13.8k
                }
1042
18.6k
                target->b_predecessors++;
1043
18.6k
            }
1044
401k
        }
1045
38.5k
    }
1046
10.6k
    PyMem_Free(stack);
1047
1048
    /* Delete unreachable instructions */
1049
57.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1050
47.1k
       if (b->b_predecessors == 0) {
1051
8.67k
            b->b_iused = 0;
1052
8.67k
            b->b_except_handler = 0;
1053
8.67k
       }
1054
47.1k
    }
1055
10.6k
    return SUCCESS;
1056
10.6k
}
1057
1058
static int
1059
123k
basicblock_remove_redundant_nops(basicblock *bb) {
1060
    /* Remove NOPs when legal to do so. */
1061
123k
    int dest = 0;
1062
123k
    int prev_lineno = -1;
1063
1.06M
    for (int src = 0; src < bb->b_iused; src++) {
1064
940k
        int lineno = bb->b_instr[src].i_loc.lineno;
1065
940k
        if (bb->b_instr[src].i_opcode == NOP) {
1066
            /* Eliminate no-op if it doesn't have a line number */
1067
20.7k
            if (lineno < 0) {
1068
4.30k
                continue;
1069
4.30k
            }
1070
            /* or, if the previous instruction had the same line number. */
1071
16.4k
            if (prev_lineno == lineno) {
1072
13.1k
                continue;
1073
13.1k
            }
1074
            /* or, if the next instruction has same line number or no line number */
1075
3.23k
            if (src < bb->b_iused - 1) {
1076
2.99k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1077
2.99k
                if (next_lineno == lineno) {
1078
1.97k
                    continue;
1079
1.97k
                }
1080
1.01k
                if (next_lineno < 0) {
1081
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1082
0
                    continue;
1083
0
                }
1084
1.01k
            }
1085
242
            else {
1086
242
                basicblock *next = next_nonempty_block(bb->b_next);
1087
                /* or if last instruction in BB and next BB has same line number */
1088
242
                if (next) {
1089
242
                    location next_loc = NO_LOCATION;
1090
242
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1091
242
                        cfg_instr *instr = &next->b_instr[next_i];
1092
242
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1093
                            /* Skip over NOPs without a location, they will be removed */
1094
0
                            continue;
1095
0
                        }
1096
242
                        next_loc = instr->i_loc;
1097
242
                        break;
1098
242
                    }
1099
242
                    if (lineno == next_loc.lineno) {
1100
1
                        continue;
1101
1
                    }
1102
242
                }
1103
242
            }
1104
1105
3.23k
        }
1106
921k
        if (dest != src) {
1107
75.6k
            bb->b_instr[dest] = bb->b_instr[src];
1108
75.6k
        }
1109
921k
        dest++;
1110
921k
        prev_lineno = lineno;
1111
921k
    }
1112
123k
    assert(dest <= bb->b_iused);
1113
123k
    int num_removed = bb->b_iused - dest;
1114
123k
    bb->b_iused = dest;
1115
123k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1116
123k
    return num_removed;
1117
123k
}
1118
1119
static int
1120
17.5k
remove_redundant_nops(cfg_builder *g) {
1121
17.5k
    int changes = 0;
1122
117k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1123
99.6k
        int change = basicblock_remove_redundant_nops(b);
1124
99.6k
        RETURN_IF_ERROR(change);
1125
99.6k
        changes += change;
1126
99.6k
    }
1127
17.5k
    return changes;
1128
17.5k
}
1129
1130
static int loads_const(int opcode);
1131
1132
static int
1133
remove_redundant_nops_and_pairs(basicblock *entryblock)
1134
5.34k
{
1135
5.34k
    bool done = false;
1136
1137
10.6k
    while (! done) {
1138
5.34k
        done = true;
1139
5.34k
        cfg_instr *prev_instr = NULL;
1140
5.34k
        cfg_instr *instr = NULL;
1141
29.1k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1142
23.7k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1143
23.7k
            if (IS_LABEL(b->b_label)) {
1144
                /* this block is a jump target, forget instr */
1145
8.92k
                instr = NULL;
1146
8.92k
            }
1147
217k
            for (int i = 0; i < b->b_iused; i++) {
1148
193k
                prev_instr = instr;
1149
193k
                instr = &b->b_instr[i];
1150
193k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1151
193k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1152
193k
                int opcode = instr->i_opcode;
1153
193k
                bool is_redundant_pair = false;
1154
193k
                if (opcode == POP_TOP) {
1155
6.06k
                   if (loads_const(prev_opcode)) {
1156
0
                       is_redundant_pair = true;
1157
0
                   }
1158
6.06k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1159
0
                       is_redundant_pair = true;
1160
0
                   }
1161
6.06k
                }
1162
193k
                if (is_redundant_pair) {
1163
0
                    INSTR_SET_OP0(prev_instr, NOP);
1164
0
                    INSTR_SET_OP0(instr, NOP);
1165
0
                    done = false;
1166
0
                }
1167
193k
            }
1168
23.7k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1169
16.9k
                instr = NULL;
1170
16.9k
            }
1171
23.7k
        }
1172
5.34k
    }
1173
5.34k
    return SUCCESS;
1174
5.34k
}
1175
1176
static int
1177
12.1k
remove_redundant_jumps(cfg_builder *g) {
1178
    /* If a non-empty block ends with a jump instruction, check if the next
1179
     * non-empty block reached through normal flow control is the target
1180
     * of that jump. If it is, then the jump instruction is redundant and
1181
     * can be deleted.
1182
     *
1183
     * Return the number of changes applied, or -1 on error.
1184
     */
1185
1186
12.1k
    int changes = 0;
1187
88.0k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1188
75.8k
        cfg_instr *last = basicblock_last_instr(b);
1189
75.8k
        if (last == NULL) {
1190
12.1k
            continue;
1191
12.1k
        }
1192
75.8k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1193
63.6k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1194
6.99k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1195
6.99k
            if (jump_target == NULL) {
1196
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1197
0
                return ERROR;
1198
0
            }
1199
6.99k
            basicblock *next = next_nonempty_block(b->b_next);
1200
6.99k
            if (jump_target == next) {
1201
245
                changes++;
1202
245
                INSTR_SET_OP0(last, NOP);
1203
245
            }
1204
6.99k
        }
1205
63.6k
    }
1206
1207
12.1k
    return changes;
1208
12.1k
}
1209
1210
static inline bool
1211
23.8k
basicblock_has_no_lineno(basicblock *b) {
1212
29.7k
    for (int i = 0; i < b->b_iused; i++) {
1213
27.1k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1214
21.2k
            return false;
1215
21.2k
        }
1216
27.1k
    }
1217
2.59k
    return true;
1218
23.8k
}
1219
1220
/* Maximum size of basic block that should be copied in optimizer */
1221
1.20k
#define MAX_COPY_SIZE 4
1222
1223
/* If this block ends with an unconditional jump to a small exit block or
1224
 * a block that has no line numbers (and no fallthrough), then
1225
 * remove the jump and extend this block with the target.
1226
 * Returns 1 if extended, 0 if no change, and -1 on error.
1227
 */
1228
static int
1229
32.8k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1230
32.8k
    cfg_instr *last = basicblock_last_instr(bb);
1231
32.8k
    if (last == NULL) {
1232
0
        return 0;
1233
0
    }
1234
32.8k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1235
28.5k
        return 0;
1236
28.5k
    }
1237
4.30k
    basicblock *target = last->i_target;
1238
4.30k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1239
1.20k
                             target->b_iused <= MAX_COPY_SIZE);
1240
4.30k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1241
1.03k
                                     !BB_HAS_FALLTHROUGH(target));
1242
4.30k
    if (small_exit_block || no_lineno_no_fallthrough) {
1243
1.14k
        assert(is_jump(last));
1244
1.14k
        int removed_jump_opcode = last->i_opcode;
1245
1.14k
        INSTR_SET_OP0(last, NOP);
1246
1.14k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1247
1.14k
        if (no_lineno_no_fallthrough) {
1248
1.02k
            last = basicblock_last_instr(bb);
1249
1.02k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1250
308
                removed_jump_opcode == JUMP)
1251
26
            {
1252
                /* Make sure we don't lose eval breaker checks */
1253
26
                last->i_opcode = JUMP;
1254
26
            }
1255
1.02k
        }
1256
1.14k
        target->b_predecessors--;
1257
1.14k
        return 1;
1258
1.14k
    }
1259
3.16k
    return 0;
1260
4.30k
}
1261
1262
static int
1263
5.34k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1264
5.34k
    bool changes;
1265
5.88k
    do {
1266
5.88k
        changes = false;
1267
38.7k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1268
32.8k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1269
32.8k
            RETURN_IF_ERROR(res);
1270
32.8k
            if (res) {
1271
1.14k
                changes = true;
1272
1.14k
            }
1273
32.8k
        }
1274
5.88k
    } while(changes); /* every change removes a jump, ensuring convergence */
1275
5.34k
    return changes;
1276
5.34k
}
1277
1278
// Attempt to eliminate jumps to jumps by updating inst to jump to
1279
// target->i_target using the provided opcode. Return whether or not the
1280
// optimization was successful.
1281
static bool
1282
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1283
187
{
1284
187
    assert(is_jump(inst));
1285
187
    assert(is_jump(target));
1286
187
    assert(inst == basicblock_last_instr(bb));
1287
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1288
    // changes (and we fall into an infinite loop):
1289
187
    if (inst->i_target != target->i_target) {
1290
        /* Change inst to NOP and append a jump to target->i_target. The
1291
         * NOP will be removed later if it's not needed for the lineno.
1292
         */
1293
187
        INSTR_SET_OP0(inst, NOP);
1294
1295
187
        RETURN_IF_ERROR(
1296
187
            basicblock_add_jump(
1297
187
                bb, opcode, target->i_target, target->i_loc));
1298
1299
187
        return true;
1300
187
    }
1301
0
    return false;
1302
187
}
1303
1304
static int
1305
loads_const(int opcode)
1306
224k
{
1307
224k
    return OPCODE_HAS_CONST(opcode)
1308
194k
        || opcode == LOAD_SMALL_INT
1309
176k
        || opcode == LOAD_COMMON_CONSTANT;
1310
224k
}
1311
1312
/* Returns new reference */
1313
static PyObject*
1314
get_const_value(int opcode, int oparg, PyObject *co_consts)
1315
72.4k
{
1316
72.4k
    PyObject *constant = NULL;
1317
72.4k
    assert(loads_const(opcode));
1318
72.4k
    if (opcode == LOAD_CONST) {
1319
69.9k
        assert(PyList_Check(co_consts));
1320
69.9k
        Py_ssize_t n = PyList_GET_SIZE(co_consts);
1321
69.9k
        if (oparg < 0 || oparg >= n) {
1322
0
            PyErr_Format(PyExc_ValueError,
1323
0
                         "LOAD_CONST index %d is out of range for consts (len=%zd)",
1324
0
                         oparg, n);
1325
0
            return NULL;
1326
0
        }
1327
69.9k
        constant = PyList_GET_ITEM(co_consts, oparg);
1328
69.9k
    }
1329
72.4k
    if (opcode == LOAD_SMALL_INT) {
1330
1.87k
        return PyLong_FromLong(oparg);
1331
1.87k
    }
1332
70.5k
    if (opcode == LOAD_COMMON_CONSTANT) {
1333
599
        assert(oparg < NUM_COMMON_CONSTANTS);
1334
599
        return PyStackRef_AsPyObjectBorrow(
1335
599
            _PyInterpreterState_GET()->common_consts[oparg]);
1336
599
    }
1337
1338
69.9k
    if (constant == NULL) {
1339
0
        PyErr_SetString(PyExc_SystemError,
1340
0
                        "Internal error: failed to get value of a constant");
1341
0
        return NULL;
1342
0
    }
1343
69.9k
    return Py_NewRef(constant);
1344
69.9k
}
1345
1346
// Steals a reference to newconst.
1347
static int
1348
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
1349
          _Py_hashtable_t *consts_index)
1350
2.19k
{
1351
2.19k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1352
0
        Py_DECREF(newconst);
1353
0
        return -1;
1354
0
    }
1355
1356
2.19k
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
1357
2.19k
    if (entry != NULL) {
1358
1.47k
        Py_DECREF(newconst);
1359
1.47k
        return (int)(uintptr_t)entry->value;
1360
1.47k
    }
1361
1362
715
    Py_ssize_t index = PyList_GET_SIZE(consts);
1363
715
    if ((size_t)index >= (size_t)INT_MAX - 1) {
1364
0
        PyErr_SetString(PyExc_OverflowError, "too many constants");
1365
0
        Py_DECREF(newconst);
1366
0
        return -1;
1367
0
    }
1368
715
    if (PyList_Append(consts, newconst)) {
1369
0
        Py_DECREF(newconst);
1370
0
        return -1;
1371
0
    }
1372
1373
715
    if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
1374
0
        PyList_SetSlice(consts, index, index + 1, NULL);
1375
0
        Py_DECREF(newconst);
1376
0
        PyErr_NoMemory();
1377
0
        return -1;
1378
0
    }
1379
1380
715
    Py_DECREF(newconst);
1381
715
    return (int)index;
1382
715
}
1383
1384
/*
1385
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1386
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1387
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1388
1389
  Return boolean indicating whether "size" such instructions were found.
1390
*/
1391
static bool
1392
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1393
5.91k
{
1394
5.91k
    assert(start < bb->b_iused);
1395
5.91k
    assert(size >= 0);
1396
5.91k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1397
1398
10.6k
    for (; start >= 0 && size > 0; start--) {
1399
7.97k
        cfg_instr *instr = &bb->b_instr[start];
1400
7.97k
        if (instr->i_opcode == NOP) {
1401
50
            continue;
1402
50
        }
1403
7.92k
        if (!loads_const(instr->i_opcode)) {
1404
3.21k
            return false;
1405
3.21k
        }
1406
4.70k
        instrs[--size] = instr;
1407
4.70k
    }
1408
1409
2.69k
    return size == 0;
1410
5.91k
}
1411
1412
/*
1413
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1414
  Caller must make sure "instrs" has at least "size" elements.
1415
*/
1416
static void
1417
nop_out(cfg_instr **instrs, int size)
1418
2.69k
{
1419
6.54k
    for (int i = 0; i < size; i++) {
1420
3.84k
        cfg_instr *instr = instrs[i];
1421
3.84k
        assert(instr->i_opcode != NOP);
1422
3.84k
        INSTR_SET_OP0(instr, NOP);
1423
3.84k
        INSTR_SET_LOC(instr, NO_LOCATION);
1424
3.84k
    }
1425
2.69k
}
1426
1427
/* Does not steal reference to "newconst".
1428
   Return 1 if changed instruction to LOAD_SMALL_INT.
1429
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1430
   Return -1 on error.
1431
*/
1432
static int
1433
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1434
                               PyObject *consts, PyObject *const_cache)
1435
44.4k
{
1436
44.4k
    if (PyLong_CheckExact(newconst)) {
1437
20.3k
        int overflow;
1438
20.3k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1439
20.3k
        if (val == -1 && PyErr_Occurred()) {
1440
0
            return -1;
1441
0
        }
1442
20.3k
        if (!overflow && _PY_IS_SMALL_INT(val) && 0 <= val && val <= 255) {
1443
15.2k
            assert(_Py_IsImmortal(newconst));
1444
15.2k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1445
15.2k
            return 1;
1446
15.2k
        }
1447
20.3k
    }
1448
29.1k
    return 0;
1449
44.4k
}
1450
1451
/* Does not steal reference to "newconst".
1452
   Return 1 if changed instruction to LOAD_COMMON_CONSTANT.
1453
   Return 0 if could not change instruction to LOAD_COMMON_CONSTANT.
1454
   Return -1 on error.
1455
*/
1456
static int
1457
maybe_instr_make_load_common_const(cfg_instr *instr, PyObject *newconst)
1458
28.4k
{
1459
28.4k
    int oparg;
1460
28.4k
    if (newconst == Py_None) {
1461
6.07k
        oparg = CONSTANT_NONE;
1462
6.07k
    }
1463
22.4k
    else if (newconst == Py_True) {
1464
485
        oparg = CONSTANT_TRUE;
1465
485
    }
1466
21.9k
    else if (newconst == Py_False) {
1467
655
        oparg = CONSTANT_FALSE;
1468
655
    }
1469
21.2k
    else if (PyUnicode_CheckExact(newconst)
1470
8.94k
             && PyUnicode_GET_LENGTH(newconst) == 0) {
1471
130
        oparg = CONSTANT_EMPTY_STR;
1472
130
    }
1473
21.1k
    else if (PyTuple_CheckExact(newconst)
1474
2.55k
             && PyTuple_GET_SIZE(newconst) == 0) {
1475
679
        oparg = CONSTANT_EMPTY_TUPLE;
1476
679
    }
1477
20.4k
    else if (PyLong_CheckExact(newconst)) {
1478
5.09k
        int overflow;
1479
5.09k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1480
5.09k
        if (val == -1 && PyErr_Occurred()) {
1481
0
            return -1;
1482
0
        }
1483
5.09k
        if (overflow || val != -1) {
1484
4.61k
            return 0;
1485
4.61k
        }
1486
478
        oparg = CONSTANT_MINUS_ONE;
1487
478
    }
1488
15.3k
    else {
1489
15.3k
        return 0;
1490
15.3k
    }
1491
28.4k
    assert(_Py_IsImmortal(newconst));
1492
8.50k
    INSTR_SET_OP1(instr, LOAD_COMMON_CONSTANT, oparg);
1493
8.50k
    return 1;
1494
28.4k
}
1495
1496
/* Steals reference to "newconst" */
1497
static int
1498
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1499
                      PyObject *consts, PyObject *const_cache,
1500
                      _Py_hashtable_t *consts_index)
1501
2.54k
{
1502
2.54k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1503
2.54k
    if (res < 0) {
1504
0
        Py_DECREF(newconst);
1505
0
        return ERROR;
1506
0
    }
1507
2.54k
    if (res > 0) {
1508
0
        return SUCCESS;
1509
0
    }
1510
2.54k
    res = maybe_instr_make_load_common_const(instr, newconst);
1511
2.54k
    if (res < 0) {
1512
0
        Py_DECREF(newconst);
1513
0
        return ERROR;
1514
0
    }
1515
2.54k
    if (res > 0) {
1516
524
        return SUCCESS;
1517
524
    }
1518
2.02k
    int oparg = add_const(newconst, consts, const_cache, consts_index);
1519
2.02k
    RETURN_IF_ERROR(oparg);
1520
2.02k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1521
2.02k
    return SUCCESS;
1522
2.02k
}
1523
1524
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1525
   with    LOAD_CONST (c1, c2, ... cn).
1526
   The consts table must still be in list form so that the
1527
   new constant (c1, c2, ... cn) can be appended.
1528
   Called with codestr pointing to the first LOAD_CONST.
1529
*/
1530
static int
1531
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
1532
                        PyObject *const_cache, _Py_hashtable_t *consts_index)
1533
2.37k
{
1534
    /* Pre-conditions */
1535
2.37k
    assert(PyDict_CheckExact(const_cache));
1536
2.37k
    assert(PyList_CheckExact(consts));
1537
1538
2.37k
    cfg_instr *instr = &bb->b_instr[i];
1539
2.37k
    assert(instr->i_opcode == BUILD_TUPLE);
1540
1541
2.37k
    int seq_size = instr->i_oparg;
1542
2.37k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1543
0
        return SUCCESS;
1544
0
    }
1545
1546
2.37k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1547
2.37k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1548
        /* not a const sequence */
1549
1.62k
        return SUCCESS;
1550
1.62k
    }
1551
1552
755
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1553
755
    if (const_tuple == NULL) {
1554
0
        return ERROR;
1555
0
    }
1556
1557
1.82k
    for (int i = 0; i < seq_size; i++) {
1558
1.07k
        cfg_instr *inst = const_instrs[i];
1559
1.07k
        assert(loads_const(inst->i_opcode));
1560
1.07k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1561
1.07k
        if (element == NULL) {
1562
0
            Py_DECREF(const_tuple);
1563
0
            return ERROR;
1564
0
        }
1565
1.07k
        PyTuple_SET_ITEM(const_tuple, i, element);
1566
1.07k
    }
1567
1568
755
    nop_out(const_instrs, seq_size);
1569
755
    return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
1570
755
}
1571
1572
/* Replace:
1573
    BUILD_LIST 0
1574
    LOAD_CONST c1
1575
    LIST_APPEND 1
1576
    LOAD_CONST c2
1577
    LIST_APPEND 1
1578
    ...
1579
    LOAD_CONST cN
1580
    LIST_APPEND 1
1581
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1582
   with:
1583
    LOAD_CONST (c1, c2, ... cN)
1584
*/
1585
static int
1586
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1587
                                      PyObject *consts, PyObject *const_cache,
1588
                                      _Py_hashtable_t *consts_index)
1589
63
{
1590
63
    assert(PyDict_CheckExact(const_cache));
1591
63
    assert(PyList_CheckExact(consts));
1592
63
    assert(i >= 0);
1593
63
    assert(i < bb->b_iused);
1594
1595
63
    cfg_instr *intrinsic = &bb->b_instr[i];
1596
63
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1597
63
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1598
1599
63
    int consts_found = 0;
1600
63
    bool expect_append = true;
1601
1602
5.57k
    for (int pos = i - 1; pos >= 0; pos--) {
1603
5.57k
        cfg_instr *instr = &bb->b_instr[pos];
1604
5.57k
        int opcode = instr->i_opcode;
1605
5.57k
        int oparg = instr->i_oparg;
1606
1607
5.57k
        if (opcode == NOP) {
1608
1.66k
            continue;
1609
1.66k
        }
1610
1611
3.91k
        if (opcode == BUILD_LIST && oparg == 0) {
1612
0
            if (!expect_append) {
1613
                /* Not a sequence start. */
1614
0
                return SUCCESS;
1615
0
            }
1616
1617
            /* Sequence start, we are done. */
1618
0
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1619
0
            if (newconst == NULL) {
1620
0
                return ERROR;
1621
0
            }
1622
1623
0
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1624
0
                instr = &bb->b_instr[newpos];
1625
0
                if (instr->i_opcode == NOP) {
1626
0
                    continue;
1627
0
                }
1628
0
                if (loads_const(instr->i_opcode)) {
1629
0
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1630
0
                    if (constant == NULL) {
1631
0
                        Py_DECREF(newconst);
1632
0
                        return ERROR;
1633
0
                    }
1634
0
                    assert(consts_found > 0);
1635
0
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1636
0
                }
1637
0
                nop_out(&instr, 1);
1638
0
            }
1639
0
            assert(consts_found == 0);
1640
0
            return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
1641
0
        }
1642
1643
3.91k
        if (expect_append) {
1644
1.98k
            if (opcode != LIST_APPEND || oparg != 1) {
1645
61
                return SUCCESS;
1646
61
            }
1647
1.98k
        }
1648
1.92k
        else {
1649
1.92k
            if (!loads_const(opcode)) {
1650
2
                return SUCCESS;
1651
2
            }
1652
1.92k
            consts_found++;
1653
1.92k
        }
1654
1655
3.85k
        expect_append = !expect_append;
1656
3.85k
    }
1657
1658
    /* Did not find sequence start. */
1659
0
    return SUCCESS;
1660
63
}
1661
1662
1.00k
#define MIN_CONST_SEQUENCE_SIZE 3
1663
/*
1664
Optimize lists and sets for:
1665
    1. "for" loop, comprehension or "in"/"not in" tests:
1666
           Change literal list or set of constants into constant
1667
           tuple or frozenset respectively. Change list of
1668
           non-constants into tuple.
1669
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1670
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1671
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1672
           or BUILD_SET & SET_UPDATE respectively.
1673
*/
1674
static int
1675
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1676
                        PyObject *consts, PyObject *const_cache,
1677
                        _Py_hashtable_t *consts_index)
1678
504
{
1679
504
    assert(PyDict_CheckExact(const_cache));
1680
504
    assert(PyList_CheckExact(consts));
1681
1682
504
    cfg_instr *instr = &bb->b_instr[i];
1683
504
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1684
1685
504
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1686
504
    int seq_size = instr->i_oparg;
1687
504
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1688
504
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1689
333
    {
1690
333
        return SUCCESS;
1691
333
    }
1692
1693
171
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1694
171
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1695
25
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1696
            /* iterate over a tuple instead of list */
1697
13
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1698
13
        }
1699
25
        return SUCCESS;
1700
25
    }
1701
1702
146
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1703
146
    if (const_result == NULL) {
1704
0
        return ERROR;
1705
0
    }
1706
1707
1.09k
    for (int i = 0; i < seq_size; i++) {
1708
951
        cfg_instr *inst = const_instrs[i];
1709
951
        assert(loads_const(inst->i_opcode));
1710
951
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1711
951
        if (element == NULL) {
1712
0
            Py_DECREF(const_result);
1713
0
            return ERROR;
1714
0
        }
1715
951
        PyTuple_SET_ITEM(const_result, i, element);
1716
951
    }
1717
1718
146
    if (instr->i_opcode == BUILD_SET) {
1719
138
        PyObject *frozenset = PyFrozenSet_New(const_result);
1720
138
        if (frozenset == NULL) {
1721
0
            Py_DECREF(const_result);
1722
0
            return ERROR;
1723
0
        }
1724
138
        Py_SETREF(const_result, frozenset);
1725
138
    }
1726
1727
146
    int index = add_const(const_result, consts, const_cache, consts_index);
1728
146
    RETURN_IF_ERROR(index);
1729
146
    nop_out(const_instrs, seq_size);
1730
1731
146
    if (contains_or_iter) {
1732
132
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1733
132
    }
1734
14
    else {
1735
14
        assert(i >= 2);
1736
14
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1737
1738
14
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1739
1740
14
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1741
14
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1742
14
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1743
14
    }
1744
146
    return SUCCESS;
1745
146
}
1746
1747
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1748
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1749
 * Used to avoid creating constants which are slow to hash.
1750
 */
1751
static Py_ssize_t
1752
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1753
0
{
1754
0
    if (PyTuple_Check(obj)) {
1755
0
        Py_ssize_t i;
1756
0
        limit -= PyTuple_GET_SIZE(obj);
1757
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1758
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1759
0
            if (limit < 0) {
1760
0
                return limit;
1761
0
            }
1762
0
        }
1763
0
    }
1764
0
    return limit;
1765
0
}
1766
1767
70
#define MAX_INT_SIZE           128  /* bits */
1768
0
#define MAX_COLLECTION_SIZE    256  /* items */
1769
2
#define MAX_STR_SIZE          4096  /* characters */
1770
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1771
1772
static PyObject *
1773
const_folding_safe_multiply(PyObject *v, PyObject *w)
1774
12
{
1775
12
    if (PyLong_Check(v) && PyLong_Check(w) &&
1776
8
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1777
12
    ) {
1778
8
        int64_t vbits = _PyLong_NumBits(v);
1779
8
        int64_t wbits = _PyLong_NumBits(w);
1780
8
        assert(vbits >= 0);
1781
8
        assert(wbits >= 0);
1782
8
        if (vbits + wbits > MAX_INT_SIZE) {
1783
0
            return NULL;
1784
0
        }
1785
8
    }
1786
4
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1787
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1788
0
        if (size) {
1789
0
            long n = PyLong_AsLong(v);
1790
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1791
0
                return NULL;
1792
0
            }
1793
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1794
0
                return NULL;
1795
0
            }
1796
0
        }
1797
0
    }
1798
4
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1799
2
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1800
2
                                               PyBytes_GET_SIZE(w);
1801
2
        if (size) {
1802
2
            long n = PyLong_AsLong(v);
1803
2
            if (n < 0 || n > MAX_STR_SIZE / size) {
1804
0
                return NULL;
1805
0
            }
1806
2
        }
1807
2
    }
1808
2
    else if (PyLong_Check(w) &&
1809
2
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1810
2
    {
1811
2
        return const_folding_safe_multiply(w, v);
1812
2
    }
1813
1814
10
    return PyNumber_Multiply(v, w);
1815
12
}
1816
1817
static PyObject *
1818
const_folding_safe_power(PyObject *v, PyObject *w)
1819
2
{
1820
2
    if (PyLong_Check(v) && PyLong_Check(w) &&
1821
2
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1822
2
    ) {
1823
2
        int64_t vbits = _PyLong_NumBits(v);
1824
2
        size_t wbits = PyLong_AsSize_t(w);
1825
2
        assert(vbits >= 0);
1826
2
        if (wbits == (size_t)-1) {
1827
0
            return NULL;
1828
0
        }
1829
2
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1830
0
            return NULL;
1831
0
        }
1832
2
    }
1833
1834
2
    return PyNumber_Power(v, w, Py_None);
1835
2
}
1836
1837
static PyObject *
1838
const_folding_safe_lshift(PyObject *v, PyObject *w)
1839
20
{
1840
20
    if (PyLong_Check(v) && PyLong_Check(w) &&
1841
20
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1842
20
    ) {
1843
20
        int64_t vbits = _PyLong_NumBits(v);
1844
20
        size_t wbits = PyLong_AsSize_t(w);
1845
20
        assert(vbits >= 0);
1846
20
        if (wbits == (size_t)-1) {
1847
0
            return NULL;
1848
0
        }
1849
20
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1850
0
            return NULL;
1851
0
        }
1852
20
    }
1853
1854
20
    return PyNumber_Lshift(v, w);
1855
20
}
1856
1857
static PyObject *
1858
const_folding_safe_mod(PyObject *v, PyObject *w)
1859
0
{
1860
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1861
0
        return NULL;
1862
0
    }
1863
1864
0
    return PyNumber_Remainder(v, w);
1865
0
}
1866
1867
static PyObject *
1868
eval_const_binop(PyObject *left, int op, PyObject *right)
1869
34
{
1870
34
    assert(left != NULL && right != NULL);
1871
34
    assert(op >= 0 && op <= NB_OPARG_LAST);
1872
1873
34
    PyObject *result = NULL;
1874
34
    switch (op) {
1875
1
        case NB_ADD:
1876
1
            result = PyNumber_Add(left, right);
1877
1
            break;
1878
1
        case NB_SUBTRACT:
1879
1
            result = PyNumber_Subtract(left, right);
1880
1
            break;
1881
10
        case NB_MULTIPLY:
1882
10
            result = const_folding_safe_multiply(left, right);
1883
10
            break;
1884
0
        case NB_TRUE_DIVIDE:
1885
0
            result = PyNumber_TrueDivide(left, right);
1886
0
            break;
1887
0
        case NB_FLOOR_DIVIDE:
1888
0
            result = PyNumber_FloorDivide(left, right);
1889
0
            break;
1890
0
        case NB_REMAINDER:
1891
0
            result = const_folding_safe_mod(left, right);
1892
0
            break;
1893
2
        case NB_POWER:
1894
2
            result = const_folding_safe_power(left, right);
1895
2
            break;
1896
20
        case NB_LSHIFT:
1897
20
            result = const_folding_safe_lshift(left, right);
1898
20
            break;
1899
0
        case NB_RSHIFT:
1900
0
            result = PyNumber_Rshift(left, right);
1901
0
            break;
1902
0
        case NB_OR:
1903
0
            result = PyNumber_Or(left, right);
1904
0
            break;
1905
0
        case NB_XOR:
1906
0
            result = PyNumber_Xor(left, right);
1907
0
            break;
1908
0
        case NB_AND:
1909
0
            result = PyNumber_And(left, right);
1910
0
            break;
1911
0
        case NB_SUBSCR:
1912
0
            result = PyObject_GetItem(left, right);
1913
0
            break;
1914
0
        case NB_MATRIX_MULTIPLY:
1915
            // No builtin constants implement matrix multiplication
1916
0
            break;
1917
0
        default:
1918
0
            Py_UNREACHABLE();
1919
34
    }
1920
34
    return result;
1921
34
}
1922
1923
static int
1924
fold_const_binop(basicblock *bb, int i, PyObject *consts,
1925
                 PyObject *const_cache, _Py_hashtable_t *consts_index)
1926
1.58k
{
1927
1.62k
    #define BINOP_OPERAND_COUNT 2
1928
1.58k
    assert(PyDict_CheckExact(const_cache));
1929
1.58k
    assert(PyList_CheckExact(consts));
1930
1931
1.58k
    cfg_instr *binop = &bb->b_instr[i];
1932
1.58k
    assert(binop->i_opcode == BINARY_OP);
1933
1934
1.58k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1935
1.58k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1936
        /* not a const sequence */
1937
1.55k
        return SUCCESS;
1938
1.55k
    }
1939
1940
34
    cfg_instr *lhs_instr = operands_instrs[0];
1941
34
    assert(loads_const(lhs_instr->i_opcode));
1942
34
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1943
34
    if (lhs == NULL) {
1944
0
        return ERROR;
1945
0
    }
1946
1947
34
    cfg_instr *rhs_instr = operands_instrs[1];
1948
34
    assert(loads_const(rhs_instr->i_opcode));
1949
34
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1950
34
    if (rhs == NULL) {
1951
0
        Py_DECREF(lhs);
1952
0
        return ERROR;
1953
0
    }
1954
1955
34
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1956
34
    Py_DECREF(lhs);
1957
34
    Py_DECREF(rhs);
1958
34
    if (newconst == NULL) {
1959
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1960
0
            return ERROR;
1961
0
        }
1962
0
        PyErr_Clear();
1963
0
        return SUCCESS;
1964
0
    }
1965
1966
34
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1967
34
    return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
1968
34
}
1969
1970
static PyObject *
1971
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1972
1.75k
{
1973
1.75k
    assert(operand != NULL);
1974
1.75k
    assert(
1975
1.75k
        opcode == UNARY_NEGATIVE ||
1976
1.75k
        opcode == UNARY_INVERT ||
1977
1.75k
        opcode == UNARY_NOT ||
1978
1.75k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1979
1.75k
    );
1980
1.75k
    PyObject *result;
1981
1.75k
    switch (opcode) {
1982
1.75k
        case UNARY_NEGATIVE:
1983
1.75k
            result = PyNumber_Negative(operand);
1984
1.75k
            break;
1985
0
        case UNARY_INVERT:
1986
            // XXX: This should be removed once the ~bool depreciation expires.
1987
0
            if (PyBool_Check(operand)) {
1988
0
                return NULL;
1989
0
            }
1990
0
            result = PyNumber_Invert(operand);
1991
0
            break;
1992
0
        case UNARY_NOT: {
1993
0
            int r = PyObject_IsTrue(operand);
1994
0
            if (r < 0) {
1995
0
                return NULL;
1996
0
            }
1997
0
            result = PyBool_FromLong(!r);
1998
0
            break;
1999
0
        }
2000
0
        case CALL_INTRINSIC_1:
2001
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
2002
0
                Py_UNREACHABLE();
2003
0
            }
2004
0
            result = PyNumber_Positive(operand);
2005
0
            break;
2006
0
        default:
2007
0
            Py_UNREACHABLE();
2008
1.75k
    }
2009
1.75k
    return result;
2010
1.75k
}
2011
2012
static int
2013
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
2014
                   PyObject *const_cache, _Py_hashtable_t *consts_index)
2015
1.77k
{
2016
3.53k
    #define UNARYOP_OPERAND_COUNT 1
2017
1.77k
    assert(PyDict_CheckExact(const_cache));
2018
1.77k
    assert(PyList_CheckExact(consts));
2019
1.77k
    cfg_instr *unaryop = &bb->b_instr[i];
2020
2021
1.77k
    cfg_instr *operand_instr;
2022
1.77k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
2023
        /* not a const */
2024
22
        return SUCCESS;
2025
22
    }
2026
2027
1.77k
    assert(loads_const(operand_instr->i_opcode));
2028
1.75k
    PyObject *operand = get_const_value(
2029
1.75k
        operand_instr->i_opcode,
2030
1.75k
        operand_instr->i_oparg,
2031
1.75k
        consts
2032
1.75k
    );
2033
1.75k
    if (operand == NULL) {
2034
0
        return ERROR;
2035
0
    }
2036
2037
1.75k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
2038
1.75k
    Py_DECREF(operand);
2039
1.75k
    if (newconst == NULL) {
2040
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
2041
0
            return ERROR;
2042
0
        }
2043
0
        PyErr_Clear();
2044
0
        return SUCCESS;
2045
0
    }
2046
2047
1.75k
    if (unaryop->i_opcode == UNARY_NOT) {
2048
0
        assert(PyBool_Check(newconst));
2049
0
    }
2050
1.75k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
2051
1.75k
    return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
2052
1.75k
}
2053
2054
1.97k
#define VISITED (-1)
2055
2056
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
2057
// same effect.
2058
static int
2059
swaptimize(basicblock *block, int *ix)
2060
634
{
2061
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
2062
    // for this function. Make sure to blow away cached *.pyc files first!
2063
634
    assert(*ix < block->b_iused);
2064
634
    cfg_instr *instructions = &block->b_instr[*ix];
2065
    // Find the length of the current sequence of SWAPs and NOPs, and record the
2066
    // maximum depth of the stack manipulations:
2067
634
    assert(instructions[0].i_opcode == SWAP);
2068
634
    int depth = instructions[0].i_oparg;
2069
634
    int len = 0;
2070
634
    int more = false;
2071
634
    int limit = block->b_iused - *ix;
2072
808
    while (++len < limit) {
2073
808
        int opcode = instructions[len].i_opcode;
2074
808
        if (opcode == SWAP) {
2075
152
            depth = Py_MAX(depth, instructions[len].i_oparg);
2076
152
            more = true;
2077
152
        }
2078
656
        else if (opcode != NOP) {
2079
634
            break;
2080
634
        }
2081
808
    }
2082
    // It's already optimal if there's only one SWAP:
2083
634
    if (!more) {
2084
482
        return SUCCESS;
2085
482
    }
2086
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
2087
152
    int *stack = PyMem_Malloc(depth * sizeof(int));
2088
152
    if (stack == NULL) {
2089
0
        PyErr_NoMemory();
2090
0
        return ERROR;
2091
0
    }
2092
608
    for (int i = 0; i < depth; i++) {
2093
456
        stack[i] = i;
2094
456
    }
2095
    // Simulate the combined effect of these instructions by "running" them on
2096
    // our "stack":
2097
456
    for (int i = 0; i < len; i++) {
2098
304
        if (instructions[i].i_opcode == SWAP) {
2099
304
            int oparg = instructions[i].i_oparg;
2100
304
            int top = stack[0];
2101
            // SWAPs are 1-indexed:
2102
304
            stack[0] = stack[oparg - 1];
2103
304
            stack[oparg - 1] = top;
2104
304
        }
2105
304
    }
2106
    // Now we can begin! Our approach here is based on a solution to a closely
2107
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2108
    // think of this algorithm as determining the steps needed to efficiently
2109
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2110
    // though, we can efficiently *shuffle* it! For this reason, we will be
2111
    // replacing instructions starting from the *end* of the run. Since the
2112
    // solution is optimal, we don't need to worry about running out of space:
2113
152
    int current = len - 1;
2114
608
    for (int i = 0; i < depth; i++) {
2115
        // Skip items that have already been visited, or just happen to be in
2116
        // the correct location:
2117
456
        if (stack[i] == VISITED || stack[i] == i) {
2118
304
            continue;
2119
304
        }
2120
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2121
        // with other items; traversing the cycle and swapping each item with
2122
        // the next will put them all in the correct place. The weird
2123
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2124
        // can only swap from that position:
2125
152
        int j = i;
2126
608
        while (true) {
2127
            // Skip the actual swap if our item is zero, since swapping the top
2128
            // item with itself is pointless:
2129
608
            if (j) {
2130
304
                assert(0 <= current);
2131
                // SWAPs are 1-indexed:
2132
304
                instructions[current].i_opcode = SWAP;
2133
304
                instructions[current--].i_oparg = j + 1;
2134
304
            }
2135
608
            if (stack[j] == VISITED) {
2136
                // Completed the cycle:
2137
152
                assert(j == i);
2138
152
                break;
2139
152
            }
2140
456
            int next_j = stack[j];
2141
456
            stack[j] = VISITED;
2142
456
            j = next_j;
2143
456
        }
2144
152
    }
2145
    // NOP out any unused instructions:
2146
152
    while (0 <= current) {
2147
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2148
0
    }
2149
152
    PyMem_Free(stack);
2150
152
    *ix += len - 1;
2151
152
    return SUCCESS;
2152
152
}
2153
2154
2155
// This list is pretty small, since it's only okay to reorder opcodes that:
2156
// - can't affect control flow (like jumping or raising exceptions)
2157
// - can't invoke arbitrary code (besides finalizers)
2158
// - only touch the TOS (and pop it when finished)
2159
#define SWAPPABLE(opcode) \
2160
977
    ((opcode) == STORE_FAST || \
2161
977
     (opcode) == STORE_FAST_MAYBE_NULL || \
2162
977
     (opcode) == POP_TOP)
2163
2164
#define STORES_TO(instr) \
2165
182
    (((instr).i_opcode == STORE_FAST || \
2166
182
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2167
182
     ? (instr).i_oparg : -1)
2168
2169
static int
2170
next_swappable_instruction(basicblock *block, int i, int lineno)
2171
935
{
2172
957
    while (++i < block->b_iused) {
2173
952
        cfg_instr *instruction = &block->b_instr[i];
2174
952
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2175
            // Optimizing across this instruction could cause user-visible
2176
            // changes in the names bound between line tracing events!
2177
4
            return -1;
2178
4
        }
2179
948
        if (instruction->i_opcode == NOP) {
2180
22
            continue;
2181
22
        }
2182
926
        if (SWAPPABLE(instruction->i_opcode)) {
2183
385
            return i;
2184
385
        }
2185
541
        return -1;
2186
926
    }
2187
5
    return -1;
2188
935
}
2189
2190
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2191
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2192
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2193
static void
2194
apply_static_swaps(basicblock *block, int i)
2195
634
{
2196
    // SWAPs are to our left, and potential swaperands are to our right:
2197
769
    for (; 0 <= i; i--) {
2198
736
        assert(i < block->b_iused);
2199
736
        cfg_instr *swap = &block->b_instr[i];
2200
736
        if (swap->i_opcode != SWAP) {
2201
102
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2202
                // Nope, but we know how to handle these. Keep looking:
2203
51
                continue;
2204
51
            }
2205
            // We can't reason about what this instruction does. Bail:
2206
51
            return;
2207
102
        }
2208
634
        int j = next_swappable_instruction(block, i, -1);
2209
634
        if (j < 0) {
2210
373
            return;
2211
373
        }
2212
261
        int k = j;
2213
261
        int lineno = block->b_instr[j].i_loc.lineno;
2214
385
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2215
301
            k = next_swappable_instruction(block, k, lineno);
2216
301
            if (k < 0) {
2217
177
                return;
2218
177
            }
2219
301
        }
2220
        // The reordering is not safe if the two instructions to be swapped
2221
        // store to the same location, or if any intervening instruction stores
2222
        // to the same location as either of them.
2223
84
        int store_j = STORES_TO(block->b_instr[j]);
2224
84
        int store_k = STORES_TO(block->b_instr[k]);
2225
84
        if (store_j >= 0 || store_k >= 0) {
2226
84
            if (store_j == store_k) {
2227
0
                return;
2228
0
            }
2229
98
            for (int idx = j + 1; idx < k; idx++) {
2230
14
                int store_idx = STORES_TO(block->b_instr[idx]);
2231
14
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2232
0
                    return;
2233
0
                }
2234
14
            }
2235
84
        }
2236
2237
        // Success!
2238
84
        INSTR_SET_OP0(swap, NOP);
2239
84
        cfg_instr temp = block->b_instr[j];
2240
84
        block->b_instr[j] = block->b_instr[k];
2241
84
        block->b_instr[k] = temp;
2242
84
    }
2243
634
}
2244
2245
static int
2246
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
2247
                               PyObject *consts, _Py_hashtable_t *consts_index)
2248
23.7k
{
2249
23.7k
    assert(PyDict_CheckExact(const_cache));
2250
23.7k
    assert(PyList_CheckExact(consts));
2251
23.7k
    int opcode = 0;
2252
23.7k
    int oparg = 0;
2253
231k
    for (int i = 0; i < bb->b_iused; i++) {
2254
208k
        cfg_instr *inst = &bb->b_instr[i];
2255
208k
        if (inst->i_opcode == LOAD_CONST) {
2256
41.8k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2257
41.8k
            if (constant == NULL) {
2258
0
                return ERROR;
2259
0
            }
2260
41.8k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2261
41.8k
            Py_DECREF(constant);
2262
41.8k
            if (res < 0) {
2263
0
                return ERROR;
2264
0
            }
2265
41.8k
        }
2266
208k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2267
26.4k
                                      inst->i_opcode == COPY &&
2268
12
                                      inst->i_oparg == 1);
2269
208k
        if (! is_copy_of_load_const) {
2270
208k
            opcode = inst->i_opcode;
2271
208k
            oparg = inst->i_oparg;
2272
208k
        }
2273
208k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2274
208k
        if (!loads_const(opcode)) {
2275
166k
            continue;
2276
166k
        }
2277
42.0k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2278
42.0k
        switch(nextop) {
2279
26
            case POP_JUMP_IF_FALSE:
2280
26
            case POP_JUMP_IF_TRUE:
2281
26
            case JUMP_IF_FALSE:
2282
26
            case JUMP_IF_TRUE:
2283
26
            {
2284
                /* Remove LOAD_CONST const; conditional jump */
2285
26
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2286
26
                if (cnt == NULL) {
2287
0
                    return ERROR;
2288
0
                }
2289
26
                int is_true = PyObject_IsTrue(cnt);
2290
26
                Py_DECREF(cnt);
2291
26
                if (is_true == -1) {
2292
0
                    return ERROR;
2293
0
                }
2294
26
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2295
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2296
26
                    INSTR_SET_OP0(inst, NOP);
2297
26
                }
2298
26
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2299
26
                if (is_true == jump_if_true) {
2300
1
                    bb->b_instr[i+1].i_opcode = JUMP;
2301
1
                }
2302
25
                else {
2303
25
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2304
25
                }
2305
26
                break;
2306
26
            }
2307
711
            case IS_OP:
2308
711
            {
2309
                // Fold to POP_JUMP_IF_NONE:
2310
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2311
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2312
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2313
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2314
                // Fold to POP_JUMP_IF_NOT_NONE:
2315
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2316
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2317
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2318
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2319
711
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2320
711
                if (cnt == NULL) {
2321
0
                    return ERROR;
2322
0
                }
2323
711
                if (!Py_IsNone(cnt)) {
2324
34
                    Py_DECREF(cnt);
2325
34
                    break;
2326
34
                }
2327
677
                if (bb->b_iused <= i + 2) {
2328
1
                    break;
2329
1
                }
2330
676
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2331
676
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2332
                // Get rid of TO_BOOL regardless:
2333
676
                if (jump_instr->i_opcode == TO_BOOL) {
2334
661
                    INSTR_SET_OP0(jump_instr, NOP);
2335
661
                    if (bb->b_iused <= i + 3) {
2336
0
                        break;
2337
0
                    }
2338
661
                    jump_instr = &bb->b_instr[i + 3];
2339
661
                }
2340
676
                bool invert = is_instr->i_oparg;
2341
676
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2342
628
                    invert = !invert;
2343
628
                }
2344
48
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2345
15
                    break;
2346
15
                }
2347
661
                INSTR_SET_OP0(inst, NOP);
2348
661
                INSTR_SET_OP0(is_instr, NOP);
2349
661
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2350
661
                                              : POP_JUMP_IF_NONE;
2351
661
                break;
2352
676
            }
2353
26
            case TO_BOOL:
2354
26
            {
2355
26
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2356
26
                if (cnt == NULL) {
2357
0
                    return ERROR;
2358
0
                }
2359
26
                int is_true = PyObject_IsTrue(cnt);
2360
26
                Py_DECREF(cnt);
2361
26
                if (is_true == -1) {
2362
0
                    return ERROR;
2363
0
                }
2364
26
                cnt = PyBool_FromLong(is_true);
2365
26
                int index = add_const(cnt, consts, const_cache, consts_index);
2366
26
                if (index < 0) {
2367
0
                    return ERROR;
2368
0
                }
2369
26
                INSTR_SET_OP0(inst, NOP);
2370
26
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2371
26
                break;
2372
26
            }
2373
42.0k
        }
2374
42.0k
        if (inst->i_opcode == LOAD_CONST) {
2375
25.9k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2376
25.9k
            if (constant == NULL) {
2377
0
                return ERROR;
2378
0
            }
2379
25.9k
            int res = maybe_instr_make_load_common_const(inst, constant);
2380
25.9k
            Py_DECREF(constant);
2381
25.9k
            if (res < 0) {
2382
0
                return ERROR;
2383
0
            }
2384
25.9k
        }
2385
42.0k
    }
2386
23.7k
    return SUCCESS;
2387
23.7k
}
2388
2389
static int
2390
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
2391
5.34k
                    _Py_hashtable_t *consts_index) {
2392
29.1k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2393
23.7k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
2394
23.7k
    }
2395
5.34k
    return SUCCESS;
2396
5.34k
}
2397
2398
static int
2399
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
2400
                     _Py_hashtable_t *consts_index)
2401
23.7k
{
2402
23.7k
    assert(PyDict_CheckExact(const_cache));
2403
23.7k
    assert(PyList_CheckExact(consts));
2404
23.7k
    cfg_instr nop;
2405
23.7k
    INSTR_SET_OP0(&nop, NOP);
2406
232k
    for (int i = 0; i < bb->b_iused; i++) {
2407
208k
        cfg_instr *inst = &bb->b_instr[i];
2408
208k
        cfg_instr *target;
2409
208k
        int opcode = inst->i_opcode;
2410
208k
        int oparg = inst->i_oparg;
2411
208k
        if (HAS_TARGET(opcode)) {
2412
9.62k
            assert(inst->i_target->b_iused > 0);
2413
9.62k
            target = &inst->i_target->b_instr[0];
2414
9.62k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2415
9.62k
        }
2416
198k
        else {
2417
198k
            target = &nop;
2418
198k
        }
2419
208k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2420
208k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2421
208k
        switch (opcode) {
2422
            /* Try to fold tuples of constants.
2423
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2424
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2425
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2426
2.43k
            case BUILD_TUPLE:
2427
2.43k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2428
60
                    switch(oparg) {
2429
0
                        case 1:
2430
0
                            INSTR_SET_OP0(inst, NOP);
2431
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2432
0
                            continue;
2433
50
                        case 2:
2434
59
                        case 3:
2435
59
                            INSTR_SET_OP0(inst, NOP);
2436
59
                            bb->b_instr[i+1].i_opcode = SWAP;
2437
59
                            continue;
2438
60
                    }
2439
60
                }
2440
2.37k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
2441
2.37k
                break;
2442
342
            case BUILD_LIST:
2443
504
            case BUILD_SET:
2444
504
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
2445
504
                break;
2446
252
            case POP_JUMP_IF_NOT_NONE:
2447
677
            case POP_JUMP_IF_NONE:
2448
677
                switch (target->i_opcode) {
2449
16
                    case JUMP:
2450
16
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2451
677
                }
2452
677
                break;
2453
3.27k
            case POP_JUMP_IF_FALSE:
2454
3.27k
                switch (target->i_opcode) {
2455
137
                    case JUMP:
2456
137
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2457
3.27k
                }
2458
3.27k
                break;
2459
3.27k
            case POP_JUMP_IF_TRUE:
2460
915
                switch (target->i_opcode) {
2461
34
                    case JUMP:
2462
34
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2463
915
                }
2464
915
                break;
2465
915
            case JUMP_IF_FALSE:
2466
513
                switch (target->i_opcode) {
2467
0
                    case JUMP:
2468
0
                    case JUMP_IF_FALSE:
2469
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2470
0
                        continue;
2471
2
                    case JUMP_IF_TRUE:
2472
                        // No need to check for loops here, a block's b_next
2473
                        // cannot point to itself.
2474
2
                        assert(inst->i_target != inst->i_target->b_next);
2475
2
                        inst->i_target = inst->i_target->b_next;
2476
2
                        i--;
2477
2
                        continue;
2478
513
                }
2479
511
                break;
2480
511
            case JUMP_IF_TRUE:
2481
66
                switch (target->i_opcode) {
2482
0
                    case JUMP:
2483
0
                    case JUMP_IF_TRUE:
2484
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2485
0
                        continue;
2486
0
                    case JUMP_IF_FALSE:
2487
                        // No need to check for loops here, a block's b_next
2488
                        // cannot point to itself.
2489
0
                        assert(inst->i_target != inst->i_target->b_next);
2490
0
                        inst->i_target = inst->i_target->b_next;
2491
0
                        i--;
2492
0
                        continue;
2493
66
                }
2494
66
                break;
2495
881
            case JUMP:
2496
1.89k
            case JUMP_NO_INTERRUPT:
2497
1.89k
                switch (target->i_opcode) {
2498
0
                    case JUMP:
2499
0
                        i -= jump_thread(bb, inst, target, JUMP);
2500
0
                        continue;
2501
0
                    case JUMP_NO_INTERRUPT:
2502
0
                        i -= jump_thread(bb, inst, target, opcode);
2503
0
                        continue;
2504
1.89k
                }
2505
1.89k
                break;
2506
1.89k
            case FOR_ITER:
2507
465
                if (target->i_opcode == JUMP) {
2508
                    /* This will not work now because the jump (at target) could
2509
                     * be forward or backward and FOR_ITER only jumps forward. We
2510
                     * can re-enable this if ever we implement a backward version
2511
                     * of FOR_ITER.
2512
                     */
2513
                    /*
2514
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2515
                    */
2516
0
                }
2517
465
                break;
2518
4.79k
            case STORE_FAST:
2519
4.79k
                if (opcode == nextop &&
2520
398
                    oparg == bb->b_instr[i+1].i_oparg &&
2521
19
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2522
19
                    bb->b_instr[i].i_opcode = POP_TOP;
2523
19
                    bb->b_instr[i].i_oparg = 0;
2524
19
                }
2525
4.79k
                break;
2526
786
            case SWAP:
2527
786
                if (oparg == 1) {
2528
0
                    INSTR_SET_OP0(inst, NOP);
2529
0
                }
2530
786
                break;
2531
7.76k
            case LOAD_GLOBAL:
2532
7.76k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2533
3.01k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2534
3.01k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2535
3.01k
                }
2536
7.76k
                break;
2537
2.27k
            case COMPARE_OP:
2538
2.27k
                if (nextop == TO_BOOL) {
2539
765
                    INSTR_SET_OP0(inst, NOP);
2540
765
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2541
765
                    continue;
2542
765
                }
2543
1.51k
                break;
2544
1.51k
            case CONTAINS_OP:
2545
1.64k
            case IS_OP:
2546
1.64k
                if (nextop == TO_BOOL) {
2547
796
                    INSTR_SET_OP0(inst, NOP);
2548
796
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2549
796
                    continue;
2550
796
                }
2551
853
                if (nextop == UNARY_NOT) {
2552
0
                    INSTR_SET_OP0(inst, NOP);
2553
0
                    int inverted = oparg ^ 1;
2554
0
                    assert(inverted == 0 || inverted == 1);
2555
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2556
0
                    continue;
2557
0
                }
2558
853
                break;
2559
1.95k
            case TO_BOOL:
2560
1.95k
                if (nextop == TO_BOOL) {
2561
0
                    INSTR_SET_OP0(inst, NOP);
2562
0
                    continue;
2563
0
                }
2564
1.95k
                break;
2565
1.95k
            case UNARY_NOT:
2566
17
                if (nextop == TO_BOOL) {
2567
0
                    INSTR_SET_OP0(inst, NOP);
2568
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2569
0
                    continue;
2570
0
                }
2571
17
                if (nextop == UNARY_NOT) {
2572
0
                    INSTR_SET_OP0(inst, NOP);
2573
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2574
0
                    continue;
2575
0
                }
2576
17
                _Py_FALLTHROUGH;
2577
19
            case UNARY_INVERT:
2578
1.77k
            case UNARY_NEGATIVE:
2579
1.77k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
2580
1.77k
                break;
2581
338
            case CALL_INTRINSIC_1:
2582
338
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2583
63
                    if (nextop == GET_ITER) {
2584
0
                        INSTR_SET_OP0(inst, NOP);
2585
0
                    }
2586
63
                    else {
2587
63
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
2588
63
                    }
2589
63
                }
2590
275
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2591
0
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
2592
0
                }
2593
338
                break;
2594
1.58k
            case BINARY_OP:
2595
1.58k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
2596
1.58k
                break;
2597
208k
        }
2598
208k
    }
2599
2600
231k
    for (int i = 0; i < bb->b_iused; i++) {
2601
208k
        cfg_instr *inst = &bb->b_instr[i];
2602
208k
        if (inst->i_opcode == SWAP) {
2603
634
            if (swaptimize(bb, &i) < 0) {
2604
0
                goto error;
2605
0
            }
2606
634
            apply_static_swaps(bb, i);
2607
634
        }
2608
208k
    }
2609
23.7k
    return SUCCESS;
2610
0
error:
2611
0
    return ERROR;
2612
23.7k
}
2613
2614
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2615
2616
static int
2617
remove_redundant_nops_and_jumps(cfg_builder *g)
2618
11.2k
{
2619
11.2k
    int removed_nops, removed_jumps;
2620
12.1k
    do {
2621
        /* Convergence is guaranteed because the number of
2622
         * redundant jumps and nops only decreases.
2623
         */
2624
12.1k
        removed_nops = remove_redundant_nops(g);
2625
12.1k
        RETURN_IF_ERROR(removed_nops);
2626
12.1k
        removed_jumps = remove_redundant_jumps(g);
2627
12.1k
        RETURN_IF_ERROR(removed_jumps);
2628
12.1k
    } while(removed_nops + removed_jumps > 0);
2629
11.2k
    return SUCCESS;
2630
11.2k
}
2631
2632
/* Perform optimizations on a control flow graph.
2633
   The consts object should still be in list form to allow new constants
2634
   to be appended.
2635
2636
   Code trasnformations that reduce code size initially fill the gaps with
2637
   NOPs.  Later those NOPs are removed.
2638
*/
2639
static int
2640
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
2641
             _Py_hashtable_t *consts_index, int firstlineno)
2642
5.34k
{
2643
5.34k
    assert(PyDict_CheckExact(const_cache));
2644
5.34k
    RETURN_IF_ERROR(check_cfg(g));
2645
5.34k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2646
5.34k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2647
5.34k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2648
5.34k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
2649
29.1k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2650
23.7k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
2651
23.7k
    }
2652
5.34k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2653
5.34k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2654
5.34k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2655
5.34k
    assert(no_redundant_jumps(g));
2656
5.34k
    return SUCCESS;
2657
5.34k
}
2658
2659
static void
2660
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2661
5.68k
{
2662
5.68k
    int32_t line1 = inst1->i_loc.lineno;
2663
5.68k
    int32_t line2 = inst2->i_loc.lineno;
2664
    /* Skip if instructions are on different lines */
2665
5.68k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2666
1.89k
        return;
2667
1.89k
    }
2668
3.79k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2669
207
        return;
2670
207
    }
2671
3.58k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2672
3.58k
    INSTR_SET_OP0(inst2, NOP);
2673
3.58k
}
2674
2675
static int
2676
insert_superinstructions(cfg_builder *g)
2677
5.34k
{
2678
29.1k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2679
2680
217k
        for (int i = 0; i < b->b_iused; i++) {
2681
193k
            cfg_instr *inst = &b->b_instr[i];
2682
193k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2683
193k
            switch(inst->i_opcode) {
2684
22.0k
                case LOAD_FAST:
2685
22.0k
                    if (nextop == LOAD_FAST) {
2686
3.52k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2687
3.52k
                    }
2688
22.0k
                    break;
2689
4.52k
                case STORE_FAST:
2690
4.52k
                    switch (nextop) {
2691
1.84k
                        case LOAD_FAST:
2692
1.84k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2693
1.84k
                            break;
2694
312
                        case STORE_FAST:
2695
312
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2696
312
                            break;
2697
4.52k
                    }
2698
4.52k
                    break;
2699
193k
            }
2700
193k
        }
2701
23.7k
    }
2702
5.34k
    int res = remove_redundant_nops(g);
2703
5.34k
    assert(no_redundant_nops(g));
2704
5.34k
    return res;
2705
5.34k
}
2706
2707
#define NOT_LOCAL -1
2708
4.38k
#define DUMMY_INSTR -1
2709
2710
typedef struct {
2711
    // Index of instruction that produced the reference or DUMMY_INSTR.
2712
    int instr;
2713
2714
    // The local to which the reference refers or NOT_LOCAL.
2715
    int local;
2716
} ref;
2717
2718
typedef struct {
2719
    ref *refs;
2720
    Py_ssize_t size;
2721
    Py_ssize_t capacity;
2722
} ref_stack;
2723
2724
static int
2725
ref_stack_push(ref_stack *stack, ref r)
2726
153k
{
2727
153k
    if (stack->size == stack->capacity) {
2728
5.34k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2729
5.34k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2730
5.34k
        if (refs == NULL) {
2731
0
            PyErr_NoMemory();
2732
0
            return -1;
2733
0
        }
2734
5.34k
        stack->refs = refs;
2735
5.34k
        stack->capacity = new_cap;
2736
5.34k
    }
2737
153k
    stack->refs[stack->size] = r;
2738
153k
    stack->size++;
2739
153k
    return 0;
2740
153k
}
2741
2742
static ref
2743
ref_stack_pop(ref_stack *stack)
2744
137k
{
2745
137k
    assert(stack->size > 0);
2746
137k
    stack->size--;
2747
137k
    ref r = stack->refs[stack->size];
2748
137k
    return r;
2749
137k
}
2750
2751
static void
2752
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2753
545
{
2754
545
    Py_ssize_t idx = stack->size - off;
2755
545
    assert(idx >= 0 && idx < stack->size);
2756
545
    ref tmp = stack->refs[idx];
2757
545
    stack->refs[idx] = stack->refs[stack->size - 1];
2758
545
    stack->refs[stack->size - 1] = tmp;
2759
545
}
2760
2761
static ref
2762
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2763
20.8k
{
2764
20.8k
    assert(idx >= 0 && idx < stack->size);
2765
20.8k
    return stack->refs[idx];
2766
20.8k
}
2767
2768
static void
2769
ref_stack_clear(ref_stack *stack)
2770
16.6k
{
2771
16.6k
    stack->size = 0;
2772
16.6k
}
2773
2774
static void
2775
ref_stack_fini(ref_stack *stack)
2776
5.34k
{
2777
5.34k
    if (stack->refs != NULL) {
2778
5.34k
        PyMem_Free(stack->refs);
2779
5.34k
    }
2780
5.34k
    stack->refs = NULL;
2781
5.34k
    stack->capacity = 0;
2782
5.34k
    stack->size = 0;
2783
5.34k
}
2784
2785
typedef enum {
2786
    // The loaded reference is still on the stack when the local is killed
2787
    SUPPORT_KILLED  = 1,
2788
    // The loaded reference is stored into a local
2789
    STORED_AS_LOCAL = 2,
2790
    // The loaded reference is still on the stack at the end of the basic block
2791
    REF_UNCONSUMED  = 4,
2792
} LoadFastInstrFlag;
2793
2794
static void
2795
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2796
4.46k
{
2797
7.79k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2798
3.32k
        ref r = ref_stack_at(refs, i);
2799
3.32k
        if (r.local == local) {
2800
6
            assert(r.instr >= 0);
2801
6
            instr_flags[r.instr] |= SUPPORT_KILLED;
2802
6
        }
2803
3.32k
    }
2804
4.46k
}
2805
2806
static void
2807
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2808
4.38k
{
2809
4.38k
    kill_local(instr_flags, refs, local);
2810
4.38k
    if (r.instr != DUMMY_INSTR) {
2811
3.84k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2812
3.84k
    }
2813
4.38k
}
2814
2815
static void
2816
load_fast_push_block(basicblock ***sp, basicblock *target,
2817
                     Py_ssize_t start_depth)
2818
14.5k
{
2819
14.5k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2820
14.5k
    if (!target->b_visited) {
2821
11.2k
        target->b_visited = 1;
2822
11.2k
        *(*sp)++ = target;
2823
11.2k
    }
2824
14.5k
}
2825
2826
/*
2827
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2828
 * load borrowed references onto the operand stack.
2829
 *
2830
 * This is only safe when we can prove that the reference in the frame outlives
2831
 * the borrowed reference produced by the instruction. We make this tractable
2832
 * by enforcing the following lifetimes:
2833
 *
2834
 * 1. Borrowed references loaded onto the operand stack live until the end of
2835
 *    the instruction that consumes them from the stack. Any borrowed
2836
 *    references that would escape into the heap (e.g. into frame objects or
2837
 *    generators) are converted into new, strong references.
2838
 *
2839
 * 2. Locals live until they are either killed by an instruction
2840
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2841
 *    via `f_locals` is added to a tuple owned by the frame object.
2842
 *
2843
 * To simplify the problem of detecting which supporting references in the
2844
 * frame are killed by instructions that overwrite locals, we only allow
2845
 * borrowed references to be stored as a local in the frame if they were passed
2846
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2847
 * strong references.
2848
 *
2849
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2850
 * that meet the following criteria:
2851
 *
2852
 * 1. The produced reference must be consumed from the stack before the
2853
 *    supporting reference in the frame is killed.
2854
 *
2855
 * 2. The produced reference cannot be stored as a local.
2856
 *
2857
 * We use abstract interpretation to identify instructions that meet these
2858
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2859
 * stack of abstract references and note any instructions that violate the
2860
 * criteria above. Once we've processed all the instructions in a block, any
2861
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2862
 */
2863
static int
2864
optimize_load_fast(cfg_builder *g)
2865
5.34k
{
2866
5.34k
    int status;
2867
5.34k
    ref_stack refs = {0};
2868
5.34k
    int max_instrs = 0;
2869
5.34k
    basicblock *entryblock = g->g_entryblock;
2870
29.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2871
24.1k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2872
24.1k
    }
2873
5.34k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2874
5.34k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2875
5.34k
    if (instr_flags == NULL) {
2876
0
        PyErr_NoMemory();
2877
0
        return ERROR;
2878
0
    }
2879
5.34k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2880
5.34k
    if (blocks == NULL) {
2881
0
        status = ERROR;
2882
0
        goto done;
2883
0
    }
2884
5.34k
    basicblock **sp = blocks;
2885
5.34k
    *sp = entryblock;
2886
5.34k
    sp++;
2887
5.34k
    entryblock->b_startdepth = 0;
2888
5.34k
    entryblock->b_visited = 1;
2889
2890
5.34k
    #define PUSH_REF(instr, local)                \
2891
153k
        do {                                      \
2892
153k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2893
0
                status = ERROR;                   \
2894
0
                goto done;                        \
2895
0
            }                                     \
2896
153k
        } while(0)
2897
2898
21.9k
    while (sp != blocks) {
2899
16.6k
        basicblock *block = *--sp;
2900
16.6k
        assert(block->b_startdepth > -1);
2901
2902
        // Reset per-block state.
2903
16.6k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2904
2905
        // Reset the stack of refs. We don't track references on the stack
2906
        // across basic blocks, but the bytecode will expect their
2907
        // presence. Add dummy references as necessary.
2908
16.6k
        ref_stack_clear(&refs);
2909
27.2k
        for (int i = 0; i < block->b_startdepth; i++) {
2910
10.6k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2911
10.6k
        }
2912
2913
200k
        for (int i = 0; i < block->b_iused; i++) {
2914
183k
            cfg_instr *instr = &block->b_instr[i];
2915
183k
            int opcode = instr->i_opcode;
2916
183k
            int oparg = instr->i_oparg;
2917
183k
            assert(opcode != EXTENDED_ARG);
2918
183k
            switch (opcode) {
2919
                // Opcodes that load and store locals
2920
7
                case DELETE_FAST: {
2921
7
                    kill_local(instr_flags, &refs, oparg);
2922
7
                    break;
2923
0
                }
2924
2925
19.3k
                case LOAD_FAST: {
2926
19.3k
                    PUSH_REF(i, oparg);
2927
19.3k
                    break;
2928
19.3k
                }
2929
2930
19.3k
                case LOAD_FAST_AND_CLEAR: {
2931
72
                    kill_local(instr_flags, &refs, oparg);
2932
72
                    PUSH_REF(i, oparg);
2933
72
                    break;
2934
72
                }
2935
2936
3.18k
                case LOAD_FAST_LOAD_FAST: {
2937
3.18k
                    PUSH_REF(i, oparg >> 4);
2938
3.18k
                    PUSH_REF(i, oparg & 15);
2939
3.18k
                    break;
2940
3.18k
                }
2941
2942
3.84k
                case STORE_FAST: {
2943
3.84k
                    ref r = ref_stack_pop(&refs);
2944
3.84k
                    store_local(instr_flags, &refs, oparg, r);
2945
3.84k
                    break;
2946
3.18k
                }
2947
2948
45
                case STORE_FAST_LOAD_FAST: {
2949
                    // STORE_FAST
2950
45
                    ref r = ref_stack_pop(&refs);
2951
45
                    store_local(instr_flags, &refs, oparg >> 4, r);
2952
                    // LOAD_FAST
2953
45
                    PUSH_REF(i, oparg & 15);
2954
45
                    break;
2955
45
                }
2956
2957
247
                case STORE_FAST_STORE_FAST: {
2958
                    // STORE_FAST
2959
247
                    ref r = ref_stack_pop(&refs);
2960
247
                    store_local(instr_flags, &refs, oparg >> 4, r);
2961
                    // STORE_FAST
2962
247
                    r = ref_stack_pop(&refs);
2963
247
                    store_local(instr_flags, &refs, oparg & 15, r);
2964
247
                    break;
2965
45
                }
2966
2967
                // Opcodes that shuffle values on the stack
2968
1.07k
                case COPY: {
2969
1.07k
                    assert(oparg > 0);
2970
1.07k
                    Py_ssize_t idx = refs.size - oparg;
2971
1.07k
                    ref r = ref_stack_at(&refs, idx);
2972
1.07k
                    PUSH_REF(r.instr, r.local);
2973
1.07k
                    break;
2974
1.07k
                }
2975
2976
1.07k
                case SWAP: {
2977
545
                    assert(oparg >= 2);
2978
545
                    ref_stack_swap_top(&refs, oparg);
2979
545
                    break;
2980
1.07k
                }
2981
2982
                // We treat opcodes that do not consume all of their inputs on
2983
                // a case by case basis, as we have no generic way of knowing
2984
                // how many inputs should be left on the stack.
2985
2986
                // Opcodes that consume no inputs
2987
1.45k
                case FORMAT_SIMPLE:
2988
1.45k
                case GET_ANEXT:
2989
1.90k
                case GET_ITER:
2990
1.90k
                case GET_LEN:
2991
2.35k
                case IMPORT_FROM:
2992
2.35k
                case MATCH_KEYS:
2993
2.35k
                case MATCH_MAPPING:
2994
2.35k
                case MATCH_SEQUENCE:
2995
2.35k
                case WITH_EXCEPT_START: {
2996
2.35k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2997
2.35k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2998
2.35k
                    int net_pushed = num_pushed - num_popped;
2999
2.35k
                    assert(net_pushed >= 0);
3000
3.24k
                    for (int j = 0; j < net_pushed; j++) {
3001
897
                        PUSH_REF(i, NOT_LOCAL);
3002
897
                    }
3003
2.35k
                    break;
3004
2.35k
                }
3005
3006
                // Opcodes that consume some inputs and push no new values
3007
2.35k
                case DICT_MERGE:
3008
469
                case DICT_UPDATE:
3009
2.61k
                case LIST_APPEND:
3010
2.68k
                case LIST_EXTEND:
3011
9.06k
                case MAP_ADD:
3012
9.06k
                case RERAISE:
3013
9.13k
                case SET_ADD:
3014
9.14k
                case SET_UPDATE: {
3015
9.14k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
3016
9.14k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
3017
9.14k
                    int net_popped = num_popped - num_pushed;
3018
9.14k
                    assert(net_popped > 0);
3019
24.6k
                    for (int i = 0; i < net_popped; i++) {
3020
15.5k
                        ref_stack_pop(&refs);
3021
15.5k
                    }
3022
9.14k
                    break;
3023
9.13k
                }
3024
3025
145
                case END_SEND: {
3026
145
                    assert(_PyOpcode_num_popped(opcode, oparg) == 3);
3027
145
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
3028
145
                    ref tos = ref_stack_pop(&refs);
3029
145
                    ref_stack_pop(&refs);
3030
145
                    ref_stack_pop(&refs);
3031
145
                    PUSH_REF(tos.instr, tos.local);
3032
145
                    break;
3033
145
                }
3034
3035
1.65k
                case SET_FUNCTION_ATTRIBUTE: {
3036
1.65k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
3037
1.65k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
3038
1.65k
                    ref tos = ref_stack_pop(&refs);
3039
1.65k
                    ref_stack_pop(&refs);
3040
1.65k
                    PUSH_REF(tos.instr, tos.local);
3041
1.65k
                    break;
3042
1.65k
                }
3043
3044
                // Opcodes that consume some inputs and push new values
3045
1.65k
                case CHECK_EXC_MATCH: {
3046
0
                    ref_stack_pop(&refs);
3047
0
                    PUSH_REF(i, NOT_LOCAL);
3048
0
                    break;
3049
0
                }
3050
3051
453
                case FOR_ITER: {
3052
453
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
3053
453
                    PUSH_REF(i, NOT_LOCAL);
3054
453
                    break;
3055
453
                }
3056
3057
13.9k
                case LOAD_ATTR:
3058
14.1k
                case LOAD_SUPER_ATTR: {
3059
14.1k
                    ref self = ref_stack_pop(&refs);
3060
14.1k
                    if (opcode == LOAD_SUPER_ATTR) {
3061
191
                        ref_stack_pop(&refs);
3062
191
                        ref_stack_pop(&refs);
3063
191
                    }
3064
14.1k
                    PUSH_REF(i, NOT_LOCAL);
3065
14.1k
                    if (oparg & 1) {
3066
                        // A method call; conservatively assume that self is pushed
3067
                        // back onto the stack
3068
4.25k
                        PUSH_REF(self.instr, self.local);
3069
4.25k
                    }
3070
14.1k
                    break;
3071
14.1k
                }
3072
3073
14.1k
                case LOAD_SPECIAL:
3074
246
                case PUSH_EXC_INFO: {
3075
246
                    ref tos = ref_stack_pop(&refs);
3076
246
                    PUSH_REF(i, NOT_LOCAL);
3077
246
                    PUSH_REF(tos.instr, tos.local);
3078
246
                    break;
3079
246
                }
3080
3081
246
                case SEND: {
3082
145
                    load_fast_push_block(&sp, instr->i_target, refs.size);
3083
145
                    ref_stack_pop(&refs);
3084
145
                    PUSH_REF(i, NOT_LOCAL);
3085
145
                    break;
3086
145
                }
3087
3088
                // Opcodes that consume all of their inputs
3089
127k
                default: {
3090
127k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
3091
127k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
3092
127k
                    if (HAS_TARGET(instr->i_opcode)) {
3093
6.05k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
3094
6.05k
                    }
3095
127k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
3096
                        // Block push opcodes only affect the stack when jumping
3097
                        // to the target.
3098
225k
                        for (int j = 0; j < num_popped; j++) {
3099
98.4k
                            ref_stack_pop(&refs);
3100
98.4k
                        }
3101
221k
                        for (int j = 0; j < num_pushed; j++) {
3102
93.8k
                            PUSH_REF(i, NOT_LOCAL);
3103
93.8k
                        }
3104
127k
                    }
3105
127k
                    break;
3106
127k
                }
3107
183k
            }
3108
183k
        }
3109
3110
        // Push fallthrough block
3111
16.6k
        if (BB_HAS_FALLTHROUGH(block)) {
3112
7.91k
            assert(block->b_next != NULL);
3113
7.91k
            load_fast_push_block(&sp, block->b_next, refs.size);
3114
7.91k
        }
3115
3116
        // Mark instructions that produce values that are on the stack at the
3117
        // end of the basic block
3118
33.0k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3119
16.4k
            ref r = ref_stack_at(&refs, i);
3120
16.4k
            if (r.instr != -1) {
3121
9.93k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3122
9.93k
            }
3123
16.4k
        }
3124
3125
        // Optimize instructions
3126
200k
        for (int i = 0; i < block->b_iused; i++) {
3127
183k
            if (!instr_flags[i]) {
3128
170k
                cfg_instr *instr = &block->b_instr[i];
3129
170k
                switch (instr->i_opcode) {
3130
18.8k
                    case LOAD_FAST:
3131
18.8k
                        instr->i_opcode = LOAD_FAST_BORROW;
3132
18.8k
                        break;
3133
3.17k
                    case LOAD_FAST_LOAD_FAST:
3134
3.17k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3135
3.17k
                        break;
3136
148k
                    default:
3137
148k
                        break;
3138
170k
                }
3139
170k
            }
3140
183k
        }
3141
16.6k
    }
3142
3143
5.34k
    #undef PUSH_REF
3144
3145
5.34k
    status = SUCCESS;
3146
3147
5.34k
done:
3148
5.34k
    ref_stack_fini(&refs);
3149
5.34k
    PyMem_Free(instr_flags);
3150
5.34k
    PyMem_Free(blocks);
3151
5.34k
    return status;
3152
5.34k
}
3153
3154
// helper functions for add_checks_for_loads_of_unknown_variables
3155
static inline void
3156
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3157
79.7k
{
3158
    // Push b if the unsafe mask is giving us any new information.
3159
    // To avoid overflowing the stack, only allow each block once.
3160
    // Use b->b_visited=1 to mean that b is currently on the stack.
3161
79.7k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3162
79.7k
    if (b->b_unsafe_locals_mask != both) {
3163
9.42k
        b->b_unsafe_locals_mask = both;
3164
        // More work left to do.
3165
9.42k
        if (!b->b_visited) {
3166
            // not on the stack, so push it.
3167
9.35k
            *(*sp)++ = b;
3168
9.35k
            b->b_visited = 1;
3169
9.35k
        }
3170
9.42k
    }
3171
79.7k
}
3172
3173
static void
3174
scan_block_for_locals(basicblock *b, basicblock ***sp)
3175
30.3k
{
3176
    // bit i is set if local i is potentially uninitialized
3177
30.3k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3178
225k
    for (int i = 0; i < b->b_iused; i++) {
3179
194k
        cfg_instr *instr = &b->b_instr[i];
3180
194k
        assert(instr->i_opcode != EXTENDED_ARG);
3181
194k
        if (instr->i_except != NULL) {
3182
49.5k
            maybe_push(instr->i_except, unsafe_mask, sp);
3183
49.5k
        }
3184
194k
        if (instr->i_oparg >= 64) {
3185
9.96k
            continue;
3186
9.96k
        }
3187
194k
        assert(instr->i_oparg >= 0);
3188
184k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3189
184k
        switch (instr->i_opcode) {
3190
399
            case DELETE_FAST:
3191
543
            case LOAD_FAST_AND_CLEAR:
3192
831
            case STORE_FAST_MAYBE_NULL:
3193
831
                unsafe_mask |= bit;
3194
831
                break;
3195
9.23k
            case STORE_FAST:
3196
9.23k
                unsafe_mask &= ~bit;
3197
9.23k
                break;
3198
43
            case LOAD_FAST_CHECK:
3199
                // If this doesn't raise, then the local is defined.
3200
43
                unsafe_mask &= ~bit;
3201
43
                break;
3202
37.6k
            case LOAD_FAST:
3203
37.6k
                if (unsafe_mask & bit) {
3204
43
                    instr->i_opcode = LOAD_FAST_CHECK;
3205
43
                }
3206
37.6k
                unsafe_mask &= ~bit;
3207
37.6k
                break;
3208
184k
        }
3209
184k
    }
3210
30.3k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3211
14.3k
        maybe_push(b->b_next, unsafe_mask, sp);
3212
14.3k
    }
3213
30.3k
    cfg_instr *last = basicblock_last_instr(b);
3214
30.3k
    if (last && is_jump(last)) {
3215
12.2k
        assert(last->i_target != NULL);
3216
12.2k
        maybe_push(last->i_target, unsafe_mask, sp);
3217
12.2k
    }
3218
30.3k
}
3219
3220
static int
3221
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3222
0
{
3223
0
    assert(nlocals > 64);
3224
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3225
0
    if (states == NULL) {
3226
0
        PyErr_NoMemory();
3227
0
        return ERROR;
3228
0
    }
3229
0
    Py_ssize_t blocknum = 0;
3230
    // state[i - 64] == blocknum if local i is guaranteed to
3231
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3232
    // STORE_FAST within that basicblock (not followed by
3233
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3234
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3235
0
        blocknum++;
3236
0
        for (int i = 0; i < b->b_iused; i++) {
3237
0
            cfg_instr *instr = &b->b_instr[i];
3238
0
            assert(instr->i_opcode != EXTENDED_ARG);
3239
0
            int arg = instr->i_oparg;
3240
0
            if (arg < 64) {
3241
0
                continue;
3242
0
            }
3243
0
            assert(arg >= 0);
3244
0
            switch (instr->i_opcode) {
3245
0
                case DELETE_FAST:
3246
0
                case LOAD_FAST_AND_CLEAR:
3247
0
                case STORE_FAST_MAYBE_NULL:
3248
0
                    states[arg - 64] = blocknum - 1;
3249
0
                    break;
3250
0
                case STORE_FAST:
3251
0
                    states[arg - 64] = blocknum;
3252
0
                    break;
3253
0
                case LOAD_FAST:
3254
0
                    if (states[arg - 64] != blocknum) {
3255
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3256
0
                    }
3257
0
                    states[arg - 64] = blocknum;
3258
0
                    break;
3259
0
                    Py_UNREACHABLE();
3260
0
            }
3261
0
        }
3262
0
    }
3263
0
    PyMem_Free(states);
3264
0
    return SUCCESS;
3265
0
}
3266
3267
static int
3268
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3269
5.34k
{
3270
5.34k
    assert(PyList_CheckExact(consts));
3271
5.34k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3272
5.34k
    if (nconsts == 0) {
3273
32
        return SUCCESS;  /* nothing to do */
3274
32
    }
3275
3276
5.31k
    Py_ssize_t *index_map = NULL;
3277
5.31k
    Py_ssize_t *reverse_index_map = NULL;
3278
5.31k
    int err = ERROR;
3279
3280
5.31k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3281
5.31k
    if (index_map == NULL) {
3282
0
        goto end;
3283
0
    }
3284
30.3k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3285
25.0k
        index_map[i] = -1;
3286
25.0k
    }
3287
    // The first constant may be docstring; keep it always.
3288
5.31k
    index_map[0] = 0;
3289
3290
    /* mark used consts */
3291
29.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3292
217k
        for (int i = 0; i < b->b_iused; i++) {
3293
193k
            int opcode = b->b_instr[i].i_opcode;
3294
193k
            if (OPCODE_HAS_CONST(opcode)) {
3295
18.7k
                int index = b->b_instr[i].i_oparg;
3296
18.7k
                index_map[index] = index;
3297
18.7k
            }
3298
193k
        }
3299
23.7k
    }
3300
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3301
    /* condense consts */
3302
5.31k
    Py_ssize_t n_used_consts = 0;
3303
35.6k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3304
30.3k
        if (index_map[i] != -1) {
3305
18.1k
            assert(index_map[i] == i);
3306
18.1k
            index_map[n_used_consts++] = index_map[i];
3307
18.1k
        }
3308
30.3k
    }
3309
5.31k
    if (n_used_consts == nconsts) {
3310
        /* nothing to do */
3311
1.39k
        err = SUCCESS;
3312
1.39k
        goto end;
3313
1.39k
    }
3314
3315
    /* move all used consts to the beginning of the consts list */
3316
5.31k
    assert(n_used_consts < nconsts);
3317
20.5k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3318
16.5k
        Py_ssize_t old_index = index_map[i];
3319
16.5k
        assert(i <= old_index && old_index < nconsts);
3320
16.5k
        if (i != old_index) {
3321
8.87k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3322
8.87k
            assert(value != NULL);
3323
8.87k
            PyList_SetItem(consts, i, Py_NewRef(value));
3324
8.87k
        }
3325
16.5k
    }
3326
3327
    /* truncate the consts list at its new size */
3328
3.91k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3329
0
        goto end;
3330
0
    }
3331
    /* adjust const indices in the bytecode */
3332
3.91k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3333
3.91k
    if (reverse_index_map == NULL) {
3334
0
        goto end;
3335
0
    }
3336
32.6k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3337
28.7k
        reverse_index_map[i] = -1;
3338
28.7k
    }
3339
20.5k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3340
16.5k
        assert(index_map[i] != -1);
3341
16.5k
        assert(reverse_index_map[index_map[i]] == -1);
3342
16.5k
        reverse_index_map[index_map[i]] = i;
3343
16.5k
    }
3344
3345
23.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3346
192k
        for (int i = 0; i < b->b_iused; i++) {
3347
172k
            int opcode = b->b_instr[i].i_opcode;
3348
172k
            if (OPCODE_HAS_CONST(opcode)) {
3349
18.3k
                int index = b->b_instr[i].i_oparg;
3350
18.3k
                assert(reverse_index_map[index] >= 0);
3351
18.3k
                assert(reverse_index_map[index] < n_used_consts);
3352
18.3k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3353
18.3k
            }
3354
172k
        }
3355
19.4k
    }
3356
3357
3.91k
    err = SUCCESS;
3358
5.31k
end:
3359
5.31k
    PyMem_Free(index_map);
3360
5.31k
    PyMem_Free(reverse_index_map);
3361
5.31k
    return err;
3362
3.91k
}
3363
3364
3365
3366
static int
3367
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3368
                                                int nlocals,
3369
                                                int nparams)
3370
5.34k
{
3371
5.34k
    if (nlocals == 0) {
3372
1.74k
        return SUCCESS;
3373
1.74k
    }
3374
3.60k
    if (nlocals > 64) {
3375
        // To avoid O(nlocals**2) compilation, locals beyond the first
3376
        // 64 are only analyzed one basicblock at a time: initialization
3377
        // info is not passed between basicblocks.
3378
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3379
0
            return ERROR;
3380
0
        }
3381
0
        nlocals = 64;
3382
0
    }
3383
3.60k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3384
3.60k
    if (stack == NULL) {
3385
0
        return ERROR;
3386
0
    }
3387
3.60k
    basicblock **sp = stack;
3388
3389
    // First origin of being uninitialized:
3390
    // The non-parameter locals in the entry block.
3391
3.60k
    uint64_t start_mask = 0;
3392
6.85k
    for (int i = nparams; i < nlocals; i++) {
3393
3.25k
        start_mask |= (uint64_t)1 << i;
3394
3.25k
    }
3395
3.60k
    maybe_push(entryblock, start_mask, &sp);
3396
3397
    // Second origin of being uninitialized:
3398
    // There could be DELETE_FAST somewhere, so
3399
    // be sure to scan each basicblock at least once.
3400
24.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3401
20.9k
        scan_block_for_locals(b, &sp);
3402
20.9k
    }
3403
    // Now propagate the uncertainty from the origins we found: Use
3404
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3405
12.9k
    while (sp > stack) {
3406
9.35k
        basicblock *b = *--sp;
3407
        // mark as no longer on stack
3408
9.35k
        b->b_visited = 0;
3409
9.35k
        scan_block_for_locals(b, &sp);
3410
9.35k
    }
3411
3.60k
    PyMem_Free(stack);
3412
3.60k
    return SUCCESS;
3413
3.60k
}
3414
3415
3416
static int
3417
4.15k
mark_warm(basicblock *entryblock) {
3418
4.15k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3419
4.15k
    if (stack == NULL) {
3420
0
        return ERROR;
3421
0
    }
3422
4.15k
    basicblock **sp = stack;
3423
3424
4.15k
    *sp++ = entryblock;
3425
4.15k
    entryblock->b_visited = 1;
3426
19.2k
    while (sp > stack) {
3427
15.0k
        basicblock *b = *(--sp);
3428
15.0k
        assert(!b->b_except_handler);
3429
15.0k
        b->b_warm = 1;
3430
15.0k
        basicblock *next = b->b_next;
3431
15.0k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3432
6.44k
            *sp++ = next;
3433
6.44k
            next->b_visited = 1;
3434
6.44k
        }
3435
148k
        for (int i=0; i < b->b_iused; i++) {
3436
133k
            cfg_instr *instr = &b->b_instr[i];
3437
133k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3438
4.49k
                *sp++ = instr->i_target;
3439
4.49k
                instr->i_target->b_visited = 1;
3440
4.49k
            }
3441
133k
        }
3442
15.0k
    }
3443
4.15k
    PyMem_Free(stack);
3444
4.15k
    return SUCCESS;
3445
4.15k
}
3446
3447
static int
3448
4.15k
mark_cold(basicblock *entryblock) {
3449
26.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3450
22.5k
        assert(!b->b_cold && !b->b_warm);
3451
22.5k
    }
3452
4.15k
    if (mark_warm(entryblock) < 0) {
3453
0
        return ERROR;
3454
0
    }
3455
3456
4.15k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3457
4.15k
    if (stack == NULL) {
3458
0
        return ERROR;
3459
0
    }
3460
3461
4.15k
    basicblock **sp = stack;
3462
26.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3463
22.5k
        if (b->b_except_handler) {
3464
1.66k
            assert(!b->b_warm);
3465
1.66k
            *sp++ = b;
3466
1.66k
            b->b_visited = 1;
3467
1.66k
        }
3468
22.5k
    }
3469
3470
7.23k
    while (sp > stack) {
3471
3.08k
        basicblock *b = *(--sp);
3472
3.08k
        b->b_cold = 1;
3473
3.08k
        basicblock *next = b->b_next;
3474
3.08k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3475
929
            if (!next->b_warm && !next->b_visited) {
3476
750
                *sp++ = next;
3477
750
                next->b_visited = 1;
3478
750
            }
3479
929
        }
3480
16.1k
        for (int i = 0; i < b->b_iused; i++) {
3481
13.0k
            cfg_instr *instr = &b->b_instr[i];
3482
13.0k
            if (is_jump(instr)) {
3483
911
                assert(i == b->b_iused - 1);
3484
911
                basicblock *target = b->b_instr[i].i_target;
3485
911
                if (!target->b_warm && !target->b_visited) {
3486
671
                    *sp++ = target;
3487
671
                    target->b_visited = 1;
3488
671
                }
3489
911
            }
3490
13.0k
        }
3491
3.08k
    }
3492
4.15k
    PyMem_Free(stack);
3493
4.15k
    return SUCCESS;
3494
4.15k
}
3495
3496
3497
static int
3498
5.34k
push_cold_blocks_to_end(cfg_builder *g) {
3499
5.34k
    basicblock *entryblock = g->g_entryblock;
3500
5.34k
    if (entryblock->b_next == NULL) {
3501
        /* single basicblock, no need to reorder */
3502
1.19k
        return SUCCESS;
3503
1.19k
    }
3504
4.15k
    RETURN_IF_ERROR(mark_cold(entryblock));
3505
3506
4.15k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3507
3508
    /* If we have a cold block with fallthrough to a warm block, add */
3509
    /* an explicit jump instead of fallthrough */
3510
26.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3511
22.7k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3512
145
            basicblock *explicit_jump = cfg_builder_new_block(g);
3513
145
            if (explicit_jump == NULL) {
3514
0
                return ERROR;
3515
0
            }
3516
145
            if (!IS_LABEL(b->b_next->b_label)) {
3517
0
                b->b_next->b_label.id = next_lbl++;
3518
0
            }
3519
145
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3520
145
                             NO_LOCATION);
3521
145
            explicit_jump->b_cold = 1;
3522
145
            explicit_jump->b_next = b->b_next;
3523
145
            explicit_jump->b_predecessors = 1;
3524
145
            b->b_next = explicit_jump;
3525
3526
            /* set target */
3527
145
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3528
145
            last->i_target = explicit_jump->b_next;
3529
145
        }
3530
22.7k
    }
3531
3532
4.15k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3533
4.15k
    basicblock *cold_blocks = NULL;
3534
4.15k
    basicblock *cold_blocks_tail = NULL;
3535
3536
4.15k
    basicblock *b = entryblock;
3537
5.39k
    while(b->b_next) {
3538
5.39k
        assert(!b->b_cold);
3539
20.7k
        while (b->b_next && !b->b_next->b_cold) {
3540
15.3k
            b = b->b_next;
3541
15.3k
        }
3542
5.39k
        if (b->b_next == NULL) {
3543
            /* no more cold blocks */
3544
4.15k
            break;
3545
4.15k
        }
3546
3547
        /* b->b_next is the beginning of a cold streak */
3548
5.39k
        assert(!b->b_cold && b->b_next->b_cold);
3549
3550
1.24k
        basicblock *b_end = b->b_next;
3551
3.22k
        while (b_end->b_next && b_end->b_next->b_cold) {
3552
1.98k
            b_end = b_end->b_next;
3553
1.98k
        }
3554
3555
        /* b_end is the end of the cold streak */
3556
1.24k
        assert(b_end && b_end->b_cold);
3557
1.24k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3558
3559
1.24k
        if (cold_blocks == NULL) {
3560
592
            cold_blocks = b->b_next;
3561
592
        }
3562
650
        else {
3563
650
            cold_blocks_tail->b_next = b->b_next;
3564
650
        }
3565
1.24k
        cold_blocks_tail = b_end;
3566
1.24k
        b->b_next = b_end->b_next;
3567
1.24k
        b_end->b_next = NULL;
3568
1.24k
    }
3569
4.15k
    assert(b != NULL && b->b_next == NULL);
3570
4.15k
    b->b_next = cold_blocks;
3571
3572
4.15k
    if (cold_blocks != NULL) {
3573
592
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3574
592
    }
3575
4.15k
    return SUCCESS;
3576
4.15k
}
3577
3578
static int
3579
convert_pseudo_conditional_jumps(cfg_builder *g)
3580
5.34k
{
3581
5.34k
    basicblock *entryblock = g->g_entryblock;
3582
29.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3583
213k
        for (int i = 0; i < b->b_iused; i++) {
3584
190k
            cfg_instr *instr = &b->b_instr[i];
3585
190k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3586
577
                assert(i == b->b_iused - 1);
3587
577
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3588
511
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3589
577
                location loc = instr->i_loc;
3590
577
                basicblock *except = instr->i_except;
3591
577
                cfg_instr copy = {
3592
577
                            .i_opcode = COPY,
3593
577
                            .i_oparg = 1,
3594
577
                            .i_loc = loc,
3595
577
                            .i_target = NULL,
3596
577
                            .i_except = except,
3597
577
                };
3598
577
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3599
577
                cfg_instr to_bool = {
3600
577
                            .i_opcode = TO_BOOL,
3601
577
                            .i_oparg = 0,
3602
577
                            .i_loc = loc,
3603
577
                            .i_target = NULL,
3604
577
                            .i_except = except,
3605
577
                };
3606
577
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3607
577
            }
3608
190k
        }
3609
23.9k
    }
3610
5.34k
    return SUCCESS;
3611
5.34k
}
3612
3613
static int
3614
convert_pseudo_ops(cfg_builder *g)
3615
5.34k
{
3616
5.34k
    basicblock *entryblock = g->g_entryblock;
3617
29.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3618
216k
        for (int i = 0; i < b->b_iused; i++) {
3619
192k
            cfg_instr *instr = &b->b_instr[i];
3620
192k
            if (is_block_push(instr)) {
3621
1.66k
                INSTR_SET_OP0(instr, NOP);
3622
1.66k
            }
3623
191k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3624
1.37k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3625
1.37k
                instr->i_opcode = LOAD_FAST;
3626
1.37k
            }
3627
189k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3628
144
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3629
144
                instr->i_opcode = STORE_FAST;
3630
144
            }
3631
192k
        }
3632
23.9k
    }
3633
5.34k
    return remove_redundant_nops_and_jumps(g);
3634
5.34k
}
3635
3636
static inline bool
3637
32.4k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3638
32.4k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3639
19.5k
        return basicblock_has_no_lineno(b);
3640
19.5k
    }
3641
12.9k
    else {
3642
12.9k
        return false;
3643
12.9k
    }
3644
32.4k
}
3645
3646
3647
/* PEP 626 mandates that the f_lineno of a frame is correct
3648
 * after a frame terminates. It would be prohibitively expensive
3649
 * to continuously update the f_lineno field at runtime,
3650
 * so we make sure that all exiting instruction (raises and returns)
3651
 * have a valid line number, allowing us to compute f_lineno lazily.
3652
 * We can do this by duplicating the exit blocks without line number
3653
 * so that none have more than one predecessor. We can then safely
3654
 * copy the line number from the sole predecessor block.
3655
 */
3656
static int
3657
duplicate_exits_without_lineno(cfg_builder *g)
3658
10.6k
{
3659
10.6k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3660
3661
    /* Copy all exit blocks without line number that are targets of a jump.
3662
     */
3663
10.6k
    basicblock *entryblock = g->g_entryblock;
3664
58.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3665
47.7k
        cfg_instr *last = basicblock_last_instr(b);
3666
47.7k
        if (last == NULL) {
3667
8.68k
            continue;
3668
8.68k
        }
3669
39.0k
        if (is_jump(last)) {
3670
15.3k
            basicblock *target = next_nonempty_block(last->i_target);
3671
15.3k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3672
381
                basicblock *new_target = copy_basicblock(g, target);
3673
381
                if (new_target == NULL) {
3674
0
                    return ERROR;
3675
0
                }
3676
381
                new_target->b_instr[0].i_loc = last->i_loc;
3677
381
                last->i_target = new_target;
3678
381
                target->b_predecessors--;
3679
381
                new_target->b_predecessors = 1;
3680
381
                new_target->b_next = target->b_next;
3681
381
                new_target->b_label.id = next_lbl++;
3682
381
                target->b_next = new_target;
3683
381
            }
3684
15.3k
        }
3685
39.0k
    }
3686
3687
    /* Any remaining reachable exit blocks without line number can only be reached by
3688
     * fall through, and thus can only have a single predecessor */
3689
58.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3690
47.7k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3691
17.1k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3692
523
                cfg_instr *last = basicblock_last_instr(b);
3693
523
                assert(last != NULL);
3694
523
                b->b_next->b_instr[0].i_loc = last->i_loc;
3695
523
            }
3696
17.1k
        }
3697
47.7k
    }
3698
10.6k
    return SUCCESS;
3699
10.6k
}
3700
3701
3702
/* If an instruction has no line number, but it's predecessor in the BB does,
3703
 * then copy the line number. If a successor block has no line number, and only
3704
 * one predecessor, then inherit the line number.
3705
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3706
 * Also reduces the size of the line number table,
3707
 * but has no impact on the generated line number events.
3708
 */
3709
3710
static inline void
3711
maybe_propagate_location(basicblock *b, int i, location loc)
3712
417k
{
3713
417k
    assert(b->b_iused > i);
3714
417k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3715
26.4k
         b->b_instr[i].i_loc = loc;
3716
26.4k
    }
3717
417k
}
3718
3719
static void
3720
propagate_line_numbers(basicblock *entryblock)
3721
10.6k
{
3722
58.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3723
47.7k
        cfg_instr *last = basicblock_last_instr(b);
3724
47.7k
        if (last == NULL) {
3725
8.68k
            continue;
3726
8.68k
        }
3727
3728
39.0k
        location prev_location = NO_LOCATION;
3729
437k
        for (int i = 0; i < b->b_iused; i++) {
3730
398k
            maybe_propagate_location(b, i, prev_location);
3731
398k
            prev_location = b->b_instr[i].i_loc;
3732
398k
        }
3733
39.0k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3734
12.4k
            if (b->b_next->b_iused > 0) {
3735
12.4k
                maybe_propagate_location(b->b_next, 0, prev_location);
3736
12.4k
            }
3737
12.4k
        }
3738
39.0k
        if (is_jump(last)) {
3739
15.3k
            basicblock *target = last->i_target;
3740
15.3k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3741
1
                target = target->b_next;
3742
1
            }
3743
15.3k
            if (target->b_predecessors == 1) {
3744
7.07k
                maybe_propagate_location(target, 0, prev_location);
3745
7.07k
            }
3746
15.3k
        }
3747
39.0k
    }
3748
10.6k
}
3749
3750
static int
3751
resolve_line_numbers(cfg_builder *g, int firstlineno)
3752
10.6k
{
3753
10.6k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3754
10.6k
    propagate_line_numbers(g->g_entryblock);
3755
10.6k
    return SUCCESS;
3756
10.6k
}
3757
3758
int
3759
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3760
                        int nlocals, int nparams, int firstlineno)
3761
5.34k
{
3762
5.34k
    assert(cfg_builder_check(g));
3763
    /** Preprocessing **/
3764
    /* Map labels to targets and mark exception handlers */
3765
5.34k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3766
5.34k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3767
5.34k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3768
3769
    /** Optimization **/
3770
3771
5.34k
    _Py_hashtable_t *consts_index = _Py_hashtable_new(
3772
5.34k
        _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
3773
5.34k
    if (consts_index == NULL) {
3774
0
        PyErr_NoMemory();
3775
0
        return ERROR;
3776
0
    }
3777
3778
34.9k
    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
3779
29.6k
        PyObject *item = PyList_GET_ITEM(consts, i);
3780
29.6k
        if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
3781
0
            continue;
3782
0
        }
3783
29.6k
        if (_Py_hashtable_set(consts_index, (void *)item,
3784
29.6k
                              (void *)(uintptr_t)i) < 0) {
3785
0
            _Py_hashtable_destroy(consts_index);
3786
0
            PyErr_NoMemory();
3787
0
            return ERROR;
3788
0
        }
3789
29.6k
    }
3790
3791
5.34k
    int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
3792
3793
5.34k
    _Py_hashtable_destroy(consts_index);
3794
3795
5.34k
    RETURN_IF_ERROR(ret);
3796
3797
5.34k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3798
5.34k
    RETURN_IF_ERROR(
3799
5.34k
        add_checks_for_loads_of_uninitialized_variables(
3800
5.34k
            g->g_entryblock, nlocals, nparams));
3801
5.34k
    RETURN_IF_ERROR(insert_superinstructions(g));
3802
3803
5.34k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3804
5.34k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3805
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3806
    // assert(all_exits_have_lineno(g->g_entryblock));
3807
5.34k
    return SUCCESS;
3808
5.34k
}
3809
3810
static int *
3811
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3812
5.34k
{
3813
5.34k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3814
5.34k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3815
5.34k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3816
3817
5.34k
    int noffsets = ncellvars + nfreevars;
3818
5.34k
    int *fixed = PyMem_New(int, noffsets);
3819
5.34k
    if (fixed == NULL) {
3820
0
        PyErr_NoMemory();
3821
0
        return NULL;
3822
0
    }
3823
7.32k
    for (int i = 0; i < noffsets; i++) {
3824
1.98k
        fixed[i] = nlocals + i;
3825
1.98k
    }
3826
3827
5.34k
    PyObject *varname, *cellindex;
3828
5.34k
    Py_ssize_t pos = 0;
3829
6.41k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3830
1.06k
        PyObject *varindex;
3831
1.06k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3832
0
            goto error;
3833
0
        }
3834
1.06k
        if (varindex == NULL) {
3835
549
            continue;
3836
549
        }
3837
3838
516
        int argoffset = PyLong_AsInt(varindex);
3839
516
        Py_DECREF(varindex);
3840
516
        if (argoffset == -1 && PyErr_Occurred()) {
3841
0
            goto error;
3842
0
        }
3843
3844
516
        int oldindex = PyLong_AsInt(cellindex);
3845
516
        if (oldindex == -1 && PyErr_Occurred()) {
3846
0
            goto error;
3847
0
        }
3848
516
        fixed[oldindex] = argoffset;
3849
516
    }
3850
5.34k
    return fixed;
3851
3852
0
error:
3853
0
    PyMem_Free(fixed);
3854
0
    return NULL;
3855
5.34k
}
3856
3857
#define IS_GENERATOR(CF) \
3858
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3859
3860
static int
3861
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3862
                           int *fixed, int nfreevars)
3863
5.34k
{
3864
5.34k
    assert(umd->u_firstlineno > 0);
3865
3866
    /* Set up cells for any variable that escapes, to be put in a closure. */
3867
5.34k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3868
5.34k
    if (ncellvars) {
3869
        // umd->u_cellvars has the cells out of order so we sort them
3870
        // before adding the MAKE_CELL instructions.  Note that we
3871
        // adjust for arg cells, which come first.
3872
763
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3873
763
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3874
763
        if (sorted == NULL) {
3875
0
            PyErr_NoMemory();
3876
0
            return ERROR;
3877
0
        }
3878
1.82k
        for (int i = 0; i < ncellvars; i++) {
3879
1.06k
            sorted[fixed[i]] = i + 1;
3880
1.06k
        }
3881
2.37k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3882
1.61k
            int oldindex = sorted[i] - 1;
3883
1.61k
            if (oldindex == -1) {
3884
546
                continue;
3885
546
            }
3886
1.06k
            cfg_instr make_cell = {
3887
1.06k
                .i_opcode = MAKE_CELL,
3888
                // This will get fixed in offset_derefs().
3889
1.06k
                .i_oparg = oldindex,
3890
1.06k
                .i_loc = NO_LOCATION,
3891
1.06k
                .i_target = NULL,
3892
1.06k
                .i_except = NULL,
3893
1.06k
            };
3894
1.06k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3895
0
                PyMem_RawFree(sorted);
3896
0
                return ERROR;
3897
0
            }
3898
1.06k
            ncellsused += 1;
3899
1.06k
        }
3900
763
        PyMem_RawFree(sorted);
3901
763
    }
3902
3903
5.34k
    if (nfreevars) {
3904
645
        cfg_instr copy_frees = {
3905
645
            .i_opcode = COPY_FREE_VARS,
3906
645
            .i_oparg = nfreevars,
3907
645
            .i_loc = NO_LOCATION,
3908
645
            .i_target = NULL,
3909
645
            .i_except = NULL,
3910
645
        };
3911
645
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3912
645
    }
3913
3914
5.34k
    return SUCCESS;
3915
5.34k
}
3916
3917
static int
3918
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3919
5.34k
{
3920
5.34k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3921
5.34k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3922
5.34k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3923
5.34k
    int noffsets = ncellvars + nfreevars;
3924
3925
    // First deal with duplicates (arg cells).
3926
5.34k
    int numdropped = 0;
3927
7.32k
    for (int i = 0; i < noffsets ; i++) {
3928
1.98k
        if (fixedmap[i] == i + nlocals) {
3929
1.46k
            fixedmap[i] -= numdropped;
3930
1.46k
        }
3931
516
        else {
3932
            // It was a duplicate (cell/arg).
3933
516
            numdropped += 1;
3934
516
        }
3935
1.98k
    }
3936
3937
    // Then update offsets, either relative to locals or by cell2arg.
3938
29.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3939
216k
        for (int i = 0; i < b->b_iused; i++) {
3940
192k
            cfg_instr *inst = &b->b_instr[i];
3941
            // This is called before extended args are generated.
3942
192k
            assert(inst->i_opcode != EXTENDED_ARG);
3943
192k
            int oldoffset = inst->i_oparg;
3944
192k
            switch(inst->i_opcode) {
3945
1.06k
                case MAKE_CELL:
3946
2.44k
                case LOAD_CLOSURE:
3947
4.62k
                case LOAD_DEREF:
3948
5.17k
                case STORE_DEREF:
3949
5.17k
                case DELETE_DEREF:
3950
5.18k
                case LOAD_FROM_DICT_OR_DEREF:
3951
5.18k
                    assert(oldoffset >= 0);
3952
5.18k
                    assert(oldoffset < noffsets);
3953
5.18k
                    assert(fixedmap[oldoffset] >= 0);
3954
5.18k
                    inst->i_oparg = fixedmap[oldoffset];
3955
192k
            }
3956
192k
        }
3957
23.9k
    }
3958
3959
5.34k
    return numdropped;
3960
5.34k
}
3961
3962
static int
3963
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
3964
5.34k
{
3965
5.34k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3966
5.34k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3967
5.34k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3968
5.34k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3969
5.34k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3970
5.34k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3971
5.34k
    assert(INT_MAX - nlocals - ncellvars > 0);
3972
5.34k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3973
5.34k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3974
5.34k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3975
5.34k
    if (cellfixedoffsets == NULL) {
3976
0
        return ERROR;
3977
0
    }
3978
3979
    // This must be called before fix_cell_offsets().
3980
5.34k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
3981
0
        PyMem_Free(cellfixedoffsets);
3982
0
        return ERROR;
3983
0
    }
3984
3985
5.34k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3986
5.34k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3987
5.34k
    cellfixedoffsets = NULL;
3988
5.34k
    if (numdropped < 0) {
3989
0
        return ERROR;
3990
0
    }
3991
3992
5.34k
    nlocalsplus -= numdropped;
3993
5.34k
    return nlocalsplus;
3994
5.34k
}
3995
3996
cfg_builder *
3997
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3998
5.34k
{
3999
5.34k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
4000
0
        return NULL;
4001
0
    }
4002
5.34k
    cfg_builder *g = _PyCfgBuilder_New();
4003
5.34k
    if (g == NULL) {
4004
0
        return NULL;
4005
0
    }
4006
221k
    for (int i = 0; i < seq->s_used; i++) {
4007
215k
        seq->s_instrs[i].i_target = 0;
4008
215k
    }
4009
221k
    for (int i = 0; i < seq->s_used; i++) {
4010
215k
        _PyInstruction *instr = &seq->s_instrs[i];
4011
215k
        if (HAS_TARGET(instr->i_opcode)) {
4012
10.4k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
4013
10.4k
            seq->s_instrs[instr->i_oparg].i_target = 1;
4014
10.4k
        }
4015
215k
    }
4016
5.34k
    int offset = 0;
4017
221k
    for (int i = 0; i < seq->s_used; i++) {
4018
215k
        _PyInstruction *instr = &seq->s_instrs[i];
4019
215k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
4020
791
            if (seq->s_annotations_code != NULL) {
4021
1
                assert(seq->s_annotations_code->s_labelmap_size == 0
4022
1
                    && seq->s_annotations_code->s_nested == NULL);
4023
4
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
4024
3
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
4025
3
                    assert(!HAS_TARGET(ann_instr->i_opcode));
4026
3
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
4027
0
                        goto error;
4028
0
                    }
4029
3
                }
4030
1
                offset += seq->s_annotations_code->s_used - 1;
4031
1
            }
4032
790
            else {
4033
790
                offset -= 1;
4034
790
            }
4035
791
            continue;
4036
791
        }
4037
214k
        if (instr->i_target) {
4038
8.54k
            jump_target_label lbl_ = {i + offset};
4039
8.54k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
4040
0
                goto error;
4041
0
            }
4042
8.54k
        }
4043
214k
        int opcode = instr->i_opcode;
4044
214k
        int oparg = instr->i_oparg;
4045
214k
        if (HAS_TARGET(opcode)) {
4046
10.4k
            oparg += offset;
4047
10.4k
        }
4048
214k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
4049
0
            goto error;
4050
0
        }
4051
214k
    }
4052
5.34k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
4053
0
        goto error;
4054
0
    }
4055
5.34k
    return g;
4056
0
error:
4057
0
    _PyCfgBuilder_Free(g);
4058
0
    return NULL;
4059
5.34k
}
4060
4061
int
4062
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
4063
5.34k
{
4064
5.34k
    int lbl = 0;
4065
29.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
4066
24.1k
        b->b_label = (jump_target_label){lbl};
4067
24.1k
        lbl += 1;
4068
24.1k
    }
4069
29.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
4070
24.1k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
4071
221k
        for (int i = 0; i < b->b_iused; i++) {
4072
197k
            cfg_instr *instr = &b->b_instr[i];
4073
197k
            if (HAS_TARGET(instr->i_opcode)) {
4074
                /* Set oparg to the label id (it will later be mapped to an offset) */
4075
7.71k
                instr->i_oparg = instr->i_target->b_label.id;
4076
7.71k
            }
4077
197k
            RETURN_IF_ERROR(
4078
197k
                _PyInstructionSequence_Addop(
4079
197k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
4080
4081
197k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
4082
197k
            if (instr->i_except != NULL) {
4083
27.4k
                hi->h_label = instr->i_except->b_label.id;
4084
27.4k
                hi->h_startdepth = instr->i_except->b_startdepth;
4085
27.4k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
4086
27.4k
            }
4087
169k
            else {
4088
169k
                hi->h_label = -1;
4089
169k
            }
4090
197k
        }
4091
24.1k
    }
4092
5.34k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
4093
0
        return ERROR;
4094
0
    }
4095
5.34k
    return SUCCESS;
4096
5.34k
}
4097
4098
4099
int
4100
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
4101
                                     _PyCompile_CodeUnitMetadata *umd,
4102
                                     int *stackdepth, int *nlocalsplus,
4103
                                     _PyInstructionSequence *seq)
4104
5.34k
{
4105
5.34k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
4106
4107
5.34k
    *stackdepth = calculate_stackdepth(g);
4108
5.34k
    if (*stackdepth < 0) {
4109
0
        return ERROR;
4110
0
    }
4111
4112
5.34k
    *nlocalsplus = prepare_localsplus(umd, g);
4113
5.34k
    if (*nlocalsplus < 0) {
4114
0
        return ERROR;
4115
0
    }
4116
4117
5.34k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4118
4119
    /* Order of basic blocks must have been determined by now */
4120
4121
5.34k
    RETURN_IF_ERROR(normalize_jumps(g));
4122
5.34k
    assert(no_redundant_jumps(g));
4123
4124
    /* Can't modify the bytecode after inserting instructions that produce
4125
     * borrowed references.
4126
     */
4127
5.34k
    RETURN_IF_ERROR(optimize_load_fast(g));
4128
4129
    /* Can't modify the bytecode after computing jump offsets. */
4130
5.34k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4131
0
        return ERROR;
4132
0
    }
4133
4134
5.34k
    return SUCCESS;
4135
5.34k
}
4136
4137
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4138
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4139
 */
4140
int
4141
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4142
0
{
4143
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4144
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4145
0
    return SUCCESS;
4146
0
}
4147
4148
/* Exported API functions */
4149
4150
int
4151
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4152
0
{
4153
0
    stack_effects effs;
4154
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4155
0
        return PY_INVALID_STACK_EFFECT;
4156
0
    }
4157
0
    return effs.net;
4158
0
}
4159
4160
int
4161
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4162
26
{
4163
26
    stack_effects effs;
4164
26
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4165
0
        return PY_INVALID_STACK_EFFECT;
4166
0
    }
4167
26
    return effs.net;
4168
26
}
4169
4170
/* Access to compiler optimizations for unit tests.
4171
4172
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4173
 * a CFG, optimizes it and converts back to an instruction list.
4174
 */
4175
4176
static PyObject *
4177
cfg_to_instruction_sequence(cfg_builder *g)
4178
0
{
4179
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4180
0
    if (seq == NULL) {
4181
0
        return NULL;
4182
0
    }
4183
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4184
0
        PyInstructionSequence_Fini(seq);
4185
0
        return NULL;
4186
0
    }
4187
0
    return (PyObject*)seq;
4188
0
}
4189
4190
PyObject *
4191
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4192
0
{
4193
0
    if (!_PyInstructionSequence_Check(seq)) {
4194
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4195
0
        return NULL;
4196
0
    }
4197
0
    if (!PyList_Check(consts)) {
4198
0
        PyErr_SetString(PyExc_TypeError, "consts must be a list");
4199
0
        return NULL;
4200
0
    }
4201
0
    PyObject *const_cache = PyDict_New();
4202
0
    if (const_cache == NULL) {
4203
0
        return NULL;
4204
0
    }
4205
4206
0
    PyObject *res = NULL;
4207
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4208
0
    if (g == NULL) {
4209
0
        goto error;
4210
0
    }
4211
0
    int nparams = 0, firstlineno = 1;
4212
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4213
0
                                nparams, firstlineno) < 0) {
4214
0
        goto error;
4215
0
    }
4216
4217
0
    if (calculate_stackdepth(g) == ERROR) {
4218
0
        goto error;
4219
0
    }
4220
4221
0
    if (optimize_load_fast(g) != SUCCESS) {
4222
0
        goto error;
4223
0
    }
4224
4225
0
    res = cfg_to_instruction_sequence(g);
4226
0
error:
4227
0
    Py_DECREF(const_cache);
4228
0
    _PyCfgBuilder_Free(g);
4229
0
    return res;
4230
0
}