Coverage Report

Created: 2025-12-14 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
1.54M
#define SUCCESS 0
19
8.54k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
2.42M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
516k
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the block are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
529k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
529k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
458
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
2.23M
{
101
2.23M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
2.23M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
2.23M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
2.07M
{
108
2.07M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
2.07M
}
110
111
/* One arg*/
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
51.1k
    do { \
114
51.1k
        assert(OPCODE_HAS_ARG(OP)); \
115
51.1k
        cfg_instr *_instr__ptr_ = (I); \
116
51.1k
        _instr__ptr_->i_opcode = (OP); \
117
51.1k
        _instr__ptr_->i_oparg = (ARG); \
118
51.1k
    } while (0);
119
120
/* No args*/
121
#define INSTR_SET_OP0(I, OP) \
122
111k
    do { \
123
111k
        assert(!OPCODE_HAS_ARG(OP)); \
124
111k
        cfg_instr *_instr__ptr_ = (I); \
125
111k
        _instr__ptr_->i_opcode = (OP); \
126
111k
        _instr__ptr_->i_oparg = 0; \
127
111k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
6.06k
    do { \
131
6.06k
        cfg_instr *_instr__ptr_ = (I); \
132
6.06k
        _instr__ptr_->i_loc = (LOC); \
133
6.06k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
516k
{
144
516k
    assert(b != NULL);
145
516k
    _Py_c_array_t array = {
146
516k
        .array = (void*)b->b_instr,
147
516k
        .allocated_entries = b->b_ialloc,
148
516k
        .item_size = sizeof(cfg_instr),
149
516k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
516k
    };
151
152
516k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
516k
    b->b_instr = array.array;
154
516k
    b->b_ialloc = array.allocated_entries;
155
516k
    return b->b_iused++;
156
516k
}
157
158
static cfg_instr *
159
1.97M
basicblock_last_instr(const basicblock *b) {
160
1.97M
    assert(b->b_iused >= 0);
161
1.97M
    if (b->b_iused > 0) {
162
1.82M
        assert(b->b_instr != NULL);
163
1.82M
        return &b->b_instr[b->b_iused - 1];
164
1.82M
    }
165
150k
    return NULL;
166
1.97M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
62.0k
{
175
62.0k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
62.0k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
62.0k
    b->b_list = g->g_block_list;
182
62.0k
    g->g_block_list = b;
183
62.0k
    b->b_label = NO_LABEL;
184
62.0k
    return b;
185
62.0k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
504k
{
190
504k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
504k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
504k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
504k
    assert(0 <= oparg && oparg < (1 << 30));
194
195
504k
    int off = basicblock_next_instr(b);
196
504k
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
504k
    cfg_instr *i = &b->b_instr[off];
200
504k
    i->i_opcode = opcode;
201
504k
    i->i_oparg = oparg;
202
504k
    i->i_loc = loc;
203
    // memory is already zero initialized
204
504k
    assert(i->i_target == NULL);
205
504k
    assert(i->i_except == NULL);
206
207
504k
    return SUCCESS;
208
504k
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
1.82k
{
213
1.82k
    cfg_instr *last = basicblock_last_instr(b);
214
1.82k
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
1.82k
    RETURN_IF_ERROR(
219
1.82k
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
1.82k
    last = basicblock_last_instr(b);
221
1.82k
    assert(last && last->i_opcode == opcode);
222
1.82k
    last->i_target = target;
223
1.82k
    return SUCCESS;
224
1.82k
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
4.59k
{
229
12.0k
    for (int i = 0; i < from->b_iused; i++) {
230
7.46k
        int n = basicblock_next_instr(to);
231
7.46k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
7.46k
        to->b_instr[n] = from->b_instr[i];
235
7.46k
    }
236
4.59k
    return SUCCESS;
237
4.59k
}
238
239
static inline int
240
621k
basicblock_nofallthrough(const basicblock *b) {
241
621k
    cfg_instr *last = basicblock_last_instr(b);
242
621k
    return (last &&
243
590k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
389k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
621k
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
1.00M
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
813
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
813
    assert(BB_NO_FALLTHROUGH(block));
257
813
    basicblock *result = cfg_builder_new_block(g);
258
813
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
813
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
813
    return result;
265
813
}
266
267
static int
268
4.39k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
4.39k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
70.7k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
66.3k
        block->b_instr[i] = block->b_instr[i-1];
272
66.3k
    }
273
4.39k
    block->b_instr[pos] = *instr;
274
4.39k
    return SUCCESS;
275
4.39k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
51.6k
{
343
51.6k
    assert(block != NULL);
344
51.6k
    g->g_curblock->b_next = block;
345
51.6k
    g->g_curblock = block;
346
51.6k
    return block;
347
51.6k
}
348
349
static inline int
350
121k
basicblock_exits_scope(const basicblock *b) {
351
121k
    cfg_instr *last = basicblock_last_instr(b);
352
121k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
121k
}
354
355
static inline int
356
76.8k
basicblock_has_eval_break(const basicblock *b) {
357
382k
    for (int i = 0; i < b->b_iused; i++) {
358
337k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
31.2k
            return true;
360
31.2k
        }
361
337k
    }
362
45.5k
    return false;
363
76.8k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
512k
{
368
512k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
512k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
44.0k
        return true;
371
44.0k
    }
372
468k
    if (IS_LABEL(g->g_current_label)) {
373
7.56k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
7.56k
            return true;
375
7.56k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
7.56k
    }
382
460k
    return false;
383
468k
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
512k
{
388
512k
    if (cfg_builder_current_block_is_terminated(g)) {
389
51.6k
        basicblock *b = cfg_builder_new_block(g);
390
51.6k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
51.6k
        b->b_label = g->g_current_label;
394
51.6k
        g->g_current_label = NO_LABEL;
395
51.6k
        cfg_builder_use_next_block(g, b);
396
51.6k
    }
397
512k
    return SUCCESS;
398
512k
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
{
404
    assert(g->g_entryblock->b_iused > 0);
405
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
        assert(!_PyMem_IsPtrFreed(block));
407
        if (block->b_instr != NULL) {
408
            assert(block->b_ialloc > 0);
409
            assert(block->b_iused >= 0);
410
            assert(block->b_ialloc >= block->b_iused);
411
        }
412
        else {
413
            assert (block->b_iused == 0);
414
            assert (block->b_ialloc == 0);
415
        }
416
    }
417
    return true;
418
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
8.54k
{
424
8.54k
    g->g_block_list = NULL;
425
8.54k
    basicblock *block = cfg_builder_new_block(g);
426
8.54k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
8.54k
    g->g_curblock = g->g_entryblock = block;
430
8.54k
    g->g_current_label = NO_LABEL;
431
8.54k
    return SUCCESS;
432
8.54k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
8.54k
{
437
8.54k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
8.54k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
8.54k
    memset(g, 0, sizeof(cfg_builder));
443
8.54k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
8.54k
    return g;
448
8.54k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
8.54k
{
453
8.54k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
8.54k
    assert(cfg_builder_check(g));
457
8.54k
    basicblock *b = g->g_block_list;
458
70.5k
    while (b != NULL) {
459
62.0k
        if (b->b_instr) {
460
62.0k
            PyMem_Free((void *)b->b_instr);
461
62.0k
        }
462
62.0k
        basicblock *next = b->b_list;
463
62.0k
        PyMem_Free((void *)b);
464
62.0k
        b = next;
465
62.0k
    }
466
8.54k
    PyMem_Free(g);
467
8.54k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
8.54k
{
472
8.54k
    int nblocks = 0;
473
68.7k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
60.2k
        nblocks++;
475
60.2k
    }
476
8.54k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
8.54k
    return SUCCESS;
481
8.54k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
26.0k
{
486
26.0k
    g->g_current_label = lbl;
487
26.0k
    return cfg_builder_maybe_start_new_block(g);
488
26.0k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
486k
{
493
486k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
486k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
486k
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
104k
{
501
109k
    while (b && b->b_iused == 0) {
502
4.27k
        b = b->b_next;
503
4.27k
    }
504
104k
    return b;
505
104k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
no_redundant_nops(cfg_builder *g) {
514
    if (remove_redundant_nops(g) != 0) {
515
        return false;
516
    }
517
    return true;
518
}
519
520
static bool
521
no_redundant_jumps(cfg_builder *g) {
522
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
        cfg_instr *last = basicblock_last_instr(b);
524
        if (last != NULL) {
525
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
                basicblock *next = next_nonempty_block(b->b_next);
527
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
                if (jump_target == next) {
529
                    assert(next);
530
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
                        assert(0);
532
                        return false;
533
                    }
534
                }
535
            }
536
        }
537
    }
538
    return true;
539
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
62.0k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
62.0k
    cfg_instr *last = basicblock_last_instr(b);
547
62.0k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
45.8k
        return SUCCESS;
549
45.8k
    }
550
62.0k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
16.1k
    bool is_forward = last->i_target->b_visited == 0;
553
16.1k
    if (is_forward) {
554
15.2k
        RETURN_IF_ERROR(
555
15.2k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
15.2k
        return SUCCESS;
557
15.2k
    }
558
559
937
    int reversed_opcode = 0;
560
937
    switch(last->i_opcode) {
561
39
        case POP_JUMP_IF_NOT_NONE:
562
39
            reversed_opcode = POP_JUMP_IF_NONE;
563
39
            break;
564
41
        case POP_JUMP_IF_NONE:
565
41
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
41
            break;
567
770
        case POP_JUMP_IF_FALSE:
568
770
            reversed_opcode = POP_JUMP_IF_TRUE;
569
770
            break;
570
87
        case POP_JUMP_IF_TRUE:
571
87
            reversed_opcode = POP_JUMP_IF_FALSE;
572
87
            break;
573
937
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
937
    basicblock *target = last->i_target;
579
937
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
937
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
937
    RETURN_IF_ERROR(
584
937
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
937
    RETURN_IF_ERROR(
586
937
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
937
    backwards_jump->b_startdepth = target->b_startdepth;
588
937
    last->i_opcode = reversed_opcode;
589
937
    last->i_target = b->b_next;
590
591
937
    backwards_jump->b_cold = b->b_cold;
592
937
    backwards_jump->b_next = b->b_next;
593
937
    b->b_next = backwards_jump;
594
937
    return SUCCESS;
595
937
}
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
8.54k
{
601
8.54k
    basicblock *entryblock = g->g_entryblock;
602
69.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
61.0k
        b->b_visited = 0;
604
61.0k
    }
605
70.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
62.0k
        b->b_visited = 1;
607
62.0k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
62.0k
    }
609
8.54k
    return SUCCESS;
610
8.54k
}
611
612
static int
613
8.54k
check_cfg(cfg_builder *g) {
614
68.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if jump or exit is not last instruction in the block. */
616
546k
        for (int i = 0; i < b->b_iused; i++) {
617
486k
            int opcode = b->b_instr[i].i_opcode;
618
486k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
486k
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
52.6k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
52.6k
            }
625
486k
        }
626
60.2k
    }
627
8.54k
    return SUCCESS;
628
8.54k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
33.1k
{
633
33.1k
    int lbl = -1;
634
274k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
241k
        if (b->b_label.id > lbl) {
636
94.2k
            lbl = b->b_label.id;
637
94.2k
        }
638
241k
    }
639
33.1k
    return lbl;
640
33.1k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
8.54k
{
646
8.54k
    int max_label = get_max_label(entryblock);
647
8.54k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
8.54k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
8.54k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
8.54k
    memset(label2block, 0, mapsize);
654
68.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
60.2k
        if (b->b_label.id >= 0) {
656
26.0k
            label2block[b->b_label.id] = b;
657
26.0k
        }
658
60.2k
    }
659
68.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
546k
        for (int i = 0; i < b->b_iused; i++) {
661
486k
            cfg_instr *instr = &b->b_instr[i];
662
486k
            assert(instr->i_target == NULL);
663
486k
            if (HAS_TARGET(instr->i_opcode)) {
664
32.3k
                int lbl = instr->i_oparg;
665
32.3k
                assert(lbl >= 0 && lbl <= max_label);
666
32.3k
                instr->i_target = label2block[lbl];
667
32.3k
                assert(instr->i_target != NULL);
668
32.3k
                assert(instr->i_target->b_label.id == lbl);
669
32.3k
            }
670
486k
        }
671
60.2k
    }
672
8.54k
    PyMem_Free(label2block);
673
8.54k
    return SUCCESS;
674
8.54k
}
675
676
static int
677
8.54k
mark_except_handlers(basicblock *entryblock) {
678
#ifndef NDEBUG
679
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
        assert(!b->b_except_handler);
681
    }
682
#endif
683
68.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
546k
        for (int i=0; i < b->b_iused; i++) {
685
486k
            cfg_instr *instr = &b->b_instr[i];
686
486k
            if (is_block_push(instr)) {
687
3.64k
                instr->i_target->b_except_handler = 1;
688
3.64k
            }
689
486k
        }
690
60.2k
    }
691
8.54k
    return SUCCESS;
692
8.54k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
3.63k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
3.63k
    assert(is_block_push(setup));
704
3.63k
    int opcode = setup->i_opcode;
705
3.63k
    basicblock * target = setup->i_target;
706
3.63k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
2.14k
        target->b_preserve_lasti = 1;
708
2.14k
    }
709
3.63k
    assert(stack->depth <= CO_MAXBLOCKS);
710
3.63k
    stack->handlers[++stack->depth] = target;
711
3.63k
    return target;
712
3.63k
}
713
714
static basicblock *
715
2.98k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
2.98k
    assert(stack->depth > 0);
717
2.98k
    return stack->handlers[--stack->depth];
718
2.98k
}
719
720
static basicblock *
721
51.9k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
51.9k
    return stack->handlers[stack->depth];
723
51.9k
}
724
725
static struct _PyCfgExceptStack *
726
8.54k
make_except_stack(void) {
727
8.54k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
8.54k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
8.54k
    new->depth = 0;
733
8.54k
    new->handlers[0] = NULL;
734
8.54k
    return new;
735
8.54k
}
736
737
static struct _PyCfgExceptStack *
738
19.6k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
19.6k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
19.6k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
19.6k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
19.6k
    return copy;
746
19.6k
}
747
748
static basicblock**
749
64.4k
make_cfg_traversal_stack(basicblock *entryblock) {
750
64.4k
    int nblocks = 0;
751
545k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
481k
        b->b_visited = 0;
753
481k
        nblocks++;
754
481k
    }
755
64.4k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
64.4k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
64.4k
    return stack;
760
64.4k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have different stack effect when jump to the target and
765
   when not jump. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jump
768
   * 1 -- when jump
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
453k
{
779
453k
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
453k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
453k
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
453k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
453k
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
453k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
3.63k
        effects->net = 0;
793
3.63k
        return 0;
794
3.63k
    }
795
449k
    effects->net = pushed - popped;
796
449k
    return 0;
797
453k
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
63.9k
{
802
63.9k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
63.9k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
51.8k
        assert(b->b_startdepth < 0);
808
51.8k
        b->b_startdepth = depth;
809
51.8k
        *(*sp)++ = b;
810
51.8k
    }
811
63.9k
    return SUCCESS;
812
63.9k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
8.54k
{
820
8.54k
    basicblock *entryblock = g->g_entryblock;
821
69.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
61.0k
        b->b_startdepth = INT_MIN;
823
61.0k
    }
824
8.54k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
8.54k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
8.54k
    int stackdepth = -1;
831
8.54k
    int maxdepth = 0;
832
8.54k
    basicblock **sp = stack;
833
8.54k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
60.3k
    while (sp != stack) {
837
51.8k
        basicblock *b = *--sp;
838
51.8k
        int depth = b->b_startdepth;
839
51.8k
        assert(depth >= 0);
840
51.8k
        basicblock *next = b->b_next;
841
452k
        for (int i = 0; i < b->b_iused; i++) {
842
425k
            cfg_instr *instr = &b->b_instr[i];
843
425k
            stack_effects effects;
844
425k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
425k
            int new_depth = depth + effects.net;
851
425k
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow");
854
0
                goto error;
855
0
            }
856
425k
            maxdepth = Py_MAX(maxdepth, depth);
857
425k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
28.1k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
28.1k
                int target_depth = depth + effects.net;
865
28.1k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
28.1k
                maxdepth = Py_MAX(maxdepth, depth);
867
28.1k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
28.1k
            }
871
425k
            depth = new_depth;
872
425k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
425k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
418k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
24.5k
            {
876
                /* remaining code is dead */
877
24.5k
                next = NULL;
878
24.5k
                break;
879
24.5k
            }
880
425k
        }
881
51.8k
        if (next != NULL) {
882
27.2k
            assert(BB_HAS_FALLTHROUGH(b));
883
27.2k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
27.2k
        }
887
51.8k
    }
888
8.54k
    stackdepth = maxdepth;
889
8.54k
error:
890
8.54k
    PyMem_Free(stack);
891
8.54k
    return stackdepth;
892
8.54k
}
893
894
static int
895
8.54k
label_exception_targets(basicblock *entryblock) {
896
8.54k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
8.54k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
8.54k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
8.54k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
8.54k
    except_stack->depth = 0;
907
8.54k
    todo_stack[0] = entryblock;
908
8.54k
    entryblock->b_visited = 1;
909
8.54k
    entryblock->b_exceptstack = except_stack;
910
8.54k
    basicblock **todo = &todo_stack[1];
911
8.54k
    basicblock *handler = NULL;
912
60.4k
    while (todo > todo_stack) {
913
51.9k
        todo--;
914
51.9k
        basicblock *b = todo[0];
915
51.9k
        assert(b->b_visited == 1);
916
51.9k
        except_stack = b->b_exceptstack;
917
51.9k
        assert(except_stack != NULL);
918
51.9k
        b->b_exceptstack = NULL;
919
51.9k
        handler = except_stack_top(except_stack);
920
51.9k
        int last_yield_except_depth = -1;
921
521k
        for (int i = 0; i < b->b_iused; i++) {
922
469k
            cfg_instr *instr = &b->b_instr[i];
923
469k
            if (is_block_push(instr)) {
924
3.63k
                if (!instr->i_target->b_visited) {
925
3.63k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
3.63k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
3.63k
                    instr->i_target->b_exceptstack = copy;
930
3.63k
                    todo[0] = instr->i_target;
931
3.63k
                    instr->i_target->b_visited = 1;
932
3.63k
                    todo++;
933
3.63k
                }
934
3.63k
                handler = push_except_block(except_stack, instr);
935
3.63k
            }
936
465k
            else if (instr->i_opcode == POP_BLOCK) {
937
2.98k
                handler = pop_except_block(except_stack);
938
2.98k
                INSTR_SET_OP0(instr, NOP);
939
2.98k
            }
940
462k
            else if (is_jump(instr)) {
941
27.0k
                instr->i_except = handler;
942
27.0k
                assert(i == b->b_iused -1);
943
27.0k
                if (!instr->i_target->b_visited) {
944
19.4k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
16.0k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
16.0k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
16.0k
                        instr->i_target->b_exceptstack = copy;
950
16.0k
                    }
951
3.44k
                    else {
952
3.44k
                        instr->i_target->b_exceptstack = except_stack;
953
3.44k
                        except_stack = NULL;
954
3.44k
                    }
955
19.4k
                    todo[0] = instr->i_target;
956
19.4k
                    instr->i_target->b_visited = 1;
957
19.4k
                    todo++;
958
19.4k
                }
959
27.0k
            }
960
435k
            else if (instr->i_opcode == YIELD_VALUE) {
961
594
                instr->i_except = handler;
962
594
                last_yield_except_depth = except_stack->depth;
963
594
            }
964
434k
            else if (instr->i_opcode == RESUME) {
965
9.13k
                instr->i_except = handler;
966
9.13k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
594
                    assert(last_yield_except_depth >= 0);
968
594
                    if (last_yield_except_depth == 1) {
969
516
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
516
                    }
971
594
                    last_yield_except_depth = -1;
972
594
                }
973
9.13k
            }
974
425k
            else {
975
425k
                instr->i_except = handler;
976
425k
            }
977
469k
        }
978
51.9k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
979
20.2k
            assert(except_stack != NULL);
980
20.2k
            b->b_next->b_exceptstack = except_stack;
981
20.2k
            todo[0] = b->b_next;
982
20.2k
            b->b_next->b_visited = 1;
983
20.2k
            todo++;
984
20.2k
        }
985
31.6k
        else if (except_stack != NULL) {
986
28.2k
           PyMem_Free(except_stack);
987
28.2k
        }
988
51.9k
    }
989
#ifdef Py_DEBUG
990
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991
        assert(b->b_exceptstack == NULL);
992
    }
993
#endif
994
8.54k
    PyMem_Free(todo_stack);
995
8.54k
    return SUCCESS;
996
0
error:
997
0
    PyMem_Free(todo_stack);
998
0
    PyMem_Free(except_stack);
999
0
    return ERROR;
1000
8.54k
}
1001
1002
/***** CFG optimizations *****/
1003
1004
static int
1005
17.0k
remove_unreachable(basicblock *entryblock) {
1006
138k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1007
121k
        b->b_predecessors = 0;
1008
121k
    }
1009
17.0k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1010
17.0k
    if (stack == NULL) {
1011
0
        return ERROR;
1012
0
    }
1013
17.0k
    basicblock **sp = stack;
1014
17.0k
    entryblock->b_predecessors = 1;
1015
17.0k
    *sp++ = entryblock;
1016
17.0k
    entryblock->b_visited = 1;
1017
119k
    while (sp > stack) {
1018
102k
        basicblock *b = *(--sp);
1019
102k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1020
51.9k
            if (!b->b_next->b_visited) {
1021
44.5k
                assert(b->b_next->b_predecessors == 0);
1022
44.5k
                *sp++ = b->b_next;
1023
44.5k
                b->b_next->b_visited = 1;
1024
44.5k
            }
1025
51.9k
            b->b_next->b_predecessors++;
1026
51.9k
        }
1027
1.00M
        for (int i = 0; i < b->b_iused; i++) {
1028
906k
            basicblock *target;
1029
906k
            cfg_instr *instr = &b->b_instr[i];
1030
906k
            if (is_jump(instr) || is_block_push(instr)) {
1031
58.3k
                target = instr->i_target;
1032
58.3k
                if (!target->b_visited) {
1033
41.0k
                    *sp++ = target;
1034
41.0k
                    target->b_visited = 1;
1035
41.0k
                }
1036
58.3k
                target->b_predecessors++;
1037
58.3k
            }
1038
906k
        }
1039
102k
    }
1040
17.0k
    PyMem_Free(stack);
1041
1042
    /* Delete unreachable instructions */
1043
138k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1044
121k
       if (b->b_predecessors == 0) {
1045
18.5k
            b->b_iused = 0;
1046
18.5k
            b->b_except_handler = 0;
1047
18.5k
       }
1048
121k
    }
1049
17.0k
    return SUCCESS;
1050
17.0k
}
1051
1052
static int
1053
337k
basicblock_remove_redundant_nops(basicblock *bb) {
1054
    /* Remove NOPs when legal to do so. */
1055
337k
    int dest = 0;
1056
337k
    int prev_lineno = -1;
1057
2.63M
    for (int src = 0; src < bb->b_iused; src++) {
1058
2.29M
        int lineno = bb->b_instr[src].i_loc.lineno;
1059
2.29M
        if (bb->b_instr[src].i_opcode == NOP) {
1060
            /* Eliminate no-op if it doesn't have a line number */
1061
56.3k
            if (lineno < 0) {
1062
7.44k
                continue;
1063
7.44k
            }
1064
            /* or, if the previous instruction had the same line number. */
1065
48.9k
            if (prev_lineno == lineno) {
1066
39.2k
                continue;
1067
39.2k
            }
1068
            /* or, if the next instruction has same line number or no line number */
1069
9.65k
            if (src < bb->b_iused - 1) {
1070
8.22k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1071
8.22k
                if (next_lineno == lineno) {
1072
5.49k
                    continue;
1073
5.49k
                }
1074
2.73k
                if (next_lineno < 0) {
1075
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1076
0
                    continue;
1077
0
                }
1078
2.73k
            }
1079
1.43k
            else {
1080
1.43k
                basicblock *next = next_nonempty_block(bb->b_next);
1081
                /* or if last instruction in BB and next BB has same line number */
1082
1.43k
                if (next) {
1083
1.43k
                    location next_loc = NO_LOCATION;
1084
1.43k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1085
1.43k
                        cfg_instr *instr = &next->b_instr[next_i];
1086
1.43k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1087
                            /* Skip over NOPs without a location, they will be removed */
1088
0
                            continue;
1089
0
                        }
1090
1.43k
                        next_loc = instr->i_loc;
1091
1.43k
                        break;
1092
1.43k
                    }
1093
1.43k
                    if (lineno == next_loc.lineno) {
1094
9
                        continue;
1095
9
                    }
1096
1.43k
                }
1097
1.43k
            }
1098
1099
9.65k
        }
1100
2.24M
        if (dest != src) {
1101
213k
            bb->b_instr[dest] = bb->b_instr[src];
1102
213k
        }
1103
2.24M
        dest++;
1104
2.24M
        prev_lineno = lineno;
1105
2.24M
    }
1106
337k
    assert(dest <= bb->b_iused);
1107
337k
    int num_removed = bb->b_iused - dest;
1108
337k
    bb->b_iused = dest;
1109
337k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1110
337k
    return num_removed;
1111
337k
}
1112
1113
static int
1114
29.6k
remove_redundant_nops(cfg_builder *g) {
1115
29.6k
    int changes = 0;
1116
306k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1117
276k
        int change = basicblock_remove_redundant_nops(b);
1118
276k
        RETURN_IF_ERROR(change);
1119
276k
        changes += change;
1120
276k
    }
1121
29.6k
    return changes;
1122
29.6k
}
1123
1124
static int
1125
remove_redundant_nops_and_pairs(basicblock *entryblock)
1126
8.54k
{
1127
8.54k
    bool done = false;
1128
1129
17.0k
    while (! done) {
1130
8.54k
        done = true;
1131
8.54k
        cfg_instr *prev_instr = NULL;
1132
8.54k
        cfg_instr *instr = NULL;
1133
69.5k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1134
61.0k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1135
61.0k
            if (IS_LABEL(b->b_label)) {
1136
                /* this block is a jump target, forget instr */
1137
26.8k
                instr = NULL;
1138
26.8k
            }
1139
495k
            for (int i = 0; i < b->b_iused; i++) {
1140
434k
                prev_instr = instr;
1141
434k
                instr = &b->b_instr[i];
1142
434k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1143
434k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1144
434k
                int opcode = instr->i_opcode;
1145
434k
                bool is_redundant_pair = false;
1146
434k
                if (opcode == POP_TOP) {
1147
10.8k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1148
0
                       is_redundant_pair = true;
1149
0
                   }
1150
10.8k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1151
0
                       is_redundant_pair = true;
1152
0
                   }
1153
10.8k
                }
1154
434k
                if (is_redundant_pair) {
1155
0
                    INSTR_SET_OP0(prev_instr, NOP);
1156
0
                    INSTR_SET_OP0(instr, NOP);
1157
0
                    done = false;
1158
0
                }
1159
434k
            }
1160
61.0k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1161
44.4k
                instr = NULL;
1162
44.4k
            }
1163
61.0k
        }
1164
8.54k
    }
1165
8.54k
    return SUCCESS;
1166
8.54k
}
1167
1168
static int
1169
21.0k
remove_redundant_jumps(cfg_builder *g) {
1170
    /* If a non-empty block ends with a jump instruction, check if the next
1171
     * non-empty block reached through normal flow control is the target
1172
     * of that jump. If it is, then the jump instruction is redundant and
1173
     * can be deleted.
1174
     *
1175
     * Return the number of changes applied, or -1 on error.
1176
     */
1177
1178
21.0k
    int changes = 0;
1179
236k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1180
215k
        cfg_instr *last = basicblock_last_instr(b);
1181
215k
        if (last == NULL) {
1182
29.2k
            continue;
1183
29.2k
        }
1184
215k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1185
186k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1186
26.3k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1187
26.3k
            if (jump_target == NULL) {
1188
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1189
0
                return ERROR;
1190
0
            }
1191
26.3k
            basicblock *next = next_nonempty_block(b->b_next);
1192
26.3k
            if (jump_target == next) {
1193
866
                changes++;
1194
866
                INSTR_SET_OP0(last, NOP);
1195
866
            }
1196
26.3k
        }
1197
186k
    }
1198
1199
21.0k
    return changes;
1200
21.0k
}
1201
1202
static inline bool
1203
75.7k
basicblock_has_no_lineno(basicblock *b) {
1204
90.5k
    for (int i = 0; i < b->b_iused; i++) {
1205
83.2k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1206
68.3k
            return false;
1207
68.3k
        }
1208
83.2k
    }
1209
7.38k
    return true;
1210
75.7k
}
1211
1212
/* Maximum size of basic block that should be copied in optimizer */
1213
3.14k
#define MAX_COPY_SIZE 4
1214
1215
/* If this block ends with an unconditional jump to a small exit block or
1216
 * a block that has no line numbers (and no fallthrough), then
1217
 * remove the jump and extend this block with the target.
1218
 * Returns 1 if extended, 0 if no change, and -1 on error.
1219
 */
1220
static int
1221
97.1k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1222
97.1k
    cfg_instr *last = basicblock_last_instr(bb);
1223
97.1k
    if (last == NULL) {
1224
0
        return 0;
1225
0
    }
1226
97.1k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1227
79.4k
        return 0;
1228
79.4k
    }
1229
17.6k
    basicblock *target = last->i_target;
1230
17.6k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1231
3.14k
                             target->b_iused <= MAX_COPY_SIZE);
1232
17.6k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1233
3.25k
                                     !BB_HAS_FALLTHROUGH(target));
1234
17.6k
    if (small_exit_block || no_lineno_no_fallthrough) {
1235
3.77k
        assert(is_jump(last));
1236
3.77k
        int removed_jump_opcode = last->i_opcode;
1237
3.77k
        INSTR_SET_OP0(last, NOP);
1238
3.77k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1239
3.77k
        if (no_lineno_no_fallthrough) {
1240
3.25k
            last = basicblock_last_instr(bb);
1241
3.25k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1242
1.99k
                removed_jump_opcode == JUMP)
1243
88
            {
1244
                /* Make sure we don't lose eval breaker checks */
1245
88
                last->i_opcode = JUMP;
1246
88
            }
1247
3.25k
        }
1248
3.77k
        target->b_predecessors--;
1249
3.77k
        return 1;
1250
3.77k
    }
1251
13.9k
    return 0;
1252
17.6k
}
1253
1254
static int
1255
8.54k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1256
8.54k
    bool changes;
1257
10.0k
    do {
1258
10.0k
        changes = false;
1259
107k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1260
97.1k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1261
97.1k
            RETURN_IF_ERROR(res);
1262
97.1k
            if (res) {
1263
3.77k
                changes = true;
1264
3.77k
            }
1265
97.1k
        }
1266
10.0k
    } while(changes); /* every change removes a jump, ensuring convergence */
1267
8.54k
    return changes;
1268
8.54k
}
1269
1270
// Attempt to eliminate jumps to jumps by updating inst to jump to
1271
// target->i_target using the provided opcode. Return whether or not the
1272
// optimization was successful.
1273
static bool
1274
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1275
888
{
1276
888
    assert(is_jump(inst));
1277
888
    assert(is_jump(target));
1278
888
    assert(inst == basicblock_last_instr(bb));
1279
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1280
    // changes (and we fall into an infinite loop):
1281
888
    if (inst->i_target != target->i_target) {
1282
        /* Change inst to NOP and append a jump to target->i_target. The
1283
         * NOP will be removed later if it's not needed for the lineno.
1284
         */
1285
888
        INSTR_SET_OP0(inst, NOP);
1286
1287
888
        RETURN_IF_ERROR(
1288
888
            basicblock_add_jump(
1289
888
                bb, opcode, target->i_target, target->i_loc));
1290
1291
888
        return true;
1292
888
    }
1293
0
    return false;
1294
888
}
1295
1296
static int
1297
loads_const(int opcode)
1298
24.3k
{
1299
24.3k
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1300
24.3k
}
1301
1302
/* Returns new reference */
1303
static PyObject*
1304
get_const_value(int opcode, int oparg, PyObject *co_consts)
1305
88.7k
{
1306
88.7k
    PyObject *constant = NULL;
1307
88.7k
    assert(loads_const(opcode));
1308
88.7k
    if (opcode == LOAD_CONST) {
1309
87.5k
        constant = PyList_GET_ITEM(co_consts, oparg);
1310
87.5k
    }
1311
88.7k
    if (opcode == LOAD_SMALL_INT) {
1312
1.19k
        return PyLong_FromLong(oparg);
1313
1.19k
    }
1314
1315
87.5k
    if (constant == NULL) {
1316
0
        PyErr_SetString(PyExc_SystemError,
1317
0
                        "Internal error: failed to get value of a constant");
1318
0
        return NULL;
1319
0
    }
1320
87.5k
    return Py_NewRef(constant);
1321
87.5k
}
1322
1323
// Steals a reference to newconst.
1324
static int
1325
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1326
2.92k
{
1327
2.92k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1328
0
        Py_DECREF(newconst);
1329
0
        return -1;
1330
0
    }
1331
1332
2.92k
    Py_ssize_t index;
1333
128k
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1334
126k
        if (PyList_GET_ITEM(consts, index) == newconst) {
1335
891
            break;
1336
891
        }
1337
126k
    }
1338
2.92k
    if (index == PyList_GET_SIZE(consts)) {
1339
2.03k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1340
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1341
0
            Py_DECREF(newconst);
1342
0
            return -1;
1343
0
        }
1344
2.03k
        if (PyList_Append(consts, newconst)) {
1345
0
            Py_DECREF(newconst);
1346
0
            return -1;
1347
0
        }
1348
2.03k
    }
1349
2.92k
    Py_DECREF(newconst);
1350
2.92k
    return (int)index;
1351
2.92k
}
1352
1353
/*
1354
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1355
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1356
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1357
1358
  Return boolean indicating whether "size" such instructions were found.
1359
*/
1360
static bool
1361
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1362
16.3k
{
1363
16.3k
    assert(start < bb->b_iused);
1364
16.3k
    assert(size >= 0);
1365
16.3k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1366
1367
27.2k
    for (; start >= 0 && size > 0; start--) {
1368
24.3k
        cfg_instr *instr = &bb->b_instr[start];
1369
24.3k
        if (instr->i_opcode == NOP) {
1370
356
            continue;
1371
356
        }
1372
24.0k
        if (!loads_const(instr->i_opcode)) {
1373
13.4k
            return false;
1374
13.4k
        }
1375
10.5k
        instrs[--size] = instr;
1376
10.5k
    }
1377
1378
2.80k
    return size == 0;
1379
16.3k
}
1380
1381
/*
1382
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1383
  Caller must make sure "instrs" has at least "size" elements.
1384
*/
1385
static void
1386
nop_out(cfg_instr **instrs, int size)
1387
2.93k
{
1388
8.86k
    for (int i = 0; i < size; i++) {
1389
5.92k
        cfg_instr *instr = instrs[i];
1390
5.92k
        assert(instr->i_opcode != NOP);
1391
5.92k
        INSTR_SET_OP0(instr, NOP);
1392
5.92k
        INSTR_SET_LOC(instr, NO_LOCATION);
1393
5.92k
    }
1394
2.93k
}
1395
1396
/* Does not steal reference to "newconst".
1397
   Return 1 if changed instruction to LOAD_SMALL_INT.
1398
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1399
   Return -1 on error.
1400
*/
1401
static int
1402
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1403
                               PyObject *consts, PyObject *const_cache)
1404
83.2k
{
1405
83.2k
    if (PyLong_CheckExact(newconst)) {
1406
27.3k
        int overflow;
1407
27.3k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1408
27.3k
        if (val == -1 && PyErr_Occurred()) {
1409
0
            return -1;
1410
0
        }
1411
27.3k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1412
21.4k
            assert(_Py_IsImmortal(newconst));
1413
21.4k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1414
21.4k
            return 1;
1415
21.4k
        }
1416
27.3k
    }
1417
61.8k
    return 0;
1418
83.2k
}
1419
1420
1421
/* Steals reference to "newconst" */
1422
static int
1423
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1424
                      PyObject *consts, PyObject *const_cache)
1425
2.55k
{
1426
2.55k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1427
2.55k
    if (res < 0) {
1428
0
        Py_DECREF(newconst);
1429
0
        return ERROR;
1430
0
    }
1431
2.55k
    if (res > 0) {
1432
16
        return SUCCESS;
1433
16
    }
1434
2.53k
    int oparg = add_const(newconst, consts, const_cache);
1435
2.53k
    RETURN_IF_ERROR(oparg);
1436
2.53k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1437
2.53k
    return SUCCESS;
1438
2.53k
}
1439
1440
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1441
   with    LOAD_CONST (c1, c2, ... cn).
1442
   The consts table must still be in list form so that the
1443
   new constant (c1, c2, ... cn) can be appended.
1444
   Called with codestr pointing to the first LOAD_CONST.
1445
*/
1446
static int
1447
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1448
5.11k
{
1449
    /* Pre-conditions */
1450
5.11k
    assert(PyDict_CheckExact(const_cache));
1451
5.11k
    assert(PyList_CheckExact(consts));
1452
1453
5.11k
    cfg_instr *instr = &bb->b_instr[i];
1454
5.11k
    assert(instr->i_opcode == BUILD_TUPLE);
1455
1456
5.11k
    int seq_size = instr->i_oparg;
1457
5.11k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1458
0
        return SUCCESS;
1459
0
    }
1460
1461
5.11k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1462
5.11k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1463
        /* not a const sequence */
1464
3.37k
        return SUCCESS;
1465
3.37k
    }
1466
1467
1.73k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1468
1.73k
    if (const_tuple == NULL) {
1469
0
        return ERROR;
1470
0
    }
1471
1472
4.83k
    for (int i = 0; i < seq_size; i++) {
1473
3.09k
        cfg_instr *inst = const_instrs[i];
1474
3.09k
        assert(loads_const(inst->i_opcode));
1475
3.09k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1476
3.09k
        if (element == NULL) {
1477
0
            Py_DECREF(const_tuple);
1478
0
            return ERROR;
1479
0
        }
1480
3.09k
        PyTuple_SET_ITEM(const_tuple, i, element);
1481
3.09k
    }
1482
1483
1.73k
    nop_out(const_instrs, seq_size);
1484
1.73k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1485
1.73k
}
1486
1487
/* Replace:
1488
    BUILD_LIST 0
1489
    LOAD_CONST c1
1490
    LIST_APPEND 1
1491
    LOAD_CONST c2
1492
    LIST_APPEND 1
1493
    ...
1494
    LOAD_CONST cN
1495
    LIST_APPEND 1
1496
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1497
   with:
1498
    LOAD_CONST (c1, c2, ... cN)
1499
*/
1500
static int
1501
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1502
                                      PyObject *consts, PyObject *const_cache)
1503
144
{
1504
144
    assert(PyDict_CheckExact(const_cache));
1505
144
    assert(PyList_CheckExact(consts));
1506
144
    assert(i >= 0);
1507
144
    assert(i < bb->b_iused);
1508
1509
144
    cfg_instr *intrinsic = &bb->b_instr[i];
1510
144
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1511
144
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1512
1513
144
    int consts_found = 0;
1514
144
    bool expect_append = true;
1515
1516
331
    for (int pos = i - 1; pos >= 0; pos--) {
1517
331
        cfg_instr *instr = &bb->b_instr[pos];
1518
331
        int opcode = instr->i_opcode;
1519
331
        int oparg = instr->i_oparg;
1520
1521
331
        if (opcode == NOP) {
1522
0
            continue;
1523
0
        }
1524
1525
331
        if (opcode == BUILD_LIST && oparg == 0) {
1526
2
            if (!expect_append) {
1527
                /* Not a sequence start. */
1528
0
                return SUCCESS;
1529
0
            }
1530
1531
            /* Sequence start, we are done. */
1532
2
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1533
2
            if (newconst == NULL) {
1534
0
                return ERROR;
1535
0
            }
1536
1537
184
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1538
182
                instr = &bb->b_instr[newpos];
1539
182
                if (instr->i_opcode == NOP) {
1540
0
                    continue;
1541
0
                }
1542
182
                if (loads_const(instr->i_opcode)) {
1543
90
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1544
90
                    if (constant == NULL) {
1545
0
                        Py_DECREF(newconst);
1546
0
                        return ERROR;
1547
0
                    }
1548
90
                    assert(consts_found > 0);
1549
90
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1550
90
                }
1551
182
                nop_out(&instr, 1);
1552
182
            }
1553
2
            assert(consts_found == 0);
1554
2
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1555
2
        }
1556
1557
329
        if (expect_append) {
1558
233
            if (opcode != LIST_APPEND || oparg != 1) {
1559
137
                return SUCCESS;
1560
137
            }
1561
233
        }
1562
96
        else {
1563
96
            if (!loads_const(opcode)) {
1564
5
                return SUCCESS;
1565
5
            }
1566
91
            consts_found++;
1567
91
        }
1568
1569
187
        expect_append = !expect_append;
1570
187
    }
1571
1572
    /* Did not find sequence start. */
1573
0
    return SUCCESS;
1574
144
}
1575
1576
3.27k
#define MIN_CONST_SEQUENCE_SIZE 3
1577
/*
1578
Optimize lists and sets for:
1579
    1. "for" loop, comprehension or "in"/"not in" tests:
1580
           Change literal list or set of constants into constant
1581
           tuple or frozenset respectively. Change list of
1582
           non-constants into tuple.
1583
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1584
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1585
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1586
           or BUILD_SET & SET_UPDATE respectively.
1587
*/
1588
static int
1589
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1590
                        PyObject *consts, PyObject *const_cache)
1591
1.63k
{
1592
1.63k
    assert(PyDict_CheckExact(const_cache));
1593
1.63k
    assert(PyList_CheckExact(consts));
1594
1595
1.63k
    cfg_instr *instr = &bb->b_instr[i];
1596
1.63k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1597
1598
1.63k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1599
1.63k
    int seq_size = instr->i_oparg;
1600
1.63k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1601
1.63k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1602
1.35k
    {
1603
1.35k
        return SUCCESS;
1604
1.35k
    }
1605
1606
280
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1607
280
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1608
74
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1609
            /* iterate over a tuple instead of list */
1610
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1611
0
        }
1612
74
        return SUCCESS;
1613
74
    }
1614
1615
206
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1616
206
    if (const_result == NULL) {
1617
0
        return ERROR;
1618
0
    }
1619
1620
1.93k
    for (int i = 0; i < seq_size; i++) {
1621
1.72k
        cfg_instr *inst = const_instrs[i];
1622
1.72k
        assert(loads_const(inst->i_opcode));
1623
1.72k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1624
1.72k
        if (element == NULL) {
1625
0
            Py_DECREF(const_result);
1626
0
            return ERROR;
1627
0
        }
1628
1.72k
        PyTuple_SET_ITEM(const_result, i, element);
1629
1.72k
    }
1630
1631
206
    if (instr->i_opcode == BUILD_SET) {
1632
69
        PyObject *frozenset = PyFrozenSet_New(const_result);
1633
69
        if (frozenset == NULL) {
1634
0
            Py_DECREF(const_result);
1635
0
            return ERROR;
1636
0
        }
1637
69
        Py_SETREF(const_result, frozenset);
1638
69
    }
1639
1640
206
    int index = add_const(const_result, consts, const_cache);
1641
206
    RETURN_IF_ERROR(index);
1642
206
    nop_out(const_instrs, seq_size);
1643
1644
206
    if (contains_or_iter) {
1645
63
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1646
63
    }
1647
143
    else {
1648
143
        assert(i >= 2);
1649
143
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1650
1651
143
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1652
1653
143
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1654
143
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1655
143
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1656
143
    }
1657
206
    return SUCCESS;
1658
206
}
1659
1660
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1661
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1662
 * Used to avoid creating constants which are slow to hash.
1663
 */
1664
static Py_ssize_t
1665
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1666
0
{
1667
0
    if (PyTuple_Check(obj)) {
1668
0
        Py_ssize_t i;
1669
0
        limit -= PyTuple_GET_SIZE(obj);
1670
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1671
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1672
0
            if (limit < 0) {
1673
0
                return limit;
1674
0
            }
1675
0
        }
1676
0
    }
1677
0
    return limit;
1678
0
}
1679
1680
153
#define MAX_INT_SIZE           128  /* bits */
1681
0
#define MAX_COLLECTION_SIZE    256  /* items */
1682
19
#define MAX_STR_SIZE          4096  /* characters */
1683
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1684
1685
static PyObject *
1686
const_folding_safe_multiply(PyObject *v, PyObject *w)
1687
45
{
1688
45
    if (PyLong_Check(v) && PyLong_Check(w) &&
1689
7
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1690
45
    ) {
1691
7
        int64_t vbits = _PyLong_NumBits(v);
1692
7
        int64_t wbits = _PyLong_NumBits(w);
1693
7
        assert(vbits >= 0);
1694
7
        assert(wbits >= 0);
1695
7
        if (vbits + wbits > MAX_INT_SIZE) {
1696
0
            return NULL;
1697
0
        }
1698
7
    }
1699
38
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1700
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1701
0
        if (size) {
1702
0
            long n = PyLong_AsLong(v);
1703
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1704
0
                return NULL;
1705
0
            }
1706
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1707
0
                return NULL;
1708
0
            }
1709
0
        }
1710
0
    }
1711
38
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1712
19
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1713
19
                                               PyBytes_GET_SIZE(w);
1714
19
        if (size) {
1715
19
            long n = PyLong_AsLong(v);
1716
19
            if (n < 0 || n > MAX_STR_SIZE / size) {
1717
7
                return NULL;
1718
7
            }
1719
19
        }
1720
19
    }
1721
19
    else if (PyLong_Check(w) &&
1722
19
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1723
19
    {
1724
19
        return const_folding_safe_multiply(w, v);
1725
19
    }
1726
1727
19
    return PyNumber_Multiply(v, w);
1728
45
}
1729
1730
static PyObject *
1731
const_folding_safe_power(PyObject *v, PyObject *w)
1732
14
{
1733
14
    if (PyLong_Check(v) && PyLong_Check(w) &&
1734
14
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1735
14
    ) {
1736
14
        int64_t vbits = _PyLong_NumBits(v);
1737
14
        size_t wbits = PyLong_AsSize_t(w);
1738
14
        assert(vbits >= 0);
1739
14
        if (wbits == (size_t)-1) {
1740
0
            return NULL;
1741
0
        }
1742
14
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1743
0
            return NULL;
1744
0
        }
1745
14
    }
1746
1747
14
    return PyNumber_Power(v, w, Py_None);
1748
14
}
1749
1750
static PyObject *
1751
const_folding_safe_lshift(PyObject *v, PyObject *w)
1752
45
{
1753
45
    if (PyLong_Check(v) && PyLong_Check(w) &&
1754
45
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1755
45
    ) {
1756
44
        int64_t vbits = _PyLong_NumBits(v);
1757
44
        size_t wbits = PyLong_AsSize_t(w);
1758
44
        assert(vbits >= 0);
1759
44
        if (wbits == (size_t)-1) {
1760
0
            return NULL;
1761
0
        }
1762
44
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1763
0
            return NULL;
1764
0
        }
1765
44
    }
1766
1767
45
    return PyNumber_Lshift(v, w);
1768
45
}
1769
1770
static PyObject *
1771
const_folding_safe_mod(PyObject *v, PyObject *w)
1772
0
{
1773
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1774
0
        return NULL;
1775
0
    }
1776
1777
0
    return PyNumber_Remainder(v, w);
1778
0
}
1779
1780
static PyObject *
1781
eval_const_binop(PyObject *left, int op, PyObject *right)
1782
113
{
1783
113
    assert(left != NULL && right != NULL);
1784
113
    assert(op >= 0 && op <= NB_OPARG_LAST);
1785
1786
113
    PyObject *result = NULL;
1787
113
    switch (op) {
1788
13
        case NB_ADD:
1789
13
            result = PyNumber_Add(left, right);
1790
13
            break;
1791
4
        case NB_SUBTRACT:
1792
4
            result = PyNumber_Subtract(left, right);
1793
4
            break;
1794
26
        case NB_MULTIPLY:
1795
26
            result = const_folding_safe_multiply(left, right);
1796
26
            break;
1797
1
        case NB_TRUE_DIVIDE:
1798
1
            result = PyNumber_TrueDivide(left, right);
1799
1
            break;
1800
0
        case NB_FLOOR_DIVIDE:
1801
0
            result = PyNumber_FloorDivide(left, right);
1802
0
            break;
1803
0
        case NB_REMAINDER:
1804
0
            result = const_folding_safe_mod(left, right);
1805
0
            break;
1806
14
        case NB_POWER:
1807
14
            result = const_folding_safe_power(left, right);
1808
14
            break;
1809
45
        case NB_LSHIFT:
1810
45
            result = const_folding_safe_lshift(left, right);
1811
45
            break;
1812
0
        case NB_RSHIFT:
1813
0
            result = PyNumber_Rshift(left, right);
1814
0
            break;
1815
0
        case NB_OR:
1816
0
            result = PyNumber_Or(left, right);
1817
0
            break;
1818
0
        case NB_XOR:
1819
0
            result = PyNumber_Xor(left, right);
1820
0
            break;
1821
0
        case NB_AND:
1822
0
            result = PyNumber_And(left, right);
1823
0
            break;
1824
10
        case NB_SUBSCR:
1825
10
            result = PyObject_GetItem(left, right);
1826
10
            break;
1827
0
        case NB_MATRIX_MULTIPLY:
1828
            // No builtin constants implement matrix multiplication
1829
0
            break;
1830
0
        default:
1831
0
            Py_UNREACHABLE();
1832
113
    }
1833
113
    return result;
1834
113
}
1835
1836
static int
1837
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1838
10.0k
{
1839
10.1k
    #define BINOP_OPERAND_COUNT 2
1840
10.0k
    assert(PyDict_CheckExact(const_cache));
1841
10.0k
    assert(PyList_CheckExact(consts));
1842
1843
10.0k
    cfg_instr *binop = &bb->b_instr[i];
1844
10.0k
    assert(binop->i_opcode == BINARY_OP);
1845
1846
10.0k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1847
10.0k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1848
        /* not a const sequence */
1849
9.89k
        return SUCCESS;
1850
9.89k
    }
1851
1852
113
    cfg_instr *lhs_instr = operands_instrs[0];
1853
113
    assert(loads_const(lhs_instr->i_opcode));
1854
113
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1855
113
    if (lhs == NULL) {
1856
0
        return ERROR;
1857
0
    }
1858
1859
113
    cfg_instr *rhs_instr = operands_instrs[1];
1860
113
    assert(loads_const(rhs_instr->i_opcode));
1861
113
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1862
113
    if (rhs == NULL) {
1863
0
        Py_DECREF(lhs);
1864
0
        return ERROR;
1865
0
    }
1866
1867
113
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1868
113
    Py_DECREF(lhs);
1869
113
    Py_DECREF(rhs);
1870
113
    if (newconst == NULL) {
1871
7
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1872
0
            return ERROR;
1873
0
        }
1874
7
        PyErr_Clear();
1875
7
        return SUCCESS;
1876
7
    }
1877
1878
106
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1879
106
    return instr_make_load_const(binop, newconst, consts, const_cache);
1880
113
}
1881
1882
static PyObject *
1883
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1884
707
{
1885
707
    assert(operand != NULL);
1886
707
    assert(
1887
707
        opcode == UNARY_NEGATIVE ||
1888
707
        opcode == UNARY_INVERT ||
1889
707
        opcode == UNARY_NOT ||
1890
707
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1891
707
    );
1892
707
    PyObject *result;
1893
707
    switch (opcode) {
1894
706
        case UNARY_NEGATIVE:
1895
706
            result = PyNumber_Negative(operand);
1896
706
            break;
1897
1
        case UNARY_INVERT:
1898
            // XXX: This should be removed once the ~bool depreciation expires.
1899
1
            if (PyBool_Check(operand)) {
1900
0
                return NULL;
1901
0
            }
1902
1
            result = PyNumber_Invert(operand);
1903
1
            break;
1904
0
        case UNARY_NOT: {
1905
0
            int r = PyObject_IsTrue(operand);
1906
0
            if (r < 0) {
1907
0
                return NULL;
1908
0
            }
1909
0
            result = PyBool_FromLong(!r);
1910
0
            break;
1911
0
        }
1912
0
        case CALL_INTRINSIC_1:
1913
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1914
0
                Py_UNREACHABLE();
1915
0
            }
1916
0
            result = PyNumber_Positive(operand);
1917
0
            break;
1918
0
        default:
1919
0
            Py_UNREACHABLE();
1920
707
    }
1921
707
    return result;
1922
707
}
1923
1924
static int
1925
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1926
906
{
1927
1.61k
    #define UNARYOP_OPERAND_COUNT 1
1928
906
    assert(PyDict_CheckExact(const_cache));
1929
906
    assert(PyList_CheckExact(consts));
1930
906
    cfg_instr *unaryop = &bb->b_instr[i];
1931
1932
906
    cfg_instr *operand_instr;
1933
906
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1934
        /* not a const */
1935
199
        return SUCCESS;
1936
199
    }
1937
1938
906
    assert(loads_const(operand_instr->i_opcode));
1939
707
    PyObject *operand = get_const_value(
1940
707
        operand_instr->i_opcode,
1941
707
        operand_instr->i_oparg,
1942
707
        consts
1943
707
    );
1944
707
    if (operand == NULL) {
1945
0
        return ERROR;
1946
0
    }
1947
1948
707
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1949
707
    Py_DECREF(operand);
1950
707
    if (newconst == NULL) {
1951
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1952
0
            return ERROR;
1953
0
        }
1954
0
        PyErr_Clear();
1955
0
        return SUCCESS;
1956
0
    }
1957
1958
707
    if (unaryop->i_opcode == UNARY_NOT) {
1959
0
        assert(PyBool_Check(newconst));
1960
0
    }
1961
707
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1962
707
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1963
707
}
1964
1965
2.80k
#define VISITED (-1)
1966
1967
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1968
// same effect.
1969
static int
1970
swaptimize(basicblock *block, int *ix)
1971
2.09k
{
1972
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1973
    // for this function. Make sure to blow away cached *.pyc files first!
1974
2.09k
    assert(*ix < block->b_iused);
1975
2.09k
    cfg_instr *instructions = &block->b_instr[*ix];
1976
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1977
    // maximum depth of the stack manipulations:
1978
2.09k
    assert(instructions[0].i_opcode == SWAP);
1979
2.09k
    int depth = instructions[0].i_oparg;
1980
2.09k
    int len = 0;
1981
2.09k
    int more = false;
1982
2.09k
    int limit = block->b_iused - *ix;
1983
2.42k
    while (++len < limit) {
1984
2.42k
        int opcode = instructions[len].i_opcode;
1985
2.42k
        if (opcode == SWAP) {
1986
216
            depth = Py_MAX(depth, instructions[len].i_oparg);
1987
216
            more = true;
1988
216
        }
1989
2.20k
        else if (opcode != NOP) {
1990
2.09k
            break;
1991
2.09k
        }
1992
2.42k
    }
1993
    // It's already optimal if there's only one SWAP:
1994
2.09k
    if (!more) {
1995
1.88k
        return SUCCESS;
1996
1.88k
    }
1997
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1998
216
    int *stack = PyMem_Malloc(depth * sizeof(int));
1999
216
    if (stack == NULL) {
2000
0
        PyErr_NoMemory();
2001
0
        return ERROR;
2002
0
    }
2003
864
    for (int i = 0; i < depth; i++) {
2004
648
        stack[i] = i;
2005
648
    }
2006
    // Simulate the combined effect of these instructions by "running" them on
2007
    // our "stack":
2008
648
    for (int i = 0; i < len; i++) {
2009
432
        if (instructions[i].i_opcode == SWAP) {
2010
432
            int oparg = instructions[i].i_oparg;
2011
432
            int top = stack[0];
2012
            // SWAPs are 1-indexed:
2013
432
            stack[0] = stack[oparg - 1];
2014
432
            stack[oparg - 1] = top;
2015
432
        }
2016
432
    }
2017
    // Now we can begin! Our approach here is based on a solution to a closely
2018
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2019
    // think of this algorithm as determining the steps needed to efficiently
2020
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2021
    // though, we can efficiently *shuffle* it! For this reason, we will be
2022
    // replacing instructions starting from the *end* of the run. Since the
2023
    // solution is optimal, we don't need to worry about running out of space:
2024
216
    int current = len - 1;
2025
864
    for (int i = 0; i < depth; i++) {
2026
        // Skip items that have already been visited, or just happen to be in
2027
        // the correct location:
2028
648
        if (stack[i] == VISITED || stack[i] == i) {
2029
432
            continue;
2030
432
        }
2031
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2032
        // with other items; traversing the cycle and swapping each item with
2033
        // the next will put them all in the correct place. The weird
2034
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2035
        // can only swap from that position:
2036
216
        int j = i;
2037
864
        while (true) {
2038
            // Skip the actual swap if our item is zero, since swapping the top
2039
            // item with itself is pointless:
2040
864
            if (j) {
2041
432
                assert(0 <= current);
2042
                // SWAPs are 1-indexed:
2043
432
                instructions[current].i_opcode = SWAP;
2044
432
                instructions[current--].i_oparg = j + 1;
2045
432
            }
2046
864
            if (stack[j] == VISITED) {
2047
                // Completed the cycle:
2048
216
                assert(j == i);
2049
216
                break;
2050
216
            }
2051
648
            int next_j = stack[j];
2052
648
            stack[j] = VISITED;
2053
648
            j = next_j;
2054
648
        }
2055
216
    }
2056
    // NOP out any unused instructions:
2057
216
    while (0 <= current) {
2058
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2059
0
    }
2060
216
    PyMem_Free(stack);
2061
216
    *ix += len - 1;
2062
216
    return SUCCESS;
2063
216
}
2064
2065
2066
// This list is pretty small, since it's only okay to reorder opcodes that:
2067
// - can't affect control flow (like jumping or raising exceptions)
2068
// - can't invoke arbitrary code (besides finalizers)
2069
// - only touch the TOS (and pop it when finished)
2070
#define SWAPPABLE(opcode) \
2071
3.46k
    ((opcode) == STORE_FAST || \
2072
3.46k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2073
3.46k
     (opcode) == POP_TOP)
2074
2075
#define STORES_TO(instr) \
2076
480
    (((instr).i_opcode == STORE_FAST || \
2077
480
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2078
480
     ? (instr).i_oparg : -1)
2079
2080
static int
2081
next_swappable_instruction(basicblock *block, int i, int lineno)
2082
3.40k
{
2083
3.51k
    while (++i < block->b_iused) {
2084
3.48k
        cfg_instr *instruction = &block->b_instr[i];
2085
3.48k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2086
            // Optimizing across this instruction could cause user-visible
2087
            // changes in the names bound between line tracing events!
2088
16
            return -1;
2089
16
        }
2090
3.47k
        if (instruction->i_opcode == NOP) {
2091
106
            continue;
2092
106
        }
2093
3.36k
        if (SWAPPABLE(instruction->i_opcode)) {
2094
1.52k
            return i;
2095
1.52k
        }
2096
1.83k
        return -1;
2097
3.36k
    }
2098
25
    return -1;
2099
3.40k
}
2100
2101
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2102
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2103
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2104
static void
2105
apply_static_swaps(basicblock *block, int i)
2106
2.09k
{
2107
    // SWAPs are to our left, and potential swaperands are to our right:
2108
2.42k
    for (; 0 <= i; i--) {
2109
2.30k
        assert(i < block->b_iused);
2110
2.30k
        cfg_instr *swap = &block->b_instr[i];
2111
2.30k
        if (swap->i_opcode != SWAP) {
2112
208
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2113
                // Nope, but we know how to handle these. Keep looking:
2114
104
                continue;
2115
104
            }
2116
            // We can't reason about what this instruction does. Bail:
2117
104
            return;
2118
208
        }
2119
2.09k
        int j = next_swappable_instruction(block, i, -1);
2120
2.09k
        if (j < 0) {
2121
1.07k
            return;
2122
1.07k
        }
2123
1.02k
        int k = j;
2124
1.02k
        int lineno = block->b_instr[j].i_loc.lineno;
2125
1.52k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2126
1.30k
            k = next_swappable_instruction(block, k, lineno);
2127
1.30k
            if (k < 0) {
2128
805
                return;
2129
805
            }
2130
1.30k
        }
2131
        // The reordering is not safe if the two instructions to be swapped
2132
        // store to the same location, or if any intervening instruction stores
2133
        // to the same location as either of them.
2134
219
        int store_j = STORES_TO(block->b_instr[j]);
2135
219
        int store_k = STORES_TO(block->b_instr[k]);
2136
219
        if (store_j >= 0 || store_k >= 0) {
2137
219
            if (store_j == store_k) {
2138
0
                return;
2139
0
            }
2140
261
            for (int idx = j + 1; idx < k; idx++) {
2141
42
                int store_idx = STORES_TO(block->b_instr[idx]);
2142
42
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2143
0
                    return;
2144
0
                }
2145
42
            }
2146
219
        }
2147
2148
        // Success!
2149
219
        INSTR_SET_OP0(swap, NOP);
2150
219
        cfg_instr temp = block->b_instr[j];
2151
219
        block->b_instr[j] = block->b_instr[k];
2152
219
        block->b_instr[k] = temp;
2153
219
    }
2154
2.09k
}
2155
2156
static int
2157
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2158
61.0k
{
2159
61.0k
    assert(PyDict_CheckExact(const_cache));
2160
61.0k
    assert(PyList_CheckExact(consts));
2161
61.0k
    int opcode = 0;
2162
61.0k
    int oparg = 0;
2163
534k
    for (int i = 0; i < bb->b_iused; i++) {
2164
473k
        cfg_instr *inst = &bb->b_instr[i];
2165
473k
        if (inst->i_opcode == LOAD_CONST) {
2166
80.6k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2167
80.6k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2168
80.6k
            Py_DECREF(constant);
2169
80.6k
            if (res < 0) {
2170
0
                return ERROR;
2171
0
            }
2172
80.6k
        }
2173
473k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2174
58.9k
                                      inst->i_opcode == COPY &&
2175
43
                                      inst->i_oparg == 1);
2176
473k
        if (! is_copy_of_load_const) {
2177
473k
            opcode = inst->i_opcode;
2178
473k
            oparg = inst->i_oparg;
2179
473k
        }
2180
473k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2181
473k
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2182
392k
            continue;
2183
392k
        }
2184
80.6k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2185
80.6k
        switch(nextop) {
2186
179
            case POP_JUMP_IF_FALSE:
2187
182
            case POP_JUMP_IF_TRUE:
2188
182
            case JUMP_IF_FALSE:
2189
182
            case JUMP_IF_TRUE:
2190
182
            {
2191
                /* Remove LOAD_CONST const; conditional jump */
2192
182
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2193
182
                if (cnt == NULL) {
2194
0
                    return ERROR;
2195
0
                }
2196
182
                int is_true = PyObject_IsTrue(cnt);
2197
182
                Py_DECREF(cnt);
2198
182
                if (is_true == -1) {
2199
0
                    return ERROR;
2200
0
                }
2201
182
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2202
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2203
182
                    INSTR_SET_OP0(inst, NOP);
2204
182
                }
2205
182
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2206
182
                if (is_true == jump_if_true) {
2207
1
                    bb->b_instr[i+1].i_opcode = JUMP;
2208
1
                }
2209
181
                else {
2210
181
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2211
181
                }
2212
182
                break;
2213
182
            }
2214
1.86k
            case IS_OP:
2215
1.86k
            {
2216
                // Fold to POP_JUMP_IF_NONE:
2217
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2218
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2219
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2220
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2221
                // Fold to POP_JUMP_IF_NOT_NONE:
2222
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2223
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2224
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2225
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2226
1.86k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2227
1.86k
                if (cnt == NULL) {
2228
0
                    return ERROR;
2229
0
                }
2230
1.86k
                if (!Py_IsNone(cnt)) {
2231
28
                    Py_DECREF(cnt);
2232
28
                    break;
2233
28
                }
2234
1.83k
                if (bb->b_iused <= i + 2) {
2235
11
                    break;
2236
11
                }
2237
1.82k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2238
1.82k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2239
                // Get rid of TO_BOOL regardless:
2240
1.82k
                if (jump_instr->i_opcode == TO_BOOL) {
2241
1.77k
                    INSTR_SET_OP0(jump_instr, NOP);
2242
1.77k
                    if (bb->b_iused <= i + 3) {
2243
0
                        break;
2244
0
                    }
2245
1.77k
                    jump_instr = &bb->b_instr[i + 3];
2246
1.77k
                }
2247
1.82k
                bool invert = is_instr->i_oparg;
2248
1.82k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2249
1.70k
                    invert = !invert;
2250
1.70k
                }
2251
114
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2252
42
                    break;
2253
42
                }
2254
1.78k
                INSTR_SET_OP0(inst, NOP);
2255
1.78k
                INSTR_SET_OP0(is_instr, NOP);
2256
1.78k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2257
1.78k
                                              : POP_JUMP_IF_NONE;
2258
1.78k
                break;
2259
1.82k
            }
2260
182
            case TO_BOOL:
2261
182
            {
2262
182
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2263
182
                if (cnt == NULL) {
2264
0
                    return ERROR;
2265
0
                }
2266
182
                int is_true = PyObject_IsTrue(cnt);
2267
182
                Py_DECREF(cnt);
2268
182
                if (is_true == -1) {
2269
0
                    return ERROR;
2270
0
                }
2271
182
                cnt = PyBool_FromLong(is_true);
2272
182
                int index = add_const(cnt, consts, const_cache);
2273
182
                if (index < 0) {
2274
0
                    return ERROR;
2275
0
                }
2276
182
                INSTR_SET_OP0(inst, NOP);
2277
182
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2278
182
                break;
2279
182
            }
2280
80.6k
        }
2281
80.6k
    }
2282
61.0k
    return SUCCESS;
2283
61.0k
}
2284
2285
static int
2286
8.54k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2287
69.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2288
61.0k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2289
61.0k
    }
2290
8.54k
    return SUCCESS;
2291
8.54k
}
2292
2293
static int
2294
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2295
61.0k
{
2296
61.0k
    assert(PyDict_CheckExact(const_cache));
2297
61.0k
    assert(PyList_CheckExact(consts));
2298
61.0k
    cfg_instr nop;
2299
61.0k
    INSTR_SET_OP0(&nop, NOP);
2300
536k
    for (int i = 0; i < bb->b_iused; i++) {
2301
475k
        cfg_instr *inst = &bb->b_instr[i];
2302
475k
        cfg_instr *target;
2303
475k
        int opcode = inst->i_opcode;
2304
475k
        int oparg = inst->i_oparg;
2305
475k
        if (HAS_TARGET(opcode)) {
2306
30.6k
            assert(inst->i_target->b_iused > 0);
2307
30.6k
            target = &inst->i_target->b_instr[0];
2308
30.6k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2309
30.6k
        }
2310
444k
        else {
2311
444k
            target = &nop;
2312
444k
        }
2313
475k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2314
475k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2315
475k
        switch (opcode) {
2316
            /* Try to fold tuples of constants.
2317
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2318
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2319
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2320
5.22k
            case BUILD_TUPLE:
2321
5.22k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2322
115
                    switch(oparg) {
2323
0
                        case 1:
2324
0
                            INSTR_SET_OP0(inst, NOP);
2325
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2326
0
                            continue;
2327
99
                        case 2:
2328
111
                        case 3:
2329
111
                            INSTR_SET_OP0(inst, NOP);
2330
111
                            bb->b_instr[i+1].i_opcode = SWAP;
2331
111
                            continue;
2332
115
                    }
2333
115
                }
2334
5.11k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2335
5.11k
                break;
2336
1.49k
            case BUILD_LIST:
2337
1.63k
            case BUILD_SET:
2338
1.63k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2339
1.63k
                break;
2340
955
            case POP_JUMP_IF_NOT_NONE:
2341
1.86k
            case POP_JUMP_IF_NONE:
2342
1.86k
                switch (target->i_opcode) {
2343
83
                    case JUMP:
2344
83
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2345
1.86k
                }
2346
1.86k
                break;
2347
12.1k
            case POP_JUMP_IF_FALSE:
2348
12.1k
                switch (target->i_opcode) {
2349
734
                    case JUMP:
2350
734
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2351
12.1k
                }
2352
12.1k
                break;
2353
12.1k
            case POP_JUMP_IF_TRUE:
2354
2.36k
                switch (target->i_opcode) {
2355
71
                    case JUMP:
2356
71
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2357
2.36k
                }
2358
2.36k
                break;
2359
2.36k
            case JUMP_IF_FALSE:
2360
346
                switch (target->i_opcode) {
2361
0
                    case JUMP:
2362
0
                    case JUMP_IF_FALSE:
2363
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2364
0
                        continue;
2365
3
                    case JUMP_IF_TRUE:
2366
                        // No need to check for loops here, a block's b_next
2367
                        // cannot point to itself.
2368
3
                        assert(inst->i_target != inst->i_target->b_next);
2369
3
                        inst->i_target = inst->i_target->b_next;
2370
3
                        i--;
2371
3
                        continue;
2372
346
                }
2373
343
                break;
2374
343
            case JUMP_IF_TRUE:
2375
342
                switch (target->i_opcode) {
2376
0
                    case JUMP:
2377
0
                    case JUMP_IF_TRUE:
2378
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2379
0
                        continue;
2380
8
                    case JUMP_IF_FALSE:
2381
                        // No need to check for loops here, a block's b_next
2382
                        // cannot point to itself.
2383
8
                        assert(inst->i_target != inst->i_target->b_next);
2384
8
                        inst->i_target = inst->i_target->b_next;
2385
8
                        i--;
2386
8
                        continue;
2387
342
                }
2388
334
                break;
2389
4.72k
            case JUMP:
2390
7.90k
            case JUMP_NO_INTERRUPT:
2391
7.90k
                switch (target->i_opcode) {
2392
0
                    case JUMP:
2393
0
                        i -= jump_thread(bb, inst, target, JUMP);
2394
0
                        continue;
2395
0
                    case JUMP_NO_INTERRUPT:
2396
0
                        i -= jump_thread(bb, inst, target, opcode);
2397
0
                        continue;
2398
7.90k
                }
2399
7.90k
                break;
2400
7.90k
            case FOR_ITER:
2401
2.00k
                if (target->i_opcode == JUMP) {
2402
                    /* This will not work now because the jump (at target) could
2403
                     * be forward or backward and FOR_ITER only jumps forward. We
2404
                     * can re-enable this if ever we implement a backward version
2405
                     * of FOR_ITER.
2406
                     */
2407
                    /*
2408
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2409
                    */
2410
0
                }
2411
2.00k
                break;
2412
17.4k
            case STORE_FAST:
2413
17.4k
                if (opcode == nextop &&
2414
2.06k
                    oparg == bb->b_instr[i+1].i_oparg &&
2415
31
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2416
31
                    bb->b_instr[i].i_opcode = POP_TOP;
2417
31
                    bb->b_instr[i].i_oparg = 0;
2418
31
                }
2419
17.4k
                break;
2420
2.31k
            case SWAP:
2421
2.31k
                if (oparg == 1) {
2422
0
                    INSTR_SET_OP0(inst, NOP);
2423
0
                }
2424
2.31k
                break;
2425
22.6k
            case LOAD_GLOBAL:
2426
22.6k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2427
10.9k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2428
10.9k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2429
10.9k
                }
2430
22.6k
                break;
2431
7.88k
            case COMPARE_OP:
2432
7.88k
                if (nextop == TO_BOOL) {
2433
3.63k
                    INSTR_SET_OP0(inst, NOP);
2434
3.63k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2435
3.63k
                    continue;
2436
3.63k
                }
2437
4.25k
                break;
2438
4.25k
            case CONTAINS_OP:
2439
5.37k
            case IS_OP:
2440
5.37k
                if (nextop == TO_BOOL) {
2441
2.50k
                    INSTR_SET_OP0(inst, NOP);
2442
2.50k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2443
2.50k
                    continue;
2444
2.50k
                }
2445
2.87k
                if (nextop == UNARY_NOT) {
2446
0
                    INSTR_SET_OP0(inst, NOP);
2447
0
                    int inverted = oparg ^ 1;
2448
0
                    assert(inverted == 0 || inverted == 1);
2449
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2450
0
                    continue;
2451
0
                }
2452
2.87k
                break;
2453
6.29k
            case TO_BOOL:
2454
6.29k
                if (nextop == TO_BOOL) {
2455
0
                    INSTR_SET_OP0(inst, NOP);
2456
0
                    continue;
2457
0
                }
2458
6.29k
                break;
2459
6.29k
            case UNARY_NOT:
2460
77
                if (nextop == TO_BOOL) {
2461
0
                    INSTR_SET_OP0(inst, NOP);
2462
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2463
0
                    continue;
2464
0
                }
2465
77
                if (nextop == UNARY_NOT) {
2466
0
                    INSTR_SET_OP0(inst, NOP);
2467
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2468
0
                    continue;
2469
0
                }
2470
77
                _Py_FALLTHROUGH;
2471
134
            case UNARY_INVERT:
2472
903
            case UNARY_NEGATIVE:
2473
903
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2474
903
                break;
2475
648
            case CALL_INTRINSIC_1:
2476
648
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2477
145
                    if (nextop == GET_ITER) {
2478
1
                        INSTR_SET_OP0(inst, NOP);
2479
1
                    }
2480
144
                    else {
2481
144
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2482
144
                    }
2483
145
                }
2484
503
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2485
3
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2486
3
                }
2487
648
                break;
2488
10.0k
            case BINARY_OP:
2489
10.0k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2490
10.0k
                break;
2491
475k
        }
2492
475k
    }
2493
2494
535k
    for (int i = 0; i < bb->b_iused; i++) {
2495
474k
        cfg_instr *inst = &bb->b_instr[i];
2496
474k
        if (inst->i_opcode == SWAP) {
2497
2.09k
            if (swaptimize(bb, &i) < 0) {
2498
0
                goto error;
2499
0
            }
2500
2.09k
            apply_static_swaps(bb, i);
2501
2.09k
        }
2502
474k
    }
2503
61.0k
    return SUCCESS;
2504
0
error:
2505
0
    return ERROR;
2506
61.0k
}
2507
2508
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2509
2510
static int
2511
remove_redundant_nops_and_jumps(cfg_builder *g)
2512
18.5k
{
2513
18.5k
    int removed_nops, removed_jumps;
2514
21.0k
    do {
2515
        /* Convergence is guaranteed because the number of
2516
         * redundant jumps and nops only decreases.
2517
         */
2518
21.0k
        removed_nops = remove_redundant_nops(g);
2519
21.0k
        RETURN_IF_ERROR(removed_nops);
2520
21.0k
        removed_jumps = remove_redundant_jumps(g);
2521
21.0k
        RETURN_IF_ERROR(removed_jumps);
2522
21.0k
    } while(removed_nops + removed_jumps > 0);
2523
18.5k
    return SUCCESS;
2524
18.5k
}
2525
2526
/* Perform optimizations on a control flow graph.
2527
   The consts object should still be in list form to allow new constants
2528
   to be appended.
2529
2530
   Code trasnformations that reduce code size initially fill the gaps with
2531
   NOPs.  Later those NOPs are removed.
2532
*/
2533
static int
2534
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2535
8.54k
{
2536
8.54k
    assert(PyDict_CheckExact(const_cache));
2537
8.54k
    RETURN_IF_ERROR(check_cfg(g));
2538
8.54k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2539
8.54k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2540
8.54k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2541
8.54k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2542
69.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2543
61.0k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2544
61.0k
    }
2545
8.54k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2546
8.54k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2547
8.54k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2548
8.54k
    assert(no_redundant_jumps(g));
2549
8.54k
    return SUCCESS;
2550
8.54k
}
2551
2552
static void
2553
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2554
16.5k
{
2555
16.5k
    int32_t line1 = inst1->i_loc.lineno;
2556
16.5k
    int32_t line2 = inst2->i_loc.lineno;
2557
    /* Skip if instructions are on different lines */
2558
16.5k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2559
5.99k
        return;
2560
5.99k
    }
2561
10.5k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2562
1.05k
        return;
2563
1.05k
    }
2564
9.47k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2565
9.47k
    INSTR_SET_OP0(inst2, NOP);
2566
9.47k
}
2567
2568
static int
2569
insert_superinstructions(cfg_builder *g)
2570
8.54k
{
2571
69.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2572
2573
494k
        for (int i = 0; i < b->b_iused; i++) {
2574
433k
            cfg_instr *inst = &b->b_instr[i];
2575
433k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2576
433k
            switch(inst->i_opcode) {
2577
58.4k
                case LOAD_FAST:
2578
58.4k
                    if (nextop == LOAD_FAST) {
2579
8.78k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2580
8.78k
                    }
2581
58.4k
                    break;
2582
15.9k
                case STORE_FAST:
2583
15.9k
                    switch (nextop) {
2584
6.03k
                        case LOAD_FAST:
2585
6.03k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2586
6.03k
                            break;
2587
1.68k
                        case STORE_FAST:
2588
1.68k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2589
1.68k
                            break;
2590
15.9k
                    }
2591
15.9k
                    break;
2592
433k
            }
2593
433k
        }
2594
61.0k
    }
2595
8.54k
    int res = remove_redundant_nops(g);
2596
8.54k
    assert(no_redundant_nops(g));
2597
8.54k
    return res;
2598
8.54k
}
2599
2600
#define NOT_LOCAL -1
2601
16.7k
#define DUMMY_INSTR -1
2602
2603
typedef struct {
2604
    // Index of instruction that produced the reference or DUMMY_INSTR.
2605
    int instr;
2606
2607
    // The local to which the reference refers or NOT_LOCAL.
2608
    int local;
2609
} ref;
2610
2611
typedef struct {
2612
    ref *refs;
2613
    Py_ssize_t size;
2614
    Py_ssize_t capacity;
2615
} ref_stack;
2616
2617
static int
2618
ref_stack_push(ref_stack *stack, ref r)
2619
364k
{
2620
364k
    if (stack->size == stack->capacity) {
2621
8.54k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2622
8.54k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2623
8.54k
        if (refs == NULL) {
2624
0
            PyErr_NoMemory();
2625
0
            return -1;
2626
0
        }
2627
8.54k
        stack->refs = refs;
2628
8.54k
        stack->capacity = new_cap;
2629
8.54k
    }
2630
364k
    stack->refs[stack->size] = r;
2631
364k
    stack->size++;
2632
364k
    return 0;
2633
364k
}
2634
2635
static ref
2636
ref_stack_pop(ref_stack *stack)
2637
314k
{
2638
314k
    assert(stack->size > 0);
2639
314k
    stack->size--;
2640
314k
    ref r = stack->refs[stack->size];
2641
314k
    return r;
2642
314k
}
2643
2644
static void
2645
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2646
1.44k
{
2647
1.44k
    Py_ssize_t idx = stack->size - off;
2648
1.44k
    assert(idx >= 0 && idx < stack->size);
2649
1.44k
    ref tmp = stack->refs[idx];
2650
1.44k
    stack->refs[idx] = stack->refs[stack->size - 1];
2651
1.44k
    stack->refs[stack->size - 1] = tmp;
2652
1.44k
}
2653
2654
static ref
2655
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2656
67.6k
{
2657
67.6k
    assert(idx >= 0 && idx < stack->size);
2658
67.6k
    return stack->refs[idx];
2659
67.6k
}
2660
2661
static void
2662
ref_stack_clear(ref_stack *stack)
2663
45.4k
{
2664
45.4k
    stack->size = 0;
2665
45.4k
}
2666
2667
static void
2668
ref_stack_fini(ref_stack *stack)
2669
8.54k
{
2670
8.54k
    if (stack->refs != NULL) {
2671
8.54k
        PyMem_Free(stack->refs);
2672
8.54k
    }
2673
8.54k
    stack->refs = NULL;
2674
8.54k
    stack->capacity = 0;
2675
8.54k
    stack->size = 0;
2676
8.54k
}
2677
2678
typedef enum {
2679
    // The loaded reference is still on the stack when the local is killed
2680
    SUPPORT_KILLED  = 1,
2681
    // The loaded reference is stored into a local
2682
    STORED_AS_LOCAL = 2,
2683
    // The loaded reference is still on the stack at the end of the basic block
2684
    REF_UNCONSUMED  = 4,
2685
} LoadFastInstrFlag;
2686
2687
static void
2688
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2689
17.1k
{
2690
33.3k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2691
16.2k
        ref r = ref_stack_at(refs, i);
2692
16.2k
        if (r.local == local) {
2693
15
            assert(r.instr >= 0);
2694
15
            instr_flags[r.instr] |= SUPPORT_KILLED;
2695
15
        }
2696
16.2k
    }
2697
17.1k
}
2698
2699
static void
2700
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2701
16.7k
{
2702
16.7k
    kill_local(instr_flags, refs, local);
2703
16.7k
    if (r.instr != DUMMY_INSTR) {
2704
14.6k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2705
14.6k
    }
2706
16.7k
}
2707
2708
static void
2709
load_fast_push_block(basicblock ***sp, basicblock *target,
2710
                     Py_ssize_t start_depth)
2711
48.1k
{
2712
48.1k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2713
48.1k
    if (!target->b_visited) {
2714
36.8k
        target->b_visited = 1;
2715
36.8k
        *(*sp)++ = target;
2716
36.8k
    }
2717
48.1k
}
2718
2719
/*
2720
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2721
 * load borrowed references onto the operand stack.
2722
 *
2723
 * This is only safe when we can prove that the reference in the frame outlives
2724
 * the borrowed reference produced by the instruction. We make this tractable
2725
 * by enforcing the following lifetimes:
2726
 *
2727
 * 1. Borrowed references loaded onto the operand stack live until the end of
2728
 *    the instruction that consumes them from the stack. Any borrowed
2729
 *    references that would escape into the heap (e.g. into frame objects or
2730
 *    generators) are converted into new, strong references.
2731
 *
2732
 * 2. Locals live until they are either killed by an instruction
2733
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2734
 *    via `f_locals` is added to a tuple owned by the frame object.
2735
 *
2736
 * To simplify the problem of detecting which supporting references in the
2737
 * frame are killed by instructions that overwrite locals, we only allow
2738
 * borrowed references to be stored as a local in the frame if they were passed
2739
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2740
 * strong references.
2741
 *
2742
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2743
 * that meet the following criteria:
2744
 *
2745
 * 1. The produced reference must be consumed from the stack before the
2746
 *    supporting reference in the frame is killed.
2747
 *
2748
 * 2. The produced reference cannot be stored as a local.
2749
 *
2750
 * We use abstract interpretation to identify instructions that meet these
2751
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2752
 * stack of abstract references and note any instructions that violate the
2753
 * criteria above. Once we've processed all the instructions in a block, any
2754
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2755
 */
2756
static int
2757
optimize_load_fast(cfg_builder *g)
2758
8.54k
{
2759
8.54k
    int status;
2760
8.54k
    ref_stack refs = {0};
2761
8.54k
    int max_instrs = 0;
2762
8.54k
    basicblock *entryblock = g->g_entryblock;
2763
70.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2764
62.0k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2765
62.0k
    }
2766
8.54k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2767
8.54k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2768
8.54k
    if (instr_flags == NULL) {
2769
0
        PyErr_NoMemory();
2770
0
        return ERROR;
2771
0
    }
2772
8.54k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2773
8.54k
    if (blocks == NULL) {
2774
0
        status = ERROR;
2775
0
        goto done;
2776
0
    }
2777
8.54k
    basicblock **sp = blocks;
2778
8.54k
    *sp = entryblock;
2779
8.54k
    sp++;
2780
8.54k
    entryblock->b_startdepth = 0;
2781
8.54k
    entryblock->b_visited = 1;
2782
2783
8.54k
    #define PUSH_REF(instr, local)                \
2784
364k
        do {                                      \
2785
364k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2786
0
                status = ERROR;                   \
2787
0
                goto done;                        \
2788
0
            }                                     \
2789
364k
        } while(0)
2790
2791
53.9k
    while (sp != blocks) {
2792
45.4k
        basicblock *block = *--sp;
2793
45.4k
        assert(block->b_startdepth > -1);
2794
2795
        // Reset per-block state.
2796
45.4k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2797
2798
        // Reset the stack of refs. We don't track references on the stack
2799
        // across basic blocks, but the bytecode will expect their
2800
        // presence. Add dummy references as necessary.
2801
45.4k
        ref_stack_clear(&refs);
2802
86.5k
        for (int i = 0; i < block->b_startdepth; i++) {
2803
41.1k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2804
41.1k
        }
2805
2806
456k
        for (int i = 0; i < block->b_iused; i++) {
2807
410k
            cfg_instr *instr = &block->b_instr[i];
2808
410k
            int opcode = instr->i_opcode;
2809
410k
            int oparg = instr->i_oparg;
2810
410k
            assert(opcode != EXTENDED_ARG);
2811
410k
            switch (opcode) {
2812
                // Opcodes that load and store locals
2813
3
                case DELETE_FAST: {
2814
3
                    kill_local(instr_flags, &refs, oparg);
2815
3
                    break;
2816
0
                }
2817
2818
50.7k
                case LOAD_FAST: {
2819
50.7k
                    PUSH_REF(i, oparg);
2820
50.7k
                    break;
2821
50.7k
                }
2822
2823
50.7k
                case LOAD_FAST_AND_CLEAR: {
2824
340
                    kill_local(instr_flags, &refs, oparg);
2825
340
                    PUSH_REF(i, oparg);
2826
340
                    break;
2827
340
                }
2828
2829
7.56k
                case LOAD_FAST_LOAD_FAST: {
2830
7.56k
                    PUSH_REF(i, oparg >> 4);
2831
7.56k
                    PUSH_REF(i, oparg & 15);
2832
7.56k
                    break;
2833
7.56k
                }
2834
2835
13.7k
                case STORE_FAST: {
2836
13.7k
                    ref r = ref_stack_pop(&refs);
2837
13.7k
                    store_local(instr_flags, &refs, oparg, r);
2838
13.7k
                    break;
2839
7.56k
                }
2840
2841
266
                case STORE_FAST_LOAD_FAST: {
2842
                    // STORE_FAST
2843
266
                    ref r = ref_stack_pop(&refs);
2844
266
                    store_local(instr_flags, &refs, oparg >> 4, r);
2845
                    // LOAD_FAST
2846
266
                    PUSH_REF(i, oparg & 15);
2847
266
                    break;
2848
266
                }
2849
2850
1.40k
                case STORE_FAST_STORE_FAST: {
2851
                    // STORE_FAST
2852
1.40k
                    ref r = ref_stack_pop(&refs);
2853
1.40k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2854
                    // STORE_FAST
2855
1.40k
                    r = ref_stack_pop(&refs);
2856
1.40k
                    store_local(instr_flags, &refs, oparg & 15, r);
2857
1.40k
                    break;
2858
266
                }
2859
2860
                // Opcodes that shuffle values on the stack
2861
1.64k
                case COPY: {
2862
1.64k
                    assert(oparg > 0);
2863
1.64k
                    Py_ssize_t idx = refs.size - oparg;
2864
1.64k
                    ref r = ref_stack_at(&refs, idx);
2865
1.64k
                    PUSH_REF(r.instr, r.local);
2866
1.64k
                    break;
2867
1.64k
                }
2868
2869
1.64k
                case SWAP: {
2870
1.44k
                    assert(oparg >= 2);
2871
1.44k
                    ref_stack_swap_top(&refs, oparg);
2872
1.44k
                    break;
2873
1.64k
                }
2874
2875
                // We treat opcodes that do not consume all of their inputs on
2876
                // a case by case basis, as we have no generic way of knowing
2877
                // how many inputs should be left on the stack.
2878
2879
                // Opcodes that consume no inputs
2880
1.70k
                case FORMAT_SIMPLE:
2881
1.70k
                case GET_ANEXT:
2882
3.58k
                case GET_ITER:
2883
3.58k
                case GET_LEN:
2884
3.62k
                case GET_YIELD_FROM_ITER:
2885
4.27k
                case IMPORT_FROM:
2886
4.27k
                case MATCH_KEYS:
2887
4.27k
                case MATCH_MAPPING:
2888
4.27k
                case MATCH_SEQUENCE:
2889
4.27k
                case WITH_EXCEPT_START: {
2890
4.27k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2891
4.27k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2892
4.27k
                    int net_pushed = num_pushed - num_popped;
2893
4.27k
                    assert(net_pushed >= 0);
2894
6.79k
                    for (int j = 0; j < net_pushed; j++) {
2895
2.52k
                        PUSH_REF(i, NOT_LOCAL);
2896
2.52k
                    }
2897
4.27k
                    break;
2898
4.27k
                }
2899
2900
                // Opcodes that consume some inputs and push no new values
2901
4.27k
                case DICT_MERGE:
2902
816
                case DICT_UPDATE:
2903
1.74k
                case LIST_APPEND:
2904
1.99k
                case LIST_EXTEND:
2905
13.2k
                case MAP_ADD:
2906
13.2k
                case RERAISE:
2907
13.3k
                case SET_ADD:
2908
13.3k
                case SET_UPDATE: {
2909
13.3k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2910
13.3k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2911
13.3k
                    int net_popped = num_popped - num_pushed;
2912
13.3k
                    assert(net_popped > 0);
2913
37.9k
                    for (int i = 0; i < net_popped; i++) {
2914
24.6k
                        ref_stack_pop(&refs);
2915
24.6k
                    }
2916
13.3k
                    break;
2917
13.3k
                }
2918
2919
55
                case END_SEND:
2920
2.32k
                case SET_FUNCTION_ATTRIBUTE: {
2921
2.32k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2922
2.32k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2923
2.32k
                    ref tos = ref_stack_pop(&refs);
2924
2.32k
                    ref_stack_pop(&refs);
2925
2.32k
                    PUSH_REF(tos.instr, tos.local);
2926
2.32k
                    break;
2927
2.32k
                }
2928
2929
                // Opcodes that consume some inputs and push new values
2930
2.32k
                case CHECK_EXC_MATCH: {
2931
0
                    ref_stack_pop(&refs);
2932
0
                    PUSH_REF(i, NOT_LOCAL);
2933
0
                    break;
2934
0
                }
2935
2936
1.96k
                case FOR_ITER: {
2937
1.96k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2938
1.96k
                    PUSH_REF(i, NOT_LOCAL);
2939
1.96k
                    break;
2940
1.96k
                }
2941
2942
25.9k
                case LOAD_ATTR:
2943
26.2k
                case LOAD_SUPER_ATTR: {
2944
26.2k
                    ref self = ref_stack_pop(&refs);
2945
26.2k
                    if (opcode == LOAD_SUPER_ATTR) {
2946
264
                        ref_stack_pop(&refs);
2947
264
                        ref_stack_pop(&refs);
2948
264
                    }
2949
26.2k
                    PUSH_REF(i, NOT_LOCAL);
2950
26.2k
                    if (oparg & 1) {
2951
                        // A method call; conservatively assume that self is pushed
2952
                        // back onto the stack
2953
10.3k
                        PUSH_REF(self.instr, self.local);
2954
10.3k
                    }
2955
26.2k
                    break;
2956
26.2k
                }
2957
2958
26.2k
                case LOAD_SPECIAL:
2959
334
                case PUSH_EXC_INFO: {
2960
334
                    ref tos = ref_stack_pop(&refs);
2961
334
                    PUSH_REF(i, NOT_LOCAL);
2962
334
                    PUSH_REF(tos.instr, tos.local);
2963
334
                    break;
2964
334
                }
2965
2966
334
                case SEND: {
2967
55
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2968
55
                    ref_stack_pop(&refs);
2969
55
                    PUSH_REF(i, NOT_LOCAL);
2970
55
                    break;
2971
55
                }
2972
2973
                // Opcodes that consume all of their inputs
2974
285k
                default: {
2975
285k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2976
285k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2977
285k
                    if (HAS_TARGET(instr->i_opcode)) {
2978
20.8k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2979
20.8k
                    }
2980
285k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2981
                        // Block push opcodes only affect the stack when jumping
2982
                        // to the target.
2983
526k
                        for (int j = 0; j < num_popped; j++) {
2984
241k
                            ref_stack_pop(&refs);
2985
241k
                        }
2986
496k
                        for (int j = 0; j < num_pushed; j++) {
2987
210k
                            PUSH_REF(i, NOT_LOCAL);
2988
210k
                        }
2989
285k
                    }
2990
285k
                    break;
2991
285k
                }
2992
410k
            }
2993
410k
        }
2994
2995
        // Push fallthrough block
2996
45.4k
        if (BB_HAS_FALLTHROUGH(block)) {
2997
25.2k
            assert(block->b_next != NULL);
2998
25.2k
            load_fast_push_block(&sp, block->b_next, refs.size);
2999
25.2k
        }
3000
3001
        // Mark instructions that produce values that are on the stack at the
3002
        // end of the basic block
3003
95.1k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3004
49.7k
            ref r = ref_stack_at(&refs, i);
3005
49.7k
            if (r.instr != -1) {
3006
20.8k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3007
20.8k
            }
3008
49.7k
        }
3009
3010
        // Optimize instructions
3011
456k
        for (int i = 0; i < block->b_iused; i++) {
3012
410k
            if (!instr_flags[i]) {
3013
377k
                cfg_instr *instr = &block->b_instr[i];
3014
377k
                switch (instr->i_opcode) {
3015
49.0k
                    case LOAD_FAST:
3016
49.0k
                        instr->i_opcode = LOAD_FAST_BORROW;
3017
49.0k
                        break;
3018
7.48k
                    case LOAD_FAST_LOAD_FAST:
3019
7.48k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3020
7.48k
                        break;
3021
320k
                    default:
3022
320k
                        break;
3023
377k
                }
3024
377k
            }
3025
410k
        }
3026
45.4k
    }
3027
3028
8.54k
    #undef PUSH_REF
3029
3030
8.54k
    status = SUCCESS;
3031
3032
8.54k
done:
3033
8.54k
    ref_stack_fini(&refs);
3034
8.54k
    PyMem_Free(instr_flags);
3035
8.54k
    PyMem_Free(blocks);
3036
8.54k
    return status;
3037
8.54k
}
3038
3039
// helper functions for add_checks_for_loads_of_unknown_variables
3040
static inline void
3041
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3042
208k
{
3043
    // Push b if the unsafe mask is giving us any new information.
3044
    // To avoid overflowing the stack, only allow each block once.
3045
    // Use b->b_visited=1 to mean that b is currently on the stack.
3046
208k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3047
208k
    if (b->b_unsafe_locals_mask != both) {
3048
34.8k
        b->b_unsafe_locals_mask = both;
3049
        // More work left to do.
3050
34.8k
        if (!b->b_visited) {
3051
            // not on the stack, so push it.
3052
34.6k
            *(*sp)++ = b;
3053
34.6k
            b->b_visited = 1;
3054
34.6k
        }
3055
34.8k
    }
3056
208k
}
3057
3058
static void
3059
scan_block_for_locals(basicblock *b, basicblock ***sp)
3060
91.6k
{
3061
    // bit i is set if local i is potentially uninitialized
3062
91.6k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3063
640k
    for (int i = 0; i < b->b_iused; i++) {
3064
549k
        cfg_instr *instr = &b->b_instr[i];
3065
549k
        assert(instr->i_opcode != EXTENDED_ARG);
3066
549k
        if (instr->i_except != NULL) {
3067
108k
            maybe_push(instr->i_except, unsafe_mask, sp);
3068
108k
        }
3069
549k
        if (instr->i_oparg >= 64) {
3070
39.0k
            continue;
3071
39.0k
        }
3072
549k
        assert(instr->i_oparg >= 0);
3073
510k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3074
510k
        switch (instr->i_opcode) {
3075
516
            case DELETE_FAST:
3076
1.20k
            case LOAD_FAST_AND_CLEAR:
3077
2.58k
            case STORE_FAST_MAYBE_NULL:
3078
2.58k
                unsafe_mask |= bit;
3079
2.58k
                break;
3080
33.7k
            case STORE_FAST:
3081
33.7k
                unsafe_mask &= ~bit;
3082
33.7k
                break;
3083
151
            case LOAD_FAST_CHECK:
3084
                // If this doesn't raise, then the local is defined.
3085
151
                unsafe_mask &= ~bit;
3086
151
                break;
3087
114k
            case LOAD_FAST:
3088
114k
                if (unsafe_mask & bit) {
3089
151
                    instr->i_opcode = LOAD_FAST_CHECK;
3090
151
                }
3091
114k
                unsafe_mask &= ~bit;
3092
114k
                break;
3093
510k
        }
3094
510k
    }
3095
91.6k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3096
48.9k
        maybe_push(b->b_next, unsafe_mask, sp);
3097
48.9k
    }
3098
91.6k
    cfg_instr *last = basicblock_last_instr(b);
3099
91.6k
    if (last && is_jump(last)) {
3100
44.9k
        assert(last->i_target != NULL);
3101
44.9k
        maybe_push(last->i_target, unsafe_mask, sp);
3102
44.9k
    }
3103
91.6k
}
3104
3105
static int
3106
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3107
0
{
3108
0
    assert(nlocals > 64);
3109
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3110
0
    if (states == NULL) {
3111
0
        PyErr_NoMemory();
3112
0
        return ERROR;
3113
0
    }
3114
0
    Py_ssize_t blocknum = 0;
3115
    // state[i - 64] == blocknum if local i is guaranteed to
3116
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3117
    // STORE_FAST within that basicblock (not followed by
3118
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3119
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3120
0
        blocknum++;
3121
0
        for (int i = 0; i < b->b_iused; i++) {
3122
0
            cfg_instr *instr = &b->b_instr[i];
3123
0
            assert(instr->i_opcode != EXTENDED_ARG);
3124
0
            int arg = instr->i_oparg;
3125
0
            if (arg < 64) {
3126
0
                continue;
3127
0
            }
3128
0
            assert(arg >= 0);
3129
0
            switch (instr->i_opcode) {
3130
0
                case DELETE_FAST:
3131
0
                case LOAD_FAST_AND_CLEAR:
3132
0
                case STORE_FAST_MAYBE_NULL:
3133
0
                    states[arg - 64] = blocknum - 1;
3134
0
                    break;
3135
0
                case STORE_FAST:
3136
0
                    states[arg - 64] = blocknum;
3137
0
                    break;
3138
0
                case LOAD_FAST:
3139
0
                    if (states[arg - 64] != blocknum) {
3140
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3141
0
                    }
3142
0
                    states[arg - 64] = blocknum;
3143
0
                    break;
3144
0
                    Py_UNREACHABLE();
3145
0
            }
3146
0
        }
3147
0
    }
3148
0
    PyMem_Free(states);
3149
0
    return SUCCESS;
3150
0
}
3151
3152
static int
3153
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3154
8.54k
{
3155
8.54k
    assert(PyList_CheckExact(consts));
3156
8.54k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3157
8.54k
    if (nconsts == 0) {
3158
0
        return SUCCESS;  /* nothing to do */
3159
0
    }
3160
3161
8.54k
    Py_ssize_t *index_map = NULL;
3162
8.54k
    Py_ssize_t *reverse_index_map = NULL;
3163
8.54k
    int err = ERROR;
3164
3165
8.54k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3166
8.54k
    if (index_map == NULL) {
3167
0
        goto end;
3168
0
    }
3169
62.4k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3170
53.9k
        index_map[i] = -1;
3171
53.9k
    }
3172
    // The first constant may be docstring; keep it always.
3173
8.54k
    index_map[0] = 0;
3174
3175
    /* mark used consts */
3176
69.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3177
494k
        for (int i = 0; i < b->b_iused; i++) {
3178
433k
            int opcode = b->b_instr[i].i_opcode;
3179
433k
            if (OPCODE_HAS_CONST(opcode)) {
3180
55.1k
                int index = b->b_instr[i].i_oparg;
3181
55.1k
                index_map[index] = index;
3182
55.1k
            }
3183
433k
        }
3184
61.0k
    }
3185
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3186
    /* condense consts */
3187
8.54k
    Py_ssize_t n_used_consts = 0;
3188
71.0k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3189
62.4k
        if (index_map[i] != -1) {
3190
47.4k
            assert(index_map[i] == i);
3191
47.4k
            index_map[n_used_consts++] = index_map[i];
3192
47.4k
        }
3193
62.4k
    }
3194
8.54k
    if (n_used_consts == nconsts) {
3195
        /* nothing to do */
3196
3.58k
        err = SUCCESS;
3197
3.58k
        goto end;
3198
3.58k
    }
3199
3200
    /* move all used consts to the beginning of the consts list */
3201
8.54k
    assert(n_used_consts < nconsts);
3202
42.0k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3203
37.0k
        Py_ssize_t old_index = index_map[i];
3204
37.0k
        assert(i <= old_index && old_index < nconsts);
3205
37.0k
        if (i != old_index) {
3206
26.5k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3207
26.5k
            assert(value != NULL);
3208
26.5k
            PyList_SetItem(consts, i, Py_NewRef(value));
3209
26.5k
        }
3210
37.0k
    }
3211
3212
    /* truncate the consts list at its new size */
3213
4.96k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3214
0
        goto end;
3215
0
    }
3216
    /* adjust const indices in the bytecode */
3217
4.96k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3218
4.96k
    if (reverse_index_map == NULL) {
3219
0
        goto end;
3220
0
    }
3221
57.0k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3222
52.0k
        reverse_index_map[i] = -1;
3223
52.0k
    }
3224
42.0k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3225
37.0k
        assert(index_map[i] != -1);
3226
37.0k
        assert(reverse_index_map[index_map[i]] == -1);
3227
37.0k
        reverse_index_map[index_map[i]] = i;
3228
37.0k
    }
3229
3230
48.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3231
377k
        for (int i = 0; i < b->b_iused; i++) {
3232
334k
            int opcode = b->b_instr[i].i_opcode;
3233
334k
            if (OPCODE_HAS_CONST(opcode)) {
3234
44.0k
                int index = b->b_instr[i].i_oparg;
3235
44.0k
                assert(reverse_index_map[index] >= 0);
3236
44.0k
                assert(reverse_index_map[index] < n_used_consts);
3237
44.0k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3238
44.0k
            }
3239
334k
        }
3240
43.6k
    }
3241
3242
4.96k
    err = SUCCESS;
3243
8.54k
end:
3244
8.54k
    PyMem_Free(index_map);
3245
8.54k
    PyMem_Free(reverse_index_map);
3246
8.54k
    return err;
3247
4.96k
}
3248
3249
3250
3251
static int
3252
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3253
                                                int nlocals,
3254
                                                int nparams)
3255
8.54k
{
3256
8.54k
    if (nlocals == 0) {
3257
1.91k
        return SUCCESS;
3258
1.91k
    }
3259
6.63k
    if (nlocals > 64) {
3260
        // To avoid O(nlocals**2) compilation, locals beyond the first
3261
        // 64 are only analyzed one basicblock at a time: initialization
3262
        // info is not passed between basicblocks.
3263
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3264
0
            return ERROR;
3265
0
        }
3266
0
        nlocals = 64;
3267
0
    }
3268
6.63k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3269
6.63k
    if (stack == NULL) {
3270
0
        return ERROR;
3271
0
    }
3272
6.63k
    basicblock **sp = stack;
3273
3274
    // First origin of being uninitialized:
3275
    // The non-parameter locals in the entry block.
3276
6.63k
    uint64_t start_mask = 0;
3277
16.8k
    for (int i = nparams; i < nlocals; i++) {
3278
10.2k
        start_mask |= (uint64_t)1 << i;
3279
10.2k
    }
3280
6.63k
    maybe_push(entryblock, start_mask, &sp);
3281
3282
    // Second origin of being uninitialized:
3283
    // There could be DELETE_FAST somewhere, so
3284
    // be sure to scan each basicblock at least once.
3285
63.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
56.9k
        scan_block_for_locals(b, &sp);
3287
56.9k
    }
3288
    // Now propagate the uncertainty from the origins we found: Use
3289
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3290
41.3k
    while (sp > stack) {
3291
34.6k
        basicblock *b = *--sp;
3292
        // mark as no longer on stack
3293
34.6k
        b->b_visited = 0;
3294
34.6k
        scan_block_for_locals(b, &sp);
3295
34.6k
    }
3296
6.63k
    PyMem_Free(stack);
3297
6.63k
    return SUCCESS;
3298
6.63k
}
3299
3300
3301
static int
3302
7.53k
mark_warm(basicblock *entryblock) {
3303
7.53k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3304
7.53k
    if (stack == NULL) {
3305
0
        return ERROR;
3306
0
    }
3307
7.53k
    basicblock **sp = stack;
3308
3309
7.53k
    *sp++ = entryblock;
3310
7.53k
    entryblock->b_visited = 1;
3311
50.6k
    while (sp > stack) {
3312
43.1k
        basicblock *b = *(--sp);
3313
43.1k
        assert(!b->b_except_handler);
3314
43.1k
        b->b_warm = 1;
3315
43.1k
        basicblock *next = b->b_next;
3316
43.1k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3317
20.7k
            *sp++ = next;
3318
20.7k
            next->b_visited = 1;
3319
20.7k
        }
3320
378k
        for (int i=0; i < b->b_iused; i++) {
3321
335k
            cfg_instr *instr = &b->b_instr[i];
3322
335k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3323
14.7k
                *sp++ = instr->i_target;
3324
14.7k
                instr->i_target->b_visited = 1;
3325
14.7k
            }
3326
335k
        }
3327
43.1k
    }
3328
7.53k
    PyMem_Free(stack);
3329
7.53k
    return SUCCESS;
3330
7.53k
}
3331
3332
static int
3333
7.53k
mark_cold(basicblock *entryblock) {
3334
67.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3335
60.0k
        assert(!b->b_cold && !b->b_warm);
3336
60.0k
    }
3337
7.53k
    if (mark_warm(entryblock) < 0) {
3338
0
        return ERROR;
3339
0
    }
3340
3341
7.53k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3342
7.53k
    if (stack == NULL) {
3343
0
        return ERROR;
3344
0
    }
3345
3346
7.53k
    basicblock **sp = stack;
3347
67.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3348
60.0k
        if (b->b_except_handler) {
3349
3.63k
            assert(!b->b_warm);
3350
3.63k
            *sp++ = b;
3351
3.63k
            b->b_visited = 1;
3352
3.63k
        }
3353
60.0k
    }
3354
3355
14.7k
    while (sp > stack) {
3356
7.24k
        basicblock *b = *(--sp);
3357
7.24k
        b->b_cold = 1;
3358
7.24k
        basicblock *next = b->b_next;
3359
7.24k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3360
2.00k
            if (!next->b_warm && !next->b_visited) {
3361
1.86k
                *sp++ = next;
3362
1.86k
                next->b_visited = 1;
3363
1.86k
            }
3364
2.00k
        }
3365
38.7k
        for (int i = 0; i < b->b_iused; i++) {
3366
31.4k
            cfg_instr *instr = &b->b_instr[i];
3367
31.4k
            if (is_jump(instr)) {
3368
2.49k
                assert(i == b->b_iused - 1);
3369
2.49k
                basicblock *target = b->b_instr[i].i_target;
3370
2.49k
                if (!target->b_warm && !target->b_visited) {
3371
1.75k
                    *sp++ = target;
3372
1.75k
                    target->b_visited = 1;
3373
1.75k
                }
3374
2.49k
            }
3375
31.4k
        }
3376
7.24k
    }
3377
7.53k
    PyMem_Free(stack);
3378
7.53k
    return SUCCESS;
3379
7.53k
}
3380
3381
3382
static int
3383
8.54k
push_cold_blocks_to_end(cfg_builder *g) {
3384
8.54k
    basicblock *entryblock = g->g_entryblock;
3385
8.54k
    if (entryblock->b_next == NULL) {
3386
        /* single basicblock, no need to reorder */
3387
1.00k
        return SUCCESS;
3388
1.00k
    }
3389
7.53k
    RETURN_IF_ERROR(mark_cold(entryblock));
3390
3391
7.53k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3392
3393
    /* If we have a cold block with fallthrough to a warm block, add */
3394
    /* an explicit jump instead of fallthrough */
3395
67.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3396
60.0k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3397
55
            basicblock *explicit_jump = cfg_builder_new_block(g);
3398
55
            if (explicit_jump == NULL) {
3399
0
                return ERROR;
3400
0
            }
3401
55
            if (!IS_LABEL(b->b_next->b_label)) {
3402
0
                b->b_next->b_label.id = next_lbl++;
3403
0
            }
3404
55
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3405
55
                             NO_LOCATION);
3406
55
            explicit_jump->b_cold = 1;
3407
55
            explicit_jump->b_next = b->b_next;
3408
55
            explicit_jump->b_predecessors = 1;
3409
55
            b->b_next = explicit_jump;
3410
3411
            /* set target */
3412
55
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3413
55
            last->i_target = explicit_jump->b_next;
3414
55
        }
3415
60.0k
    }
3416
3417
7.53k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3418
7.53k
    basicblock *cold_blocks = NULL;
3419
7.53k
    basicblock *cold_blocks_tail = NULL;
3420
3421
7.53k
    basicblock *b = entryblock;
3422
10.2k
    while(b->b_next) {
3423
10.2k
        assert(!b->b_cold);
3424
55.5k
        while (b->b_next && !b->b_next->b_cold) {
3425
45.2k
            b = b->b_next;
3426
45.2k
        }
3427
10.2k
        if (b->b_next == NULL) {
3428
            /* no more cold blocks */
3429
7.53k
            break;
3430
7.53k
        }
3431
3432
        /* b->b_next is the beginning of a cold streak */
3433
10.2k
        assert(!b->b_cold && b->b_next->b_cold);
3434
3435
2.75k
        basicblock *b_end = b->b_next;
3436
7.30k
        while (b_end->b_next && b_end->b_next->b_cold) {
3437
4.54k
            b_end = b_end->b_next;
3438
4.54k
        }
3439
3440
        /* b_end is the end of the cold streak */
3441
2.75k
        assert(b_end && b_end->b_cold);
3442
2.75k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3443
3444
2.75k
        if (cold_blocks == NULL) {
3445
1.44k
            cold_blocks = b->b_next;
3446
1.44k
        }
3447
1.31k
        else {
3448
1.31k
            cold_blocks_tail->b_next = b->b_next;
3449
1.31k
        }
3450
2.75k
        cold_blocks_tail = b_end;
3451
2.75k
        b->b_next = b_end->b_next;
3452
2.75k
        b_end->b_next = NULL;
3453
2.75k
    }
3454
7.53k
    assert(b != NULL && b->b_next == NULL);
3455
7.53k
    b->b_next = cold_blocks;
3456
3457
7.53k
    if (cold_blocks != NULL) {
3458
1.44k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3459
1.44k
    }
3460
7.53k
    return SUCCESS;
3461
7.53k
}
3462
3463
static int
3464
convert_pseudo_conditional_jumps(cfg_builder *g)
3465
8.54k
{
3466
8.54k
    basicblock *entryblock = g->g_entryblock;
3467
69.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3468
484k
        for (int i = 0; i < b->b_iused; i++) {
3469
423k
            cfg_instr *instr = &b->b_instr[i];
3470
423k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3471
677
                assert(i == b->b_iused - 1);
3472
677
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3473
343
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3474
677
                location loc = instr->i_loc;
3475
677
                basicblock *except = instr->i_except;
3476
677
                cfg_instr copy = {
3477
677
                            .i_opcode = COPY,
3478
677
                            .i_oparg = 1,
3479
677
                            .i_loc = loc,
3480
677
                            .i_target = NULL,
3481
677
                            .i_except = except,
3482
677
                };
3483
677
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3484
677
                cfg_instr to_bool = {
3485
677
                            .i_opcode = TO_BOOL,
3486
677
                            .i_oparg = 0,
3487
677
                            .i_loc = loc,
3488
677
                            .i_target = NULL,
3489
677
                            .i_except = except,
3490
677
                };
3491
677
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3492
677
            }
3493
423k
        }
3494
61.0k
    }
3495
8.54k
    return SUCCESS;
3496
8.54k
}
3497
3498
static int
3499
convert_pseudo_ops(cfg_builder *g)
3500
8.54k
{
3501
8.54k
    basicblock *entryblock = g->g_entryblock;
3502
69.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3503
489k
        for (int i = 0; i < b->b_iused; i++) {
3504
428k
            cfg_instr *instr = &b->b_instr[i];
3505
428k
            if (is_block_push(instr)) {
3506
3.63k
                INSTR_SET_OP0(instr, NOP);
3507
3.63k
            }
3508
424k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3509
1.98k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3510
1.98k
                instr->i_opcode = LOAD_FAST;
3511
1.98k
            }
3512
422k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3513
690
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3514
690
                instr->i_opcode = STORE_FAST;
3515
690
            }
3516
428k
        }
3517
61.0k
    }
3518
8.54k
    return remove_redundant_nops_and_jumps(g);
3519
8.54k
}
3520
3521
static inline bool
3522
103k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3523
103k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3524
58.0k
        return basicblock_has_no_lineno(b);
3525
58.0k
    }
3526
45.5k
    else {
3527
45.5k
        return false;
3528
45.5k
    }
3529
103k
}
3530
3531
3532
/* PEP 626 mandates that the f_lineno of a frame is correct
3533
 * after a frame terminates. It would be prohibitively expensive
3534
 * to continuously update the f_lineno field at runtime,
3535
 * so we make sure that all exiting instruction (raises and returns)
3536
 * have a valid line number, allowing us to compute f_lineno lazily.
3537
 * We can do this by duplicating the exit blocks without line number
3538
 * so that none have more than one predecessor. We can then safely
3539
 * copy the line number from the sole predecessor block.
3540
 */
3541
static int
3542
duplicate_exits_without_lineno(cfg_builder *g)
3543
17.0k
{
3544
17.0k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3545
3546
    /* Copy all exit blocks without line number that are targets of a jump.
3547
     */
3548
17.0k
    basicblock *entryblock = g->g_entryblock;
3549
139k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3550
122k
        cfg_instr *last = basicblock_last_instr(b);
3551
122k
        if (last == NULL) {
3552
18.5k
            continue;
3553
18.5k
        }
3554
103k
        if (is_jump(last)) {
3555
50.8k
            basicblock *target = next_nonempty_block(last->i_target);
3556
50.8k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3557
813
                basicblock *new_target = copy_basicblock(g, target);
3558
813
                if (new_target == NULL) {
3559
0
                    return ERROR;
3560
0
                }
3561
813
                new_target->b_instr[0].i_loc = last->i_loc;
3562
813
                last->i_target = new_target;
3563
813
                target->b_predecessors--;
3564
813
                new_target->b_predecessors = 1;
3565
813
                new_target->b_next = target->b_next;
3566
813
                new_target->b_label.id = next_lbl++;
3567
813
                target->b_next = new_target;
3568
813
            }
3569
50.8k
        }
3570
103k
    }
3571
3572
    /* Any remaining reachable exit blocks without line number can only be reached by
3573
     * fall through, and thus can only have a single predecessor */
3574
139k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3575
122k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3576
52.7k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3577
1.10k
                cfg_instr *last = basicblock_last_instr(b);
3578
1.10k
                assert(last != NULL);
3579
1.10k
                b->b_next->b_instr[0].i_loc = last->i_loc;
3580
1.10k
            }
3581
52.7k
        }
3582
122k
    }
3583
17.0k
    return SUCCESS;
3584
17.0k
}
3585
3586
3587
/* If an instruction has no line number, but it's predecessor in the BB does,
3588
 * then copy the line number. If a successor block has no line number, and only
3589
 * one predecessor, then inherit the line number.
3590
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3591
 * Also reduces the size of the line number table,
3592
 * but has no impact on the generated line number events.
3593
 */
3594
3595
static inline void
3596
maybe_propagate_location(basicblock *b, int i, location loc)
3597
959k
{
3598
959k
    assert(b->b_iused > i);
3599
959k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3600
57.0k
         b->b_instr[i].i_loc = loc;
3601
57.0k
    }
3602
959k
}
3603
3604
static void
3605
propagate_line_numbers(basicblock *entryblock)
3606
17.0k
{
3607
139k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3608
122k
        cfg_instr *last = basicblock_last_instr(b);
3609
122k
        if (last == NULL) {
3610
18.5k
            continue;
3611
18.5k
        }
3612
3613
103k
        location prev_location = NO_LOCATION;
3614
1.00M
        for (int i = 0; i < b->b_iused; i++) {
3615
897k
            maybe_propagate_location(b, i, prev_location);
3616
897k
            prev_location = b->b_instr[i].i_loc;
3617
897k
        }
3618
103k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3619
38.2k
            if (b->b_next->b_iused > 0) {
3620
38.2k
                maybe_propagate_location(b->b_next, 0, prev_location);
3621
38.2k
            }
3622
38.2k
        }
3623
103k
        if (is_jump(last)) {
3624
50.8k
            basicblock *target = last->i_target;
3625
50.8k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3626
3
                target = target->b_next;
3627
3
            }
3628
50.8k
            if (target->b_predecessors == 1) {
3629
23.7k
                maybe_propagate_location(target, 0, prev_location);
3630
23.7k
            }
3631
50.8k
        }
3632
103k
    }
3633
17.0k
}
3634
3635
static int
3636
resolve_line_numbers(cfg_builder *g, int firstlineno)
3637
17.0k
{
3638
17.0k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3639
17.0k
    propagate_line_numbers(g->g_entryblock);
3640
17.0k
    return SUCCESS;
3641
17.0k
}
3642
3643
int
3644
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3645
                        int nlocals, int nparams, int firstlineno)
3646
8.54k
{
3647
8.54k
    assert(cfg_builder_check(g));
3648
    /** Preprocessing **/
3649
    /* Map labels to targets and mark exception handlers */
3650
8.54k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3651
8.54k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3652
8.54k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3653
3654
    /** Optimization **/
3655
8.54k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3656
8.54k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3657
8.54k
    RETURN_IF_ERROR(
3658
8.54k
        add_checks_for_loads_of_uninitialized_variables(
3659
8.54k
            g->g_entryblock, nlocals, nparams));
3660
8.54k
    RETURN_IF_ERROR(insert_superinstructions(g));
3661
3662
8.54k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3663
8.54k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3664
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3665
    // assert(all_exits_have_lineno(g->g_entryblock));
3666
8.54k
    return SUCCESS;
3667
8.54k
}
3668
3669
static int *
3670
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3671
8.54k
{
3672
8.54k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3673
8.54k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3674
8.54k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3675
3676
8.54k
    int noffsets = ncellvars + nfreevars;
3677
8.54k
    int *fixed = PyMem_New(int, noffsets);
3678
8.54k
    if (fixed == NULL) {
3679
0
        PyErr_NoMemory();
3680
0
        return NULL;
3681
0
    }
3682
11.0k
    for (int i = 0; i < noffsets; i++) {
3683
2.52k
        fixed[i] = nlocals + i;
3684
2.52k
    }
3685
3686
8.54k
    PyObject *varname, *cellindex;
3687
8.54k
    Py_ssize_t pos = 0;
3688
9.98k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3689
1.44k
        PyObject *varindex;
3690
1.44k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3691
0
            goto error;
3692
0
        }
3693
1.44k
        if (varindex == NULL) {
3694
1.13k
            continue;
3695
1.13k
        }
3696
3697
315
        int argoffset = PyLong_AsInt(varindex);
3698
315
        Py_DECREF(varindex);
3699
315
        if (argoffset == -1 && PyErr_Occurred()) {
3700
0
            goto error;
3701
0
        }
3702
3703
315
        int oldindex = PyLong_AsInt(cellindex);
3704
315
        if (oldindex == -1 && PyErr_Occurred()) {
3705
0
            goto error;
3706
0
        }
3707
315
        fixed[oldindex] = argoffset;
3708
315
    }
3709
8.54k
    return fixed;
3710
3711
0
error:
3712
0
    PyMem_Free(fixed);
3713
0
    return NULL;
3714
8.54k
}
3715
3716
#define IS_GENERATOR(CF) \
3717
8.54k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3718
3719
static int
3720
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3721
                           int *fixed, int nfreevars, int code_flags)
3722
8.54k
{
3723
8.54k
    assert(umd->u_firstlineno > 0);
3724
3725
    /* Add the generator prefix instructions. */
3726
8.54k
    if (IS_GENERATOR(code_flags)) {
3727
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3728
         * of 0. This is because RETURN_GENERATOR pushes an element
3729
         * with _PyFrame_StackPush before switching stacks.
3730
         */
3731
3732
458
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3733
458
        cfg_instr make_gen = {
3734
458
            .i_opcode = RETURN_GENERATOR,
3735
458
            .i_oparg = 0,
3736
458
            .i_loc = loc,
3737
458
            .i_target = NULL,
3738
458
            .i_except = NULL,
3739
458
        };
3740
458
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3741
458
        cfg_instr pop_top = {
3742
458
            .i_opcode = POP_TOP,
3743
458
            .i_oparg = 0,
3744
458
            .i_loc = loc,
3745
458
            .i_target = NULL,
3746
458
            .i_except = NULL,
3747
458
        };
3748
458
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3749
458
    }
3750
3751
    /* Set up cells for any variable that escapes, to be put in a closure. */
3752
8.54k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3753
8.54k
    if (ncellvars) {
3754
        // umd->u_cellvars has the cells out of order so we sort them
3755
        // before adding the MAKE_CELL instructions.  Note that we
3756
        // adjust for arg cells, which come first.
3757
1.02k
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3758
1.02k
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3759
1.02k
        if (sorted == NULL) {
3760
0
            PyErr_NoMemory();
3761
0
            return ERROR;
3762
0
        }
3763
2.46k
        for (int i = 0; i < ncellvars; i++) {
3764
1.44k
            sorted[fixed[i]] = i + 1;
3765
1.44k
        }
3766
3.22k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3767
2.20k
            int oldindex = sorted[i] - 1;
3768
2.20k
            if (oldindex == -1) {
3769
754
                continue;
3770
754
            }
3771
1.44k
            cfg_instr make_cell = {
3772
1.44k
                .i_opcode = MAKE_CELL,
3773
                // This will get fixed in offset_derefs().
3774
1.44k
                .i_oparg = oldindex,
3775
1.44k
                .i_loc = NO_LOCATION,
3776
1.44k
                .i_target = NULL,
3777
1.44k
                .i_except = NULL,
3778
1.44k
            };
3779
1.44k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3780
0
                PyMem_RawFree(sorted);
3781
0
                return ERROR;
3782
0
            }
3783
1.44k
            ncellsused += 1;
3784
1.44k
        }
3785
1.02k
        PyMem_RawFree(sorted);
3786
1.02k
    }
3787
3788
8.54k
    if (nfreevars) {
3789
682
        cfg_instr copy_frees = {
3790
682
            .i_opcode = COPY_FREE_VARS,
3791
682
            .i_oparg = nfreevars,
3792
682
            .i_loc = NO_LOCATION,
3793
682
            .i_target = NULL,
3794
682
            .i_except = NULL,
3795
682
        };
3796
682
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3797
682
    }
3798
3799
8.54k
    return SUCCESS;
3800
8.54k
}
3801
3802
static int
3803
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3804
8.54k
{
3805
8.54k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3806
8.54k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3807
8.54k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3808
8.54k
    int noffsets = ncellvars + nfreevars;
3809
3810
    // First deal with duplicates (arg cells).
3811
8.54k
    int numdropped = 0;
3812
11.0k
    for (int i = 0; i < noffsets ; i++) {
3813
2.52k
        if (fixedmap[i] == i + nlocals) {
3814
2.21k
            fixedmap[i] -= numdropped;
3815
2.21k
        }
3816
315
        else {
3817
            // It was a duplicate (cell/arg).
3818
315
            numdropped += 1;
3819
315
        }
3820
2.52k
    }
3821
3822
    // Then update offsets, either relative to locals or by cell2arg.
3823
69.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3824
489k
        for (int i = 0; i < b->b_iused; i++) {
3825
428k
            cfg_instr *inst = &b->b_instr[i];
3826
            // This is called before extended args are generated.
3827
428k
            assert(inst->i_opcode != EXTENDED_ARG);
3828
428k
            int oldoffset = inst->i_oparg;
3829
428k
            switch(inst->i_opcode) {
3830
1.44k
                case MAKE_CELL:
3831
3.43k
                case LOAD_CLOSURE:
3832
5.97k
                case LOAD_DEREF:
3833
7.16k
                case STORE_DEREF:
3834
7.17k
                case DELETE_DEREF:
3835
7.17k
                case LOAD_FROM_DICT_OR_DEREF:
3836
7.17k
                    assert(oldoffset >= 0);
3837
7.17k
                    assert(oldoffset < noffsets);
3838
7.17k
                    assert(fixedmap[oldoffset] >= 0);
3839
7.17k
                    inst->i_oparg = fixedmap[oldoffset];
3840
428k
            }
3841
428k
        }
3842
61.0k
    }
3843
3844
8.54k
    return numdropped;
3845
8.54k
}
3846
3847
static int
3848
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3849
8.54k
{
3850
8.54k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3851
8.54k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3852
8.54k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3853
8.54k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3854
8.54k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3855
8.54k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3856
8.54k
    assert(INT_MAX - nlocals - ncellvars > 0);
3857
8.54k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3858
8.54k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3859
8.54k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3860
8.54k
    if (cellfixedoffsets == NULL) {
3861
0
        return ERROR;
3862
0
    }
3863
3864
    // This must be called before fix_cell_offsets().
3865
8.54k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3866
0
        PyMem_Free(cellfixedoffsets);
3867
0
        return ERROR;
3868
0
    }
3869
3870
8.54k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3871
8.54k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3872
8.54k
    cellfixedoffsets = NULL;
3873
8.54k
    if (numdropped < 0) {
3874
0
        return ERROR;
3875
0
    }
3876
3877
8.54k
    nlocalsplus -= numdropped;
3878
8.54k
    return nlocalsplus;
3879
8.54k
}
3880
3881
cfg_builder *
3882
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3883
8.54k
{
3884
8.54k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3885
0
        return NULL;
3886
0
    }
3887
8.54k
    cfg_builder *g = _PyCfgBuilder_New();
3888
8.54k
    if (g == NULL) {
3889
0
        return NULL;
3890
0
    }
3891
495k
    for (int i = 0; i < seq->s_used; i++) {
3892
486k
        seq->s_instrs[i].i_target = 0;
3893
486k
    }
3894
495k
    for (int i = 0; i < seq->s_used; i++) {
3895
486k
        _PyInstruction *instr = &seq->s_instrs[i];
3896
486k
        if (HAS_TARGET(instr->i_opcode)) {
3897
32.3k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3898
32.3k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3899
32.3k
        }
3900
486k
    }
3901
8.54k
    int offset = 0;
3902
495k
    for (int i = 0; i < seq->s_used; i++) {
3903
486k
        _PyInstruction *instr = &seq->s_instrs[i];
3904
486k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3905
446
            if (seq->s_annotations_code != NULL) {
3906
1
                assert(seq->s_annotations_code->s_labelmap_size == 0
3907
1
                    && seq->s_annotations_code->s_nested == NULL);
3908
4
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3909
3
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3910
3
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3911
3
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3912
0
                        goto error;
3913
0
                    }
3914
3
                }
3915
1
                offset += seq->s_annotations_code->s_used - 1;
3916
1
            }
3917
445
            else {
3918
445
                offset -= 1;
3919
445
            }
3920
446
            continue;
3921
446
        }
3922
486k
        if (instr->i_target) {
3923
26.0k
            jump_target_label lbl_ = {i + offset};
3924
26.0k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3925
0
                goto error;
3926
0
            }
3927
26.0k
        }
3928
486k
        int opcode = instr->i_opcode;
3929
486k
        int oparg = instr->i_oparg;
3930
486k
        if (HAS_TARGET(opcode)) {
3931
32.3k
            oparg += offset;
3932
32.3k
        }
3933
486k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3934
0
            goto error;
3935
0
        }
3936
486k
    }
3937
8.54k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3938
0
        goto error;
3939
0
    }
3940
8.54k
    return g;
3941
0
error:
3942
0
    _PyCfgBuilder_Free(g);
3943
0
    return NULL;
3944
8.54k
}
3945
3946
int
3947
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3948
8.54k
{
3949
8.54k
    int lbl = 0;
3950
70.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3951
62.0k
        b->b_label = (jump_target_label){lbl};
3952
62.0k
        lbl += 1;
3953
62.0k
    }
3954
70.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3955
62.0k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3956
504k
        for (int i = 0; i < b->b_iused; i++) {
3957
442k
            cfg_instr *instr = &b->b_instr[i];
3958
442k
            if (HAS_TARGET(instr->i_opcode)) {
3959
                /* Set oparg to the label id (it will later be mapped to an offset) */
3960
25.4k
                instr->i_oparg = instr->i_target->b_label.id;
3961
25.4k
            }
3962
442k
            RETURN_IF_ERROR(
3963
442k
                _PyInstructionSequence_Addop(
3964
442k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3965
3966
442k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3967
442k
            if (instr->i_except != NULL) {
3968
58.3k
                hi->h_label = instr->i_except->b_label.id;
3969
58.3k
                hi->h_startdepth = instr->i_except->b_startdepth;
3970
58.3k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3971
58.3k
            }
3972
384k
            else {
3973
384k
                hi->h_label = -1;
3974
384k
            }
3975
442k
        }
3976
62.0k
    }
3977
8.54k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3978
0
        return ERROR;
3979
0
    }
3980
8.54k
    return SUCCESS;
3981
8.54k
}
3982
3983
3984
int
3985
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3986
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3987
                                     int *stackdepth, int *nlocalsplus,
3988
                                     _PyInstructionSequence *seq)
3989
8.54k
{
3990
8.54k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3991
3992
8.54k
    *stackdepth = calculate_stackdepth(g);
3993
8.54k
    if (*stackdepth < 0) {
3994
0
        return ERROR;
3995
0
    }
3996
3997
    /* prepare_localsplus adds instructions for generators that push
3998
     * and pop an item on the stack. This assertion makes sure there
3999
     * is space on the stack for that.
4000
     * It should always be true, because a generator must have at
4001
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
4002
     * which requires stackspace.
4003
     */
4004
8.54k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
4005
4006
8.54k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
4007
8.54k
    if (*nlocalsplus < 0) {
4008
0
        return ERROR;
4009
0
    }
4010
4011
8.54k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4012
4013
    /* Order of basic blocks must have been determined by now */
4014
4015
8.54k
    RETURN_IF_ERROR(normalize_jumps(g));
4016
8.54k
    assert(no_redundant_jumps(g));
4017
4018
    /* Can't modify the bytecode after inserting instructions that produce
4019
     * borrowed references.
4020
     */
4021
8.54k
    RETURN_IF_ERROR(optimize_load_fast(g));
4022
4023
    /* Can't modify the bytecode after computing jump offsets. */
4024
8.54k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4025
0
        return ERROR;
4026
0
    }
4027
4028
8.54k
    return SUCCESS;
4029
8.54k
}
4030
4031
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4032
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4033
 */
4034
int
4035
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4036
0
{
4037
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4038
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4039
0
    return SUCCESS;
4040
0
}
4041
4042
/* Exported API functions */
4043
4044
int
4045
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4046
0
{
4047
0
    stack_effects effs;
4048
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4049
0
        return PY_INVALID_STACK_EFFECT;
4050
0
    }
4051
0
    return effs.net;
4052
0
}
4053
4054
int
4055
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4056
182
{
4057
182
    stack_effects effs;
4058
182
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4059
0
        return PY_INVALID_STACK_EFFECT;
4060
0
    }
4061
182
    return effs.net;
4062
182
}
4063
4064
/* Access to compiler optimizations for unit tests.
4065
4066
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4067
 * a CFG, optimizes it and converts back to an instruction list.
4068
 */
4069
4070
static PyObject *
4071
cfg_to_instruction_sequence(cfg_builder *g)
4072
0
{
4073
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4074
0
    if (seq == NULL) {
4075
0
        return NULL;
4076
0
    }
4077
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4078
0
        PyInstructionSequence_Fini(seq);
4079
0
        return NULL;
4080
0
    }
4081
0
    return (PyObject*)seq;
4082
0
}
4083
4084
PyObject *
4085
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4086
0
{
4087
0
    if (!_PyInstructionSequence_Check(seq)) {
4088
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4089
0
        return NULL;
4090
0
    }
4091
0
    PyObject *const_cache = PyDict_New();
4092
0
    if (const_cache == NULL) {
4093
0
        return NULL;
4094
0
    }
4095
4096
0
    PyObject *res = NULL;
4097
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4098
0
    if (g == NULL) {
4099
0
        goto error;
4100
0
    }
4101
0
    int nparams = 0, firstlineno = 1;
4102
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4103
0
                                nparams, firstlineno) < 0) {
4104
0
        goto error;
4105
0
    }
4106
4107
0
    if (calculate_stackdepth(g) == ERROR) {
4108
0
        goto error;
4109
0
    }
4110
4111
0
    if (optimize_load_fast(g) != SUCCESS) {
4112
0
        goto error;
4113
0
    }
4114
4115
0
    res = cfg_to_instruction_sequence(g);
4116
0
error:
4117
0
    Py_DECREF(const_cache);
4118
0
    _PyCfgBuilder_Free(g);
4119
0
    return res;
4120
0
}