Coverage Report

Created: 2025-07-18 06:09

/src/cpython/Python/flowgraph.c
Line | Count | Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
991k
#define SUCCESS 0
19
5.76k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
1.54M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
331k
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the blocks are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
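/* Editor's illustration (not part of the measured source): the two links
   give different traversals. Walking b_list visits every allocated block,
   as _PyCfgBuilder_CheckSize() does further down, while walking b_next
   follows normal control flow only. A minimal sketch, assuming a
   cfg_builder *g:

       int n = 0;
       for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
           n++;   // counts all allocated blocks, reachable or not
       }
*/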
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
339k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
339k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
295
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
1.44M
{
101
1.44M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
1.44M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
1.44M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
1.31M
{
108
1.31M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
1.31M
}
110
111
/* One arg */
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
35.7k
    do { \
114
35.7k
        assert(OPCODE_HAS_ARG(OP)); \
115
35.7k
        cfg_instr *_instr__ptr_ = (I); \
116
35.7k
        _instr__ptr_->i_opcode = (OP); \
117
35.7k
        _instr__ptr_->i_oparg = (ARG); \
118
35.7k
    } while (0);
119
120
/* No args */
121
#define INSTR_SET_OP0(I, OP) \
122
68.8k
    do { \
123
68.8k
        assert(!OPCODE_HAS_ARG(OP)); \
124
68.8k
        cfg_instr *_instr__ptr_ = (I); \
125
68.8k
        _instr__ptr_->i_opcode = (OP); \
126
68.8k
        _instr__ptr_->i_oparg = 0; \
127
68.8k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
4.10k
    do { \
131
4.10k
        cfg_instr *_instr__ptr_ = (I); \
132
4.10k
        _instr__ptr_->i_loc = (LOC); \
133
4.10k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
331k
{
144
331k
    assert(b != NULL);
145
331k
    _Py_c_array_t array = {
146
331k
        .array = (void*)b->b_instr,
147
331k
        .allocated_entries = b->b_ialloc,
148
331k
        .item_size = sizeof(cfg_instr),
149
331k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
331k
    };
151
152
331k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
331k
    b->b_instr = array.array;
154
331k
    b->b_ialloc = array.allocated_entries;
155
331k
    return b->b_iused++;
156
331k
}
157
158
static cfg_instr *
159
1.21M
basicblock_last_instr(const basicblock *b) {
160
1.21M
    assert(b->b_iused >= 0);
161
1.21M
    if (b->b_iused > 0) {
162
1.12M
        assert(b->b_instr != NULL);
163
1.12M
        return &b->b_instr[b->b_iused - 1];
164
1.12M
    }
165
96.8k
    return NULL;
166
1.21M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
38.3k
{
175
38.3k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
38.3k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
38.3k
    b->b_list = g->g_block_list;
182
38.3k
    g->g_block_list = b;
183
38.3k
    b->b_label = NO_LABEL;
184
38.3k
    return b;
185
38.3k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
324k
{
190
324k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
324k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
324k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
324k
    assert(0 <= oparg && oparg < (1 << 30));
194
195
324k
    int off = basicblock_next_instr(b);
196
324k
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
324k
    cfg_instr *i = &b->b_instr[off];
200
324k
    i->i_opcode = opcode;
201
324k
    i->i_oparg = oparg;
202
324k
    i->i_target = NULL;
203
324k
    i->i_loc = loc;
204
205
324k
    return SUCCESS;
206
324k
}
207
208
static int
209
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
210
1.15k
{
211
1.15k
    cfg_instr *last = basicblock_last_instr(b);
212
1.15k
    if (last && is_jump(last)) {
213
0
        return ERROR;
214
0
    }
215
216
1.15k
    RETURN_IF_ERROR(
217
1.15k
        basicblock_addop(b, opcode, target->b_label.id, loc));
218
1.15k
    last = basicblock_last_instr(b);
219
1.15k
    assert(last && last->i_opcode == opcode);
220
1.15k
    last->i_target = target;
221
1.15k
    return SUCCESS;
222
1.15k
}
223
224
static inline int
225
basicblock_append_instructions(basicblock *to, basicblock *from)
226
2.65k
{
227
6.94k
    for (int i = 0; i < from->b_iused; i++) {
228
4.29k
        int n = basicblock_next_instr(to);
229
4.29k
        if (n < 0) {
230
0
            return ERROR;
231
0
        }
232
4.29k
        to->b_instr[n] = from->b_instr[i];
233
4.29k
    }
234
2.65k
    return SUCCESS;
235
2.65k
}
236
237
static inline int
238
351k
basicblock_nofallthrough(const basicblock *b) {
239
351k
    cfg_instr *last = basicblock_last_instr(b);
240
351k
    return (last &&
241
351k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
242
331k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
243
351k
}
244
245
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
246
585k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
247
248
static basicblock *
249
copy_basicblock(cfg_builder *g, basicblock *block)
250
510
{
251
    /* Cannot copy a block if it has a fallthrough, since
252
     * a block can only have one fallthrough predecessor.
253
     */
254
510
    assert(BB_NO_FALLTHROUGH(block));
255
510
    basicblock *result = cfg_builder_new_block(g);
256
510
    if (result == NULL) {
257
0
        return NULL;
258
0
    }
259
510
    if (basicblock_append_instructions(result, block) < 0) {
260
0
        return NULL;
261
0
    }
262
510
    return result;
263
510
}
264
265
static int
266
2.74k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
267
2.74k
    RETURN_IF_ERROR(basicblock_next_instr(block));
268
46.3k
    for (int i = block->b_iused - 1; i > pos; i--) {
269
43.6k
        block->b_instr[i] = block->b_instr[i-1];
270
43.6k
    }
271
2.74k
    block->b_instr[pos] = *instr;
272
2.74k
    return SUCCESS;
273
2.74k
}
274
275
/* For debugging purposes only */
276
#if 0
277
static void
278
dump_instr(cfg_instr *i)
279
{
280
    const char *jump = is_jump(i) ? "jump " : "";
281
282
    char arg[128];
283
284
    *arg = '\0';
285
    if (OPCODE_HAS_ARG(i->i_opcode)) {
286
        sprintf(arg, "arg: %d ", i->i_oparg);
287
    }
288
    if (HAS_TARGET(i->i_opcode)) {
289
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
290
    }
291
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
292
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
293
}
294
295
static inline int
296
basicblock_returns(const basicblock *b) {
297
    cfg_instr *last = basicblock_last_instr(b);
298
    return last && IS_RETURN_OPCODE(last->i_opcode);
299
}
300
301
static void
302
dump_basicblock(const basicblock *b, bool highlight)
303
{
304
    const char *b_return = basicblock_returns(b) ? "return " : "";
305
    if (highlight) {
306
        fprintf(stderr, ">>> ");
307
    }
308
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
309
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
310
        b->b_startdepth, b->b_predecessors, b_return);
311
    int depth = b->b_startdepth;
312
    if (b->b_instr) {
313
        int i;
314
        for (i = 0; i < b->b_iused; i++) {
315
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
316
            dump_instr(b->b_instr + i);
317
318
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
319
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
320
            depth += (pushed - popped);
321
        }
322
    }
323
}
324
325
void
326
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
327
{
328
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
329
        dump_basicblock(b, b == mark);
330
    }
331
}
332
333
#endif
334
335
336
/***** CFG construction and modification *****/
337
338
static basicblock *
339
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
340
31.4k
{
341
31.4k
    assert(block != NULL);
342
31.4k
    g->g_curblock->b_next = block;
343
31.4k
    g->g_curblock = block;
344
31.4k
    return block;
345
31.4k
}
346
347
static inline int
348
72.0k
basicblock_exits_scope(const basicblock *b) {
349
72.0k
    cfg_instr *last = basicblock_last_instr(b);
350
72.0k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
351
72.0k
}
352
353
static inline int
354
46.0k
basicblock_has_eval_break(const basicblock *b) {
355
226k
    for (int i = 0; i < b->b_iused; i++) {
356
199k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
357
18.6k
            return true;
358
18.6k
        }
359
199k
    }
360
27.4k
    return false;
361
46.0k
}
362
363
static bool
364
cfg_builder_current_block_is_terminated(cfg_builder *g)
365
328k
{
366
328k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
367
328k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
368
26.7k
        return true;
369
26.7k
    }
370
302k
    if (IS_LABEL(g->g_current_label)) {
371
4.71k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
372
4.71k
            return true;
373
4.71k
        }
374
0
        else {
375
            /* current block is empty, label it */
376
0
            g->g_curblock->b_label = g->g_current_label;
377
0
            g->g_current_label = NO_LABEL;
378
0
        }
379
4.71k
    }
380
297k
    return false;
381
302k
}
382
383
static int
384
cfg_builder_maybe_start_new_block(cfg_builder *g)
385
328k
{
386
328k
    if (cfg_builder_current_block_is_terminated(g)) {
387
31.4k
        basicblock *b = cfg_builder_new_block(g);
388
31.4k
        if (b == NULL) {
389
0
            return ERROR;
390
0
        }
391
31.4k
        b->b_label = g->g_current_label;
392
31.4k
        g->g_current_label = NO_LABEL;
393
31.4k
        cfg_builder_use_next_block(g, b);
394
31.4k
    }
395
328k
    return SUCCESS;
396
328k
}
397
398
#ifndef NDEBUG
399
static bool
400
cfg_builder_check(cfg_builder *g)
401
{
402
    assert(g->g_entryblock->b_iused > 0);
403
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
404
        assert(!_PyMem_IsPtrFreed(block));
405
        if (block->b_instr != NULL) {
406
            assert(block->b_ialloc > 0);
407
            assert(block->b_iused >= 0);
408
            assert(block->b_ialloc >= block->b_iused);
409
        }
410
        else {
411
            assert (block->b_iused == 0);
412
            assert (block->b_ialloc == 0);
413
        }
414
    }
415
    return true;
416
}
417
#endif
418
419
static int
420
init_cfg_builder(cfg_builder *g)
421
5.76k
{
422
5.76k
    g->g_block_list = NULL;
423
5.76k
    basicblock *block = cfg_builder_new_block(g);
424
5.76k
    if (block == NULL) {
425
0
        return ERROR;
426
0
    }
427
5.76k
    g->g_curblock = g->g_entryblock = block;
428
5.76k
    g->g_current_label = NO_LABEL;
429
5.76k
    return SUCCESS;
430
5.76k
}
431
432
cfg_builder *
433
_PyCfgBuilder_New(void)
434
5.76k
{
435
5.76k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
436
5.76k
    if (g == NULL) {
437
0
        PyErr_NoMemory();
438
0
        return NULL;
439
0
    }
440
5.76k
    memset(g, 0, sizeof(cfg_builder));
441
5.76k
    if (init_cfg_builder(g) < 0) {
442
0
        PyMem_Free(g);
443
0
        return NULL;
444
0
    }
445
5.76k
    return g;
446
5.76k
}
447
448
void
449
_PyCfgBuilder_Free(cfg_builder *g)
450
5.76k
{
451
5.76k
    if (g == NULL) {
452
0
        return;
453
0
    }
454
5.76k
    assert(cfg_builder_check(g));
455
5.76k
    basicblock *b = g->g_block_list;
456
44.0k
    while (b != NULL) {
457
38.3k
        if (b->b_instr) {
458
38.3k
            PyMem_Free((void *)b->b_instr);
459
38.3k
        }
460
38.3k
        basicblock *next = b->b_list;
461
38.3k
        PyMem_Free((void *)b);
462
38.3k
        b = next;
463
38.3k
    }
464
5.76k
    PyMem_Free(g);
465
5.76k
}
466
467
int
468
_PyCfgBuilder_CheckSize(cfg_builder *g)
469
5.76k
{
470
5.76k
    int nblocks = 0;
471
42.9k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
472
37.1k
        nblocks++;
473
37.1k
    }
474
5.76k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
475
0
        PyErr_NoMemory();
476
0
        return ERROR;
477
0
    }
478
5.76k
    return SUCCESS;
479
5.76k
}
480
481
int
482
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
483
15.6k
{
484
15.6k
    g->g_current_label = lbl;
485
15.6k
    return cfg_builder_maybe_start_new_block(g);
486
15.6k
}
487
488
int
489
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
490
313k
{
491
313k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
492
313k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
493
313k
}
494
495
496
static basicblock *
497
next_nonempty_block(basicblock *b)
498
62.5k
{
499
65.1k
    while (b && b->b_iused == 0) {
500
2.57k
        b = b->b_next;
501
2.57k
    }
502
62.5k
    return b;
503
62.5k
}
504
505
/***** debugging helpers *****/
506
507
#ifndef NDEBUG
508
static int remove_redundant_nops(cfg_builder *g);
509
510
static bool
511
no_redundant_nops(cfg_builder *g) {
512
    if (remove_redundant_nops(g) != 0) {
513
        return false;
514
    }
515
    return true;
516
}
517
518
static bool
519
no_redundant_jumps(cfg_builder *g) {
520
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
521
        cfg_instr *last = basicblock_last_instr(b);
522
        if (last != NULL) {
523
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
524
                basicblock *next = next_nonempty_block(b->b_next);
525
                basicblock *jump_target = next_nonempty_block(last->i_target);
526
                if (jump_target == next) {
527
                    assert(next);
528
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
529
                        assert(0);
530
                        return false;
531
                    }
532
                }
533
            }
534
        }
535
    }
536
    return true;
537
}
538
#endif
539
540
/***** CFG preprocessing (jump targets and exceptions) *****/
541
542
static int
543
38.3k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
544
38.3k
    cfg_instr *last = basicblock_last_instr(b);
545
38.3k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
546
28.8k
        return SUCCESS;
547
28.8k
    }
548
9.49k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
549
550
9.49k
    bool is_forward = last->i_target->b_visited == 0;
551
9.49k
    if (is_forward) {
552
8.89k
        RETURN_IF_ERROR(
553
8.89k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
554
8.89k
        return SUCCESS;
555
8.89k
    }
556
557
596
    int reversed_opcode = 0;
558
596
    switch(last->i_opcode) {
559
28
        case POP_JUMP_IF_NOT_NONE:
560
28
            reversed_opcode = POP_JUMP_IF_NONE;
561
28
            break;
562
30
        case POP_JUMP_IF_NONE:
563
30
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
564
30
            break;
565
489
        case POP_JUMP_IF_FALSE:
566
489
            reversed_opcode = POP_JUMP_IF_TRUE;
567
489
            break;
568
49
        case POP_JUMP_IF_TRUE:
569
49
            reversed_opcode = POP_JUMP_IF_FALSE;
570
49
            break;
571
596
    }
572
    /* transform 'conditional jump T' to
573
     * 'reversed_jump b_next' followed by 'jump_backwards T'
574
     */
575
576
596
    basicblock *target = last->i_target;
577
596
    basicblock *backwards_jump = cfg_builder_new_block(g);
578
596
    if (backwards_jump == NULL) {
579
0
        return ERROR;
580
0
    }
581
596
    RETURN_IF_ERROR(
582
596
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
583
596
    RETURN_IF_ERROR(
584
596
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
585
596
    backwards_jump->b_startdepth = target->b_startdepth;
586
596
    last->i_opcode = reversed_opcode;
587
596
    last->i_target = b->b_next;
588
589
596
    backwards_jump->b_cold = b->b_cold;
590
596
    backwards_jump->b_next = b->b_next;
591
596
    b->b_next = backwards_jump;
592
596
    return SUCCESS;
593
596
}
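/* Editor's illustration (sketch, not in the source): a backwards
   conditional jump such as

       b:        ...; POP_JUMP_IF_FALSE T     (T already visited)
       b->next:  ...

   is rewritten into a forward conditional jump over a fresh block holding
   the backwards jump:

       b:        ...; POP_JUMP_IF_TRUE b->next
       bj:       NOT_TAKEN; JUMP T
       b->next:  ...
*/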
594
595
596
static int
597
normalize_jumps(cfg_builder *g)
598
5.76k
{
599
5.76k
    basicblock *entryblock = g->g_entryblock;
600
43.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
601
37.7k
        b->b_visited = 0;
602
37.7k
    }
603
44.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
604
38.3k
        b->b_visited = 1;
605
38.3k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
606
38.3k
    }
607
5.76k
    return SUCCESS;
608
5.76k
}
609
610
static int
611
5.76k
check_cfg(cfg_builder *g) {
612
42.9k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
613
        /* Raise SystemError if a jump or exit is not the last instruction in the block. */
614
350k
        for (int i = 0; i < b->b_iused; i++) {
615
313k
            int opcode = b->b_instr[i].i_opcode;
616
313k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
617
313k
            if (IS_TERMINATOR_OPCODE(opcode)) {
618
32.4k
                if (i != b->b_iused - 1) {
619
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
620
0
                    return ERROR;
621
0
                }
622
32.4k
            }
623
313k
        }
624
37.1k
    }
625
5.76k
    return SUCCESS;
626
5.76k
}
627
628
static int
629
get_max_label(basicblock *entryblock)
630
22.4k
{
631
22.4k
    int lbl = -1;
632
171k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
633
149k
        if (b->b_label.id > lbl) {
634
56.8k
            lbl = b->b_label.id;
635
56.8k
        }
636
149k
    }
637
22.4k
    return lbl;
638
22.4k
}
639
640
/* Calculate the actual jump target from the target_label */
641
static int
642
translate_jump_labels_to_targets(basicblock *entryblock)
643
5.76k
{
644
5.76k
    int max_label = get_max_label(entryblock);
645
5.76k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
646
5.76k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
647
5.76k
    if (!label2block) {
648
0
        PyErr_NoMemory();
649
0
        return ERROR;
650
0
    }
651
5.76k
    memset(label2block, 0, mapsize);
652
42.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
653
37.1k
        if (b->b_label.id >= 0) {
654
15.6k
            label2block[b->b_label.id] = b;
655
15.6k
        }
656
37.1k
    }
657
42.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
658
350k
        for (int i = 0; i < b->b_iused; i++) {
659
313k
            cfg_instr *instr = &b->b_instr[i];
660
313k
            assert(instr->i_target == NULL);
661
313k
            if (HAS_TARGET(instr->i_opcode)) {
662
19.2k
                int lbl = instr->i_oparg;
663
19.2k
                assert(lbl >= 0 && lbl <= max_label);
664
19.2k
                instr->i_target = label2block[lbl];
665
19.2k
                assert(instr->i_target != NULL);
666
19.2k
                assert(instr->i_target->b_label.id == lbl);
667
19.2k
            }
668
313k
        }
669
37.1k
    }
670
5.76k
    PyMem_Free(label2block);
671
5.76k
    return SUCCESS;
672
5.76k
}
673
674
static int
675
5.76k
mark_except_handlers(basicblock *entryblock) {
676
#ifndef NDEBUG
677
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
678
        assert(!b->b_except_handler);
679
    }
680
#endif
681
42.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
682
350k
        for (int i=0; i < b->b_iused; i++) {
683
313k
            cfg_instr *instr = &b->b_instr[i];
684
313k
            if (is_block_push(instr)) {
685
2.22k
                instr->i_target->b_except_handler = 1;
686
2.22k
            }
687
313k
        }
688
37.1k
    }
689
5.76k
    return SUCCESS;
690
5.76k
}
691
692
693
struct _PyCfgExceptStack {
694
    basicblock *handlers[CO_MAXBLOCKS+2];
695
    int depth;
696
};
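/* Editor's illustration (assumed bytecode, not in the source): for a
   sequence like SETUP_FINALLY H1; ...; SETUP_WITH H2; ...; POP_BLOCK; ...;
   POP_BLOCK, the handler stack evolves as

       []  ->  [H1]  ->  [H1, H2]  ->  [H1]  ->  []

   so except_stack_top() always names the innermost live handler (NULL when
   depth is 0, i.e. no handler is active). */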
697
698
699
static basicblock *
700
2.22k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
701
2.22k
    assert(is_block_push(setup));
702
2.22k
    int opcode = setup->i_opcode;
703
2.22k
    basicblock * target = setup->i_target;
704
2.22k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
705
1.30k
        target->b_preserve_lasti = 1;
706
1.30k
    }
707
2.22k
    assert(stack->depth <= CO_MAXBLOCKS);
708
2.22k
    stack->handlers[++stack->depth] = target;
709
2.22k
    return target;
710
2.22k
}
711
712
static basicblock *
713
1.77k
pop_except_block(struct _PyCfgExceptStack *stack) {
714
1.77k
    assert(stack->depth > 0);
715
1.77k
    return stack->handlers[--stack->depth];
716
1.77k
}
717
718
static basicblock *
719
31.6k
except_stack_top(struct _PyCfgExceptStack *stack) {
720
31.6k
    return stack->handlers[stack->depth];
721
31.6k
}
722
723
static struct _PyCfgExceptStack *
724
5.76k
make_except_stack(void) {
725
5.76k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
726
5.76k
    if (new == NULL) {
727
0
        PyErr_NoMemory();
728
0
        return NULL;
729
0
    }
730
5.76k
    new->depth = 0;
731
5.76k
    new->handlers[0] = NULL;
732
5.76k
    return new;
733
5.76k
}
734
735
static struct _PyCfgExceptStack *
736
11.7k
copy_except_stack(struct _PyCfgExceptStack *stack) {
737
11.7k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
738
11.7k
    if (copy == NULL) {
739
0
        PyErr_NoMemory();
740
0
        return NULL;
741
0
    }
742
11.7k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
743
11.7k
    return copy;
744
11.7k
}
745
746
static basicblock**
747
43.3k
make_cfg_traversal_stack(basicblock *entryblock) {
748
43.3k
    int nblocks = 0;
749
340k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
750
296k
        b->b_visited = 0;
751
296k
        nblocks++;
752
296k
    }
753
43.3k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
754
43.3k
    if (!stack) {
755
0
        PyErr_NoMemory();
756
0
    }
757
43.3k
    return stack;
758
43.3k
}
759
760
/* Compute the stack effects of opcode with argument oparg.
761
762
   Some opcodes have a different stack effect when jumping to the target
763
   than when not jumping. The 'jump' parameter selects the case:
764
765
   * 0 -- not jumping
766
   * 1 -- jumping
767
   * -1 -- the maximum of the two
768
 */
769
typedef struct {
770
    /* The stack effect of the instruction. */
771
    int net;
772
} stack_effects;
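/* Editor's note, grounded in get_stack_effects() below: a block-push
   opcode such as SETUP_FINALLY has net effect 0 on the fallthrough path,
   while on the jump path (exception raised) the effect is pushed - popped.
   Callers therefore query both cases; a minimal sketch:

       stack_effects e;
       if (get_stack_effects(opcode, oparg, 0, &e) < 0) { return ERROR; }  // no jump
       if (get_stack_effects(opcode, oparg, 1, &e) < 0) { return ERROR; }  // jump
*/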
773
774
Py_LOCAL(int)
775
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
776
291k
{
777
291k
    if (opcode < 0) {
778
0
        return -1;
779
0
    }
780
291k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
781
        // Specialized instructions are not supported.
782
0
        return -1;
783
0
    }
784
291k
    int popped = _PyOpcode_num_popped(opcode, oparg);
785
291k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
786
291k
    if (popped < 0 || pushed < 0) {
787
0
        return -1;
788
0
    }
789
291k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
790
2.22k
        effects->net = 0;
791
2.22k
        return 0;
792
2.22k
    }
793
289k
    effects->net = pushed - popped;
794
289k
    return 0;
795
291k
}
796
797
Py_LOCAL_INLINE(int)
798
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
799
38.8k
{
800
38.8k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
801
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
802
0
        return ERROR;
803
0
    }
804
38.8k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
805
31.5k
        assert(b->b_startdepth < 0);
806
31.5k
        b->b_startdepth = depth;
807
31.5k
        *(*sp)++ = b;
808
31.5k
    }
809
38.8k
    return SUCCESS;
810
38.8k
}
811
812
/* Find the flow path that needs the largest stack.  We assume that
813
 * cycles in the flow graph have no net effect on the stack depth.
814
 */
815
static int
816
calculate_stackdepth(cfg_builder *g)
817
5.76k
{
818
5.76k
    basicblock *entryblock = g->g_entryblock;
819
43.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
820
37.7k
        b->b_startdepth = INT_MIN;
821
37.7k
    }
822
5.76k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
823
5.76k
    if (!stack) {
824
0
        return ERROR;
825
0
    }
826
827
828
5.76k
    int stackdepth = -1;
829
5.76k
    int maxdepth = 0;
830
5.76k
    basicblock **sp = stack;
831
5.76k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
832
0
        goto error;
833
0
    }
834
37.3k
    while (sp != stack) {
835
31.5k
        basicblock *b = *--sp;
836
31.5k
        int depth = b->b_startdepth;
837
31.5k
        assert(depth >= 0);
838
31.5k
        basicblock *next = b->b_next;
839
291k
        for (int i = 0; i < b->b_iused; i++) {
840
274k
            cfg_instr *instr = &b->b_instr[i];
841
274k
            stack_effects effects;
842
274k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
843
0
                PyErr_Format(PyExc_SystemError,
844
0
                             "Invalid stack effect for opcode=%d, arg=%i",
845
0
                             instr->i_opcode, instr->i_oparg);
846
0
                goto error;
847
0
            }
848
274k
            int new_depth = depth + effects.net;
849
274k
            if (new_depth < 0) {
850
0
                PyErr_Format(PyExc_ValueError,
851
0
                             "Invalid CFG, stack underflow");
852
0
                goto error;
853
0
            }
854
274k
            maxdepth = Py_MAX(maxdepth, depth);
855
274k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
856
16.7k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
857
0
                    PyErr_Format(PyExc_SystemError,
858
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
859
0
                                 instr->i_opcode, instr->i_oparg);
860
0
                    goto error;
861
0
                }
862
16.7k
                int target_depth = depth + effects.net;
863
16.7k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
864
16.7k
                maxdepth = Py_MAX(maxdepth, depth);
865
16.7k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
866
0
                    goto error;
867
0
                }
868
16.7k
            }
869
274k
            depth = new_depth;
870
274k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
871
274k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
872
274k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
873
15.1k
            {
874
                /* remaining code is dead */
875
15.1k
                next = NULL;
876
15.1k
                break;
877
15.1k
            }
878
274k
        }
879
31.5k
        if (next != NULL) {
880
16.3k
            assert(BB_HAS_FALLTHROUGH(b));
881
16.3k
            if (stackdepth_push(&sp, next, depth) < 0) {
882
0
                goto error;
883
0
            }
884
16.3k
        }
885
31.5k
    }
886
5.76k
    stackdepth = maxdepth;
887
5.76k
error:
888
5.76k
    PyMem_Free(stack);
889
5.76k
    return stackdepth;
890
5.76k
}
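/* Editor's worked example (assumed bytecode): for

       LOAD_SMALL_INT 1; LOAD_SMALL_INT 2; BINARY_OP (add); RETURN_VALUE

   the depth sequence is 0 -> 1 -> 2 -> 1 -> 0, so maxdepth is 2. Branch
   targets are pushed with the jump-path depth, and the scan of a block
   stops at an unconditional jump or scope exit because anything after it
   is dead. */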
891
892
static int
893
5.76k
label_exception_targets(basicblock *entryblock) {
894
5.76k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
895
5.76k
    if (todo_stack == NULL) {
896
0
        return ERROR;
897
0
    }
898
5.76k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
899
5.76k
    if (except_stack == NULL) {
900
0
        PyMem_Free(todo_stack);
901
0
        PyErr_NoMemory();
902
0
        return ERROR;
903
0
    }
904
5.76k
    except_stack->depth = 0;
905
5.76k
    todo_stack[0] = entryblock;
906
5.76k
    entryblock->b_visited = 1;
907
5.76k
    entryblock->b_exceptstack = except_stack;
908
5.76k
    basicblock **todo = &todo_stack[1];
909
5.76k
    basicblock *handler = NULL;
910
37.4k
    while (todo > todo_stack) {
911
31.6k
        todo--;
912
31.6k
        basicblock *b = todo[0];
913
31.6k
        assert(b->b_visited == 1);
914
31.6k
        except_stack = b->b_exceptstack;
915
31.6k
        assert(except_stack != NULL);
916
31.6k
        b->b_exceptstack = NULL;
917
31.6k
        handler = except_stack_top(except_stack);
918
31.6k
        int last_yield_except_depth = -1;
919
333k
        for (int i = 0; i < b->b_iused; i++) {
920
301k
            cfg_instr *instr = &b->b_instr[i];
921
301k
            if (is_block_push(instr)) {
922
2.22k
                if (!instr->i_target->b_visited) {
923
2.22k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
924
2.22k
                    if (copy == NULL) {
925
0
                        goto error;
926
0
                    }
927
2.22k
                    instr->i_target->b_exceptstack = copy;
928
2.22k
                    todo[0] = instr->i_target;
929
2.22k
                    instr->i_target->b_visited = 1;
930
2.22k
                    todo++;
931
2.22k
                }
932
2.22k
                handler = push_except_block(except_stack, instr);
933
2.22k
            }
934
299k
            else if (instr->i_opcode == POP_BLOCK) {
935
1.77k
                handler = pop_except_block(except_stack);
936
1.77k
                INSTR_SET_OP0(instr, NOP);
937
1.77k
            }
938
297k
            else if (is_jump(instr)) {
939
16.0k
                instr->i_except = handler;
940
16.0k
                assert(i == b->b_iused -1);
941
16.0k
                if (!instr->i_target->b_visited) {
942
11.5k
                    if (BB_HAS_FALLTHROUGH(b)) {
943
9.52k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
944
9.52k
                        if (copy == NULL) {
945
0
                            goto error;
946
0
                        }
947
9.52k
                        instr->i_target->b_exceptstack = copy;
948
9.52k
                    }
949
2.02k
                    else {
950
2.02k
                        instr->i_target->b_exceptstack = except_stack;
951
2.02k
                        except_stack = NULL;
952
2.02k
                    }
953
11.5k
                    todo[0] = instr->i_target;
954
11.5k
                    instr->i_target->b_visited = 1;
955
11.5k
                    todo++;
956
11.5k
                }
957
16.0k
            }
958
281k
            else if (instr->i_opcode == YIELD_VALUE) {
959
413
                instr->i_except = handler;
960
413
                last_yield_except_depth = except_stack->depth;
961
413
            }
962
281k
            else if (instr->i_opcode == RESUME) {
963
6.17k
                instr->i_except = handler;
964
6.17k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
965
413
                    assert(last_yield_except_depth >= 0);
966
413
                    if (last_yield_except_depth == 1) {
967
352
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
968
352
                    }
969
413
                    last_yield_except_depth = -1;
970
413
                }
971
6.17k
            }
972
275k
            else {
973
275k
                instr->i_except = handler;
974
275k
            }
975
301k
        }
976
31.6k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
977
12.1k
            assert(except_stack != NULL);
978
12.1k
            b->b_next->b_exceptstack = except_stack;
979
12.1k
            todo[0] = b->b_next;
980
12.1k
            b->b_next->b_visited = 1;
981
12.1k
            todo++;
982
12.1k
        }
983
19.5k
        else if (except_stack != NULL) {
984
17.5k
           PyMem_Free(except_stack);
985
17.5k
        }
986
31.6k
    }
987
#ifdef Py_DEBUG
988
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
989
        assert(b->b_exceptstack == NULL);
990
    }
991
#endif
992
5.76k
    PyMem_Free(todo_stack);
993
5.76k
    return SUCCESS;
994
0
error:
995
0
    PyMem_Free(todo_stack);
996
0
    PyMem_Free(except_stack);
997
0
    return ERROR;
998
5.76k
}
999
1000
/***** CFG optimizations *****/
1001
1002
static int
1003
11.5k
remove_unreachable(basicblock *entryblock) {
1004
86.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1005
74.8k
        b->b_predecessors = 0;
1006
74.8k
    }
1007
11.5k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1008
11.5k
    if (stack == NULL) {
1009
0
        return ERROR;
1010
0
    }
1011
11.5k
    basicblock **sp = stack;
1012
11.5k
    entryblock->b_predecessors = 1;
1013
11.5k
    *sp++ = entryblock;
1014
11.5k
    entryblock->b_visited = 1;
1015
74.1k
    while (sp > stack) {
1016
62.5k
        basicblock *b = *(--sp);
1017
62.5k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1018
31.2k
            if (!b->b_next->b_visited) {
1019
26.5k
                assert(b->b_next->b_predecessors == 0);
1020
26.5k
                *sp++ = b->b_next;
1021
26.5k
                b->b_next->b_visited = 1;
1022
26.5k
            }
1023
31.2k
            b->b_next->b_predecessors++;
1024
31.2k
        }
1025
646k
        for (int i = 0; i < b->b_iused; i++) {
1026
584k
            basicblock *target;
1027
584k
            cfg_instr *instr = &b->b_instr[i];
1028
584k
            if (is_jump(instr) || is_block_push(instr)) {
1029
34.7k
                target = instr->i_target;
1030
34.7k
                if (!target->b_visited) {
1031
24.4k
                    *sp++ = target;
1032
24.4k
                    target->b_visited = 1;
1033
24.4k
                }
1034
34.7k
                target->b_predecessors++;
1035
34.7k
            }
1036
584k
        }
1037
62.5k
    }
1038
11.5k
    PyMem_Free(stack);
1039
1040
    /* Delete unreachable instructions */
1041
86.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1042
74.8k
       if (b->b_predecessors == 0) {
1043
12.2k
            b->b_iused = 0;
1044
12.2k
            b->b_except_handler = 0;
1045
12.2k
       }
1046
74.8k
    }
1047
11.5k
    return SUCCESS;
1048
11.5k
}
1049
1050
static int
1051
206k
basicblock_remove_redundant_nops(basicblock *bb) {
1052
    /* Remove NOPs when legal to do so. */
1053
206k
    int dest = 0;
1054
206k
    int prev_lineno = -1;
1055
1.66M
    for (int src = 0; src < bb->b_iused; src++) {
1056
1.45M
        int lineno = bb->b_instr[src].i_loc.lineno;
1057
1.45M
        if (bb->b_instr[src].i_opcode == NOP) {
1058
            /* Eliminate no-op if it doesn't have a line number */
1059
34.3k
            if (lineno < 0) {
1060
4.89k
                continue;
1061
4.89k
            }
1062
            /* or, if the previous instruction had the same line number. */
1063
29.4k
            if (prev_lineno == lineno) {
1064
23.8k
                continue;
1065
23.8k
            }
1066
            /* or, if the next instruction has the same line number or no line number */
1067
5.65k
            if (src < bb->b_iused - 1) {
1068
4.88k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1069
4.88k
                if (next_lineno == lineno) {
1070
3.25k
                    continue;
1071
3.25k
                }
1072
1.63k
                if (next_lineno < 0) {
1073
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1074
0
                    continue;
1075
0
                }
1076
1.63k
            }
1077
772
            else {
1078
772
                basicblock *next = next_nonempty_block(bb->b_next);
1079
                /* or, if this is the last instruction in the block and the next block has the same line number */
1080
772
                if (next) {
1081
772
                    location next_loc = NO_LOCATION;
1082
772
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1083
772
                        cfg_instr *instr = &next->b_instr[next_i];
1084
772
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1085
                            /* Skip over NOPs without a location, they will be removed */
1086
0
                            continue;
1087
0
                        }
1088
772
                        next_loc = instr->i_loc;
1089
772
                        break;
1090
772
                    }
1091
772
                    if (lineno == next_loc.lineno) {
1092
7
                        continue;
1093
7
                    }
1094
772
                }
1095
772
            }
1096
1097
5.65k
        }
1098
1.42M
        if (dest != src) {
1099
134k
            bb->b_instr[dest] = bb->b_instr[src];
1100
134k
        }
1101
1.42M
        dest++;
1102
1.42M
        prev_lineno = lineno;
1103
1.42M
    }
1104
206k
    assert(dest <= bb->b_iused);
1105
206k
    int num_removed = bb->b_iused - dest;
1106
206k
    bb->b_iused = dest;
1107
206k
    return num_removed;
1108
206k
}
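/* Editor's example (assumed line numbers): in

       NOP           (line 3)
       LOAD_FAST x   (line 3)

   the NOP is removable because the next instruction carries the same
   lineno; a NOP whose lineno matches neither neighbour must stay so that
   the line is still traced. */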
1109
1110
static int
1111
19.7k
remove_redundant_nops(cfg_builder *g) {
1112
19.7k
    int changes = 0;
1113
189k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1114
169k
        int change = basicblock_remove_redundant_nops(b);
1115
169k
        RETURN_IF_ERROR(change);
1116
169k
        changes += change;
1117
169k
    }
1118
19.7k
    return changes;
1119
19.7k
}
1120
1121
static int
1122
remove_redundant_nops_and_pairs(basicblock *entryblock)
1123
5.76k
{
1124
5.76k
    bool done = false;
1125
1126
11.5k
    while (! done) {
1127
5.76k
        done = true;
1128
5.76k
        cfg_instr *prev_instr = NULL;
1129
5.76k
        cfg_instr *instr = NULL;
1130
43.4k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1131
37.6k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1132
37.6k
            if (IS_LABEL(b->b_label)) {
1133
                /* this block is a jump target, forget instr */
1134
16.1k
                instr = NULL;
1135
16.1k
            }
1136
318k
            for (int i = 0; i < b->b_iused; i++) {
1137
280k
                prev_instr = instr;
1138
280k
                instr = &b->b_instr[i];
1139
280k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1140
280k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1141
280k
                int opcode = instr->i_opcode;
1142
280k
                bool is_redundant_pair = false;
1143
280k
                if (opcode == POP_TOP) {
1144
6.44k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1145
0
                       is_redundant_pair = true;
1146
0
                   }
1147
6.44k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1148
0
                       is_redundant_pair = true;
1149
0
                   }
1150
6.44k
                }
1151
280k
                if (is_redundant_pair) {
1152
0
                    INSTR_SET_OP0(prev_instr, NOP);
1153
0
                    INSTR_SET_OP0(instr, NOP);
1154
0
                    done = false;
1155
0
                }
1156
280k
            }
1157
37.6k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1158
27.0k
                instr = NULL;
1159
27.0k
            }
1160
37.6k
        }
1161
5.76k
    }
1162
5.76k
    return SUCCESS;
1163
5.76k
}
1164
1165
static int
1166
14.0k
remove_redundant_jumps(cfg_builder *g) {
1167
    /* If a non-empty block ends with a jump instruction, check if the next
1168
     * non-empty block reached through normal flow control is the target
1169
     * of that jump. If it is, then the jump instruction is redundant and
1170
     * can be deleted.
1171
     *
1172
     * Return the number of changes applied, or -1 on error.
1173
     */
1174
1175
14.0k
    int changes = 0;
1176
145k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1177
131k
        cfg_instr *last = basicblock_last_instr(b);
1178
131k
        if (last == NULL) {
1179
18.7k
            continue;
1180
18.7k
        }
1181
112k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1182
112k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1183
15.8k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1184
15.8k
            if (jump_target == NULL) {
1185
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1186
0
                return ERROR;
1187
0
            }
1188
15.8k
            basicblock *next = next_nonempty_block(b->b_next);
1189
15.8k
            if (jump_target == next) {
1190
522
                changes++;
1191
522
                INSTR_SET_OP0(last, NOP);
1192
522
            }
1193
15.8k
        }
1194
112k
    }
1195
1196
14.0k
    return changes;
1197
14.0k
}
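/* Editor's illustration (sketch): when block b ends with JUMP L and the
   next non-empty block in layout order is L itself,

       b:  ...; JUMP L
       L:  ...

   the JUMP is turned into a NOP, which remove_redundant_nops() can then
   usually delete. */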
1198
1199
static inline bool
1200
44.6k
basicblock_has_no_lineno(basicblock *b) {
1201
53.3k
    for (int i = 0; i < b->b_iused; i++) {
1202
49.0k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1203
40.2k
            return false;
1204
40.2k
        }
1205
49.0k
    }
1206
4.36k
    return true;
1207
44.6k
}
1208
1209
/* Maximum size of basic block that should be copied in optimizer */
1210
1.83k
#define MAX_COPY_SIZE 4
1211
1212
/* If this block ends with an unconditional jump to a small exit block or
1213
 * a block that has no line numbers (and no fallthrough), then
1214
 * remove the jump and extend this block with the target.
1215
 * Returns 1 if extended, 0 if no change, and -1 on error.
1216
 */
1217
static int
1218
58.3k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1219
58.3k
    cfg_instr *last = basicblock_last_instr(bb);
1220
58.3k
    if (last == NULL) {
1221
0
        return 0;
1222
0
    }
1223
58.3k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1224
48.2k
        return 0;
1225
48.2k
    }
1226
10.1k
    basicblock *target = last->i_target;
1227
10.1k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1228
10.1k
                             target->b_iused <= MAX_COPY_SIZE);
1229
10.1k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1230
10.1k
                                     !BB_HAS_FALLTHROUGH(target));
1231
10.1k
    if (small_exit_block || no_lineno_no_fallthrough) {
1232
2.14k
        assert(is_jump(last));
1233
2.14k
        int removed_jump_opcode = last->i_opcode;
1234
2.14k
        INSTR_SET_OP0(last, NOP);
1235
2.14k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1236
2.14k
        if (no_lineno_no_fallthrough) {
1237
1.82k
            last = basicblock_last_instr(bb);
1238
1.82k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1239
1.82k
                removed_jump_opcode == JUMP)
1240
45
            {
1241
                /* Make sure we don't lose eval breaker checks */
1242
45
                last->i_opcode = JUMP;
1243
45
            }
1244
1.82k
        }
1245
2.14k
        target->b_predecessors--;
1246
2.14k
        return 1;
1247
2.14k
    }
1248
8.03k
    return 0;
1249
10.1k
}
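/* Editor's illustration (sketch, assumed exit block): a block ending in
   JUMP exit, where exit is a small scope exit such as

       exit:  LOAD_CONST None; RETURN_VALUE

   has its jump NOP-ed and a copy of exit's instructions appended, so each
   application removes one jump and the loop in the caller converges. */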
1250
1251
static int
1252
5.76k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1253
5.76k
    bool changes;
1254
6.70k
    do {
1255
6.70k
        changes = false;
1256
65.0k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1257
58.3k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1258
58.3k
            RETURN_IF_ERROR(res);
1259
58.3k
            if (res) {
1260
2.14k
                changes = true;
1261
2.14k
            }
1262
58.3k
        }
1263
6.70k
    } while(changes); /* every change removes a jump, ensuring convergence */
1264
5.76k
    return changes;
1265
5.76k
}
1266
1267
// Attempt to eliminate jumps to jumps by updating inst to jump to
1268
// target->i_target using the provided opcode. Return whether or not the
1269
// optimization was successful.
1270
static bool
1271
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1272
555
{
1273
555
    assert(is_jump(inst));
1274
555
    assert(is_jump(target));
1275
555
    assert(inst == basicblock_last_instr(bb));
1276
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1277
    // changes (and we fall into an infinite loop):
1278
555
    if (inst->i_target != target->i_target) {
1279
        /* Change inst to NOP and append a jump to target->i_target. The
1280
         * NOP will be removed later if it's not needed for the lineno.
1281
         */
1282
555
        INSTR_SET_OP0(inst, NOP);
1283
1284
555
        RETURN_IF_ERROR(
1285
555
            basicblock_add_jump(
1286
555
                bb, opcode, target->i_target, target->i_loc));
1287
1288
555
        return true;
1289
555
    }
1290
0
    return false;
1291
555
}
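/* Editor's illustration (sketch): jump threading rewrites

       b:   ...; JUMP L1
       L1:  JUMP L2

   into

       b:   ...; NOP; JUMP L2

   with the NOP kept for now in case it is needed for the line number. */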
1292
1293
static int
1294
loads_const(int opcode)
1295
15.3k
{
1296
15.3k
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1297
15.3k
}
1298
1299
/* Returns new reference */
1300
static PyObject*
1301
get_const_value(int opcode, int oparg, PyObject *co_consts)
1302
63.6k
{
1303
63.6k
    PyObject *constant = NULL;
1304
63.6k
    assert(loads_const(opcode));
1305
63.6k
    if (opcode == LOAD_CONST) {
1306
62.9k
        constant = PyList_GET_ITEM(co_consts, oparg);
1307
62.9k
    }
1308
63.6k
    if (opcode == LOAD_SMALL_INT) {
1309
683
        return PyLong_FromLong(oparg);
1310
683
    }
1311
1312
62.9k
    if (constant == NULL) {
1313
0
        PyErr_SetString(PyExc_SystemError,
1314
0
                        "Internal error: failed to get value of a constant");
1315
0
        return NULL;
1316
0
    }
1317
62.9k
    return Py_NewRef(constant);
1318
62.9k
}
1319
1320
// Steals a reference to newconst.
1321
static int
1322
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1323
2.01k
{
1324
2.01k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1325
0
        Py_DECREF(newconst);
1326
0
        return -1;
1327
0
    }
1328
1329
2.01k
    Py_ssize_t index;
1330
83.6k
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1331
82.2k
        if (PyList_GET_ITEM(consts, index) == newconst) {
1332
613
            break;
1333
613
        }
1334
82.2k
    }
1335
2.01k
    if (index == PyList_GET_SIZE(consts)) {
1336
1.40k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1337
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1338
0
            Py_DECREF(newconst);
1339
0
            return -1;
1340
0
        }
1341
1.40k
        if (PyList_Append(consts, newconst)) {
1342
0
            Py_DECREF(newconst);
1343
0
            return -1;
1344
0
        }
1345
1.40k
    }
1346
2.01k
    Py_DECREF(newconst);
1347
2.01k
    return (int)index;
1348
2.01k
}
1349
1350
/*
1351
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1352
  Try to collect "size" consecutive constant-loading instructions into the array "instrs".
1353
  The caller must ensure that "instrs" is large enough to hold at least "size" instructions.
1354
1355
  Return a boolean indicating whether "size" such instructions were found.
1356
*/
1357
static bool
1358
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1359
10.1k
{
1360
10.1k
    assert(start < bb->b_iused);
1361
10.1k
    assert(size >= 0);
1362
10.1k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1363
1364
17.4k
    for (; start >= 0 && size > 0; start--) {
1365
15.4k
        cfg_instr *instr = &bb->b_instr[start];
1366
15.4k
        if (instr->i_opcode == NOP) {
1367
245
            continue;
1368
245
        }
1369
15.2k
        if (!loads_const(instr->i_opcode)) {
1370
8.16k
            return false;
1371
8.16k
        }
1372
7.06k
        instrs[--size] = instr;
1373
7.06k
    }
1374
1375
1.96k
    return size == 0;
1376
10.1k
}
1377
1378
/*
1379
  Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1380
  Caller must make sure "instrs" has at least "size" elements.
1381
*/
1382
static void
1383
nop_out(cfg_instr **instrs, int size)
1384
2.02k
{
1385
6.02k
    for (int i = 0; i < size; i++) {
1386
4.00k
        cfg_instr *instr = instrs[i];
1387
4.00k
        assert(instr->i_opcode != NOP);
1388
4.00k
        INSTR_SET_OP0(instr, NOP);
1389
4.00k
        INSTR_SET_LOC(instr, NO_LOCATION);
1390
4.00k
    }
1391
2.02k
}
1392
1393
/* Does not steal reference to "newconst".
1394
   Return 1 if changed instruction to LOAD_SMALL_INT.
1395
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1396
   Return -1 on error.
1397
*/
1398
static int
1399
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1400
                               PyObject *consts, PyObject *const_cache)
1401
60.1k
{
1402
60.1k
    if (PyLong_CheckExact(newconst)) {
1403
22.7k
        int overflow;
1404
22.7k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1405
22.7k
        if (val == -1 && PyErr_Occurred()) {
1406
0
            return -1;
1407
0
        }
1408
22.7k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1409
17.4k
            assert(_Py_IsImmortal(newconst));
1410
17.4k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1411
17.4k
            return 1;
1412
17.4k
        }
1413
22.7k
    }
1414
42.6k
    return 0;
1415
60.1k
}
1416
1417
1418
/* Steals reference to "newconst" */
1419
static int
1420
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1421
                      PyObject *consts, PyObject *const_cache)
1422
1.80k
{
1423
1.80k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1424
1.80k
    if (res < 0) {
1425
0
        Py_DECREF(newconst);
1426
0
        return ERROR;
1427
0
    }
1428
1.80k
    if (res > 0) {
1429
9
        return SUCCESS;
1430
9
    }
1431
1.79k
    int oparg = add_const(newconst, consts, const_cache);
1432
1.79k
    RETURN_IF_ERROR(oparg);
1433
1.79k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1434
1.79k
    return SUCCESS;
1435
1.79k
}
1436
1437
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1438
   with    LOAD_CONST (c1, c2, ... cn).
1439
   The consts table must still be in list form so that the
1440
   new constant (c1, c2, ... cn) can be appended.
1441
   Called with codestr pointing to the first LOAD_CONST.
1442
*/
1443
static int
1444
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1445
3.18k
{
1446
    /* Pre-conditions */
1447
3.18k
    assert(PyDict_CheckExact(const_cache));
1448
3.18k
    assert(PyList_CheckExact(consts));
1449
1450
3.18k
    cfg_instr *instr = &bb->b_instr[i];
1451
3.18k
    assert(instr->i_opcode == BUILD_TUPLE);
1452
1453
3.18k
    int seq_size = instr->i_oparg;
1454
3.18k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1455
0
        return SUCCESS;
1456
0
    }
1457
1458
3.18k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1459
3.18k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1460
        /* not a const sequence */
1461
1.92k
        return SUCCESS;
1462
1.92k
    }
1463
1464
1.26k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1465
1.26k
    if (const_tuple == NULL) {
1466
0
        return ERROR;
1467
0
    }
1468
1469
3.39k
    for (int i = 0; i < seq_size; i++) {
1470
2.12k
        cfg_instr *inst = const_instrs[i];
1471
2.12k
        assert(loads_const(inst->i_opcode));
1472
2.12k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1473
2.12k
        if (element == NULL) {
1474
0
            Py_DECREF(const_tuple);
1475
0
            return ERROR;
1476
0
        }
1477
2.12k
        PyTuple_SET_ITEM(const_tuple, i, element);
1478
2.12k
    }
1479
1480
1.26k
    nop_out(const_instrs, seq_size);
1481
1.26k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1482
1.26k
}
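/* Editor's example (assumed bytecode): the fold turns

       LOAD_SMALL_INT 1; LOAD_SMALL_INT 2; LOAD_SMALL_INT 3; BUILD_TUPLE 3

   into

       NOP; NOP; NOP; LOAD_CONST (1, 2, 3)

   where the NOPs have their locations cleared by nop_out() so that later
   passes can drop them. */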
1483
1484
/* Replace:
1485
    BUILD_LIST 0
1486
    LOAD_CONST c1
1487
    LIST_APPEND 1
1488
    LOAD_CONST c2
1489
    LIST_APPEND 1
1490
    ...
1491
    LOAD_CONST cN
1492
    LIST_APPEND 1
1493
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1494
   with:
1495
    LOAD_CONST (c1, c2, ... cN)
1496
*/
1497
static int
1498
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1499
                                      PyObject *consts, PyObject *const_cache)
1500
75
{
1501
75
    assert(PyDict_CheckExact(const_cache));
1502
75
    assert(PyList_CheckExact(consts));
1503
75
    assert(i >= 0);
1504
75
    assert(i < bb->b_iused);
1505
1506
75
    cfg_instr *intrinsic = &bb->b_instr[i];
1507
75
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1508
75
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1509
1510
75
    int consts_found = 0;
1511
75
    bool expect_append = true;
1512
1513
168
    for (int pos = i - 1; pos >= 0; pos--) {
1514
168
        cfg_instr *instr = &bb->b_instr[pos];
1515
168
        int opcode = instr->i_opcode;
1516
168
        int oparg = instr->i_oparg;
1517
1518
168
        if (opcode == NOP) {
1519
0
            continue;
1520
0
        }
1521
1522
168
        if (opcode == BUILD_LIST && oparg == 0) {
1523
1
            if (!expect_append) {
1524
                /* Not a sequence start. */
1525
0
                return SUCCESS;
1526
0
            }
1527
1528
            /* Sequence start, we are done. */
1529
1
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1530
1
            if (newconst == NULL) {
1531
0
                return ERROR;
1532
0
            }
1533
1534
92
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1535
91
                instr = &bb->b_instr[newpos];
1536
91
                if (instr->i_opcode == NOP) {
1537
0
                    continue;
1538
0
                }
1539
91
                if (loads_const(instr->i_opcode)) {
1540
45
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1541
45
                    if (constant == NULL) {
1542
0
                        Py_DECREF(newconst);
1543
0
                        return ERROR;
1544
0
                    }
1545
45
                    assert(consts_found > 0);
1546
45
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1547
45
                }
1548
91
                nop_out(&instr, 1);
1549
91
            }
1550
1
            assert(consts_found == 0);
1551
1
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1552
1
        }
1553
1554
167
        if (expect_append) {
1555
120
            if (opcode != LIST_APPEND || oparg != 1) {
1556
73
                return SUCCESS;
1557
73
            }
1558
120
        }
1559
47
        else {
1560
47
            if (!loads_const(opcode)) {
1561
1
                return SUCCESS;
1562
1
            }
1563
46
            consts_found++;
1564
46
        }
1565
1566
93
        expect_append = !expect_append;
1567
93
    }
1568
1569
    /* Did not find sequence start. */
1570
0
    return SUCCESS;
1571
75
}
1572
1573
2.08k
#define MIN_CONST_SEQUENCE_SIZE 3
1574
/*
1575
Optimize lists and sets for:
1576
    1. "for" loop, comprehension or "in"/"not in" tests:
1577
           Change literal list or set of constants into constant
1578
           tuple or frozenset respectively. Change list of
1579
           non-constants into tuple.
1580
    2. Constant literal lists/sets with length >= MIN_CONST_SEQUENCE_SIZE:
1581
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1582
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1583
           or BUILD_SET & SET_UPDATE respectively.
1584
*/
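/* Illustrative sketch (assumed source, for intuition only):

       x in [1, 2, 3]    ->  LOAD_CONST (1, 2, 3); CONTAINS_OP       (case 1)
       s = {1, 2, 3}     ->  BUILD_SET 0; LOAD_CONST frozenset({1, 2, 3});
                             SET_UPDATE 1                            (case 2)

   Case 1 can load the folded constant directly because the consumer only
   iterates or tests membership; case 2 must keep the mutable container,
   but fills it from a single pre-built constant. */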
1585
static int
1586
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1587
                        PyObject *consts, PyObject *const_cache)
1588
1.04k
{
1589
1.04k
    assert(PyDict_CheckExact(const_cache));
1590
1.04k
    assert(PyList_CheckExact(consts));
1591
1592
1.04k
    cfg_instr *instr = &bb->b_instr[i];
1593
1.04k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1594
1595
1.04k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1596
1.04k
    int seq_size = instr->i_oparg;
1597
1.04k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1598
1.04k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1599
872
    {
1600
872
        return SUCCESS;
1601
872
    }
1602
1603
172
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1604
172
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1605
43
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1606
            /* iterate over a tuple instead of list */
1607
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1608
0
        }
1609
43
        return SUCCESS;
1610
43
    }
1611
1612
129
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1613
129
    if (const_result == NULL) {
1614
0
        return ERROR;
1615
0
    }
1616
1617
1.33k
    for (int i = 0; i < seq_size; i++) {
1618
1.20k
        cfg_instr *inst = const_instrs[i];
1619
1.20k
        assert(loads_const(inst->i_opcode));
1620
1.20k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1621
1.20k
        if (element == NULL) {
1622
0
            Py_DECREF(const_result);
1623
0
            return ERROR;
1624
0
        }
1625
1.20k
        PyTuple_SET_ITEM(const_result, i, element);
1626
1.20k
    }
1627
1628
129
    if (instr->i_opcode == BUILD_SET) {
1629
26
        PyObject *frozenset = PyFrozenSet_New(const_result);
1630
26
        if (frozenset == NULL) {
1631
0
            Py_DECREF(const_result);
1632
0
            return ERROR;
1633
0
        }
1634
26
        Py_SETREF(const_result, frozenset);
1635
26
    }
1636
1637
129
    int index = add_const(const_result, consts, const_cache);
1638
129
    RETURN_IF_ERROR(index);
1639
129
    nop_out(const_instrs, seq_size);
1640
1641
129
    if (contains_or_iter) {
1642
25
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1643
25
    }
1644
104
    else {
1645
104
        assert(i >= 2);
1646
104
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1647
1648
104
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1649
1650
104
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1651
104
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1652
104
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1653
104
    }
1654
129
    return SUCCESS;
1655
129
}
1656
1657
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1658
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1659
 * Used to avoid creating constants which are slow to hash.
1660
 */
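/* Worked example (hypothetical): with limit = 4, the tuple ((1, 2), 3)
   first charges its own 2 items (limit -> 2), then recursing into (1, 2)
   charges 2 more (limit -> 0); the result is still non-negative, so the
   constant is considered cheap enough. */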
1661
static Py_ssize_t
1662
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1663
0
{
1664
0
    if (PyTuple_Check(obj)) {
1665
0
        Py_ssize_t i;
1666
0
        limit -= PyTuple_GET_SIZE(obj);
1667
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1668
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1669
0
            if (limit < 0) {
1670
0
                return limit;
1671
0
            }
1672
0
        }
1673
0
    }
1674
0
    return limit;
1675
0
}
1676
1677
23
#define MAX_INT_SIZE           128  /* bits */
1678
0
#define MAX_COLLECTION_SIZE    256  /* items */
1679
10
#define MAX_STR_SIZE          4096  /* characters */
1680
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1681
1682
static PyObject *
1683
const_folding_safe_multiply(PyObject *v, PyObject *w)
1684
23
{
1685
23
    if (PyLong_Check(v) && PyLong_Check(w) &&
1686
23
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1687
23
    ) {
1688
3
        int64_t vbits = _PyLong_NumBits(v);
1689
3
        int64_t wbits = _PyLong_NumBits(w);
1690
3
        assert(vbits >= 0);
1691
3
        assert(wbits >= 0);
1692
3
        if (vbits + wbits > MAX_INT_SIZE) {
1693
0
            return NULL;
1694
0
        }
1695
3
    }
1696
20
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1697
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1698
0
        if (size) {
1699
0
            long n = PyLong_AsLong(v);
1700
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1701
0
                return NULL;
1702
0
            }
1703
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1704
0
                return NULL;
1705
0
            }
1706
0
        }
1707
0
    }
1708
20
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1709
10
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1710
10
                                               PyBytes_GET_SIZE(w);
1711
10
        if (size) {
1712
10
            long n = PyLong_AsLong(v);
1713
10
            if (n < 0 || n > MAX_STR_SIZE / size) {
1714
3
                return NULL;
1715
3
            }
1716
10
        }
1717
10
    }
1718
10
    else if (PyLong_Check(w) &&
1719
10
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1720
10
    {
1721
10
        return const_folding_safe_multiply(w, v);
1722
10
    }
1723
1724
10
    return PyNumber_Multiply(v, w);
1725
23
}
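/* Rationale sketch: a v-bit integer times a w-bit integer has at most
   v + w bits, so requiring vbits + wbits <= MAX_INT_SIZE (128) bounds the
   folded product; e.g. two 64-bit operands pass, while a 100-bit times a
   40-bit operand (up to 140 bits) is rejected and left unfolded. */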
1726
1727
static PyObject *
1728
const_folding_safe_power(PyObject *v, PyObject *w)
1729
8
{
1730
8
    if (PyLong_Check(v) && PyLong_Check(w) &&
1731
8
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1732
8
    ) {
1733
8
        int64_t vbits = _PyLong_NumBits(v);
1734
8
        size_t wbits = PyLong_AsSize_t(w);
1735
8
        assert(vbits >= 0);
1736
8
        if (wbits == (size_t)-1) {
1737
0
            return NULL;
1738
0
        }
1739
8
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1740
0
            return NULL;
1741
0
        }
1742
8
    }
1743
1744
8
    return PyNumber_Power(v, w, Py_None);
1745
8
}
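/* Rationale sketch: here wbits holds the *value* of the exponent w (via
   PyLong_AsSize_t), not its bit count; since v**w has at most vbits * w
   bits, rejecting vbits > MAX_INT_SIZE / wbits keeps the folded result
   within 128 bits. */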
1746
1747
static PyObject *
1748
const_folding_safe_lshift(PyObject *v, PyObject *w)
1749
4
{
1750
4
    if (PyLong_Check(v) && PyLong_Check(w) &&
1751
4
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1752
4
    ) {
1753
4
        int64_t vbits = _PyLong_NumBits(v);
1754
4
        size_t wbits = PyLong_AsSize_t(w);
1755
4
        assert(vbits >= 0);
1756
4
        if (wbits == (size_t)-1) {
1757
0
            return NULL;
1758
0
        }
1759
4
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1760
0
            return NULL;
1761
0
        }
1762
4
    }
1763
1764
4
    return PyNumber_Lshift(v, w);
1765
4
}
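/* Rationale sketch: as above, wbits is the shift amount itself; for
   nonzero v, v << w has exactly vbits + w bits, hence the
   vbits > MAX_INT_SIZE - wbits rejection. */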
1766
1767
static PyObject *
1768
const_folding_safe_mod(PyObject *v, PyObject *w)
1769
0
{
1770
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1771
0
        return NULL;
1772
0
    }
1773
1774
0
    return PyNumber_Remainder(v, w);
1775
0
}
1776
1777
static PyObject *
1778
eval_const_binop(PyObject *left, int op, PyObject *right)
1779
39
{
1780
39
    assert(left != NULL && right != NULL);
1781
39
    assert(op >= 0 && op <= NB_OPARG_LAST);
1782
1783
39
    PyObject *result = NULL;
1784
39
    switch (op) {
1785
4
        case NB_ADD:
1786
4
            result = PyNumber_Add(left, right);
1787
4
            break;
1788
0
        case NB_SUBTRACT:
1789
0
            result = PyNumber_Subtract(left, right);
1790
0
            break;
1791
13
        case NB_MULTIPLY:
1792
13
            result = const_folding_safe_multiply(left, right);
1793
13
            break;
1794
1
        case NB_TRUE_DIVIDE:
1795
1
            result = PyNumber_TrueDivide(left, right);
1796
1
            break;
1797
0
        case NB_FLOOR_DIVIDE:
1798
0
            result = PyNumber_FloorDivide(left, right);
1799
0
            break;
1800
0
        case NB_REMAINDER:
1801
0
            result = const_folding_safe_mod(left, right);
1802
0
            break;
1803
8
        case NB_POWER:
1804
8
            result = const_folding_safe_power(left, right);
1805
8
            break;
1806
4
        case NB_LSHIFT:
1807
4
            result = const_folding_safe_lshift(left, right);
1808
4
            break;
1809
0
        case NB_RSHIFT:
1810
0
            result = PyNumber_Rshift(left, right);
1811
0
            break;
1812
0
        case NB_OR:
1813
0
            result = PyNumber_Or(left, right);
1814
0
            break;
1815
0
        case NB_XOR:
1816
0
            result = PyNumber_Xor(left, right);
1817
0
            break;
1818
0
        case NB_AND:
1819
0
            result = PyNumber_And(left, right);
1820
0
            break;
1821
9
        case NB_SUBSCR:
1822
9
            result = PyObject_GetItem(left, right);
1823
9
            break;
1824
0
        case NB_MATRIX_MULTIPLY:
1825
            // No builtin constants implement matrix multiplication
1826
0
            break;
1827
0
        default:
1828
0
            Py_UNREACHABLE();
1829
39
    }
1830
39
    return result;
1831
39
}
1832
1833
static int
1834
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1835
6.13k
{
1836
6.16k
    #define BINOP_OPERAND_COUNT 2
1837
6.13k
    assert(PyDict_CheckExact(const_cache));
1838
6.13k
    assert(PyList_CheckExact(consts));
1839
1840
6.13k
    cfg_instr *binop = &bb->b_instr[i];
1841
6.13k
    assert(binop->i_opcode == BINARY_OP);
1842
1843
6.13k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1844
6.13k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1845
        /* not a const sequence */
1846
6.09k
        return SUCCESS;
1847
6.09k
    }
1848
1849
39
    cfg_instr *lhs_instr = operands_instrs[0];
1850
39
    assert(loads_const(lhs_instr->i_opcode));
1851
39
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1852
39
    if (lhs == NULL) {
1853
0
        return ERROR;
1854
0
    }
1855
1856
39
    cfg_instr *rhs_instr = operands_instrs[1];
1857
39
    assert(loads_const(rhs_instr->i_opcode));
1858
39
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1859
39
    if (rhs == NULL) {
1860
0
        Py_DECREF(lhs);
1861
0
        return ERROR;
1862
0
    }
1863
1864
39
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1865
39
    Py_DECREF(lhs);
1866
39
    Py_DECREF(rhs);
1867
39
    if (newconst == NULL) {
1868
3
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1869
0
            return ERROR;
1870
0
        }
1871
3
        PyErr_Clear();
1872
3
        return SUCCESS;
1873
3
    }
1874
1875
36
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1876
36
    return instr_make_load_const(binop, newconst, consts, const_cache);
1877
39
}
1878
1879
static PyObject *
1880
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1881
501
{
1882
501
    assert(operand != NULL);
1883
501
    assert(
1884
501
        opcode == UNARY_NEGATIVE ||
1885
501
        opcode == UNARY_INVERT ||
1886
501
        opcode == UNARY_NOT ||
1887
501
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1888
501
    );
1889
501
    PyObject *result;
1890
501
    switch (opcode) {
1891
501
        case UNARY_NEGATIVE:
1892
501
            result = PyNumber_Negative(operand);
1893
501
            break;
1894
0
        case UNARY_INVERT:
1895
            // XXX: This should be removed once the ~bool deprecation expires.
1896
0
            if (PyBool_Check(operand)) {
1897
0
                return NULL;
1898
0
            }
1899
0
            result = PyNumber_Invert(operand);
1900
0
            break;
1901
0
        case UNARY_NOT: {
1902
0
            int r = PyObject_IsTrue(operand);
1903
0
            if (r < 0) {
1904
0
                return NULL;
1905
0
            }
1906
0
            result = PyBool_FromLong(!r);
1907
0
            break;
1908
0
        }
1909
0
        case CALL_INTRINSIC_1:
1910
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1911
0
                Py_UNREACHABLE();
1912
0
            }
1913
0
            result = PyNumber_Positive(operand);
1914
0
            break;
1915
0
        default:
1916
0
            Py_UNREACHABLE();
1917
501
    }
1918
501
    return result;
1919
501
}
1920
1921
static int
1922
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1923
636
{
1924
1.13k
    #define UNARYOP_OPERAND_COUNT 1
1925
636
    assert(PyDict_CheckExact(const_cache));
1926
636
    assert(PyList_CheckExact(consts));
1927
636
    cfg_instr *unaryop = &bb->b_instr[i];
1928
1929
636
    cfg_instr *operand_instr;
1930
636
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1931
        /* not a const */
1932
135
        return SUCCESS;
1933
135
    }
1934
1935
501
    assert(loads_const(operand_instr->i_opcode));
1936
501
    PyObject *operand = get_const_value(
1937
501
        operand_instr->i_opcode,
1938
501
        operand_instr->i_oparg,
1939
501
        consts
1940
501
    );
1941
501
    if (operand == NULL) {
1942
0
        return ERROR;
1943
0
    }
1944
1945
501
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1946
501
    Py_DECREF(operand);
1947
501
    if (newconst == NULL) {
1948
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1949
0
            return ERROR;
1950
0
        }
1951
0
        PyErr_Clear();
1952
0
        return SUCCESS;
1953
0
    }
1954
1955
501
    if (unaryop->i_opcode == UNARY_NOT) {
1956
0
        assert(PyBool_Check(newconst));
1957
0
    }
1958
501
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1959
501
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1960
501
}
1961
1962
1.56k
#define VISITED (-1)
1963
1964
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1965
// same effect.
1966
static int
1967
swaptimize(basicblock *block, int *ix)
1968
1.35k
{
1969
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1970
    // for this function. Make sure to blow away cached *.pyc files first!
1971
1.35k
    assert(*ix < block->b_iused);
1972
1.35k
    cfg_instr *instructions = &block->b_instr[*ix];
1973
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1974
    // maximum depth of the stack manipulations:
1975
1.35k
    assert(instructions[0].i_opcode == SWAP);
1976
1.35k
    int depth = instructions[0].i_oparg;
1977
1.35k
    int len = 0;
1978
1.35k
    int more = false;
1979
1.35k
    int limit = block->b_iused - *ix;
1980
1.53k
    while (++len < limit) {
1981
1.53k
        int opcode = instructions[len].i_opcode;
1982
1.53k
        if (opcode == SWAP) {
1983
120
            depth = Py_MAX(depth, instructions[len].i_oparg);
1984
120
            more = true;
1985
120
        }
1986
1.41k
        else if (opcode != NOP) {
1987
1.35k
            break;
1988
1.35k
        }
1989
1.53k
    }
1990
    // It's already optimal if there's only one SWAP:
1991
1.35k
    if (!more) {
1992
1.23k
        return SUCCESS;
1993
1.23k
    }
1994
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1995
120
    int *stack = PyMem_Malloc(depth * sizeof(int));
1996
120
    if (stack == NULL) {
1997
0
        PyErr_NoMemory();
1998
0
        return ERROR;
1999
0
    }
2000
480
    for (int i = 0; i < depth; i++) {
2001
360
        stack[i] = i;
2002
360
    }
2003
    // Simulate the combined effect of these instructions by "running" them on
2004
    // our "stack":
2005
360
    for (int i = 0; i < len; i++) {
2006
240
        if (instructions[i].i_opcode == SWAP) {
2007
240
            int oparg = instructions[i].i_oparg;
2008
240
            int top = stack[0];
2009
            // SWAPs are 1-indexed:
2010
240
            stack[0] = stack[oparg - 1];
2011
240
            stack[oparg - 1] = top;
2012
240
        }
2013
240
    }
2014
    // Now we can begin! Our approach here is based on a solution to a closely
2015
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2016
    // think of this algorithm as determining the steps needed to efficiently
2017
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2018
    // though, we can efficiently *shuffle* it! For this reason, we will be
2019
    // replacing instructions starting from the *end* of the run. Since the
2020
    // solution is optimal, we don't need to worry about running out of space:
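    // Worked example (hypothetical run): SWAP(2); SWAP(2) simulates to the
    // identity permutation, so the loop below emits no swaps and both
    // slots are NOPed out at the end; SWAP(3); SWAP(2) simulates to the
    // 3-cycle {1, 2, 0}, which genuinely needs two SWAPs and so stays the
    // same length.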
2021
120
    int current = len - 1;
2022
480
    for (int i = 0; i < depth; i++) {
2023
        // Skip items that have already been visited, or just happen to be in
2024
        // the correct location:
2025
360
        if (stack[i] == VISITED || stack[i] == i) {
2026
240
            continue;
2027
240
        }
2028
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2029
        // with other items; traversing the cycle and swapping each item with
2030
        // the next will put them all in the correct place. The weird
2031
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2032
        // can only swap from that position:
2033
120
        int j = i;
2034
480
        while (true) {
2035
            // Skip the actual swap if our item is zero, since swapping the top
2036
            // item with itself is pointless:
2037
480
            if (j) {
2038
240
                assert(0 <= current);
2039
                // SWAPs are 1-indexed:
2040
240
                instructions[current].i_opcode = SWAP;
2041
240
                instructions[current--].i_oparg = j + 1;
2042
240
            }
2043
480
            if (stack[j] == VISITED) {
2044
                // Completed the cycle:
2045
120
                assert(j == i);
2046
120
                break;
2047
120
            }
2048
360
            int next_j = stack[j];
2049
360
            stack[j] = VISITED;
2050
360
            j = next_j;
2051
360
        }
2052
120
    }
2053
    // NOP out any unused instructions:
2054
120
    while (0 <= current) {
2055
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2056
0
    }
2057
120
    PyMem_Free(stack);
2058
120
    *ix += len - 1;
2059
120
    return SUCCESS;
2060
120
}
2061
2062
2063
// This list is pretty small, since it's only okay to reorder opcodes that:
2064
// - can't affect control flow (like jumping or raising exceptions)
2065
// - can't invoke arbitrary code (besides finalizers)
2066
// - only touch the TOS (and pop it when finished)
2067
#define SWAPPABLE(opcode) \
2068
2.27k
    ((opcode) == STORE_FAST || \
2069
2.27k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2070
2.27k
     (opcode) == POP_TOP)
2071
2072
#define STORES_TO(instr) \
2073
330
    (((instr).i_opcode == STORE_FAST || \
2074
330
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2075
330
     ? (instr).i_oparg : -1)
2076
2077
static int
2078
next_swappable_instruction(basicblock *block, int i, int lineno)
2079
2.23k
{
2080
2.29k
    while (++i < block->b_iused) {
2081
2.27k
        cfg_instr *instruction = &block->b_instr[i];
2082
2.27k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2083
            // Optimizing across this instruction could cause user-visible
2084
            // changes in the names bound between line tracing events!
2085
9
            return -1;
2086
9
        }
2087
2.26k
        if (instruction->i_opcode == NOP) {
2088
62
            continue;
2089
62
        }
2090
2.20k
        if (SWAPPABLE(instruction->i_opcode)) {
2091
1.02k
            return i;
2092
1.02k
        }
2093
1.17k
        return -1;
2094
2.20k
    }
2095
19
    return -1;
2096
2.23k
}
2097
2098
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2099
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2100
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2101
static void
2102
apply_static_swaps(basicblock *block, int i)
2103
1.35k
{
2104
    // SWAPs are to our left, and potential swaperands are to our right:
2105
1.57k
    for (; 0 <= i; i--) {
2106
1.49k
        assert(i < block->b_iused);
2107
1.49k
        cfg_instr *swap = &block->b_instr[i];
2108
1.49k
        if (swap->i_opcode != SWAP) {
2109
138
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2110
                // Nope, but we know how to handle these. Keep looking:
2111
69
                continue;
2112
69
            }
2113
            // We can't reason about what this instruction does. Bail:
2114
69
            return;
2115
138
        }
2116
1.35k
        int j = next_swappable_instruction(block, i, -1);
2117
1.35k
        if (j < 0) {
2118
674
            return;
2119
674
        }
2120
680
        int k = j;
2121
680
        int lineno = block->b_instr[j].i_loc.lineno;
2122
1.02k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2123
876
            k = next_swappable_instruction(block, k, lineno);
2124
876
            if (k < 0) {
2125
529
                return;
2126
529
            }
2127
876
        }
2128
        // The reordering is not safe if the two instructions to be swapped
2129
        // store to the same location, or if any intervening instruction stores
2130
        // to the same location as either of them.
2131
151
        int store_j = STORES_TO(block->b_instr[j]);
2132
151
        int store_k = STORES_TO(block->b_instr[k]);
2133
151
        if (store_j >= 0 || store_k >= 0) {
2134
151
            if (store_j == store_k) {
2135
0
                return;
2136
0
            }
2137
179
            for (int idx = j + 1; idx < k; idx++) {
2138
28
                int store_idx = STORES_TO(block->b_instr[idx]);
2139
28
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2140
0
                    return;
2141
0
                }
2142
28
            }
2143
151
        }
2144
2145
        // Success!
2146
151
        INSTR_SET_OP0(swap, NOP);
2147
151
        cfg_instr temp = block->b_instr[j];
2148
151
        block->b_instr[j] = block->b_instr[k];
2149
151
        block->b_instr[k] = temp;
2150
151
    }
2151
1.35k
}
2152
2153
static int
2154
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2155
37.6k
{
2156
37.6k
    assert(PyDict_CheckExact(const_cache));
2157
37.6k
    assert(PyList_CheckExact(consts));
2158
37.6k
    int opcode = 0;
2159
37.6k
    int oparg = 0;
2160
342k
    for (int i = 0; i < bb->b_iused; i++) {
2161
304k
        cfg_instr *inst = &bb->b_instr[i];
2162
304k
        if (inst->i_opcode == LOAD_CONST) {
2163
58.3k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2164
58.3k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2165
58.3k
            Py_DECREF(constant);
2166
58.3k
            if (res < 0) {
2167
0
                return ERROR;
2168
0
            }
2169
58.3k
        }
2170
304k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2171
304k
                                      inst->i_opcode == COPY &&
2172
304k
                                      inst->i_oparg == 1);
2173
304k
        if (! is_copy_of_load_const) {
2174
304k
            opcode = inst->i_opcode;
2175
304k
            oparg = inst->i_oparg;
2176
304k
        }
2177
304k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2178
304k
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2179
246k
            continue;
2180
246k
        }
2181
58.3k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2182
58.3k
        switch(nextop) {
2183
95
            case POP_JUMP_IF_FALSE:
2184
98
            case POP_JUMP_IF_TRUE:
2185
98
            case JUMP_IF_FALSE:
2186
98
            case JUMP_IF_TRUE:
2187
98
            {
2188
                /* Remove LOAD_CONST const; conditional jump */
2189
98
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2190
98
                if (cnt == NULL) {
2191
0
                    return ERROR;
2192
0
                }
2193
98
                int is_true = PyObject_IsTrue(cnt);
2194
98
                Py_DECREF(cnt);
2195
98
                if (is_true == -1) {
2196
0
                    return ERROR;
2197
0
                }
2198
98
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2199
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2200
98
                    INSTR_SET_OP0(inst, NOP);
2201
98
                }
2202
98
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2203
98
                if (is_true == jump_if_true) {
2204
0
                    bb->b_instr[i+1].i_opcode = JUMP;
2205
0
                }
2206
98
                else {
2207
98
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2208
98
                }
2209
98
                break;
2210
98
            }
2211
1.16k
            case IS_OP:
2212
1.16k
            {
2213
                // Fold to POP_JUMP_IF_NONE:
2214
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2215
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2216
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2217
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2218
                // Fold to POP_JUMP_IF_NOT_NONE:
2219
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2220
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2221
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2222
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2223
1.16k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2224
1.16k
                if (cnt == NULL) {
2225
0
                    return ERROR;
2226
0
                }
2227
1.16k
                if (!Py_IsNone(cnt)) {
2228
14
                    Py_DECREF(cnt);
2229
14
                    break;
2230
14
                }
2231
1.14k
                if (bb->b_iused <= i + 2) {
2232
6
                    break;
2233
6
                }
2234
1.14k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2235
1.14k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2236
                // Get rid of TO_BOOL regardless:
2237
1.14k
                if (jump_instr->i_opcode == TO_BOOL) {
2238
1.12k
                    INSTR_SET_OP0(jump_instr, NOP);
2239
1.12k
                    if (bb->b_iused <= i + 3) {
2240
0
                        break;
2241
0
                    }
2242
1.12k
                    jump_instr = &bb->b_instr[i + 3];
2243
1.12k
                }
2244
1.14k
                bool invert = is_instr->i_oparg;
2245
1.14k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2246
1.08k
                    invert = !invert;
2247
1.08k
                }
2248
54
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2249
17
                    break;
2250
17
                }
2251
1.12k
                INSTR_SET_OP0(inst, NOP);
2252
1.12k
                INSTR_SET_OP0(is_instr, NOP);
2253
1.12k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2254
1.12k
                                              : POP_JUMP_IF_NONE;
2255
1.12k
                break;
2256
1.14k
            }
2257
98
            case TO_BOOL:
2258
98
            {
2259
98
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2260
98
                if (cnt == NULL) {
2261
0
                    return ERROR;
2262
0
                }
2263
98
                int is_true = PyObject_IsTrue(cnt);
2264
98
                Py_DECREF(cnt);
2265
98
                if (is_true == -1) {
2266
0
                    return ERROR;
2267
0
                }
2268
98
                cnt = PyBool_FromLong(is_true);
2269
98
                int index = add_const(cnt, consts, const_cache);
2270
98
                if (index < 0) {
2271
0
                    return ERROR;
2272
0
                }
2273
98
                INSTR_SET_OP0(inst, NOP);
2274
98
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2275
98
                break;
2276
98
            }
2277
58.3k
        }
2278
58.3k
    }
2279
37.6k
    return SUCCESS;
2280
37.6k
}
2281
2282
static int
2283
5.76k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2284
43.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2285
37.6k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2286
37.6k
    }
2287
5.76k
    return SUCCESS;
2288
5.76k
}
2289
2290
static int
2291
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2292
37.6k
{
2293
37.6k
    assert(PyDict_CheckExact(const_cache));
2294
37.6k
    assert(PyList_CheckExact(consts));
2295
37.6k
    cfg_instr nop;
2296
37.6k
    INSTR_SET_OP0(&nop, NOP);
2297
343k
    for (int i = 0; i < bb->b_iused; i++) {
2298
305k
        cfg_instr *inst = &bb->b_instr[i];
2299
305k
        cfg_instr *target;
2300
305k
        int opcode = inst->i_opcode;
2301
305k
        int oparg = inst->i_oparg;
2302
305k
        if (HAS_TARGET(opcode)) {
2303
18.3k
            assert(inst->i_target->b_iused > 0);
2304
18.3k
            target = &inst->i_target->b_instr[0];
2305
18.3k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2306
18.3k
        }
2307
287k
        else {
2308
287k
            target = &nop;
2309
287k
        }
2310
305k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2311
305k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2312
305k
        switch (opcode) {
2313
            /* Try to fold tuples of constants.
2314
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2315
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2316
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2317
3.26k
            case BUILD_TUPLE:
2318
3.26k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2319
79
                    switch(oparg) {
2320
0
                        case 1:
2321
0
                            INSTR_SET_OP0(inst, NOP);
2322
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2323
0
                            continue;
2324
66
                        case 2:
2325
76
                        case 3:
2326
76
                            INSTR_SET_OP0(inst, NOP);
2327
76
                            bb->b_instr[i+1].i_opcode = SWAP;
2328
76
                            continue;
2329
79
                    }
2330
79
                }
2331
3.18k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2332
3.18k
                break;
2333
986
            case BUILD_LIST:
2334
1.04k
            case BUILD_SET:
2335
1.04k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2336
1.04k
                break;
2337
605
            case POP_JUMP_IF_NOT_NONE:
2338
1.18k
            case POP_JUMP_IF_NONE:
2339
1.18k
                switch (target->i_opcode) {
2340
58
                    case JUMP:
2341
58
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2342
1.18k
                }
2343
1.18k
                break;
2344
7.07k
            case POP_JUMP_IF_FALSE:
2345
7.07k
                switch (target->i_opcode) {
2346
462
                    case JUMP:
2347
462
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2348
7.07k
                }
2349
7.07k
                break;
2350
7.07k
            case POP_JUMP_IF_TRUE:
2351
1.40k
                switch (target->i_opcode) {
2352
35
                    case JUMP:
2353
35
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2354
1.40k
                }
2355
1.40k
                break;
2356
1.40k
            case JUMP_IF_FALSE:
2357
166
                switch (target->i_opcode) {
2358
0
                    case JUMP:
2359
0
                    case JUMP_IF_FALSE:
2360
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2361
0
                        continue;
2362
2
                    case JUMP_IF_TRUE:
2363
                        // No need to check for loops here, a block's b_next
2364
                        // cannot point to itself.
2365
2
                        assert(inst->i_target != inst->i_target->b_next);
2366
2
                        inst->i_target = inst->i_target->b_next;
2367
2
                        i--;
2368
2
                        continue;
2369
166
                }
2370
164
                break;
2371
227
            case JUMP_IF_TRUE:
2372
227
                switch (target->i_opcode) {
2373
0
                    case JUMP:
2374
0
                    case JUMP_IF_TRUE:
2375
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2376
0
                        continue;
2377
3
                    case JUMP_IF_FALSE:
2378
                        // No need to check for loops here, a block's b_next
2379
                        // cannot point to itself.
2380
3
                        assert(inst->i_target != inst->i_target->b_next);
2381
3
                        inst->i_target = inst->i_target->b_next;
2382
3
                        i--;
2383
3
                        continue;
2384
227
                }
2385
224
                break;
2386
2.84k
            case JUMP:
2387
4.72k
            case JUMP_NO_INTERRUPT:
2388
4.72k
                switch (target->i_opcode) {
2389
0
                    case JUMP:
2390
0
                        i -= jump_thread(bb, inst, target, JUMP);
2391
0
                        continue;
2392
0
                    case JUMP_NO_INTERRUPT:
2393
0
                        i -= jump_thread(bb, inst, target, opcode);
2394
0
                        continue;
2395
4.72k
                }
2396
4.72k
                break;
2397
4.72k
            case FOR_ITER:
2398
1.26k
                if (target->i_opcode == JUMP) {
2399
                    /* This will not work now because the jump (at target) could
2400
                     * be forward or backward and FOR_ITER only jumps forward. We
2401
                     * can re-enable this if we ever implement a backward version
2402
                     * of FOR_ITER.
2403
                     */
2404
                    /*
2405
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2406
                    */
2407
0
                }
2408
1.26k
                break;
2409
10.8k
            case STORE_FAST:
2410
10.8k
                if (opcode == nextop &&
2411
10.8k
                    oparg == bb->b_instr[i+1].i_oparg &&
2412
10.8k
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2413
28
                    bb->b_instr[i].i_opcode = POP_TOP;
2414
28
                    bb->b_instr[i].i_oparg = 0;
2415
28
                }
2416
10.8k
                break;
2417
1.47k
            case SWAP:
2418
1.47k
                if (oparg == 1) {
2419
0
                    INSTR_SET_OP0(inst, NOP);
2420
0
                }
2421
1.47k
                break;
2422
13.2k
            case LOAD_GLOBAL:
2423
13.2k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2424
6.48k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2425
6.48k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2426
6.48k
                }
2427
13.2k
                break;
2428
4.88k
            case COMPARE_OP:
2429
4.88k
                if (nextop == TO_BOOL) {
2430
2.30k
                    INSTR_SET_OP0(inst, NOP);
2431
2.30k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2432
2.30k
                    continue;
2433
2.30k
                }
2434
2.58k
                break;
2435
2.58k
            case CONTAINS_OP:
2436
2.84k
            case IS_OP:
2437
2.84k
                if (nextop == TO_BOOL) {
2438
1.30k
                    INSTR_SET_OP0(inst, NOP);
2439
1.30k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2440
1.30k
                    continue;
2441
1.30k
                }
2442
1.53k
                if (nextop == UNARY_NOT) {
2443
0
                    INSTR_SET_OP0(inst, NOP);
2444
0
                    int inverted = oparg ^ 1;
2445
0
                    assert(inverted == 0 || inverted == 1);
2446
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2447
0
                    continue;
2448
0
                }
2449
1.53k
                break;
2450
3.68k
            case TO_BOOL:
2451
3.68k
                if (nextop == TO_BOOL) {
2452
0
                    INSTR_SET_OP0(inst, NOP);
2453
0
                    continue;
2454
0
                }
2455
3.68k
                break;
2456
3.68k
            case UNARY_NOT:
2457
50
                if (nextop == TO_BOOL) {
2458
0
                    INSTR_SET_OP0(inst, NOP);
2459
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2460
0
                    continue;
2461
0
                }
2462
50
                if (nextop == UNARY_NOT) {
2463
0
                    INSTR_SET_OP0(inst, NOP);
2464
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2465
0
                    continue;
2466
0
                }
2467
50
                _Py_FALLTHROUGH;
2468
90
            case UNARY_INVERT:
2469
634
            case UNARY_NEGATIVE:
2470
634
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2471
634
                break;
2472
390
            case CALL_INTRINSIC_1:
2473
390
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2474
75
                    if (nextop == GET_ITER) {
2475
0
                        INSTR_SET_OP0(inst, NOP);
2476
0
                    }
2477
75
                    else {
2478
75
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2479
75
                    }
2480
75
                }
2481
315
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2482
2
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2483
2
                }
2484
390
                break;
2485
6.13k
            case BINARY_OP:
2486
6.13k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2487
6.13k
                break;
2488
305k
        }
2489
305k
    }
2490
2491
342k
    for (int i = 0; i < bb->b_iused; i++) {
2492
304k
        cfg_instr *inst = &bb->b_instr[i];
2493
304k
        if (inst->i_opcode == SWAP) {
2494
1.35k
            if (swaptimize(bb, &i) < 0) {
2495
0
                goto error;
2496
0
            }
2497
1.35k
            apply_static_swaps(bb, i);
2498
1.35k
        }
2499
304k
    }
2500
37.6k
    return SUCCESS;
2501
0
error:
2502
0
    return ERROR;
2503
37.6k
}
2504
2505
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2506
2507
static int
2508
remove_redundant_nops_and_jumps(cfg_builder *g)
2509
12.4k
{
2510
12.4k
    int removed_nops, removed_jumps;
2511
14.0k
    do {
2512
        /* Convergence is guaranteed because the number of
2513
         * redundant jumps and nops only decreases.
2514
         */
2515
14.0k
        removed_nops = remove_redundant_nops(g);
2516
14.0k
        RETURN_IF_ERROR(removed_nops);
2517
14.0k
        removed_jumps = remove_redundant_jumps(g);
2518
14.0k
        RETURN_IF_ERROR(removed_jumps);
2519
14.0k
    } while(removed_nops + removed_jumps > 0);
2520
12.4k
    return SUCCESS;
2521
12.4k
}
2522
2523
/* Perform optimizations on a control flow graph.
2524
   The consts object should still be in list form to allow new constants
2525
   to be appended.
2526
2527
   Code transformations that reduce code size initially fill the gaps with
2528
   NOPs.  Later those NOPs are removed.
2529
*/
2530
static int
2531
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2532
5.76k
{
2533
5.76k
    assert(PyDict_CheckExact(const_cache));
2534
5.76k
    RETURN_IF_ERROR(check_cfg(g));
2535
5.76k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2536
5.76k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2537
5.76k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2538
5.76k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2539
43.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2540
37.6k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2541
37.6k
    }
2542
5.76k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2543
5.76k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2544
5.76k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2545
5.76k
    assert(no_redundant_jumps(g));
2546
5.76k
    return SUCCESS;
2547
5.76k
}
2548
2549
static void
2550
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2551
10.0k
{
2552
10.0k
    int32_t line1 = inst1->i_loc.lineno;
2553
10.0k
    int32_t line2 = inst2->i_loc.lineno;
2554
    /* Skip if instructions are on different lines */
2555
10.0k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2556
3.60k
        return;
2557
3.60k
    }
2558
6.49k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2559
568
        return;
2560
568
    }
2561
5.92k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2562
5.92k
    INSTR_SET_OP0(inst2, NOP);
2563
5.92k
}
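/* Worked example (hypothetical opargs): LOAD_FAST 3; LOAD_FAST 5 becomes
   LOAD_FAST_LOAD_FAST 0x35 followed by a NOP, packing both operands as
   (3 << 4) | 5; an oparg of 16 or more does not fit in 4 bits, so such
   pairs are left alone. */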
2564
2565
static int
2566
insert_superinstructions(cfg_builder *g)
2567
5.76k
{
2568
43.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2569
2570
318k
        for (int i = 0; i < b->b_iused; i++) {
2571
280k
            cfg_instr *inst = &b->b_instr[i];
2572
280k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2573
280k
            switch(inst->i_opcode) {
2574
34.7k
                case LOAD_FAST:
2575
34.7k
                    if (nextop == LOAD_FAST) {
2576
5.28k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2577
5.28k
                    }
2578
34.7k
                    break;
2579
9.79k
                case STORE_FAST:
2580
9.79k
                    switch (nextop) {
2581
3.67k
                        case LOAD_FAST:
2582
3.67k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2583
3.67k
                            break;
2584
1.13k
                        case STORE_FAST:
2585
1.13k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2586
1.13k
                            break;
2587
9.79k
                    }
2588
9.79k
                    break;
2589
280k
            }
2590
280k
        }
2591
37.6k
    }
2592
5.76k
    int res = remove_redundant_nops(g);
2593
5.76k
    assert(no_redundant_nops(g));
2594
5.76k
    return res;
2595
5.76k
}
2596
2597
#define NOT_LOCAL -1
2598
10.0k
#define DUMMY_INSTR -1
2599
2600
typedef struct {
2601
    // Index of instruction that produced the reference or DUMMY_INSTR.
2602
    int instr;
2603
2604
    // The local to which the reference refers or NOT_LOCAL.
2605
    int local;
2606
} ref;
2607
2608
typedef struct {
2609
    ref *refs;
2610
    Py_ssize_t size;
2611
    Py_ssize_t capacity;
2612
} ref_stack;
2613
2614
static int
2615
ref_stack_push(ref_stack *stack, ref r)
2616
226k
{
2617
226k
    if (stack->size == stack->capacity) {
2618
5.76k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2619
5.76k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2620
5.76k
        if (refs == NULL) {
2621
0
            PyErr_NoMemory();
2622
0
            return -1;
2623
0
        }
2624
5.76k
        stack->refs = refs;
2625
5.76k
        stack->capacity = new_cap;
2626
5.76k
    }
2627
226k
    stack->refs[stack->size] = r;
2628
226k
    stack->size++;
2629
226k
    return 0;
2630
226k
}
2631
2632
static ref
2633
ref_stack_pop(ref_stack *stack)
2634
196k
{
2635
196k
    assert(stack->size > 0);
2636
196k
    stack->size--;
2637
196k
    ref r = stack->refs[stack->size];
2638
196k
    return r;
2639
196k
}
2640
2641
static void
2642
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2643
884
{
2644
884
    Py_ssize_t idx = stack->size - off;
2645
884
    assert(idx >= 0 && idx < stack->size);
2646
884
    ref tmp = stack->refs[idx];
2647
884
    stack->refs[idx] = stack->refs[stack->size - 1];
2648
884
    stack->refs[stack->size - 1] = tmp;
2649
884
}
2650
2651
static ref
2652
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2653
40.9k
{
2654
40.9k
    assert(idx >= 0 && idx < stack->size);
2655
40.9k
    return stack->refs[idx];
2656
40.9k
}
2657
2658
static void
2659
ref_stack_clear(ref_stack *stack)
2660
26.6k
{
2661
26.6k
    stack->size = 0;
2662
26.6k
}
2663
2664
static void
2665
ref_stack_fini(ref_stack *stack)
2666
5.76k
{
2667
5.76k
    if (stack->refs != NULL) {
2668
5.76k
        PyMem_Free(stack->refs);
2669
5.76k
    }
2670
5.76k
    stack->refs = NULL;
2671
5.76k
    stack->capacity = 0;
2672
5.76k
    stack->size = 0;
2673
5.76k
}
2674
2675
typedef enum {
2676
    // The loaded reference is still on the stack when the local is killed
2677
    SUPPORT_KILLED  = 1,
2678
    // The loaded reference is stored into a local
2679
    STORED_AS_LOCAL = 2,
2680
    // The loaded reference is still on the stack at the end of the basic block
2681
    REF_UNCONSUMED  = 4,
2682
} LoadFastInstrFlag;
2683
2684
static void
2685
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2686
10.2k
{
2687
20.3k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2688
10.0k
        ref r = ref_stack_at(refs, i);
2689
10.0k
        if (r.local == local) {
2690
11
            assert(r.instr >= 0);
2691
11
            instr_flags[r.instr] |= SUPPORT_KILLED;
2692
11
        }
2693
10.0k
    }
2694
10.2k
}
2695
2696
static void
2697
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2698
10.0k
{
2699
10.0k
    kill_local(instr_flags, refs, local);
2700
10.0k
    if (r.instr != DUMMY_INSTR) {
2701
8.79k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2702
8.79k
    }
2703
10.0k
}
2704
2705
static void
2706
load_fast_push_block(basicblock ***sp, basicblock *target,
2707
                     Py_ssize_t start_depth)
2708
27.2k
{
2709
27.2k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2710
27.2k
    if (!target->b_visited) {
2711
20.8k
        target->b_visited = 1;
2712
20.8k
        *(*sp)++ = target;
2713
20.8k
    }
2714
27.2k
}
2715
2716
/*
2717
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2718
 * load borrowed references onto the operand stack.
2719
 *
2720
 * This is only safe when we can prove that the reference in the frame outlives
2721
 * the borrowed reference produced by the instruction. We make this tractable
2722
 * by enforcing the following lifetimes:
2723
 *
2724
 * 1. Borrowed references loaded onto the operand stack live until the end of
2725
 *    the instruction that consumes them from the stack. Any borrowed
2726
 *    references that would escape into the heap (e.g. into frame objects or
2727
 *    generators) are converted into new, strong references.
2728
 *
2729
 * 2. Locals live until they are either killed by an instruction
2730
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2731
 *    via `f_locals` is added to a tuple owned by the frame object.
2732
 *
2733
 * To simplify the problem of detecting which supporting references in the
2734
 * frame are killed by instructions that overwrite locals, we only allow
2735
 * borrowed references to be stored as a local in the frame if they were passed
2736
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2737
 * strong references.
2738
 *
2739
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2740
 * that meet the following criteria:
2741
 *
2742
 * 1. The produced reference must be consumed from the stack before the
2743
 *    supporting reference in the frame is killed.
2744
 *
2745
 * 2. The produced reference cannot be stored as a local.
2746
 *
2747
 * We use abstract interpretation to identify instructions that meet these
2748
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2749
 * stack of abstract references and note any instructions that violate the
2750
 * criteria above. Once we've processed all the instructions in a block, any
2751
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2752
 */
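/* Illustrative sketch (assumed Python source, for intuition only):

       def f(x):
           y = x + 1     # the LOAD_FAST of x is consumed by BINARY_OP
                         # before anything kills x -> safe to borrow
           z = x         # this LOAD_FAST's ref is stored into a local,
                         # so it is flagged STORED_AS_LOCAL (criterion 2)
           del x         # any ref to x still on the stack here would be
                         # flagged SUPPORT_KILLED (criterion 1)
           return y, z
*/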
2753
static int
2754
optimize_load_fast(cfg_builder *g)
2755
5.76k
{
2756
5.76k
    int status;
2757
5.76k
    ref_stack refs = {0};
2758
5.76k
    int max_instrs = 0;
2759
5.76k
    basicblock *entryblock = g->g_entryblock;
2760
44.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2761
38.3k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2762
38.3k
    }
2763
5.76k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2764
5.76k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2765
5.76k
    if (instr_flags == NULL) {
2766
0
        PyErr_NoMemory();
2767
0
        return ERROR;
2768
0
    }
2769
5.76k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2770
5.76k
    if (blocks == NULL) {
2771
0
        status = ERROR;
2772
0
        goto done;
2773
0
    }
2774
5.76k
    basicblock **sp = blocks;
2775
5.76k
    *sp = entryblock;
2776
5.76k
    sp++;
2777
5.76k
    entryblock->b_startdepth = 0;
2778
5.76k
    entryblock->b_visited = 1;
2779
2780
5.76k
    #define PUSH_REF(instr, local)                \
2781
226k
        do {                                      \
2782
226k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2783
0
                status = ERROR;                   \
2784
0
                goto done;                        \
2785
0
            }                                     \
2786
226k
        } while(0)
2787
2788
32.3k
    while (sp != blocks) {
2789
26.6k
        basicblock *block = *--sp;
2790
26.6k
        assert(block->b_startdepth > -1);
2791
2792
        // Reset per-block state.
2793
26.6k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2794
2795
        // Reset the stack of refs. We don't track references on the stack
2796
        // across basic blocks, but the bytecode will expect their
2797
        // presence. Add dummy references as necessary.
2798
26.6k
        ref_stack_clear(&refs);
2799
50.9k
        for (int i = 0; i < block->b_startdepth; i++) {
2800
24.3k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2801
24.3k
        }
2802
2803
286k
        for (int i = 0; i < block->b_iused; i++) {
2804
259k
            cfg_instr *instr = &block->b_instr[i];
2805
259k
            int opcode = instr->i_opcode;
2806
259k
            int oparg = instr->i_oparg;
2807
259k
            assert(opcode != EXTENDED_ARG);
2808
259k
            switch (opcode) {
2809
                // Opcodes that load and store locals
2810
2
                case DELETE_FAST: {
2811
2
                    kill_local(instr_flags, &refs, oparg);
2812
2
                    break;
2813
0
                }
2814
2815
28.6k
                case LOAD_FAST: {
2816
28.6k
                    PUSH_REF(i, oparg);
2817
28.6k
                    break;
2818
28.6k
                }
2819
2820
28.6k
                case LOAD_FAST_AND_CLEAR: {
2821
217
                    kill_local(instr_flags, &refs, oparg);
2822
217
                    PUSH_REF(i, oparg);
2823
217
                    break;
2824
217
                }
2825
2826
4.44k
                case LOAD_FAST_LOAD_FAST: {
2827
4.44k
                    PUSH_REF(i, oparg >> 4);
2828
4.44k
                    PUSH_REF(i, oparg & 15);
2829
4.44k
                    break;
2830
4.44k
                }
2831
2832
7.96k
                case STORE_FAST: {
2833
7.96k
                    ref r = ref_stack_pop(&refs);
2834
7.96k
                    store_local(instr_flags, &refs, oparg, r);
2835
7.96k
                    break;
2836
4.44k
                }
2837
2838
167
                case STORE_FAST_LOAD_FAST: {
2839
                    // STORE_FAST
2840
167
                    ref r = ref_stack_pop(&refs);
2841
167
                    store_local(instr_flags, &refs, oparg >> 4, r);
2842
                    // LOAD_FAST
2843
167
                    PUSH_REF(i, oparg & 15);
2844
167
                    break;
2845
167
                }
2846
2847
963
                case STORE_FAST_STORE_FAST: {
2848
                    // STORE_FAST
2849
963
                    ref r = ref_stack_pop(&refs);
2850
963
                    store_local(instr_flags, &refs, oparg >> 4, r);
2851
                    // STORE_FAST
2852
963
                    r = ref_stack_pop(&refs);
2853
963
                    store_local(instr_flags, &refs, oparg & 15, r);
2854
963
                    break;
2855
167
                }
2856
2857
                // Opcodes that shuffle values on the stack
2858
947
                case COPY: {
2859
947
                    assert(oparg > 0);
2860
947
                    Py_ssize_t idx = refs.size - oparg;
2861
947
                    ref r = ref_stack_at(&refs, idx);
2862
947
                    PUSH_REF(r.instr, r.local);
2863
947
                    break;
2864
947
                }
2865
2866
947
                case SWAP: {
2867
884
                    assert(oparg >= 2);
2868
884
                    ref_stack_swap_top(&refs, oparg);
2869
884
                    break;
2870
947
                }
2871
2872
                // We treat opcodes that do not consume all of their inputs on
2873
                // a case-by-case basis, as we have no generic way of knowing
2874
                // how many inputs should be left on the stack.
2875
2876
                // Opcodes that consume no inputs
2877
888
                case FORMAT_SIMPLE:
2878
888
                case GET_ANEXT:
2879
2.04k
                case GET_ITER:
2880
2.04k
                case GET_LEN:
2881
2.06k
                case GET_YIELD_FROM_ITER:
2882
2.55k
                case IMPORT_FROM:
2883
2.55k
                case MATCH_KEYS:
2884
2.55k
                case MATCH_MAPPING:
2885
2.55k
                case MATCH_SEQUENCE:
2886
2.55k
                case WITH_EXCEPT_START: {
2887
2.55k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2888
2.55k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2889
2.55k
                    int net_pushed = num_pushed - num_popped;
2890
2.55k
                    assert(net_pushed >= 0);
2891
4.19k
                    for (int i = 0; i < net_pushed; i++) {
2892
1.63k
                        PUSH_REF(i, NOT_LOCAL);
2893
1.63k
                    }
2894
2.55k
                    break;
2895
2.55k
                }
2896
2897
                // Opcodes that consume some inputs and push no new values
2898
2.55k
                case DICT_MERGE:
2899
710
                case DICT_UPDATE:
2900
1.05k
                case LIST_APPEND:
2901
1.22k
                case LIST_EXTEND:
2902
11.6k
                case MAP_ADD:
2903
11.6k
                case RERAISE:
2904
11.7k
                case SET_ADD:
2905
11.7k
                case SET_UPDATE: {
2906
11.7k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2907
11.7k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2908
11.7k
                    int net_popped = num_popped - num_pushed;
2909
11.7k
                    assert(net_popped > 0);
2910
34.0k
                    for (int i = 0; i < net_popped; i++) {
2911
22.2k
                        ref_stack_pop(&refs);
2912
22.2k
                    }
2913
11.7k
                    break;
2914
11.7k
                }
2915
2916
38
                case END_SEND:
2917
1.47k
                case SET_FUNCTION_ATTRIBUTE: {
2918
1.47k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2919
1.47k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2920
1.47k
                    ref tos = ref_stack_pop(&refs);
2921
1.47k
                    ref_stack_pop(&refs);
2922
1.47k
                    PUSH_REF(tos.instr, tos.local);
2923
1.47k
                    break;
2924
1.47k
                }
2925
2926
                // Opcodes that consume some inputs and push new values
2927
1.47k
                case CHECK_EXC_MATCH: {
2928
0
                    ref_stack_pop(&refs);
2929
0
                    PUSH_REF(i, NOT_LOCAL);
2930
0
                    break;
2931
0
                }
2932
2933
1.20k
                case FOR_ITER: {
2934
1.20k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2935
1.20k
                    PUSH_REF(i, NOT_LOCAL);
2936
1.20k
                    break;
2937
1.20k
                }
2938
2939
15.5k
                case LOAD_ATTR:
2940
15.7k
                case LOAD_SUPER_ATTR: {
2941
15.7k
                    ref self = ref_stack_pop(&refs);
2942
15.7k
                    if (opcode == LOAD_SUPER_ATTR) {
2943
175
                        ref_stack_pop(&refs);
2944
175
                        ref_stack_pop(&refs);
2945
175
                    }
2946
15.7k
                    PUSH_REF(i, NOT_LOCAL);
2947
15.7k
                    if (oparg & 1) {
2948
                        // A method call; conservatively assume that self is pushed
2949
                        // back onto the stack
2950
6.29k
                        PUSH_REF(self.instr, self.local);
2951
6.29k
                    }
2952
15.7k
                    break;
2953
15.7k
                }
2954
2955
15.7k
                case LOAD_SPECIAL:
2956
180
                case PUSH_EXC_INFO: {
2957
180
                    ref tos = ref_stack_pop(&refs);
2958
180
                    PUSH_REF(i, NOT_LOCAL);
2959
180
                    PUSH_REF(tos.instr, tos.local);
2960
180
                    break;
2961
180
                }
2962
2963
180
                case SEND: {
2964
38
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2965
38
                    ref_stack_pop(&refs);
2966
38
                    PUSH_REF(i, NOT_LOCAL);
2967
38
                    break;
2968
38
                }
2969
2970
                // Opcodes that consume all of their inputs
2971
182k
                default: {
2972
182k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2973
182k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2974
182k
                    if (HAS_TARGET(instr->i_opcode)) {
2975
11.7k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2976
11.7k
                    }
2977
182k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2978
                        // Block push opcodes only affect the stack when jumping
2979
                        // to the target.
2980
326k
                        for (int j = 0; j < num_popped; j++) {
2981
144k
                            ref_stack_pop(&refs);
2982
144k
                        }
2983
318k
                        for (int j = 0; j < num_pushed; j++) {
2984
136k
                            PUSH_REF(i, NOT_LOCAL);
2985
136k
                        }
2986
182k
                    }
2987
182k
                    break;
2988
182k
                }
2989
259k
            }
2990
259k
        }
2991
2992
        // Push fallthrough block
2993
26.6k
        cfg_instr *term = basicblock_last_instr(block);
2994
26.6k
        if (term != NULL && block->b_next != NULL &&
2995
26.6k
            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
2996
25.4k
              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
2997
14.2k
            assert(BB_HAS_FALLTHROUGH(block));
2998
14.2k
            load_fast_push_block(&sp, block->b_next, refs.size);
2999
14.2k
        }
3000
3001
        // Mark instructions that produce values that are on the stack at the
3002
        // end of the basic block
3003
56.5k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3004
29.9k
            ref r = ref_stack_at(&refs, i);
3005
29.9k
            if (r.instr != -1) {
3006
13.0k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3007
13.0k
            }
3008
29.9k
        }
3009
3010
        // Optimize instructions
3011
286k
        for (int i = 0; i < block->b_iused; i++) {
3012
259k
            if (!instr_flags[i]) {
3013
239k
                cfg_instr *instr = &block->b_instr[i];
3014
239k
                switch (instr->i_opcode) {
3015
27.5k
                    case LOAD_FAST:
3016
27.5k
                        instr->i_opcode = LOAD_FAST_BORROW;
3017
27.5k
                        break;
3018
4.35k
                    case LOAD_FAST_LOAD_FAST:
3019
4.35k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3020
4.35k
                        break;
3021
207k
                    default:
3022
207k
                        break;
3023
239k
                }
3024
239k
            }
3025
259k
        }
3026
26.6k
    }
3027
3028
5.76k
    #undef PUSH_REF
3029
3030
5.76k
    status = SUCCESS;
3031
3032
5.76k
done:
3033
5.76k
    ref_stack_fini(&refs);
3034
5.76k
    PyMem_Free(instr_flags);
3035
5.76k
    PyMem_Free(blocks);
3036
5.76k
    return status;
3037
5.76k
}
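
The loop above is an abstract interpretation over a stack of (producing
instruction, local) pairs: a reference popped inside the block is consumed,
and anything still on the simulated stack at the end of the block is flagged
REF_UNCONSUMED, which disqualifies its producer from the borrow rewrite. A
minimal standalone sketch of that bookkeeping, using toy push/pop counts
instead of real opcodes (nothing here is CPython API):

    #include <stdio.h>

    #define TOY_REF_UNCONSUMED 1   /* stands in for REF_UNCONSUMED */

    int main(void)
    {
        /* Toy block: instr 0 and instr 1 each push one value; instr 2 pops one. */
        int pushes[] = {1, 1, 0};
        int pops[]   = {0, 0, 1};
        int stack[16], sp = 0;
        unsigned flags[3] = {0, 0, 0};

        for (int i = 0; i < 3; i++) {
            sp -= pops[i];                    /* popped refs are consumed */
            for (int j = 0; j < pushes[i]; j++) {
                stack[sp++] = i;              /* remember the producer */
            }
        }
        /* Refs still on the stack outlive the block, so their producers
         * must keep a strong reference. */
        for (int k = 0; k < sp; k++) {
            flags[stack[k]] |= TOY_REF_UNCONSUMED;
        }
        for (int i = 0; i < 3; i++) {
            printf("instr %d: %s\n", i, flags[i] ? "keep strong" : "may borrow");
        }
        return 0;
    }
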
3038
3039
// helper functions for add_checks_for_loads_of_uninitialized_variables
3040
static inline void
3041
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3042
127k
{
3043
    // Push b if the unsafe mask is giving us any new information.
3044
    // To avoid overflowing the stack, only allow each block once.
3045
    // Use b->b_visited=1 to mean that b is currently on the stack.
3046
127k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3047
127k
    if (b->b_unsafe_locals_mask != both) {
3048
20.7k
        b->b_unsafe_locals_mask = both;
3049
        // More work left to do.
3050
20.7k
        if (!b->b_visited) {
3051
            // not on the stack, so push it.
3052
20.6k
            *(*sp)++ = b;
3053
20.6k
            b->b_visited = 1;
3054
20.6k
        }
3055
20.7k
    }
3056
127k
}
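
maybe_push only re-queues a block when the incoming mask contributes bits
the block has not seen before, which is what bounds the number of pushes. A
standalone sketch of that test (the mask values are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t block_mask = 0x5;   /* locals 0 and 2 possibly uninitialized */
        uint64_t incoming   = 0x6;   /* this edge: locals 1 and 2 possibly uninitialized */
        uint64_t both = block_mask | incoming;
        if (block_mask != both) {
            /* A new bit arrived (local 1), so the block would be (re)pushed. */
            printf("revisit block, mask now 0x%llx\n", (unsigned long long)both);
        }
        return 0;
    }
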
3057
3058
static void
3059
scan_block_for_locals(basicblock *b, basicblock ***sp)
3060
55.2k
{
3061
    // bit i is set if local i is potentially uninitialized
3062
55.2k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3063
381k
    for (int i = 0; i < b->b_iused; i++) {
3064
326k
        cfg_instr *instr = &b->b_instr[i];
3065
326k
        assert(instr->i_opcode != EXTENDED_ARG);
3066
326k
        if (instr->i_except != NULL) {
3067
67.0k
            maybe_push(instr->i_except, unsafe_mask, sp);
3068
67.0k
        }
3069
326k
        if (instr->i_oparg >= 64) {
3070
21.9k
            continue;
3071
21.9k
        }
3072
304k
        assert(instr->i_oparg >= 0);
3073
304k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3074
304k
        switch (instr->i_opcode) {
3075
305
            case DELETE_FAST:
3076
759
            case LOAD_FAST_AND_CLEAR:
3077
1.66k
            case STORE_FAST_MAYBE_NULL:
3078
1.66k
                unsafe_mask |= bit;
3079
1.66k
                break;
3080
20.9k
            case STORE_FAST:
3081
20.9k
                unsafe_mask &= ~bit;
3082
20.9k
                break;
3083
95
            case LOAD_FAST_CHECK:
3084
                // If this doesn't raise, then the local is defined.
3085
95
                unsafe_mask &= ~bit;
3086
95
                break;
3087
67.7k
            case LOAD_FAST:
3088
67.7k
                if (unsafe_mask & bit) {
3089
95
                    instr->i_opcode = LOAD_FAST_CHECK;
3090
95
                }
3091
67.7k
                unsafe_mask &= ~bit;
3092
67.7k
                break;
3093
304k
        }
3094
304k
    }
3095
55.2k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3096
29.3k
        maybe_push(b->b_next, unsafe_mask, sp);
3097
29.3k
    }
3098
55.2k
    cfg_instr *last = basicblock_last_instr(b);
3099
55.2k
    if (last && is_jump(last)) {
3100
26.6k
        assert(last->i_target != NULL);
3101
26.6k
        maybe_push(last->i_target, unsafe_mask, sp);
3102
26.6k
    }
3103
55.2k
}
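
scan_block_for_locals is a forward transfer function on the unsafe mask. A
standalone sketch with toy opcode tags (not CPython's opcode values):

    #include <stdint.h>
    #include <stdio.h>

    enum { T_STORE, T_DELETE, T_LOAD };

    static uint64_t step(uint64_t unsafe, int op, int local)
    {
        uint64_t bit = (uint64_t)1 << local;
        switch (op) {
            case T_DELETE: return unsafe | bit;   /* may now be unbound */
            case T_STORE:  return unsafe & ~bit;  /* definitely bound */
            case T_LOAD:   return unsafe & ~bit;  /* didn't raise => bound */
        }
        return unsafe;
    }

    int main(void)
    {
        uint64_t m = 0x1;                /* local 0 starts possibly unbound */
        m = step(m, T_STORE, 0);         /* STORE_FAST 0  -> safe */
        m = step(m, T_DELETE, 0);        /* DELETE_FAST 0 -> unsafe again */
        printf("mask = 0x%llx\n", (unsigned long long)m);   /* prints 0x1 */
        return 0;
    }
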
3104
3105
static int
3106
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3107
0
{
3108
0
    assert(nlocals > 64);
3109
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3110
0
    if (states == NULL) {
3111
0
        PyErr_NoMemory();
3112
0
        return ERROR;
3113
0
    }
3114
0
    Py_ssize_t blocknum = 0;
3115
    // state[i - 64] == blocknum if local i is guaranteed to
3116
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3117
    // STORE_FAST within that basicblock (not followed by
3118
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3119
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3120
0
        blocknum++;
3121
0
        for (int i = 0; i < b->b_iused; i++) {
3122
0
            cfg_instr *instr = &b->b_instr[i];
3123
0
            assert(instr->i_opcode != EXTENDED_ARG);
3124
0
            int arg = instr->i_oparg;
3125
0
            if (arg < 64) {
3126
0
                continue;
3127
0
            }
3128
0
            assert(arg >= 0);
3129
0
            switch (instr->i_opcode) {
3130
0
                case DELETE_FAST:
3131
0
                case LOAD_FAST_AND_CLEAR:
3132
0
                case STORE_FAST_MAYBE_NULL:
3133
0
                    states[arg - 64] = blocknum - 1;
3134
0
                    break;
3135
0
                case STORE_FAST:
3136
0
                    states[arg - 64] = blocknum;
3137
0
                    break;
3138
0
                case LOAD_FAST:
3139
0
                    if (states[arg - 64] != blocknum) {
3140
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3141
0
                    }
3142
0
                    states[arg - 64] = blocknum;
3143
0
                    break;
3144
0
                    Py_UNREACHABLE();
3145
0
            }
3146
0
        }
3147
0
    }
3148
0
    PyMem_Free(states);
3149
0
    return SUCCESS;
3150
0
}
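
The states[] array doubles as a per-block stamp: a slot counts as defined
only if its stamp equals the current block number, so nothing has to be
reset between blocks. A standalone sketch with made-up values (slot 0
stands for local 64):

    #include <stdio.h>

    int main(void)
    {
        long states[1] = {0};
        long blocknum = 0;

        blocknum++;              /* enter block 1 */
        states[0] = blocknum;    /* STORE_FAST: defined within block 1 */

        blocknum++;              /* enter block 2: the stamp no longer matches */
        if (states[0] != blocknum) {
            printf("LOAD_FAST would become LOAD_FAST_CHECK\n");
        }
        return 0;
    }
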
3151
3152
static int
3153
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3154
5.76k
{
3155
5.76k
    assert(PyList_CheckExact(consts));
3156
5.76k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3157
5.76k
    if (nconsts == 0) {
3158
0
        return SUCCESS;  /* nothing to do */
3159
0
    }
3160
3161
5.76k
    Py_ssize_t *index_map = NULL;
3162
5.76k
    Py_ssize_t *reverse_index_map = NULL;
3163
5.76k
    int err = ERROR;
3164
3165
5.76k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3166
5.76k
    if (index_map == NULL) {
3167
0
        goto end;
3168
0
    }
3169
44.4k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3170
38.6k
        index_map[i] = -1;
3171
38.6k
    }
3172
    // The first constant may be the docstring; always keep it.
3173
5.76k
    index_map[0] = 0;
3174
3175
    /* mark used consts */
3176
43.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3177
318k
        for (int i = 0; i < b->b_iused; i++) {
3178
280k
            int opcode = b->b_instr[i].i_opcode;
3179
280k
            if (OPCODE_HAS_CONST(opcode)) {
3180
38.1k
                int index = b->b_instr[i].i_oparg;
3181
38.1k
                index_map[index] = index;
3182
38.1k
            }
3183
280k
        }
3184
37.6k
    }
3185
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3186
    /* condense consts */
3187
5.76k
    Py_ssize_t n_used_consts = 0;
3188
50.1k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3189
44.4k
        if (index_map[i] != -1) {
3190
32.7k
            assert(index_map[i] == i);
3191
32.7k
            index_map[n_used_consts++] = index_map[i];
3192
32.7k
        }
3193
44.4k
    }
3194
5.76k
    if (n_used_consts == nconsts) {
3195
        /* nothing to do */
3196
2.28k
        err = SUCCESS;
3197
2.28k
        goto end;
3198
2.28k
    }
3199
3200
    /* move all used consts to the beginning of the consts list */
3201
3.48k
    assert(n_used_consts < nconsts);
3202
29.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3203
26.2k
        Py_ssize_t old_index = index_map[i];
3204
26.2k
        assert(i <= old_index && old_index < nconsts);
3205
26.2k
        if (i != old_index) {
3206
20.0k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3207
20.0k
            assert(value != NULL);
3208
20.0k
            PyList_SetItem(consts, i, Py_NewRef(value));
3209
20.0k
        }
3210
26.2k
    }
3211
3212
    /* truncate the consts list at its new size */
3213
3.48k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3214
0
        goto end;
3215
0
    }
3216
    /* adjust const indices in the bytecode */
3217
3.48k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3218
3.48k
    if (reverse_index_map == NULL) {
3219
0
        goto end;
3220
0
    }
3221
41.4k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3222
37.9k
        reverse_index_map[i] = -1;
3223
37.9k
    }
3224
29.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3225
26.2k
        assert(index_map[i] != -1);
3226
26.2k
        assert(reverse_index_map[index_map[i]] == -1);
3227
26.2k
        reverse_index_map[index_map[i]] = i;
3228
26.2k
    }
3229
3230
30.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3231
248k
        for (int i = 0; i < b->b_iused; i++) {
3232
220k
            int opcode = b->b_instr[i].i_opcode;
3233
220k
            if (OPCODE_HAS_CONST(opcode)) {
3234
31.4k
                int index = b->b_instr[i].i_oparg;
3235
31.4k
                assert(reverse_index_map[index] >= 0);
3236
31.4k
                assert(reverse_index_map[index] < n_used_consts);
3237
31.4k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3238
31.4k
            }
3239
220k
        }
3240
27.4k
    }
3241
3242
3.48k
    err = SUCCESS;
3243
5.76k
end:
3244
5.76k
    PyMem_Free(index_map);
3245
5.76k
    PyMem_Free(reverse_index_map);
3246
5.76k
    return err;
3247
3.48k
}
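
The two maps work in tandem: index_map is condensed in place into the list
of surviving old indices, and reverse_index_map inverts it so opargs can be
rewritten. A standalone sketch on toy data:

    #include <stdio.h>

    int main(void)
    {
        /* consts 0, 2 and 3 are used; const 1 is not (-1). */
        long index_map[4] = {0, -1, 2, 3};
        long n = 4, n_used = 0;

        for (long i = 0; i < n; i++) {       /* condense the used indices */
            if (index_map[i] != -1) {
                index_map[n_used++] = index_map[i];
            }
        }
        long reverse[4] = {-1, -1, -1, -1};  /* old index -> new index */
        for (long i = 0; i < n_used; i++) {
            reverse[index_map[i]] = i;
        }
        /* An oparg referring to old const 3 is rewritten to reverse[3]. */
        printf("old 3 -> new %ld\n", reverse[3]);   /* prints: old 3 -> new 2 */
        return 0;
    }
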
3248
3249
3250
3251
static int
3252
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3253
                                                int nlocals,
3254
                                                int nparams)
3255
5.76k
{
3256
5.76k
    if (nlocals == 0) {
3257
1.51k
        return SUCCESS;
3258
1.51k
    }
3259
4.24k
    if (nlocals > 64) {
3260
        // To avoid O(nlocals**2) compilation, locals beyond the first
3261
        // 64 are only analyzed one basicblock at a time: initialization
3262
        // info is not passed between basicblocks.
3263
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3264
0
            return ERROR;
3265
0
        }
3266
0
        nlocals = 64;
3267
0
    }
3268
4.24k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3269
4.24k
    if (stack == NULL) {
3270
0
        return ERROR;
3271
0
    }
3272
4.24k
    basicblock **sp = stack;
3273
3274
    // First origin of being uninitialized:
3275
    // The non-parameter locals in the entry block.
3276
4.24k
    uint64_t start_mask = 0;
3277
10.6k
    for (int i = nparams; i < nlocals; i++) {
3278
6.36k
        start_mask |= (uint64_t)1 << i;
3279
6.36k
    }
3280
4.24k
    maybe_push(entryblock, start_mask, &sp);
3281
3282
    // Second origin of being uninitialized:
3283
    // There could be DELETE_FAST somewhere, so
3284
    // be sure to scan each basicblock at least once.
3285
38.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
34.5k
        scan_block_for_locals(b, &sp);
3287
34.5k
    }
3288
    // Now propagate the uncertainty from the origins we found: Use
3289
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3290
24.8k
    while (sp > stack) {
3291
20.6k
        basicblock *b = *--sp;
3292
        // mark as no longer on stack
3293
20.6k
        b->b_visited = 0;
3294
20.6k
        scan_block_for_locals(b, &sp);
3295
20.6k
    }
3296
4.24k
    PyMem_Free(stack);
3297
4.24k
    return SUCCESS;
3298
4.24k
}
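
The start mask simply sets one bit per non-parameter local, since
parameters are always bound on entry. A standalone sketch with toy sizes:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int nparams = 2, nlocals = 5;
        uint64_t start_mask = 0;
        for (int i = nparams; i < nlocals; i++) {
            start_mask |= (uint64_t)1 << i;
        }
        /* bits 2..4 set -> 0x1c */
        printf("start_mask = 0x%llx\n", (unsigned long long)start_mask);
        return 0;
    }
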
3299
3300
3301
static int
3302
5.12k
mark_warm(basicblock *entryblock) {
3303
5.12k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3304
5.12k
    if (stack == NULL) {
3305
0
        return ERROR;
3306
0
    }
3307
5.12k
    basicblock **sp = stack;
3308
3309
5.12k
    *sp++ = entryblock;
3310
5.12k
    entryblock->b_visited = 1;
3311
31.4k
    while (sp > stack) {
3312
26.3k
        basicblock *b = *(--sp);
3313
26.3k
        assert(!b->b_except_handler);
3314
26.3k
        b->b_warm = 1;
3315
26.3k
        basicblock *next = b->b_next;
3316
26.3k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3317
12.4k
            *sp++ = next;
3318
12.4k
            next->b_visited = 1;
3319
12.4k
        }
3320
235k
        for (int i=0; i < b->b_iused; i++) {
3321
209k
            cfg_instr *instr = &b->b_instr[i];
3322
209k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3323
8.74k
                *sp++ = instr->i_target;
3324
8.74k
                instr->i_target->b_visited = 1;
3325
8.74k
            }
3326
209k
        }
3327
26.3k
    }
3328
5.12k
    PyMem_Free(stack);
3329
5.12k
    return SUCCESS;
3330
5.12k
}
3331
3332
static int
3333
5.12k
mark_cold(basicblock *entryblock) {
3334
42.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3335
37.0k
        assert(!b->b_cold && !b->b_warm);
3336
37.0k
    }
3337
5.12k
    if (mark_warm(entryblock) < 0) {
3338
0
        return ERROR;
3339
0
    }
3340
3341
5.12k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3342
5.12k
    if (stack == NULL) {
3343
0
        return ERROR;
3344
0
    }
3345
3346
5.12k
    basicblock **sp = stack;
3347
42.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3348
37.0k
        if (b->b_except_handler) {
3349
2.22k
            assert(!b->b_warm);
3350
2.22k
            *sp++ = b;
3351
2.22k
            b->b_visited = 1;
3352
2.22k
        }
3353
37.0k
    }
3354
3355
9.47k
    while (sp > stack) {
3356
4.35k
        basicblock *b = *(--sp);
3357
4.35k
        b->b_cold = 1;
3358
4.35k
        basicblock *next = b->b_next;
3359
4.35k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3360
1.20k
            if (!next->b_warm && !next->b_visited) {
3361
1.10k
                *sp++ = next;
3362
1.10k
                next->b_visited = 1;
3363
1.10k
            }
3364
1.20k
        }
3365
23.3k
        for (int i = 0; i < b->b_iused; i++) {
3366
19.0k
            cfg_instr *instr = &b->b_instr[i];
3367
19.0k
            if (is_jump(instr)) {
3368
1.47k
                assert(i == b->b_iused - 1);
3369
1.47k
                basicblock *target = b->b_instr[i].i_target;
3370
1.47k
                if (!target->b_warm && !target->b_visited) {
3371
1.02k
                    *sp++ = target;
3372
1.02k
                    target->b_visited = 1;
3373
1.02k
                }
3374
1.47k
            }
3375
19.0k
        }
3376
4.35k
    }
3377
5.12k
    PyMem_Free(stack);
3378
5.12k
    return SUCCESS;
3379
5.12k
}
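
The two passes compose: mark_warm claims everything reachable from the
entry block by normal flow, then mark_cold floods out from the exception
handlers but stops at warm blocks. A heavily simplified standalone sketch
on a four-block chain (the real code uses an explicit stack and also
follows jump targets):

    #include <stdio.h>

    int main(void)
    {
        /* succ[b]: normal-flow successor of block b (-1 = none). Block 2 is
         * an exception handler reached only via the exception table; it
         * falls through into warm block 3. */
        int succ[4] = {1, 3, 3, -1};
        int is_handler[4] = {0, 0, 1, 0};
        int warm[4] = {0}, cold[4] = {0};

        /* Phase 1 (mark_warm): walk normal flow from the entry block. */
        for (int b = 0; b != -1; b = succ[b]) {
            warm[b] = 1;
        }
        /* Phase 2 (mark_cold): from each handler, everything reachable
         * without crossing a warm block is cold. */
        for (int h = 0; h < 4; h++) {
            if (!is_handler[h]) {
                continue;
            }
            for (int b = h; b != -1 && !warm[b] && !cold[b]; b = succ[b]) {
                cold[b] = 1;
            }
        }
        for (int b = 0; b < 4; b++) {
            printf("block %d: %s\n", b, cold[b] ? "cold" : "warm");
        }
        return 0;
    }
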
3380
3381
3382
static int
3383
5.76k
push_cold_blocks_to_end(cfg_builder *g) {
3384
5.76k
    basicblock *entryblock = g->g_entryblock;
3385
5.76k
    if (entryblock->b_next == NULL) {
3386
        /* single basicblock, no need to reorder */
3387
641
        return SUCCESS;
3388
641
    }
3389
5.12k
    RETURN_IF_ERROR(mark_cold(entryblock));
3390
3391
5.12k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3392
3393
    /* If we have a cold block with fallthrough to a warm block, add */
3394
    /* an explicit jump instead of fallthrough */
3395
42.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3396
37.0k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3397
41
            basicblock *explicit_jump = cfg_builder_new_block(g);
3398
41
            if (explicit_jump == NULL) {
3399
0
                return ERROR;
3400
0
            }
3401
41
            if (!IS_LABEL(b->b_next->b_label)) {
3402
0
                b->b_next->b_label.id = next_lbl++;
3403
0
            }
3404
41
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3405
41
                             NO_LOCATION);
3406
41
            explicit_jump->b_cold = 1;
3407
41
            explicit_jump->b_next = b->b_next;
3408
41
            explicit_jump->b_predecessors = 1;
3409
41
            b->b_next = explicit_jump;
3410
3411
            /* set target */
3412
41
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3413
41
            last->i_target = explicit_jump->b_next;
3414
41
        }
3415
37.0k
    }
3416
3417
5.12k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3418
5.12k
    basicblock *cold_blocks = NULL;
3419
5.12k
    basicblock *cold_blocks_tail = NULL;
3420
3421
5.12k
    basicblock *b = entryblock;
3422
6.79k
    while(b->b_next) {
3423
6.79k
        assert(!b->b_cold);
3424
34.3k
        while (b->b_next && !b->b_next->b_cold) {
3425
27.5k
            b = b->b_next;
3426
27.5k
        }
3427
6.79k
        if (b->b_next == NULL) {
3428
            /* no more cold blocks */
3429
5.12k
            break;
3430
5.12k
        }
3431
3432
        /* b->b_next is the beginning of a cold streak */
3433
1.67k
        assert(!b->b_cold && b->b_next->b_cold);
3434
3435
1.67k
        basicblock *b_end = b->b_next;
3436
4.39k
        while (b_end->b_next && b_end->b_next->b_cold) {
3437
2.72k
            b_end = b_end->b_next;
3438
2.72k
        }
3439
3440
        /* b_end is the end of the cold streak */
3441
1.67k
        assert(b_end && b_end->b_cold);
3442
1.67k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3443
3444
1.67k
        if (cold_blocks == NULL) {
3445
901
            cold_blocks = b->b_next;
3446
901
        }
3447
771
        else {
3448
771
            cold_blocks_tail->b_next = b->b_next;
3449
771
        }
3450
1.67k
        cold_blocks_tail = b_end;
3451
1.67k
        b->b_next = b_end->b_next;
3452
1.67k
        b_end->b_next = NULL;
3453
1.67k
    }
3454
5.12k
    assert(b != NULL && b->b_next == NULL);
3455
5.12k
    b->b_next = cold_blocks;
3456
3457
5.12k
    if (cold_blocks != NULL) {
3458
901
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3459
901
    }
3460
5.12k
    return SUCCESS;
3461
5.12k
}
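
The splice above is ordinary singly-linked-list surgery: each maximal run
of cold blocks is unlinked and appended to a separate chain, which is
finally hung off the tail. A standalone sketch with a toy node type (the
explicit-jump insertion is not modeled):

    #include <stdio.h>

    typedef struct node { int id; int cold; struct node *next; } node;

    int main(void)
    {
        /* list: 0(warm) -> 1(cold) -> 2(warm) */
        node n2 = {2, 0, NULL}, n1 = {1, 1, &n2}, n0 = {0, 0, &n1};
        node *head = &n0, *cold_head = NULL, *cold_tail = NULL;

        for (node *b = head; b && b->next; ) {
            if (!b->next->cold) { b = b->next; continue; }
            node *start = b->next, *end = start;
            while (end->next && end->next->cold) {
                end = end->next;           /* extend the cold streak */
            }
            if (cold_head == NULL) { cold_head = start; }
            else { cold_tail->next = start; }
            cold_tail = end;
            b->next = end->next;           /* unlink the streak */
            end->next = NULL;
        }
        node *tail = head;
        while (tail->next) { tail = tail->next; }
        tail->next = cold_head;            /* append all cold streaks */

        for (node *p = head; p; p = p->next) { printf("%d ", p->id); }
        printf("\n");                      /* prints: 0 2 1 */
        return 0;
    }
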
3462
3463
static int
3464
convert_pseudo_conditional_jumps(cfg_builder *g)
3465
5.76k
{
3466
5.76k
    basicblock *entryblock = g->g_entryblock;
3467
43.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3468
311k
        for (int i = 0; i < b->b_iused; i++) {
3469
274k
            cfg_instr *instr = &b->b_instr[i];
3470
274k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3471
388
                assert(i == b->b_iused - 1);
3472
388
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3473
224
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3474
388
                location loc = instr->i_loc;
3475
388
                cfg_instr copy = {
3476
388
                            .i_opcode = COPY,
3477
388
                            .i_oparg = 1,
3478
388
                            .i_loc = loc,
3479
388
                            .i_target = NULL,
3480
388
                };
3481
388
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3482
388
                cfg_instr to_bool = {
3483
388
                            .i_opcode = TO_BOOL,
3484
388
                            .i_oparg = 0,
3485
388
                            .i_loc = loc,
3486
388
                            .i_target = NULL,
3487
388
                };
3488
388
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3489
388
            }
3490
274k
        }
3491
37.7k
    }
3492
5.76k
    return SUCCESS;
3493
5.76k
}
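
Each pseudo conditional jump expands in place to COPY 1; TO_BOOL;
POP_JUMP_IF_{FALSE,TRUE}, and the i++ bookkeeping steps the loop past the
two inserted instructions. A standalone sketch of that index arithmetic on
a toy int array (the opcode numbers are arbitrary stand-ins):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        int instr[8] = {10, 99};     /* 99 stands in for JUMP_IF_FALSE */
        int used = 2;

        for (int i = 0; i < used; i++) {
            if (instr[i] == 99) {
                /* make room for COPY and TO_BOOL before the jump */
                memmove(&instr[i + 2], &instr[i], (used - i) * sizeof(int));
                instr[i++] = 1;      /* COPY 1 */
                instr[i++] = 2;      /* TO_BOOL */
                used += 2;
                instr[i] = 3;        /* now POP_JUMP_IF_FALSE */
            }
        }
        for (int i = 0; i < used; i++) { printf("%d ", instr[i]); }
        printf("\n");                /* prints: 10 1 2 3 */
        return 0;
    }
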
3494
3495
static int
3496
convert_pseudo_ops(cfg_builder *g)
3497
5.76k
{
3498
5.76k
    basicblock *entryblock = g->g_entryblock;
3499
43.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3500
314k
        for (int i = 0; i < b->b_iused; i++) {
3501
276k
            cfg_instr *instr = &b->b_instr[i];
3502
276k
            if (is_block_push(instr)) {
3503
2.22k
                INSTR_SET_OP0(instr, NOP);
3504
2.22k
            }
3505
274k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3506
1.31k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3507
1.31k
                instr->i_opcode = LOAD_FAST;
3508
1.31k
            }
3509
273k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3510
454
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3511
454
                instr->i_opcode = STORE_FAST;
3512
454
            }
3513
276k
        }
3514
37.7k
    }
3515
5.76k
    return remove_redundant_nops_and_jumps(g);
3516
5.76k
}
3517
3518
static inline bool
3519
61.8k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3520
61.8k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3521
34.4k
        return basicblock_has_no_lineno(b);
3522
34.4k
    }
3523
27.4k
    else {
3524
27.4k
        return false;
3525
27.4k
    }
3526
61.8k
}
3527
3528
3529
/* PEP 626 mandates that the f_lineno of a frame is correct
3530
 * after a frame terminates. It would be prohibitively expensive
3531
 * to continuously update the f_lineno field at runtime,
3532
 * so we make sure that all exiting instructions (raises and returns)
3533
 * have a valid line number, allowing us to compute f_lineno lazily.
3534
 * We can do this by duplicating the exit blocks without a line number
3535
 * so that none have more than one predecessor. We can then safely
3536
 * copy the line number from the sole predecessor block.
3537
 */
3538
static int
3539
duplicate_exits_without_lineno(cfg_builder *g)
3540
11.5k
{
3541
11.5k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3542
3543
    /* Copy all exit blocks without a line number that are targets of a jump.
3544
     */
3545
11.5k
    basicblock *entryblock = g->g_entryblock;
3546
86.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3547
75.4k
        cfg_instr *last = basicblock_last_instr(b);
3548
75.4k
        if (last == NULL) {
3549
12.3k
            continue;
3550
12.3k
        }
3551
63.1k
        if (is_jump(last)) {
3552
30.1k
            basicblock *target = next_nonempty_block(last->i_target);
3553
30.1k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3554
510
                basicblock *new_target = copy_basicblock(g, target);
3555
510
                if (new_target == NULL) {
3556
0
                    return ERROR;
3557
0
                }
3558
510
                new_target->b_instr[0].i_loc = last->i_loc;
3559
510
                last->i_target = new_target;
3560
510
                target->b_predecessors--;
3561
510
                new_target->b_predecessors = 1;
3562
510
                new_target->b_next = target->b_next;
3563
510
                new_target->b_label.id = next_lbl++;
3564
510
                target->b_next = new_target;
3565
510
            }
3566
30.1k
        }
3567
63.1k
    }
3568
3569
    /* Any remaining reachable exit blocks without a line number can only be reached by
3570
     * fall through, and thus can only have a single predecessor */
3571
86.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3572
75.4k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3573
31.7k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3574
678
                cfg_instr *last = basicblock_last_instr(b);
3575
678
                assert(last != NULL);
3576
678
                b->b_next->b_instr[0].i_loc = last->i_loc;
3577
678
            }
3578
31.7k
        }
3579
75.4k
    }
3580
11.5k
    return SUCCESS;
3581
11.5k
}
3582
3583
3584
/* If an instruction has no line number, but its predecessor in the BB does,
3585
 * then copy the line number. If a successor block has no line number and
3586
 * has only one predecessor, then inherit the line number.
3587
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3588
 * This also reduces the size of the line number table,
3589
 * but has no impact on the generated line number events.
3590
 */
3591
static void
3592
11.5k
propagate_line_numbers(basicblock *entryblock) {
3593
86.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3594
75.4k
        cfg_instr *last = basicblock_last_instr(b);
3595
75.4k
        if (last == NULL) {
3596
12.3k
            continue;
3597
12.3k
        }
3598
3599
63.1k
        location prev_location = NO_LOCATION;
3600
641k
        for (int i = 0; i < b->b_iused; i++) {
3601
578k
            if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3602
32.9k
                b->b_instr[i].i_loc = prev_location;
3603
32.9k
            }
3604
545k
            else {
3605
545k
                prev_location = b->b_instr[i].i_loc;
3606
545k
            }
3607
578k
        }
3608
63.1k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3609
22.7k
            if (b->b_next->b_iused > 0) {
3610
22.7k
                if (b->b_next->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3611
629
                    b->b_next->b_instr[0].i_loc = prev_location;
3612
629
                }
3613
22.7k
            }
3614
22.7k
        }
3615
63.1k
        if (is_jump(last)) {
3616
30.1k
            basicblock *target = last->i_target;
3617
30.1k
            if (target->b_predecessors == 1) {
3618
13.8k
                if (target->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3619
2.40k
                    target->b_instr[0].i_loc = prev_location;
3620
2.40k
                }
3621
13.8k
            }
3622
30.1k
        }
3623
63.1k
    }
3624
11.5k
}
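
Within a block this is a single forward sweep carrying the last known
location. A standalone sketch on a toy lineno array, with -1 playing
NO_LOCATION:

    #include <stdio.h>

    int main(void)
    {
        int lineno[5] = {3, -1, -1, 7, -1};
        int prev = -1;
        for (int i = 0; i < 5; i++) {
            if (lineno[i] == -1) {
                lineno[i] = prev;    /* inherit from the predecessor */
            }
            else {
                prev = lineno[i];
            }
        }
        for (int i = 0; i < 5; i++) { printf("%d ", lineno[i]); }
        printf("\n");                /* prints: 3 3 3 7 7 */
        return 0;
    }
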
3625
3626
static int
3627
resolve_line_numbers(cfg_builder *g, int firstlineno)
3628
11.5k
{
3629
11.5k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3630
11.5k
    propagate_line_numbers(g->g_entryblock);
3631
11.5k
    return SUCCESS;
3632
11.5k
}
3633
3634
int
3635
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3636
                        int nlocals, int nparams, int firstlineno)
3637
5.76k
{
3638
5.76k
    assert(cfg_builder_check(g));
3639
    /** Preprocessing **/
3640
    /* Map labels to targets and mark exception handlers */
3641
5.76k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3642
5.76k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3643
5.76k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3644
3645
    /** Optimization **/
3646
5.76k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3647
5.76k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3648
5.76k
    RETURN_IF_ERROR(
3649
5.76k
        add_checks_for_loads_of_uninitialized_variables(
3650
5.76k
            g->g_entryblock, nlocals, nparams));
3651
5.76k
    RETURN_IF_ERROR(insert_superinstructions(g));
3652
3653
5.76k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3654
5.76k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3655
    // assert temporarily removed. See https://github.com/python/cpython/issues/125845
3656
    // assert(all_exits_have_lineno(g->g_entryblock));
3657
5.76k
    return SUCCESS;
3658
5.76k
}
3659
3660
static int *
3661
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3662
5.76k
{
3663
5.76k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3664
5.76k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3665
5.76k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3666
3667
5.76k
    int noffsets = ncellvars + nfreevars;
3668
5.76k
    int *fixed = PyMem_New(int, noffsets);
3669
5.76k
    if (fixed == NULL) {
3670
0
        PyErr_NoMemory();
3671
0
        return NULL;
3672
0
    }
3673
7.38k
    for (int i = 0; i < noffsets; i++) {
3674
1.62k
        fixed[i] = nlocals + i;
3675
1.62k
    }
3676
3677
5.76k
    PyObject *varname, *cellindex;
3678
5.76k
    Py_ssize_t pos = 0;
3679
6.74k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3680
979
        PyObject *varindex;
3681
979
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3682
0
            goto error;
3683
0
        }
3684
979
        if (varindex == NULL) {
3685
792
            continue;
3686
792
        }
3687
3688
187
        int argoffset = PyLong_AsInt(varindex);
3689
187
        Py_DECREF(varindex);
3690
187
        if (argoffset == -1 && PyErr_Occurred()) {
3691
0
            goto error;
3692
0
        }
3693
3694
187
        int oldindex = PyLong_AsInt(cellindex);
3695
187
        if (oldindex == -1 && PyErr_Occurred()) {
3696
0
            goto error;
3697
0
        }
3698
187
        fixed[oldindex] = argoffset;
3699
187
    }
3700
5.76k
    return fixed;
3701
3702
0
error:
3703
0
    PyMem_Free(fixed);
3704
0
    return NULL;
3705
5.76k
}
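
By default a cell's frame slot comes after all the plain locals; a cell
that aliases an argument keeps the argument's slot instead. A standalone
sketch with toy sizes:

    #include <stdio.h>

    int main(void)
    {
        int nlocals = 3;              /* e.g. args a, b plus one plain local */
        int noffsets = 2;             /* two cells */
        int fixed[2];

        for (int i = 0; i < noffsets; i++) {
            fixed[i] = nlocals + i;   /* default: slots 3 and 4 */
        }
        fixed[0] = 1;                 /* cell 0 aliases argument b at slot 1 */

        for (int i = 0; i < noffsets; i++) {
            printf("cell %d -> slot %d\n", i, fixed[i]);
        }
        return 0;
    }
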
3706
3707
#define IS_GENERATOR(CF) \
3708
5.76k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3709
3710
static int
3711
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3712
                           int *fixed, int nfreevars, int code_flags)
3713
5.76k
{
3714
5.76k
    assert(umd->u_firstlineno > 0);
3715
3716
    /* Add the generator prefix instructions. */
3717
5.76k
    if (IS_GENERATOR(code_flags)) {
3718
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3719
         * of 0. This is because RETURN_GENERATOR pushes an element
3720
         * with _PyFrame_StackPush before switching stacks.
3721
         */
3722
3723
295
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3724
295
        cfg_instr make_gen = {
3725
295
            .i_opcode = RETURN_GENERATOR,
3726
295
            .i_oparg = 0,
3727
295
            .i_loc = loc,
3728
295
            .i_target = NULL,
3729
295
        };
3730
295
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3731
295
        cfg_instr pop_top = {
3732
295
            .i_opcode = POP_TOP,
3733
295
            .i_oparg = 0,
3734
295
            .i_loc = loc,
3735
295
            .i_target = NULL,
3736
295
        };
3737
295
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3738
295
    }
3739
3740
    /* Set up cells for any variable that escapes, to be put in a closure. */
3741
5.76k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3742
5.76k
    if (ncellvars) {
3743
        // umd->u_cellvars has the cells out of order, so we sort them
3744
        // before adding the MAKE_CELL instructions.  Note that we
3745
        // adjust for arg cells, which come first.
3746
739
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3747
739
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3748
739
        if (sorted == NULL) {
3749
0
            PyErr_NoMemory();
3750
0
            return ERROR;
3751
0
        }
3752
1.71k
        for (int i = 0; i < ncellvars; i++) {
3753
979
            sorted[fixed[i]] = i + 1;
3754
979
        }
3755
2.07k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3756
1.33k
            int oldindex = sorted[i] - 1;
3757
1.33k
            if (oldindex == -1) {
3758
355
                continue;
3759
355
            }
3760
979
            cfg_instr make_cell = {
3761
979
                .i_opcode = MAKE_CELL,
3762
                // This will get fixed in fix_cell_offsets().
3763
979
                .i_oparg = oldindex,
3764
979
                .i_loc = NO_LOCATION,
3765
979
                .i_target = NULL,
3766
979
            };
3767
979
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3768
0
                PyMem_RawFree(sorted);
3769
0
                return ERROR;
3770
0
            }
3771
979
            ncellsused += 1;
3772
979
        }
3773
739
        PyMem_RawFree(sorted);
3774
739
    }
3775
3776
5.76k
    if (nfreevars) {
3777
395
        cfg_instr copy_frees = {
3778
395
            .i_opcode = COPY_FREE_VARS,
3779
395
            .i_oparg = nfreevars,
3780
395
            .i_loc = NO_LOCATION,
3781
395
            .i_target = NULL,
3782
395
        };
3783
395
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3784
395
    }
3785
3786
5.76k
    return SUCCESS;
3787
5.76k
}
3788
3789
static int
3790
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3791
5.76k
{
3792
5.76k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3793
5.76k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3794
5.76k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3795
5.76k
    int noffsets = ncellvars + nfreevars;
3796
3797
    // First deal with duplicates (arg cells).
3798
5.76k
    int numdropped = 0;
3799
7.38k
    for (int i = 0; i < noffsets ; i++) {
3800
1.62k
        if (fixedmap[i] == i + nlocals) {
3801
1.43k
            fixedmap[i] -= numdropped;
3802
1.43k
        }
3803
187
        else {
3804
            // It was a duplicate (cell/arg).
3805
187
            numdropped += 1;
3806
187
        }
3807
1.62k
    }
3808
3809
    // Then update offsets, either relative to locals or by cell2arg.
3810
43.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3811
314k
        for (int i = 0; i < b->b_iused; i++) {
3812
276k
            cfg_instr *inst = &b->b_instr[i];
3813
            // This is called before extended args are generated.
3814
276k
            assert(inst->i_opcode != EXTENDED_ARG);
3815
276k
            int oldoffset = inst->i_oparg;
3816
276k
            switch(inst->i_opcode) {
3817
979
                case MAKE_CELL:
3818
2.29k
                case LOAD_CLOSURE:
3819
3.67k
                case LOAD_DEREF:
3820
4.49k
                case STORE_DEREF:
3821
4.49k
                case DELETE_DEREF:
3822
4.49k
                case LOAD_FROM_DICT_OR_DEREF:
3823
4.49k
                    assert(oldoffset >= 0);
3824
4.49k
                    assert(oldoffset < noffsets);
3825
4.49k
                    assert(fixedmap[oldoffset] >= 0);
3826
4.49k
                    inst->i_oparg = fixedmap[oldoffset];
3827
276k
            }
3828
276k
        }
3829
37.7k
    }
3830
3831
5.76k
    return numdropped;
3832
5.76k
}
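
The duplicate-drop pass shifts every remaining default-slot cell down by
the number of arg-aliased cells already seen, so the slots stay contiguous.
A standalone sketch mirroring that first loop on toy values:

    #include <stdio.h>

    int main(void)
    {
        int nlocals = 3, noffsets = 3;
        int fixedmap[3] = {3, 1, 5};   /* cell 1 aliases an argument */
        int numdropped = 0;

        for (int i = 0; i < noffsets; i++) {
            if (fixedmap[i] == i + nlocals) {
                fixedmap[i] -= numdropped;   /* still a default cell slot */
            }
            else {
                numdropped += 1;             /* duplicate (cell/arg) */
            }
        }
        for (int i = 0; i < noffsets; i++) {
            printf("offset %d -> %d\n", i, fixedmap[i]);
        }
        printf("numdropped = %d\n", numdropped);   /* prints 1 */
        return 0;
    }
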
3833
3834
static int
3835
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3836
5.76k
{
3837
5.76k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3838
5.76k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3839
5.76k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3840
5.76k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3841
5.76k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3842
5.76k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3843
5.76k
    assert(INT_MAX - nlocals - ncellvars > 0);
3844
5.76k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3845
5.76k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3846
5.76k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3847
5.76k
    if (cellfixedoffsets == NULL) {
3848
0
        return ERROR;
3849
0
    }
3850
3851
    // This must be called before fix_cell_offsets().
3852
5.76k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3853
0
        PyMem_Free(cellfixedoffsets);
3854
0
        return ERROR;
3855
0
    }
3856
3857
5.76k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3858
5.76k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3859
5.76k
    cellfixedoffsets = NULL;
3860
5.76k
    if (numdropped < 0) {
3861
0
        return ERROR;
3862
0
    }
3863
3864
5.76k
    nlocalsplus -= numdropped;
3865
5.76k
    return nlocalsplus;
3866
5.76k
}
3867
3868
cfg_builder *
3869
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3870
5.76k
{
3871
5.76k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3872
0
        return NULL;
3873
0
    }
3874
5.76k
    cfg_builder *g = _PyCfgBuilder_New();
3875
5.76k
    if (g == NULL) {
3876
0
        return NULL;
3877
0
    }
3878
319k
    for (int i = 0; i < seq->s_used; i++) {
3879
313k
        seq->s_instrs[i].i_target = 0;
3880
313k
    }
3881
319k
    for (int i = 0; i < seq->s_used; i++) {
3882
313k
        _PyInstruction *instr = &seq->s_instrs[i];
3883
313k
        if (HAS_TARGET(instr->i_opcode)) {
3884
19.2k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3885
19.2k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3886
19.2k
        }
3887
313k
    }
3888
5.76k
    int offset = 0;
3889
319k
    for (int i = 0; i < seq->s_used; i++) {
3890
313k
        _PyInstruction *instr = &seq->s_instrs[i];
3891
313k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3892
309
            if (seq->s_annotations_code != NULL) {
3893
0
                assert(seq->s_annotations_code->s_labelmap_size == 0
3894
0
                    && seq->s_annotations_code->s_nested == NULL);
3895
0
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3896
0
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3897
0
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3898
0
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3899
0
                        goto error;
3900
0
                    }
3901
0
                }
3902
0
                offset += seq->s_annotations_code->s_used - 1;
3903
0
            }
3904
309
            else {
3905
309
                offset -= 1;
3906
309
            }
3907
309
            continue;
3908
309
        }
3909
313k
        if (instr->i_target) {
3910
15.6k
            jump_target_label lbl_ = {i + offset};
3911
15.6k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3912
0
                goto error;
3913
0
            }
3914
15.6k
        }
3915
313k
        int opcode = instr->i_opcode;
3916
313k
        int oparg = instr->i_oparg;
3917
313k
        if (HAS_TARGET(opcode)) {
3918
19.2k
            oparg += offset;
3919
19.2k
        }
3920
313k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3921
0
            goto error;
3922
0
        }
3923
313k
    }
3924
5.76k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3925
0
        goto error;
3926
0
    }
3927
5.76k
    return g;
3928
0
error:
3929
0
    _PyCfgBuilder_Free(g);
3930
0
    return NULL;
3931
5.76k
}
3932
3933
int
3934
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3935
5.76k
{
3936
5.76k
    int lbl = 0;
3937
44.0k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3938
38.3k
        b->b_label = (jump_target_label){lbl};
3939
38.3k
        lbl += 1;
3940
38.3k
    }
3941
44.0k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3942
38.3k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3943
323k
        for (int i = 0; i < b->b_iused; i++) {
3944
285k
            cfg_instr *instr = &b->b_instr[i];
3945
285k
            if (HAS_TARGET(instr->i_opcode)) {
3946
                /* Set oparg to the label id (it will later be mapped to an offset) */
3947
15.1k
                instr->i_oparg = instr->i_target->b_label.id;
3948
15.1k
            }
3949
285k
            RETURN_IF_ERROR(
3950
285k
                _PyInstructionSequence_Addop(
3951
285k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3952
3953
285k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3954
285k
            if (instr->i_except != NULL) {
3955
37.3k
                hi->h_label = instr->i_except->b_label.id;
3956
37.3k
                hi->h_startdepth = instr->i_except->b_startdepth;
3957
37.3k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3958
37.3k
            }
3959
247k
            else {
3960
247k
                hi->h_label = -1;
3961
247k
            }
3962
285k
        }
3963
38.3k
    }
3964
5.76k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3965
0
        return ERROR;
3966
0
    }
3967
5.76k
    return SUCCESS;
3968
5.76k
}
3969
3970
3971
int
3972
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3973
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3974
                                     int *stackdepth, int *nlocalsplus,
3975
                                     _PyInstructionSequence *seq)
3976
5.76k
{
3977
5.76k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3978
3979
5.76k
    *stackdepth = calculate_stackdepth(g);
3980
5.76k
    if (*stackdepth < 0) {
3981
0
        return ERROR;
3982
0
    }
3983
3984
    /* prepare_localsplus adds instructions for generators that push
3985
     * and pop an item on the stack. This assertion makes sure there
3986
     * is space on the stack for that.
3987
     * It should always be true, because a generator must have at
3988
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
3989
 * which requires stack space.
3990
     */
3991
5.76k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
3992
3993
5.76k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
3994
5.76k
    if (*nlocalsplus < 0) {
3995
0
        return ERROR;
3996
0
    }
3997
3998
5.76k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
3999
4000
    /* Order of basic blocks must have been determined by now */
4001
4002
5.76k
    RETURN_IF_ERROR(normalize_jumps(g));
4003
5.76k
    assert(no_redundant_jumps(g));
4004
4005
    /* Can't modify the bytecode after inserting instructions that produce
4006
     * borrowed references.
4007
     */
4008
5.76k
    RETURN_IF_ERROR(optimize_load_fast(g));
4009
4010
    /* Can't modify the bytecode after computing jump offsets. */
4011
5.76k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4012
0
        return ERROR;
4013
0
    }
4014
4015
5.76k
    return SUCCESS;
4016
5.76k
}
4017
4018
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4019
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4020
 */
4021
int
4022
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4023
0
{
4024
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4025
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4026
0
    return SUCCESS;
4027
0
}
4028
4029
/* Exported API functions */
4030
4031
int
4032
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4033
0
{
4034
0
    stack_effects effs;
4035
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4036
0
        return PY_INVALID_STACK_EFFECT;
4037
0
    }
4038
0
    return effs.net;
4039
0
}
4040
4041
int
4042
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4043
98
{
4044
98
    stack_effects effs;
4045
98
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4046
0
        return PY_INVALID_STACK_EFFECT;
4047
0
    }
4048
98
    return effs.net;
4049
98
}
4050
4051
/* Access to compiler optimizations for unit tests.
4052
4053
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4054
 * a CFG, optimizes it, and converts it back to an instruction list.
4055
 */
4056
4057
static PyObject *
4058
cfg_to_instruction_sequence(cfg_builder *g)
4059
0
{
4060
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4061
0
    if (seq == NULL) {
4062
0
        return NULL;
4063
0
    }
4064
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4065
0
        PyInstructionSequence_Fini(seq);
4066
0
        return NULL;
4067
0
    }
4068
0
    return (PyObject*)seq;
4069
0
}
4070
4071
PyObject *
4072
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4073
0
{
4074
0
    if (!_PyInstructionSequence_Check(seq)) {
4075
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4076
0
        return NULL;
4077
0
    }
4078
0
    PyObject *const_cache = PyDict_New();
4079
0
    if (const_cache == NULL) {
4080
0
        return NULL;
4081
0
    }
4082
4083
0
    PyObject *res = NULL;
4084
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4085
0
    if (g == NULL) {
4086
0
        goto error;
4087
0
    }
4088
0
    int nparams = 0, firstlineno = 1;
4089
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4090
0
                                nparams, firstlineno) < 0) {
4091
0
        goto error;
4092
0
    }
4093
4094
0
    if (calculate_stackdepth(g) == ERROR) {
4095
0
        goto error;
4096
0
    }
4097
4098
0
    if (optimize_load_fast(g) != SUCCESS) {
4099
0
        goto error;
4100
0
    }
4101
4102
0
    res = cfg_to_instruction_sequence(g);
4103
0
error:
4104
0
    Py_DECREF(const_cache);
4105
0
    _PyCfgBuilder_Free(g);
4106
0
    return res;
4107
0
}