Coverage Report

Created: 2025-08-26 06:26

/src/cpython/Python/flowgraph.c
Line | Count | Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
1.15M
#define SUCCESS 0
19
6.55k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
1.80M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
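/* [Editorial sketch, not part of the original file] The error-handling
   convention used throughout: functions return SUCCESS (0) or ERROR (-1),
   and RETURN_IF_ERROR propagates a failure up the call chain. The helper
   names below are hypothetical placeholders. */
static int example_step_that_succeeds(void) { return SUCCESS; }
static int example_step_that_fails(void) { return ERROR; }

static int
example_pipeline(void)
{
    RETURN_IF_ERROR(example_step_that_succeeds());  /* falls through */
    RETURN_IF_ERROR(example_step_that_fails());     /* returns ERROR here */
    return SUCCESS;                                 /* never reached */
}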
26
386k
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the blocks are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is the next
43
       block in control-flow order. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
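/* [Editorial sketch, not part of the original file] How the two links in
   basicblock differ: b_list chains all blocks in reverse allocation order
   (used for bookkeeping and freeing), while b_next follows normal control
   flow. The function below is a hypothetical illustration. */
static void
example_link_blocks(basicblock *newest, basicblock *previously_allocated,
                    basicblock *fallthrough_successor)
{
    /* allocation chain: the newest block heads the list */
    newest->b_list = previously_allocated;
    /* control flow: execution falls through from newest into its successor */
    newest->b_next = fallthrough_successor;
}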
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
395k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
395k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
342
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
1.68M
{
101
1.68M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
1.68M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
1.68M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
1.54M
{
108
1.54M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
1.54M
}
110
111
/* One arg */
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
40.1k
    do { \
114
40.1k
        assert(OPCODE_HAS_ARG(OP)); \
115
40.1k
        cfg_instr *_instr__ptr_ = (I); \
116
40.1k
        _instr__ptr_->i_opcode = (OP); \
117
40.1k
        _instr__ptr_->i_oparg = (ARG); \
118
40.1k
    } while (0);
119
120
/* No args */
121
#define INSTR_SET_OP0(I, OP) \
122
81.1k
    do { \
123
81.1k
        assert(!OPCODE_HAS_ARG(OP)); \
124
81.1k
        cfg_instr *_instr__ptr_ = (I); \
125
81.1k
        _instr__ptr_->i_opcode = (OP); \
126
81.1k
        _instr__ptr_->i_oparg = 0; \
127
81.1k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
4.89k
    do { \
131
4.89k
        cfg_instr *_instr__ptr_ = (I); \
132
4.89k
        _instr__ptr_->i_loc = (LOC); \
133
4.89k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr array as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
386k
{
144
386k
    assert(b != NULL);
145
386k
    _Py_c_array_t array = {
146
386k
        .array = (void*)b->b_instr,
147
386k
        .allocated_entries = b->b_ialloc,
148
386k
        .item_size = sizeof(cfg_instr),
149
386k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
386k
    };
151
152
386k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
386k
    b->b_instr = array.array;
154
386k
    b->b_ialloc = array.allocated_entries;
155
386k
    return b->b_iused++;
156
386k
}
157
158
static cfg_instr *
159
1.43M
basicblock_last_instr(const basicblock *b) {
160
1.43M
    assert(b->b_iused >= 0);
161
1.43M
    if (b->b_iused > 0) {
162
1.32M
        assert(b->b_instr != NULL);
163
1.32M
        return &b->b_instr[b->b_iused - 1];
164
1.32M
    }
165
111k
    return NULL;
166
1.43M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
45.0k
{
175
45.0k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
45.0k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
45.0k
    b->b_list = g->g_block_list;
182
45.0k
    g->g_block_list = b;
183
45.0k
    b->b_label = NO_LABEL;
184
45.0k
    return b;
185
45.0k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
377k
{
190
377k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
377k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
377k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
377k
    assert(0 <= oparg && oparg < (1 << 30));
194
195
377k
    int off = basicblock_next_instr(b);
196
377k
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
377k
    cfg_instr *i = &b->b_instr[off];
200
377k
    i->i_opcode = opcode;
201
377k
    i->i_oparg = oparg;
202
377k
    i->i_loc = loc;
203
    // memory is already zero initialized
204
377k
    assert(i->i_target == NULL);
205
377k
    assert(i->i_except == NULL);
206
207
377k
    return SUCCESS;
208
377k
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
1.40k
{
213
1.40k
    cfg_instr *last = basicblock_last_instr(b);
214
1.40k
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
1.40k
    RETURN_IF_ERROR(
219
1.40k
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
1.40k
    last = basicblock_last_instr(b);
221
1.40k
    assert(last && last->i_opcode == opcode);
222
1.40k
    last->i_target = target;
223
1.40k
    return SUCCESS;
224
1.40k
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
3.06k
{
229
8.07k
    for (int i = 0; i < from->b_iused; i++) {
230
5.01k
        int n = basicblock_next_instr(to);
231
5.01k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
5.01k
        to->b_instr[n] = from->b_instr[i];
235
5.01k
    }
236
3.06k
    return SUCCESS;
237
3.06k
}
238
239
static inline int
240
414k
basicblock_nofallthrough(const basicblock *b) {
241
414k
    cfg_instr *last = basicblock_last_instr(b);
242
414k
    return (last &&
243
414k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
391k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
414k
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
689k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
595
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
595
    assert(BB_NO_FALLTHROUGH(block));
257
595
    basicblock *result = cfg_builder_new_block(g);
258
595
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
595
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
595
    return result;
265
595
}
266
267
static int
268
3.50k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
3.50k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
64.1k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
60.6k
        block->b_instr[i] = block->b_instr[i-1];
272
60.6k
    }
273
3.50k
    block->b_instr[pos] = *instr;
274
3.50k
    return SUCCESS;
275
3.50k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
37.1k
{
343
37.1k
    assert(block != NULL);
344
37.1k
    g->g_curblock->b_next = block;
345
37.1k
    g->g_curblock = block;
346
37.1k
    return block;
347
37.1k
}
348
349
static inline int
350
85.9k
basicblock_exits_scope(const basicblock *b) {
351
85.9k
    cfg_instr *last = basicblock_last_instr(b);
352
85.9k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
85.9k
}
354
355
static inline int
356
54.9k
basicblock_has_eval_break(const basicblock *b) {
357
270k
    for (int i = 0; i < b->b_iused; i++) {
358
237k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
22.1k
            return true;
360
22.1k
        }
361
237k
    }
362
32.7k
    return false;
363
54.9k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
383k
{
368
383k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
383k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
31.5k
        return true;
371
31.5k
    }
372
351k
    if (IS_LABEL(g->g_current_label)) {
373
5.56k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
5.56k
            return true;
375
5.56k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
5.56k
    }
382
346k
    return false;
383
351k
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
383k
{
388
383k
    if (cfg_builder_current_block_is_terminated(g)) {
389
37.1k
        basicblock *b = cfg_builder_new_block(g);
390
37.1k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
37.1k
        b->b_label = g->g_current_label;
394
37.1k
        g->g_current_label = NO_LABEL;
395
37.1k
        cfg_builder_use_next_block(g, b);
396
37.1k
    }
397
383k
    return SUCCESS;
398
383k
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
{
404
    assert(g->g_entryblock->b_iused > 0);
405
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
        assert(!_PyMem_IsPtrFreed(block));
407
        if (block->b_instr != NULL) {
408
            assert(block->b_ialloc > 0);
409
            assert(block->b_iused >= 0);
410
            assert(block->b_ialloc >= block->b_iused);
411
        }
412
        else {
413
            assert (block->b_iused == 0);
414
            assert (block->b_ialloc == 0);
415
        }
416
    }
417
    return true;
418
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
6.55k
{
424
6.55k
    g->g_block_list = NULL;
425
6.55k
    basicblock *block = cfg_builder_new_block(g);
426
6.55k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
6.55k
    g->g_curblock = g->g_entryblock = block;
430
6.55k
    g->g_current_label = NO_LABEL;
431
6.55k
    return SUCCESS;
432
6.55k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
6.55k
{
437
6.55k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
6.55k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
6.55k
    memset(g, 0, sizeof(cfg_builder));
443
6.55k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
6.55k
    return g;
448
6.55k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
6.55k
{
453
6.55k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
6.55k
    assert(cfg_builder_check(g));
457
6.55k
    basicblock *b = g->g_block_list;
458
51.5k
    while (b != NULL) {
459
45.0k
        if (b->b_instr) {
460
45.0k
            PyMem_Free((void *)b->b_instr);
461
45.0k
        }
462
45.0k
        basicblock *next = b->b_list;
463
45.0k
        PyMem_Free((void *)b);
464
45.0k
        b = next;
465
45.0k
    }
466
6.55k
    PyMem_Free(g);
467
6.55k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
6.55k
{
472
6.55k
    int nblocks = 0;
473
50.2k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
43.6k
        nblocks++;
475
43.6k
    }
476
6.55k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
6.55k
    return SUCCESS;
481
6.55k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
18.4k
{
486
18.4k
    g->g_current_label = lbl;
487
18.4k
    return cfg_builder_maybe_start_new_block(g);
488
18.4k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
364k
{
493
364k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
364k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
364k
}
496
497
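/* [Editorial sketch, not part of the original file] Hypothetical use of
   the builder API above: the compiler front end streams opcodes into the
   builder, which starts a new basic block automatically after each
   terminator or label (see cfg_builder_maybe_start_new_block). */
static int
example_build_cfg(void)
{
    cfg_builder *g = _PyCfgBuilder_New();
    if (g == NULL) {
        return ERROR;
    }
    location loc = LOCATION(1, 1, 0, 0);
    if (_PyCfgBuilder_Addop(g, NOP, 0, loc) < 0 ||
        _PyCfgBuilder_Addop(g, RETURN_VALUE, 0, loc) < 0) {
        _PyCfgBuilder_Free(g);
        return ERROR;
    }
    _PyCfgBuilder_Free(g);
    return SUCCESS;
}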
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
74.6k
{
501
77.7k
    while (b && b->b_iused == 0) {
502
3.10k
        b = b->b_next;
503
3.10k
    }
504
74.6k
    return b;
505
74.6k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
no_redundant_nops(cfg_builder *g) {
514
    if (remove_redundant_nops(g) != 0) {
515
        return false;
516
    }
517
    return true;
518
}
519
520
static bool
521
no_redundant_jumps(cfg_builder *g) {
522
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
        cfg_instr *last = basicblock_last_instr(b);
524
        if (last != NULL) {
525
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
                basicblock *next = next_nonempty_block(b->b_next);
527
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
                if (jump_target == next) {
529
                    assert(next);
530
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
                        assert(0);
532
                        return false;
533
                    }
534
                }
535
            }
536
        }
537
    }
538
    return true;
539
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
45.0k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
45.0k
    cfg_instr *last = basicblock_last_instr(b);
547
45.0k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
33.6k
        return SUCCESS;
549
33.6k
    }
550
11.3k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
11.3k
    bool is_forward = last->i_target->b_visited == 0;
553
11.3k
    if (is_forward) {
554
10.6k
        RETURN_IF_ERROR(
555
10.6k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
10.6k
        return SUCCESS;
557
10.6k
    }
558
559
721
    int reversed_opcode = 0;
560
721
    switch(last->i_opcode) {
561
29
        case POP_JUMP_IF_NOT_NONE:
562
29
            reversed_opcode = POP_JUMP_IF_NONE;
563
29
            break;
564
36
        case POP_JUMP_IF_NONE:
565
36
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
36
            break;
567
595
        case POP_JUMP_IF_FALSE:
568
595
            reversed_opcode = POP_JUMP_IF_TRUE;
569
595
            break;
570
61
        case POP_JUMP_IF_TRUE:
571
61
            reversed_opcode = POP_JUMP_IF_FALSE;
572
61
            break;
573
721
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
721
    basicblock *target = last->i_target;
579
721
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
721
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
721
    RETURN_IF_ERROR(
584
721
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
721
    RETURN_IF_ERROR(
586
721
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
721
    backwards_jump->b_startdepth = target->b_startdepth;
588
721
    last->i_opcode = reversed_opcode;
589
721
    last->i_target = b->b_next;
590
591
721
    backwards_jump->b_cold = b->b_cold;
592
721
    backwards_jump->b_next = b->b_next;
593
721
    b->b_next = backwards_jump;
594
721
    return SUCCESS;
595
721
}
596
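/* [Editorial illustration, not part of the original file] Effect of the
   rewrite above on a backward conditional jump (T is an earlier, already
   visited block):
       B:  ...; POP_JUMP_IF_FALSE T
   becomes
       B:  ...; POP_JUMP_IF_TRUE B.b_next
       BJ: NOT_TAKEN; JUMP T        (BJ is inserted between B and B.b_next)
   so that after normalization only unconditional jumps go backwards. */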
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
6.55k
{
601
6.55k
    basicblock *entryblock = g->g_entryblock;
602
50.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
44.3k
        b->b_visited = 0;
604
44.3k
    }
605
51.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
45.0k
        b->b_visited = 1;
607
45.0k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
45.0k
    }
609
6.55k
    return SUCCESS;
610
6.55k
}
611
612
static int
613
6.55k
check_cfg(cfg_builder *g) {
614
50.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if a jump or exit is not the last instruction in the block. */
616
408k
        for (int i = 0; i < b->b_iused; i++) {
617
364k
            int opcode = b->b_instr[i].i_opcode;
618
364k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
364k
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
38.1k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
38.1k
            }
625
364k
        }
626
43.6k
    }
627
6.55k
    return SUCCESS;
628
6.55k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
25.4k
{
633
25.4k
    int lbl = -1;
634
200k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
175k
        if (b->b_label.id > lbl) {
636
67.3k
            lbl = b->b_label.id;
637
67.3k
        }
638
175k
    }
639
25.4k
    return lbl;
640
25.4k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
6.55k
{
646
6.55k
    int max_label = get_max_label(entryblock);
647
6.55k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
6.55k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
6.55k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
6.55k
    memset(label2block, 0, mapsize);
654
50.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
43.6k
        if (b->b_label.id >= 0) {
656
18.4k
            label2block[b->b_label.id] = b;
657
18.4k
        }
658
43.6k
    }
659
50.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
408k
        for (int i = 0; i < b->b_iused; i++) {
661
364k
            cfg_instr *instr = &b->b_instr[i];
662
364k
            assert(instr->i_target == NULL);
663
364k
            if (HAS_TARGET(instr->i_opcode)) {
664
22.8k
                int lbl = instr->i_oparg;
665
22.8k
                assert(lbl >= 0 && lbl <= max_label);
666
22.8k
                instr->i_target = label2block[lbl];
667
22.8k
                assert(instr->i_target != NULL);
668
22.8k
                assert(instr->i_target->b_label.id == lbl);
669
22.8k
            }
670
364k
        }
671
43.6k
    }
672
6.55k
    PyMem_Free(label2block);
673
6.55k
    return SUCCESS;
674
6.55k
}
675
676
static int
677
6.55k
mark_except_handlers(basicblock *entryblock) {
678
#ifndef NDEBUG
679
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
        assert(!b->b_except_handler);
681
    }
682
#endif
683
50.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
408k
        for (int i=0; i < b->b_iused; i++) {
685
364k
            cfg_instr *instr = &b->b_instr[i];
686
364k
            if (is_block_push(instr)) {
687
2.57k
                instr->i_target->b_except_handler = 1;
688
2.57k
            }
689
364k
        }
690
43.6k
    }
691
6.55k
    return SUCCESS;
692
6.55k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
2.56k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
2.56k
    assert(is_block_push(setup));
704
2.56k
    int opcode = setup->i_opcode;
705
2.56k
    basicblock * target = setup->i_target;
706
2.56k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
1.47k
        target->b_preserve_lasti = 1;
708
1.47k
    }
709
2.56k
    assert(stack->depth <= CO_MAXBLOCKS);
710
2.56k
    stack->handlers[++stack->depth] = target;
711
2.56k
    return target;
712
2.56k
}
713
714
static basicblock *
715
2.06k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
2.06k
    assert(stack->depth > 0);
717
2.06k
    return stack->handlers[--stack->depth];
718
2.06k
}
719
720
static basicblock *
721
37.3k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
37.3k
    return stack->handlers[stack->depth];
723
37.3k
}
724
725
static struct _PyCfgExceptStack *
726
6.55k
make_except_stack(void) {
727
6.55k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
6.55k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
6.55k
    new->depth = 0;
733
6.55k
    new->handlers[0] = NULL;
734
6.55k
    return new;
735
6.55k
}
736
737
static struct _PyCfgExceptStack *
738
13.9k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
13.9k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
13.9k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
13.9k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
13.9k
    return copy;
746
13.9k
}
747
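/* [Editorial sketch, not part of the original file] The exception-stack
   helpers above in isolation: pushing a SETUP_FINALLY handler makes its
   target the current handler, and popping restores the previous one.
   "handler_block" is a hypothetical parameter. */
static int
example_except_stack(basicblock *handler_block)
{
    struct _PyCfgExceptStack *stack = make_except_stack();
    if (stack == NULL) {
        return ERROR;
    }
    cfg_instr setup = { .i_opcode = SETUP_FINALLY, .i_target = handler_block };
    push_except_block(stack, &setup);
    assert(except_stack_top(stack) == handler_block);
    assert(pop_except_block(stack) == handler_block);
    assert(except_stack_top(stack) == NULL);  /* back to "no handler" */
    PyMem_Free(stack);
    return SUCCESS;
}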
748
static basicblock**
749
49.3k
make_cfg_traversal_stack(basicblock *entryblock) {
750
49.3k
    int nblocks = 0;
751
398k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
348k
        b->b_visited = 0;
753
348k
        nblocks++;
754
348k
    }
755
49.3k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
49.3k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
49.3k
    return stack;
760
49.3k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have a different stack effect when jumping to the target
765
   than when not jumping. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jumping
768
   * 1 -- when jumping
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
339k
{
779
339k
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
339k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
339k
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
339k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
339k
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
339k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
2.56k
        effects->net = 0;
793
2.56k
        return 0;
794
2.56k
    }
795
337k
    effects->net = pushed - popped;
796
337k
    return 0;
797
339k
}
798
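/* [Editorial sketch, not part of the original file] Querying both paths of
   a conditional jump, as calculate_stackdepth() does below. For
   POP_JUMP_IF_FALSE the condition is popped whether or not the jump is
   taken, so both nets should be -1 under the current opcode metadata. */
static int
example_stack_effects(void)
{
    stack_effects fallthrough, taken;
    RETURN_IF_ERROR(get_stack_effects(POP_JUMP_IF_FALSE, 0, /*jump=*/0, &fallthrough));
    RETURN_IF_ERROR(get_stack_effects(POP_JUMP_IF_FALSE, 0, /*jump=*/1, &taken));
    assert(fallthrough.net == -1 && taken.net == -1);
    return SUCCESS;
}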
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
46.0k
{
802
46.0k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
46.0k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
37.2k
        assert(b->b_startdepth < 0);
808
37.2k
        b->b_startdepth = depth;
809
37.2k
        *(*sp)++ = b;
810
37.2k
    }
811
46.0k
    return SUCCESS;
812
46.0k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
6.55k
{
820
6.55k
    basicblock *entryblock = g->g_entryblock;
821
50.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
44.3k
        b->b_startdepth = INT_MIN;
823
44.3k
    }
824
6.55k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
6.55k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
6.55k
    int stackdepth = -1;
831
6.55k
    int maxdepth = 0;
832
6.55k
    basicblock **sp = stack;
833
6.55k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
43.8k
    while (sp != stack) {
837
37.2k
        basicblock *b = *--sp;
838
37.2k
        int depth = b->b_startdepth;
839
37.2k
        assert(depth >= 0);
840
37.2k
        basicblock *next = b->b_next;
841
339k
        for (int i = 0; i < b->b_iused; i++) {
842
319k
            cfg_instr *instr = &b->b_instr[i];
843
319k
            stack_effects effects;
844
319k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
319k
            int new_depth = depth + effects.net;
851
319k
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow");
854
0
                goto error;
855
0
            }
856
319k
            maxdepth = Py_MAX(maxdepth, depth);
857
319k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
19.9k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
19.9k
                int target_depth = depth + effects.net;
865
19.9k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
19.9k
                maxdepth = Py_MAX(maxdepth, depth);
867
19.9k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
19.9k
            }
871
319k
            depth = new_depth;
872
319k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
319k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
319k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
17.7k
            {
876
                /* remaining code is dead */
877
17.7k
                next = NULL;
878
17.7k
                break;
879
17.7k
            }
880
319k
        }
881
37.2k
        if (next != NULL) {
882
19.5k
            assert(BB_HAS_FALLTHROUGH(b));
883
19.5k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
19.5k
        }
887
37.2k
    }
888
6.55k
    stackdepth = maxdepth;
889
6.55k
error:
890
6.55k
    PyMem_Free(stack);
891
6.55k
    return stackdepth;
892
6.55k
}
893
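/* [Editorial worked example, not part of the original file] For the
   straight-line sequence LOAD_CONST a; LOAD_CONST b; BINARY_OP; RETURN_VALUE
   the net effects are +1, +1, -1, -1, the running depth is 1, 2, 1, 0, and
   the block therefore needs a stack of size 2. */
static int
example_stackdepth_by_hand(void)
{
    static const int net[] = {1, 1, -1, -1};
    int depth = 0, maxdepth = 0;
    for (size_t i = 0; i < Py_ARRAY_LENGTH(net); i++) {
        depth += net[i];
        maxdepth = Py_MAX(maxdepth, depth);
    }
    assert(depth == 0 && maxdepth == 2);
    return maxdepth;
}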
894
static int
895
6.55k
label_exception_targets(basicblock *entryblock) {
896
6.55k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
6.55k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
6.55k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
6.55k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
6.55k
    except_stack->depth = 0;
907
6.55k
    todo_stack[0] = entryblock;
908
6.55k
    entryblock->b_visited = 1;
909
6.55k
    entryblock->b_exceptstack = except_stack;
910
6.55k
    basicblock **todo = &todo_stack[1];
911
6.55k
    basicblock *handler = NULL;
912
43.9k
    while (todo > todo_stack) {
913
37.3k
        todo--;
914
37.3k
        basicblock *b = todo[0];
915
37.3k
        assert(b->b_visited == 1);
916
37.3k
        except_stack = b->b_exceptstack;
917
37.3k
        assert(except_stack != NULL);
918
37.3k
        b->b_exceptstack = NULL;
919
37.3k
        handler = except_stack_top(except_stack);
920
37.3k
        int last_yield_except_depth = -1;
921
389k
        for (int i = 0; i < b->b_iused; i++) {
922
351k
            cfg_instr *instr = &b->b_instr[i];
923
351k
            if (is_block_push(instr)) {
924
2.56k
                if (!instr->i_target->b_visited) {
925
2.56k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
2.56k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
2.56k
                    instr->i_target->b_exceptstack = copy;
930
2.56k
                    todo[0] = instr->i_target;
931
2.56k
                    instr->i_target->b_visited = 1;
932
2.56k
                    todo++;
933
2.56k
                }
934
2.56k
                handler = push_except_block(except_stack, instr);
935
2.56k
            }
936
349k
            else if (instr->i_opcode == POP_BLOCK) {
937
2.06k
                handler = pop_except_block(except_stack);
938
2.06k
                INSTR_SET_OP0(instr, NOP);
939
2.06k
            }
940
347k
            else if (is_jump(instr)) {
941
19.1k
                instr->i_except = handler;
942
19.1k
                assert(i == b->b_iused -1);
943
19.1k
                if (!instr->i_target->b_visited) {
944
13.7k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
11.3k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
11.3k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
11.3k
                        instr->i_target->b_exceptstack = copy;
950
11.3k
                    }
951
2.40k
                    else {
952
2.40k
                        instr->i_target->b_exceptstack = except_stack;
953
2.40k
                        except_stack = NULL;
954
2.40k
                    }
955
13.7k
                    todo[0] = instr->i_target;
956
13.7k
                    instr->i_target->b_visited = 1;
957
13.7k
                    todo++;
958
13.7k
                }
959
19.1k
            }
960
327k
            else if (instr->i_opcode == YIELD_VALUE) {
961
464
                instr->i_except = handler;
962
464
                last_yield_except_depth = except_stack->depth;
963
464
            }
964
327k
            else if (instr->i_opcode == RESUME) {
965
7.01k
                instr->i_except = handler;
966
7.01k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
464
                    assert(last_yield_except_depth >= 0);
968
464
                    if (last_yield_except_depth == 1) {
969
399
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
399
                    }
971
464
                    last_yield_except_depth = -1;
972
464
                }
973
7.01k
            }
974
320k
            else {
975
320k
                instr->i_except = handler;
976
320k
            }
977
351k
        }
978
37.3k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
979
14.4k
            assert(except_stack != NULL);
980
14.4k
            b->b_next->b_exceptstack = except_stack;
981
14.4k
            todo[0] = b->b_next;
982
14.4k
            b->b_next->b_visited = 1;
983
14.4k
            todo++;
984
14.4k
        }
985
22.8k
        else if (except_stack != NULL) {
986
20.4k
           PyMem_Free(except_stack);
987
20.4k
        }
988
37.3k
    }
989
#ifdef Py_DEBUG
990
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991
        assert(b->b_exceptstack == NULL);
992
    }
993
#endif
994
6.55k
    PyMem_Free(todo_stack);
995
6.55k
    return SUCCESS;
996
0
error:
997
0
    PyMem_Free(todo_stack);
998
0
    PyMem_Free(except_stack);
999
0
    return ERROR;
1000
6.55k
}
1001
1002
/***** CFG optimizations *****/
1003
1004
static int
1005
13.1k
remove_unreachable(basicblock *entryblock) {
1006
101k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1007
87.9k
        b->b_predecessors = 0;
1008
87.9k
    }
1009
13.1k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1010
13.1k
    if (stack == NULL) {
1011
0
        return ERROR;
1012
0
    }
1013
13.1k
    basicblock **sp = stack;
1014
13.1k
    entryblock->b_predecessors = 1;
1015
13.1k
    *sp++ = entryblock;
1016
13.1k
    entryblock->b_visited = 1;
1017
87.0k
    while (sp > stack) {
1018
73.9k
        basicblock *b = *(--sp);
1019
73.9k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1020
37.2k
            if (!b->b_next->b_visited) {
1021
31.7k
                assert(b->b_next->b_predecessors == 0);
1022
31.7k
                *sp++ = b->b_next;
1023
31.7k
                b->b_next->b_visited = 1;
1024
31.7k
            }
1025
37.2k
            b->b_next->b_predecessors++;
1026
37.2k
        }
1027
753k
        for (int i = 0; i < b->b_iused; i++) {
1028
679k
            basicblock *target;
1029
679k
            cfg_instr *instr = &b->b_instr[i];
1030
679k
            if (is_jump(instr) || is_block_push(instr)) {
1031
41.3k
                target = instr->i_target;
1032
41.3k
                if (!target->b_visited) {
1033
29.0k
                    *sp++ = target;
1034
29.0k
                    target->b_visited = 1;
1035
29.0k
                }
1036
41.3k
                target->b_predecessors++;
1037
41.3k
            }
1038
679k
        }
1039
73.9k
    }
1040
13.1k
    PyMem_Free(stack);
1041
1042
    /* Delete unreachable instructions */
1043
101k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1044
87.9k
       if (b->b_predecessors == 0) {
1045
14.0k
            b->b_iused = 0;
1046
14.0k
            b->b_except_handler = 0;
1047
14.0k
       }
1048
87.9k
    }
1049
13.1k
    return SUCCESS;
1050
13.1k
}
1051
1052
static int
1053
243k
basicblock_remove_redundant_nops(basicblock *bb) {
1054
    /* Remove NOPs when legal to do so. */
1055
243k
    int dest = 0;
1056
243k
    int prev_lineno = -1;
1057
1.94M
    for (int src = 0; src < bb->b_iused; src++) {
1058
1.70M
        int lineno = bb->b_instr[src].i_loc.lineno;
1059
1.70M
        if (bb->b_instr[src].i_opcode == NOP) {
1060
            /* Eliminate no-op if it doesn't have a line number */
1061
40.6k
            if (lineno < 0) {
1062
5.77k
                continue;
1063
5.77k
            }
1064
            /* or, if the previous instruction had the same line number. */
1065
34.8k
            if (prev_lineno == lineno) {
1066
28.2k
                continue;
1067
28.2k
            }
1068
            /* or, if the next instruction has the same line number or no line number */
1069
6.61k
            if (src < bb->b_iused - 1) {
1070
5.70k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1071
5.70k
                if (next_lineno == lineno) {
1072
3.80k
                    continue;
1073
3.80k
                }
1074
1.89k
                if (next_lineno < 0) {
1075
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1076
0
                    continue;
1077
0
                }
1078
1.89k
            }
1079
913
            else {
1080
913
                basicblock *next = next_nonempty_block(bb->b_next);
1081
                /* or, if it is the last instruction in the BB and the next BB has the same line number */
1082
913
                if (next) {
1083
913
                    location next_loc = NO_LOCATION;
1084
913
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1085
913
                        cfg_instr *instr = &next->b_instr[next_i];
1086
913
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1087
                            /* Skip over NOPs without a location; they will be removed */
1088
0
                            continue;
1089
0
                        }
1090
913
                        next_loc = instr->i_loc;
1091
913
                        break;
1092
913
                    }
1093
913
                    if (lineno == next_loc.lineno) {
1094
7
                        continue;
1095
7
                    }
1096
913
                }
1097
913
            }
1098
1099
6.61k
        }
1100
1.66M
        if (dest != src) {
1101
157k
            bb->b_instr[dest] = bb->b_instr[src];
1102
157k
        }
1103
1.66M
        dest++;
1104
1.66M
        prev_lineno = lineno;
1105
1.66M
    }
1106
243k
    assert(dest <= bb->b_iused);
1107
243k
    int num_removed = bb->b_iused - dest;
1108
243k
    bb->b_iused = dest;
1109
243k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1110
243k
    return num_removed;
1111
243k
}
1112
1113
static int
1114
22.5k
remove_redundant_nops(cfg_builder *g) {
1115
22.5k
    int changes = 0;
1116
222k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1117
199k
        int change = basicblock_remove_redundant_nops(b);
1118
199k
        RETURN_IF_ERROR(change);
1119
199k
        changes += change;
1120
199k
    }
1121
22.5k
    return changes;
1122
22.5k
}
1123
1124
static int
1125
remove_redundant_nops_and_pairs(basicblock *entryblock)
1126
6.55k
{
1127
6.55k
    bool done = false;
1128
1129
13.1k
    while (! done) {
1130
6.55k
        done = true;
1131
6.55k
        cfg_instr *prev_instr = NULL;
1132
6.55k
        cfg_instr *instr = NULL;
1133
50.8k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1134
44.2k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1135
44.2k
            if (IS_LABEL(b->b_label)) {
1136
                /* this block is a jump target, forget instr */
1137
19.0k
                instr = NULL;
1138
19.0k
            }
1139
370k
            for (int i = 0; i < b->b_iused; i++) {
1140
326k
                prev_instr = instr;
1141
326k
                instr = &b->b_instr[i];
1142
326k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1143
326k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1144
326k
                int opcode = instr->i_opcode;
1145
326k
                bool is_redundant_pair = false;
1146
326k
                if (opcode == POP_TOP) {
1147
7.48k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1148
0
                       is_redundant_pair = true;
1149
0
                   }
1150
7.48k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1151
0
                       is_redundant_pair = true;
1152
0
                   }
1153
7.48k
                }
1154
326k
                if (is_redundant_pair) {
1155
0
                    INSTR_SET_OP0(prev_instr, NOP);
1156
0
                    INSTR_SET_OP0(instr, NOP);
1157
0
                    done = false;
1158
0
                }
1159
326k
            }
1160
44.2k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1161
31.8k
                instr = NULL;
1162
31.8k
            }
1163
44.2k
        }
1164
6.55k
    }
1165
6.55k
    return SUCCESS;
1166
6.55k
}
1167
1168
static int
1169
16.0k
remove_redundant_jumps(cfg_builder *g) {
1170
    /* If a non-empty block ends with a jump instruction, check if the next
1171
     * non-empty block reached through normal flow control is the target
1172
     * of that jump. If it is, then the jump instruction is redundant and
1173
     * can be deleted.
1174
     *
1175
     * Return the number of changes applied, or -1 on error.
1176
     */
1177
1178
16.0k
    int changes = 0;
1179
171k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1180
155k
        cfg_instr *last = basicblock_last_instr(b);
1181
155k
        if (last == NULL) {
1182
21.6k
            continue;
1183
21.6k
        }
1184
133k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1185
133k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1186
18.8k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1187
18.8k
            if (jump_target == NULL) {
1188
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1189
0
                return ERROR;
1190
0
            }
1191
18.8k
            basicblock *next = next_nonempty_block(b->b_next);
1192
18.8k
            if (jump_target == next) {
1193
629
                changes++;
1194
629
                INSTR_SET_OP0(last, NOP);
1195
629
            }
1196
18.8k
        }
1197
133k
    }
1198
1199
16.0k
    return changes;
1200
16.0k
}
1201
1202
static inline bool
1203
53.1k
basicblock_has_no_lineno(basicblock *b) {
1204
63.1k
    for (int i = 0; i < b->b_iused; i++) {
1205
58.1k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1206
48.1k
            return false;
1207
48.1k
        }
1208
58.1k
    }
1209
4.99k
    return true;
1210
53.1k
}
1211
1212
/* Maximum size of basic block that should be copied in optimizer */
1213
2.13k
#define MAX_COPY_SIZE 4
1214
1215
/* If this block ends with an unconditional jump to a small exit block or
1216
 * a block that has no line numbers (and no fallthrough), then
1217
 * remove the jump and extend this block with the target.
1218
 * Returns 1 if extended, 0 if no change, and -1 on error.
1219
 */
1220
static int
1221
68.4k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1222
68.4k
    cfg_instr *last = basicblock_last_instr(bb);
1223
68.4k
    if (last == NULL) {
1224
0
        return 0;
1225
0
    }
1226
68.4k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1227
56.4k
        return 0;
1228
56.4k
    }
1229
11.9k
    basicblock *target = last->i_target;
1230
11.9k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1231
11.9k
                             target->b_iused <= MAX_COPY_SIZE);
1232
11.9k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1233
11.9k
                                     !BB_HAS_FALLTHROUGH(target));
1234
11.9k
    if (small_exit_block || no_lineno_no_fallthrough) {
1235
2.46k
        assert(is_jump(last));
1236
2.46k
        int removed_jump_opcode = last->i_opcode;
1237
2.46k
        INSTR_SET_OP0(last, NOP);
1238
2.46k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1239
2.46k
        if (no_lineno_no_fallthrough) {
1240
2.09k
            last = basicblock_last_instr(bb);
1241
2.09k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1242
2.09k
                removed_jump_opcode == JUMP)
1243
52
            {
1244
                /* Make sure we don't lose eval breaker checks */
1245
52
                last->i_opcode = JUMP;
1246
52
            }
1247
2.09k
        }
1248
2.46k
        target->b_predecessors--;
1249
2.46k
        return 1;
1250
2.46k
    }
1251
9.52k
    return 0;
1252
11.9k
}
1253
1254
static int
1255
6.55k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1256
6.55k
    bool changes;
1257
7.61k
    do {
1258
7.61k
        changes = false;
1259
76.0k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1260
68.4k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1261
68.4k
            RETURN_IF_ERROR(res);
1262
68.4k
            if (res) {
1263
2.46k
                changes = true;
1264
2.46k
            }
1265
68.4k
        }
1266
7.61k
    } while(changes); /* every change removes a jump, ensuring convergence */
1267
6.55k
    return changes;
1268
6.55k
}
1269
1270
// Attempt to eliminate jumps to jumps by updating inst to jump to
1271
// target->i_target using the provided opcode. Return whether or not the
1272
// optimization was successful.
1273
static bool
1274
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1275
679
{
1276
679
    assert(is_jump(inst));
1277
679
    assert(is_jump(target));
1278
679
    assert(inst == basicblock_last_instr(bb));
1279
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1280
    // changes (and we fall into an infinite loop):
1281
679
    if (inst->i_target != target->i_target) {
1282
        /* Change inst to NOP and append a jump to target->i_target. The
1283
         * NOP will be removed later if it's not needed for the lineno.
1284
         */
1285
679
        INSTR_SET_OP0(inst, NOP);
1286
1287
679
        RETURN_IF_ERROR(
1288
679
            basicblock_add_jump(
1289
679
                bb, opcode, target->i_target, target->i_loc));
1290
1291
679
        return true;
1292
679
    }
1293
0
    return false;
1294
679
}
1295
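/* [Editorial illustration, not part of the original file] Jump threading in
   instruction form:
       B1: ...; JUMP L1        L1: JUMP L2
   becomes
       B1: ...; NOP; JUMP L2   L1: JUMP L2   (L1 may later die as unreachable)
   The i_target equality check above prevents an infinite loop when a jump
   already points at its own target's target. */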
1296
static int
1297
loads_const(int opcode)
1298
17.8k
{
1299
17.8k
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1300
17.8k
}
1301
1302
/* Returns new reference */
1303
static PyObject*
1304
get_const_value(int opcode, int oparg, PyObject *co_consts)
1305
72.1k
{
1306
72.1k
    PyObject *constant = NULL;
1307
72.1k
    assert(loads_const(opcode));
1308
72.1k
    if (opcode == LOAD_CONST) {
1309
71.4k
        constant = PyList_GET_ITEM(co_consts, oparg);
1310
71.4k
    }
1311
72.1k
    if (opcode == LOAD_SMALL_INT) {
1312
753
        return PyLong_FromLong(oparg);
1313
753
    }
1314
1315
71.4k
    if (constant == NULL) {
1316
0
        PyErr_SetString(PyExc_SystemError,
1317
0
                        "Internal error: failed to get value of a constant");
1318
0
        return NULL;
1319
0
    }
1320
71.4k
    return Py_NewRef(constant);
1321
71.4k
}
1322
1323
// Steals a reference to newconst.
1324
static int
1325
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1326
2.25k
{
1327
2.25k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1328
0
        Py_DECREF(newconst);
1329
0
        return -1;
1330
0
    }
1331
1332
2.25k
    Py_ssize_t index;
1333
102k
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1334
100k
        if (PyList_GET_ITEM(consts, index) == newconst) {
1335
672
            break;
1336
672
        }
1337
100k
    }
1338
2.25k
    if (index == PyList_GET_SIZE(consts)) {
1339
1.58k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1340
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1341
0
            Py_DECREF(newconst);
1342
0
            return -1;
1343
0
        }
1344
1.58k
        if (PyList_Append(consts, newconst)) {
1345
0
            Py_DECREF(newconst);
1346
0
            return -1;
1347
0
        }
1348
1.58k
    }
1349
2.25k
    Py_DECREF(newconst);
1350
2.25k
    return (int)index;
1351
2.25k
}
1352
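/* [Editorial sketch, not part of the original file] Typical use of
   add_const(): intern a constant (the reference is stolen) and rewrite an
   instruction to load it, as instr_make_load_const() below does. */
static int
example_intern_constant(cfg_instr *instr, PyObject *consts, PyObject *const_cache)
{
    int oparg = add_const(Py_NewRef(Py_None), consts, const_cache);
    RETURN_IF_ERROR(oparg);
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
    return SUCCESS;
}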
1353
/*
1354
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1355
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1356
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1357
1358
  Return boolean indicating whether "size" such instructions were found.
1359
*/
1360
static bool
1361
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1362
11.6k
{
1363
11.6k
    assert(start < bb->b_iused);
1364
11.6k
    assert(size >= 0);
1365
11.6k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1366
1367
20.0k
    for (; start >= 0 && size > 0; start--) {
1368
17.8k
        cfg_instr *instr = &bb->b_instr[start];
1369
17.8k
        if (instr->i_opcode == NOP) {
1370
264
            continue;
1371
264
        }
1372
17.5k
        if (!loads_const(instr->i_opcode)) {
1373
9.45k
            return false;
1374
9.45k
        }
1375
8.12k
        instrs[--size] = instr;
1376
8.12k
    }
1377
1378
2.19k
    return size == 0;
1379
11.6k
}
1380
1381
/*
1382
  Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1383
  The caller must ensure "instrs" has at least "size" elements.
1384
*/
1385
static void
1386
nop_out(cfg_instr **instrs, int size)
1387
2.33k
{
1388
7.10k
    for (int i = 0; i < size; i++) {
1389
4.77k
        cfg_instr *instr = instrs[i];
1390
4.77k
        assert(instr->i_opcode != NOP);
1391
4.77k
        INSTR_SET_OP0(instr, NOP);
1392
4.77k
        INSTR_SET_LOC(instr, NO_LOCATION);
1393
4.77k
    }
1394
2.33k
}
1395
1396
/* Does not steal reference to "newconst".
1397
   Return 1 if the instruction was changed to LOAD_SMALL_INT.
1398
   Return 0 if the instruction could not be changed to LOAD_SMALL_INT.
1399
   Return -1 on error.
1400
*/
1401
static int
1402
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1403
                               PyObject *consts, PyObject *const_cache)
1404
67.8k
{
1405
67.8k
    if (PyLong_CheckExact(newconst)) {
1406
23.8k
        int overflow;
1407
23.8k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1408
23.8k
        if (val == -1 && PyErr_Occurred()) {
1409
0
            return -1;
1410
0
        }
1411
23.8k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1412
18.5k
            assert(_Py_IsImmortal(newconst));
1413
18.5k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1414
18.5k
            return 1;
1415
18.5k
        }
1416
23.8k
    }
1417
49.2k
    return 0;
1418
67.8k
}
1419
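/* [Editorial sketch, not part of the original file] Values in the small-int
   cache range fold to LOAD_SMALL_INT, which encodes the value directly in
   the oparg and needs no slot in the consts table. */
static int
example_fold_small_int(cfg_instr *instr, PyObject *consts, PyObject *const_cache)
{
    PyObject *five = PyLong_FromLong(5);
    if (five == NULL) {
        return ERROR;
    }
    int res = maybe_instr_make_load_smallint(instr, five, consts, const_cache);
    Py_DECREF(five);  /* the reference is not stolen */
    RETURN_IF_ERROR(res);
    assert(res == 1 && instr->i_opcode == LOAD_SMALL_INT && instr->i_oparg == 5);
    return SUCCESS;
}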
1420
1421
/* Steals reference to "newconst" */
1422
static int
1423
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1424
                      PyObject *consts, PyObject *const_cache)
1425
1.97k
{
1426
1.97k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1427
1.97k
    if (res < 0) {
1428
0
        Py_DECREF(newconst);
1429
0
        return ERROR;
1430
0
    }
1431
1.97k
    if (res > 0) {
1432
9
        return SUCCESS;
1433
9
    }
1434
1.96k
    int oparg = add_const(newconst, consts, const_cache);
1435
1.96k
    RETURN_IF_ERROR(oparg);
1436
1.96k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1437
1.96k
    return SUCCESS;
1438
1.96k
}
1439
1440
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1441
   with    LOAD_CONST (c1, c2, ... cn).
1442
   The consts table must still be in list form so that the
1443
   new constant (c1, c2, ... cn) can be appended.
1444
   Called with "i" indexing the BUILD_TUPLE instruction in "bb".
1445
*/
1446
static int
1447
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1448
3.75k
{
1449
    /* Pre-conditions */
1450
3.75k
    assert(PyDict_CheckExact(const_cache));
1451
3.75k
    assert(PyList_CheckExact(consts));
1452
1453
3.75k
    cfg_instr *instr = &bb->b_instr[i];
1454
3.75k
    assert(instr->i_opcode == BUILD_TUPLE);
1455
1456
3.75k
    int seq_size = instr->i_oparg;
1457
3.75k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1458
0
        return SUCCESS;
1459
0
    }
1460
1461
3.75k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1462
3.75k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1463
        /* not a const sequence */
1464
2.36k
        return SUCCESS;
1465
2.36k
    }
1466
1467
1.39k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1468
1.39k
    if (const_tuple == NULL) {
1469
0
        return ERROR;
1470
0
    }
1471
1472
3.78k
    for (int i = 0; i < seq_size; i++) {
1473
2.38k
        cfg_instr *inst = const_instrs[i];
1474
2.38k
        assert(loads_const(inst->i_opcode));
1475
2.38k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1476
2.38k
        if (element == NULL) {
1477
0
            Py_DECREF(const_tuple);
1478
0
            return ERROR;
1479
0
        }
1480
2.38k
        PyTuple_SET_ITEM(const_tuple, i, element);
1481
2.38k
    }
1482
1483
1.39k
    nop_out(const_instrs, seq_size);
1484
1.39k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1485
1.39k
}
1486
1487
/* Replace:
1488
    BUILD_LIST 0
1489
    LOAD_CONST c1
1490
    LIST_APPEND 1
1491
    LOAD_CONST c2
1492
    LIST_APPEND 1
1493
    ...
1494
    LOAD_CONST cN
1495
    LIST_APPEND 1
1496
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1497
   with:
1498
    LOAD_CONST (c1, c2, ... cN)
1499
*/
1500
static int
1501
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1502
                                      PyObject *consts, PyObject *const_cache)
1503
93
{
1504
93
    assert(PyDict_CheckExact(const_cache));
1505
93
    assert(PyList_CheckExact(consts));
1506
93
    assert(i >= 0);
1507
93
    assert(i < bb->b_iused);
1508
1509
93
    cfg_instr *intrinsic = &bb->b_instr[i];
1510
93
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1511
93
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1512
1513
93
    int consts_found = 0;
1514
93
    bool expect_append = true;
1515
1516
278
    for (int pos = i - 1; pos >= 0; pos--) {
1517
278
        cfg_instr *instr = &bb->b_instr[pos];
1518
278
        int opcode = instr->i_opcode;
1519
278
        int oparg = instr->i_oparg;
1520
1521
278
        if (opcode == NOP) {
1522
0
            continue;
1523
0
        }
1524
1525
278
        if (opcode == BUILD_LIST && oparg == 0) {
1526
2
            if (!expect_append) {
1527
                /* Not a sequence start. */
1528
0
                return SUCCESS;
1529
0
            }
1530
1531
            /* Sequence start, we are done. */
1532
2
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1533
2
            if (newconst == NULL) {
1534
0
                return ERROR;
1535
0
            }
1536
1537
184
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1538
182
                instr = &bb->b_instr[newpos];
1539
182
                if (instr->i_opcode == NOP) {
1540
0
                    continue;
1541
0
                }
1542
182
                if (loads_const(instr->i_opcode)) {
1543
90
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1544
90
                    if (constant == NULL) {
1545
0
                        Py_DECREF(newconst);
1546
0
                        return ERROR;
1547
0
                    }
1548
90
                    assert(consts_found > 0);
1549
90
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1550
90
                }
1551
182
                nop_out(&instr, 1);
1552
182
            }
1553
2
            assert(consts_found == 0);
1554
2
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1555
2
        }
1556
1557
276
        if (expect_append) {
1558
183
            if (opcode != LIST_APPEND || oparg != 1) {
1559
90
                return SUCCESS;
1560
90
            }
1561
183
        }
1562
93
        else {
1563
93
            if (!loads_const(opcode)) {
1564
1
                return SUCCESS;
1565
1
            }
1566
92
            consts_found++;
1567
92
        }
1568
1569
185
        expect_append = !expect_append;
1570
185
    }
1571
1572
    /* Did not find sequence start. */
1573
0
    return SUCCESS;
1574
93
}
1575
1576
2.55k
#define MIN_CONST_SEQUENCE_SIZE 3
1577
/*
1578
Optimize lists and sets for:
1579
    1. "for" loop, comprehension or "in"/"not in" tests:
1580
           Change literal list or set of constants into constant
1581
           tuple or frozenset respectively. Change list of
1582
           non-constants into tuple.
1583
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1584
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1585
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1586
           or BUILD_SET & SET_UPDATE respectively.
1587
*/
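
For case 2, seq_size >= MIN_CONST_SEQUENCE_SIZE guarantees at least two freshly NOP'd load slots before the BUILD_LIST/BUILD_SET, so the rewrite can reuse slots i-2, i-1 and i in place. A minimal sketch of that three-slot rewrite with toy opcodes (illustrative only):

#include <stdio.h>

enum { LOAD_CONST, BUILD_LIST, LIST_EXTEND, NOP };
typedef struct { int opcode; int oparg; } instr;

int main(void)
{
    /* LOAD_CONST 0..2; BUILD_LIST 3  (a constant list, not iterated over) */
    instr code[] = {
        {LOAD_CONST, 0}, {LOAD_CONST, 1}, {LOAD_CONST, 2}, {BUILD_LIST, 3},
    };
    int i = 3;       /* index of BUILD_LIST */
    int index = 3;   /* pretend the tuple (c0, c1, c2) is consts[3] */

    for (int k = 0; k < 3; k++) {   /* NOP out the const loads */
        code[k] = (instr){NOP, 0};
    }
    /* Reuse the last three slots: BUILD_LIST 0; LOAD_CONST t; LIST_EXTEND 1 */
    code[i - 2] = (instr){BUILD_LIST, 0};
    code[i - 1] = (instr){LOAD_CONST, index};
    code[i]     = (instr){LIST_EXTEND, 1};

    for (int k = 0; k < 4; k++) {
        printf("%d %d\n", code[k].opcode, code[k].oparg);
    }
    return 0;
}
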
1588
static int
1589
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1590
                        PyObject *consts, PyObject *const_cache)
1591
1.27k
{
1592
1.27k
    assert(PyDict_CheckExact(const_cache));
1593
1.27k
    assert(PyList_CheckExact(consts));
1594
1595
1.27k
    cfg_instr *instr = &bb->b_instr[i];
1596
1.27k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1597
1598
1.27k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1599
1.27k
    int seq_size = instr->i_oparg;
1600
1.27k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1601
1.27k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1602
1.04k
    {
1603
1.04k
        return SUCCESS;
1604
1.04k
    }
1605
1606
235
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1607
235
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1608
56
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1609
            /* iterate over a tuple instead of list */
1610
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1611
0
        }
1612
56
        return SUCCESS;
1613
56
    }
1614
1615
179
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1616
179
    if (const_result == NULL) {
1617
0
        return ERROR;
1618
0
    }
1619
1620
1.76k
    for (int i = 0; i < seq_size; i++) {
1621
1.58k
        cfg_instr *inst = const_instrs[i];
1622
1.58k
        assert(loads_const(inst->i_opcode));
1623
1.58k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1624
1.58k
        if (element == NULL) {
1625
0
            Py_DECREF(const_result);
1626
0
            return ERROR;
1627
0
        }
1628
1.58k
        PyTuple_SET_ITEM(const_result, i, element);
1629
1.58k
    }
1630
1631
179
    if (instr->i_opcode == BUILD_SET) {
1632
51
        PyObject *frozenset = PyFrozenSet_New(const_result);
1633
51
        if (frozenset == NULL) {
1634
0
            Py_DECREF(const_result);
1635
0
            return ERROR;
1636
0
        }
1637
51
        Py_SETREF(const_result, frozenset);
1638
51
    }
1639
1640
179
    int index = add_const(const_result, consts, const_cache);
1641
179
    RETURN_IF_ERROR(index);
1642
179
    nop_out(const_instrs, seq_size);
1643
1644
179
    if (contains_or_iter) {
1645
51
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1646
51
    }
1647
128
    else {
1648
128
        assert(i >= 2);
1649
128
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1650
1651
128
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1652
1653
128
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1654
128
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1655
128
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1656
128
    }
1657
179
    return SUCCESS;
1658
179
}
1659
1660
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1661
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1662
 * Used to avoid creating constants which are slow to hash.
1663
 */
1664
static Py_ssize_t
1665
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1666
0
{
1667
0
    if (PyTuple_Check(obj)) {
1668
0
        Py_ssize_t i;
1669
0
        limit -= PyTuple_GET_SIZE(obj);
1670
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1671
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1672
0
            if (limit < 0) {
1673
0
                return limit;
1674
0
            }
1675
0
        }
1676
0
    }
1677
0
    return limit;
1678
0
}
1679
1680
32
#define MAX_INT_SIZE           128  /* bits */
1681
0
#define MAX_COLLECTION_SIZE    256  /* items */
1682
10
#define MAX_STR_SIZE          4096  /* characters */
1683
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1684
1685
static PyObject *
1686
const_folding_safe_multiply(PyObject *v, PyObject *w)
1687
23
{
1688
23
    if (PyLong_Check(v) && PyLong_Check(w) &&
1689
23
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1690
23
    ) {
1691
3
        int64_t vbits = _PyLong_NumBits(v);
1692
3
        int64_t wbits = _PyLong_NumBits(w);
1693
3
        assert(vbits >= 0);
1694
3
        assert(wbits >= 0);
1695
3
        if (vbits + wbits > MAX_INT_SIZE) {
1696
0
            return NULL;
1697
0
        }
1698
3
    }
1699
20
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1700
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1701
0
        if (size) {
1702
0
            long n = PyLong_AsLong(v);
1703
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1704
0
                return NULL;
1705
0
            }
1706
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1707
0
                return NULL;
1708
0
            }
1709
0
        }
1710
0
    }
1711
20
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1712
10
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1713
10
                                               PyBytes_GET_SIZE(w);
1714
10
        if (size) {
1715
10
            long n = PyLong_AsLong(v);
1716
10
            if (n < 0 || n > MAX_STR_SIZE / size) {
1717
3
                return NULL;
1718
3
            }
1719
10
        }
1720
10
    }
1721
10
    else if (PyLong_Check(w) &&
1722
10
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1723
10
    {
1724
10
        return const_folding_safe_multiply(w, v);
1725
10
    }
1726
1727
10
    return PyNumber_Multiply(v, w);
1728
23
}
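
The integer guard is sound because the bit length of a product of positive integers never exceeds the sum of the operands' bit lengths, so checking vbits + wbits against MAX_INT_SIZE before multiplying bounds the result at 128 bits. A small self-check of that bound on plain machine integers (not PyLongs):

#include <assert.h>
#include <stdio.h>

static int bit_length(unsigned long long x)
{
    int n = 0;
    while (x) {
        n++;
        x >>= 1;
    }
    return n;
}

int main(void)
{
    /* bits(v*w) <= bits(v) + bits(w); operands stay small enough that
       the product fits in 64 bits. */
    for (unsigned long long v = 1; v < 1000; v += 7) {
        for (unsigned long long w = 1; w < 1000; w += 11) {
            assert(bit_length(v * w) <= bit_length(v) + bit_length(w));
        }
    }
    printf("bound holds\n");
    return 0;
}

The power and left-shift guards below rest on the same reasoning: bits(v**w) is at most bits(v)*w, and for positive v, bits(v << w) is exactly bits(v) + w.
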
1729
1730
static PyObject *
1731
const_folding_safe_power(PyObject *v, PyObject *w)
1732
8
{
1733
8
    if (PyLong_Check(v) && PyLong_Check(w) &&
1734
8
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1735
8
    ) {
1736
8
        int64_t vbits = _PyLong_NumBits(v);
1737
8
        size_t wbits = PyLong_AsSize_t(w);
1738
8
        assert(vbits >= 0);
1739
8
        if (wbits == (size_t)-1) {
1740
0
            return NULL;
1741
0
        }
1742
8
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1743
0
            return NULL;
1744
0
        }
1745
8
    }
1746
1747
8
    return PyNumber_Power(v, w, Py_None);
1748
8
}
1749
1750
static PyObject *
1751
const_folding_safe_lshift(PyObject *v, PyObject *w)
1752
7
{
1753
7
    if (PyLong_Check(v) && PyLong_Check(w) &&
1754
7
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1755
7
    ) {
1756
7
        int64_t vbits = _PyLong_NumBits(v);
1757
7
        size_t wbits = PyLong_AsSize_t(w);
1758
7
        assert(vbits >= 0);
1759
7
        if (wbits == (size_t)-1) {
1760
0
            return NULL;
1761
0
        }
1762
7
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1763
0
            return NULL;
1764
0
        }
1765
7
    }
1766
1767
7
    return PyNumber_Lshift(v, w);
1768
7
}
1769
1770
static PyObject *
1771
const_folding_safe_mod(PyObject *v, PyObject *w)
1772
0
{
1773
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1774
0
        return NULL;
1775
0
    }
1776
1777
0
    return PyNumber_Remainder(v, w);
1778
0
}
1779
1780
static PyObject *
1781
eval_const_binop(PyObject *left, int op, PyObject *right)
1782
42
{
1783
42
    assert(left != NULL && right != NULL);
1784
42
    assert(op >= 0 && op <= NB_OPARG_LAST);
1785
1786
42
    PyObject *result = NULL;
1787
42
    switch (op) {
1788
4
        case NB_ADD:
1789
4
            result = PyNumber_Add(left, right);
1790
4
            break;
1791
0
        case NB_SUBTRACT:
1792
0
            result = PyNumber_Subtract(left, right);
1793
0
            break;
1794
13
        case NB_MULTIPLY:
1795
13
            result = const_folding_safe_multiply(left, right);
1796
13
            break;
1797
1
        case NB_TRUE_DIVIDE:
1798
1
            result = PyNumber_TrueDivide(left, right);
1799
1
            break;
1800
0
        case NB_FLOOR_DIVIDE:
1801
0
            result = PyNumber_FloorDivide(left, right);
1802
0
            break;
1803
0
        case NB_REMAINDER:
1804
0
            result = const_folding_safe_mod(left, right);
1805
0
            break;
1806
8
        case NB_POWER:
1807
8
            result = const_folding_safe_power(left, right);
1808
8
            break;
1809
7
        case NB_LSHIFT:
1810
7
            result = const_folding_safe_lshift(left, right);
1811
7
            break;
1812
0
        case NB_RSHIFT:
1813
0
            result = PyNumber_Rshift(left, right);
1814
0
            break;
1815
0
        case NB_OR:
1816
0
            result = PyNumber_Or(left, right);
1817
0
            break;
1818
0
        case NB_XOR:
1819
0
            result = PyNumber_Xor(left, right);
1820
0
            break;
1821
0
        case NB_AND:
1822
0
            result = PyNumber_And(left, right);
1823
0
            break;
1824
9
        case NB_SUBSCR:
1825
9
            result = PyObject_GetItem(left, right);
1826
9
            break;
1827
0
        case NB_MATRIX_MULTIPLY:
1828
            // No builtin constants implement matrix multiplication
1829
0
            break;
1830
0
        default:
1831
0
            Py_UNREACHABLE();
1832
42
    }
1833
42
    return result;
1834
42
}
1835
1836
static int
1837
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1838
6.96k
{
1839
7.00k
    #define BINOP_OPERAND_COUNT 2
1840
6.96k
    assert(PyDict_CheckExact(const_cache));
1841
6.96k
    assert(PyList_CheckExact(consts));
1842
1843
6.96k
    cfg_instr *binop = &bb->b_instr[i];
1844
6.96k
    assert(binop->i_opcode == BINARY_OP);
1845
1846
6.96k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1847
6.96k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1848
        /* not a const sequence */
1849
6.92k
        return SUCCESS;
1850
6.92k
    }
1851
1852
42
    cfg_instr *lhs_instr = operands_instrs[0];
1853
42
    assert(loads_const(lhs_instr->i_opcode));
1854
42
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1855
42
    if (lhs == NULL) {
1856
0
        return ERROR;
1857
0
    }
1858
1859
42
    cfg_instr *rhs_instr = operands_instrs[1];
1860
42
    assert(loads_const(rhs_instr->i_opcode));
1861
42
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1862
42
    if (rhs == NULL) {
1863
0
        Py_DECREF(lhs);
1864
0
        return ERROR;
1865
0
    }
1866
1867
42
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1868
42
    Py_DECREF(lhs);
1869
42
    Py_DECREF(rhs);
1870
42
    if (newconst == NULL) {
1871
3
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1872
0
            return ERROR;
1873
0
        }
1874
3
        PyErr_Clear();
1875
3
        return SUCCESS;
1876
3
    }
1877
1878
39
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1879
39
    return instr_make_load_const(binop, newconst, consts, const_cache);
1880
42
}
1881
1882
static PyObject *
1883
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1884
539
{
1885
539
    assert(operand != NULL);
1886
539
    assert(
1887
539
        opcode == UNARY_NEGATIVE ||
1888
539
        opcode == UNARY_INVERT ||
1889
539
        opcode == UNARY_NOT ||
1890
539
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1891
539
    );
1892
539
    PyObject *result;
1893
539
    switch (opcode) {
1894
539
        case UNARY_NEGATIVE:
1895
539
            result = PyNumber_Negative(operand);
1896
539
            break;
1897
0
        case UNARY_INVERT:
1898
            // XXX: This should be removed once the ~bool depreciation expires.
1899
0
            if (PyBool_Check(operand)) {
1900
0
                return NULL;
1901
0
            }
1902
0
            result = PyNumber_Invert(operand);
1903
0
            break;
1904
0
        case UNARY_NOT: {
1905
0
            int r = PyObject_IsTrue(operand);
1906
0
            if (r < 0) {
1907
0
                return NULL;
1908
0
            }
1909
0
            result = PyBool_FromLong(!r);
1910
0
            break;
1911
0
        }
1912
0
        case CALL_INTRINSIC_1:
1913
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1914
0
                Py_UNREACHABLE();
1915
0
            }
1916
0
            result = PyNumber_Positive(operand);
1917
0
            break;
1918
0
        default:
1919
0
            Py_UNREACHABLE();
1920
539
    }
1921
539
    return result;
1922
539
}
1923
1924
static int
1925
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1926
686
{
1927
1.22k
    #define UNARYOP_OPERAND_COUNT 1
1928
686
    assert(PyDict_CheckExact(const_cache));
1929
686
    assert(PyList_CheckExact(consts));
1930
686
    cfg_instr *unaryop = &bb->b_instr[i];
1931
1932
686
    cfg_instr *operand_instr;
1933
686
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1934
        /* not a const */
1935
147
        return SUCCESS;
1936
147
    }
1937
1938
539
    assert(loads_const(operand_instr->i_opcode));
1939
539
    PyObject *operand = get_const_value(
1940
539
        operand_instr->i_opcode,
1941
539
        operand_instr->i_oparg,
1942
539
        consts
1943
539
    );
1944
539
    if (operand == NULL) {
1945
0
        return ERROR;
1946
0
    }
1947
1948
539
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1949
539
    Py_DECREF(operand);
1950
539
    if (newconst == NULL) {
1951
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1952
0
            return ERROR;
1953
0
        }
1954
0
        PyErr_Clear();
1955
0
        return SUCCESS;
1956
0
    }
1957
1958
539
    if (unaryop->i_opcode == UNARY_NOT) {
1959
0
        assert(PyBool_Check(newconst));
1960
0
    }
1961
539
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1962
539
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1963
539
}
1964
1965
1.63k
#define VISITED (-1)
1966
1967
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1968
// same effect.
1969
static int
1970
swaptimize(basicblock *block, int *ix)
1971
1.67k
{
1972
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1973
    // for this function. Make sure to blow away cached *.pyc files first!
1974
1.67k
    assert(*ix < block->b_iused);
1975
1.67k
    cfg_instr *instructions = &block->b_instr[*ix];
1976
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1977
    // maximum depth of the stack manipulations:
1978
1.67k
    assert(instructions[0].i_opcode == SWAP);
1979
1.67k
    int depth = instructions[0].i_oparg;
1980
1.67k
    int len = 0;
1981
1.67k
    int more = false;
1982
1.67k
    int limit = block->b_iused - *ix;
1983
1.87k
    while (++len < limit) {
1984
1.87k
        int opcode = instructions[len].i_opcode;
1985
1.87k
        if (opcode == SWAP) {
1986
126
            depth = Py_MAX(depth, instructions[len].i_oparg);
1987
126
            more = true;
1988
126
        }
1989
1.74k
        else if (opcode != NOP) {
1990
1.67k
            break;
1991
1.67k
        }
1992
1.87k
    }
1993
    // It's already optimal if there's only one SWAP:
1994
1.67k
    if (!more) {
1995
1.54k
        return SUCCESS;
1996
1.54k
    }
1997
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1998
126
    int *stack = PyMem_Malloc(depth * sizeof(int));
1999
126
    if (stack == NULL) {
2000
0
        PyErr_NoMemory();
2001
0
        return ERROR;
2002
0
    }
2003
504
    for (int i = 0; i < depth; i++) {
2004
378
        stack[i] = i;
2005
378
    }
2006
    // Simulate the combined effect of these instructions by "running" them on
2007
    // our "stack":
2008
378
    for (int i = 0; i < len; i++) {
2009
252
        if (instructions[i].i_opcode == SWAP) {
2010
252
            int oparg = instructions[i].i_oparg;
2011
252
            int top = stack[0];
2012
            // SWAPs are 1-indexed:
2013
252
            stack[0] = stack[oparg - 1];
2014
252
            stack[oparg - 1] = top;
2015
252
        }
2016
252
    }
2017
    // Now we can begin! Our approach here is based on a solution to a closely
2018
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2019
    // think of this algorithm as determining the steps needed to efficiently
2020
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2021
    // though, we can efficiently *shuffle* it! For this reason, we will be
2022
    // replacing instructions starting from the *end* of the run. Since the
2023
    // solution is optimal, we don't need to worry about running out of space:
2024
126
    int current = len - 1;
2025
504
    for (int i = 0; i < depth; i++) {
2026
        // Skip items that have already been visited, or just happen to be in
2027
        // the correct location:
2028
378
        if (stack[i] == VISITED || stack[i] == i) {
2029
252
            continue;
2030
252
        }
2031
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2032
        // with other items; traversing the cycle and swapping each item with
2033
        // the next will put them all in the correct place. The weird
2034
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2035
        // can only swap from that position:
2036
126
        int j = i;
2037
504
        while (true) {
2038
            // Skip the actual swap if our item is zero, since swapping the top
2039
            // item with itself is pointless:
2040
504
            if (j) {
2041
252
                assert(0 <= current);
2042
                // SWAPs are 1-indexed:
2043
252
                instructions[current].i_opcode = SWAP;
2044
252
                instructions[current--].i_oparg = j + 1;
2045
252
            }
2046
504
            if (stack[j] == VISITED) {
2047
                // Completed the cycle:
2048
126
                assert(j == i);
2049
126
                break;
2050
126
            }
2051
378
            int next_j = stack[j];
2052
378
            stack[j] = VISITED;
2053
378
            j = next_j;
2054
378
        }
2055
126
    }
2056
    // NOP out any unused instructions:
2057
126
    while (0 <= current) {
2058
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2059
0
    }
2060
126
    PyMem_Free(stack);
2061
126
    *ix += len - 1;
2062
126
    return SUCCESS;
2063
126
}
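
The effect can be reproduced in isolation: four SWAPs whose combined effect is a single 3-cycle collapse to two. A standalone sketch of the same simulate-then-unshuffle idea, using toy arrays rather than the real cfg_instr stream:

#include <stdio.h>

#define VISITED (-1)

int main(void)
{
    /* Simulate SWAP(2); SWAP(3); SWAP(2); SWAP(3) on an abstract stack. */
    int swaps_in[] = {2, 3, 2, 3};
    int len = 4, depth = 3;
    int stack[3] = {0, 1, 2};
    for (int i = 0; i < len; i++) {
        int oparg = swaps_in[i];
        int top = stack[0];
        stack[0] = stack[oparg - 1];      /* SWAPs are 1-indexed */
        stack[oparg - 1] = top;
    }

    /* Emit an optimal replacement from the end of the run backwards,
       threading every cycle through stack slot 0. */
    int out[4];
    int current = len - 1;
    for (int i = 0; i < depth; i++) {
        if (stack[i] == VISITED || stack[i] == i) {
            continue;
        }
        int j = i;
        while (1) {
            if (j) {
                out[current--] = j + 1;   /* SWAP(j + 1) */
            }
            if (stack[j] == VISITED) {
                break;
            }
            int next_j = stack[j];
            stack[j] = VISITED;
            j = next_j;
        }
    }
    while (current >= 0) {
        out[current--] = 0;               /* unused slots become NOPs */
    }

    for (int i = 0; i < len; i++) {
        printf(out[i] ? "SWAP %d\n" : "NOP\n", out[i]);
    }
    return 0;
}

This prints NOP, NOP, SWAP 3, SWAP 2: the four input SWAPs compose to the rotation {1, 2, 0}, which the cycle walk reproduces with only two.
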
2064
2065
2066
// This list is pretty small, since it's only okay to reorder opcodes that:
2067
// - can't affect control flow (like jumping or raising exceptions)
2068
// - can't invoke arbitrary code (besides finalizers)
2069
// - only touch the TOS (and pop it when finished)
2070
#define SWAPPABLE(opcode) \
2071
2.81k
    ((opcode) == STORE_FAST || \
2072
2.81k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2073
2.81k
     (opcode) == POP_TOP)
2074
2075
#define STORES_TO(instr) \
2076
395
    (((instr).i_opcode == STORE_FAST || \
2077
395
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2078
395
     ? (instr).i_oparg : -1)
2079
2080
static int
2081
next_swappable_instruction(basicblock *block, int i, int lineno)
2082
2.76k
{
2083
2.84k
    while (++i < block->b_iused) {
2084
2.82k
        cfg_instr *instruction = &block->b_instr[i];
2085
2.82k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2086
            // Optimizing across this instruction could cause user-visible
2087
            // changes in the names bound between line tracing events!
2088
12
            return -1;
2089
12
        }
2090
2.81k
        if (instruction->i_opcode == NOP) {
2091
77
            continue;
2092
77
        }
2093
2.73k
        if (SWAPPABLE(instruction->i_opcode)) {
2094
1.27k
            return i;
2095
1.27k
        }
2096
1.46k
        return -1;
2097
2.73k
    }
2098
20
    return -1;
2099
2.76k
}
2100
2101
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2102
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2103
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2104
static void
2105
apply_static_swaps(basicblock *block, int i)
2106
1.67k
{
2107
    // SWAPs are to our left, and potential swaperands are to our right:
2108
1.93k
    for (; 0 <= i; i--) {
2109
1.83k
        assert(i < block->b_iused);
2110
1.83k
        cfg_instr *swap = &block->b_instr[i];
2111
1.83k
        if (swap->i_opcode != SWAP) {
2112
158
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2113
                // Nope, but we know how to handle these. Keep looking:
2114
79
                continue;
2115
79
            }
2116
            // We can't reason about what this instruction does. Bail:
2117
79
            return;
2118
158
        }
2119
1.67k
        int j = next_swappable_instruction(block, i, -1);
2120
1.67k
        if (j < 0) {
2121
810
            return;
2122
810
        }
2123
862
        int k = j;
2124
862
        int lineno = block->b_instr[j].i_loc.lineno;
2125
1.27k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2126
1.09k
            k = next_swappable_instruction(block, k, lineno);
2127
1.09k
            if (k < 0) {
2128
682
                return;
2129
682
            }
2130
1.09k
        }
2131
        // The reordering is not safe if the two instructions to be swapped
2132
        // store to the same location, or if any intervening instruction stores
2133
        // to the same location as either of them.
2134
180
        int store_j = STORES_TO(block->b_instr[j]);
2135
180
        int store_k = STORES_TO(block->b_instr[k]);
2136
180
        if (store_j >= 0 || store_k >= 0) {
2137
180
            if (store_j == store_k) {
2138
0
                return;
2139
0
            }
2140
215
            for (int idx = j + 1; idx < k; idx++) {
2141
35
                int store_idx = STORES_TO(block->b_instr[idx]);
2142
35
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2143
0
                    return;
2144
0
                }
2145
35
            }
2146
180
        }
2147
2148
        // Success!
2149
180
        INSTR_SET_OP0(swap, NOP);
2150
180
        cfg_instr temp = block->b_instr[j];
2151
180
        block->b_instr[j] = block->b_instr[k];
2152
180
        block->b_instr[k] = temp;
2153
180
    }
2154
1.67k
}
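
The transformation named in the comment above the function can be reproduced directly: NOP the SWAP and exchange the two swappable instructions. A minimal sketch with toy opcode values (not CPython's):

#include <stdio.h>

enum { SWAP, POP_TOP, STORE_FAST, NOP };
typedef struct { int opcode; int oparg; } instr;

int main(void)
{
    /* SWAP(2); POP_TOP; STORE_FAST(42)
       -> NOP; STORE_FAST(42); POP_TOP */
    instr code[] = {{SWAP, 2}, {POP_TOP, 0}, {STORE_FAST, 42}};
    int j = 1, k = 2;              /* the two swappable instructions */

    code[0] = (instr){NOP, 0};     /* the SWAP becomes a NOP */
    instr tmp = code[j];           /* ... and the swaperands trade places */
    code[j] = code[k];
    code[k] = tmp;

    for (int i = 0; i < 3; i++) {
        printf("%d %d\n", code[i].opcode, code[i].oparg);
    }
    return 0;
}
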
2155
2156
static int
2157
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2158
44.2k
{
2159
44.2k
    assert(PyDict_CheckExact(const_cache));
2160
44.2k
    assert(PyList_CheckExact(consts));
2161
44.2k
    int opcode = 0;
2162
44.2k
    int oparg = 0;
2163
398k
    for (int i = 0; i < bb->b_iused; i++) {
2164
354k
        cfg_instr *inst = &bb->b_instr[i];
2165
354k
        if (inst->i_opcode == LOAD_CONST) {
2166
65.8k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2167
65.8k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2168
65.8k
            Py_DECREF(constant);
2169
65.8k
            if (res < 0) {
2170
0
                return ERROR;
2171
0
            }
2172
65.8k
        }
2173
354k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2174
354k
                                      inst->i_opcode == COPY &&
2175
354k
                                      inst->i_oparg == 1);
2176
354k
        if (! is_copy_of_load_const) {
2177
354k
            opcode = inst->i_opcode;
2178
354k
            oparg = inst->i_oparg;
2179
354k
        }
2180
354k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2181
354k
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2182
288k
            continue;
2183
288k
        }
2184
65.9k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2185
65.9k
        switch(nextop) {
2186
110
            case POP_JUMP_IF_FALSE:
2187
113
            case POP_JUMP_IF_TRUE:
2188
113
            case JUMP_IF_FALSE:
2189
113
            case JUMP_IF_TRUE:
2190
113
            {
2191
                /* Remove LOAD_CONST const; conditional jump */
2192
113
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2193
113
                if (cnt == NULL) {
2194
0
                    return ERROR;
2195
0
                }
2196
113
                int is_true = PyObject_IsTrue(cnt);
2197
113
                Py_DECREF(cnt);
2198
113
                if (is_true == -1) {
2199
0
                    return ERROR;
2200
0
                }
2201
113
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2202
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2203
113
                    INSTR_SET_OP0(inst, NOP);
2204
113
                }
2205
113
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2206
113
                if (is_true == jump_if_true) {
2207
1
                    bb->b_instr[i+1].i_opcode = JUMP;
2208
1
                }
2209
112
                else {
2210
112
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2211
112
                }
2212
113
                break;
2213
113
            }
2214
1.37k
            case IS_OP:
2215
1.37k
            {
2216
                // Fold to POP_JUMP_IF_NONE:
2217
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2218
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2219
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2220
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2221
                // Fold to POP_JUMP_IF_NOT_NONE:
2222
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2223
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2224
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2225
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2226
1.37k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2227
1.37k
                if (cnt == NULL) {
2228
0
                    return ERROR;
2229
0
                }
2230
1.37k
                if (!Py_IsNone(cnt)) {
2231
18
                    Py_DECREF(cnt);
2232
18
                    break;
2233
18
                }
2234
1.36k
                if (bb->b_iused <= i + 2) {
2235
7
                    break;
2236
7
                }
2237
1.35k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2238
1.35k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2239
                // Get rid of TO_BOOL regardless:
2240
1.35k
                if (jump_instr->i_opcode == TO_BOOL) {
2241
1.32k
                    INSTR_SET_OP0(jump_instr, NOP);
2242
1.32k
                    if (bb->b_iused <= i + 3) {
2243
0
                        break;
2244
0
                    }
2245
1.32k
                    jump_instr = &bb->b_instr[i + 3];
2246
1.32k
                }
2247
1.35k
                bool invert = is_instr->i_oparg;
2248
1.35k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2249
1.28k
                    invert = !invert;
2250
1.28k
                }
2251
74
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2252
25
                    break;
2253
25
                }
2254
1.32k
                INSTR_SET_OP0(inst, NOP);
2255
1.32k
                INSTR_SET_OP0(is_instr, NOP);
2256
1.32k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2257
1.32k
                                              : POP_JUMP_IF_NONE;
2258
1.32k
                break;
2259
1.35k
            }
2260
113
            case TO_BOOL:
2261
113
            {
2262
113
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2263
113
                if (cnt == NULL) {
2264
0
                    return ERROR;
2265
0
                }
2266
113
                int is_true = PyObject_IsTrue(cnt);
2267
113
                Py_DECREF(cnt);
2268
113
                if (is_true == -1) {
2269
0
                    return ERROR;
2270
0
                }
2271
113
                cnt = PyBool_FromLong(is_true);
2272
113
                int index = add_const(cnt, consts, const_cache);
2273
113
                if (index < 0) {
2274
0
                    return ERROR;
2275
0
                }
2276
113
                INSTR_SET_OP0(inst, NOP);
2277
113
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2278
113
                break;
2279
113
            }
2280
65.9k
        }
2281
65.9k
    }
2282
44.2k
    return SUCCESS;
2283
44.2k
}
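
The IS_OP inversion logic above reduces to a two-bit truth table: IS_OP's oparg flips the sense once, and POP_JUMP_IF_FALSE flips it again. A small sketch that enumerates all four cases (names abbreviated; this mirrors the invert computation only):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
    for (int is_oparg = 0; is_oparg <= 1; is_oparg++) {
        for (int jump_if_false = 0; jump_if_false <= 1; jump_if_false++) {
            bool invert = is_oparg;
            if (jump_if_false) {
                invert = !invert;
            }
            printf("IS_OP(%d) %s -> %s\n", is_oparg,
                   jump_if_false ? "POP_JUMP_IF_FALSE" : "POP_JUMP_IF_TRUE",
                   invert ? "POP_JUMP_IF_NOT_NONE" : "POP_JUMP_IF_NONE");
        }
    }
    return 0;
}
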
2284
2285
static int
2286
6.55k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2287
50.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2288
44.2k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2289
44.2k
    }
2290
6.55k
    return SUCCESS;
2291
6.55k
}
2292
2293
static int
2294
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2295
44.2k
{
2296
44.2k
    assert(PyDict_CheckExact(const_cache));
2297
44.2k
    assert(PyList_CheckExact(consts));
2298
44.2k
    cfg_instr nop;
2299
44.2k
    INSTR_SET_OP0(&nop, NOP);
2300
400k
    for (int i = 0; i < bb->b_iused; i++) {
2301
356k
        cfg_instr *inst = &bb->b_instr[i];
2302
356k
        cfg_instr *target;
2303
356k
        int opcode = inst->i_opcode;
2304
356k
        int oparg = inst->i_oparg;
2305
356k
        if (HAS_TARGET(opcode)) {
2306
21.8k
            assert(inst->i_target->b_iused > 0);
2307
21.8k
            target = &inst->i_target->b_instr[0];
2308
21.8k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2309
21.8k
        }
2310
334k
        else {
2311
334k
            target = &nop;
2312
334k
        }
2313
356k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2314
356k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2315
356k
        switch (opcode) {
2316
            /* Try to fold tuples of constants.
2317
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2318
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2319
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2320
3.84k
            case BUILD_TUPLE:
2321
3.84k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2322
89
                    switch(oparg) {
2323
0
                        case 1:
2324
0
                            INSTR_SET_OP0(inst, NOP);
2325
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2326
0
                            continue;
2327
76
                        case 2:
2328
86
                        case 3:
2329
86
                            INSTR_SET_OP0(inst, NOP);
2330
86
                            bb->b_instr[i+1].i_opcode = SWAP;
2331
86
                            continue;
2332
89
                    }
2333
89
                }
2334
3.75k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2335
3.75k
                break;
2336
1.18k
            case BUILD_LIST:
2337
1.27k
            case BUILD_SET:
2338
1.27k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2339
1.27k
                break;
2340
703
            case POP_JUMP_IF_NOT_NONE:
2341
1.39k
            case POP_JUMP_IF_NONE:
2342
1.39k
                switch (target->i_opcode) {
2343
68
                    case JUMP:
2344
68
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2345
1.39k
                }
2346
1.39k
                break;
2347
8.49k
            case POP_JUMP_IF_FALSE:
2348
8.49k
                switch (target->i_opcode) {
2349
565
                    case JUMP:
2350
565
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2351
8.49k
                }
2352
8.49k
                break;
2353
8.49k
            case POP_JUMP_IF_TRUE:
2354
1.63k
                switch (target->i_opcode) {
2355
46
                    case JUMP:
2356
46
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2357
1.63k
                }
2358
1.63k
                break;
2359
1.63k
            case JUMP_IF_FALSE:
2360
280
                switch (target->i_opcode) {
2361
0
                    case JUMP:
2362
0
                    case JUMP_IF_FALSE:
2363
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2364
0
                        continue;
2365
3
                    case JUMP_IF_TRUE:
2366
                        // No need to check for loops here, a block's b_next
2367
                        // cannot point to itself.
2368
3
                        assert(inst->i_target != inst->i_target->b_next);
2369
3
                        inst->i_target = inst->i_target->b_next;
2370
3
                        i--;
2371
3
                        continue;
2372
280
                }
2373
277
                break;
2374
277
            case JUMP_IF_TRUE:
2375
271
                switch (target->i_opcode) {
2376
0
                    case JUMP:
2377
0
                    case JUMP_IF_TRUE:
2378
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2379
0
                        continue;
2380
5
                    case JUMP_IF_FALSE:
2381
                        // No need to check for loops here, a block's b_next
2382
                        // cannot point to itself.
2383
5
                        assert(inst->i_target != inst->i_target->b_next);
2384
5
                        inst->i_target = inst->i_target->b_next;
2385
5
                        i--;
2386
5
                        continue;
2387
271
                }
2388
266
                break;
2389
3.35k
            case JUMP:
2390
5.61k
            case JUMP_NO_INTERRUPT:
2391
5.61k
                switch (target->i_opcode) {
2392
0
                    case JUMP:
2393
0
                        i -= jump_thread(bb, inst, target, JUMP);
2394
0
                        continue;
2395
0
                    case JUMP_NO_INTERRUPT:
2396
0
                        i -= jump_thread(bb, inst, target, opcode);
2397
0
                        continue;
2398
5.61k
                }
2399
5.61k
                break;
2400
5.61k
            case FOR_ITER:
2401
1.52k
                if (target->i_opcode == JUMP) {
2402
                    /* This will not work now because the jump (at target) could
2403
                     * be forward or backward and FOR_ITER only jumps forward. We
2404
                     * can re-enable this if ever we implement a backward version
2405
                     * of FOR_ITER.
2406
                     */
2407
                    /*
2408
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2409
                    */
2410
0
                }
2411
1.52k
                break;
2412
12.8k
            case STORE_FAST:
2413
12.8k
                if (opcode == nextop &&
2414
12.8k
                    oparg == bb->b_instr[i+1].i_oparg &&
2415
12.8k
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2416
31
                    bb->b_instr[i].i_opcode = POP_TOP;
2417
31
                    bb->b_instr[i].i_oparg = 0;
2418
31
                }
2419
12.8k
                break;
2420
1.79k
            case SWAP:
2421
1.79k
                if (oparg == 1) {
2422
0
                    INSTR_SET_OP0(inst, NOP);
2423
0
                }
2424
1.79k
                break;
2425
15.9k
            case LOAD_GLOBAL:
2426
15.9k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2427
7.90k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2428
7.90k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2429
7.90k
                }
2430
15.9k
                break;
2431
5.62k
            case COMPARE_OP:
2432
5.62k
                if (nextop == TO_BOOL) {
2433
2.59k
                    INSTR_SET_OP0(inst, NOP);
2434
2.59k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2435
2.59k
                    continue;
2436
2.59k
                }
2437
3.03k
                break;
2438
3.03k
            case CONTAINS_OP:
2439
3.67k
            case IS_OP:
2440
3.67k
                if (nextop == TO_BOOL) {
2441
1.70k
                    INSTR_SET_OP0(inst, NOP);
2442
1.70k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2443
1.70k
                    continue;
2444
1.70k
                }
2445
1.97k
                if (nextop == UNARY_NOT) {
2446
0
                    INSTR_SET_OP0(inst, NOP);
2447
0
                    int inverted = oparg ^ 1;
2448
0
                    assert(inverted == 0 || inverted == 1);
2449
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2450
0
                    continue;
2451
0
                }
2452
1.97k
                break;
2453
4.37k
            case TO_BOOL:
2454
4.37k
                if (nextop == TO_BOOL) {
2455
0
                    INSTR_SET_OP0(inst, NOP);
2456
0
                    continue;
2457
0
                }
2458
4.37k
                break;
2459
4.37k
            case UNARY_NOT:
2460
58
                if (nextop == TO_BOOL) {
2461
0
                    INSTR_SET_OP0(inst, NOP);
2462
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2463
0
                    continue;
2464
0
                }
2465
58
                if (nextop == UNARY_NOT) {
2466
0
                    INSTR_SET_OP0(inst, NOP);
2467
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2468
0
                    continue;
2469
0
                }
2470
58
                _Py_FALLTHROUGH;
2471
98
            case UNARY_INVERT:
2472
684
            case UNARY_NEGATIVE:
2473
684
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2474
684
                break;
2475
458
            case CALL_INTRINSIC_1:
2476
458
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2477
93
                    if (nextop == GET_ITER) {
2478
0
                        INSTR_SET_OP0(inst, NOP);
2479
0
                    }
2480
93
                    else {
2481
93
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2482
93
                    }
2483
93
                }
2484
365
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2485
2
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2486
2
                }
2487
458
                break;
2488
6.96k
            case BINARY_OP:
2489
6.96k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2490
6.96k
                break;
2491
356k
        }
2492
356k
    }
2493
2494
399k
    for (int i = 0; i < bb->b_iused; i++) {
2495
355k
        cfg_instr *inst = &bb->b_instr[i];
2496
355k
        if (inst->i_opcode == SWAP) {
2497
1.67k
            if (swaptimize(bb, &i) < 0) {
2498
0
                goto error;
2499
0
            }
2500
1.67k
            apply_static_swaps(bb, i);
2501
1.67k
        }
2502
355k
    }
2503
44.2k
    return SUCCESS;
2504
0
error:
2505
0
    return ERROR;
2506
44.2k
}
2507
2508
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2509
2510
static int
2511
remove_redundant_nops_and_jumps(cfg_builder *g)
2512
14.1k
{
2513
14.1k
    int removed_nops, removed_jumps;
2514
16.0k
    do {
2515
        /* Convergence is guaranteed because the number of
2516
         * redundant jumps and nops only decreases.
2517
         */
2518
16.0k
        removed_nops = remove_redundant_nops(g);
2519
16.0k
        RETURN_IF_ERROR(removed_nops);
2520
16.0k
        removed_jumps = remove_redundant_jumps(g);
2521
16.0k
        RETURN_IF_ERROR(removed_jumps);
2522
16.0k
    } while(removed_nops + removed_jumps > 0);
2523
14.1k
    return SUCCESS;
2524
14.1k
}
2525
2526
/* Perform optimizations on a control flow graph.
2527
   The consts object should still be in list form to allow new constants
2528
   to be appended.
2529
2530
   Code transformations that reduce code size initially fill the gaps with
2531
   NOPs.  Later those NOPs are removed.
2532
*/
2533
static int
2534
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2535
6.55k
{
2536
6.55k
    assert(PyDict_CheckExact(const_cache));
2537
6.55k
    RETURN_IF_ERROR(check_cfg(g));
2538
6.55k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2539
6.55k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2540
6.55k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2541
6.55k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2542
50.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2543
44.2k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2544
44.2k
    }
2545
6.55k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2546
6.55k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2547
6.55k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2548
6.55k
    assert(no_redundant_jumps(g));
2549
6.55k
    return SUCCESS;
2550
6.55k
}
2551
2552
static void
2553
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2554
12.0k
{
2555
12.0k
    int32_t line1 = inst1->i_loc.lineno;
2556
12.0k
    int32_t line2 = inst2->i_loc.lineno;
2557
    /* Skip if instructions are on different lines */
2558
12.0k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2559
4.36k
        return;
2560
4.36k
    }
2561
7.63k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2562
781
        return;
2563
781
    }
2564
6.85k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2565
6.85k
    INSTR_SET_OP0(inst2, NOP);
2566
6.85k
}
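
Both opargs must fit in 4 bits because the super-instruction packs them into a single oparg; the interpreter unpacks them again with a shift and a mask. A minimal sketch of the encoding round trip (the arithmetic only, not the real interpreter):

#include <assert.h>
#include <stdio.h>

int main(void)
{
    int oparg1 = 5, oparg2 = 11;          /* both must be < 16 */
    assert(oparg1 < 16 && oparg2 < 16);

    int packed = (oparg1 << 4) | oparg2;  /* what make_super_instruction does */
    assert(packed >> 4 == oparg1);        /* ... and how it is unpacked */
    assert((packed & 15) == oparg2);

    printf("packed oparg = %d\n", packed);
    return 0;
}
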
2567
2568
static int
2569
insert_superinstructions(cfg_builder *g)
2570
6.55k
{
2571
50.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2572
2573
370k
        for (int i = 0; i < b->b_iused; i++) {
2574
325k
            cfg_instr *inst = &b->b_instr[i];
2575
325k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2576
325k
            switch(inst->i_opcode) {
2577
41.4k
                case LOAD_FAST:
2578
41.4k
                    if (nextop == LOAD_FAST) {
2579
6.31k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2580
6.31k
                    }
2581
41.4k
                    break;
2582
11.6k
                case STORE_FAST:
2583
11.6k
                    switch (nextop) {
2584
4.40k
                        case LOAD_FAST:
2585
4.40k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2586
4.40k
                            break;
2587
1.28k
                        case STORE_FAST:
2588
1.28k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2589
1.28k
                            break;
2590
11.6k
                    }
2591
11.6k
                    break;
2592
325k
            }
2593
325k
        }
2594
44.2k
    }
2595
6.55k
    int res = remove_redundant_nops(g);
2596
6.55k
    assert(no_redundant_nops(g));
2597
6.55k
    return res;
2598
6.55k
}
2599
2600
#define NOT_LOCAL -1
2601
11.9k
#define DUMMY_INSTR -1
2602
2603
typedef struct {
2604
    // Index of instruction that produced the reference or DUMMY_INSTR.
2605
    int instr;
2606
2607
    // The local to which the reference refers or NOT_LOCAL.
2608
    int local;
2609
} ref;
2610
2611
typedef struct {
2612
    ref *refs;
2613
    Py_ssize_t size;
2614
    Py_ssize_t capacity;
2615
} ref_stack;
2616
2617
static int
2618
ref_stack_push(ref_stack *stack, ref r)
2619
264k
{
2620
264k
    if (stack->size == stack->capacity) {
2621
6.54k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2622
6.54k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2623
6.54k
        if (refs == NULL) {
2624
0
            PyErr_NoMemory();
2625
0
            return -1;
2626
0
        }
2627
6.54k
        stack->refs = refs;
2628
6.54k
        stack->capacity = new_cap;
2629
6.54k
    }
2630
264k
    stack->refs[stack->size] = r;
2631
264k
    stack->size++;
2632
264k
    return 0;
2633
264k
}
2634
2635
static ref
2636
ref_stack_pop(ref_stack *stack)
2637
228k
{
2638
228k
    assert(stack->size > 0);
2639
228k
    stack->size--;
2640
228k
    ref r = stack->refs[stack->size];
2641
228k
    return r;
2642
228k
}
2643
2644
static void
2645
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2646
1.04k
{
2647
1.04k
    Py_ssize_t idx = stack->size - off;
2648
1.04k
    assert(idx >= 0 && idx < stack->size);
2649
1.04k
    ref tmp = stack->refs[idx];
2650
1.04k
    stack->refs[idx] = stack->refs[stack->size - 1];
2651
1.04k
    stack->refs[stack->size - 1] = tmp;
2652
1.04k
}
2653
2654
static ref
2655
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2656
49.1k
{
2657
49.1k
    assert(idx >= 0 && idx < stack->size);
2658
49.1k
    return stack->refs[idx];
2659
49.1k
}
2660
2661
static void
2662
ref_stack_clear(ref_stack *stack)
2663
31.5k
{
2664
31.5k
    stack->size = 0;
2665
31.5k
}
2666
2667
static void
2668
ref_stack_fini(ref_stack *stack)
2669
6.55k
{
2670
6.55k
    if (stack->refs != NULL) {
2671
6.54k
        PyMem_Free(stack->refs);
2672
6.54k
    }
2673
6.55k
    stack->refs = NULL;
2674
6.55k
    stack->capacity = 0;
2675
6.55k
    stack->size = 0;
2676
6.55k
}
2677
2678
typedef enum {
2679
    // The loaded reference is still on the stack when the local is killed
2680
    SUPPORT_KILLED  = 1,
2681
    // The loaded reference is stored into a local
2682
    STORED_AS_LOCAL = 2,
2683
    // The loaded reference is still on the stack at the end of the basic block
2684
    REF_UNCONSUMED  = 4,
2685
} LoadFastInstrFlag;
2686
2687
static void
2688
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2689
12.1k
{
2690
24.1k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2691
11.9k
        ref r = ref_stack_at(refs, i);
2692
11.9k
        if (r.local == local) {
2693
11
            assert(r.instr >= 0);
2694
11
            instr_flags[r.instr] |= SUPPORT_KILLED;
2695
11
        }
2696
11.9k
    }
2697
12.1k
}
2698
2699
static void
2700
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2701
11.9k
{
2702
11.9k
    kill_local(instr_flags, refs, local);
2703
11.9k
    if (r.instr != DUMMY_INSTR) {
2704
10.3k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2705
10.3k
    }
2706
11.9k
}
2707
2708
static void
2709
load_fast_push_block(basicblock ***sp, basicblock *target,
2710
                     Py_ssize_t start_depth)
2711
32.5k
{
2712
32.5k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2713
32.5k
    if (!target->b_visited) {
2714
24.9k
        target->b_visited = 1;
2715
24.9k
        *(*sp)++ = target;
2716
24.9k
    }
2717
32.5k
}
2718
2719
/*
2720
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2721
 * load borrowed references onto the operand stack.
2722
 *
2723
 * This is only safe when we can prove that the reference in the frame outlives
2724
 * the borrowed reference produced by the instruction. We make this tractable
2725
 * by enforcing the following lifetimes:
2726
 *
2727
 * 1. Borrowed references loaded onto the operand stack live until the end of
2728
 *    the instruction that consumes them from the stack. Any borrowed
2729
 *    references that would escape into the heap (e.g. into frame objects or
2730
 *    generators) are converted into new, strong references.
2731
 *
2732
 * 2. Locals live until they are either killed by an instruction
2733
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2734
 *    via `f_locals` is added to a tuple owned by the frame object.
2735
 *
2736
 * To simplify the problem of detecting which supporting references in the
2737
 * frame are killed by instructions that overwrite locals, we only allow
2738
 * borrowed references to be stored as a local in the frame if they were passed
2739
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2740
 * strong references.
2741
 *
2742
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2743
 * that meet the following criteria:
2744
 *
2745
 * 1. The produced reference must be consumed from the stack before the
2746
 *    supporting reference in the frame is killed.
2747
 *
2748
 * 2. The produced reference cannot be stored as a local.
2749
 *
2750
 * We use abstract interpretation to identify instructions that meet these
2751
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2752
 * stack of abstract references and note any instructions that violate the
2753
 * criteria above. Once we've processed all the instructions in a block, any
2754
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2755
 */
2756
static int
2757
optimize_load_fast(cfg_builder *g)
2758
6.55k
{
2759
6.55k
    int status;
2760
6.55k
    ref_stack refs = {0};
2761
6.55k
    int max_instrs = 0;
2762
6.55k
    basicblock *entryblock = g->g_entryblock;
2763
51.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2764
45.0k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2765
45.0k
    }
2766
6.55k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2767
6.55k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2768
6.55k
    if (instr_flags == NULL) {
2769
0
        PyErr_NoMemory();
2770
0
        return ERROR;
2771
0
    }
2772
6.55k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2773
6.55k
    if (blocks == NULL) {
2774
0
        status = ERROR;
2775
0
        goto done;
2776
0
    }
2777
6.55k
    basicblock **sp = blocks;
2778
6.55k
    *sp = entryblock;
2779
6.55k
    sp++;
2780
6.55k
    entryblock->b_startdepth = 0;
2781
6.55k
    entryblock->b_visited = 1;
2782
2783
6.55k
    #define PUSH_REF(instr, local)                \
2784
264k
        do {                                      \
2785
264k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2786
0
                status = ERROR;                   \
2787
0
                goto done;                        \
2788
0
            }                                     \
2789
264k
        } while(0)
2790
2791
38.0k
    while (sp != blocks) {
2792
31.5k
        basicblock *block = *--sp;
2793
31.5k
        assert(block->b_startdepth > -1);
2794
2795
        // Reset per-block state.
2796
31.5k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2797
2798
        // Reset the stack of refs. We don't track references on the stack
2799
        // across basic blocks, but the bytecode will expect their
2800
        // presence. Add dummy references as necessary.
2801
31.5k
        ref_stack_clear(&refs);
2802
61.0k
        for (int i = 0; i < block->b_startdepth; i++) {
2803
29.5k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2804
29.5k
        }
2805
2806
333k
        for (int i = 0; i < block->b_iused; i++) {
2807
301k
            cfg_instr *instr = &block->b_instr[i];
2808
301k
            int opcode = instr->i_opcode;
2809
301k
            int oparg = instr->i_oparg;
2810
301k
            assert(opcode != EXTENDED_ARG);
2811
301k
            switch (opcode) {
2812
                // Opcodes that load and store locals
2813
2
                case DELETE_FAST: {
2814
2
                    kill_local(instr_flags, &refs, oparg);
2815
2
                    break;
2816
0
                }
2817
2818
34.5k
                case LOAD_FAST: {
2819
34.5k
                    PUSH_REF(i, oparg);
2820
34.5k
                    break;
2821
34.5k
                }
2822
2823
34.5k
                case LOAD_FAST_AND_CLEAR: {
2824
281
                    kill_local(instr_flags, &refs, oparg);
2825
281
                    PUSH_REF(i, oparg);
2826
281
                    break;
2827
281
                }
2828
2829
5.14k
                case LOAD_FAST_LOAD_FAST: {
2830
5.14k
                    PUSH_REF(i, oparg >> 4);
2831
5.14k
                    PUSH_REF(i, oparg & 15);
2832
5.14k
                    break;
2833
5.14k
                }
2834
2835
9.56k
                case STORE_FAST: {
2836
9.56k
                    ref r = ref_stack_pop(&refs);
2837
9.56k
                    store_local(instr_flags, &refs, oparg, r);
2838
9.56k
                    break;
2839
5.14k
                }
2840
2841
199
                case STORE_FAST_LOAD_FAST: {
2842
                    // STORE_FAST
2843
199
                    ref r = ref_stack_pop(&refs);
2844
199
                    store_local(instr_flags, &refs, oparg >> 4, r);
2845
                    // LOAD_FAST
2846
199
                    PUSH_REF(i, oparg & 15);
2847
199
                    break;
2848
199
                }
2849
2850
1.07k
                case STORE_FAST_STORE_FAST: {
2851
                    // STORE_FAST
2852
1.07k
                    ref r = ref_stack_pop(&refs);
2853
1.07k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2854
                    // STORE_FAST
2855
1.07k
                    r = ref_stack_pop(&refs);
2856
1.07k
                    store_local(instr_flags, &refs, oparg & 15, r);
2857
1.07k
                    break;
2858
199
                }
2859
2860
                // Opcodes that shuffle values on the stack
2861
1.21k
                case COPY: {
2862
1.21k
                    assert(oparg > 0);
2863
1.21k
                    Py_ssize_t idx = refs.size - oparg;
2864
1.21k
                    ref r = ref_stack_at(&refs, idx);
2865
1.21k
                    PUSH_REF(r.instr, r.local);
2866
1.21k
                    break;
2867
1.21k
                }
2868
2869
1.21k
                case SWAP: {
2870
1.04k
                    assert(oparg >= 2);
2871
1.04k
                    ref_stack_swap_top(&refs, oparg);
2872
1.04k
                    break;
2873
1.21k
                }
2874
2875
                // We treat opcodes that do not consume all of their inputs on
2876
                // a case-by-case basis, as we have no generic way of knowing
2877
                // how many inputs should be left on the stack.
2878
2879
                // Opcodes that consume no inputs
2880
1.20k
                case FORMAT_SIMPLE:
2881
1.20k
                case GET_ANEXT:
2882
2.59k
                case GET_ITER:
2883
2.59k
                case GET_LEN:
2884
2.62k
                case GET_YIELD_FROM_ITER:
2885
3.16k
                case IMPORT_FROM:
2886
3.16k
                case MATCH_KEYS:
2887
3.16k
                case MATCH_MAPPING:
2888
3.16k
                case MATCH_SEQUENCE:
2889
3.16k
                case WITH_EXCEPT_START: {
2890
3.16k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2891
3.16k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2892
3.16k
                    int net_pushed = num_pushed - num_popped;
2893
3.16k
                    assert(net_pushed >= 0);
2894
5.08k
                    for (int i = 0; i < net_pushed; i++) {
2895
1.92k
                        PUSH_REF(i, NOT_LOCAL);
2896
1.92k
                    }
2897
3.16k
                    break;
2898
3.16k
                }
2899
2900
                // Opcodes that consume some inputs and push no new values
2901
3.16k
                case DICT_MERGE:
2902
761
                case DICT_UPDATE:
2903
1.28k
                case LIST_APPEND:
2904
1.48k
                case LIST_EXTEND:
2905
12.5k
                case MAP_ADD:
2906
12.5k
                case RERAISE:
2907
12.6k
                case SET_ADD:
2908
12.6k
                case SET_UPDATE: {
2909
12.6k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2910
12.6k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2911
12.6k
                    int net_popped = num_popped - num_pushed;
2912
12.6k
                    assert(net_popped > 0);
2913
36.4k
                    for (int i = 0; i < net_popped; i++) {
2914
23.7k
                        ref_stack_pop(&refs);
2915
23.7k
                    }
2916
12.6k
                    break;
2917
12.6k
                }
2918
2919
41
                case END_SEND:
2920
1.73k
                case SET_FUNCTION_ATTRIBUTE: {
2921
1.73k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2922
1.73k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2923
1.73k
                    ref tos = ref_stack_pop(&refs);
2924
1.73k
                    ref_stack_pop(&refs);
2925
1.73k
                    PUSH_REF(tos.instr, tos.local);
2926
1.73k
                    break;
2927
1.73k
                }
2928
2929
                // Opcodes that consume some inputs and push new values
2930
1.73k
                case CHECK_EXC_MATCH: {
2931
0
                    ref_stack_pop(&refs);
2932
0
                    PUSH_REF(i, NOT_LOCAL);
2933
0
                    break;
2934
0
                }
2935
2936
1.45k
                case FOR_ITER: {
2937
1.45k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2938
1.45k
                    PUSH_REF(i, NOT_LOCAL);
2939
1.45k
                    break;
2940
1.45k
                }
2941
2942
18.3k
                case LOAD_ATTR:
2943
18.5k
                case LOAD_SUPER_ATTR: {
2944
18.5k
                    ref self = ref_stack_pop(&refs);
2945
18.5k
                    if (opcode == LOAD_SUPER_ATTR) {
2946
208
                        ref_stack_pop(&refs);
2947
208
                        ref_stack_pop(&refs);
2948
208
                    }
2949
18.5k
                    PUSH_REF(i, NOT_LOCAL);
2950
18.5k
                    if (oparg & 1) {
2951
                        // A method call; conservatively assume that self is pushed
2952
                        // back onto the stack
2953
7.18k
                        PUSH_REF(self.instr, self.local);
2954
7.18k
                    }
2955
18.5k
                    break;
2956
18.5k
                }
2957
2958
18.5k
                case LOAD_SPECIAL:
2959
186
                case PUSH_EXC_INFO: {
2960
186
                    ref tos = ref_stack_pop(&refs);
2961
186
                    PUSH_REF(i, NOT_LOCAL);
2962
186
                    PUSH_REF(tos.instr, tos.local);
2963
186
                    break;
2964
186
                }
2965
2966
186
                case SEND: {
2967
41
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2968
41
                    ref_stack_pop(&refs);
2969
41
                    PUSH_REF(i, NOT_LOCAL);
2970
41
                    break;
2971
41
                }
2972
2973
                // Opcodes that consume all of their inputs
2974
210k
                default: {
2975
210k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2976
210k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2977
210k
                    if (HAS_TARGET(instr->i_opcode)) {
2978
14.0k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2979
14.0k
                    }
2980
210k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2981
                        // Block push opcodes only affect the stack when jumping
2982
                        // to the target.
2983
381k
                        for (int j = 0; j < num_popped; j++) {
2984
170k
                            ref_stack_pop(&refs);
2985
170k
                        }
2986
368k
                        for (int j = 0; j < num_pushed; j++) {
2987
157k
                            PUSH_REF(i, NOT_LOCAL);
2988
157k
                        }
2989
210k
                    }
2990
210k
                    break;
2991
210k
                }
2992
301k
            }
2993
301k
        }
2994
2995
        // Push fallthrough block
2996
31.5k
        cfg_instr *term = basicblock_last_instr(block);
2997
31.5k
        if (term != NULL && block->b_next != NULL &&
2998
31.5k
            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
2999
30.0k
              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
3000
17.0k
            assert(BB_HAS_FALLTHROUGH(block));
3001
17.0k
            load_fast_push_block(&sp, block->b_next, refs.size);
3002
17.0k
        }
3003
3004
        // Mark instructions that produce values that are on the stack at the
3005
        // end of the basic block
3006
67.4k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3007
35.9k
            ref r = ref_stack_at(&refs, i);
3008
35.9k
            if (r.instr != -1) {
3009
15.5k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3010
15.5k
            }
3011
35.9k
        }
3012
3013
        // Optimize instructions
3014
333k
        for (int i = 0; i < block->b_iused; i++) {
3015
301k
            if (!instr_flags[i]) {
3016
277k
                cfg_instr *instr = &block->b_instr[i];
3017
277k
                switch (instr->i_opcode) {
3018
33.1k
                    case LOAD_FAST:
3019
33.1k
                        instr->i_opcode = LOAD_FAST_BORROW;
3020
33.1k
                        break;
3021
5.05k
                    case LOAD_FAST_LOAD_FAST:
3022
5.05k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3023
5.05k
                        break;
3024
239k
                    default:
3025
239k
                        break;
3026
277k
                }
3027
277k
            }
3028
301k
        }
3029
31.5k
    }
3030
3031
6.55k
    #undef PUSH_REF
3032
3033
6.55k
    status = SUCCESS;
3034
3035
6.55k
done:
3036
6.55k
    ref_stack_fini(&refs);
3037
6.55k
    PyMem_Free(instr_flags);
3038
6.55k
    PyMem_Free(blocks);
3039
6.55k
    return status;
3040
6.55k
}
3041
3042
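
The pass above simulates the evaluation stack per basic block, remembering which instruction produced each slot; producers whose values survive to the end of the block (or escape into a live local) get REF_UNCONSUMED, and only unflagged loads are downgraded to borrowing variants. A minimal standalone sketch of that bookkeeping, using a toy two-opcode instruction set (names here are illustrative, not CPython opcodes):

#include <stdio.h>

enum { LOAD, POP_AND_DISCARD };        /* toy opcodes, not CPython's */

int main(void)
{
    int code[] = { LOAD, LOAD, POP_AND_DISCARD };  /* second LOAD is consumed */
    int n = 3;
    int stack[8], sp = 0;              /* stack of producer indices */
    int unconsumed[8] = {0};

    for (int i = 0; i < n; i++) {
        if (code[i] == LOAD) {
            stack[sp++] = i;           /* remember who produced this slot */
        }
        else {                         /* POP_AND_DISCARD consumes one value */
            sp--;
        }
    }
    for (int j = 0; j < sp; j++) {     /* producers still live at block end */
        unconsumed[stack[j]] = 1;
    }
    for (int i = 0; i < n; i++) {
        if (code[i] == LOAD && !unconsumed[i]) {
            printf("instr %d: LOAD can borrow\n", i);
        }
    }
    return 0;
}

Here only the second LOAD's value is fully consumed within the block, so it is the only one eligible to borrow; the first survives to the block end and stays a strong reference.
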
// helper functions for add_checks_for_loads_of_uninitialized_variables
3043
static inline void
3044
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3045
149k
{
3046
    // Push b if the unsafe mask is giving us any new information.
3047
    // To avoid overflowing the stack, only allow each block once.
3048
    // Use b->b_visited=1 to mean that b is currently on the stack.
3049
149k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3050
149k
    if (b->b_unsafe_locals_mask != both) {
3051
24.8k
        b->b_unsafe_locals_mask = both;
3052
        // More work left to do.
3053
24.8k
        if (!b->b_visited) {
3054
            // not on the stack, so push it.
3055
24.6k
            *(*sp)++ = b;
3056
24.6k
            b->b_visited = 1;
3057
24.6k
        }
3058
24.8k
    }
3059
149k
}
3060
3061
static void
3062
scan_block_for_locals(basicblock *b, basicblock ***sp)
3063
65.5k
{
3064
    // bit i is set if local i is potentially uninitialized
3065
65.5k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3066
456k
    for (int i = 0; i < b->b_iused; i++) {
3067
390k
        cfg_instr *instr = &b->b_instr[i];
3068
390k
        assert(instr->i_opcode != EXTENDED_ARG);
3069
390k
        if (instr->i_except != NULL) {
3070
77.2k
            maybe_push(instr->i_except, unsafe_mask, sp);
3071
77.2k
        }
3072
390k
        if (instr->i_oparg >= 64) {
3073
26.8k
            continue;
3074
26.8k
        }
3075
363k
        assert(instr->i_oparg >= 0);
3076
363k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3077
363k
        switch (instr->i_opcode) {
3078
351
            case DELETE_FAST:
3079
943
            case LOAD_FAST_AND_CLEAR:
3080
2.12k
            case STORE_FAST_MAYBE_NULL:
3081
2.12k
                unsafe_mask |= bit;
3082
2.12k
                break;
3083
24.8k
            case STORE_FAST:
3084
24.8k
                unsafe_mask &= ~bit;
3085
24.8k
                break;
3086
111
            case LOAD_FAST_CHECK:
3087
                // If this doesn't raise, then the local is defined.
3088
111
                unsafe_mask &= ~bit;
3089
111
                break;
3090
80.6k
            case LOAD_FAST:
3091
80.6k
                if (unsafe_mask & bit) {
3092
111
                    instr->i_opcode = LOAD_FAST_CHECK;
3093
111
                }
3094
80.6k
                unsafe_mask &= ~bit;
3095
80.6k
                break;
3096
363k
        }
3097
363k
    }
3098
65.5k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3099
35.0k
        maybe_push(b->b_next, unsafe_mask, sp);
3100
35.0k
    }
3101
65.5k
    cfg_instr *last = basicblock_last_instr(b);
3102
65.5k
    if (last && is_jump(last)) {
3103
31.8k
        assert(last->i_target != NULL);
3104
31.8k
        maybe_push(last->i_target, unsafe_mask, sp);
3105
31.8k
    }
3106
65.5k
}
3107
3108
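
scan_block_for_locals is essentially a transfer function over a 64-bit mask: bit i set means local i is possibly uninitialized; DELETE_FAST and friends set the bit, STORE_FAST clears it, and a LOAD_FAST under a set bit is rewritten to the checking variant (which, if it does not raise, proves the local bound). A self-contained sketch of just that mask arithmetic, with stand-in opcode constants (not the real opcode values):

#include <stdint.h>
#include <stdio.h>

enum { T_DELETE, T_STORE, T_LOAD };    /* stand-ins for the real opcodes */

int main(void)
{
    struct { int op, arg; } block[] = {
        { T_DELETE, 0 },   /* local 0 becomes possibly-unbound */
        { T_LOAD,   0 },   /* needs a checked load */
        { T_LOAD,   0 },   /* previous checked load proved it bound */
        { T_STORE,  1 },
        { T_LOAD,   1 },   /* definitely bound: plain load */
    };
    uint64_t unsafe = 0;   /* bit i => local i possibly uninitialized */

    for (int i = 0; i < 5; i++) {
        uint64_t bit = (uint64_t)1 << block[i].arg;
        switch (block[i].op) {
            case T_DELETE: unsafe |= bit; break;
            case T_STORE:  unsafe &= ~bit; break;
            case T_LOAD:
                printf("load of local %d: %s\n", block[i].arg,
                       (unsafe & bit) ? "LOAD_FAST_CHECK" : "LOAD_FAST");
                unsafe &= ~bit;  /* a load that succeeded proves it bound */
                break;
        }
    }
    return 0;
}
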
static int
3109
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3110
0
{
3111
0
    assert(nlocals > 64);
3112
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3113
0
    if (states == NULL) {
3114
0
        PyErr_NoMemory();
3115
0
        return ERROR;
3116
0
    }
3117
0
    Py_ssize_t blocknum = 0;
3118
    // states[i - 64] == blocknum if local i is guaranteed to
3119
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3120
    // STORE_FAST within that basicblock (not followed by
3121
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3122
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3123
0
        blocknum++;
3124
0
        for (int i = 0; i < b->b_iused; i++) {
3125
0
            cfg_instr *instr = &b->b_instr[i];
3126
0
            assert(instr->i_opcode != EXTENDED_ARG);
3127
0
            int arg = instr->i_oparg;
3128
0
            if (arg < 64) {
3129
0
                continue;
3130
0
            }
3131
0
            assert(arg >= 0);
3132
0
            switch (instr->i_opcode) {
3133
0
                case DELETE_FAST:
3134
0
                case LOAD_FAST_AND_CLEAR:
3135
0
                case STORE_FAST_MAYBE_NULL:
3136
0
                    states[arg - 64] = blocknum - 1;
3137
0
                    break;
3138
0
                case STORE_FAST:
3139
0
                    states[arg - 64] = blocknum;
3140
0
                    break;
3141
0
                case LOAD_FAST:
3142
0
                    if (states[arg - 64] != blocknum) {
3143
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3144
0
                    }
3145
0
                    states[arg - 64] = blocknum;
3146
0
                    break;
3147
0
                    Py_UNREACHABLE();
3148
0
            }
3149
0
        }
3150
0
    }
3151
0
    PyMem_Free(states);
3152
0
    return SUCCESS;
3153
0
}
3154
3155
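
fast_scan_many_locals avoids a per-block reset of its state array by stamping entries with the current block number: any stale stamp reads as "unknown in this block". The same trick in miniature (all names hypothetical):

#include <stdio.h>

int main(void)
{
    int states[4] = {0};               /* stamp per local, 0 = never seen */
    int blocknum = 0;

    /* two "blocks", each defining local 2 then loading it */
    for (int b = 0; b < 2; b++) {
        blocknum++;                    /* entering a new block... */
        /* ...implicitly invalidates every stale stamp: no memset needed */
        states[2] = blocknum;          /* STORE of local 2 */
        printf("block %d load: %s\n", blocknum,
               states[2] == blocknum ? "known bound" : "needs check");
    }
    return 0;
}
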
static int
3156
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3157
6.55k
{
3158
6.55k
    assert(PyList_CheckExact(consts));
3159
6.55k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3160
6.55k
    if (nconsts == 0) {
3161
0
        return SUCCESS;  /* nothing to do */
3162
0
    }
3163
3164
6.55k
    Py_ssize_t *index_map = NULL;
3165
6.55k
    Py_ssize_t *reverse_index_map = NULL;
3166
6.55k
    int err = ERROR;
3167
3168
6.55k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3169
6.55k
    if (index_map == NULL) {
3170
0
        goto end;
3171
0
    }
3172
50.7k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3173
44.1k
        index_map[i] = -1;
3174
44.1k
    }
3175
    // The first constant may be a docstring; keep it always.
3176
6.55k
    index_map[0] = 0;
3177
3178
    /* mark used consts */
3179
50.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3180
370k
        for (int i = 0; i < b->b_iused; i++) {
3181
325k
            int opcode = b->b_instr[i].i_opcode;
3182
325k
            if (OPCODE_HAS_CONST(opcode)) {
3183
43.9k
                int index = b->b_instr[i].i_oparg;
3184
43.9k
                index_map[index] = index;
3185
43.9k
            }
3186
325k
        }
3187
44.2k
    }
3188
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3189
    /* condense consts */
3190
6.55k
    Py_ssize_t n_used_consts = 0;
3191
57.2k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3192
50.7k
        if (index_map[i] != -1) {
3193
37.7k
            assert(index_map[i] == i);
3194
37.7k
            index_map[n_used_consts++] = index_map[i];
3195
37.7k
        }
3196
50.7k
    }
3197
6.55k
    if (n_used_consts == nconsts) {
3198
        /* nothing to do */
3199
2.61k
        err = SUCCESS;
3200
2.61k
        goto end;
3201
2.61k
    }
3202
3203
    /* move all used consts to the beginning of the consts list */
3204
3.93k
    assert(n_used_consts < nconsts);
3205
33.8k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3206
29.8k
        Py_ssize_t old_index = index_map[i];
3207
29.8k
        assert(i <= old_index && old_index < nconsts);
3208
29.8k
        if (i != old_index) {
3209
22.2k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3210
22.2k
            assert(value != NULL);
3211
22.2k
            PyList_SetItem(consts, i, Py_NewRef(value));
3212
22.2k
        }
3213
29.8k
    }
3214
3215
    /* truncate the consts list at its new size */
3216
3.93k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3217
0
        goto end;
3218
0
    }
3219
    /* adjust const indices in the bytecode */
3220
3.93k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3221
3.93k
    if (reverse_index_map == NULL) {
3222
0
        goto end;
3223
0
    }
3224
46.7k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3225
42.8k
        reverse_index_map[i] = -1;
3226
42.8k
    }
3227
33.8k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3228
29.8k
        assert(index_map[i] != -1);
3229
29.8k
        assert(reverse_index_map[index_map[i]] == -1);
3230
29.8k
        reverse_index_map[index_map[i]] = i;
3231
29.8k
    }
3232
3233
35.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3234
286k
        for (int i = 0; i < b->b_iused; i++) {
3235
254k
            int opcode = b->b_instr[i].i_opcode;
3236
254k
            if (OPCODE_HAS_CONST(opcode)) {
3237
35.5k
                int index = b->b_instr[i].i_oparg;
3238
35.5k
                assert(reverse_index_map[index] >= 0);
3239
35.5k
                assert(reverse_index_map[index] < n_used_consts);
3240
35.5k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3241
35.5k
            }
3242
254k
        }
3243
32.0k
    }
3244
3245
3.93k
    err = SUCCESS;
3246
6.55k
end:
3247
6.55k
    PyMem_Free(index_map);
3248
6.55k
    PyMem_Free(reverse_index_map);
3249
6.55k
    return err;
3250
3.93k
}
3251
3252
3253
3254
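
The function builds index_map twice over: first as a used-marker (index_map[i] == i or -1), then compacted so that index_map[new] == old, with reverse_index_map inverting it so each constant-loading oparg is rewritten by a single lookup. A toy version of the two-map compaction, with plain arrays standing in for the consts list and bytecode:

#include <stdio.h>

int main(void)
{
    int used[6] = { 1, 0, 1, 0, 0, 1 };     /* consts 0, 2, 5 referenced */
    int index_map[6], reverse[6];
    int n_used = 0;

    for (int i = 0; i < 6; i++) {
        reverse[i] = -1;
        if (used[i]) {
            index_map[n_used++] = i;        /* index_map[new] == old */
        }
    }
    for (int new_i = 0; new_i < n_used; new_i++) {
        reverse[index_map[new_i]] = new_i;  /* reverse[old] == new */
    }
    /* rewriting an instruction that loaded const 5: */
    printf("oparg 5 becomes %d\n", reverse[5]);   /* prints: 2 */
    return 0;
}
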
static int
3255
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3256
                                                int nlocals,
3257
                                                int nparams)
3258
6.55k
{
3259
6.55k
    if (nlocals == 0) {
3260
1.64k
        return SUCCESS;
3261
1.64k
    }
3262
4.90k
    if (nlocals > 64) {
3263
        // To avoid O(nlocals**2) compilation, locals beyond the first
3264
        // 64 are only analyzed one basicblock at a time: initialization
3265
        // info is not passed between basicblocks.
3266
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3267
0
            return ERROR;
3268
0
        }
3269
0
        nlocals = 64;
3270
0
    }
3271
4.90k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3272
4.90k
    if (stack == NULL) {
3273
0
        return ERROR;
3274
0
    }
3275
4.90k
    basicblock **sp = stack;
3276
3277
    // First origin of being uninitialized:
3278
    // The non-parameter locals in the entry block.
3279
4.90k
    uint64_t start_mask = 0;
3280
12.5k
    for (int i = nparams; i < nlocals; i++) {
3281
7.62k
        start_mask |= (uint64_t)1 << i;
3282
7.62k
    }
3283
4.90k
    maybe_push(entryblock, start_mask, &sp);
3284
3285
    // Second origin of being uninitialized:
3286
    // There could be DELETE_FAST somewhere, so
3287
    // be sure to scan each basicblock at least once.
3288
45.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3289
40.9k
        scan_block_for_locals(b, &sp);
3290
40.9k
    }
3291
    // Now propagate the uncertainty from the origins we found: Use
3292
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3293
29.5k
    while (sp > stack) {
3294
24.6k
        basicblock *b = *--sp;
3295
        // mark as no longer on stack
3296
24.6k
        b->b_visited = 0;
3297
24.6k
        scan_block_for_locals(b, &sp);
3298
24.6k
    }
3299
4.90k
    PyMem_Free(stack);
3300
4.90k
    return SUCCESS;
3301
4.90k
}
3302
3303
3304
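
The driver above combines both origins of "possibly uninitialized" and runs maybe_push-style worklist propagation to a fixpoint: a block is requeued only when the incoming mask adds bits it has not yet seen, and b_visited keeps it from sitting on the stack twice, so termination follows from the masks only ever growing. A small self-contained fixpoint over a two-block loop (the struct layout is illustrative, not the real basicblock):

#include <stdint.h>
#include <stdio.h>

struct blk { uint64_t mask; int visited; int succ; };  /* one successor */

static void maybe_push(struct blk *blks, int i, uint64_t in,
                       int *stack, int *sp)
{
    uint64_t both = blks[i].mask | in;
    if (blks[i].mask != both) {        /* only push on new information */
        blks[i].mask = both;
        if (!blks[i].visited) {
            stack[(*sp)++] = i;
            blks[i].visited = 1;
        }
    }
}

int main(void)
{
    /* block 0 -> block 1 -> block 0 (a loop) */
    struct blk blks[2] = { { 0, 0, 1 }, { 0, 0, 0 } };
    int stack[8], sp = 0;

    maybe_push(blks, 0, 0x3, stack, &sp);   /* entry: locals 0,1 unbound */
    while (sp > 0) {
        int i = stack[--sp];
        blks[i].visited = 0;                /* no longer on the stack */
        maybe_push(blks, blks[i].succ, blks[i].mask, stack, &sp);
    }
    printf("block 1 mask: %#llx\n", (unsigned long long)blks[1].mask);
    return 0;
}

Despite the cycle, the loop runs each block a bounded number of times, because a block is only revisited when its mask grew.
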
static int
3305
5.82k
mark_warm(basicblock *entryblock) {
3306
5.82k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3307
5.82k
    if (stack == NULL) {
3308
0
        return ERROR;
3309
0
    }
3310
5.82k
    basicblock **sp = stack;
3311
3312
5.82k
    *sp++ = entryblock;
3313
5.82k
    entryblock->b_visited = 1;
3314
37.0k
    while (sp > stack) {
3315
31.1k
        basicblock *b = *(--sp);
3316
31.1k
        assert(!b->b_except_handler);
3317
31.1k
        b->b_warm = 1;
3318
31.1k
        basicblock *next = b->b_next;
3319
31.1k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3320
14.9k
            *sp++ = next;
3321
14.9k
            next->b_visited = 1;
3322
14.9k
        }
3323
277k
        for (int i=0; i < b->b_iused; i++) {
3324
246k
            cfg_instr *instr = &b->b_instr[i];
3325
246k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3326
10.4k
                *sp++ = instr->i_target;
3327
10.4k
                instr->i_target->b_visited = 1;
3328
10.4k
            }
3329
246k
        }
3330
31.1k
    }
3331
5.82k
    PyMem_Free(stack);
3332
5.82k
    return SUCCESS;
3333
5.82k
}
3334
3335
static int
3336
5.82k
mark_cold(basicblock *entryblock) {
3337
49.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3338
43.5k
        assert(!b->b_cold && !b->b_warm);
3339
43.5k
    }
3340
5.82k
    if (mark_warm(entryblock) < 0) {
3341
0
        return ERROR;
3342
0
    }
3343
3344
5.82k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3345
5.82k
    if (stack == NULL) {
3346
0
        return ERROR;
3347
0
    }
3348
3349
5.82k
    basicblock **sp = stack;
3350
49.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3351
43.5k
        if (b->b_except_handler) {
3352
2.56k
            assert(!b->b_warm);
3353
2.56k
            *sp++ = b;
3354
2.56k
            b->b_visited = 1;
3355
2.56k
        }
3356
43.5k
    }
3357
3358
10.8k
    while (sp > stack) {
3359
5.04k
        basicblock *b = *(--sp);
3360
5.04k
        b->b_cold = 1;
3361
5.04k
        basicblock *next = b->b_next;
3362
5.04k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3363
1.38k
            if (!next->b_warm && !next->b_visited) {
3364
1.28k
                *sp++ = next;
3365
1.28k
                next->b_visited = 1;
3366
1.28k
            }
3367
1.38k
        }
3368
27.2k
        for (int i = 0; i < b->b_iused; i++) {
3369
22.1k
            cfg_instr *instr = &b->b_instr[i];
3370
22.1k
            if (is_jump(instr)) {
3371
1.73k
                assert(i == b->b_iused - 1);
3372
1.73k
                basicblock *target = b->b_instr[i].i_target;
3373
1.73k
                if (!target->b_warm && !target->b_visited) {
3374
1.19k
                    *sp++ = target;
3375
1.19k
                    target->b_visited = 1;
3376
1.19k
                }
3377
1.73k
            }
3378
22.1k
        }
3379
5.04k
    }
3380
5.82k
    PyMem_Free(stack);
3381
5.82k
    return SUCCESS;
3382
5.82k
}
3383
3384
3385
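
mark_warm and mark_cold amount to two reachability passes: warm blocks are those reachable from the entry along normal control flow, and cold blocks are those reached from exception handlers without crossing a warm block. A hedged sketch of the two phases on a tiny hand-built graph (the adjacency encoding is invented for the example):

#include <stdio.h>

int main(void)
{
    /* edges: 0 -> 1, 2 -> 3; block 2 is an exception handler */
    int succ[4] = { 1, -1, 3, -1 };
    int warm[4] = {0}, cold[4] = {0};

    /* phase 1: flood from the entry block */
    for (int b = 0; b != -1; b = succ[b]) {
        warm[b] = 1;
    }
    /* phase 2: flood from each handler, stopping at warm blocks */
    for (int b = 2; b != -1 && !warm[b]; b = succ[b]) {
        cold[b] = 1;
    }
    for (int b = 0; b < 4; b++) {
        printf("block %d: %s\n", b,
               warm[b] ? "warm" : cold[b] ? "cold" : "-");
    }
    return 0;
}
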
static int
3386
6.55k
push_cold_blocks_to_end(cfg_builder *g) {
3387
6.55k
    basicblock *entryblock = g->g_entryblock;
3388
6.55k
    if (entryblock->b_next == NULL) {
3389
        /* single basicblock, no need to reorder */
3390
726
        return SUCCESS;
3391
726
    }
3392
5.82k
    RETURN_IF_ERROR(mark_cold(entryblock));
3393
3394
5.82k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3395
3396
    /* If we have a cold block with fallthrough to a warm block, add */
3397
    /* an explicit jump instead of fallthrough */
3398
49.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3399
43.5k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3400
44
            basicblock *explicit_jump = cfg_builder_new_block(g);
3401
44
            if (explicit_jump == NULL) {
3402
0
                return ERROR;
3403
0
            }
3404
44
            if (!IS_LABEL(b->b_next->b_label)) {
3405
0
                b->b_next->b_label.id = next_lbl++;
3406
0
            }
3407
44
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3408
44
                             NO_LOCATION);
3409
44
            explicit_jump->b_cold = 1;
3410
44
            explicit_jump->b_next = b->b_next;
3411
44
            explicit_jump->b_predecessors = 1;
3412
44
            b->b_next = explicit_jump;
3413
3414
            /* set target */
3415
44
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3416
44
            last->i_target = explicit_jump->b_next;
3417
44
        }
3418
43.5k
    }
3419
3420
5.82k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3421
5.82k
    basicblock *cold_blocks = NULL;
3422
5.82k
    basicblock *cold_blocks_tail = NULL;
3423
3424
5.82k
    basicblock *b = entryblock;
3425
7.77k
    while(b->b_next) {
3426
7.77k
        assert(!b->b_cold);
3427
40.4k
        while (b->b_next && !b->b_next->b_cold) {
3428
32.6k
            b = b->b_next;
3429
32.6k
        }
3430
7.77k
        if (b->b_next == NULL) {
3431
            /* no more cold blocks */
3432
5.82k
            break;
3433
5.82k
        }
3434
3435
        /* b->b_next is the beginning of a cold streak */
3436
1.94k
        assert(!b->b_cold && b->b_next->b_cold);
3437
3438
1.94k
        basicblock *b_end = b->b_next;
3439
5.09k
        while (b_end->b_next && b_end->b_next->b_cold) {
3440
3.14k
            b_end = b_end->b_next;
3441
3.14k
        }
3442
3443
        /* b_end is the end of the cold streak */
3444
1.94k
        assert(b_end && b_end->b_cold);
3445
1.94k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3446
3447
1.94k
        if (cold_blocks == NULL) {
3448
1.03k
            cold_blocks = b->b_next;
3449
1.03k
        }
3450
911
        else {
3451
911
            cold_blocks_tail->b_next = b->b_next;
3452
911
        }
3453
1.94k
        cold_blocks_tail = b_end;
3454
1.94k
        b->b_next = b_end->b_next;
3455
1.94k
        b_end->b_next = NULL;
3456
1.94k
    }
3457
5.82k
    assert(b != NULL && b->b_next == NULL);
3458
5.82k
    b->b_next = cold_blocks;
3459
3460
5.82k
    if (cold_blocks != NULL) {
3461
1.03k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3462
1.03k
    }
3463
5.82k
    return SUCCESS;
3464
5.82k
}
3465
3466
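
The reordering above is a stable partition of the b_next chain: each maximal streak of cold blocks is unlinked, and the streaks are appended, in their original order, after the last warm block. The same splice on a plain singly linked list (the node type is hypothetical):

#include <stdio.h>

struct node { int id; int cold; struct node *next; };

int main(void)
{
    struct node d = {3, 0, 0}, c = {2, 1, &d}, b = {1, 1, &c}, a = {0, 0, &b};
    struct node *head = &a;            /* 0(warm) 1(cold) 2(cold) 3(warm) */

    struct node *cold_head = 0, *cold_tail = 0, *p = head;
    while (p->next) {
        if (p->next->cold) {           /* p->next starts a cold streak */
            struct node *start = p->next, *end = start;
            while (end->next && end->next->cold) {
                end = end->next;
            }
            p->next = end->next;       /* unlink the whole streak */
            end->next = 0;
            if (cold_tail) { cold_tail->next = start; }
            else           { cold_head = start; }
            cold_tail = end;
        }
        else {
            p = p->next;
        }
    }
    p->next = cold_head;               /* append all cold streaks */
    for (p = head; p; p = p->next) {
        printf("%d ", p->id);          /* prints: 0 3 1 2 */
    }
    printf("\n");
    return 0;
}
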
static int
3467
convert_pseudo_conditional_jumps(cfg_builder *g)
3468
6.55k
{
3469
6.55k
    basicblock *entryblock = g->g_entryblock;
3470
50.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3471
362k
        for (int i = 0; i < b->b_iused; i++) {
3472
318k
            cfg_instr *instr = &b->b_instr[i];
3473
318k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3474
543
                assert(i == b->b_iused - 1);
3475
543
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3476
277
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3477
543
                location loc = instr->i_loc;
3478
543
                basicblock *except = instr->i_except;
3479
543
                cfg_instr copy = {
3480
543
                            .i_opcode = COPY,
3481
543
                            .i_oparg = 1,
3482
543
                            .i_loc = loc,
3483
543
                            .i_target = NULL,
3484
543
                            .i_except = except,
3485
543
                };
3486
543
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3487
543
                cfg_instr to_bool = {
3488
543
                            .i_opcode = TO_BOOL,
3489
543
                            .i_oparg = 0,
3490
543
                            .i_loc = loc,
3491
543
                            .i_target = NULL,
3492
543
                            .i_except = except,
3493
543
                };
3494
543
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3495
543
            }
3496
318k
        }
3497
44.3k
    }
3498
6.55k
    return SUCCESS;
3499
6.55k
}
3500
3501
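
A pseudo JUMP_IF_FALSE/JUMP_IF_TRUE leaves its operand on the stack, so lowering it means splicing COPY 1 and TO_BOOL in front of the pop-variant jump. A toy rendering of that in-place expansion over a flat opcode array (enum values are arbitrary stand-ins, not real opcodes):

#include <stdio.h>
#include <string.h>

enum { P_JUMP_IF_FALSE, P_COPY_1, P_TO_BOOL, P_POP_JUMP_IF_FALSE, P_OTHER };

int main(void)
{
    int code[8] = { P_OTHER, P_JUMP_IF_FALSE };   /* spare room for 2 slots */
    int n = 2;

    for (int i = 0; i < n; i++) {
        if (code[i] == P_JUMP_IF_FALSE) {
            /* shift the tail right by two and splice in the prefix */
            memmove(&code[i + 2], &code[i], (n - i) * sizeof(int));
            code[i]     = P_COPY_1;               /* duplicate the operand */
            code[i + 1] = P_TO_BOOL;              /* coerce the copy */
            code[i + 2] = P_POP_JUMP_IF_FALSE;    /* pop-variant jump */
            n += 2;
            i += 2;
        }
    }
    for (int i = 0; i < n; i++) {
        printf("%d ", code[i]);                   /* prints: 4 1 2 3 */
    }
    printf("\n");
    return 0;
}
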
static int
3502
convert_pseudo_ops(cfg_builder *g)
3503
6.55k
{
3504
6.55k
    basicblock *entryblock = g->g_entryblock;
3505
50.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3506
366k
        for (int i = 0; i < b->b_iused; i++) {
3507
322k
            cfg_instr *instr = &b->b_instr[i];
3508
322k
            if (is_block_push(instr)) {
3509
2.56k
                INSTR_SET_OP0(instr, NOP);
3510
2.56k
            }
3511
319k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3512
1.62k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3513
1.62k
                instr->i_opcode = LOAD_FAST;
3514
1.62k
            }
3515
317k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3516
592
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3517
592
                instr->i_opcode = STORE_FAST;
3518
592
            }
3519
322k
        }
3520
44.3k
    }
3521
6.55k
    return remove_redundant_nops_and_jumps(g);
3522
6.55k
}
3523
3524
static inline bool
3525
73.9k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3526
73.9k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3527
41.1k
        return basicblock_has_no_lineno(b);
3528
41.1k
    }
3529
32.7k
    else {
3530
32.7k
        return false;
3531
32.7k
    }
3532
73.9k
}
3533
3534
3535
/* PEP 626 mandates that the f_lineno of a frame is correct
3536
 * after a frame terminates. It would be prohibitively expensive
3537
 * to continuously update the f_lineno field at runtime,
3538
 * so we make sure that all exiting instructions (raises and returns)
3539
 * have a valid line number, allowing us to compute f_lineno lazily.
3540
 * We can do this by duplicating the exit blocks without a line number
3541
 * so that none have more than one predecessor. We can then safely
3542
 * copy the line number from the sole predecessor block.
3543
 */
3544
static int
3545
duplicate_exits_without_lineno(cfg_builder *g)
3546
13.1k
{
3547
13.1k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3548
3549
    /* Copy all exit blocks without a line number that are targets of a jump.
3550
     */
3551
13.1k
    basicblock *entryblock = g->g_entryblock;
3552
101k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3553
88.5k
        cfg_instr *last = basicblock_last_instr(b);
3554
88.5k
        if (last == NULL) {
3555
14.0k
            continue;
3556
14.0k
        }
3557
74.5k
        if (is_jump(last)) {
3558
36.0k
            basicblock *target = next_nonempty_block(last->i_target);
3559
36.0k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3560
595
                basicblock *new_target = copy_basicblock(g, target);
3561
595
                if (new_target == NULL) {
3562
0
                    return ERROR;
3563
0
                }
3564
595
                new_target->b_instr[0].i_loc = last->i_loc;
3565
595
                last->i_target = new_target;
3566
595
                target->b_predecessors--;
3567
595
                new_target->b_predecessors = 1;
3568
595
                new_target->b_next = target->b_next;
3569
595
                new_target->b_label.id = next_lbl++;
3570
595
                target->b_next = new_target;
3571
595
            }
3572
36.0k
        }
3573
74.5k
    }
3574
3575
    /* Any remaining reachable exit blocks without a line number can only be reached by
3576
     * fall through, and thus can only have a single predecessor */
3577
101k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3578
88.5k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3579
37.8k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3580
753
                cfg_instr *last = basicblock_last_instr(b);
3581
753
                assert(last != NULL);
3582
753
                b->b_next->b_instr[0].i_loc = last->i_loc;
3583
753
            }
3584
37.8k
        }
3585
88.5k
    }
3586
13.1k
    return SUCCESS;
3587
13.1k
}
3588
3589
3590
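
One way to picture the duplication step: a shared no-line-number exit reached by several jumps gets a private copy per jump, each copy taking its jump's location, until the block is down to a single predecessor. A simplified standalone model of that decision (field names invented for the sketch):

#include <stdio.h>
#include <stdlib.h>

struct node { int lineno; int preds; };

int main(void)
{
    struct node exitb = { -1, 2 };          /* -1: no line number yet */
    int jump_lineno[2] = { 10, 20 };        /* two jumping predecessors */
    struct node *targets[2];

    for (int i = 0; i < 2; i++) {
        if (exitb.lineno == -1 && exitb.preds > 1) {
            struct node *dup = malloc(sizeof(*dup));
            dup->lineno = jump_lineno[i];   /* inherit the jump's location */
            dup->preds = 1;
            exitb.preds--;
            targets[i] = dup;               /* retarget this jump */
        }
        else {
            exitb.lineno = jump_lineno[i];  /* sole predecessor: just copy */
            targets[i] = &exitb;
        }
    }
    printf("%d %d\n", targets[0]->lineno, targets[1]->lineno);  /* 10 20 */
    free(targets[0]);
    return 0;
}
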
/* If an instruction has no line number, but its predecessor in the BB does,
3591
 * then copy the line number. If a successor block has no line number, and only
3592
 * one predecessor, then inherit the line number.
3593
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3594
 * Also reduces the size of the line number table,
3595
 * but has no impact on the generated line number events.
3596
 */
3597
static void
3598
13.1k
propagate_line_numbers(basicblock *entryblock) {
3599
101k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3600
88.5k
        cfg_instr *last = basicblock_last_instr(b);
3601
88.5k
        if (last == NULL) {
3602
14.0k
            continue;
3603
14.0k
        }
3604
3605
74.5k
        location prev_location = NO_LOCATION;
3606
747k
        for (int i = 0; i < b->b_iused; i++) {
3607
673k
            if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3608
37.5k
                b->b_instr[i].i_loc = prev_location;
3609
37.5k
            }
3610
635k
            else {
3611
635k
                prev_location = b->b_instr[i].i_loc;
3612
635k
            }
3613
673k
        }
3614
74.5k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3615
27.2k
            if (b->b_next->b_iused > 0) {
3616
27.2k
                if (b->b_next->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3617
700
                    b->b_next->b_instr[0].i_loc = prev_location;
3618
700
                }
3619
27.2k
            }
3620
27.2k
        }
3621
74.5k
        if (is_jump(last)) {
3622
36.0k
            basicblock *target = last->i_target;
3623
36.0k
            if (target->b_predecessors == 1) {
3624
16.5k
                if (target->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3625
2.83k
                    target->b_instr[0].i_loc = prev_location;
3626
2.83k
                }
3627
16.5k
            }
3628
36.0k
        }
3629
74.5k
    }
3630
13.1k
}
3631
3632
static int
3633
resolve_line_numbers(cfg_builder *g, int firstlineno)
3634
13.1k
{
3635
13.1k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3636
13.1k
    propagate_line_numbers(g->g_entryblock);
3637
13.1k
    return SUCCESS;
3638
13.1k
}
3639
3640
int
3641
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3642
                        int nlocals, int nparams, int firstlineno)
3643
6.55k
{
3644
6.55k
    assert(cfg_builder_check(g));
3645
    /** Preprocessing **/
3646
    /* Map labels to targets and mark exception handlers */
3647
6.55k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3648
6.55k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3649
6.55k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3650
3651
    /** Optimization **/
3652
6.55k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3653
6.55k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3654
6.55k
    RETURN_IF_ERROR(
3655
6.55k
        add_checks_for_loads_of_uninitialized_variables(
3656
6.55k
            g->g_entryblock, nlocals, nparams));
3657
6.55k
    RETURN_IF_ERROR(insert_superinstructions(g));
3658
3659
6.55k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3660
6.55k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3661
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3662
    // assert(all_exits_have_lineno(g->g_entryblock));
3663
6.55k
    return SUCCESS;
3664
6.55k
}
3665
3666
static int *
3667
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3668
6.55k
{
3669
6.55k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3670
6.55k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3671
6.55k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3672
3673
6.55k
    int noffsets = ncellvars + nfreevars;
3674
6.55k
    int *fixed = PyMem_New(int, noffsets);
3675
6.55k
    if (fixed == NULL) {
3676
0
        PyErr_NoMemory();
3677
0
        return NULL;
3678
0
    }
3679
8.68k
    for (int i = 0; i < noffsets; i++) {
3680
2.13k
        fixed[i] = nlocals + i;
3681
2.13k
    }
3682
3683
6.55k
    PyObject *varname, *cellindex;
3684
6.55k
    Py_ssize_t pos = 0;
3685
7.78k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3686
1.23k
        PyObject *varindex;
3687
1.23k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3688
0
            goto error;
3689
0
        }
3690
1.23k
        if (varindex == NULL) {
3691
862
            continue;
3692
862
        }
3693
3694
368
        int argoffset = PyLong_AsInt(varindex);
3695
368
        Py_DECREF(varindex);
3696
368
        if (argoffset == -1 && PyErr_Occurred()) {
3697
0
            goto error;
3698
0
        }
3699
3700
368
        int oldindex = PyLong_AsInt(cellindex);
3701
368
        if (oldindex == -1 && PyErr_Occurred()) {
3702
0
            goto error;
3703
0
        }
3704
368
        fixed[oldindex] = argoffset;
3705
368
    }
3706
6.55k
    return fixed;
3707
3708
0
error:
3709
0
    PyMem_Free(fixed);
3710
0
    return NULL;
3711
6.55k
}
3712
3713
#define IS_GENERATOR(CF) \
3714
6.55k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3715
3716
static int
3717
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3718
                           int *fixed, int nfreevars, int code_flags)
3719
6.55k
{
3720
6.55k
    assert(umd->u_firstlineno > 0);
3721
3722
    /* Add the generator prefix instructions. */
3723
6.55k
    if (IS_GENERATOR(code_flags)) {
3724
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3725
         * of 0. This is because RETURN_GENERATOR pushes an element
3726
         * with _PyFrame_StackPush before switching stacks.
3727
         */
3728
3729
342
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3730
342
        cfg_instr make_gen = {
3731
342
            .i_opcode = RETURN_GENERATOR,
3732
342
            .i_oparg = 0,
3733
342
            .i_loc = loc,
3734
342
            .i_target = NULL,
3735
342
            .i_except = NULL,
3736
342
        };
3737
342
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3738
342
        cfg_instr pop_top = {
3739
342
            .i_opcode = POP_TOP,
3740
342
            .i_oparg = 0,
3741
342
            .i_loc = loc,
3742
342
            .i_target = NULL,
3743
342
            .i_except = NULL,
3744
342
        };
3745
342
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3746
342
    }
3747
3748
    /* Set up cells for any variable that escapes, to be put in a closure. */
3749
6.55k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3750
6.55k
    if (ncellvars) {
3751
        // umd->u_cellvars has the cells out of order so we sort them
3752
        // before adding the MAKE_CELL instructions.  Note that we
3753
        // adjust for arg cells, which come first.
3754
812
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3755
812
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3756
812
        if (sorted == NULL) {
3757
0
            PyErr_NoMemory();
3758
0
            return ERROR;
3759
0
        }
3760
2.04k
        for (int i = 0; i < ncellvars; i++) {
3761
1.23k
            sorted[fixed[i]] = i + 1;
3762
1.23k
        }
3763
2.60k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3764
1.78k
            int oldindex = sorted[i] - 1;
3765
1.78k
            if (oldindex == -1) {
3766
559
                continue;
3767
559
            }
3768
1.23k
            cfg_instr make_cell = {
3769
1.23k
                .i_opcode = MAKE_CELL,
3770
                // This will get fixed in fix_cell_offsets().
3771
1.23k
                .i_oparg = oldindex,
3772
1.23k
                .i_loc = NO_LOCATION,
3773
1.23k
                .i_target = NULL,
3774
1.23k
                .i_except = NULL,
3775
1.23k
            };
3776
1.23k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3777
0
                PyMem_RawFree(sorted);
3778
0
                return ERROR;
3779
0
            }
3780
1.23k
            ncellsused += 1;
3781
1.23k
        }
3782
812
        PyMem_RawFree(sorted);
3783
812
    }
3784
3785
6.55k
    if (nfreevars) {
3786
500
        cfg_instr copy_frees = {
3787
500
            .i_opcode = COPY_FREE_VARS,
3788
500
            .i_oparg = nfreevars,
3789
500
            .i_loc = NO_LOCATION,
3790
500
            .i_target = NULL,
3791
500
            .i_except = NULL,
3792
500
        };
3793
500
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3794
500
    }
3795
3796
6.55k
    return SUCCESS;
3797
6.55k
}
3798
3799
static int
3800
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3801
6.55k
{
3802
6.55k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3803
6.55k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3804
6.55k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3805
6.55k
    int noffsets = ncellvars + nfreevars;
3806
3807
    // First deal with duplicates (arg cells).
3808
6.55k
    int numdropped = 0;
3809
8.68k
    for (int i = 0; i < noffsets ; i++) {
3810
2.13k
        if (fixedmap[i] == i + nlocals) {
3811
1.76k
            fixedmap[i] -= numdropped;
3812
1.76k
        }
3813
368
        else {
3814
            // It was a duplicate (cell/arg).
3815
368
            numdropped += 1;
3816
368
        }
3817
2.13k
    }
3818
3819
    // Then update offsets, either relative to locals or by cell2arg.
3820
50.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3821
366k
        for (int i = 0; i < b->b_iused; i++) {
3822
322k
            cfg_instr *inst = &b->b_instr[i];
3823
            // This is called before extended args are generated.
3824
322k
            assert(inst->i_opcode != EXTENDED_ARG);
3825
322k
            int oldoffset = inst->i_oparg;
3826
322k
            switch(inst->i_opcode) {
3827
1.23k
                case MAKE_CELL:
3828
2.85k
                case LOAD_CLOSURE:
3829
4.79k
                case LOAD_DEREF:
3830
5.67k
                case STORE_DEREF:
3831
5.67k
                case DELETE_DEREF:
3832
5.67k
                case LOAD_FROM_DICT_OR_DEREF:
3833
5.67k
                    assert(oldoffset >= 0);
3834
5.67k
                    assert(oldoffset < noffsets);
3835
5.67k
                    assert(fixedmap[oldoffset] >= 0);
3836
5.67k
                    inst->i_oparg = fixedmap[oldoffset];
3837
322k
            }
3838
322k
        }
3839
44.3k
    }
3840
3841
6.55k
    return numdropped;
3842
6.55k
}
3843
3844
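
build_cellfixedoffsets defaults each cell/free slot to nlocals + i, and a cell that doubles as an argument is redirected to the argument's slot; fix_cell_offsets then compacts away the dropped duplicates and rewrites every deref oparg through the map. A toy run of the same remapping (the numbers are made up):

#include <stdio.h>

int main(void)
{
    int nlocals = 3;                       /* locals occupy slots 0..2 */
    int fixed[3] = { 3, 4, 5 };            /* default: nlocals + i */

    fixed[1] = 0;                          /* cell 1 is also argument 0 */

    int numdropped = 0;
    for (int i = 0; i < 3; i++) {
        if (fixed[i] == i + nlocals) {
            fixed[i] -= numdropped;        /* slide left over the dup */
        }
        else {
            numdropped += 1;               /* duplicate cell/arg slot */
        }
    }
    /* oparg rewriting: old cell index -> frame slot */
    for (int i = 0; i < 3; i++) {
        printf("cell %d -> slot %d\n", i, fixed[i]);
    }
    printf("nlocalsplus shrinks by %d\n", numdropped);
    return 0;
}

This prints cell 0 -> slot 3, cell 1 -> slot 0, cell 2 -> slot 4, mirroring how the duplicate cell reuses the argument's slot and every later cell slides down by one.
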
static int
3845
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3846
6.55k
{
3847
6.55k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3848
6.55k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3849
6.55k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3850
6.55k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3851
6.55k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3852
6.55k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3853
6.55k
    assert(INT_MAX - nlocals - ncellvars > 0);
3854
6.55k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3855
6.55k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3856
6.55k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3857
6.55k
    if (cellfixedoffsets == NULL) {
3858
0
        return ERROR;
3859
0
    }
3860
3861
    // This must be called before fix_cell_offsets().
3862
6.55k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3863
0
        PyMem_Free(cellfixedoffsets);
3864
0
        return ERROR;
3865
0
    }
3866
3867
6.55k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3868
6.55k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3869
6.55k
    cellfixedoffsets = NULL;
3870
6.55k
    if (numdropped < 0) {
3871
0
        return ERROR;
3872
0
    }
3873
3874
6.55k
    nlocalsplus -= numdropped;
3875
6.55k
    return nlocalsplus;
3876
6.55k
}
3877
3878
cfg_builder *
3879
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3880
6.55k
{
3881
6.55k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3882
0
        return NULL;
3883
0
    }
3884
6.55k
    cfg_builder *g = _PyCfgBuilder_New();
3885
6.55k
    if (g == NULL) {
3886
0
        return NULL;
3887
0
    }
3888
371k
    for (int i = 0; i < seq->s_used; i++) {
3889
365k
        seq->s_instrs[i].i_target = 0;
3890
365k
    }
3891
371k
    for (int i = 0; i < seq->s_used; i++) {
3892
365k
        _PyInstruction *instr = &seq->s_instrs[i];
3893
365k
        if (HAS_TARGET(instr->i_opcode)) {
3894
22.8k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3895
22.8k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3896
22.8k
        }
3897
365k
    }
3898
6.55k
    int offset = 0;
3899
371k
    for (int i = 0; i < seq->s_used; i++) {
3900
365k
        _PyInstruction *instr = &seq->s_instrs[i];
3901
365k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3902
362
            if (seq->s_annotations_code != NULL) {
3903
1
                assert(seq->s_annotations_code->s_labelmap_size == 0
3904
1
                    && seq->s_annotations_code->s_nested == NULL);
3905
4
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3906
3
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3907
3
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3908
3
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3909
0
                        goto error;
3910
0
                    }
3911
3
                }
3912
1
                offset += seq->s_annotations_code->s_used - 1;
3913
1
            }
3914
361
            else {
3915
361
                offset -= 1;
3916
361
            }
3917
362
            continue;
3918
362
        }
3919
364k
        if (instr->i_target) {
3920
18.4k
            jump_target_label lbl_ = {i + offset};
3921
18.4k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3922
0
                goto error;
3923
0
            }
3924
18.4k
        }
3925
364k
        int opcode = instr->i_opcode;
3926
364k
        int oparg = instr->i_oparg;
3927
364k
        if (HAS_TARGET(opcode)) {
3928
22.8k
            oparg += offset;
3929
22.8k
        }
3930
364k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3931
0
            goto error;
3932
0
        }
3933
364k
    }
3934
6.55k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3935
0
        goto error;
3936
0
    }
3937
6.55k
    return g;
3938
0
error:
3939
0
    _PyCfgBuilder_Free(g);
3940
0
    return NULL;
3941
6.55k
}
3942
3943
int
3944
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3945
6.55k
{
3946
6.55k
    int lbl = 0;
3947
51.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3948
45.0k
        b->b_label = (jump_target_label){lbl};
3949
45.0k
        lbl += 1;
3950
45.0k
    }
3951
51.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3952
45.0k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3953
377k
        for (int i = 0; i < b->b_iused; i++) {
3954
332k
            cfg_instr *instr = &b->b_instr[i];
3955
332k
            if (HAS_TARGET(instr->i_opcode)) {
3956
                /* Set oparg to the label id (it will later be mapped to an offset) */
3957
18.1k
                instr->i_oparg = instr->i_target->b_label.id;
3958
18.1k
            }
3959
332k
            RETURN_IF_ERROR(
3960
332k
                _PyInstructionSequence_Addop(
3961
332k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3962
3963
332k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3964
332k
            if (instr->i_except != NULL) {
3965
41.4k
                hi->h_label = instr->i_except->b_label.id;
3966
41.4k
                hi->h_startdepth = instr->i_except->b_startdepth;
3967
41.4k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3968
41.4k
            }
3969
291k
            else {
3970
291k
                hi->h_label = -1;
3971
291k
            }
3972
332k
        }
3973
45.0k
    }
3974
6.55k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3975
0
        return ERROR;
3976
0
    }
3977
6.55k
    return SUCCESS;
3978
6.55k
}
3979
3980
3981
int
3982
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3983
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3984
                                     int *stackdepth, int *nlocalsplus,
3985
                                     _PyInstructionSequence *seq)
3986
6.55k
{
3987
6.55k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3988
3989
6.55k
    *stackdepth = calculate_stackdepth(g);
3990
6.55k
    if (*stackdepth < 0) {
3991
0
        return ERROR;
3992
0
    }
3993
3994
    /* prepare_localsplus adds instructions for generators that push
3995
     * and pop an item on the stack. This assertion makes sure there
3996
     * is space on the stack for that.
3997
     * It should always be true, because a generator must have at
3998
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
3999
     * which requires stack space.
4000
     */
4001
6.55k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
4002
4003
6.55k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
4004
6.55k
    if (*nlocalsplus < 0) {
4005
0
        return ERROR;
4006
0
    }
4007
4008
6.55k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4009
4010
    /* Order of basic blocks must have been determined by now */
4011
4012
6.55k
    RETURN_IF_ERROR(normalize_jumps(g));
4013
6.55k
    assert(no_redundant_jumps(g));
4014
4015
    /* Can't modify the bytecode after inserting instructions that produce
4016
     * borrowed references.
4017
     */
4018
6.55k
    RETURN_IF_ERROR(optimize_load_fast(g));
4019
4020
    /* Can't modify the bytecode after computing jump offsets. */
4021
6.55k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4022
0
        return ERROR;
4023
0
    }
4024
4025
6.55k
    return SUCCESS;
4026
6.55k
}
4027
4028
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4029
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4030
 */
4031
int
4032
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4033
0
{
4034
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4035
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4036
0
    return SUCCESS;
4037
0
}
4038
4039
/* Exported API functions */
4040
4041
int
4042
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4043
0
{
4044
0
    stack_effects effs;
4045
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4046
0
        return PY_INVALID_STACK_EFFECT;
4047
0
    }
4048
0
    return effs.net;
4049
0
}
4050
4051
int
4052
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4053
113
{
4054
113
    stack_effects effs;
4055
113
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4056
0
        return PY_INVALID_STACK_EFFECT;
4057
0
    }
4058
113
    return effs.net;
4059
113
}
4060
4061
/* Access to compiler optimizations for unit tests.
4062
4063
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4064
 * a CFG, optimizes it, and converts it back to an instruction list.
4065
 */
4066
4067
static PyObject *
4068
cfg_to_instruction_sequence(cfg_builder *g)
4069
0
{
4070
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4071
0
    if (seq == NULL) {
4072
0
        return NULL;
4073
0
    }
4074
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4075
0
        PyInstructionSequence_Fini(seq);
4076
0
        return NULL;
4077
0
    }
4078
0
    return (PyObject*)seq;
4079
0
}
4080
4081
PyObject *
4082
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4083
0
{
4084
0
    if (!_PyInstructionSequence_Check(seq)) {
4085
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4086
0
        return NULL;
4087
0
    }
4088
0
    PyObject *const_cache = PyDict_New();
4089
0
    if (const_cache == NULL) {
4090
0
        return NULL;
4091
0
    }
4092
4093
0
    PyObject *res = NULL;
4094
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4095
0
    if (g == NULL) {
4096
0
        goto error;
4097
0
    }
4098
0
    int nparams = 0, firstlineno = 1;
4099
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4100
0
                                nparams, firstlineno) < 0) {
4101
0
        goto error;
4102
0
    }
4103
4104
0
    if (calculate_stackdepth(g) == ERROR) {
4105
0
        goto error;
4106
0
    }
4107
4108
0
    if (optimize_load_fast(g) != SUCCESS) {
4109
0
        goto error;
4110
0
    }
4111
4112
0
    res = cfg_to_instruction_sequence(g);
4113
0
error:
4114
0
    Py_DECREF(const_cache);
4115
0
    _PyCfgBuilder_Free(g);
4116
0
    return res;
4117
0
}