Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Python/flowgraph.c

Line  Count  Source
   1         #include "Python.h"
   2         #include "opcode.h"
   3         #include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
   4         #include "pycore_flowgraph.h"
   5         #include "pycore_compile.h"
   6         #include "pycore_intrinsics.h"
   7         #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
   8         #include "pycore_long.h"          // _PY_IS_SMALL_INT()
   9
  10         #include "pycore_opcode_utils.h"
  11         #include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
  12
  13         #include <stdbool.h>
  14
  15
  16         #undef SUCCESS
  17         #undef ERROR
  18   996k  #define SUCCESS 0
  19  5.78k  #define ERROR -1
  20
  21         #define RETURN_IF_ERROR(X)  \
  22  1.55M      if ((X) == -1) {        \
  23      0          return ERROR;       \
  24      0      }
  25
  26   332k  #define DEFAULT_BLOCK_SIZE 16
  27
  28         typedef _Py_SourceLocation location;
  29         typedef _PyJumpTargetLabel jump_target_label;
  30
  31         typedef struct _PyCfgInstruction {
  32             int i_opcode;
  33             int i_oparg;
  34             _Py_SourceLocation i_loc;
  35             struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
  36             struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
  37         } cfg_instr;
  38
  39         typedef struct _PyCfgBasicblock {
  40             /* Each basicblock in a compilation unit is linked via b_list in the
  41                reverse order that the blocks are allocated.  b_list points to the next
  42                block in this list, not to be confused with b_next, which is next by
  43                control flow. */
  44             struct _PyCfgBasicblock *b_list;
  45             /* The label of this block if it is a jump target, -1 otherwise */
  46             _PyJumpTargetLabel b_label;
  47             /* Exception stack at start of block, used by assembler to create the exception handling table */
  48             struct _PyCfgExceptStack *b_exceptstack;
  49             /* pointer to an array of instructions, initially NULL */
  50             cfg_instr *b_instr;
  51             /* If b_next is non-NULL, it is a pointer to the next
  52                block reached by normal control flow. */
  53             struct _PyCfgBasicblock *b_next;
  54             /* number of instructions used */
  55             int b_iused;
  56             /* length of instruction array (b_instr) */
  57             int b_ialloc;
  58             /* Used by add_checks_for_loads_of_unknown_variables */
  59             uint64_t b_unsafe_locals_mask;
  60             /* Number of predecessors that a block has. */
  61             int b_predecessors;
  62             /* depth of stack upon entry of block, computed by stackdepth() */
  63             int b_startdepth;
  64             /* Basic block is an exception handler that preserves lasti */
  65             unsigned b_preserve_lasti : 1;
  66             /* Used by compiler passes to mark whether they have visited a basic block. */
  67             unsigned b_visited : 1;
  68             /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
  69             unsigned b_except_handler : 1;
  70             /* b_cold is true if this block is not perf critical (like an exception handler) */
  71             unsigned b_cold : 1;
  72             /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
  73             unsigned b_warm : 1;
  74         } basicblock;
  75
  76
  77         struct _PyCfgBuilder {
  78             /* The entryblock, at which control flow begins. All blocks of the
  79                CFG are reachable through the b_next links */
  80             struct _PyCfgBasicblock *g_entryblock;
  81             /* Pointer to the most recently allocated block.  By following
  82                b_list links, you can reach all allocated blocks. */
  83             struct _PyCfgBasicblock *g_block_list;
  84             /* pointer to the block currently being constructed */
  85             struct _PyCfgBasicblock *g_curblock;
  86             /* label for the next instruction to be placed */
  87             _PyJumpTargetLabel g_current_label;
  88         };
  89
  90         typedef struct _PyCfgBuilder cfg_builder;
  91
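The b_list and b_next links support two different traversals, and both patterns recur throughout this file. A minimal sketch, using only the names declared above:

    /* Allocation order: every block ever created, reachable or not. */
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) { /* ... */ }

    /* Layout (control-flow) order, starting from the entry block. */
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { /* ... */ }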
  92   341k  #define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
  93   341k  #define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
  94
  95         #define LOCATION(LNO, END_LNO, COL, END_COL) \
  96    307      ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
  97
  98         static inline int
  99         is_block_push(cfg_instr *i)
 100  1.45M  {
 101  1.45M      assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
 102  1.45M      return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
 103  1.45M  }
 104
 105         static inline int
 106         is_jump(cfg_instr *i)
 107  1.32M  {
 108  1.32M      return OPCODE_HAS_JUMP(i->i_opcode);
 109  1.32M  }
 110
 111         /* One arg */
 112         #define INSTR_SET_OP1(I, OP, ARG) \
 113  35.7k      do { \
 114  35.7k          assert(OPCODE_HAS_ARG(OP)); \
 115  35.7k          cfg_instr *_instr__ptr_ = (I); \
 116  35.7k          _instr__ptr_->i_opcode = (OP); \
 117  35.7k          _instr__ptr_->i_oparg = (ARG); \
 118  35.7k      } while (0);
 119
 120         /* No args */
 121         #define INSTR_SET_OP0(I, OP) \
 122  69.3k      do { \
 123  69.3k          assert(!OPCODE_HAS_ARG(OP)); \
 124  69.3k          cfg_instr *_instr__ptr_ = (I); \
 125  69.3k          _instr__ptr_->i_opcode = (OP); \
 126  69.3k          _instr__ptr_->i_oparg = 0; \
 127  69.3k      } while (0);
 128
 129         #define INSTR_SET_LOC(I, LOC) \
 130  4.09k      do { \
 131  4.09k          cfg_instr *_instr__ptr_ = (I); \
 132  4.09k          _instr__ptr_->i_loc = (LOC); \
 133  4.09k      } while (0);
 134
 135         /***** Blocks *****/
 136
 137         /* Returns the offset of the next instruction in the current block's
 138            b_instr array.  Resizes b_instr as necessary.
 139            Returns -1 on failure.
 140         */
 141         static int
 142         basicblock_next_instr(basicblock *b)
 143   332k  {
 144   332k      assert(b != NULL);
 145   332k      _Py_c_array_t array = {
 146   332k          .array = (void*)b->b_instr,
 147   332k          .allocated_entries = b->b_ialloc,
 148   332k          .item_size = sizeof(cfg_instr),
 149   332k          .initial_num_entries = DEFAULT_BLOCK_SIZE,
 150   332k      };
 151
 152   332k      RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
 153   332k      b->b_instr = array.array;
 154   332k      b->b_ialloc = array.allocated_entries;
 155   332k      return b->b_iused++;
 156   332k  }
 157
 158         static cfg_instr *
 159  1.22M  basicblock_last_instr(const basicblock *b) {
 160  1.22M      assert(b->b_iused >= 0);
 161  1.22M      if (b->b_iused > 0) {
 162  1.12M          assert(b->b_instr != NULL);
 163  1.12M          return &b->b_instr[b->b_iused - 1];
 164  1.12M      }
 165  97.6k      return NULL;
 166  1.22M  }
 167
 168         /* Allocate a new block and return a pointer to it.
 169            Returns NULL on error.
 170         */
 171
 172         static basicblock *
 173         cfg_builder_new_block(cfg_builder *g)
 174  38.5k  {
 175  38.5k      basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
 176  38.5k      if (b == NULL) {
 177      0          PyErr_NoMemory();
 178      0          return NULL;
 179      0      }
 180             /* Extend the singly linked list of blocks with new block. */
 181  38.5k      b->b_list = g->g_block_list;
 182  38.5k      g->g_block_list = b;
 183  38.5k      b->b_label = NO_LABEL;
 184  38.5k      return b;
 185  38.5k  }
 186
 187         static int
 188         basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
 189   325k  {
 190   325k      assert(IS_WITHIN_OPCODE_RANGE(opcode));
 191   325k      assert(!IS_ASSEMBLER_OPCODE(opcode));
 192   325k      assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
 193   325k      assert(0 <= oparg && oparg < (1 << 30));
 194
 195   325k      int off = basicblock_next_instr(b);
 196   325k      if (off < 0) {
 197      0          return ERROR;
 198      0      }
 199   325k      cfg_instr *i = &b->b_instr[off];
 200   325k      i->i_opcode = opcode;
 201   325k      i->i_oparg = oparg;
 202   325k      i->i_target = NULL;
 203   325k      i->i_loc = loc;
 204
 205   325k      return SUCCESS;
 206   325k  }
 207
 208         static int
 209         basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
 210  1.17k  {
 211  1.17k      cfg_instr *last = basicblock_last_instr(b);
 212  1.17k      if (last && is_jump(last)) {
 213      0          return ERROR;
 214      0      }
 215
 216  1.17k      RETURN_IF_ERROR(
 217  1.17k          basicblock_addop(b, opcode, target->b_label.id, loc));
 218  1.17k      last = basicblock_last_instr(b);
 219  1.17k      assert(last && last->i_opcode == opcode);
 220  1.17k      last->i_target = target;
 221  1.17k      return SUCCESS;
 222  1.17k  }
 223
 224         static inline int
 225         basicblock_append_instructions(basicblock *to, basicblock *from)
 226  2.63k  {
 227  6.91k      for (int i = 0; i < from->b_iused; i++) {
 228  4.28k          int n = basicblock_next_instr(to);
 229  4.28k          if (n < 0) {
 230      0              return ERROR;
 231      0          }
 232  4.28k          to->b_instr[n] = from->b_instr[i];
 233  4.28k      }
 234  2.63k      return SUCCESS;
 235  2.63k  }
 236
 237         static inline int
 238   353k  basicblock_nofallthrough(const basicblock *b) {
 239   353k      cfg_instr *last = basicblock_last_instr(b);
 240   353k      return (last &&
 241   353k              (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
 242   333k               IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
 243   353k  }
 244
 245         #define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
 246   589k  #define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
 247
 248         static basicblock *
 249         copy_basicblock(cfg_builder *g, basicblock *block)
 250    507  {
 251             /* Cannot copy a block if it has a fallthrough, since
 252              * a block can only have one fallthrough predecessor.
 253              */
 254    507      assert(BB_NO_FALLTHROUGH(block));
 255    507      basicblock *result = cfg_builder_new_block(g);
 256    507      if (result == NULL) {
 257      0          return NULL;
 258      0      }
 259    507      if (basicblock_append_instructions(result, block) < 0) {
 260      0          return NULL;
 261      0      }
 262    507      return result;
 263    507  }
 264
 265         static int
 266  2.89k  basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
 267  2.89k      RETURN_IF_ERROR(basicblock_next_instr(block));
 268  50.0k      for (int i = block->b_iused - 1; i > pos; i--) {
 269  47.1k          block->b_instr[i] = block->b_instr[i-1];
 270  47.1k      }
 271  2.89k      block->b_instr[pos] = *instr;
 272  2.89k      return SUCCESS;
 273  2.89k  }
 274
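A sketch of the shift performed above (hypothetical array contents): basicblock_next_instr() first grows the array by one slot, then the tail is copied one place to the right so that index pos is free:

    /* b_instr before: [a, b, c, d, _]   -- insert x at pos == 1 */
    /* after shifting: [a, b, b, c, d]                           */
    /* after writing:  [a, x, b, c, d]                           */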
 275         /* For debugging purposes only */
 276         #if 0
 277         static void
 278         dump_instr(cfg_instr *i)
 279         {
 280             const char *jump = is_jump(i) ? "jump " : "";
 281
 282             char arg[128];
 283
 284             *arg = '\0';
 285             if (OPCODE_HAS_ARG(i->i_opcode)) {
 286                 sprintf(arg, "arg: %d ", i->i_oparg);
 287             }
 288             if (HAS_TARGET(i->i_opcode)) {
 289                 sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
 290             }
 291             fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
 292                             i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
 293         }
 294
 295         static inline int
 296         basicblock_returns(const basicblock *b) {
 297             cfg_instr *last = basicblock_last_instr(b);
 298             return last && IS_RETURN_OPCODE(last->i_opcode);
 299         }
 300
 301         static void
 302         dump_basicblock(const basicblock *b, bool highlight)
 303         {
 304             const char *b_return = basicblock_returns(b) ? "return " : "";
 305             if (highlight) {
 306                 fprintf(stderr, ">>> ");
 307             }
 308             fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
 309                 b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
 310                 b->b_startdepth, b->b_predecessors, b_return);
 311             int depth = b->b_startdepth;
 312             if (b->b_instr) {
 313                 int i;
 314                 for (i = 0; i < b->b_iused; i++) {
 315                     fprintf(stderr, "  [%02d] depth: %d ", i, depth);
 316                     dump_instr(b->b_instr + i);
 317
 318                     int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
 319                     int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
 320                     depth += (pushed - popped);
 321                 }
 322             }
 323         }
 324
 325         void
 326         _PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
 327         {
 328             for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
 329                 dump_basicblock(b, b == mark);
 330             }
 331         }
 332
 333         #endif
 334
 335
 336         /***** CFG construction and modification *****/
 337
 338         static basicblock *
 339         cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
 340  31.6k  {
 341  31.6k      assert(block != NULL);
 342  31.6k      g->g_curblock->b_next = block;
 343  31.6k      g->g_curblock = block;
 344  31.6k      return block;
 345  31.6k  }
 346
 347         static inline int
 348  72.1k  basicblock_exits_scope(const basicblock *b) {
 349  72.1k      cfg_instr *last = basicblock_last_instr(b);
 350  72.1k      return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
 351  72.1k  }
 352
 353         static inline int
 354  45.9k  basicblock_has_eval_break(const basicblock *b) {
 355   225k      for (int i = 0; i < b->b_iused; i++) {
 356   198k          if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
 357  18.4k              return true;
 358  18.4k          }
 359   198k      }
 360  27.4k      return false;
 361  45.9k  }
 362
 363         static bool
 364         cfg_builder_current_block_is_terminated(cfg_builder *g)
 365   330k  {
 366   330k      cfg_instr *last = basicblock_last_instr(g->g_curblock);
 367   330k      if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
 368  26.9k          return true;
 369  26.9k      }
 370   303k      if (IS_LABEL(g->g_current_label)) {
 371  4.72k          if (last || IS_LABEL(g->g_curblock->b_label)) {
 372  4.72k              return true;
 373  4.72k          }
 374      0          else {
 375                     /* current block is empty, label it */
 376      0              g->g_curblock->b_label = g->g_current_label;
 377      0              g->g_current_label = NO_LABEL;
 378      0          }
 379  4.72k      }
 380   299k      return false;
 381   303k  }
 382
 383         static int
 384         cfg_builder_maybe_start_new_block(cfg_builder *g)
 385   330k  {
 386   330k      if (cfg_builder_current_block_is_terminated(g)) {
 387  31.6k          basicblock *b = cfg_builder_new_block(g);
 388  31.6k          if (b == NULL) {
 389      0              return ERROR;
 390      0          }
 391  31.6k          b->b_label = g->g_current_label;
 392  31.6k          g->g_current_label = NO_LABEL;
 393  31.6k          cfg_builder_use_next_block(g, b);
 394  31.6k      }
 395   330k      return SUCCESS;
 396   330k  }
 397
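Every opcode append funnels through this check: _PyCfgBuilder_Addop() (below) calls cfg_builder_maybe_start_new_block() first, so a fresh block starts automatically after any terminator and whenever a label has been set via _PyCfgBuilder_UseLabel(). A sketch of the resulting block boundaries (hypothetical label L1):

    LOAD_FAST x           ; block 0
    POP_JUMP_IF_FALSE L1  ; terminator: block 0 ends here
    LOAD_CONST 1          ; block 1 starts automatically
    RETURN_VALUE
    L1:                   ; UseLabel(L1) forces block 2
    LOAD_CONST 2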
 398         #ifndef NDEBUG
 399         static bool
 400         cfg_builder_check(cfg_builder *g)
 401         {
 402             assert(g->g_entryblock->b_iused > 0);
 403             for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
 404                 assert(!_PyMem_IsPtrFreed(block));
 405                 if (block->b_instr != NULL) {
 406                     assert(block->b_ialloc > 0);
 407                     assert(block->b_iused >= 0);
 408                     assert(block->b_ialloc >= block->b_iused);
 409                 }
 410                 else {
 411                     assert (block->b_iused == 0);
 412                     assert (block->b_ialloc == 0);
 413                 }
 414             }
 415             return true;
 416         }
 417         #endif
 418
 419         static int
 420         init_cfg_builder(cfg_builder *g)
 421  5.78k  {
 422  5.78k      g->g_block_list = NULL;
 423  5.78k      basicblock *block = cfg_builder_new_block(g);
 424  5.78k      if (block == NULL) {
 425      0          return ERROR;
 426      0      }
 427  5.78k      g->g_curblock = g->g_entryblock = block;
 428  5.78k      g->g_current_label = NO_LABEL;
 429  5.78k      return SUCCESS;
 430  5.78k  }
 431
 432         cfg_builder *
 433         _PyCfgBuilder_New(void)
 434  5.78k  {
 435  5.78k      cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
 436  5.78k      if (g == NULL) {
 437      0          PyErr_NoMemory();
 438      0          return NULL;
 439      0      }
 440  5.78k      memset(g, 0, sizeof(cfg_builder));
 441  5.78k      if (init_cfg_builder(g) < 0) {
 442      0          PyMem_Free(g);
 443      0          return NULL;
 444      0      }
 445  5.78k      return g;
 446  5.78k  }
 447
 448         void
 449         _PyCfgBuilder_Free(cfg_builder *g)
 450  5.78k  {
 451  5.78k      if (g == NULL) {
 452      0          return;
 453      0      }
 454  5.78k      assert(cfg_builder_check(g));
 455  5.78k      basicblock *b = g->g_block_list;
 456  44.3k      while (b != NULL) {
 457  38.5k          if (b->b_instr) {
 458  38.5k              PyMem_Free((void *)b->b_instr);
 459  38.5k          }
 460  38.5k          basicblock *next = b->b_list;
 461  38.5k          PyMem_Free((void *)b);
 462  38.5k          b = next;
 463  38.5k      }
 464  5.78k      PyMem_Free(g);
 465  5.78k  }
 466
 467         int
 468         _PyCfgBuilder_CheckSize(cfg_builder *g)
 469  5.78k  {
 470  5.78k      int nblocks = 0;
 471  43.2k      for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
 472  37.4k          nblocks++;
 473  37.4k      }
 474  5.78k      if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
 475      0          PyErr_NoMemory();
 476      0          return ERROR;
 477      0      }
 478  5.78k      return SUCCESS;
 479  5.78k  }
 480
 481         int
 482         _PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
 483  15.7k  {
 484  15.7k      g->g_current_label = lbl;
 485  15.7k      return cfg_builder_maybe_start_new_block(g);
 486  15.7k  }
 487
 488         int
 489         _PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
 490   314k  {
 491   314k      RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
 492   314k      return basicblock_addop(g->g_curblock, opcode, oparg, loc);
 493   314k  }
 494
 495
 496         static basicblock *
 497         next_nonempty_block(basicblock *b)
 498  63.0k  {
 499  65.6k      while (b && b->b_iused == 0) {
 500  2.63k          b = b->b_next;
 501  2.63k      }
 502  63.0k      return b;
 503  63.0k  }
 504
 505         /***** debugging helpers *****/
 506
 507         #ifndef NDEBUG
 508         static int remove_redundant_nops(cfg_builder *g);
 509
 510         static bool
 511         no_redundant_nops(cfg_builder *g) {
 512             if (remove_redundant_nops(g) != 0) {
 513                 return false;
 514             }
 515             return true;
 516         }
 517
 518         static bool
 519         no_redundant_jumps(cfg_builder *g) {
 520             for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
 521                 cfg_instr *last = basicblock_last_instr(b);
 522                 if (last != NULL) {
 523                     if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
 524                         basicblock *next = next_nonempty_block(b->b_next);
 525                         basicblock *jump_target = next_nonempty_block(last->i_target);
 526                         if (jump_target == next) {
 527                             assert(next);
 528                             if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
 529                                 assert(0);
 530                                 return false;
 531                             }
 532                         }
 533                     }
 534                 }
 535             }
 536             return true;
 537         }
 538         #endif
 539
 540         /***** CFG preprocessing (jump targets and exceptions) *****/
 541
 542         static int
 543  38.5k  normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
 544  38.5k      cfg_instr *last = basicblock_last_instr(b);
 545  38.5k      if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
 546  29.1k          return SUCCESS;
 547  29.1k      }
 548  9.46k      assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
 549
 550  9.46k      bool is_forward = last->i_target->b_visited == 0;
 551  9.46k      if (is_forward) {
 552  8.85k          RETURN_IF_ERROR(
 553  8.85k              basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
 554  8.85k          return SUCCESS;
 555  8.85k      }
 556
 557    610      int reversed_opcode = 0;
 558    610      switch(last->i_opcode) {
 559     28          case POP_JUMP_IF_NOT_NONE:
 560     28              reversed_opcode = POP_JUMP_IF_NONE;
 561     28              break;
 562     30          case POP_JUMP_IF_NONE:
 563     30              reversed_opcode = POP_JUMP_IF_NOT_NONE;
 564     30              break;
 565    500          case POP_JUMP_IF_FALSE:
 566    500              reversed_opcode = POP_JUMP_IF_TRUE;
 567    500              break;
 568     52          case POP_JUMP_IF_TRUE:
 569     52              reversed_opcode = POP_JUMP_IF_FALSE;
 570     52              break;
 571    610      }
 572             /* transform 'conditional jump T' to
 573              * 'reversed_jump b_next' followed by 'jump_backwards T'
 574              */
 575
 576    610      basicblock *target = last->i_target;
 577    610      basicblock *backwards_jump = cfg_builder_new_block(g);
 578    610      if (backwards_jump == NULL) {
 579      0          return ERROR;
 580      0      }
 581    610      RETURN_IF_ERROR(
 582    610          basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
 583    610      RETURN_IF_ERROR(
 584    610          basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
 585    610      backwards_jump->b_startdepth = target->b_startdepth;
 586    610      last->i_opcode = reversed_opcode;
 587    610      last->i_target = b->b_next;
 588
 589    610      backwards_jump->b_cold = b->b_cold;
 590    610      backwards_jump->b_next = b->b_next;
 591    610      b->b_next = backwards_jump;
 592    610      return SUCCESS;
 593    610  }
 594
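The backward-jump rewrite above, sketched as instruction streams (T is a block earlier in layout order, so the original conditional jump is a backward edge):

    before:                     after:
        ...                         ...
        POP_JUMP_IF_FALSE T         POP_JUMP_IF_TRUE next
                                backwards_jump:
                                    NOT_TAKEN
                                    JUMP T
    next:                       next:
        ...                         ...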
 595
 596         static int
 597         normalize_jumps(cfg_builder *g)
 598  5.78k  {
 599  5.78k      basicblock *entryblock = g->g_entryblock;
 600  43.7k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 601  37.9k          b->b_visited = 0;
 602  37.9k      }
 603  44.3k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 604  38.5k          b->b_visited = 1;
 605  38.5k          RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
 606  38.5k      }
 607  5.78k      return SUCCESS;
 608  5.78k  }
 609
 610         static int
 611  5.78k  check_cfg(cfg_builder *g) {
 612  43.2k      for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
 613                 /* Raise SystemError if a jump or exit is not the last instruction in the block. */
 614   352k          for (int i = 0; i < b->b_iused; i++) {
 615   314k              int opcode = b->b_instr[i].i_opcode;
 616   314k              assert(!IS_ASSEMBLER_OPCODE(opcode));
 617   314k              if (IS_TERMINATOR_OPCODE(opcode)) {
 618  32.7k                  if (i != b->b_iused - 1) {
 619      0                      PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
 620      0                      return ERROR;
 621      0                  }
 622  32.7k              }
 623   314k          }
 624  37.4k      }
 625  5.78k      return SUCCESS;
 626  5.78k  }
 627
 628         static int
 629         get_max_label(basicblock *entryblock)
 630  22.5k  {
 631  22.5k      int lbl = -1;
 632   172k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 633   150k          if (b->b_label.id > lbl) {
 634  57.4k              lbl = b->b_label.id;
 635  57.4k          }
 636   150k      }
 637  22.5k      return lbl;
 638  22.5k  }
 639
 640         /* Calculate the actual jump target from the target_label */
 641         static int
 642         translate_jump_labels_to_targets(basicblock *entryblock)
 643  5.78k  {
 644  5.78k      int max_label = get_max_label(entryblock);
 645  5.78k      size_t mapsize = sizeof(basicblock *) * (max_label + 1);
 646  5.78k      basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
 647  5.78k      if (!label2block) {
 648      0          PyErr_NoMemory();
 649      0          return ERROR;
 650      0      }
 651  5.78k      memset(label2block, 0, mapsize);
 652  43.2k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 653  37.4k          if (b->b_label.id >= 0) {
 654  15.7k              label2block[b->b_label.id] = b;
 655  15.7k          }
 656  37.4k      }
 657  43.2k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 658   352k          for (int i = 0; i < b->b_iused; i++) {
 659   314k              cfg_instr *instr = &b->b_instr[i];
 660   314k              assert(instr->i_target == NULL);
 661   314k              if (HAS_TARGET(instr->i_opcode)) {
 662  19.4k                  int lbl = instr->i_oparg;
 663  19.4k                  assert(lbl >= 0 && lbl <= max_label);
 664  19.4k                  instr->i_target = label2block[lbl];
 665  19.4k                  assert(instr->i_target != NULL);
 666  19.4k                  assert(instr->i_target->b_label.id == lbl);
 667  19.4k              }
 668   314k          }
 669  37.4k      }
 670  5.78k      PyMem_Free(label2block);
 671  5.78k      return SUCCESS;
 672  5.78k  }
 673
 674         static int
 675  5.78k  mark_except_handlers(basicblock *entryblock) {
 676         #ifndef NDEBUG
 677             for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 678                 assert(!b->b_except_handler);
 679             }
 680         #endif
 681  43.2k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 682   352k          for (int i=0; i < b->b_iused; i++) {
 683   314k              cfg_instr *instr = &b->b_instr[i];
 684   314k              if (is_block_push(instr)) {
 685  2.32k                  instr->i_target->b_except_handler = 1;
 686  2.32k              }
 687   314k          }
 688  37.4k      }
 689  5.78k      return SUCCESS;
 690  5.78k  }
 691
 692
 693         struct _PyCfgExceptStack {
 694             basicblock *handlers[CO_MAXBLOCKS+2];
 695             int depth;
 696         };
 697
 698
 699         static basicblock *
 700  2.32k  push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
 701  2.32k      assert(is_block_push(setup));
 702  2.32k      int opcode = setup->i_opcode;
 703  2.32k      basicblock * target = setup->i_target;
 704  2.32k      if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
 705  1.35k          target->b_preserve_lasti = 1;
 706  1.35k      }
 707  2.32k      assert(stack->depth <= CO_MAXBLOCKS);
 708  2.32k      stack->handlers[++stack->depth] = target;
 709  2.32k      return target;
 710  2.32k  }
 711
 712         static basicblock *
 713  1.84k  pop_except_block(struct _PyCfgExceptStack *stack) {
 714  1.84k      assert(stack->depth > 0);
 715  1.84k      return stack->handlers[--stack->depth];
 716  1.84k  }
 717
 718         static basicblock *
 719  31.8k  except_stack_top(struct _PyCfgExceptStack *stack) {
 720  31.8k      return stack->handlers[stack->depth];
 721  31.8k  }
 722
 723         static struct _PyCfgExceptStack *
 724  5.78k  make_except_stack(void) {
 725  5.78k      struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
 726  5.78k      if (new == NULL) {
 727      0          PyErr_NoMemory();
 728      0          return NULL;
 729      0      }
 730  5.78k      new->depth = 0;
 731  5.78k      new->handlers[0] = NULL;
 732  5.78k      return new;
 733  5.78k  }
 734
 735         static struct _PyCfgExceptStack *
 736  11.8k  copy_except_stack(struct _PyCfgExceptStack *stack) {
 737  11.8k      struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
 738  11.8k      if (copy == NULL) {
 739      0          PyErr_NoMemory();
 740      0          return NULL;
 741      0      }
 742  11.8k      memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
 743  11.8k      return copy;
 744  11.8k  }
 745
 746         static basicblock**
 747  43.5k  make_cfg_traversal_stack(basicblock *entryblock) {
 748  43.5k      int nblocks = 0;
 749   342k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 750   298k          b->b_visited = 0;
 751   298k          nblocks++;
 752   298k      }
 753  43.5k      basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
 754  43.5k      if (!stack) {
 755      0          PyErr_NoMemory();
 756      0      }
 757  43.5k      return stack;
 758  43.5k  }
 759
 760         /* Compute the stack effects of opcode with argument oparg.
 761
 762            Some opcodes have a different stack effect when they jump to the
 763            target than when they don't. The 'jump' parameter specifies the case:
 764
 765            * 0 -- when not jumping
 766            * 1 -- when jumping
 767            * -1 -- maximal
 768          */
 769         typedef struct {
 770             /* The stack effect of the instruction. */
 771             int net;
 772         } stack_effects;
 773
 774         Py_LOCAL(int)
 775         get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
 776   293k  {
 777   293k      if (opcode < 0) {
 778      0          return -1;
 779      0      }
 780   293k      if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
 781                 // Specialized instructions are not supported.
 782      0          return -1;
 783      0      }
 784   293k      int popped = _PyOpcode_num_popped(opcode, oparg);
 785   293k      int pushed = _PyOpcode_num_pushed(opcode, oparg);
 786   293k      if (popped < 0 || pushed < 0) {
 787      0          return -1;
 788      0      }
 789   293k      if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
 790  2.32k          effects->net = 0;
 791  2.32k          return 0;
 792  2.32k      }
 793   290k      effects->net = pushed - popped;
 794   290k      return 0;
 795   293k  }
 796
 797         Py_LOCAL_INLINE(int)
 798         stackdepth_push(basicblock ***sp, basicblock *b, int depth)
 799  39.0k  {
 800  39.0k      if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
 801      0          PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
 802      0          return ERROR;
 803      0      }
 804  39.0k      if (b->b_startdepth < depth && b->b_startdepth < 100) {
 805  31.7k          assert(b->b_startdepth < 0);
 806  31.7k          b->b_startdepth = depth;
 807  31.7k          *(*sp)++ = b;
 808  31.7k      }
 809  39.0k      return SUCCESS;
 810  39.0k  }
 811
 812         /* Find the flow path that needs the largest stack.  We assume that
 813          * cycles in the flow graph have no net effect on the stack depth.
 814          */
 815         static int
 816         calculate_stackdepth(cfg_builder *g)
 817  5.78k  {
 818  5.78k      basicblock *entryblock = g->g_entryblock;
 819  43.7k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 820  37.9k          b->b_startdepth = INT_MIN;
 821  37.9k      }
 822  5.78k      basicblock **stack = make_cfg_traversal_stack(entryblock);
 823  5.78k      if (!stack) {
 824      0          return ERROR;
 825      0      }
 826
 827
 828  5.78k      int stackdepth = -1;
 829  5.78k      int maxdepth = 0;
 830  5.78k      basicblock **sp = stack;
 831  5.78k      if (stackdepth_push(&sp, entryblock, 0) < 0) {
 832      0          goto error;
 833      0      }
 834  37.5k      while (sp != stack) {
 835  31.7k          basicblock *b = *--sp;
 836  31.7k          int depth = b->b_startdepth;
 837  31.7k          assert(depth >= 0);
 838  31.7k          basicblock *next = b->b_next;
 839   292k          for (int i = 0; i < b->b_iused; i++) {
 840   276k              cfg_instr *instr = &b->b_instr[i];
 841   276k              stack_effects effects;
 842   276k              if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
 843      0                  PyErr_Format(PyExc_SystemError,
 844      0                               "Invalid stack effect for opcode=%d, arg=%i",
 845      0                               instr->i_opcode, instr->i_oparg);
 846      0                  goto error;
 847      0              }
 848   276k              int new_depth = depth + effects.net;
 849   276k              if (new_depth < 0) {
 850      0                  PyErr_Format(PyExc_ValueError,
 851      0                               "Invalid CFG, stack underflow");
 852      0                  goto error;
 853      0              }
 854   276k              maxdepth = Py_MAX(maxdepth, depth);
 855   276k              if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
 856  16.8k                  if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
 857      0                      PyErr_Format(PyExc_SystemError,
 858      0                                   "Invalid stack effect for opcode=%d, arg=%i",
 859      0                                   instr->i_opcode, instr->i_oparg);
 860      0                      goto error;
 861      0                  }
 862  16.8k                  int target_depth = depth + effects.net;
 863  16.8k                  assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
 864  16.8k                  maxdepth = Py_MAX(maxdepth, depth);
 865  16.8k                  if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
 866      0                      goto error;
 867      0                  }
 868  16.8k              }
 869   276k              depth = new_depth;
 870   276k              assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
 871   276k              if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
 872   276k                  IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
 873  15.3k              {
 874                         /* remaining code is dead */
 875  15.3k                  next = NULL;
 876  15.3k                  break;
 877  15.3k              }
 878   276k          }
 879  31.7k          if (next != NULL) {
 880  16.4k              assert(BB_HAS_FALLTHROUGH(b));
 881  16.4k              if (stackdepth_push(&sp, next, depth) < 0) {
 882      0                  goto error;
 883      0              }
 884  16.4k          }
 885  31.7k      }
 886  5.78k      stackdepth = maxdepth;
 887  5.78k  error:
 888  5.78k      PyMem_Free(stack);
 889  5.78k      return stackdepth;
 890  5.78k  }
 891
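A worked example of the depth bookkeeping in calculate_stackdepth() (a sketch; the net effects come from _PyOpcode_num_popped/_PyOpcode_num_pushed):

    LOAD_CONST a     depth 0 -> 1
    LOAD_CONST b     depth 1 -> 2    (maxdepth becomes 2)
    BINARY_OP +      depth 2 -> 1
    RETURN_VALUE     depth 1 -> 0    (scope exit: the rest of the block is dead)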
 892         static int
 893  5.78k  label_exception_targets(basicblock *entryblock) {
 894  5.78k      basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
 895  5.78k      if (todo_stack == NULL) {
 896      0          return ERROR;
 897      0      }
 898  5.78k      struct _PyCfgExceptStack *except_stack = make_except_stack();
 899  5.78k      if (except_stack == NULL) {
 900      0          PyMem_Free(todo_stack);
 901      0          PyErr_NoMemory();
 902      0          return ERROR;
 903      0      }
 904  5.78k      except_stack->depth = 0;
 905  5.78k      todo_stack[0] = entryblock;
 906  5.78k      entryblock->b_visited = 1;
 907  5.78k      entryblock->b_exceptstack = except_stack;
 908  5.78k      basicblock **todo = &todo_stack[1];
 909  5.78k      basicblock *handler = NULL;
 910  37.6k      while (todo > todo_stack) {
 911  31.8k          todo--;
 912  31.8k          basicblock *b = todo[0];
 913  31.8k          assert(b->b_visited == 1);
 914  31.8k          except_stack = b->b_exceptstack;
 915  31.8k          assert(except_stack != NULL);
 916  31.8k          b->b_exceptstack = NULL;
 917  31.8k          handler = except_stack_top(except_stack);
 918  31.8k          int last_yield_except_depth = -1;
 919   335k          for (int i = 0; i < b->b_iused; i++) {
 920   303k              cfg_instr *instr = &b->b_instr[i];
 921   303k              if (is_block_push(instr)) {
 922  2.32k                  if (!instr->i_target->b_visited) {
 923  2.32k                      struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
 924  2.32k                      if (copy == NULL) {
 925      0                          goto error;
 926      0                      }
 927  2.32k                      instr->i_target->b_exceptstack = copy;
 928  2.32k                      todo[0] = instr->i_target;
 929  2.32k                      instr->i_target->b_visited = 1;
 930  2.32k                      todo++;
 931  2.32k                  }
 932  2.32k                  handler = push_except_block(except_stack, instr);
 933  2.32k              }
 934   301k              else if (instr->i_opcode == POP_BLOCK) {
 935  1.84k                  handler = pop_except_block(except_stack);
 936  1.84k                  INSTR_SET_OP0(instr, NOP);
 937  1.84k              }
 938   299k              else if (is_jump(instr)) {
 939  16.1k                  instr->i_except = handler;
 940  16.1k                  assert(i == b->b_iused -1);
 941  16.1k                  if (!instr->i_target->b_visited) {
 942  11.5k                      if (BB_HAS_FALLTHROUGH(b)) {
 943  9.53k                          struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
 944  9.53k                          if (copy == NULL) {
 945      0                              goto error;
 946      0                          }
 947  9.53k                          instr->i_target->b_exceptstack = copy;
 948  9.53k                      }
 949  2.05k                      else {
 950  2.05k                          instr->i_target->b_exceptstack = except_stack;
 951  2.05k                          except_stack = NULL;
 952  2.05k                      }
 953  11.5k                      todo[0] = instr->i_target;
 954  11.5k                      instr->i_target->b_visited = 1;
 955  11.5k                      todo++;
 956  11.5k                  }
 957  16.1k              }
 958   283k              else if (instr->i_opcode == YIELD_VALUE) {
 959    423                  instr->i_except = handler;
 960    423                  last_yield_except_depth = except_stack->depth;
 961    423              }
 962   282k              else if (instr->i_opcode == RESUME) {
 963  6.20k                  instr->i_except = handler;
 964  6.20k                  if (instr->i_oparg != RESUME_AT_FUNC_START) {
 965    423                      assert(last_yield_except_depth >= 0);
 966    423                      if (last_yield_except_depth == 1) {
 967    362                          instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
 968    362                      }
 969    423                      last_yield_except_depth = -1;
 970    423                  }
 971  6.20k              }
 972   276k              else {
 973   276k                  instr->i_except = handler;
 974   276k              }
 975   303k          }
 976  31.8k          if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
 977  12.1k              assert(except_stack != NULL);
 978  12.1k              b->b_next->b_exceptstack = except_stack;
 979  12.1k              todo[0] = b->b_next;
 980  12.1k              b->b_next->b_visited = 1;
 981  12.1k              todo++;
 982  12.1k          }
 983  19.7k          else if (except_stack != NULL) {
 984  17.6k             PyMem_Free(except_stack);
 985  17.6k          }
 986  31.8k      }
 987         #ifdef Py_DEBUG
 988             for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
 989                 assert(b->b_exceptstack == NULL);
 990             }
 991         #endif
 992  5.78k      PyMem_Free(todo_stack);
 993  5.78k      return SUCCESS;
 994      0  error:
 995      0      PyMem_Free(todo_stack);
 996      0      PyMem_Free(except_stack);
 997      0      return ERROR;
 998  5.78k  }
 999
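A sketch of what this pass records for a simple handler-protected region (handler block L_h; SETUP_FINALLY stands in for any block-push opcode):

    SETUP_FINALLY L_h   ; push_except_block(): handler stack becomes [L_h]
    <body>              ; every body instruction gets i_except = L_h
    POP_BLOCK           ; pop_except_block(); the POP_BLOCK itself becomes NOP
    <rest>              ; i_except = enclosing handler, or NULL at top level

The per-instruction i_except pointers are what the assembler later turns into exception-table ranges.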
1000         /***** CFG optimizations *****/
1001
1002         static int
1003  11.5k  remove_unreachable(basicblock *entryblock) {
1004  86.9k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1005  75.3k          b->b_predecessors = 0;
1006  75.3k      }
1007  11.5k      basicblock **stack = make_cfg_traversal_stack(entryblock);
1008  11.5k      if (stack == NULL) {
1009      0          return ERROR;
1010      0      }
1011  11.5k      basicblock **sp = stack;
1012  11.5k      entryblock->b_predecessors = 1;
1013  11.5k      *sp++ = entryblock;
1014  11.5k      entryblock->b_visited = 1;
1015  74.5k      while (sp > stack) {
1016  63.0k          basicblock *b = *(--sp);
1017  63.0k          if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1018  31.2k              if (!b->b_next->b_visited) {
1019  26.6k                  assert(b->b_next->b_predecessors == 0);
1020  26.6k                  *sp++ = b->b_next;
1021  26.6k                  b->b_next->b_visited = 1;
1022  26.6k              }
1023  31.2k              b->b_next->b_predecessors++;
1024  31.2k          }
1025   649k          for (int i = 0; i < b->b_iused; i++) {
1026   586k              basicblock *target;
1027   586k              cfg_instr *instr = &b->b_instr[i];
1028   586k              if (is_jump(instr) || is_block_push(instr)) {
1029  35.0k                  target = instr->i_target;
1030  35.0k                  if (!target->b_visited) {
1031  24.7k                      *sp++ = target;
1032  24.7k                      target->b_visited = 1;
1033  24.7k                  }
1034  35.0k                  target->b_predecessors++;
1035  35.0k              }
1036   586k          }
1037  63.0k      }
1038  11.5k      PyMem_Free(stack);
1039
1040             /* Delete unreachable instructions */
1041  86.9k      for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1042  75.3k         if (b->b_predecessors == 0) {
1043  12.3k              b->b_iused = 0;
1044  12.3k              b->b_except_handler = 0;
1045  12.3k         }
1046  75.3k      }
1047  11.5k      return SUCCESS;
1048  11.5k  }
1049
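Note that unreachable blocks are not freed here: they are only emptied (b_iused = 0) and remain on the b_list chain until _PyCfgBuilder_Free() releases them.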
1050         static int
1051   209k  basicblock_remove_redundant_nops(basicblock *bb) {
1052             /* Remove NOPs when legal to do so. */
1053   209k      int dest = 0;
1054   209k      int prev_lineno = -1;
1055  1.67M      for (int src = 0; src < bb->b_iused; src++) {
1056  1.46M          int lineno = bb->b_instr[src].i_loc.lineno;
1057  1.46M          if (bb->b_instr[src].i_opcode == NOP) {
1058                     /* Eliminate the no-op if it doesn't have a line number */
1059  34.7k              if (lineno < 0) {
1060  4.95k                  continue;
1061  4.95k              }
1062                     /* or, if the previous instruction had the same line number. */
1063  29.8k              if (prev_lineno == lineno) {
1064  24.0k                  continue;
1065  24.0k              }
1066                     /* or, if the next instruction has the same line number or no line number */
1067  5.75k              if (src < bb->b_iused - 1) {
1068  4.96k                  int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1069  4.96k                  if (next_lineno == lineno) {
1070  3.29k                      continue;
1071  3.29k                  }
1072  1.67k                  if (next_lineno < 0) {
1073      0                      bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1074      0                      continue;
1075      0                  }
1076  1.67k              }
1077    785              else {
1078    785                  basicblock *next = next_nonempty_block(bb->b_next);
1079                         /* or, if this is the last instruction in the block and the next block has the same line number */
1080    785                  if (next) {
1081    785                      location next_loc = NO_LOCATION;
1082    785                      for (int next_i=0; next_i < next->b_iused; next_i++) {
1083    785                          cfg_instr *instr = &next->b_instr[next_i];
1084    785                          if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1085                                     /* Skip over NOPs without a location; they will be removed */
1086      0                              continue;
1087      0                          }
1088    785                          next_loc = instr->i_loc;
1089    785                          break;
1090    785                      }
1091    785                      if (lineno == next_loc.lineno) {
1092      7                          continue;
1093      7                      }
1094    785                  }
1095    785              }
1096
1097  5.75k          }
1098  1.43M          if (dest != src) {
1099   135k              bb->b_instr[dest] = bb->b_instr[src];
1100   135k          }
1101  1.43M          dest++;
1102  1.43M          prev_lineno = lineno;
1103  1.43M      }
1104   209k      assert(dest <= bb->b_iused);
1105   209k      int num_removed = bb->b_iused - dest;
1106   209k      bb->b_iused = dest;
1107   209k      return num_removed;
1108   209k  }
1109
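The removal rules above, illustrated (bracketed values are i_loc.lineno; a sketch):

    NOP [-1]                dropped: it has no line number
    LOAD_X [7]  NOP [7]     dropped: the previous instruction already covers line 7
    NOP [7]  LOAD_X [7]     dropped: the next instruction covers line 7
    NOP [7]  LOAD_X [9]     kept: the NOP is the only trace of line 7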
1110         static int
1111  19.9k  remove_redundant_nops(cfg_builder *g) {
1112  19.9k      int changes = 0;
1113   191k      for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1114   171k          int change = basicblock_remove_redundant_nops(b);
1115   171k          RETURN_IF_ERROR(change);
1116   171k          changes += change;
1117   171k      }
1118  19.9k      return changes;
1119  19.9k  }
1120
1121         static int
1122         remove_redundant_nops_and_pairs(basicblock *entryblock)
1123  5.78k  {
1124  5.78k      bool done = false;
1125
1126  11.5k      while (! done) {
1127  5.78k          done = true;
1128  5.78k          cfg_instr *prev_instr = NULL;
1129  5.78k          cfg_instr *instr = NULL;
1130  43.7k          for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1131  37.9k              RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1132  37.9k              if (IS_LABEL(b->b_label)) {
1133                         /* this block is a jump target, forget instr */
1134  16.2k                  instr = NULL;
1135  16.2k              }
1136   320k              for (int i = 0; i < b->b_iused; i++) {
1137   282k                  prev_instr = instr;
1138   282k                  instr = &b->b_instr[i];
1139   282k                  int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1140   282k                  int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1141   282k                  int opcode = instr->i_opcode;
1142   282k                  bool is_redundant_pair = false;
1143   282k                  if (opcode == POP_TOP) {
1144  6.54k                     if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1145      0                        is_redundant_pair = true;
1146      0                    }
1147  6.54k                     else if (prev_opcode == COPY && prev_oparg == 1) {
1148      0                        is_redundant_pair = true;
1149      0                    }
1150  6.54k                  }
1151   282k                  if (is_redundant_pair) {
1152      0                      INSTR_SET_OP0(prev_instr, NOP);
1153      0                      INSTR_SET_OP0(instr, NOP);
1154      0                      done = false;
1155      0                  }
1156   282k              }
1157  37.9k              if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1158  27.2k                  instr = NULL;
1159  27.2k              }
1160  37.9k          }
1161  5.78k      }
1162  5.78k      return SUCCESS;
1163  5.78k  }
1164
1165         static int
1166  14.1k  remove_redundant_jumps(cfg_builder *g) {
1167             /* If a non-empty block ends with a jump instruction, check if the next
1168              * non-empty block reached through normal flow control is the target
1169              * of that jump. If it is, then the jump instruction is redundant and
1170              * can be deleted.
1171              *
1172              * Return the number of changes applied, or -1 on error.
1173              */
1174
1175  14.1k      int changes = 0;
1176   147k      for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1177   133k          cfg_instr *last = basicblock_last_instr(b);
1178   133k          if (last == NULL) {
1179  18.9k              continue;
1180  18.9k          }
1181   114k          assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1182   114k          if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1183  16.0k              basicblock* jump_target = next_nonempty_block(last->i_target);
1184  16.0k              if (jump_target == NULL) {
1185      0                  PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1186      0                  return ERROR;
1187      0              }
1188  16.0k              basicblock *next = next_nonempty_block(b->b_next);
1189  16.0k              if (jump_target == next) {
1190    550                  changes++;
1191    550                  INSTR_SET_OP0(last, NOP);
1192    550              }
1193  16.0k          }
1194   114k      }
1195
1196  14.1k      return changes;
1197  14.1k  }
1198
1199         static inline bool
1200  44.7k  basicblock_has_no_lineno(basicblock *b) {
1201  53.6k      for (int i = 0; i < b->b_iused; i++) {
1202  49.2k          if (b->b_instr[i].i_loc.lineno >= 0) {
1203  40.3k              return false;
1204  40.3k          }
1205  49.2k      }
1206  4.39k      return true;
1207  44.7k  }
1208
1209         /* Maximum size of basic block that should be copied in optimizer */
1210  1.88k  #define MAX_COPY_SIZE 4
1211
1212         /* If this block ends with an unconditional jump to a small exit block or
1213          * a block that has no line numbers (and no fallthrough), then
1214          * remove the jump and extend this block with the target.
1215          * Returns 1 if extended, 0 if no change, and -1 on error.
1216          */
1217         static int
1218  57.8k  basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1219  57.8k      cfg_instr *last = basicblock_last_instr(bb);
1220  57.8k      if (last == NULL) {
1221      0          return 0;
1222      0      }
1223  57.8k      if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1224  47.7k          return 0;
1225  47.7k      }
1226  10.1k      basicblock *target = last->i_target;
1227  10.1k      bool small_exit_block = (basicblock_exits_scope(target) &&
1228  10.1k                               target->b_iused <= MAX_COPY_SIZE);
1229  10.1k      bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1230  10.1k                                       !BB_HAS_FALLTHROUGH(target));
1231  10.1k      if (small_exit_block || no_lineno_no_fallthrough) {
1232  2.12k          assert(is_jump(last));
1233  2.12k          int removed_jump_opcode = last->i_opcode;
1234  2.12k          INSTR_SET_OP0(last, NOP);
1235  2.12k          RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1236  2.12k          if (no_lineno_no_fallthrough) {
1237  1.81k              last = basicblock_last_instr(bb);
1238  1.81k              if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1239  1.81k                  removed_jump_opcode == JUMP)
1240     45              {
1241                         /* Make sure we don't lose eval breaker checks */
1242     45                  last->i_opcode = JUMP;
1243     45              }
1244  1.81k          }
1245  2.12k          target->b_predecessors--;
1246  2.12k          return 1;
1247  2.12k      }
1248  8.00k      return 0;
1249  10.1k  }
1250
1251         static int
1252  5.78k  inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1253  5.78k      bool changes;
1254  6.72k      do {
1255  6.72k          changes = false;
1256  64.5k          for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1257  57.8k              int res = basicblock_inline_small_or_no_lineno_blocks(b);
1258  57.8k              RETURN_IF_ERROR(res);
1259  57.8k              if (res) {
1260  2.12k                  changes = true;
1261  2.12k              }
1262  57.8k          }
1263  6.72k      } while(changes); /* every change removes a jump, ensuring convergence */
1264  5.78k      return changes;
1265  5.78k  }
1266
1267         // Attempt to eliminate jumps to jumps by updating inst to jump to
1268         // target->i_target using the provided opcode. Return whether or not the
1269         // optimization was successful.
1270         static bool
1271         jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1272    565  {
1273    565      assert(is_jump(inst));
1274    565      assert(is_jump(target));
1275    565      assert(inst == basicblock_last_instr(bb));
1276             // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1277             // changes (and we fall into an infinite loop):
1278    565      if (inst->i_target != target->i_target) {
1279                 /* Change inst to NOP and append a jump to target->i_target. The
1280                  * NOP will be removed later if it's not needed for the lineno.
1281                  */
1282    565          INSTR_SET_OP0(inst, NOP);
1283
1284    565          RETURN_IF_ERROR(
1285    565              basicblock_add_jump(
1286    565                  bb, opcode, target->i_target, target->i_loc));
1287
1288    565          return true;
1289    565      }
1290      0      return false;
1291    565  }
1292
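A sketch of the threading: if block B ends with JUMP L2 and the first real instruction of L2 is JUMP L3, the rewrite above makes B skip the middle hop:

    before:  B: ... JUMP L2          L2: JUMP L3
    after:   B: ... NOP; JUMP L3     (the NOP keeps B's lineno and is removed later if redundant)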
1293         static int
1294         loads_const(int opcode)
1295  15.2k  {
1296  15.2k      return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1297  15.2k  }
1298
1299         /* Returns new reference */
1300         static PyObject*
1301         get_const_value(int opcode, int oparg, PyObject *co_consts)
1302  63.4k  {
1303  63.4k      PyObject *constant = NULL;
1304  63.4k      assert(loads_const(opcode));
1305  63.4k      if (opcode == LOAD_CONST) {
1306  62.7k          constant = PyList_GET_ITEM(co_consts, oparg);
1307  62.7k      }
1308  63.4k      if (opcode == LOAD_SMALL_INT) {
1309    676          return PyLong_FromLong(oparg);
1310    676      }
1311
1312  62.7k      if (constant == NULL) {
1313      0          PyErr_SetString(PyExc_SystemError,
1314      0                          "Internal error: failed to get value of a constant");
1315      0          return NULL;
1316      0      }
1317  62.7k      return Py_NewRef(constant);
1318  62.7k  }
1319
1320         // Steals a reference to newconst.
1321         static int
1322         add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1323  2.02k  {
1324  2.02k      if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1325      0          Py_DECREF(newconst);
1326      0          return -1;
1327      0      }
1328
1329  2.02k      Py_ssize_t index;
1330  82.5k      for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1331  81.1k          if (PyList_GET_ITEM(consts, index) == newconst) {
1332    581              break;
1333    581          }
1334  81.1k      }
1335  2.02k      if (index == PyList_GET_SIZE(consts)) {
1336  1.43k          if ((size_t)index >= (size_t)INT_MAX - 1) {
1337      0              PyErr_SetString(PyExc_OverflowError, "too many constants");
1338      0              Py_DECREF(newconst);
1339      0              return -1;
1340      0          }
1341  1.43k          if (PyList_Append(consts, newconst)) {
1342      0              Py_DECREF(newconst);
1343      0              return -1;
1344      0          }
1345  1.43k      }
1346  2.02k      Py_DECREF(newconst);
1347  2.02k      return (int)index;
1348  2.02k  }
1349
1350         /*
1351           Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1352           Try to collect "size" consecutive instructions that load constants into the array "instrs".
1353           The caller must make sure that "instrs" is large enough to hold at least "size" instructions.
1354
1355           Return a boolean indicating whether "size" such instructions were found.
1356         */
1357         static bool
1358         get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1359  10.0k  {
1360  10.0k      assert(start < bb->b_iused);
1361  10.0k      assert(size >= 0);
1362  10.0k      assert(size <= _PY_STACK_USE_GUIDELINE);
1363
1364  17.1k      for (; start >= 0 && size > 0; start--) {
1365  15.2k          cfg_instr *instr = &bb->b_instr[start];
1366  15.2k          if (instr->i_opcode == NOP) {
1367    233              continue;
1368    233          }
1369  14.9k          if (!loads_const(instr->i_opcode)) {
1370  8.08k              return false;
1371  8.08k          }
1372  6.88k          instrs[--size] = instr;
1373  6.88k      }
1374
1375  1.96k      return size == 0;
1376  10.0k  }
1377
1378         /*
1379           Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1380           The caller must make sure "instrs" has at least "size" elements.
1381         */
1382         static void
1383         nop_out(cfg_instr **instrs, int size)
1384  2.10k  {
1385  6.11k      for (int i = 0; i < size; i++) {
1386  4.00k          cfg_instr *instr = instrs[i];
1387  4.00k          assert(instr->i_opcode != NOP);
1388  4.00k          INSTR_SET_OP0(instr, NOP);
1389  4.00k          INSTR_SET_LOC(instr, NO_LOCATION);
1390  4.00k      }
1391  2.10k  }
1392
1393         /* Does not steal reference to "newconst".
1394            Return 1 if changed instruction to LOAD_SMALL_INT.
1395            Return 0 if could not change instruction to LOAD_SMALL_INT.
1396            Return -1 on error.
1397         */
1398         static int
1399         maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1400                                        PyObject *consts, PyObject *const_cache)
1401  59.9k  {
1402  59.9k      if (PyLong_CheckExact(newconst)) {
1403  22.6k          int overflow;
1404  22.6k          long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1405  22.6k          if (val == -1 && PyErr_Occurred()) {
1406      0              return -1;
1407      0          }
1408  22.6k          if (!overflow && _PY_IS_SMALL_INT(val)) {
1409  17.4k              assert(_Py_IsImmortal(newconst));
1410  17.4k              INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1411  17.4k              return 1;
1412  17.4k          }
1413  22.6k      }
1414  42.5k      return 0;
1415  59.9k  }
1416
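For example (a sketch): LOAD_CONST k, where consts[k] is the int 5, becomes LOAD_SMALL_INT 5. Encoding the value directly in the oparg is valid because CPython preallocates and immortalizes its small ints (roughly -5..256, the range _PY_IS_SMALL_INT accepts), so no consts-table entry is needed.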
1417
1418         /* Steals reference to "newconst" */
1419         static int
1420         instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1421                               PyObject *consts, PyObject *const_cache)
1422  1.80k  {
1423  1.80k      int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1424  1.80k      if (res < 0) {
1425      0          Py_DECREF(newconst);
1426      0          return ERROR;
1427      0      }
1428  1.80k      if (res > 0) {
1429      9          return SUCCESS;
1430      9      }
1431  1.79k      int oparg = add_const(newconst, consts, const_cache);
1432  1.79k      RETURN_IF_ERROR(oparg);
1433  1.79k      INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1434  1.79k      return SUCCESS;
1435  1.79k  }
1436
1437         /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1438            with    LOAD_CONST (c1, c2, ... cn).
1439            The consts table must still be in list form so that the
1440            new constant (c1, c2, ... cn) can be appended.
1441            Called with codestr pointing to the first LOAD_CONST.
1442         */
1443         static int
1444         fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1445  3.26k  {
1446             /* Pre-conditions */
1447  3.26k      assert(PyDict_CheckExact(const_cache));
1448  3.26k      assert(PyList_CheckExact(consts));
1449
1450  3.26k      cfg_instr *instr = &bb->b_instr[i];
1451  3.26k      assert(instr->i_opcode == BUILD_TUPLE);
1452
1453  3.26k      int seq_size = instr->i_oparg;
1454  3.26k      if (seq_size > _PY_STACK_USE_GUIDELINE) {
1455      0          return SUCCESS;
1456      0      }
1457
1458  3.26k      cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1459  3.26k      if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1460                 /* not a const sequence */
1461  1.96k          return SUCCESS;
1462  1.96k      }
1463
1464  1.29k      PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1465  1.29k      if (const_tuple == NULL) {
1466      0          return ERROR;
1467      0      }
1468
1469  3.48k      for (int i = 0; i < seq_size; i++) {
1470  2.18k          cfg_instr *inst = const_instrs[i];
1471  2.18k          assert(loads_const(inst->i_opcode));
1472  2.18k          PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1473  2.18k          if (element == NULL) {
1474      0              Py_DECREF(const_tuple);
1475      0              return ERROR;
1476      0          }
1477  2.18k          PyTuple_SET_ITEM(const_tuple, i, element);
1478  2.18k      }
1479
1480  1.29k      nop_out(const_instrs, seq_size);
1481  1.29k      return instr_make_load_const(instr, const_tuple, consts, const_cache);
1482  1.29k  }
1483
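The folding performed above, sketched:

    before:  LOAD_SMALL_INT 1   LOAD_SMALL_INT 2   LOAD_SMALL_INT 3   BUILD_TUPLE 3
    after:   NOP                NOP                NOP                LOAD_CONST (1, 2, 3)

The leftover NOPs are cleaned up afterwards by remove_redundant_nops().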
1484
/* Replace:
1485
    BUILD_LIST 0
1486
    LOAD_CONST c1
1487
    LIST_APPEND 1
1488
    LOAD_CONST c2
1489
    LIST_APPEND 1
1490
    ...
1491
    LOAD_CONST cN
1492
    LIST_APPEND 1
1493
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1494
   with:
1495
    LOAD_CONST (c1, c2, ... cN)
1496
*/
1497
static int
1498
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1499
                                      PyObject *consts, PyObject *const_cache)
1500
84
{
1501
84
    assert(PyDict_CheckExact(const_cache));
1502
84
    assert(PyList_CheckExact(consts));
1503
84
    assert(i >= 0);
1504
84
    assert(i < bb->b_iused);
1505
1506
84
    cfg_instr *intrinsic = &bb->b_instr[i];
1507
84
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1508
84
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1509
1510
84
    int consts_found = 0;
1511
84
    bool expect_append = true;
1512
1513
267
    for (int pos = i - 1; pos >= 0; pos--) {
1514
267
        cfg_instr *instr = &bb->b_instr[pos];
1515
267
        int opcode = instr->i_opcode;
1516
267
        int oparg = instr->i_oparg;
1517
1518
267
        if (opcode == NOP) {
1519
0
            continue;
1520
0
        }
1521
1522
267
        if (opcode == BUILD_LIST && oparg == 0) {
1523
2
            if (!expect_append) {
1524
                /* Not a sequence start. */
1525
0
                return SUCCESS;
1526
0
            }
1527
1528
            /* Sequence start, we are done. */
1529
2
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1530
2
            if (newconst == NULL) {
1531
0
                return ERROR;
1532
0
            }
1533
1534
184
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1535
182
                instr = &bb->b_instr[newpos];
1536
182
                if (instr->i_opcode == NOP) {
1537
0
                    continue;
1538
0
                }
1539
182
                if (loads_const(instr->i_opcode)) {
1540
90
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1541
90
                    if (constant == NULL) {
1542
0
                        Py_DECREF(newconst);
1543
0
                        return ERROR;
1544
0
                    }
1545
90
                    assert(consts_found > 0);
1546
90
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1547
90
                }
1548
182
                nop_out(&instr, 1);
1549
182
            }
1550
2
            assert(consts_found == 0);
1551
2
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1552
2
        }
1553
1554
265
        if (expect_append) {
1555
173
            if (opcode != LIST_APPEND || oparg != 1) {
1556
81
                return SUCCESS;
1557
81
            }
1558
173
        }
1559
92
        else {
1560
92
            if (!loads_const(opcode)) {
1561
1
                return SUCCESS;
1562
1
            }
1563
91
            consts_found++;
1564
91
        }
1565
1566
183
        expect_append = !expect_append;
1567
183
    }
1568
1569
    /* Did not find sequence start. */
1570
0
    return SUCCESS;
1571
84
}
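   (Note on the scan above: walking backwards from the CALL_INTRINSIC_1, it
   alternates between expecting a LIST_APPEND 1 and a constant load, counting
   constants as it goes; only if it reaches the opening BUILD_LIST 0 in that
   strict rhythm does it rebuild the tuple, filling it from the back because
   the walk runs in reverse.)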
1572
1573
2.14k
#define MIN_CONST_SEQUENCE_SIZE 3
1574
/*
1575
Optimize lists and sets for:
1576
    1. "for" loop, comprehension or "in"/"not in" tests:
1577
           Change literal list or set of constants into constant
1578
           tuple or frozenset respectively. Change list of
1579
           non-constants into tuple.
1580
    2. Constant literal lists/sets with length >= MIN_CONST_SEQUENCE_SIZE:
1581
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1582
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1583
           or BUILD_SET & SET_UPDATE respectively.
1584
*/
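   A sketch of both cases (hypothetical opargs):
       x in [1, 2, 3]:
           LOAD_CONST 1; LOAD_CONST 2; LOAD_CONST 3; BUILD_LIST 3; CONTAINS_OP 0
       ->  LOAD_CONST (1, 2, 3); CONTAINS_OP 0
       s = {1, 2, 3}:
           LOAD_CONST 1; LOAD_CONST 2; LOAD_CONST 3; BUILD_SET 3
       ->  BUILD_SET 0; LOAD_CONST frozenset({1, 2, 3}); SET_UPDATE 1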
1585
static int
1586
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1587
                        PyObject *consts, PyObject *const_cache)
1588
1.07k
{
1589
1.07k
    assert(PyDict_CheckExact(const_cache));
1590
1.07k
    assert(PyList_CheckExact(consts));
1591
1592
1.07k
    cfg_instr *instr = &bb->b_instr[i];
1593
1.07k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1594
1595
1.07k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1596
1.07k
    int seq_size = instr->i_oparg;
1597
1.07k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1598
1.07k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1599
896
    {
1600
896
        return SUCCESS;
1601
896
    }
1602
1603
178
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1604
178
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1605
56
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1606
            /* iterate over a tuple instead of list */
1607
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1608
0
        }
1609
56
        return SUCCESS;
1610
56
    }
1611
1612
122
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1613
122
    if (const_result == NULL) {
1614
0
        return ERROR;
1615
0
    }
1616
1617
1.21k
    for (int i = 0; i < seq_size; i++) {
1618
1.09k
        cfg_instr *inst = const_instrs[i];
1619
1.09k
        assert(loads_const(inst->i_opcode));
1620
1.09k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1621
1.09k
        if (element == NULL) {
1622
0
            Py_DECREF(const_result);
1623
0
            return ERROR;
1624
0
        }
1625
1.09k
        PyTuple_SET_ITEM(const_result, i, element);
1626
1.09k
    }
1627
1628
122
    if (instr->i_opcode == BUILD_SET) {
1629
28
        PyObject *frozenset = PyFrozenSet_New(const_result);
1630
28
        if (frozenset == NULL) {
1631
0
            Py_DECREF(const_result);
1632
0
            return ERROR;
1633
0
        }
1634
28
        Py_SETREF(const_result, frozenset);
1635
28
    }
1636
1637
122
    int index = add_const(const_result, consts, const_cache);
1638
122
    RETURN_IF_ERROR(index);
1639
122
    nop_out(const_instrs, seq_size);
1640
1641
122
    if (contains_or_iter) {
1642
27
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1643
27
    }
1644
95
    else {
1645
95
        assert(i >= 2);
1646
95
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1647
1648
95
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1649
1650
95
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1651
95
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1652
95
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1653
95
    }
1654
122
    return SUCCESS;
1655
122
}
1656
1657
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1658
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1659
 * Used to avoid creating constants which are slow to hash.
1660
 */
1661
static Py_ssize_t
1662
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1663
0
{
1664
0
    if (PyTuple_Check(obj)) {
1665
0
        Py_ssize_t i;
1666
0
        limit -= PyTuple_GET_SIZE(obj);
1667
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1668
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1669
0
            if (limit < 0) {
1670
0
                return limit;
1671
0
            }
1672
0
        }
1673
0
    }
1674
0
    return limit;
1675
0
}
1676
1677
23
#define MAX_INT_SIZE           128  /* bits */
1678
0
#define MAX_COLLECTION_SIZE    256  /* items */
1679
10
#define MAX_STR_SIZE          4096  /* characters */
1680
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1681
1682
static PyObject *
1683
const_folding_safe_multiply(PyObject *v, PyObject *w)
1684
23
{
1685
23
    if (PyLong_Check(v) && PyLong_Check(w) &&
1686
23
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1687
23
    ) {
1688
3
        int64_t vbits = _PyLong_NumBits(v);
1689
3
        int64_t wbits = _PyLong_NumBits(w);
1690
3
        assert(vbits >= 0);
1691
3
        assert(wbits >= 0);
1692
3
        if (vbits + wbits > MAX_INT_SIZE) {
1693
0
            return NULL;
1694
0
        }
1695
3
    }
1696
20
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1697
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1698
0
        if (size) {
1699
0
            long n = PyLong_AsLong(v);
1700
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1701
0
                return NULL;
1702
0
            }
1703
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1704
0
                return NULL;
1705
0
            }
1706
0
        }
1707
0
    }
1708
20
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1709
10
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1710
10
                                               PyBytes_GET_SIZE(w);
1711
10
        if (size) {
1712
10
            long n = PyLong_AsLong(v);
1713
10
            if (n < 0 || n > MAX_STR_SIZE / size) {
1714
3
                return NULL;
1715
3
            }
1716
10
        }
1717
10
    }
1718
10
    else if (PyLong_Check(w) &&
1719
10
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1720
10
    {
1721
10
        return const_folding_safe_multiply(w, v);
1722
10
    }
1723
1724
10
    return PyNumber_Multiply(v, w);
1725
23
}
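   (A worked example of the guards above: 'x' * 5000 is not folded because
   5000 > MAX_STR_SIZE / 1, while 'ab' * 100 is, since 100 <= 4096 / 2;
   for int * int the operand bit counts may sum to at most MAX_INT_SIZE,
   and repeating a tuple is additionally capped by
   const_folding_check_complexity at MAX_TOTAL_ITEMS nested items.)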
1726
1727
static PyObject *
1728
const_folding_safe_power(PyObject *v, PyObject *w)
1729
8
{
1730
8
    if (PyLong_Check(v) && PyLong_Check(w) &&
1731
8
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1732
8
    ) {
1733
8
        int64_t vbits = _PyLong_NumBits(v);
1734
8
        size_t wbits = PyLong_AsSize_t(w);
1735
8
        assert(vbits >= 0);
1736
8
        if (wbits == (size_t)-1) {
1737
0
            return NULL;
1738
0
        }
1739
8
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1740
0
            return NULL;
1741
0
        }
1742
8
    }
1743
1744
8
    return PyNumber_Power(v, w, Py_None);
1745
8
}
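   (The exponent guard is deliberately coarse, treating v ** w as having
   roughly bits(v) * w bits: with integer division, 10 ** 30 is folded
   because bits(10) = 4 <= 128 / 30 = 4, but 10 ** 50 is rejected since
   4 > 128 / 50 = 2.)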
1746
1747
static PyObject *
1748
const_folding_safe_lshift(PyObject *v, PyObject *w)
1749
4
{
1750
4
    if (PyLong_Check(v) && PyLong_Check(w) &&
1751
4
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1752
4
    ) {
1753
4
        int64_t vbits = _PyLong_NumBits(v);
1754
4
        size_t wbits = PyLong_AsSize_t(w);
1755
4
        assert(vbits >= 0);
1756
4
        if (wbits == (size_t)-1) {
1757
0
            return NULL;
1758
0
        }
1759
4
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1760
0
            return NULL;
1761
0
        }
1762
4
    }
1763
1764
4
    return PyNumber_Lshift(v, w);
1765
4
}
1766
1767
static PyObject *
1768
const_folding_safe_mod(PyObject *v, PyObject *w)
1769
0
{
1770
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1771
0
        return NULL;
1772
0
    }
1773
1774
0
    return PyNumber_Remainder(v, w);
1775
0
}
1776
1777
static PyObject *
1778
eval_const_binop(PyObject *left, int op, PyObject *right)
1779
39
{
1780
39
    assert(left != NULL && right != NULL);
1781
39
    assert(op >= 0 && op <= NB_OPARG_LAST);
1782
1783
39
    PyObject *result = NULL;
1784
39
    switch (op) {
1785
4
        case NB_ADD:
1786
4
            result = PyNumber_Add(left, right);
1787
4
            break;
1788
0
        case NB_SUBTRACT:
1789
0
            result = PyNumber_Subtract(left, right);
1790
0
            break;
1791
13
        case NB_MULTIPLY:
1792
13
            result = const_folding_safe_multiply(left, right);
1793
13
            break;
1794
1
        case NB_TRUE_DIVIDE:
1795
1
            result = PyNumber_TrueDivide(left, right);
1796
1
            break;
1797
0
        case NB_FLOOR_DIVIDE:
1798
0
            result = PyNumber_FloorDivide(left, right);
1799
0
            break;
1800
0
        case NB_REMAINDER:
1801
0
            result = const_folding_safe_mod(left, right);
1802
0
            break;
1803
8
        case NB_POWER:
1804
8
            result = const_folding_safe_power(left, right);
1805
8
            break;
1806
4
        case NB_LSHIFT:
1807
4
            result = const_folding_safe_lshift(left, right);
1808
4
            break;
1809
0
        case NB_RSHIFT:
1810
0
            result = PyNumber_Rshift(left, right);
1811
0
            break;
1812
0
        case NB_OR:
1813
0
            result = PyNumber_Or(left, right);
1814
0
            break;
1815
0
        case NB_XOR:
1816
0
            result = PyNumber_Xor(left, right);
1817
0
            break;
1818
0
        case NB_AND:
1819
0
            result = PyNumber_And(left, right);
1820
0
            break;
1821
9
        case NB_SUBSCR:
1822
9
            result = PyObject_GetItem(left, right);
1823
9
            break;
1824
0
        case NB_MATRIX_MULTIPLY:
1825
            // No builtin constants implement matrix multiplication
1826
0
            break;
1827
0
        default:
1828
0
            Py_UNREACHABLE();
1829
39
    }
1830
39
    return result;
1831
39
}
1832
1833
static int
1834
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1835
5.98k
{
1836
6.02k
    #define BINOP_OPERAND_COUNT 2
1837
5.98k
    assert(PyDict_CheckExact(const_cache));
1838
5.98k
    assert(PyList_CheckExact(consts));
1839
1840
5.98k
    cfg_instr *binop = &bb->b_instr[i];
1841
5.98k
    assert(binop->i_opcode == BINARY_OP);
1842
1843
5.98k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1844
5.98k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1845
        /* not a const sequence */
1846
5.94k
        return SUCCESS;
1847
5.94k
    }
1848
1849
39
    cfg_instr *lhs_instr = operands_instrs[0];
1850
39
    assert(loads_const(lhs_instr->i_opcode));
1851
39
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1852
39
    if (lhs == NULL) {
1853
0
        return ERROR;
1854
0
    }
1855
1856
39
    cfg_instr *rhs_instr = operands_instrs[1];
1857
39
    assert(loads_const(rhs_instr->i_opcode));
1858
39
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1859
39
    if (rhs == NULL) {
1860
0
        Py_DECREF(lhs);
1861
0
        return ERROR;
1862
0
    }
1863
1864
39
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1865
39
    Py_DECREF(lhs);
1866
39
    Py_DECREF(rhs);
1867
39
    if (newconst == NULL) {
1868
3
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1869
0
            return ERROR;
1870
0
        }
1871
3
        PyErr_Clear();
1872
3
        return SUCCESS;
1873
3
    }
1874
1875
36
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1876
36
    return instr_make_load_const(binop, newconst, consts, const_cache);
1877
39
}
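   (End to end, a hypothetical dis-style listing: 60 * 60 compiles to
   LOAD_SMALL_INT 60; LOAD_SMALL_INT 60; BINARY_OP *, which this pass
   folds to NOP; NOP; LOAD_CONST 3600, the NOPs disappearing later.)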
1878
1879
static PyObject *
1880
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1881
471
{
1882
471
    assert(operand != NULL);
1883
471
    assert(
1884
471
        opcode == UNARY_NEGATIVE ||
1885
471
        opcode == UNARY_INVERT ||
1886
471
        opcode == UNARY_NOT ||
1887
471
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1888
471
    );
1889
471
    PyObject *result;
1890
471
    switch (opcode) {
1891
471
        case UNARY_NEGATIVE:
1892
471
            result = PyNumber_Negative(operand);
1893
471
            break;
1894
0
        case UNARY_INVERT:
1895
            // XXX: This should be removed once the ~bool deprecation expires.
1896
0
            if (PyBool_Check(operand)) {
1897
0
                return NULL;
1898
0
            }
1899
0
            result = PyNumber_Invert(operand);
1900
0
            break;
1901
0
        case UNARY_NOT: {
1902
0
            int r = PyObject_IsTrue(operand);
1903
0
            if (r < 0) {
1904
0
                return NULL;
1905
0
            }
1906
0
            result = PyBool_FromLong(!r);
1907
0
            break;
1908
0
        }
1909
0
        case CALL_INTRINSIC_1:
1910
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1911
0
                Py_UNREACHABLE();
1912
0
            }
1913
0
            result = PyNumber_Positive(operand);
1914
0
            break;
1915
0
        default:
1916
0
            Py_UNREACHABLE();
1917
471
    }
1918
471
    return result;
1919
471
}
1920
1921
static int
1922
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1923
622
{
1924
1.09k
    #define UNARYOP_OPERAND_COUNT 1
1925
622
    assert(PyDict_CheckExact(const_cache));
1926
622
    assert(PyList_CheckExact(consts));
1927
622
    cfg_instr *unaryop = &bb->b_instr[i];
1928
1929
622
    cfg_instr *operand_instr;
1930
622
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1931
        /* not a const */
1932
151
        return SUCCESS;
1933
151
    }
1934
1935
471
    assert(loads_const(operand_instr->i_opcode));
1936
471
    PyObject *operand = get_const_value(
1937
471
        operand_instr->i_opcode,
1938
471
        operand_instr->i_oparg,
1939
471
        consts
1940
471
    );
1941
471
    if (operand == NULL) {
1942
0
        return ERROR;
1943
0
    }
1944
1945
471
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1946
471
    Py_DECREF(operand);
1947
471
    if (newconst == NULL) {
1948
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1949
0
            return ERROR;
1950
0
        }
1951
0
        PyErr_Clear();
1952
0
        return SUCCESS;
1953
0
    }
1954
1955
471
    if (unaryop->i_opcode == UNARY_NOT) {
1956
0
        assert(PyBool_Check(newconst));
1957
0
    }
1958
471
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1959
471
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1960
471
}
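   (Likewise for unary operators: LOAD_SMALL_INT 5; UNARY_NEGATIVE folds
   to NOP; LOAD_CONST -5, assuming 5 was loaded as a small int.)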
1961
1962
1.71k
#define VISITED (-1)
1963
1964
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1965
// same effect.
1966
static int
1967
swaptimize(basicblock *block, int *ix)
1968
1.41k
{
1969
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1970
    // for this function. Make sure to blow away cached *.pyc files first!
1971
1.41k
    assert(*ix < block->b_iused);
1972
1.41k
    cfg_instr *instructions = &block->b_instr[*ix];
1973
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1974
    // maximum depth of the stack manipulations:
1975
1.41k
    assert(instructions[0].i_opcode == SWAP);
1976
1.41k
    int depth = instructions[0].i_oparg;
1977
1.41k
    int len = 0;
1978
1.41k
    int more = false;
1979
1.41k
    int limit = block->b_iused - *ix;
1980
1.60k
    while (++len < limit) {
1981
1.60k
        int opcode = instructions[len].i_opcode;
1982
1.60k
        if (opcode == SWAP) {
1983
132
            depth = Py_MAX(depth, instructions[len].i_oparg);
1984
132
            more = true;
1985
132
        }
1986
1.47k
        else if (opcode != NOP) {
1987
1.41k
            break;
1988
1.41k
        }
1989
1.60k
    }
1990
    // It's already optimal if there's only one SWAP:
1991
1.41k
    if (!more) {
1992
1.28k
        return SUCCESS;
1993
1.28k
    }
1994
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1995
132
    int *stack = PyMem_Malloc(depth * sizeof(int));
1996
132
    if (stack == NULL) {
1997
0
        PyErr_NoMemory();
1998
0
        return ERROR;
1999
0
    }
2000
528
    for (int i = 0; i < depth; i++) {
2001
396
        stack[i] = i;
2002
396
    }
2003
    // Simulate the combined effect of these instructions by "running" them on
2004
    // our "stack":
2005
396
    for (int i = 0; i < len; i++) {
2006
264
        if (instructions[i].i_opcode == SWAP) {
2007
264
            int oparg = instructions[i].i_oparg;
2008
264
            int top = stack[0];
2009
            // SWAPs are 1-indexed:
2010
264
            stack[0] = stack[oparg - 1];
2011
264
            stack[oparg - 1] = top;
2012
264
        }
2013
264
    }
2014
    // Now we can begin! Our approach here is based on a solution to a closely
2015
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2016
    // think of this algorithm as determining the steps needed to efficiently
2017
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2018
    // though, we can efficiently *shuffle* it! For this reason, we will be
2019
    // replacing instructions starting from the *end* of the run. Since the
2020
    // solution is optimal, we don't need to worry about running out of space:
2021
132
    int current = len - 1;
2022
528
    for (int i = 0; i < depth; i++) {
2023
        // Skip items that have already been visited, or just happen to be in
2024
        // the correct location:
2025
396
        if (stack[i] == VISITED || stack[i] == i) {
2026
264
            continue;
2027
264
        }
2028
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2029
        // with other items; traversing the cycle and swapping each item with
2030
        // the next will put them all in the correct place. The weird
2031
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2032
        // can only swap from that position:
2033
132
        int j = i;
2034
528
        while (true) {
2035
            // Skip the actual swap if our item is zero, since swapping the top
2036
            // item with itself is pointless:
2037
528
            if (j) {
2038
264
                assert(0 <= current);
2039
                // SWAPs are 1-indexed:
2040
264
                instructions[current].i_opcode = SWAP;
2041
264
                instructions[current--].i_oparg = j + 1;
2042
264
            }
2043
528
            if (stack[j] == VISITED) {
2044
                // Completed the cycle:
2045
132
                assert(j == i);
2046
132
                break;
2047
132
            }
2048
396
            int next_j = stack[j];
2049
396
            stack[j] = VISITED;
2050
396
            j = next_j;
2051
396
        }
2052
132
    }
2053
    // NOP out any unused instructions:
2054
132
    while (0 <= current) {
2055
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2056
0
    }
2057
132
    PyMem_Free(stack);
2058
132
    *ix += len - 1;
2059
132
    return SUCCESS;
2060
132
}
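To make the cycle-walking step concrete, here is a minimal standalone sketch
(invented names, independent of CPython's cfg_instr machinery). It records the
SWAP opargs into a side array instead of overwriting instructions, so the swaps
must be applied in reverse of the order they were generated, mirroring how the
loop above fills the instruction run from the end backwards.

#include <stdio.h>
#include <string.h>

#define VISITED (-1)

/* Fill args[] (capacity >= number of swaps) with SWAP opargs in generation
   order and return how many were generated.  perm[] gives the desired final
   stack: perm[depth] == original index of the item that ends up there. */
static int
minimal_swaps(const int *perm, int n, int *args)
{
    int stack[16];                       /* sketch assumes n <= 16 */
    memcpy(stack, perm, n * sizeof(int));
    int count = 0;
    for (int i = 0; i < n; i++) {
        if (stack[i] == VISITED || stack[i] == i) {
            continue;                    /* already correct or visited */
        }
        int j = i;
        while (1) {                      /* walk one cycle, routing via TOS */
            if (j) {
                args[count++] = j + 1;   /* SWAPs are 1-indexed */
            }
            if (stack[j] == VISITED) {
                break;                   /* cycle complete */
            }
            int next_j = stack[j];
            stack[j] = VISITED;
            j = next_j;
        }
    }
    return count;
}

int
main(void)
{
    int perm[] = {2, 0, 1};
    int n = 3, args[16], stack[16];
    int count = minimal_swaps(perm, n, args);
    for (int i = 0; i < n; i++) {
        stack[i] = i;                    /* identity stack */
    }
    for (int k = count - 1; k >= 0; k--) {
        int tmp = stack[0];              /* SWAP(arg) swaps TOS with */
        stack[0] = stack[args[k] - 1];   /* the item at depth arg-1  */
        stack[args[k] - 1] = tmp;
    }
    for (int i = 0; i < n; i++) {
        printf("%d ", stack[i]);         /* prints: 2 0 1 */
    }
    printf("\n");
    return 0;
}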
2061
2062
2063
// This list is pretty small, since it's only okay to reorder opcodes that:
2064
// - can't affect control flow (like jumping or raising exceptions)
2065
// - can't invoke arbitrary code (besides finalizers)
2066
// - only touch the TOS (and pop it when finished)
2067
#define SWAPPABLE(opcode) \
2068
2.36k
    ((opcode) == STORE_FAST || \
2069
2.36k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2070
2.36k
     (opcode) == POP_TOP)
2071
2072
#define STORES_TO(instr) \
2073
344
    (((instr).i_opcode == STORE_FAST || \
2074
344
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2075
344
     ? (instr).i_oparg : -1)
2076
2077
static int
2078
next_swappable_instruction(basicblock *block, int i, int lineno)
2079
2.32k
{
2080
2.38k
    while (++i < block->b_iused) {
2081
2.36k
        cfg_instr *instruction = &block->b_instr[i];
2082
2.36k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2083
            // Optimizing across this instruction could cause user-visible
2084
            // changes in the names bound between line tracing events!
2085
9
            return -1;
2086
9
        }
2087
2.36k
        if (instruction->i_opcode == NOP) {
2088
59
            continue;
2089
59
        }
2090
2.30k
        if (SWAPPABLE(instruction->i_opcode)) {
2091
1.07k
            return i;
2092
1.07k
        }
2093
1.22k
        return -1;
2094
2.30k
    }
2095
19
    return -1;
2096
2.32k
}
2097
2098
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2099
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2100
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2101
static void
2102
apply_static_swaps(basicblock *block, int i)
2103
1.41k
{
2104
    // SWAPs are to our left, and potential swaperands are to our right:
2105
1.63k
    for (; 0 <= i; i--) {
2106
1.54k
        assert(i < block->b_iused);
2107
1.54k
        cfg_instr *swap = &block->b_instr[i];
2108
1.54k
        if (swap->i_opcode != SWAP) {
2109
134
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2110
                // Nope, but we know how to handle these. Keep looking:
2111
67
                continue;
2112
67
            }
2113
            // We can't reason about what this instruction does. Bail:
2114
67
            return;
2115
134
        }
2116
1.41k
        int j = next_swappable_instruction(block, i, -1);
2117
1.41k
        if (j < 0) {
2118
691
            return;
2119
691
        }
2120
722
        int k = j;
2121
722
        int lineno = block->b_instr[j].i_loc.lineno;
2122
1.07k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2123
916
            k = next_swappable_instruction(block, k, lineno);
2124
916
            if (k < 0) {
2125
564
                return;
2126
564
            }
2127
916
        }
2128
        // The reordering is not safe if the two instructions to be swapped
2129
        // store to the same location, or if any intervening instruction stores
2130
        // to the same location as either of them.
2131
158
        int store_j = STORES_TO(block->b_instr[j]);
2132
158
        int store_k = STORES_TO(block->b_instr[k]);
2133
158
        if (store_j >= 0 || store_k >= 0) {
2134
158
            if (store_j == store_k) {
2135
0
                return;
2136
0
            }
2137
186
            for (int idx = j + 1; idx < k; idx++) {
2138
28
                int store_idx = STORES_TO(block->b_instr[idx]);
2139
28
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2140
0
                    return;
2141
0
                }
2142
28
            }
2143
158
        }
2144
2145
        // Success!
2146
158
        INSTR_SET_OP0(swap, NOP);
2147
158
        cfg_instr temp = block->b_instr[j];
2148
158
        block->b_instr[j] = block->b_instr[k];
2149
158
        block->b_instr[k] = temp;
2150
158
    }
2151
1.41k
}
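   (A case the store_j/store_k checks reject: SWAP(2); STORE_FAST(x);
   STORE_FAST(x) must be left alone, since dropping the SWAP while both
   stores target the same local would change which value x ends up holding.)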
2152
2153
static int
2154
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2155
37.9k
{
2156
37.9k
    assert(PyDict_CheckExact(const_cache));
2157
37.9k
    assert(PyList_CheckExact(consts));
2158
37.9k
    int opcode = 0;
2159
37.9k
    int oparg = 0;
2160
343k
    for (int i = 0; i < bb->b_iused; i++) {
2161
305k
        cfg_instr *inst = &bb->b_instr[i];
2162
305k
        if (inst->i_opcode == LOAD_CONST) {
2163
58.1k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2164
58.1k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2165
58.1k
            Py_DECREF(constant);
2166
58.1k
            if (res < 0) {
2167
0
                return ERROR;
2168
0
            }
2169
58.1k
        }
2170
305k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2171
305k
                                      inst->i_opcode == COPY &&
2172
305k
                                      inst->i_oparg == 1);
2173
305k
        if (! is_copy_of_load_const) {
2174
305k
            opcode = inst->i_opcode;
2175
305k
            oparg = inst->i_oparg;
2176
305k
        }
2177
305k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2178
305k
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2179
247k
            continue;
2180
247k
        }
2181
58.1k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2182
58.1k
        switch(nextop) {
2183
97
            case POP_JUMP_IF_FALSE:
2184
100
            case POP_JUMP_IF_TRUE:
2185
100
            case JUMP_IF_FALSE:
2186
100
            case JUMP_IF_TRUE:
2187
100
            {
2188
                /* Remove LOAD_CONST const; conditional jump */
2189
100
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2190
100
                if (cnt == NULL) {
2191
0
                    return ERROR;
2192
0
                }
2193
100
                int is_true = PyObject_IsTrue(cnt);
2194
100
                Py_DECREF(cnt);
2195
100
                if (is_true == -1) {
2196
0
                    return ERROR;
2197
0
                }
2198
100
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2199
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2200
100
                    INSTR_SET_OP0(inst, NOP);
2201
100
                }
2202
100
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2203
100
                if (is_true == jump_if_true) {
2204
0
                    bb->b_instr[i+1].i_opcode = JUMP;
2205
0
                }
2206
100
                else {
2207
100
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2208
100
                }
2209
100
                break;
2210
100
            }
2211
1.17k
            case IS_OP:
2212
1.17k
            {
2213
                // Fold to POP_JUMP_IF_NONE:
2214
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2215
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2216
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2217
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2218
                // Fold to POP_JUMP_IF_NOT_NONE:
2219
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2220
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2221
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2222
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2223
1.17k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2224
1.17k
                if (cnt == NULL) {
2225
0
                    return ERROR;
2226
0
                }
2227
1.17k
                if (!Py_IsNone(cnt)) {
2228
14
                    Py_DECREF(cnt);
2229
14
                    break;
2230
14
                }
2231
1.15k
                if (bb->b_iused <= i + 2) {
2232
6
                    break;
2233
6
                }
2234
1.15k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2235
1.15k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2236
                // Get rid of TO_BOOL regardless:
2237
1.15k
                if (jump_instr->i_opcode == TO_BOOL) {
2238
1.13k
                    INSTR_SET_OP0(jump_instr, NOP);
2239
1.13k
                    if (bb->b_iused <= i + 3) {
2240
0
                        break;
2241
0
                    }
2242
1.13k
                    jump_instr = &bb->b_instr[i + 3];
2243
1.13k
                }
2244
1.15k
                bool invert = is_instr->i_oparg;
2245
1.15k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2246
1.09k
                    invert = !invert;
2247
1.09k
                }
2248
54
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2249
17
                    break;
2250
17
                }
2251
1.13k
                INSTR_SET_OP0(inst, NOP);
2252
1.13k
                INSTR_SET_OP0(is_instr, NOP);
2253
1.13k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2254
1.13k
                                              : POP_JUMP_IF_NONE;
2255
1.13k
                break;
2256
1.15k
            }
2257
100
            case TO_BOOL:
2258
100
            {
2259
100
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2260
100
                if (cnt == NULL) {
2261
0
                    return ERROR;
2262
0
                }
2263
100
                int is_true = PyObject_IsTrue(cnt);
2264
100
                Py_DECREF(cnt);
2265
100
                if (is_true == -1) {
2266
0
                    return ERROR;
2267
0
                }
2268
100
                cnt = PyBool_FromLong(is_true);
2269
100
                int index = add_const(cnt, consts, const_cache);
2270
100
                if (index < 0) {
2271
0
                    return ERROR;
2272
0
                }
2273
100
                INSTR_SET_OP0(inst, NOP);
2274
100
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2275
100
                break;
2276
100
            }
2277
58.1k
        }
2278
58.1k
    }
2279
37.9k
    return SUCCESS;
2280
37.9k
}
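   (A concrete sketch of the IS_OP case above: "if x is not None:" compiles
   to LOAD_FAST x; LOAD_CONST None; IS_OP 1; TO_BOOL; POP_JUMP_IF_FALSE,
   which is folded to LOAD_FAST x; POP_JUMP_IF_NONE, eliminating the
   constant load, the identity test and the bool coercion.)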
2281
2282
static int
2283
5.78k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2284
43.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2285
37.9k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2286
37.9k
    }
2287
5.78k
    return SUCCESS;
2288
5.78k
}
2289
2290
static int
2291
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2292
37.9k
{
2293
37.9k
    assert(PyDict_CheckExact(const_cache));
2294
37.9k
    assert(PyList_CheckExact(consts));
2295
37.9k
    cfg_instr nop;
2296
37.9k
    INSTR_SET_OP0(&nop, NOP);
2297
345k
    for (int i = 0; i < bb->b_iused; i++) {
2298
307k
        cfg_instr *inst = &bb->b_instr[i];
2299
307k
        cfg_instr *target;
2300
307k
        int opcode = inst->i_opcode;
2301
307k
        int oparg = inst->i_oparg;
2302
307k
        if (HAS_TARGET(opcode)) {
2303
18.4k
            assert(inst->i_target->b_iused > 0);
2304
18.4k
            target = &inst->i_target->b_instr[0];
2305
18.4k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2306
18.4k
        }
2307
288k
        else {
2308
288k
            target = &nop;
2309
288k
        }
2310
307k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2311
307k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2312
307k
        switch (opcode) {
2313
            /* Try to fold tuples of constants.
2314
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2315
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2316
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2317
3.34k
            case BUILD_TUPLE:
2318
3.34k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2319
81
                    switch(oparg) {
2320
0
                        case 1:
2321
0
                            INSTR_SET_OP0(inst, NOP);
2322
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2323
0
                            continue;
2324
66
                        case 2:
2325
76
                        case 3:
2326
76
                            INSTR_SET_OP0(inst, NOP);
2327
76
                            bb->b_instr[i+1].i_opcode = SWAP;
2328
76
                            continue;
2329
81
                    }
2330
81
                }
2331
3.26k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2332
3.26k
                break;
2333
1.01k
            case BUILD_LIST:
2334
1.07k
            case BUILD_SET:
2335
1.07k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2336
1.07k
                break;
2337
608
            case POP_JUMP_IF_NOT_NONE:
2338
1.19k
            case POP_JUMP_IF_NONE:
2339
1.19k
                switch (target->i_opcode) {
2340
58
                    case JUMP:
2341
58
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2342
1.19k
                }
2343
1.19k
                break;
2344
7.03k
            case POP_JUMP_IF_FALSE:
2345
7.03k
                switch (target->i_opcode) {
2346
471
                    case JUMP:
2347
471
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2348
7.03k
                }
2349
7.03k
                break;
2350
7.03k
            case POP_JUMP_IF_TRUE:
2351
1.39k
                switch (target->i_opcode) {
2352
36
                    case JUMP:
2353
36
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2354
1.39k
                }
2355
1.39k
                break;
2356
1.39k
            case JUMP_IF_FALSE:
2357
182
                switch (target->i_opcode) {
2358
0
                    case JUMP:
2359
0
                    case JUMP_IF_FALSE:
2360
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2361
0
                        continue;
2362
2
                    case JUMP_IF_TRUE:
2363
                        // No need to check for loops here; a block's b_next
2364
                        // cannot point to itself.
2365
2
                        assert(inst->i_target != inst->i_target->b_next);
2366
2
                        inst->i_target = inst->i_target->b_next;
2367
2
                        i--;
2368
2
                        continue;
2369
182
                }
2370
180
                break;
2371
221
            case JUMP_IF_TRUE:
2372
221
                switch (target->i_opcode) {
2373
0
                    case JUMP:
2374
0
                    case JUMP_IF_TRUE:
2375
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2376
0
                        continue;
2377
3
                    case JUMP_IF_FALSE:
2378
                        // No need to check for loops here; a block's b_next
2379
                        // cannot point to itself.
2380
3
                        assert(inst->i_target != inst->i_target->b_next);
2381
3
                        inst->i_target = inst->i_target->b_next;
2382
3
                        i--;
2383
3
                        continue;
2384
221
                }
2385
218
                break;
2386
2.85k
            case JUMP:
2387
4.76k
            case JUMP_NO_INTERRUPT:
2388
4.76k
                switch (target->i_opcode) {
2389
0
                    case JUMP:
2390
0
                        i -= jump_thread(bb, inst, target, JUMP);
2391
0
                        continue;
2392
0
                    case JUMP_NO_INTERRUPT:
2393
0
                        i -= jump_thread(bb, inst, target, opcode);
2394
0
                        continue;
2395
4.76k
                }
2396
4.76k
                break;
2397
4.76k
            case FOR_ITER:
2398
1.30k
                if (target->i_opcode == JUMP) {
2399
                    /* This will not work now because the jump (at target) could
2400
                     * be forward or backward and FOR_ITER only jumps forward. We
2401
                     * can re-enable this if ever we implement a backward version
2402
                     * of FOR_ITER.
2403
                     */
2404
                    /*
2405
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2406
                    */
2407
0
                }
2408
1.30k
                break;
2409
10.7k
            case STORE_FAST:
2410
10.7k
                if (opcode == nextop &&
2411
10.7k
                    oparg == bb->b_instr[i+1].i_oparg &&
2412
10.7k
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2413
24
                    bb->b_instr[i].i_opcode = POP_TOP;
2414
24
                    bb->b_instr[i].i_oparg = 0;
2415
24
                }
2416
10.7k
                break;
2417
1.54k
            case SWAP:
2418
1.54k
                if (oparg == 1) {
2419
0
                    INSTR_SET_OP0(inst, NOP);
2420
0
                }
2421
1.54k
                break;
2422
13.3k
            case LOAD_GLOBAL:
2423
13.3k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2424
6.58k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2425
6.58k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2426
6.58k
                }
2427
13.3k
                break;
2428
4.78k
            case COMPARE_OP:
2429
4.78k
                if (nextop == TO_BOOL) {
2430
2.24k
                    INSTR_SET_OP0(inst, NOP);
2431
2.24k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2432
2.24k
                    continue;
2433
2.24k
                }
2434
2.54k
                break;
2435
2.54k
            case CONTAINS_OP:
2436
2.91k
            case IS_OP:
2437
2.91k
                if (nextop == TO_BOOL) {
2438
1.33k
                    INSTR_SET_OP0(inst, NOP);
2439
1.33k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2440
1.33k
                    continue;
2441
1.33k
                }
2442
1.57k
                if (nextop == UNARY_NOT) {
2443
0
                    INSTR_SET_OP0(inst, NOP);
2444
0
                    int inverted = oparg ^ 1;
2445
0
                    assert(inverted == 0 || inverted == 1);
2446
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2447
0
                    continue;
2448
0
                }
2449
1.57k
                break;
2450
3.65k
            case TO_BOOL:
2451
3.65k
                if (nextop == TO_BOOL) {
2452
0
                    INSTR_SET_OP0(inst, NOP);
2453
0
                    continue;
2454
0
                }
2455
3.65k
                break;
2456
3.65k
            case UNARY_NOT:
2457
68
                if (nextop == TO_BOOL) {
2458
0
                    INSTR_SET_OP0(inst, NOP);
2459
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2460
0
                    continue;
2461
0
                }
2462
68
                if (nextop == UNARY_NOT) {
2463
0
                    INSTR_SET_OP0(inst, NOP);
2464
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2465
0
                    continue;
2466
0
                }
2467
68
                _Py_FALLTHROUGH;
2468
107
            case UNARY_INVERT:
2469
620
            case UNARY_NEGATIVE:
2470
620
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2471
620
                break;
2472
411
            case CALL_INTRINSIC_1:
2473
411
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2474
84
                    if (nextop == GET_ITER) {
2475
0
                        INSTR_SET_OP0(inst, NOP);
2476
0
                    }
2477
84
                    else {
2478
84
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2479
84
                    }
2480
84
                }
2481
327
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2482
2
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2483
2
                }
2484
411
                break;
2485
5.98k
            case BINARY_OP:
2486
5.98k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2487
5.98k
                break;
2488
307k
        }
2489
307k
    }
2490
2491
344k
    for (int i = 0; i < bb->b_iused; i++) {
2492
306k
        cfg_instr *inst = &bb->b_instr[i];
2493
306k
        if (inst->i_opcode == SWAP) {
2494
1.41k
            if (swaptimize(bb, &i) < 0) {
2495
0
                goto error;
2496
0
            }
2497
1.41k
            apply_static_swaps(bb, i);
2498
1.41k
        }
2499
306k
    }
2500
37.9k
    return SUCCESS;
2501
0
error:
2502
0
    return ERROR;
2503
37.9k
}
2504
2505
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2506
2507
static int
2508
remove_redundant_nops_and_jumps(cfg_builder *g)
2509
12.5k
{
2510
12.5k
    int removed_nops, removed_jumps;
2511
14.1k
    do {
2512
        /* Convergence is guaranteed because the number of
2513
         * redundant jumps and nops only decreases.
2514
         */
2515
14.1k
        removed_nops = remove_redundant_nops(g);
2516
14.1k
        RETURN_IF_ERROR(removed_nops);
2517
14.1k
        removed_jumps = remove_redundant_jumps(g);
2518
14.1k
        RETURN_IF_ERROR(removed_jumps);
2519
14.1k
    } while(removed_nops + removed_jumps > 0);
2520
12.5k
    return SUCCESS;
2521
12.5k
}
2522
2523
/* Perform optimizations on a control flow graph.
2524
   The consts object should still be in list form to allow new constants
2525
   to be appended.
2526
2527
   Code transformations that reduce code size initially fill the gaps with
2528
   NOPs.  Later those NOPs are removed.
2529
*/
2530
static int
2531
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2532
5.78k
{
2533
5.78k
    assert(PyDict_CheckExact(const_cache));
2534
5.78k
    RETURN_IF_ERROR(check_cfg(g));
2535
5.78k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2536
5.78k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2537
5.78k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2538
5.78k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2539
43.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2540
37.9k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2541
37.9k
    }
2542
5.78k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2543
5.78k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2544
5.78k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2545
5.78k
    assert(no_redundant_jumps(g));
2546
5.78k
    return SUCCESS;
2547
5.78k
}
2548
2549
static void
2550
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2551
10.0k
{
2552
10.0k
    int32_t line1 = inst1->i_loc.lineno;
2553
10.0k
    int32_t line2 = inst2->i_loc.lineno;
2554
    /* Skip if instructions are on different lines */
2555
10.0k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2556
3.60k
        return;
2557
3.60k
    }
2558
6.48k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2559
551
        return;
2560
551
    }
2561
5.93k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2562
5.93k
    INSTR_SET_OP0(inst2, NOP);
2563
5.93k
}
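   (The two 4-bit opargs are packed into one byte: e.g. LOAD_FAST 3 followed
   by LOAD_FAST 5 becomes LOAD_FAST_LOAD_FAST with oparg (3 << 4) | 5 = 53,
   and the second instruction becomes a NOP; opargs of 16 or more do not
   fit, hence the early return above.)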
2564
2565
static int
2566
insert_superinstructions(cfg_builder *g)
2567
5.78k
{
2568
43.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2569
2570
319k
        for (int i = 0; i < b->b_iused; i++) {
2571
281k
            cfg_instr *inst = &b->b_instr[i];
2572
281k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2573
281k
            switch(inst->i_opcode) {
2574
34.6k
                case LOAD_FAST:
2575
34.6k
                    if (nextop == LOAD_FAST) {
2576
5.33k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2577
5.33k
                    }
2578
34.6k
                    break;
2579
9.79k
                case STORE_FAST:
2580
9.79k
                    switch (nextop) {
2581
3.67k
                        case LOAD_FAST:
2582
3.67k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2583
3.67k
                            break;
2584
1.08k
                        case STORE_FAST:
2585
1.08k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2586
1.08k
                            break;
2587
9.79k
                    }
2588
9.79k
                    break;
2589
281k
            }
2590
281k
        }
2591
37.9k
    }
2592
5.78k
    int res = remove_redundant_nops(g);
2593
5.78k
    assert(no_redundant_nops(g));
2594
5.78k
    return res;
2595
5.78k
}
2596
2597
#define NOT_LOCAL -1
2598
10.0k
#define DUMMY_INSTR -1
2599
2600
typedef struct {
2601
    // Index of instruction that produced the reference or DUMMY_INSTR.
2602
    int instr;
2603
2604
    // The local to which the reference refers or NOT_LOCAL.
2605
    int local;
2606
} ref;
2607
2608
typedef struct {
2609
    ref *refs;
2610
    Py_ssize_t size;
2611
    Py_ssize_t capacity;
2612
} ref_stack;
2613
2614
static int
2615
ref_stack_push(ref_stack *stack, ref r)
2616
226k
{
2617
226k
    if (stack->size == stack->capacity) {
2618
5.78k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2619
5.78k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2620
5.78k
        if (refs == NULL) {
2621
0
            PyErr_NoMemory();
2622
0
            return -1;
2623
0
        }
2624
5.78k
        stack->refs = refs;
2625
5.78k
        stack->capacity = new_cap;
2626
5.78k
    }
2627
226k
    stack->refs[stack->size] = r;
2628
226k
    stack->size++;
2629
226k
    return 0;
2630
226k
}
2631
2632
static ref
2633
ref_stack_pop(ref_stack *stack)
2634
196k
{
2635
196k
    assert(stack->size > 0);
2636
196k
    stack->size--;
2637
196k
    ref r = stack->refs[stack->size];
2638
196k
    return r;
2639
196k
}
2640
2641
static void
2642
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2643
923
{
2644
923
    Py_ssize_t idx = stack->size - off;
2645
923
    assert(idx >= 0 && idx < stack->size);
2646
923
    ref tmp = stack->refs[idx];
2647
923
    stack->refs[idx] = stack->refs[stack->size - 1];
2648
923
    stack->refs[stack->size - 1] = tmp;
2649
923
}
2650
2651
static ref
2652
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2653
41.5k
{
2654
41.5k
    assert(idx >= 0 && idx < stack->size);
2655
41.5k
    return stack->refs[idx];
2656
41.5k
}
2657
2658
static void
2659
ref_stack_clear(ref_stack *stack)
2660
26.6k
{
2661
26.6k
    stack->size = 0;
2662
26.6k
}
2663
2664
static void
2665
ref_stack_fini(ref_stack *stack)
2666
5.78k
{
2667
5.78k
    if (stack->refs != NULL) {
2668
5.78k
        PyMem_Free(stack->refs);
2669
5.78k
    }
2670
5.78k
    stack->refs = NULL;
2671
5.78k
    stack->capacity = 0;
2672
5.78k
    stack->size = 0;
2673
5.78k
}
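   (Typical lifecycle, as used by optimize_load_fast below: a zero-initialized
   ref_stack grows on demand in ref_stack_push, is reset with ref_stack_clear
   at the start of each basic block, and is presumably released once with
   ref_stack_fini when the pass finishes.)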
2674
2675
typedef enum {
2676
    // The loaded reference is still on the stack when the local is killed
2677
    SUPPORT_KILLED  = 1,
2678
    // The loaded reference is stored into a local
2679
    STORED_AS_LOCAL = 2,
2680
    // The loaded reference is still on the stack at the end of the basic block
2681
    REF_UNCONSUMED  = 4,
2682
} LoadFastInstrFlag;
2683
2684
static void
2685
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2686
10.2k
{
2687
20.2k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2688
10.0k
        ref r = ref_stack_at(refs, i);
2689
10.0k
        if (r.local == local) {
2690
8
            assert(r.instr >= 0);
2691
8
            instr_flags[r.instr] |= SUPPORT_KILLED;
2692
8
        }
2693
10.0k
    }
2694
10.2k
}
2695
2696
static void
2697
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2698
10.0k
{
2699
10.0k
    kill_local(instr_flags, refs, local);
2700
10.0k
    if (r.instr != DUMMY_INSTR) {
2701
8.68k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2702
8.68k
    }
2703
10.0k
}
2704
2705
static void
2706
load_fast_push_block(basicblock ***sp, basicblock *target,
2707
                     Py_ssize_t start_depth)
2708
27.2k
{
2709
27.2k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2710
27.2k
    if (!target->b_visited) {
2711
20.9k
        target->b_visited = 1;
2712
20.9k
        *(*sp)++ = target;
2713
20.9k
    }
2714
27.2k
}
2715
2716
/*
2717
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2718
 * load borrowed references onto the operand stack.
2719
 *
2720
 * This is only safe when we can prove that the reference in the frame outlives
2721
 * the borrowed reference produced by the instruction. We make this tractable
2722
 * by enforcing the following lifetimes:
2723
 *
2724
 * 1. Borrowed references loaded onto the operand stack live until the end of
2725
 *    the instruction that consumes them from the stack. Any borrowed
2726
 *    references that would escape into the heap (e.g. into frame objects or
2727
 *    generators) are converted into new, strong references.
2728
 *
2729
 * 2. Locals live until they are either killed by an instruction
2730
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2731
 *    via `f_locals` is added to a tuple owned by the frame object.
2732
 *
2733
 * To simplify the problem of detecting which supporting references in the
2734
 * frame are killed by instructions that overwrite locals, we only allow
2735
 * borrowed references to be stored as a local in the frame if they were passed
2736
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2737
 * strong references.
2738
 *
2739
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2740
 * that meet the following criteria:
2741
 *
2742
 * 1. The produced reference must be consumed from the stack before the
2743
 *    supporting reference in the frame is killed.
2744
 *
2745
 * 2. The produced reference cannot be stored as a local.
2746
 *
2747
 * We use abstract interpretation to identify instructions that meet these
2748
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2749
 * stack of abstract references and note any instructions that violate the
2750
 * criteria above. Once we've processed all the instructions in a block, any
2751
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2752
 */
2753
static int
2754
optimize_load_fast(cfg_builder *g)
2755
5.78k
{
2756
5.78k
    int status;
2757
5.78k
    ref_stack refs = {0};
2758
5.78k
    int max_instrs = 0;
2759
5.78k
    basicblock *entryblock = g->g_entryblock;
2760
44.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2761
38.5k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2762
38.5k
    }
2763
5.78k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2764
5.78k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2765
5.78k
    if (instr_flags == NULL) {
2766
0
        PyErr_NoMemory();
2767
0
        return ERROR;
2768
0
    }
2769
5.78k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2770
5.78k
    if (blocks == NULL) {
2771
0
        status = ERROR;
2772
0
        goto done;
2773
0
    }
2774
5.78k
    basicblock **sp = blocks;
2775
5.78k
    *sp = entryblock;
2776
5.78k
    sp++;
2777
5.78k
    entryblock->b_startdepth = 0;
2778
5.78k
    entryblock->b_visited = 1;
2779
2780
5.78k
    #define PUSH_REF(instr, local)                \
2781
226k
        do {                                      \
2782
226k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2783
0
                status = ERROR;                   \
2784
0
                goto done;                        \
2785
0
            }                                     \
2786
226k
        } while(0)
2787
2788
32.4k
    while (sp != blocks) {
2789
26.6k
        basicblock *block = *--sp;
2790
26.6k
        assert(block->b_startdepth > -1);
2791
2792
        // Reset per-block state.
2793
26.6k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2794
2795
        // Reset the stack of refs. We don't track references on the stack
2796
        // across basic blocks, but the bytecode will expect their
2797
        // presence. Add dummy references as necessary.
2798
26.6k
        ref_stack_clear(&refs);
2799
51.6k
        for (int i = 0; i < block->b_startdepth; i++) {
2800
24.9k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2801
24.9k
        }
2802
2803
286k
        for (int i = 0; i < block->b_iused; i++) {
2804
260k
            cfg_instr *instr = &block->b_instr[i];
2805
260k
            int opcode = instr->i_opcode;
2806
260k
            int oparg = instr->i_oparg;
2807
260k
            assert(opcode != EXTENDED_ARG);
2808
260k
            switch (opcode) {
2809
                // Opcodes that load and store locals
2810
2
                case DELETE_FAST: {
2811
2
                    kill_local(instr_flags, &refs, oparg);
2812
2
                    break;
2813
0
                }
2814
2815
28.6k
                case LOAD_FAST: {
2816
28.6k
                    PUSH_REF(i, oparg);
2817
28.6k
                    break;
2818
28.6k
                }
2819
2820
28.6k
                case LOAD_FAST_AND_CLEAR: {
2821
230
                    kill_local(instr_flags, &refs, oparg);
2822
230
                    PUSH_REF(i, oparg);
2823
230
                    break;
2824
230
                }
2825
2826
4.50k
                case LOAD_FAST_LOAD_FAST: {
2827
4.50k
                    PUSH_REF(i, oparg >> 4);
2828
4.50k
                    PUSH_REF(i, oparg & 15);
2829
4.50k
                    break;
2830
4.50k
                }
2831
2832
8.02k
                case STORE_FAST: {
2833
8.02k
                    ref r = ref_stack_pop(&refs);
2834
8.02k
                    store_local(instr_flags, &refs, oparg, r);
2835
8.02k
                    break;
2836
4.50k
                }
2837
2838
169
                case STORE_FAST_LOAD_FAST: {
2839
                    // STORE_FAST
2840
169
                    ref r = ref_stack_pop(&refs);
2841
169
                    store_local(instr_flags, &refs, oparg >> 4, r);
2842
                    // LOAD_FAST
2843
169
                    PUSH_REF(i, oparg & 15);
2844
169
                    break;
2845
169
                }
2846
2847
907
                case STORE_FAST_STORE_FAST: {
2848
                    // STORE_FAST
2849
907
                    ref r = ref_stack_pop(&refs);
2850
907
                    store_local(instr_flags, &refs, oparg >> 4, r);
2851
                    // STORE_FAST
2852
907
                    r = ref_stack_pop(&refs);
2853
907
                    store_local(instr_flags, &refs, oparg & 15, r);
2854
907
                    break;
2855
169
                }
2856
2857
                // Opcodes that shuffle values on the stack
2858
961
                case COPY: {
2859
961
                    assert(oparg > 0);
2860
961
                    Py_ssize_t idx = refs.size - oparg;
2861
961
                    ref r = ref_stack_at(&refs, idx);
2862
961
                    PUSH_REF(r.instr, r.local);
2863
961
                    break;
2864
961
                }
2865
2866
961
                case SWAP: {
2867
923
                    assert(oparg >= 2);
2868
923
                    ref_stack_swap_top(&refs, oparg);
2869
923
                    break;
2870
961
                }
2871
2872
                // We treat opcodes that do not consume all of their inputs on
2873
                // a case-by-case basis, as we have no generic way of knowing
2874
                // how many inputs should be left on the stack.
2875
2876
                // Opcodes that consume no inputs
2877
924
                case FORMAT_SIMPLE:
2878
924
                case GET_ANEXT:
2879
2.11k
                case GET_ITER:
2880
2.11k
                case GET_LEN:
2881
2.13k
                case GET_YIELD_FROM_ITER:
2882
2.65k
                case IMPORT_FROM:
2883
2.65k
                case MATCH_KEYS:
2884
2.65k
                case MATCH_MAPPING:
2885
2.65k
                case MATCH_SEQUENCE:
2886
2.65k
                case WITH_EXCEPT_START: {
2887
2.65k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2888
2.65k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2889
2.65k
                    int net_pushed = num_pushed - num_popped;
2890
2.65k
                    assert(net_pushed >= 0);
2891
4.35k
                    for (int i = 0; i < net_pushed; i++) {
2892
1.70k
                        PUSH_REF(i, NOT_LOCAL);
2893
1.70k
                    }
2894
2.65k
                    break;
2895
2.65k
                }
2896
2897
                // Opcodes that consume some inputs and push no new values
2898
2.65k
                case DICT_MERGE:
2899
741
                case DICT_UPDATE:
2900
1.10k
                case LIST_APPEND:
2901
1.26k
                case LIST_EXTEND:
2902
11.7k
                case MAP_ADD:
2903
11.7k
                case RERAISE:
2904
11.8k
                case SET_ADD:
2905
11.8k
                case SET_UPDATE: {
2906
11.8k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2907
11.8k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2908
11.8k
                    int net_popped = num_popped - num_pushed;
2909
11.8k
                    assert(net_popped > 0);
2910
34.2k
                    for (int i = 0; i < net_popped; i++) {
2911
22.3k
                        ref_stack_pop(&refs);
2912
22.3k
                    }
2913
11.8k
                    break;
2914
11.8k
                }
2915
2916
38
                case END_SEND:
2917
1.51k
                case SET_FUNCTION_ATTRIBUTE: {
2918
1.51k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2919
1.51k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2920
1.51k
                    ref tos = ref_stack_pop(&refs);
2921
1.51k
                    ref_stack_pop(&refs);
2922
1.51k
                    PUSH_REF(tos.instr, tos.local);
2923
1.51k
                    break;
2924
1.51k
                }
2925
2926
                // Opcodes that consume some inputs and push new values
2927
1.51k
                case CHECK_EXC_MATCH: {
2928
0
                    ref_stack_pop(&refs);
2929
0
                    PUSH_REF(i, NOT_LOCAL);
2930
0
                    break;
2931
0
                }
2932
2933
1.24k
                case FOR_ITER: {
2934
1.24k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2935
1.24k
                    PUSH_REF(i, NOT_LOCAL);
2936
1.24k
                    break;
2937
1.24k
                }
2938
2939
15.2k
                case LOAD_ATTR:
2940
15.4k
                case LOAD_SUPER_ATTR: {
2941
15.4k
                    ref self = ref_stack_pop(&refs);
2942
15.4k
                    if (opcode == LOAD_SUPER_ATTR) {
2943
176
                        ref_stack_pop(&refs);
2944
176
                        ref_stack_pop(&refs);
2945
176
                    }
2946
15.4k
                    PUSH_REF(i, NOT_LOCAL);
2947
15.4k
                    if (oparg & 1) {
2948
                        // A method call; conservatively assume that self is pushed
2949
                        // back onto the stack
2950
6.13k
                        PUSH_REF(self.instr, self.local);
2951
6.13k
                    }
2952
15.4k
                    break;
2953
15.4k
                }
2954
2955
15.4k
                case LOAD_SPECIAL:
2956
196
                case PUSH_EXC_INFO: {
2957
196
                    ref tos = ref_stack_pop(&refs);
2958
196
                    PUSH_REF(i, NOT_LOCAL);
2959
196
                    PUSH_REF(tos.instr, tos.local);
2960
196
                    break;
2961
196
                }
2962
2963
196
                case SEND: {
2964
38
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2965
38
                    ref_stack_pop(&refs);
2966
38
                    PUSH_REF(i, NOT_LOCAL);
2967
38
                    break;
2968
38
                }
2969
2970
                // Opcodes that consume all of their inputs
2971
182k
                default: {
2972
182k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2973
182k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2974
182k
                    if (HAS_TARGET(instr->i_opcode)) {
2975
11.6k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2976
11.6k
                    }
2977
182k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2978
                        // Block push opcodes only affect the stack when jumping
2979
                        // to the target.
2980
327k
                        for (int j = 0; j < num_popped; j++) {
2981
145k
                            ref_stack_pop(&refs);
2982
145k
                        }
2983
319k
                        for (int j = 0; j < num_pushed; j++) {
2984
136k
                            PUSH_REF(i, NOT_LOCAL);
2985
136k
                        }
2986
182k
                    }
2987
182k
                    break;
2988
182k
                }
2989
260k
            }
2990
260k
        }
2991
2992
        // Push fallthrough block
2993
26.6k
        cfg_instr *term = basicblock_last_instr(block);
2994
26.6k
        if (term != NULL && block->b_next != NULL &&
2995
26.6k
            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
2996
25.4k
              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
2997
14.2k
            assert(BB_HAS_FALLTHROUGH(block));
2998
14.2k
            load_fast_push_block(&sp, block->b_next, refs.size);
2999
14.2k
        }
3000
3001
        // Mark instructions that produce values that are on the stack at the
3002
        // end of the basic block
3003
57.1k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3004
30.4k
            ref r = ref_stack_at(&refs, i);
3005
30.4k
            if (r.instr != -1) {
3006
13.2k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3007
13.2k
            }
3008
30.4k
        }
3009
3010
        // Optimize instructions
3011
286k
        for (int i = 0; i < block->b_iused; i++) {
3012
260k
            if (!instr_flags[i]) {
3013
239k
                cfg_instr *instr = &block->b_instr[i];
3014
239k
                switch (instr->i_opcode) {
3015
27.5k
                    case LOAD_FAST:
3016
27.5k
                        instr->i_opcode = LOAD_FAST_BORROW;
3017
27.5k
                        break;
3018
4.42k
                    case LOAD_FAST_LOAD_FAST:
3019
4.42k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3020
4.42k
                        break;
3021
207k
                    default:
3022
207k
                        break;
3023
239k
                }
3024
239k
            }
3025
260k
        }
3026
26.6k
    }
3027
3028
5.78k
    #undef PUSH_REF
3029
3030
5.78k
    status = SUCCESS;
3031
3032
5.78k
done:
3033
5.78k
    ref_stack_fini(&refs);
3034
5.78k
    PyMem_Free(instr_flags);
3035
5.78k
    PyMem_Free(blocks);
3036
5.78k
    return status;
3037
5.78k
}
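
/* Illustrative sketch of the borrow analysis above (standalone; the two-field
 * ref record and names are simplified stand-ins, not the flowgraph.c API, and
 * stores/kills are ignored).  A LOAD_FAST may be downgraded to a borrow only
 * if the reference it pushes is consumed before the end of its basic block;
 * anything still on the simulated stack at block end keeps a strong reference.
 */
#include <stdio.h>

typedef struct { int instr; int local; } ref_t;  /* producing instr, local */

int main(void)
{
    ref_t stack[8];
    int sp = 0;
    unsigned char unconsumed[4] = {0};

    stack[sp++] = (ref_t){0, 0};    /* 0: LOAD_FAST x */
    stack[sp++] = (ref_t){1, 1};    /* 1: LOAD_FAST y */
    stack[sp++] = (ref_t){2, 2};    /* 2: LOAD_FAST z */
    sp -= 2;                        /* 3: BINARY_OP pops y and z ... */
    stack[sp++] = (ref_t){3, -1};   /*    ... and pushes its result  */

    /* Refs that survive to the end of the block were never consumed. */
    for (int i = 0; i < sp; i++) {
        if (stack[i].instr != -1) {
            unconsumed[stack[i].instr] = 1;
        }
    }
    for (int i = 0; i <= 2; i++) {
        printf("LOAD_FAST #%d -> %s\n", i,
               unconsumed[i] ? "stays LOAD_FAST" : "LOAD_FAST_BORROW");
    }
    return 0;
}
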
3038
3039
// helper functions for add_checks_for_loads_of_unknown_variables
3040
static inline void
3041
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3042
129k
{
3043
    // Push b if the unsafe mask is giving us any new information.
3044
    // To avoid overflowing the stack, only allow each block once.
3045
    // Use b->b_visited=1 to mean that b is currently on the stack.
3046
129k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3047
129k
    if (b->b_unsafe_locals_mask != both) {
3048
20.9k
        b->b_unsafe_locals_mask = both;
3049
        // More work left to do.
3050
20.9k
        if (!b->b_visited) {
3051
            // not on the stack, so push it.
3052
20.8k
            *(*sp)++ = b;
3053
20.8k
            b->b_visited = 1;
3054
20.8k
        }
3055
20.9k
    }
3056
129k
}
3057
3058
static void
3059
scan_block_for_locals(basicblock *b, basicblock ***sp)
3060
55.6k
{
3061
    // bit i is set if local i is potentially uninitialized
3062
55.6k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3063
384k
    for (int i = 0; i < b->b_iused; i++) {
3064
328k
        cfg_instr *instr = &b->b_instr[i];
3065
328k
        assert(instr->i_opcode != EXTENDED_ARG);
3066
328k
        if (instr->i_except != NULL) {
3067
68.8k
            maybe_push(instr->i_except, unsafe_mask, sp);
3068
68.8k
        }
3069
328k
        if (instr->i_oparg >= 64) {
3070
21.8k
            continue;
3071
21.8k
        }
3072
306k
        assert(instr->i_oparg >= 0);
3073
306k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3074
306k
        switch (instr->i_opcode) {
3075
303
            case DELETE_FAST:
3076
783
            case LOAD_FAST_AND_CLEAR:
3077
1.74k
            case STORE_FAST_MAYBE_NULL:
3078
1.74k
                unsafe_mask |= bit;
3079
1.74k
                break;
3080
20.7k
            case STORE_FAST:
3081
20.7k
                unsafe_mask &= ~bit;
3082
20.7k
                break;
3083
102
            case LOAD_FAST_CHECK:
3084
                // If this doesn't raise, then the local is defined.
3085
102
                unsafe_mask &= ~bit;
3086
102
                break;
3087
67.8k
            case LOAD_FAST:
3088
67.8k
                if (unsafe_mask & bit) {
3089
102
                    instr->i_opcode = LOAD_FAST_CHECK;
3090
102
                }
3091
67.8k
                unsafe_mask &= ~bit;
3092
67.8k
                break;
3093
306k
        }
3094
306k
    }
3095
55.6k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3096
29.4k
        maybe_push(b->b_next, unsafe_mask, sp);
3097
29.4k
    }
3098
55.6k
    cfg_instr *last = basicblock_last_instr(b);
3099
55.6k
    if (last && is_jump(last)) {
3100
26.7k
        assert(last->i_target != NULL);
3101
26.7k
        maybe_push(last->i_target, unsafe_mask, sp);
3102
26.7k
    }
3103
55.6k
}
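
/* A minimal sketch of the per-instruction transfer function used above,
 * assuming local indices below 64 (standalone; the DEMO_* opcodes are
 * hypothetical stand-ins).  DELETE_FAST-like ops set the "maybe unbound"
 * bit, stores clear it, and a load both tests and clears it, since a load
 * that did not raise proves the local is bound; maybe_push() above then
 * unions the resulting mask into each successor block. */
#include <stdint.h>

enum { DEMO_STORE, DEMO_DELETE, DEMO_LOAD };

static int needs_check(int op, int local, uint64_t *unsafe)
{
    uint64_t bit = (uint64_t)1 << local;
    switch (op) {
        case DEMO_DELETE: *unsafe |= bit;  return 0;  /* maybe unbound now */
        case DEMO_STORE:  *unsafe &= ~bit; return 0;  /* definitely bound  */
        case DEMO_LOAD: {
            int check = (*unsafe & bit) != 0;  /* needs LOAD_FAST_CHECK?   */
            *unsafe &= ~bit;                   /* bound if it didn't raise */
            return check;
        }
    }
    return 0;
}
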
3104
3105
static int
3106
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3107
0
{
3108
0
    assert(nlocals > 64);
3109
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3110
0
    if (states == NULL) {
3111
0
        PyErr_NoMemory();
3112
0
        return ERROR;
3113
0
    }
3114
0
    Py_ssize_t blocknum = 0;
3115
    // state[i - 64] == blocknum if local i is guaranteed to
3116
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3117
    // STORE_FAST within that basicblock (not followed by
3118
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3119
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3120
0
        blocknum++;
3121
0
        for (int i = 0; i < b->b_iused; i++) {
3122
0
            cfg_instr *instr = &b->b_instr[i];
3123
0
            assert(instr->i_opcode != EXTENDED_ARG);
3124
0
            int arg = instr->i_oparg;
3125
0
            if (arg < 64) {
3126
0
                continue;
3127
0
            }
3128
0
            assert(arg >= 0);
3129
0
            switch (instr->i_opcode) {
3130
0
                case DELETE_FAST:
3131
0
                case LOAD_FAST_AND_CLEAR:
3132
0
                case STORE_FAST_MAYBE_NULL:
3133
0
                    states[arg - 64] = blocknum - 1;
3134
0
                    break;
3135
0
                case STORE_FAST:
3136
0
                    states[arg - 64] = blocknum;
3137
0
                    break;
3138
0
                case LOAD_FAST:
3139
0
                    if (states[arg - 64] != blocknum) {
3140
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3141
0
                    }
3142
0
                    states[arg - 64] = blocknum;
3143
0
                    break;
3145
0
            }
3146
0
        }
3147
0
    }
3148
0
    PyMem_Free(states);
3149
0
    return SUCCESS;
3150
0
}
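
/* Sketch of the generation-stamp trick above (standalone, hypothetical
 * names): tagging each state entry with the current block number makes
 * "reset all per-block state" a single increment instead of an O(nlocals)
 * clear, which is what keeps the >64-locals path cheap. */
#include <stdio.h>

int main(void)
{
    long stamp[3] = {0};     /* calloc'ed to 0: "not bound in any block" */
    long blocknum = 0;

    blocknum++;              /* enter block 1 */
    stamp[0] = blocknum;     /* STORE_FAST of local 0 */
    printf("block 1: bound=%d\n", stamp[0] == blocknum);  /* 1 */

    blocknum++;              /* enter block 2: every stamp is stale again */
    printf("block 2: bound=%d\n", stamp[0] == blocknum);  /* 0 */
    return 0;
}
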
3151
3152
static int
3153
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3154
5.78k
{
3155
5.78k
    assert(PyList_CheckExact(consts));
3156
5.78k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3157
5.78k
    if (nconsts == 0) {
3158
0
        return SUCCESS;  /* nothing to do */
3159
0
    }
3160
3161
5.78k
    Py_ssize_t *index_map = NULL;
3162
5.78k
    Py_ssize_t *reverse_index_map = NULL;
3163
5.78k
    int err = ERROR;
3164
3165
5.78k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3166
5.78k
    if (index_map == NULL) {
3167
0
        goto end;
3168
0
    }
3169
44.4k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3170
38.6k
        index_map[i] = -1;
3171
38.6k
    }
3172
    // The first constant may be the docstring; always keep it.
3173
5.78k
    index_map[0] = 0;
3174
3175
    /* mark used consts */
3176
43.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3177
319k
        for (int i = 0; i < b->b_iused; i++) {
3178
281k
            int opcode = b->b_instr[i].i_opcode;
3179
281k
            if (OPCODE_HAS_CONST(opcode)) {
3180
38.0k
                int index = b->b_instr[i].i_oparg;
3181
38.0k
                index_map[index] = index;
3182
38.0k
            }
3183
281k
        }
3184
37.9k
    }
3185
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3186
    /* condense consts */
3187
5.78k
    Py_ssize_t n_used_consts = 0;
3188
50.1k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3189
44.4k
        if (index_map[i] != -1) {
3190
32.7k
            assert(index_map[i] == i);
3191
32.7k
            index_map[n_used_consts++] = index_map[i];
3192
32.7k
        }
3193
44.4k
    }
3194
5.78k
    if (n_used_consts == nconsts) {
3195
        /* nothing to do */
3196
2.29k
        err = SUCCESS;
3197
2.29k
        goto end;
3198
2.29k
    }
3199
3200
    /* move all used consts to the beginning of the consts list */
3201
3.49k
    assert(n_used_consts < nconsts);
3202
29.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3203
26.2k
        Py_ssize_t old_index = index_map[i];
3204
26.2k
        assert(i <= old_index && old_index < nconsts);
3205
26.2k
        if (i != old_index) {
3206
19.9k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3207
19.9k
            assert(value != NULL);
3208
19.9k
            PyList_SetItem(consts, i, Py_NewRef(value));
3209
19.9k
        }
3210
26.2k
    }
3211
3212
    /* truncate the consts list at its new size */
3213
3.49k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3214
0
        goto end;
3215
0
    }
3216
    /* adjust const indices in the bytecode */
3217
3.49k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3218
3.49k
    if (reverse_index_map == NULL) {
3219
0
        goto end;
3220
0
    }
3221
41.4k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3222
37.9k
        reverse_index_map[i] = -1;
3223
37.9k
    }
3224
29.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3225
26.2k
        assert(index_map[i] != -1);
3226
26.2k
        assert(reverse_index_map[index_map[i]] == -1);
3227
26.2k
        reverse_index_map[index_map[i]] = i;
3228
26.2k
    }
3229
3230
31.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3231
248k
        for (int i = 0; i < b->b_iused; i++) {
3232
221k
            int opcode = b->b_instr[i].i_opcode;
3233
221k
            if (OPCODE_HAS_CONST(opcode)) {
3234
31.3k
                int index = b->b_instr[i].i_oparg;
3235
31.3k
                assert(reverse_index_map[index] >= 0);
3236
31.3k
                assert(reverse_index_map[index] < n_used_consts);
3237
31.3k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3238
31.3k
            }
3239
221k
        }
3240
27.5k
    }
3241
3242
3.49k
    err = SUCCESS;
3243
5.78k
end:
3244
5.78k
    PyMem_Free(index_map);
3245
5.78k
    PyMem_Free(reverse_index_map);
3246
5.78k
    return err;
3247
3.49k
}
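
/* Sketch of the two-map scheme above (standalone): index_map condenses the
 * used indices to the front, and reverse_index_map translates each old
 * LOAD_CONST oparg to its new position. */
#include <stdio.h>

int main(void)
{
    int used[5] = {1, 0, 1, 0, 1};          /* consts 0, 2, 4 referenced */
    int index_map[5], reverse[5] = {-1, -1, -1, -1, -1};
    int n_used = 0;

    for (int i = 0; i < 5; i++) {
        if (used[i]) {
            index_map[n_used++] = i;        /* new index -> old index */
        }
    }
    for (int i = 0; i < n_used; i++) {
        reverse[index_map[i]] = i;          /* old index -> new index */
    }
    printf("LOAD_CONST 4 becomes LOAD_CONST %d\n", reverse[4]);  /* 2 */
    return 0;
}
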
3248
3249
3250
3251
static int
3252
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3253
                                                int nlocals,
3254
                                                int nparams)
3255
5.78k
{
3256
5.78k
    if (nlocals == 0) {
3257
1.50k
        return SUCCESS;
3258
1.50k
    }
3259
4.28k
    if (nlocals > 64) {
3260
        // To avoid O(nlocals**2) compilation, locals beyond the first
3261
        // 64 are only analyzed one basicblock at a time: initialization
3262
        // info is not passed between basicblocks.
3263
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3264
0
            return ERROR;
3265
0
        }
3266
0
        nlocals = 64;
3267
0
    }
3268
4.28k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3269
4.28k
    if (stack == NULL) {
3270
0
        return ERROR;
3271
0
    }
3272
4.28k
    basicblock **sp = stack;
3273
3274
    // First origin of being uninitialized:
3275
    // The non-parameter locals in the entry block.
3276
4.28k
    uint64_t start_mask = 0;
3277
10.6k
    for (int i = nparams; i < nlocals; i++) {
3278
6.38k
        start_mask |= (uint64_t)1 << i;
3279
6.38k
    }
3280
4.28k
    maybe_push(entryblock, start_mask, &sp);
3281
3282
    // Second origin of being uninitialized:
3283
    // There could be DELETE_FAST somewhere, so
3284
    // be sure to scan each basicblock at least once.
3285
39.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
34.8k
        scan_block_for_locals(b, &sp);
3287
34.8k
    }
3288
    // Now propagate the uncertainty from the origins we found: Use
3289
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3290
25.1k
    while (sp > stack) {
3291
20.8k
        basicblock *b = *--sp;
3292
        // mark as no longer on stack
3293
20.8k
        b->b_visited = 0;
3294
20.8k
        scan_block_for_locals(b, &sp);
3295
20.8k
    }
3296
4.28k
    PyMem_Free(stack);
3297
4.28k
    return SUCCESS;
3298
4.28k
}
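
/* Worked example of the analysis above (standalone sketch).  For
 *
 *     def f(a):
 *         if a:
 *             x = 1
 *         return x
 *
 * nparams == 1 and nlocals == 2, so only x starts out maybe-unbound.  The
 * store clears the bit on one path but not the other, and maybe_push()
 * unions the two masks at the join, so the final load is rewritten to
 * LOAD_FAST_CHECK (UnboundLocalError instead of reading a NULL slot). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int nparams = 1, nlocals = 2;                 /* locals: a (0), x (1) */
    uint64_t entry = 0;
    for (int i = nparams; i < nlocals; i++) {
        entry |= (uint64_t)1 << i;
    }
    uint64_t taken = entry & ~((uint64_t)1 << 1); /* path with STORE_FAST x */
    uint64_t skipped = entry;                     /* path without the store */
    uint64_t join = taken | skipped;              /* union at the join      */
    printf("x needs LOAD_FAST_CHECK: %s\n",
           (join & ((uint64_t)1 << 1)) ? "yes" : "no");   /* yes */
    return 0;
}
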
3299
3300
3301
static int
3302
5.15k
mark_warm(basicblock *entryblock) {
3303
5.15k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3304
5.15k
    if (stack == NULL) {
3305
0
        return ERROR;
3306
0
    }
3307
5.15k
    basicblock **sp = stack;
3308
3309
5.15k
    *sp++ = entryblock;
3310
5.15k
    entryblock->b_visited = 1;
3311
31.5k
    while (sp > stack) {
3312
26.3k
        basicblock *b = *(--sp);
3313
26.3k
        assert(!b->b_except_handler);
3314
26.3k
        b->b_warm = 1;
3315
26.3k
        basicblock *next = b->b_next;
3316
26.3k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3317
12.4k
            *sp++ = next;
3318
12.4k
            next->b_visited = 1;
3319
12.4k
        }
3320
236k
        for (int i=0; i < b->b_iused; i++) {
3321
210k
            cfg_instr *instr = &b->b_instr[i];
3322
210k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3323
8.74k
                *sp++ = instr->i_target;
3324
8.74k
                instr->i_target->b_visited = 1;
3325
8.74k
            }
3326
210k
        }
3327
26.3k
    }
3328
5.15k
    PyMem_Free(stack);
3329
5.15k
    return SUCCESS;
3330
5.15k
}
3331
3332
static int
3333
5.15k
mark_cold(basicblock *entryblock) {
3334
42.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3335
37.3k
        assert(!b->b_cold && !b->b_warm);
3336
37.3k
    }
3337
5.15k
    if (mark_warm(entryblock) < 0) {
3338
0
        return ERROR;
3339
0
    }
3340
3341
5.15k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3342
5.15k
    if (stack == NULL) {
3343
0
        return ERROR;
3344
0
    }
3345
3346
5.15k
    basicblock **sp = stack;
3347
42.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3348
37.3k
        if (b->b_except_handler) {
3349
2.32k
            assert(!b->b_warm);
3350
2.32k
            *sp++ = b;
3351
2.32k
            b->b_visited = 1;
3352
2.32k
        }
3353
37.3k
    }
3354
3355
9.67k
    while (sp > stack) {
3356
4.52k
        basicblock *b = *(--sp);
3357
4.52k
        b->b_cold = 1;
3358
4.52k
        basicblock *next = b->b_next;
3359
4.52k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3360
1.23k
            if (!next->b_warm && !next->b_visited) {
3361
1.14k
                *sp++ = next;
3362
1.14k
                next->b_visited = 1;
3363
1.14k
            }
3364
1.23k
        }
3365
24.1k
        for (int i = 0; i < b->b_iused; i++) {
3366
19.6k
            cfg_instr *instr = &b->b_instr[i];
3367
19.6k
            if (is_jump(instr)) {
3368
1.53k
                assert(i == b->b_iused - 1);
3369
1.53k
                basicblock *target = b->b_instr[i].i_target;
3370
1.53k
                if (!target->b_warm && !target->b_visited) {
3371
1.06k
                    *sp++ = target;
3372
1.06k
                    target->b_visited = 1;
3373
1.06k
                }
3374
1.53k
            }
3375
19.6k
        }
3376
4.52k
    }
3377
5.15k
    PyMem_Free(stack);
3378
5.15k
    return SUCCESS;
3379
5.15k
}
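
/* Sketch of the warm/cold split above (standalone, tiny adjacency list):
 * mark_warm() walks only normal control-flow edges from the entry, so a
 * block that is reachable solely through an exception edge is never marked
 * warm and ends up cold. */
#include <stdio.h>

int main(void)
{
    /* 0 -> 1 -> 2 by normal flow; block 3 hangs off an exception edge. */
    int succ[4][2] = {{1, -1}, {2, -1}, {-1, -1}, {-1, -1}};
    int warm[4] = {0}, stack[4], sp = 0;

    stack[sp++] = 0;
    warm[0] = 1;
    while (sp > 0) {
        int b = stack[--sp];
        for (int i = 0; i < 2 && succ[b][i] != -1; i++) {
            if (!warm[succ[b][i]]) {
                warm[succ[b][i]] = 1;
                stack[sp++] = succ[b][i];
            }
        }
    }
    for (int b = 0; b < 4; b++) {
        printf("block %d: %s\n", b, warm[b] ? "warm" : "cold");
    }
    return 0;
}
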
3380
3381
3382
static int
3383
5.78k
push_cold_blocks_to_end(cfg_builder *g) {
3384
5.78k
    basicblock *entryblock = g->g_entryblock;
3385
5.78k
    if (entryblock->b_next == NULL) {
3386
        /* single basicblock, no need to reorder */
3387
634
        return SUCCESS;
3388
634
    }
3389
5.15k
    RETURN_IF_ERROR(mark_cold(entryblock));
3390
3391
5.15k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3392
3393
    /* If we have a cold block with fallthrough to a warm block, add */
3394
    /* an explicit jump instead of fallthrough */
3395
42.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3396
37.3k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3397
41
            basicblock *explicit_jump = cfg_builder_new_block(g);
3398
41
            if (explicit_jump == NULL) {
3399
0
                return ERROR;
3400
0
            }
3401
41
            if (!IS_LABEL(b->b_next->b_label)) {
3402
0
                b->b_next->b_label.id = next_lbl++;
3403
0
            }
3404
41
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3405
41
                             NO_LOCATION);
3406
41
            explicit_jump->b_cold = 1;
3407
41
            explicit_jump->b_next = b->b_next;
3408
41
            explicit_jump->b_predecessors = 1;
3409
41
            b->b_next = explicit_jump;
3410
3411
            /* set target */
3412
41
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3413
41
            last->i_target = explicit_jump->b_next;
3414
41
        }
3415
37.3k
    }
3416
3417
5.15k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3418
5.15k
    basicblock *cold_blocks = NULL;
3419
5.15k
    basicblock *cold_blocks_tail = NULL;
3420
3421
5.15k
    basicblock *b = entryblock;
3422
6.88k
    while(b->b_next) {
3423
6.88k
        assert(!b->b_cold);
3424
34.5k
        while (b->b_next && !b->b_next->b_cold) {
3425
27.6k
            b = b->b_next;
3426
27.6k
        }
3427
6.88k
        if (b->b_next == NULL) {
3428
            /* no more cold blocks */
3429
5.15k
            break;
3430
5.15k
        }
3431
3432
        /* b->b_next is the beginning of a cold streak */
3433
1.73k
        assert(!b->b_cold && b->b_next->b_cold);
3434
3435
1.73k
        basicblock *b_end = b->b_next;
3436
4.56k
        while (b_end->b_next && b_end->b_next->b_cold) {
3437
2.82k
            b_end = b_end->b_next;
3438
2.82k
        }
3439
3440
        /* b_end is the end of the cold streak */
3441
1.73k
        assert(b_end && b_end->b_cold);
3442
1.73k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3443
3444
1.73k
        if (cold_blocks == NULL) {
3445
939
            cold_blocks = b->b_next;
3446
939
        }
3447
796
        else {
3448
796
            cold_blocks_tail->b_next = b->b_next;
3449
796
        }
3450
1.73k
        cold_blocks_tail = b_end;
3451
1.73k
        b->b_next = b_end->b_next;
3452
1.73k
        b_end->b_next = NULL;
3453
1.73k
    }
3454
5.15k
    assert(b != NULL && b->b_next == NULL);
3455
5.15k
    b->b_next = cold_blocks;
3456
3457
5.15k
    if (cold_blocks != NULL) {
3458
939
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3459
939
    }
3460
5.15k
    return SUCCESS;
3461
5.15k
}
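
/* Sketch of the streak splice above (standalone): runs of cold blocks are
 * unlinked from the main b_next chain and collected on a separate list
 * that is reattached at the tail, preserving the relative order of both
 * the warm and the cold blocks. */
#include <stdio.h>

typedef struct blk { int id, cold; struct blk *next; } blk;

int main(void)
{
    blk b3 = {3, 0, NULL}, b2 = {2, 1, &b3}, b1 = {1, 1, &b2}, b0 = {0, 0, &b1};
    blk *cold_head = NULL, *cold_tail = NULL, *b = &b0;

    while (b->next) {
        while (b->next && !b->next->cold) {
            b = b->next;
        }
        if (b->next == NULL) {
            break;
        }
        blk *start = b->next, *end = start;      /* cold streak [start, end] */
        while (end->next && end->next->cold) {
            end = end->next;
        }
        if (cold_head == NULL) { cold_head = start; }
        else { cold_tail->next = start; }
        cold_tail = end;
        b->next = end->next;                     /* unlink the streak */
        end->next = NULL;
    }
    b->next = cold_head;                         /* reattach at the tail */

    for (blk *p = &b0; p != NULL; p = p->next) {
        printf("%d ", p->id);                    /* prints: 0 3 1 2 */
    }
    printf("\n");
    return 0;
}
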
3462
3463
static int
3464
convert_pseudo_conditional_jumps(cfg_builder *g)
3465
5.78k
{
3466
5.78k
    basicblock *entryblock = g->g_entryblock;
3467
43.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3468
313k
        for (int i = 0; i < b->b_iused; i++) {
3469
275k
            cfg_instr *instr = &b->b_instr[i];
3470
275k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3471
398
                assert(i == b->b_iused - 1);
3472
398
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3473
218
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3474
398
                location loc = instr->i_loc;
3475
398
                cfg_instr copy = {
3476
398
                            .i_opcode = COPY,
3477
398
                            .i_oparg = 1,
3478
398
                            .i_loc = loc,
3479
398
                            .i_target = NULL,
3480
398
                };
3481
398
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3482
398
                cfg_instr to_bool = {
3483
398
                            .i_opcode = TO_BOOL,
3484
398
                            .i_oparg = 0,
3485
398
                            .i_loc = loc,
3486
398
                            .i_target = NULL,
3487
398
                };
3488
398
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3489
398
            }
3490
275k
        }
3491
37.9k
    }
3492
5.78k
    return SUCCESS;
3493
5.78k
}
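
/* The rewrite above expands one pseudo instruction into three real ones:
 *
 *     JUMP_IF_FALSE L        ==>        COPY 1
 *                                       TO_BOOL
 *                                       POP_JUMP_IF_FALSE L
 *
 * COPY 1 duplicates the top of stack so that the value survives the pop,
 * preserving the pseudo op's "peek, don't consume" semantics. */
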
3494
3495
static int
3496
convert_pseudo_ops(cfg_builder *g)
3497
5.78k
{
3498
5.78k
    basicblock *entryblock = g->g_entryblock;
3499
43.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3500
316k
        for (int i = 0; i < b->b_iused; i++) {
3501
278k
            cfg_instr *instr = &b->b_instr[i];
3502
278k
            if (is_block_push(instr)) {
3503
2.32k
                INSTR_SET_OP0(instr, NOP);
3504
2.32k
            }
3505
275k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3506
1.44k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3507
1.44k
                instr->i_opcode = LOAD_FAST;
3508
1.44k
            }
3509
274k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3510
480
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3511
480
                instr->i_opcode = STORE_FAST;
3512
480
            }
3513
278k
        }
3514
37.9k
    }
3515
5.78k
    return remove_redundant_nops_and_jumps(g);
3516
5.78k
}
3517
3518
static inline bool
3519
62.0k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3520
62.0k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3521
34.5k
        return basicblock_has_no_lineno(b);
3522
34.5k
    }
3523
27.4k
    else {
3524
27.4k
        return false;
3525
27.4k
    }
3526
62.0k
}
3527
3528
3529
/* PEP 626 mandates that the f_lineno of a frame is correct
3530
 * after a frame terminates. It would be prohibitively expensive
3531
 * to continuously update the f_lineno field at runtime,
3532
 * so we make sure that all exiting instructions (raises and returns)
3533
 * have a valid line number, allowing us to compute f_lineno lazily.
3534
 * We can do this by duplicating the exit blocks without line number
3535
 * so that none have more than one predecessor. We can then safely
3536
 * copy the line number from the sole predecessor block.
3537
 */
3538
static int
3539
duplicate_exits_without_lineno(cfg_builder *g)
3540
11.5k
{
3541
11.5k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3542
3543
    /* Copy all exit blocks without a line number that are targets of a jump.
3544
     */
3545
11.5k
    basicblock *entryblock = g->g_entryblock;
3546
87.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3547
75.9k
        cfg_instr *last = basicblock_last_instr(b);
3548
75.9k
        if (last == NULL) {
3549
12.3k
            continue;
3550
12.3k
        }
3551
63.5k
        if (is_jump(last)) {
3552
30.2k
            basicblock *target = next_nonempty_block(last->i_target);
3553
30.2k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3554
507
                basicblock *new_target = copy_basicblock(g, target);
3555
507
                if (new_target == NULL) {
3556
0
                    return ERROR;
3557
0
                }
3558
507
                new_target->b_instr[0].i_loc = last->i_loc;
3559
507
                last->i_target = new_target;
3560
507
                target->b_predecessors--;
3561
507
                new_target->b_predecessors = 1;
3562
507
                new_target->b_next = target->b_next;
3563
507
                new_target->b_label.id = next_lbl++;
3564
507
                target->b_next = new_target;
3565
507
            }
3566
30.2k
        }
3567
63.5k
    }
3568
3569
    /* Any remaining reachable exit blocks without a line number can only be reached by
3570
     * fall through, and thus can only have a single predecessor */
3571
87.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3572
75.9k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3573
31.7k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3574
677
                cfg_instr *last = basicblock_last_instr(b);
3575
677
                assert(last != NULL);
3576
677
                b->b_next->b_instr[0].i_loc = last->i_loc;
3577
677
            }
3578
31.7k
        }
3579
75.9k
    }
3580
11.5k
    return SUCCESS;
3581
11.5k
}
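
/* For example, if jumps on source lines 3 and 7 both target one RETURN
 * block that has no line number, the block is copied so that each jump
 * gets a private exit with a single predecessor; one copy then inherits
 * line 3 and the other line 7, without the two callers interfering. */
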
3582
3583
3584
/* If an instruction has no line number, but its predecessor in the BB does,
3585
 * then copy the line number. If a successor block has no line number, and only
3586
 * one predecessor, then inherit the line number.
3587
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3588
 * It also reduces the size of the line number table,
3589
 * but has no impact on the generated line number events.
3590
 */
3591
static void
3592
11.5k
propagate_line_numbers(basicblock *entryblock) {
3593
87.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3594
75.9k
        cfg_instr *last = basicblock_last_instr(b);
3595
75.9k
        if (last == NULL) {
3596
12.3k
            continue;
3597
12.3k
        }
3598
3599
63.5k
        location prev_location = NO_LOCATION;
3600
644k
        for (int i = 0; i < b->b_iused; i++) {
3601
581k
            if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3602
33.4k
                b->b_instr[i].i_loc = prev_location;
3603
33.4k
            }
3604
547k
            else {
3605
547k
                prev_location = b->b_instr[i].i_loc;
3606
547k
            }
3607
581k
        }
3608
63.5k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3609
22.7k
            if (b->b_next->b_iused > 0) {
3610
22.7k
                if (b->b_next->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3611
636
                    b->b_next->b_instr[0].i_loc = prev_location;
3612
636
                }
3613
22.7k
            }
3614
22.7k
        }
3615
63.5k
        if (is_jump(last)) {
3616
30.2k
            basicblock *target = last->i_target;
3617
30.2k
            if (target->b_predecessors == 1) {
3618
13.9k
                if (target->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3619
2.48k
                    target->b_instr[0].i_loc = prev_location;
3620
2.48k
                }
3621
13.9k
            }
3622
30.2k
        }
3623
63.5k
    }
3624
11.5k
}
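
/* Minimal sketch of the in-block forward propagation above (standalone;
 * -1 plays the role of NO_LOCATION): an instruction with no location
 * inherits the most recent one seen before it. */
#include <stdio.h>

int main(void)
{
    int lineno[5] = {10, -1, -1, 12, -1};
    int prev = -1;
    for (int i = 0; i < 5; i++) {
        if (lineno[i] == -1) {
            lineno[i] = prev;
        }
        else {
            prev = lineno[i];
        }
        printf("%d ", lineno[i]);    /* prints: 10 10 10 12 12 */
    }
    printf("\n");
    return 0;
}
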
3625
3626
static int
3627
resolve_line_numbers(cfg_builder *g, int firstlineno)
3628
11.5k
{
3629
11.5k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3630
11.5k
    propagate_line_numbers(g->g_entryblock);
3631
11.5k
    return SUCCESS;
3632
11.5k
}
3633
3634
int
3635
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3636
                        int nlocals, int nparams, int firstlineno)
3637
5.78k
{
3638
5.78k
    assert(cfg_builder_check(g));
3639
    /** Preprocessing **/
3640
    /* Map labels to targets and mark exception handlers */
3641
5.78k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3642
5.78k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3643
5.78k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3644
3645
    /** Optimization **/
3646
5.78k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3647
5.78k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3648
5.78k
    RETURN_IF_ERROR(
3649
5.78k
        add_checks_for_loads_of_uninitialized_variables(
3650
5.78k
            g->g_entryblock, nlocals, nparams));
3651
5.78k
    RETURN_IF_ERROR(insert_superinstructions(g));
3652
3653
5.78k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3654
5.78k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3655
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3656
    // assert(all_exits_have_lineno(g->g_entryblock));
3657
5.78k
    return SUCCESS;
3658
5.78k
}
3659
3660
static int *
3661
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3662
5.78k
{
3663
5.78k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3664
5.78k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3665
5.78k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3666
3667
5.78k
    int noffsets = ncellvars + nfreevars;
3668
5.78k
    int *fixed = PyMem_New(int, noffsets);
3669
5.78k
    if (fixed == NULL) {
3670
0
        PyErr_NoMemory();
3671
0
        return NULL;
3672
0
    }
3673
7.60k
    for (int i = 0; i < noffsets; i++) {
3674
1.82k
        fixed[i] = nlocals + i;
3675
1.82k
    }
3676
3677
5.78k
    PyObject *varname, *cellindex;
3678
5.78k
    Py_ssize_t pos = 0;
3679
6.82k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3680
1.03k
        PyObject *varindex;
3681
1.03k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3682
0
            goto error;
3683
0
        }
3684
1.03k
        if (varindex == NULL) {
3685
832
            continue;
3686
832
        }
3687
3688
207
        int argoffset = PyLong_AsInt(varindex);
3689
207
        Py_DECREF(varindex);
3690
207
        if (argoffset == -1 && PyErr_Occurred()) {
3691
0
            goto error;
3692
0
        }
3693
3694
207
        int oldindex = PyLong_AsInt(cellindex);
3695
207
        if (oldindex == -1 && PyErr_Occurred()) {
3696
0
            goto error;
3697
0
        }
3698
207
        fixed[oldindex] = argoffset;
3699
207
    }
3700
5.78k
    return fixed;
3701
3702
0
error:
3703
0
    PyMem_Free(fixed);
3704
0
    return NULL;
3705
5.78k
}
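
/* Worked example (hypothetical counts): with two locals (args a and b),
 * one cellvar b and one freevar f, the default layout puts cells after the
 * locals, fixed = {2, 3}.  Because cell 0 is the argument b, its entry is
 * overwritten with b's argument offset, giving fixed = {1, 3}; the
 * fix_cell_offsets() pass below then drops the duplicate slot, so the
 * frame ends up with localsplus a(0), b-as-cell(1), f(2). */
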
3706
3707
#define IS_GENERATOR(CF) \
3708
5.78k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3709
3710
static int
3711
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3712
                           int *fixed, int nfreevars, int code_flags)
3713
5.78k
{
3714
5.78k
    assert(umd->u_firstlineno > 0);
3715
3716
    /* Add the generator prefix instructions. */
3717
5.78k
    if (IS_GENERATOR(code_flags)) {
3718
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3719
         * of 0. This is because RETURN_GENERATOR pushes an element
3720
         * with _PyFrame_StackPush before switching stacks.
3721
         */
3722
3723
307
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3724
307
        cfg_instr make_gen = {
3725
307
            .i_opcode = RETURN_GENERATOR,
3726
307
            .i_oparg = 0,
3727
307
            .i_loc = loc,
3728
307
            .i_target = NULL,
3729
307
        };
3730
307
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3731
307
        cfg_instr pop_top = {
3732
307
            .i_opcode = POP_TOP,
3733
307
            .i_oparg = 0,
3734
307
            .i_loc = loc,
3735
307
            .i_target = NULL,
3736
307
        };
3737
307
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3738
307
    }
3739
3740
    /* Set up cells for any variable that escapes, to be put in a closure. */
3741
5.78k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3742
5.78k
    if (ncellvars) {
3743
        // umd->u_cellvars has the cells out of order so we sort them
3744
        // before adding the MAKE_CELL instructions.  Note that we
3745
        // adjust for arg cells, which come first.
3746
750
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3747
750
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3748
750
        if (sorted == NULL) {
3749
0
            PyErr_NoMemory();
3750
0
            return ERROR;
3751
0
        }
3752
1.78k
        for (int i = 0; i < ncellvars; i++) {
3753
1.03k
            sorted[fixed[i]] = i + 1;
3754
1.03k
        }
3755
2.20k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3756
1.45k
            int oldindex = sorted[i] - 1;
3757
1.45k
            if (oldindex == -1) {
3758
411
                continue;
3759
411
            }
3760
1.03k
            cfg_instr make_cell = {
3761
1.03k
                .i_opcode = MAKE_CELL,
3762
                // This will get fixed in fix_cell_offsets().
3763
1.03k
                .i_oparg = oldindex,
3764
1.03k
                .i_loc = NO_LOCATION,
3765
1.03k
                .i_target = NULL,
3766
1.03k
            };
3767
1.03k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3768
0
                PyMem_RawFree(sorted);
3769
0
                return ERROR;
3770
0
            }
3771
1.03k
            ncellsused += 1;
3772
1.03k
        }
3773
750
        PyMem_RawFree(sorted);
3774
750
    }
3775
3776
5.78k
    if (nfreevars) {
3777
447
        cfg_instr copy_frees = {
3778
447
            .i_opcode = COPY_FREE_VARS,
3779
447
            .i_oparg = nfreevars,
3780
447
            .i_loc = NO_LOCATION,
3781
447
            .i_target = NULL,
3782
447
        };
3783
447
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3784
447
    }
3785
3786
5.78k
    return SUCCESS;
3787
5.78k
}
3788
3789
static int
3790
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3791
5.78k
{
3792
5.78k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3793
5.78k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3794
5.78k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3795
5.78k
    int noffsets = ncellvars + nfreevars;
3796
3797
    // First deal with duplicates (arg cells).
3798
5.78k
    int numdropped = 0;
3799
7.60k
    for (int i = 0; i < noffsets ; i++) {
3800
1.82k
        if (fixedmap[i] == i + nlocals) {
3801
1.61k
            fixedmap[i] -= numdropped;
3802
1.61k
        }
3803
207
        else {
3804
            // It was a duplicate (cell/arg).
3805
207
            numdropped += 1;
3806
207
        }
3807
1.82k
    }
3808
3809
    // Then update offsets, either relative to locals or by cell2arg.
3810
43.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3811
316k
        for (int i = 0; i < b->b_iused; i++) {
3812
278k
            cfg_instr *inst = &b->b_instr[i];
3813
            // This is called before extended args are generated.
3814
278k
            assert(inst->i_opcode != EXTENDED_ARG);
3815
278k
            int oldoffset = inst->i_oparg;
3816
278k
            switch(inst->i_opcode) {
3817
1.03k
                case MAKE_CELL:
3818
2.48k
                case LOAD_CLOSURE:
3819
4.18k
                case LOAD_DEREF:
3820
5.07k
                case STORE_DEREF:
3821
5.07k
                case DELETE_DEREF:
3822
5.07k
                case LOAD_FROM_DICT_OR_DEREF:
3823
5.07k
                    assert(oldoffset >= 0);
3824
5.07k
                    assert(oldoffset < noffsets);
3825
5.07k
                    assert(fixedmap[oldoffset] >= 0);
3826
5.07k
                    inst->i_oparg = fixedmap[oldoffset];
3827
278k
            }
3828
278k
        }
3829
37.9k
    }
3830
3831
5.78k
    return numdropped;
3832
5.78k
}
3833
3834
static int
3835
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3836
5.78k
{
3837
5.78k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3838
5.78k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3839
5.78k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3840
5.78k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3841
5.78k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3842
5.78k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3843
5.78k
    assert(INT_MAX - nlocals - ncellvars > 0);
3844
5.78k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3845
5.78k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3846
5.78k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3847
5.78k
    if (cellfixedoffsets == NULL) {
3848
0
        return ERROR;
3849
0
    }
3850
3851
    // This must be called before fix_cell_offsets().
3852
5.78k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3853
0
        PyMem_Free(cellfixedoffsets);
3854
0
        return ERROR;
3855
0
    }
3856
3857
5.78k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3858
5.78k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3859
5.78k
    cellfixedoffsets = NULL;
3860
5.78k
    if (numdropped < 0) {
3861
0
        return ERROR;
3862
0
    }
3863
3864
5.78k
    nlocalsplus -= numdropped;
3865
5.78k
    return nlocalsplus;
3866
5.78k
}
3867
3868
cfg_builder *
3869
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3870
5.78k
{
3871
5.78k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3872
0
        return NULL;
3873
0
    }
3874
5.78k
    cfg_builder *g = _PyCfgBuilder_New();
3875
5.78k
    if (g == NULL) {
3876
0
        return NULL;
3877
0
    }
3878
320k
    for (int i = 0; i < seq->s_used; i++) {
3879
315k
        seq->s_instrs[i].i_target = 0;
3880
315k
    }
3881
320k
    for (int i = 0; i < seq->s_used; i++) {
3882
315k
        _PyInstruction *instr = &seq->s_instrs[i];
3883
315k
        if (HAS_TARGET(instr->i_opcode)) {
3884
19.4k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3885
19.4k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3886
19.4k
        }
3887
315k
    }
3888
5.78k
    int offset = 0;
3889
320k
    for (int i = 0; i < seq->s_used; i++) {
3890
315k
        _PyInstruction *instr = &seq->s_instrs[i];
3891
315k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3892
307
            if (seq->s_annotations_code != NULL) {
3893
0
                assert(seq->s_annotations_code->s_labelmap_size == 0
3894
0
                    && seq->s_annotations_code->s_nested == NULL);
3895
0
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3896
0
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3897
0
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3898
0
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3899
0
                        goto error;
3900
0
                    }
3901
0
                }
3902
0
                offset += seq->s_annotations_code->s_used - 1;
3903
0
            }
3904
307
            else {
3905
307
                offset -= 1;
3906
307
            }
3907
307
            continue;
3908
307
        }
3909
314k
        if (instr->i_target) {
3910
15.7k
            jump_target_label lbl_ = {i + offset};
3911
15.7k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3912
0
                goto error;
3913
0
            }
3914
15.7k
        }
3915
314k
        int opcode = instr->i_opcode;
3916
314k
        int oparg = instr->i_oparg;
3917
314k
        if (HAS_TARGET(opcode)) {
3918
19.4k
            oparg += offset;
3919
19.4k
        }
3920
314k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3921
0
            goto error;
3922
0
        }
3923
314k
    }
3924
5.78k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3925
0
        goto error;
3926
0
    }
3927
5.78k
    return g;
3928
0
error:
3929
0
    _PyCfgBuilder_Free(g);
3930
0
    return NULL;
3931
5.78k
}
3932
3933
int
3934
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3935
5.78k
{
3936
5.78k
    int lbl = 0;
3937
44.3k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3938
38.5k
        b->b_label = (jump_target_label){lbl};
3939
38.5k
        lbl += 1;
3940
38.5k
    }
3941
44.3k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3942
38.5k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3943
325k
        for (int i = 0; i < b->b_iused; i++) {
3944
286k
            cfg_instr *instr = &b->b_instr[i];
3945
286k
            if (HAS_TARGET(instr->i_opcode)) {
3946
                /* Set oparg to the label id (it will later be mapped to an offset) */
3947
15.1k
                instr->i_oparg = instr->i_target->b_label.id;
3948
15.1k
            }
3949
286k
            RETURN_IF_ERROR(
3950
286k
                _PyInstructionSequence_Addop(
3951
286k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3952
3953
286k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3954
286k
            if (instr->i_except != NULL) {
3955
38.4k
                hi->h_label = instr->i_except->b_label.id;
3956
38.4k
                hi->h_startdepth = instr->i_except->b_startdepth;
3957
38.4k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3958
38.4k
            }
3959
248k
            else {
3960
248k
                hi->h_label = -1;
3961
248k
            }
3962
286k
        }
3963
38.5k
    }
3964
5.78k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3965
0
        return ERROR;
3966
0
    }
3967
5.78k
    return SUCCESS;
3968
5.78k
}
3969
3970
3971
int
3972
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3973
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3974
                                     int *stackdepth, int *nlocalsplus,
3975
                                     _PyInstructionSequence *seq)
3976
5.78k
{
3977
5.78k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3978
3979
5.78k
    *stackdepth = calculate_stackdepth(g);
3980
5.78k
    if (*stackdepth < 0) {
3981
0
        return ERROR;
3982
0
    }
3983
3984
    /* prepare_localsplus adds instructions for generators that push
3985
     * and pop an item on the stack. This assertion makes sure there
3986
     * is space on the stack for that.
3987
     * It should always be true, because a generator must have at
3988
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
3989
     * which requires stack space.
3990
     */
3991
5.78k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
3992
3993
5.78k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
3994
5.78k
    if (*nlocalsplus < 0) {
3995
0
        return ERROR;
3996
0
    }
3997
3998
5.78k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
3999
4000
    /* Order of basic blocks must have been determined by now */
4001
4002
5.78k
    RETURN_IF_ERROR(normalize_jumps(g));
4003
5.78k
    assert(no_redundant_jumps(g));
4004
4005
    /* Can't modify the bytecode after inserting instructions that produce
4006
     * borrowed references.
4007
     */
4008
5.78k
    RETURN_IF_ERROR(optimize_load_fast(g));
4009
4010
    /* Can't modify the bytecode after computing jump offsets. */
4011
5.78k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4012
0
        return ERROR;
4013
0
    }
4014
4015
5.78k
    return SUCCESS;
4016
5.78k
}
4017
4018
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4019
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4020
 */
4021
int
4022
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4023
0
{
4024
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4025
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4026
0
    return SUCCESS;
4027
0
}
4028
4029
/* Exported API functions */
4030
4031
int
4032
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4033
0
{
4034
0
    stack_effects effs;
4035
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4036
0
        return PY_INVALID_STACK_EFFECT;
4037
0
    }
4038
0
    return effs.net;
4039
0
}
4040
4041
int
4042
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4043
100
{
4044
100
    stack_effects effs;
4045
100
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4046
0
        return PY_INVALID_STACK_EFFECT;
4047
0
    }
4048
100
    return effs.net;
4049
100
}
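
/* Usage sketch for the exported API above (assumes a CPython build where
 * these opcode constants are available; not part of flowgraph.c).
 * BINARY_OP pops two operands and pushes one result, so its net effect
 * is -1; NOP touches nothing. */
#include "Python.h"
#include "opcode.h"
#include <assert.h>

static void stack_effect_demo(void)
{
    assert(PyCompile_OpcodeStackEffect(BINARY_OP, 0) == -1);
    assert(PyCompile_OpcodeStackEffect(NOP, 0) == 0);
}
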
4050
4051
/* Access to compiler optimizations for unit tests.
4052
4053
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4054
 * a CFG, optimizes it, and converts it back to an instruction list.
4055
 */
4056
4057
static PyObject *
4058
cfg_to_instruction_sequence(cfg_builder *g)
4059
0
{
4060
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4061
0
    if (seq == NULL) {
4062
0
        return NULL;
4063
0
    }
4064
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4065
0
        PyInstructionSequence_Fini(seq);
4066
0
        return NULL;
4067
0
    }
4068
0
    return (PyObject*)seq;
4069
0
}
4070
4071
PyObject *
4072
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4073
0
{
4074
0
    if (!_PyInstructionSequence_Check(seq)) {
4075
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4076
0
        return NULL;
4077
0
    }
4078
0
    PyObject *const_cache = PyDict_New();
4079
0
    if (const_cache == NULL) {
4080
0
        return NULL;
4081
0
    }
4082
4083
0
    PyObject *res = NULL;
4084
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4085
0
    if (g == NULL) {
4086
0
        goto error;
4087
0
    }
4088
0
    int nparams = 0, firstlineno = 1;
4089
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4090
0
                                nparams, firstlineno) < 0) {
4091
0
        goto error;
4092
0
    }
4093
4094
0
    if (calculate_stackdepth(g) == ERROR) {
4095
0
        goto error;
4096
0
    }
4097
4098
0
    if (optimize_load_fast(g) != SUCCESS) {
4099
0
        goto error;
4100
0
    }
4101
4102
0
    res = cfg_to_instruction_sequence(g);
4103
0
error:
4104
0
    Py_DECREF(const_cache);
4105
0
    _PyCfgBuilder_Free(g);
4106
0
    return res;
4107
0
}