Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
631k
#define SUCCESS 0
19
4.57k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
960k
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
205k
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the block are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
210k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
210k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
909k
{
101
909k
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
909k
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
909k
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
796k
{
108
796k
    return OPCODE_HAS_JUMP(i->i_opcode);
109
796k
}
110
111
/* One arg*/
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
22.1k
    do { \
114
22.1k
        assert(OPCODE_HAS_ARG(OP)); \
115
22.1k
        cfg_instr *_instr__ptr_ = (I); \
116
22.1k
        _instr__ptr_->i_opcode = (OP); \
117
22.1k
        _instr__ptr_->i_oparg = (ARG); \
118
22.1k
    } while (0);
119
120
/* No args*/
121
#define INSTR_SET_OP0(I, OP) \
122
37.7k
    do { \
123
37.7k
        assert(!OPCODE_HAS_ARG(OP)); \
124
37.7k
        cfg_instr *_instr__ptr_ = (I); \
125
37.7k
        _instr__ptr_->i_opcode = (OP); \
126
37.7k
        _instr__ptr_->i_oparg = 0; \
127
37.7k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
2.05k
    do { \
131
2.05k
        cfg_instr *_instr__ptr_ = (I); \
132
2.05k
        _instr__ptr_->i_loc = (LOC); \
133
2.05k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
205k
{
144
205k
    assert(b != NULL);
145
205k
    _Py_c_array_t array = {
146
205k
        .array = (void*)b->b_instr,
147
205k
        .allocated_entries = b->b_ialloc,
148
205k
        .item_size = sizeof(cfg_instr),
149
205k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
205k
    };
151
152
205k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
205k
    b->b_instr = array.array;
154
205k
    b->b_ialloc = array.allocated_entries;
155
205k
    return b->b_iused++;
156
205k
}
157
158
static cfg_instr *
159
690k
basicblock_last_instr(const basicblock *b) {
160
690k
    assert(b->b_iused >= 0);
161
690k
    if (b->b_iused > 0) {
162
631k
        assert(b->b_instr != NULL);
163
631k
        return &b->b_instr[b->b_iused - 1];
164
631k
    }
165
58.3k
    return NULL;
166
690k
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
22.1k
{
175
22.1k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
22.1k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
22.1k
    b->b_list = g->g_block_list;
182
22.1k
    g->g_block_list = b;
183
22.1k
    b->b_label = NO_LABEL;
184
22.1k
    return b;
185
22.1k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
200k
{
190
200k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
200k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
200k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
200k
    assert(0 <= oparg && oparg < (1 << 30));
194
195
200k
    int off = basicblock_next_instr(b);
196
200k
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
200k
    cfg_instr *i = &b->b_instr[off];
200
200k
    i->i_opcode = opcode;
201
200k
    i->i_oparg = oparg;
202
200k
    i->i_loc = loc;
203
    // memory is already zero initialized
204
200k
    assert(i->i_target == NULL);
205
200k
    assert(i->i_except == NULL);
206
207
200k
    return SUCCESS;
208
200k
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
379
{
213
379
    cfg_instr *last = basicblock_last_instr(b);
214
379
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
379
    RETURN_IF_ERROR(
219
379
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
379
    last = basicblock_last_instr(b);
221
379
    assert(last && last->i_opcode == opcode);
222
379
    last->i_target = target;
223
379
    return SUCCESS;
224
379
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
1.45k
{
229
4.17k
    for (int i = 0; i < from->b_iused; i++) {
230
2.71k
        int n = basicblock_next_instr(to);
231
2.71k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
2.71k
        to->b_instr[n] = from->b_instr[i];
235
2.71k
    }
236
1.45k
    return SUCCESS;
237
1.45k
}
238
239
static inline int
240
210k
basicblock_nofallthrough(const basicblock *b) {
241
210k
    cfg_instr *last = basicblock_last_instr(b);
242
210k
    return (last &&
243
198k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
113k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
210k
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
343k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
378
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
378
    assert(BB_NO_FALLTHROUGH(block));
257
378
    basicblock *result = cfg_builder_new_block(g);
258
378
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
378
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
378
    return result;
265
378
}
266
267
static int
268
2.51k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
2.51k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
43.4k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
40.8k
        block->b_instr[i] = block->b_instr[i-1];
272
40.8k
    }
273
2.51k
    block->b_instr[pos] = *instr;
274
2.51k
    return SUCCESS;
275
2.51k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
16.8k
{
343
16.8k
    assert(block != NULL);
344
16.8k
    g->g_curblock->b_next = block;
345
16.8k
    g->g_curblock = block;
346
16.8k
    return block;
347
16.8k
}
348
349
static inline int
350
35.0k
basicblock_exits_scope(const basicblock *b) {
351
35.0k
    cfg_instr *last = basicblock_last_instr(b);
352
35.0k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
35.0k
}
354
355
static inline int
356
20.4k
basicblock_has_eval_break(const basicblock *b) {
357
105k
    for (int i = 0; i < b->b_iused; i++) {
358
93.1k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
8.06k
            return true;
360
8.06k
        }
361
93.1k
    }
362
12.3k
    return false;
363
20.4k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
202k
{
368
202k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
202k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
14.3k
        return true;
371
14.3k
    }
372
188k
    if (IS_LABEL(g->g_current_label)) {
373
2.48k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
2.48k
            return true;
375
2.48k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
2.48k
    }
382
186k
    return false;
383
188k
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
202k
{
388
202k
    if (cfg_builder_current_block_is_terminated(g)) {
389
16.8k
        basicblock *b = cfg_builder_new_block(g);
390
16.8k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
16.8k
        b->b_label = g->g_current_label;
394
16.8k
        g->g_current_label = NO_LABEL;
395
16.8k
        cfg_builder_use_next_block(g, b);
396
16.8k
    }
397
202k
    return SUCCESS;
398
202k
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
{
404
    assert(g->g_entryblock->b_iused > 0);
405
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
        assert(!_PyMem_IsPtrFreed(block));
407
        if (block->b_instr != NULL) {
408
            assert(block->b_ialloc > 0);
409
            assert(block->b_iused >= 0);
410
            assert(block->b_ialloc >= block->b_iused);
411
        }
412
        else {
413
            assert (block->b_iused == 0);
414
            assert (block->b_ialloc == 0);
415
        }
416
    }
417
    return true;
418
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
4.58k
{
424
4.58k
    g->g_block_list = NULL;
425
4.58k
    basicblock *block = cfg_builder_new_block(g);
426
4.58k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
4.58k
    g->g_curblock = g->g_entryblock = block;
430
4.58k
    g->g_current_label = NO_LABEL;
431
4.58k
    return SUCCESS;
432
4.58k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
4.58k
{
437
4.58k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
4.58k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
4.58k
    memset(g, 0, sizeof(cfg_builder));
443
4.58k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
4.58k
    return g;
448
4.58k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
4.58k
{
453
4.58k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
4.58k
    assert(cfg_builder_check(g));
457
4.58k
    basicblock *b = g->g_block_list;
458
26.7k
    while (b != NULL) {
459
22.1k
        if (b->b_instr) {
460
22.1k
            PyMem_Free((void *)b->b_instr);
461
22.1k
        }
462
22.1k
        basicblock *next = b->b_list;
463
22.1k
        PyMem_Free((void *)b);
464
22.1k
        b = next;
465
22.1k
    }
466
4.58k
    PyMem_Free(g);
467
4.58k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
4.58k
{
472
4.58k
    int nblocks = 0;
473
25.9k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
21.4k
        nblocks++;
475
21.4k
    }
476
4.58k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
4.58k
    return SUCCESS;
481
4.58k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
8.09k
{
486
8.09k
    g->g_current_label = lbl;
487
8.09k
    return cfg_builder_maybe_start_new_block(g);
488
8.09k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
194k
{
493
194k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
194k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
194k
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
28.3k
{
501
29.3k
    while (b && b->b_iused == 0) {
502
1.05k
        b = b->b_next;
503
1.05k
    }
504
28.3k
    return b;
505
28.3k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
no_redundant_nops(cfg_builder *g) {
514
    if (remove_redundant_nops(g) != 0) {
515
        return false;
516
    }
517
    return true;
518
}
519
520
static bool
521
no_redundant_jumps(cfg_builder *g) {
522
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
        cfg_instr *last = basicblock_last_instr(b);
524
        if (last != NULL) {
525
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
                basicblock *next = next_nonempty_block(b->b_next);
527
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
                if (jump_target == next) {
529
                    assert(next);
530
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
                        assert(0);
532
                        return false;
533
                    }
534
                }
535
            }
536
        }
537
    }
538
    return true;
539
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
22.1k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
22.1k
    cfg_instr *last = basicblock_last_instr(b);
547
22.1k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
17.1k
        return SUCCESS;
549
17.1k
    }
550
22.1k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
4.95k
    bool is_forward = last->i_target->b_visited == 0;
553
4.95k
    if (is_forward) {
554
4.75k
        RETURN_IF_ERROR(
555
4.75k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
4.75k
        return SUCCESS;
557
4.75k
    }
558
559
194
    int reversed_opcode = 0;
560
194
    switch(last->i_opcode) {
561
0
        case POP_JUMP_IF_NOT_NONE:
562
0
            reversed_opcode = POP_JUMP_IF_NONE;
563
0
            break;
564
16
        case POP_JUMP_IF_NONE:
565
16
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
16
            break;
567
140
        case POP_JUMP_IF_FALSE:
568
140
            reversed_opcode = POP_JUMP_IF_TRUE;
569
140
            break;
570
38
        case POP_JUMP_IF_TRUE:
571
38
            reversed_opcode = POP_JUMP_IF_FALSE;
572
38
            break;
573
194
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
194
    basicblock *target = last->i_target;
579
194
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
194
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
194
    RETURN_IF_ERROR(
584
194
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
194
    RETURN_IF_ERROR(
586
194
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
194
    backwards_jump->b_startdepth = target->b_startdepth;
588
194
    last->i_opcode = reversed_opcode;
589
194
    last->i_target = b->b_next;
590
591
194
    backwards_jump->b_cold = b->b_cold;
592
194
    backwards_jump->b_next = b->b_next;
593
194
    b->b_next = backwards_jump;
594
194
    return SUCCESS;
595
194
}
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
4.58k
{
601
4.58k
    basicblock *entryblock = g->g_entryblock;
602
26.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
21.9k
        b->b_visited = 0;
604
21.9k
    }
605
26.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
22.1k
        b->b_visited = 1;
607
22.1k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
22.1k
    }
609
4.58k
    return SUCCESS;
610
4.58k
}
611
612
static int
613
4.58k
check_cfg(cfg_builder *g) {
614
25.9k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if jump or exit is not last instruction in the block. */
616
216k
        for (int i = 0; i < b->b_iused; i++) {
617
194k
            int opcode = b->b_instr[i].i_opcode;
618
194k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
194k
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
18.9k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
18.9k
            }
625
194k
        }
626
21.4k
    }
627
4.58k
    return SUCCESS;
628
4.58k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
17.3k
{
633
17.3k
    int lbl = -1;
634
102k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
85.5k
        if (b->b_label.id > lbl) {
636
29.8k
            lbl = b->b_label.id;
637
29.8k
        }
638
85.5k
    }
639
17.3k
    return lbl;
640
17.3k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
4.58k
{
646
4.58k
    int max_label = get_max_label(entryblock);
647
4.58k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
4.58k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
4.58k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
4.58k
    memset(label2block, 0, mapsize);
654
25.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
21.4k
        if (b->b_label.id >= 0) {
656
8.09k
            label2block[b->b_label.id] = b;
657
8.09k
        }
658
21.4k
    }
659
25.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
216k
        for (int i = 0; i < b->b_iused; i++) {
661
194k
            cfg_instr *instr = &b->b_instr[i];
662
194k
            assert(instr->i_target == NULL);
663
194k
            if (HAS_TARGET(instr->i_opcode)) {
664
9.99k
                int lbl = instr->i_oparg;
665
9.99k
                assert(lbl >= 0 && lbl <= max_label);
666
9.99k
                instr->i_target = label2block[lbl];
667
9.99k
                assert(instr->i_target != NULL);
668
9.99k
                assert(instr->i_target->b_label.id == lbl);
669
9.99k
            }
670
194k
        }
671
21.4k
    }
672
4.58k
    PyMem_Free(label2block);
673
4.58k
    return SUCCESS;
674
4.58k
}
675
676
static int
677
4.58k
mark_except_handlers(basicblock *entryblock) {
678
#ifndef NDEBUG
679
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
        assert(!b->b_except_handler);
681
    }
682
#endif
683
25.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
216k
        for (int i=0; i < b->b_iused; i++) {
685
194k
            cfg_instr *instr = &b->b_instr[i];
686
194k
            if (is_block_push(instr)) {
687
1.63k
                instr->i_target->b_except_handler = 1;
688
1.63k
            }
689
194k
        }
690
21.4k
    }
691
4.58k
    return SUCCESS;
692
4.58k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
1.62k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
1.62k
    assert(is_block_push(setup));
704
1.62k
    int opcode = setup->i_opcode;
705
1.62k
    basicblock * target = setup->i_target;
706
1.62k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
1.00k
        target->b_preserve_lasti = 1;
708
1.00k
    }
709
1.62k
    assert(stack->depth <= CO_MAXBLOCKS);
710
1.62k
    stack->handlers[++stack->depth] = target;
711
1.62k
    return target;
712
1.62k
}
713
714
static basicblock *
715
1.33k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
1.33k
    assert(stack->depth > 0);
717
1.33k
    return stack->handlers[--stack->depth];
718
1.33k
}
719
720
static basicblock *
721
17.9k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
17.9k
    return stack->handlers[stack->depth];
723
17.9k
}
724
725
static struct _PyCfgExceptStack *
726
4.58k
make_except_stack(void) {
727
4.58k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
4.58k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
4.58k
    new->depth = 0;
733
4.58k
    new->handlers[0] = NULL;
734
4.58k
    return new;
735
4.58k
}
736
737
static struct _PyCfgExceptStack *
738
6.27k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
6.27k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
6.27k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
6.27k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
6.27k
    return copy;
746
6.27k
}
747
748
static basicblock**
749
33.3k
make_cfg_traversal_stack(basicblock *entryblock) {
750
33.3k
    int nblocks = 0;
751
202k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
169k
        b->b_visited = 0;
753
169k
        nblocks++;
754
169k
    }
755
33.3k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
33.3k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
33.3k
    return stack;
760
33.3k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have different stack effect when jump to the target and
765
   when not jump. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jump
768
   * 1 -- when jump
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
183k
{
779
183k
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
183k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
183k
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
183k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
183k
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
183k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
1.62k
        effects->net = 0;
793
1.62k
        return 0;
794
1.62k
    }
795
182k
    effects->net = pushed - popped;
796
182k
    return 0;
797
183k
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
21.7k
{
802
21.7k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
21.7k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
18.1k
        assert(b->b_startdepth < 0);
808
18.1k
        b->b_startdepth = depth;
809
18.1k
        *(*sp)++ = b;
810
18.1k
    }
811
21.7k
    return SUCCESS;
812
21.7k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
4.58k
{
820
4.58k
    basicblock *entryblock = g->g_entryblock;
821
26.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
21.9k
        b->b_startdepth = INT_MIN;
823
21.9k
    }
824
4.58k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
4.58k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
4.58k
    int stackdepth = -1;
831
4.58k
    int maxdepth = 0;
832
4.58k
    basicblock **sp = stack;
833
4.58k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
22.7k
    while (sp != stack) {
837
18.1k
        basicblock *b = *--sp;
838
18.1k
        int depth = b->b_startdepth;
839
18.1k
        assert(depth >= 0);
840
18.1k
        basicblock *next = b->b_next;
841
183k
        for (int i = 0; i < b->b_iused; i++) {
842
174k
            cfg_instr *instr = &b->b_instr[i];
843
174k
            stack_effects effects;
844
174k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
174k
            int new_depth = depth + effects.net;
851
174k
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
854
0
                goto error;
855
0
            }
856
174k
            maxdepth = Py_MAX(maxdepth, depth);
857
174k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
8.76k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
8.76k
                int target_depth = depth + effects.net;
865
8.76k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
8.76k
                maxdepth = Py_MAX(maxdepth, depth);
867
8.76k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
8.76k
            }
871
174k
            depth = new_depth;
872
174k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
174k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
173k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
9.73k
            {
876
                /* remaining code is dead */
877
9.73k
                next = NULL;
878
9.73k
                break;
879
9.73k
            }
880
174k
        }
881
18.1k
        if (next != NULL) {
882
8.41k
            assert(BB_HAS_FALLTHROUGH(b));
883
8.41k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
8.41k
        }
887
18.1k
    }
888
4.58k
    stackdepth = maxdepth;
889
4.58k
error:
890
4.58k
    PyMem_Free(stack);
891
4.58k
    return stackdepth;
892
4.58k
}
893
894
static int
895
4.58k
label_exception_targets(basicblock *entryblock) {
896
4.58k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
4.58k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
4.58k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
4.58k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
4.58k
    except_stack->depth = 0;
907
4.58k
    todo_stack[0] = entryblock;
908
4.58k
    entryblock->b_visited = 1;
909
4.58k
    entryblock->b_exceptstack = except_stack;
910
4.58k
    basicblock **todo = &todo_stack[1];
911
4.58k
    basicblock *handler = NULL;
912
22.5k
    while (todo > todo_stack) {
913
17.9k
        todo--;
914
17.9k
        basicblock *b = todo[0];
915
17.9k
        assert(b->b_visited == 1);
916
17.9k
        except_stack = b->b_exceptstack;
917
17.9k
        assert(except_stack != NULL);
918
17.9k
        b->b_exceptstack = NULL;
919
17.9k
        handler = except_stack_top(except_stack);
920
17.9k
        int last_yield_except_depth = -1;
921
205k
        for (int i = 0; i < b->b_iused; i++) {
922
187k
            cfg_instr *instr = &b->b_instr[i];
923
187k
            if (is_block_push(instr)) {
924
1.62k
                if (!instr->i_target->b_visited) {
925
1.62k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
1.62k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
1.62k
                    instr->i_target->b_exceptstack = copy;
930
1.62k
                    todo[0] = instr->i_target;
931
1.62k
                    instr->i_target->b_visited = 1;
932
1.62k
                    todo++;
933
1.62k
                }
934
1.62k
                handler = push_except_block(except_stack, instr);
935
1.62k
            }
936
185k
            else if (instr->i_opcode == POP_BLOCK) {
937
1.33k
                handler = pop_except_block(except_stack);
938
1.33k
                INSTR_SET_OP0(instr, NOP);
939
1.33k
            }
940
184k
            else if (is_jump(instr)) {
941
7.92k
                instr->i_except = handler;
942
7.92k
                assert(i == b->b_iused -1);
943
7.92k
                if (!instr->i_target->b_visited) {
944
5.66k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
4.65k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
4.65k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
4.65k
                        instr->i_target->b_exceptstack = copy;
950
4.65k
                    }
951
1.00k
                    else {
952
1.00k
                        instr->i_target->b_exceptstack = except_stack;
953
1.00k
                        except_stack = NULL;
954
1.00k
                    }
955
5.66k
                    todo[0] = instr->i_target;
956
5.66k
                    instr->i_target->b_visited = 1;
957
5.66k
                    todo++;
958
5.66k
                }
959
7.92k
            }
960
176k
            else if (instr->i_opcode == YIELD_VALUE) {
961
304
                instr->i_except = handler;
962
304
                last_yield_except_depth = except_stack->depth;
963
304
            }
964
176k
            else if (instr->i_opcode == RESUME) {
965
4.89k
                instr->i_except = handler;
966
4.89k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
304
                    assert(last_yield_except_depth >= 0);
968
304
                    if (last_yield_except_depth == 1) {
969
133
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
133
                    }
971
304
                    last_yield_except_depth = -1;
972
304
                }
973
4.89k
            }
974
171k
            else if (instr->i_opcode == RETURN_GENERATOR) {
975
248
                instr->i_except = NULL;
976
248
            }
977
171k
            else {
978
171k
                instr->i_except = handler;
979
171k
            }
980
187k
        }
981
17.9k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
982
6.07k
            assert(except_stack != NULL);
983
6.07k
            b->b_next->b_exceptstack = except_stack;
984
6.07k
            todo[0] = b->b_next;
985
6.07k
            b->b_next->b_visited = 1;
986
6.07k
            todo++;
987
6.07k
        }
988
11.8k
        else if (except_stack != NULL) {
989
10.8k
           PyMem_Free(except_stack);
990
10.8k
        }
991
17.9k
    }
992
#ifdef Py_DEBUG
993
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
994
        assert(b->b_exceptstack == NULL);
995
    }
996
#endif
997
4.58k
    PyMem_Free(todo_stack);
998
4.58k
    return SUCCESS;
999
0
error:
1000
0
    PyMem_Free(todo_stack);
1001
0
    PyMem_Free(except_stack);
1002
0
    return ERROR;
1003
4.58k
}
1004
1005
/***** CFG optimizations *****/
1006
1007
static int
1008
9.17k
remove_unreachable(basicblock *entryblock) {
1009
52.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1010
43.1k
        b->b_predecessors = 0;
1011
43.1k
    }
1012
9.17k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1013
9.17k
    if (stack == NULL) {
1014
0
        return ERROR;
1015
0
    }
1016
9.17k
    basicblock **sp = stack;
1017
9.17k
    entryblock->b_predecessors = 1;
1018
9.17k
    *sp++ = entryblock;
1019
9.17k
    entryblock->b_visited = 1;
1020
44.7k
    while (sp > stack) {
1021
35.5k
        basicblock *b = *(--sp);
1022
35.5k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1023
16.1k
            if (!b->b_next->b_visited) {
1024
13.3k
                assert(b->b_next->b_predecessors == 0);
1025
13.3k
                *sp++ = b->b_next;
1026
13.3k
                b->b_next->b_visited = 1;
1027
13.3k
            }
1028
16.1k
            b->b_next->b_predecessors++;
1029
16.1k
        }
1030
400k
        for (int i = 0; i < b->b_iused; i++) {
1031
365k
            basicblock *target;
1032
365k
            cfg_instr *instr = &b->b_instr[i];
1033
365k
            if (is_jump(instr) || is_block_push(instr)) {
1034
17.7k
                target = instr->i_target;
1035
17.7k
                if (!target->b_visited) {
1036
13.0k
                    *sp++ = target;
1037
13.0k
                    target->b_visited = 1;
1038
13.0k
                }
1039
17.7k
                target->b_predecessors++;
1040
17.7k
            }
1041
365k
        }
1042
35.5k
    }
1043
9.17k
    PyMem_Free(stack);
1044
1045
    /* Delete unreachable instructions */
1046
52.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1047
43.1k
       if (b->b_predecessors == 0) {
1048
7.63k
            b->b_iused = 0;
1049
7.63k
            b->b_except_handler = 0;
1050
7.63k
       }
1051
43.1k
    }
1052
9.17k
    return SUCCESS;
1053
9.17k
}
1054
1055
static int
1056
114k
basicblock_remove_redundant_nops(basicblock *bb) {
1057
    /* Remove NOPs when legal to do so. */
1058
114k
    int dest = 0;
1059
114k
    int prev_lineno = -1;
1060
980k
    for (int src = 0; src < bb->b_iused; src++) {
1061
866k
        int lineno = bb->b_instr[src].i_loc.lineno;
1062
866k
        if (bb->b_instr[src].i_opcode == NOP) {
1063
            /* Eliminate no-op if it doesn't have a line number */
1064
17.7k
            if (lineno < 0) {
1065
2.48k
                continue;
1066
2.48k
            }
1067
            /* or, if the previous instruction had the same line number. */
1068
15.3k
            if (prev_lineno == lineno) {
1069
12.1k
                continue;
1070
12.1k
            }
1071
            /* or, if the next instruction has same line number or no line number */
1072
3.12k
            if (src < bb->b_iused - 1) {
1073
2.89k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1074
2.89k
                if (next_lineno == lineno) {
1075
1.90k
                    continue;
1076
1.90k
                }
1077
990
                if (next_lineno < 0) {
1078
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1079
0
                    continue;
1080
0
                }
1081
990
            }
1082
236
            else {
1083
236
                basicblock *next = next_nonempty_block(bb->b_next);
1084
                /* or if last instruction in BB and next BB has same line number */
1085
236
                if (next) {
1086
236
                    location next_loc = NO_LOCATION;
1087
236
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1088
236
                        cfg_instr *instr = &next->b_instr[next_i];
1089
236
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1090
                            /* Skip over NOPs without a location, they will be removed */
1091
0
                            continue;
1092
0
                        }
1093
236
                        next_loc = instr->i_loc;
1094
236
                        break;
1095
236
                    }
1096
236
                    if (lineno == next_loc.lineno) {
1097
1
                        continue;
1098
1
                    }
1099
236
                }
1100
236
            }
1101
1102
3.12k
        }
1103
849k
        if (dest != src) {
1104
67.7k
            bb->b_instr[dest] = bb->b_instr[src];
1105
67.7k
        }
1106
849k
        dest++;
1107
849k
        prev_lineno = lineno;
1108
849k
    }
1109
114k
    assert(dest <= bb->b_iused);
1110
114k
    int num_removed = bb->b_iused - dest;
1111
114k
    bb->b_iused = dest;
1112
114k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1113
114k
    return num_removed;
1114
114k
}
1115
1116
static int
1117
15.2k
remove_redundant_nops(cfg_builder *g) {
1118
15.2k
    int changes = 0;
1119
108k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1120
92.8k
        int change = basicblock_remove_redundant_nops(b);
1121
92.8k
        RETURN_IF_ERROR(change);
1122
92.8k
        changes += change;
1123
92.8k
    }
1124
15.2k
    return changes;
1125
15.2k
}
1126
1127
static int
1128
remove_redundant_nops_and_pairs(basicblock *entryblock)
1129
4.58k
{
1130
4.58k
    bool done = false;
1131
1132
9.17k
    while (! done) {
1133
4.58k
        done = true;
1134
4.58k
        cfg_instr *prev_instr = NULL;
1135
4.58k
        cfg_instr *instr = NULL;
1136
26.3k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1137
21.7k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1138
21.7k
            if (IS_LABEL(b->b_label)) {
1139
                /* this block is a jump target, forget instr */
1140
8.47k
                instr = NULL;
1141
8.47k
            }
1142
199k
            for (int i = 0; i < b->b_iused; i++) {
1143
177k
                prev_instr = instr;
1144
177k
                instr = &b->b_instr[i];
1145
177k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1146
177k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1147
177k
                int opcode = instr->i_opcode;
1148
177k
                bool is_redundant_pair = false;
1149
177k
                if (opcode == POP_TOP) {
1150
5.84k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1151
0
                       is_redundant_pair = true;
1152
0
                   }
1153
5.84k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1154
0
                       is_redundant_pair = true;
1155
0
                   }
1156
5.84k
                }
1157
177k
                if (is_redundant_pair) {
1158
0
                    INSTR_SET_OP0(prev_instr, NOP);
1159
0
                    INSTR_SET_OP0(instr, NOP);
1160
0
                    done = false;
1161
0
                }
1162
177k
            }
1163
21.7k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1164
15.5k
                instr = NULL;
1165
15.5k
            }
1166
21.7k
        }
1167
4.58k
    }
1168
4.58k
    return SUCCESS;
1169
4.58k
}
1170
1171
static int
1172
10.6k
remove_redundant_jumps(cfg_builder *g) {
1173
    /* If a non-empty block ends with a jump instruction, check if the next
1174
     * non-empty block reached through normal flow control is the target
1175
     * of that jump. If it is, then the jump instruction is redundant and
1176
     * can be deleted.
1177
     *
1178
     * Return the number of changes applied, or -1 on error.
1179
     */
1180
1181
10.6k
    int changes = 0;
1182
81.6k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1183
71.0k
        cfg_instr *last = basicblock_last_instr(b);
1184
71.0k
        if (last == NULL) {
1185
11.0k
            continue;
1186
11.0k
        }
1187
71.0k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1188
60.0k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1189
6.75k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1190
6.75k
            if (jump_target == NULL) {
1191
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1192
0
                return ERROR;
1193
0
            }
1194
6.75k
            basicblock *next = next_nonempty_block(b->b_next);
1195
6.75k
            if (jump_target == next) {
1196
238
                changes++;
1197
238
                INSTR_SET_OP0(last, NOP);
1198
238
            }
1199
6.75k
        }
1200
60.0k
    }
1201
1202
10.6k
    return changes;
1203
10.6k
}
1204
1205
static inline bool
1206
22.6k
basicblock_has_no_lineno(basicblock *b) {
1207
28.3k
    for (int i = 0; i < b->b_iused; i++) {
1208
25.8k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1209
20.1k
            return false;
1210
20.1k
        }
1211
25.8k
    }
1212
2.51k
    return true;
1213
22.6k
}
1214
1215
/* Maximum size of basic block that should be copied in optimizer */
1216
1.12k
#define MAX_COPY_SIZE 4
1217
1218
/* If this block ends with an unconditional jump to a small exit block or
1219
 * a block that has no line numbers (and no fallthrough), then
1220
 * remove the jump and extend this block with the target.
1221
 * Returns 1 if extended, 0 if no change, and -1 on error.
1222
 */
1223
static int
1224
30.3k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1225
30.3k
    cfg_instr *last = basicblock_last_instr(bb);
1226
30.3k
    if (last == NULL) {
1227
0
        return 0;
1228
0
    }
1229
30.3k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1230
26.2k
        return 0;
1231
26.2k
    }
1232
4.13k
    basicblock *target = last->i_target;
1233
4.13k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1234
1.12k
                             target->b_iused <= MAX_COPY_SIZE);
1235
4.13k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1236
973
                                     !BB_HAS_FALLTHROUGH(target));
1237
4.13k
    if (small_exit_block || no_lineno_no_fallthrough) {
1238
1.07k
        assert(is_jump(last));
1239
1.07k
        int removed_jump_opcode = last->i_opcode;
1240
1.07k
        INSTR_SET_OP0(last, NOP);
1241
1.07k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1242
1.07k
        if (no_lineno_no_fallthrough) {
1243
965
            last = basicblock_last_instr(bb);
1244
965
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1245
299
                removed_jump_opcode == JUMP)
1246
25
            {
1247
                /* Make sure we don't lose eval breaker checks */
1248
25
                last->i_opcode = JUMP;
1249
25
            }
1250
965
        }
1251
1.07k
        target->b_predecessors--;
1252
1.07k
        return 1;
1253
1.07k
    }
1254
3.05k
    return 0;
1255
4.13k
}
1256
1257
static int
1258
4.58k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1259
4.58k
    bool changes;
1260
5.09k
    do {
1261
5.09k
        changes = false;
1262
35.4k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1263
30.3k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1264
30.3k
            RETURN_IF_ERROR(res);
1265
30.3k
            if (res) {
1266
1.07k
                changes = true;
1267
1.07k
            }
1268
30.3k
        }
1269
5.09k
    } while(changes); /* every change removes a jump, ensuring convergence */
1270
4.58k
    return changes;
1271
4.58k
}
1272
1273
// Attempt to eliminate jumps to jumps by updating inst to jump to
1274
// target->i_target using the provided opcode. Return whether or not the
1275
// optimization was successful.
1276
static bool
1277
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1278
185
{
1279
185
    assert(is_jump(inst));
1280
185
    assert(is_jump(target));
1281
185
    assert(inst == basicblock_last_instr(bb));
1282
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1283
    // changes (and we fall into an infinite loop):
1284
185
    if (inst->i_target != target->i_target) {
1285
        /* Change inst to NOP and append a jump to target->i_target. The
1286
         * NOP will be removed later if it's not needed for the lineno.
1287
         */
1288
185
        INSTR_SET_OP0(inst, NOP);
1289
1290
185
        RETURN_IF_ERROR(
1291
185
            basicblock_add_jump(
1292
185
                bb, opcode, target->i_target, target->i_loc));
1293
1294
185
        return true;
1295
185
    }
1296
0
    return false;
1297
185
}
1298
1299
static int
1300
loads_const(int opcode)
1301
5.61k
{
1302
5.61k
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1303
5.61k
}
1304
1305
/* Returns new reference */
1306
static PyObject*
1307
get_const_value(int opcode, int oparg, PyObject *co_consts)
1308
41.3k
{
1309
41.3k
    PyObject *constant = NULL;
1310
41.3k
    assert(loads_const(opcode));
1311
41.3k
    if (opcode == LOAD_CONST) {
1312
41.1k
        constant = PyList_GET_ITEM(co_consts, oparg);
1313
41.1k
    }
1314
41.3k
    if (opcode == LOAD_SMALL_INT) {
1315
205
        return PyLong_FromLong(oparg);
1316
205
    }
1317
1318
41.1k
    if (constant == NULL) {
1319
0
        PyErr_SetString(PyExc_SystemError,
1320
0
                        "Internal error: failed to get value of a constant");
1321
0
        return NULL;
1322
0
    }
1323
41.1k
    return Py_NewRef(constant);
1324
41.1k
}
1325
1326
// Steals a reference to newconst.
1327
static int
1328
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1329
990
{
1330
990
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1331
0
        Py_DECREF(newconst);
1332
0
        return -1;
1333
0
    }
1334
1335
990
    Py_ssize_t index;
1336
24.0k
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1337
23.3k
        if (PyList_GET_ITEM(consts, index) == newconst) {
1338
268
            break;
1339
268
        }
1340
23.3k
    }
1341
990
    if (index == PyList_GET_SIZE(consts)) {
1342
722
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1343
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1344
0
            Py_DECREF(newconst);
1345
0
            return -1;
1346
0
        }
1347
722
        if (PyList_Append(consts, newconst)) {
1348
0
            Py_DECREF(newconst);
1349
0
            return -1;
1350
0
        }
1351
722
    }
1352
990
    Py_DECREF(newconst);
1353
990
    return (int)index;
1354
990
}
1355
1356
/*
1357
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1358
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1359
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1360
1361
  Return boolean indicating whether "size" such instructions were found.
1362
*/
1363
static bool
1364
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1365
3.74k
{
1366
3.74k
    assert(start < bb->b_iused);
1367
3.74k
    assert(size >= 0);
1368
3.74k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1369
1370
6.63k
    for (; start >= 0 && size > 0; start--) {
1371
5.66k
        cfg_instr *instr = &bb->b_instr[start];
1372
5.66k
        if (instr->i_opcode == NOP) {
1373
47
            continue;
1374
47
        }
1375
5.61k
        if (!loads_const(instr->i_opcode)) {
1376
2.76k
            return false;
1377
2.76k
        }
1378
2.84k
        instrs[--size] = instr;
1379
2.84k
    }
1380
1381
972
    return size == 0;
1382
3.74k
}
1383
1384
/*
1385
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1386
  Caller must make sure "instrs" has at least "size" elements.
1387
*/
1388
static void
1389
nop_out(cfg_instr **instrs, int size)
1390
965
{
1391
3.00k
    for (int i = 0; i < size; i++) {
1392
2.04k
        cfg_instr *instr = instrs[i];
1393
2.04k
        assert(instr->i_opcode != NOP);
1394
2.04k
        INSTR_SET_OP0(instr, NOP);
1395
2.04k
        INSTR_SET_LOC(instr, NO_LOCATION);
1396
2.04k
    }
1397
965
}
1398
1399
/* Does not steal reference to "newconst".
1400
   Return 1 if changed instruction to LOAD_SMALL_INT.
1401
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1402
   Return -1 on error.
1403
*/
1404
static int
1405
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1406
                               PyObject *consts, PyObject *const_cache)
1407
39.4k
{
1408
39.4k
    if (PyLong_CheckExact(newconst)) {
1409
17.1k
        int overflow;
1410
17.1k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1411
17.1k
        if (val == -1 && PyErr_Occurred()) {
1412
0
            return -1;
1413
0
        }
1414
17.1k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1415
13.8k
            assert(_Py_IsImmortal(newconst));
1416
13.8k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1417
13.8k
            return 1;
1418
13.8k
        }
1419
17.1k
    }
1420
25.6k
    return 0;
1421
39.4k
}
1422
1423
1424
/* Steals reference to "newconst" */
1425
static int
1426
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1427
                      PyObject *consts, PyObject *const_cache)
1428
848
{
1429
848
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1430
848
    if (res < 0) {
1431
0
        Py_DECREF(newconst);
1432
0
        return ERROR;
1433
0
    }
1434
848
    if (res > 0) {
1435
0
        return SUCCESS;
1436
0
    }
1437
848
    int oparg = add_const(newconst, consts, const_cache);
1438
848
    RETURN_IF_ERROR(oparg);
1439
848
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1440
848
    return SUCCESS;
1441
848
}
1442
1443
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1444
   with    LOAD_CONST (c1, c2, ... cn).
1445
   The consts table must still be in list form so that the
1446
   new constant (c1, c2, ... cn) can be appended.
1447
   Called with codestr pointing to the first LOAD_CONST.
1448
*/
1449
static int
1450
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1451
2.03k
{
1452
    /* Pre-conditions */
1453
2.03k
    assert(PyDict_CheckExact(const_cache));
1454
2.03k
    assert(PyList_CheckExact(consts));
1455
1456
2.03k
    cfg_instr *instr = &bb->b_instr[i];
1457
2.03k
    assert(instr->i_opcode == BUILD_TUPLE);
1458
1459
2.03k
    int seq_size = instr->i_oparg;
1460
2.03k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1461
0
        return SUCCESS;
1462
0
    }
1463
1464
2.03k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1465
2.03k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1466
        /* not a const sequence */
1467
1.30k
        return SUCCESS;
1468
1.30k
    }
1469
1470
726
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1471
726
    if (const_tuple == NULL) {
1472
0
        return ERROR;
1473
0
    }
1474
1475
1.76k
    for (int i = 0; i < seq_size; i++) {
1476
1.04k
        cfg_instr *inst = const_instrs[i];
1477
1.04k
        assert(loads_const(inst->i_opcode));
1478
1.04k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1479
1.04k
        if (element == NULL) {
1480
0
            Py_DECREF(const_tuple);
1481
0
            return ERROR;
1482
0
        }
1483
1.04k
        PyTuple_SET_ITEM(const_tuple, i, element);
1484
1.04k
    }
1485
1486
726
    nop_out(const_instrs, seq_size);
1487
726
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1488
726
}
1489
1490
/* Replace:
1491
    BUILD_LIST 0
1492
    LOAD_CONST c1
1493
    LIST_APPEND 1
1494
    LOAD_CONST c2
1495
    LIST_APPEND 1
1496
    ...
1497
    LOAD_CONST cN
1498
    LIST_APPEND 1
1499
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1500
   with:
1501
    LOAD_CONST (c1, c2, ... cN)
1502
*/
1503
static int
1504
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1505
                                      PyObject *consts, PyObject *const_cache)
1506
48
{
1507
48
    assert(PyDict_CheckExact(const_cache));
1508
48
    assert(PyList_CheckExact(consts));
1509
48
    assert(i >= 0);
1510
48
    assert(i < bb->b_iused);
1511
1512
48
    cfg_instr *intrinsic = &bb->b_instr[i];
1513
48
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1514
48
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1515
1516
48
    int consts_found = 0;
1517
48
    bool expect_append = true;
1518
1519
53
    for (int pos = i - 1; pos >= 0; pos--) {
1520
53
        cfg_instr *instr = &bb->b_instr[pos];
1521
53
        int opcode = instr->i_opcode;
1522
53
        int oparg = instr->i_oparg;
1523
1524
53
        if (opcode == NOP) {
1525
0
            continue;
1526
0
        }
1527
1528
53
        if (opcode == BUILD_LIST && oparg == 0) {
1529
0
            if (!expect_append) {
1530
                /* Not a sequence start. */
1531
0
                return SUCCESS;
1532
0
            }
1533
1534
            /* Sequence start, we are done. */
1535
0
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1536
0
            if (newconst == NULL) {
1537
0
                return ERROR;
1538
0
            }
1539
1540
0
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1541
0
                instr = &bb->b_instr[newpos];
1542
0
                if (instr->i_opcode == NOP) {
1543
0
                    continue;
1544
0
                }
1545
0
                if (loads_const(instr->i_opcode)) {
1546
0
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1547
0
                    if (constant == NULL) {
1548
0
                        Py_DECREF(newconst);
1549
0
                        return ERROR;
1550
0
                    }
1551
0
                    assert(consts_found > 0);
1552
0
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1553
0
                }
1554
0
                nop_out(&instr, 1);
1555
0
            }
1556
0
            assert(consts_found == 0);
1557
0
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1558
0
        }
1559
1560
53
        if (expect_append) {
1561
48
            if (opcode != LIST_APPEND || oparg != 1) {
1562
43
                return SUCCESS;
1563
43
            }
1564
48
        }
1565
5
        else {
1566
5
            if (!loads_const(opcode)) {
1567
5
                return SUCCESS;
1568
5
            }
1569
0
            consts_found++;
1570
0
        }
1571
1572
5
        expect_append = !expect_append;
1573
5
    }
1574
1575
    /* Did not find sequence start. */
1576
0
    return SUCCESS;
1577
48
}
1578
1579
918
#define MIN_CONST_SEQUENCE_SIZE 3
1580
/*
1581
Optimize lists and sets for:
1582
    1. "for" loop, comprehension or "in"/"not in" tests:
1583
           Change literal list or set of constants into constant
1584
           tuple or frozenset respectively. Change list of
1585
           non-constants into tuple.
1586
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1587
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1588
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1589
           or BUILD_SET & SET_UPDATE respectively.
1590
*/
1591
static int
1592
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1593
                        PyObject *consts, PyObject *const_cache)
1594
459
{
1595
459
    assert(PyDict_CheckExact(const_cache));
1596
459
    assert(PyList_CheckExact(consts));
1597
1598
459
    cfg_instr *instr = &bb->b_instr[i];
1599
459
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1600
1601
459
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1602
459
    int seq_size = instr->i_oparg;
1603
459
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1604
459
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1605
319
    {
1606
319
        return SUCCESS;
1607
319
    }
1608
1609
140
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1610
140
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1611
23
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1612
            /* iterate over a tuple instead of list */
1613
13
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1614
13
        }
1615
23
        return SUCCESS;
1616
23
    }
1617
1618
117
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1619
117
    if (const_result == NULL) {
1620
0
        return ERROR;
1621
0
    }
1622
1623
966
    for (int i = 0; i < seq_size; i++) {
1624
849
        cfg_instr *inst = const_instrs[i];
1625
849
        assert(loads_const(inst->i_opcode));
1626
849
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1627
849
        if (element == NULL) {
1628
0
            Py_DECREF(const_result);
1629
0
            return ERROR;
1630
0
        }
1631
849
        PyTuple_SET_ITEM(const_result, i, element);
1632
849
    }
1633
1634
117
    if (instr->i_opcode == BUILD_SET) {
1635
109
        PyObject *frozenset = PyFrozenSet_New(const_result);
1636
109
        if (frozenset == NULL) {
1637
0
            Py_DECREF(const_result);
1638
0
            return ERROR;
1639
0
        }
1640
109
        Py_SETREF(const_result, frozenset);
1641
109
    }
1642
1643
117
    int index = add_const(const_result, consts, const_cache);
1644
117
    RETURN_IF_ERROR(index);
1645
117
    nop_out(const_instrs, seq_size);
1646
1647
117
    if (contains_or_iter) {
1648
104
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1649
104
    }
1650
13
    else {
1651
13
        assert(i >= 2);
1652
13
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1653
1654
13
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1655
1656
13
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1657
13
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1658
13
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1659
13
    }
1660
117
    return SUCCESS;
1661
117
}
1662
1663
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1664
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1665
 * Used to avoid creating constants which are slow to hash.
1666
 */
1667
static Py_ssize_t
1668
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1669
0
{
1670
0
    if (PyTuple_Check(obj)) {
1671
0
        Py_ssize_t i;
1672
0
        limit -= PyTuple_GET_SIZE(obj);
1673
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1674
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1675
0
            if (limit < 0) {
1676
0
                return limit;
1677
0
            }
1678
0
        }
1679
0
    }
1680
0
    return limit;
1681
0
}
1682
1683
67
#define MAX_INT_SIZE           128  /* bits */
1684
0
#define MAX_COLLECTION_SIZE    256  /* items */
1685
2
#define MAX_STR_SIZE          4096  /* characters */
1686
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1687
1688
static PyObject *
1689
const_folding_safe_multiply(PyObject *v, PyObject *w)
1690
12
{
1691
12
    if (PyLong_Check(v) && PyLong_Check(w) &&
1692
8
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1693
12
    ) {
1694
8
        int64_t vbits = _PyLong_NumBits(v);
1695
8
        int64_t wbits = _PyLong_NumBits(w);
1696
8
        assert(vbits >= 0);
1697
8
        assert(wbits >= 0);
1698
8
        if (vbits + wbits > MAX_INT_SIZE) {
1699
0
            return NULL;
1700
0
        }
1701
8
    }
1702
4
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1703
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1704
0
        if (size) {
1705
0
            long n = PyLong_AsLong(v);
1706
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1707
0
                return NULL;
1708
0
            }
1709
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1710
0
                return NULL;
1711
0
            }
1712
0
        }
1713
0
    }
1714
4
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1715
2
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1716
2
                                               PyBytes_GET_SIZE(w);
1717
2
        if (size) {
1718
2
            long n = PyLong_AsLong(v);
1719
2
            if (n < 0 || n > MAX_STR_SIZE / size) {
1720
0
                return NULL;
1721
0
            }
1722
2
        }
1723
2
    }
1724
2
    else if (PyLong_Check(w) &&
1725
2
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1726
2
    {
1727
2
        return const_folding_safe_multiply(w, v);
1728
2
    }
1729
1730
10
    return PyNumber_Multiply(v, w);
1731
12
}
1732
1733
static PyObject *
1734
const_folding_safe_power(PyObject *v, PyObject *w)
1735
2
{
1736
2
    if (PyLong_Check(v) && PyLong_Check(w) &&
1737
2
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1738
2
    ) {
1739
2
        int64_t vbits = _PyLong_NumBits(v);
1740
2
        size_t wbits = PyLong_AsSize_t(w);
1741
2
        assert(vbits >= 0);
1742
2
        if (wbits == (size_t)-1) {
1743
0
            return NULL;
1744
0
        }
1745
2
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1746
0
            return NULL;
1747
0
        }
1748
2
    }
1749
1750
2
    return PyNumber_Power(v, w, Py_None);
1751
2
}
1752
1753
static PyObject *
1754
const_folding_safe_lshift(PyObject *v, PyObject *w)
1755
19
{
1756
19
    if (PyLong_Check(v) && PyLong_Check(w) &&
1757
19
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1758
19
    ) {
1759
19
        int64_t vbits = _PyLong_NumBits(v);
1760
19
        size_t wbits = PyLong_AsSize_t(w);
1761
19
        assert(vbits >= 0);
1762
19
        if (wbits == (size_t)-1) {
1763
0
            return NULL;
1764
0
        }
1765
19
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1766
0
            return NULL;
1767
0
        }
1768
19
    }
1769
1770
19
    return PyNumber_Lshift(v, w);
1771
19
}
1772
1773
static PyObject *
1774
const_folding_safe_mod(PyObject *v, PyObject *w)
1775
0
{
1776
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1777
0
        return NULL;
1778
0
    }
1779
1780
0
    return PyNumber_Remainder(v, w);
1781
0
}
1782
1783
static PyObject *
1784
eval_const_binop(PyObject *left, int op, PyObject *right)
1785
32
{
1786
32
    assert(left != NULL && right != NULL);
1787
32
    assert(op >= 0 && op <= NB_OPARG_LAST);
1788
1789
32
    PyObject *result = NULL;
1790
32
    switch (op) {
1791
1
        case NB_ADD:
1792
1
            result = PyNumber_Add(left, right);
1793
1
            break;
1794
0
        case NB_SUBTRACT:
1795
0
            result = PyNumber_Subtract(left, right);
1796
0
            break;
1797
10
        case NB_MULTIPLY:
1798
10
            result = const_folding_safe_multiply(left, right);
1799
10
            break;
1800
0
        case NB_TRUE_DIVIDE:
1801
0
            result = PyNumber_TrueDivide(left, right);
1802
0
            break;
1803
0
        case NB_FLOOR_DIVIDE:
1804
0
            result = PyNumber_FloorDivide(left, right);
1805
0
            break;
1806
0
        case NB_REMAINDER:
1807
0
            result = const_folding_safe_mod(left, right);
1808
0
            break;
1809
2
        case NB_POWER:
1810
2
            result = const_folding_safe_power(left, right);
1811
2
            break;
1812
19
        case NB_LSHIFT:
1813
19
            result = const_folding_safe_lshift(left, right);
1814
19
            break;
1815
0
        case NB_RSHIFT:
1816
0
            result = PyNumber_Rshift(left, right);
1817
0
            break;
1818
0
        case NB_OR:
1819
0
            result = PyNumber_Or(left, right);
1820
0
            break;
1821
0
        case NB_XOR:
1822
0
            result = PyNumber_Xor(left, right);
1823
0
            break;
1824
0
        case NB_AND:
1825
0
            result = PyNumber_And(left, right);
1826
0
            break;
1827
0
        case NB_SUBSCR:
1828
0
            result = PyObject_GetItem(left, right);
1829
0
            break;
1830
0
        case NB_MATRIX_MULTIPLY:
1831
            // No builtin constants implement matrix multiplication
1832
0
            break;
1833
0
        default:
1834
0
            Py_UNREACHABLE();
1835
32
    }
1836
32
    return result;
1837
32
}
1838
1839
static int
1840
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1841
1.45k
{
1842
1.48k
    #define BINOP_OPERAND_COUNT 2
1843
1.45k
    assert(PyDict_CheckExact(const_cache));
1844
1.45k
    assert(PyList_CheckExact(consts));
1845
1846
1.45k
    cfg_instr *binop = &bb->b_instr[i];
1847
1.45k
    assert(binop->i_opcode == BINARY_OP);
1848
1849
1.45k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1850
1.45k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1851
        /* not a const sequence */
1852
1.42k
        return SUCCESS;
1853
1.42k
    }
1854
1855
32
    cfg_instr *lhs_instr = operands_instrs[0];
1856
32
    assert(loads_const(lhs_instr->i_opcode));
1857
32
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1858
32
    if (lhs == NULL) {
1859
0
        return ERROR;
1860
0
    }
1861
1862
32
    cfg_instr *rhs_instr = operands_instrs[1];
1863
32
    assert(loads_const(rhs_instr->i_opcode));
1864
32
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1865
32
    if (rhs == NULL) {
1866
0
        Py_DECREF(lhs);
1867
0
        return ERROR;
1868
0
    }
1869
1870
32
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1871
32
    Py_DECREF(lhs);
1872
32
    Py_DECREF(rhs);
1873
32
    if (newconst == NULL) {
1874
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1875
0
            return ERROR;
1876
0
        }
1877
0
        PyErr_Clear();
1878
0
        return SUCCESS;
1879
0
    }
1880
1881
32
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1882
32
    return instr_make_load_const(binop, newconst, consts, const_cache);
1883
32
}
1884
1885
static PyObject *
1886
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1887
90
{
1888
90
    assert(operand != NULL);
1889
90
    assert(
1890
90
        opcode == UNARY_NEGATIVE ||
1891
90
        opcode == UNARY_INVERT ||
1892
90
        opcode == UNARY_NOT ||
1893
90
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1894
90
    );
1895
90
    PyObject *result;
1896
90
    switch (opcode) {
1897
90
        case UNARY_NEGATIVE:
1898
90
            result = PyNumber_Negative(operand);
1899
90
            break;
1900
0
        case UNARY_INVERT:
1901
            // XXX: This should be removed once the ~bool depreciation expires.
1902
0
            if (PyBool_Check(operand)) {
1903
0
                return NULL;
1904
0
            }
1905
0
            result = PyNumber_Invert(operand);
1906
0
            break;
1907
0
        case UNARY_NOT: {
1908
0
            int r = PyObject_IsTrue(operand);
1909
0
            if (r < 0) {
1910
0
                return NULL;
1911
0
            }
1912
0
            result = PyBool_FromLong(!r);
1913
0
            break;
1914
0
        }
1915
0
        case CALL_INTRINSIC_1:
1916
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1917
0
                Py_UNREACHABLE();
1918
0
            }
1919
0
            result = PyNumber_Positive(operand);
1920
0
            break;
1921
0
        default:
1922
0
            Py_UNREACHABLE();
1923
90
    }
1924
90
    return result;
1925
90
}
1926
1927
static int
1928
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1929
111
{
1930
201
    #define UNARYOP_OPERAND_COUNT 1
1931
111
    assert(PyDict_CheckExact(const_cache));
1932
111
    assert(PyList_CheckExact(consts));
1933
111
    cfg_instr *unaryop = &bb->b_instr[i];
1934
1935
111
    cfg_instr *operand_instr;
1936
111
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1937
        /* not a const */
1938
21
        return SUCCESS;
1939
21
    }
1940
1941
111
    assert(loads_const(operand_instr->i_opcode));
1942
90
    PyObject *operand = get_const_value(
1943
90
        operand_instr->i_opcode,
1944
90
        operand_instr->i_oparg,
1945
90
        consts
1946
90
    );
1947
90
    if (operand == NULL) {
1948
0
        return ERROR;
1949
0
    }
1950
1951
90
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1952
90
    Py_DECREF(operand);
1953
90
    if (newconst == NULL) {
1954
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1955
0
            return ERROR;
1956
0
        }
1957
0
        PyErr_Clear();
1958
0
        return SUCCESS;
1959
0
    }
1960
1961
90
    if (unaryop->i_opcode == UNARY_NOT) {
1962
0
        assert(PyBool_Check(newconst));
1963
0
    }
1964
90
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1965
90
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1966
90
}
1967
1968
1.92k
#define VISITED (-1)
1969
1970
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1971
// same effect.
1972
static int
1973
swaptimize(basicblock *block, int *ix)
1974
566
{
1975
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1976
    // for this function. Make sure to blow away cached *.pyc files first!
1977
566
    assert(*ix < block->b_iused);
1978
566
    cfg_instr *instructions = &block->b_instr[*ix];
1979
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1980
    // maximum depth of the stack manipulations:
1981
566
    assert(instructions[0].i_opcode == SWAP);
1982
566
    int depth = instructions[0].i_oparg;
1983
566
    int len = 0;
1984
566
    int more = false;
1985
566
    int limit = block->b_iused - *ix;
1986
734
    while (++len < limit) {
1987
734
        int opcode = instructions[len].i_opcode;
1988
734
        if (opcode == SWAP) {
1989
148
            depth = Py_MAX(depth, instructions[len].i_oparg);
1990
148
            more = true;
1991
148
        }
1992
586
        else if (opcode != NOP) {
1993
566
            break;
1994
566
        }
1995
734
    }
1996
    // It's already optimal if there's only one SWAP:
1997
566
    if (!more) {
1998
418
        return SUCCESS;
1999
418
    }
2000
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
2001
148
    int *stack = PyMem_Malloc(depth * sizeof(int));
2002
148
    if (stack == NULL) {
2003
0
        PyErr_NoMemory();
2004
0
        return ERROR;
2005
0
    }
2006
592
    for (int i = 0; i < depth; i++) {
2007
444
        stack[i] = i;
2008
444
    }
2009
    // Simulate the combined effect of these instructions by "running" them on
2010
    // our "stack":
2011
444
    for (int i = 0; i < len; i++) {
2012
296
        if (instructions[i].i_opcode == SWAP) {
2013
296
            int oparg = instructions[i].i_oparg;
2014
296
            int top = stack[0];
2015
            // SWAPs are 1-indexed:
2016
296
            stack[0] = stack[oparg - 1];
2017
296
            stack[oparg - 1] = top;
2018
296
        }
2019
296
    }
2020
    // Now we can begin! Our approach here is based on a solution to a closely
2021
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2022
    // think of this algorithm as determining the steps needed to efficiently
2023
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2024
    // though, we can efficiently *shuffle* it! For this reason, we will be
2025
    // replacing instructions starting from the *end* of the run. Since the
2026
    // solution is optimal, we don't need to worry about running out of space:
2027
148
    int current = len - 1;
2028
592
    for (int i = 0; i < depth; i++) {
2029
        // Skip items that have already been visited, or just happen to be in
2030
        // the correct location:
2031
444
        if (stack[i] == VISITED || stack[i] == i) {
2032
296
            continue;
2033
296
        }
2034
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2035
        // with other items; traversing the cycle and swapping each item with
2036
        // the next will put them all in the correct place. The weird
2037
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2038
        // can only swap from that position:
2039
148
        int j = i;
2040
592
        while (true) {
2041
            // Skip the actual swap if our item is zero, since swapping the top
2042
            // item with itself is pointless:
2043
592
            if (j) {
2044
296
                assert(0 <= current);
2045
                // SWAPs are 1-indexed:
2046
296
                instructions[current].i_opcode = SWAP;
2047
296
                instructions[current--].i_oparg = j + 1;
2048
296
            }
2049
592
            if (stack[j] == VISITED) {
2050
                // Completed the cycle:
2051
148
                assert(j == i);
2052
148
                break;
2053
148
            }
2054
444
            int next_j = stack[j];
2055
444
            stack[j] = VISITED;
2056
444
            j = next_j;
2057
444
        }
2058
148
    }
2059
    // NOP out any unused instructions:
2060
148
    while (0 <= current) {
2061
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2062
0
    }
2063
148
    PyMem_Free(stack);
2064
148
    *ix += len - 1;
2065
148
    return SUCCESS;
2066
148
}
2067
2068
2069
// This list is pretty small, since it's only okay to reorder opcodes that:
2070
// - can't affect control flow (like jumping or raising exceptions)
2071
// - can't invoke arbitrary code (besides finalizers)
2072
// - only touch the TOS (and pop it when finished)
2073
#define SWAPPABLE(opcode) \
2074
877
    ((opcode) == STORE_FAST || \
2075
877
     (opcode) == STORE_FAST_MAYBE_NULL || \
2076
877
     (opcode) == POP_TOP)
2077
2078
#define STORES_TO(instr) \
2079
165
    (((instr).i_opcode == STORE_FAST || \
2080
165
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2081
165
     ? (instr).i_oparg : -1)
2082
2083
static int
2084
next_swappable_instruction(basicblock *block, int i, int lineno)
2085
839
{
2086
859
    while (++i < block->b_iused) {
2087
856
        cfg_instr *instruction = &block->b_instr[i];
2088
856
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2089
            // Optimizing across this instruction could cause user-visible
2090
            // changes in the names bound between line tracing events!
2091
4
            return -1;
2092
4
        }
2093
852
        if (instruction->i_opcode == NOP) {
2094
20
            continue;
2095
20
        }
2096
832
        if (SWAPPABLE(instruction->i_opcode)) {
2097
349
            return i;
2098
349
        }
2099
483
        return -1;
2100
832
    }
2101
3
    return -1;
2102
839
}
2103
2104
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2105
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2106
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2107
static void
2108
apply_static_swaps(basicblock *block, int i)
2109
566
{
2110
    // SWAPs are to our left, and potential swaperands are to our right:
2111
687
    for (; 0 <= i; i--) {
2112
656
        assert(i < block->b_iused);
2113
656
        cfg_instr *swap = &block->b_instr[i];
2114
656
        if (swap->i_opcode != SWAP) {
2115
90
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2116
                // Nope, but we know how to handle these. Keep looking:
2117
45
                continue;
2118
45
            }
2119
            // We can't reason about what this instruction does. Bail:
2120
45
            return;
2121
90
        }
2122
566
        int j = next_swappable_instruction(block, i, -1);
2123
566
        if (j < 0) {
2124
336
            return;
2125
336
        }
2126
230
        int k = j;
2127
230
        int lineno = block->b_instr[j].i_loc.lineno;
2128
349
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2129
273
            k = next_swappable_instruction(block, k, lineno);
2130
273
            if (k < 0) {
2131
154
                return;
2132
154
            }
2133
273
        }
2134
        // The reordering is not safe if the two instructions to be swapped
2135
        // store to the same location, or if any intervening instruction stores
2136
        // to the same location as either of them.
2137
76
        int store_j = STORES_TO(block->b_instr[j]);
2138
76
        int store_k = STORES_TO(block->b_instr[k]);
2139
76
        if (store_j >= 0 || store_k >= 0) {
2140
76
            if (store_j == store_k) {
2141
0
                return;
2142
0
            }
2143
89
            for (int idx = j + 1; idx < k; idx++) {
2144
13
                int store_idx = STORES_TO(block->b_instr[idx]);
2145
13
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2146
0
                    return;
2147
0
                }
2148
13
            }
2149
76
        }
2150
2151
        // Success!
2152
76
        INSTR_SET_OP0(swap, NOP);
2153
76
        cfg_instr temp = block->b_instr[j];
2154
76
        block->b_instr[j] = block->b_instr[k];
2155
76
        block->b_instr[k] = temp;
2156
76
    }
2157
566
}
2158
2159
static int
2160
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2161
21.7k
{
2162
21.7k
    assert(PyDict_CheckExact(const_cache));
2163
21.7k
    assert(PyList_CheckExact(consts));
2164
21.7k
    int opcode = 0;
2165
21.7k
    int oparg = 0;
2166
210k
    for (int i = 0; i < bb->b_iused; i++) {
2167
189k
        cfg_instr *inst = &bb->b_instr[i];
2168
189k
        if (inst->i_opcode == LOAD_CONST) {
2169
38.5k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2170
38.5k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2171
38.5k
            Py_DECREF(constant);
2172
38.5k
            if (res < 0) {
2173
0
                return ERROR;
2174
0
            }
2175
38.5k
        }
2176
189k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2177
24.5k
                                      inst->i_opcode == COPY &&
2178
12
                                      inst->i_oparg == 1);
2179
189k
        if (! is_copy_of_load_const) {
2180
189k
            opcode = inst->i_opcode;
2181
189k
            oparg = inst->i_oparg;
2182
189k
        }
2183
189k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2184
189k
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2185
150k
            continue;
2186
150k
        }
2187
38.5k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2188
38.5k
        switch(nextop) {
2189
25
            case POP_JUMP_IF_FALSE:
2190
25
            case POP_JUMP_IF_TRUE:
2191
25
            case JUMP_IF_FALSE:
2192
25
            case JUMP_IF_TRUE:
2193
25
            {
2194
                /* Remove LOAD_CONST const; conditional jump */
2195
25
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2196
25
                if (cnt == NULL) {
2197
0
                    return ERROR;
2198
0
                }
2199
25
                int is_true = PyObject_IsTrue(cnt);
2200
25
                Py_DECREF(cnt);
2201
25
                if (is_true == -1) {
2202
0
                    return ERROR;
2203
0
                }
2204
25
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2205
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2206
25
                    INSTR_SET_OP0(inst, NOP);
2207
25
                }
2208
25
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2209
25
                if (is_true == jump_if_true) {
2210
0
                    bb->b_instr[i+1].i_opcode = JUMP;
2211
0
                }
2212
25
                else {
2213
25
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2214
25
                }
2215
25
                break;
2216
25
            }
2217
689
            case IS_OP:
2218
689
            {
2219
                // Fold to POP_JUMP_IF_NONE:
2220
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2221
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2222
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2223
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2224
                // Fold to POP_JUMP_IF_NOT_NONE:
2225
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2226
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2227
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2228
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2229
689
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2230
689
                if (cnt == NULL) {
2231
0
                    return ERROR;
2232
0
                }
2233
689
                if (!Py_IsNone(cnt)) {
2234
15
                    Py_DECREF(cnt);
2235
15
                    break;
2236
15
                }
2237
674
                if (bb->b_iused <= i + 2) {
2238
1
                    break;
2239
1
                }
2240
673
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2241
673
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2242
                // Get rid of TO_BOOL regardless:
2243
673
                if (jump_instr->i_opcode == TO_BOOL) {
2244
658
                    INSTR_SET_OP0(jump_instr, NOP);
2245
658
                    if (bb->b_iused <= i + 3) {
2246
0
                        break;
2247
0
                    }
2248
658
                    jump_instr = &bb->b_instr[i + 3];
2249
658
                }
2250
673
                bool invert = is_instr->i_oparg;
2251
673
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2252
625
                    invert = !invert;
2253
625
                }
2254
48
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2255
15
                    break;
2256
15
                }
2257
658
                INSTR_SET_OP0(inst, NOP);
2258
658
                INSTR_SET_OP0(is_instr, NOP);
2259
658
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2260
658
                                              : POP_JUMP_IF_NONE;
2261
658
                break;
2262
673
            }
2263
25
            case TO_BOOL:
2264
25
            {
2265
25
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2266
25
                if (cnt == NULL) {
2267
0
                    return ERROR;
2268
0
                }
2269
25
                int is_true = PyObject_IsTrue(cnt);
2270
25
                Py_DECREF(cnt);
2271
25
                if (is_true == -1) {
2272
0
                    return ERROR;
2273
0
                }
2274
25
                cnt = PyBool_FromLong(is_true);
2275
25
                int index = add_const(cnt, consts, const_cache);
2276
25
                if (index < 0) {
2277
0
                    return ERROR;
2278
0
                }
2279
25
                INSTR_SET_OP0(inst, NOP);
2280
25
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2281
25
                break;
2282
25
            }
2283
38.5k
        }
2284
38.5k
    }
2285
21.7k
    return SUCCESS;
2286
21.7k
}
2287
2288
static int
2289
4.58k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2290
26.3k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2291
21.7k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2292
21.7k
    }
2293
4.58k
    return SUCCESS;
2294
4.58k
}
2295
2296
static int
2297
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2298
21.7k
{
2299
21.7k
    assert(PyDict_CheckExact(const_cache));
2300
21.7k
    assert(PyList_CheckExact(consts));
2301
21.7k
    cfg_instr nop;
2302
21.7k
    INSTR_SET_OP0(&nop, NOP);
2303
211k
    for (int i = 0; i < bb->b_iused; i++) {
2304
189k
        cfg_instr *inst = &bb->b_instr[i];
2305
189k
        cfg_instr *target;
2306
189k
        int opcode = inst->i_opcode;
2307
189k
        int oparg = inst->i_oparg;
2308
189k
        if (HAS_TARGET(opcode)) {
2309
9.20k
            assert(inst->i_target->b_iused > 0);
2310
9.20k
            target = &inst->i_target->b_instr[0];
2311
9.20k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2312
9.20k
        }
2313
180k
        else {
2314
180k
            target = &nop;
2315
180k
        }
2316
189k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2317
189k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2318
189k
        switch (opcode) {
2319
            /* Try to fold tuples of constants.
2320
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2321
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2322
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2323
2.08k
            case BUILD_TUPLE:
2324
2.08k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2325
54
                    switch(oparg) {
2326
0
                        case 1:
2327
0
                            INSTR_SET_OP0(inst, NOP);
2328
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2329
0
                            continue;
2330
45
                        case 2:
2331
53
                        case 3:
2332
53
                            INSTR_SET_OP0(inst, NOP);
2333
53
                            bb->b_instr[i+1].i_opcode = SWAP;
2334
53
                            continue;
2335
54
                    }
2336
54
                }
2337
2.03k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2338
2.03k
                break;
2339
324
            case BUILD_LIST:
2340
459
            case BUILD_SET:
2341
459
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2342
459
                break;
2343
250
            case POP_JUMP_IF_NOT_NONE:
2344
674
            case POP_JUMP_IF_NONE:
2345
674
                switch (target->i_opcode) {
2346
16
                    case JUMP:
2347
16
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2348
674
                }
2349
674
                break;
2350
3.04k
            case POP_JUMP_IF_FALSE:
2351
3.04k
                switch (target->i_opcode) {
2352
136
                    case JUMP:
2353
136
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2354
3.04k
                }
2355
3.04k
                break;
2356
3.04k
            case POP_JUMP_IF_TRUE:
2357
861
                switch (target->i_opcode) {
2358
33
                    case JUMP:
2359
33
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2360
861
                }
2361
861
                break;
2362
861
            case JUMP_IF_FALSE:
2363
507
                switch (target->i_opcode) {
2364
0
                    case JUMP:
2365
0
                    case JUMP_IF_FALSE:
2366
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2367
0
                        continue;
2368
2
                    case JUMP_IF_TRUE:
2369
                        // No need to check for loops here, a block's b_next
2370
                        // cannot point to itself.
2371
2
                        assert(inst->i_target != inst->i_target->b_next);
2372
2
                        inst->i_target = inst->i_target->b_next;
2373
2
                        i--;
2374
2
                        continue;
2375
507
                }
2376
505
                break;
2377
505
            case JUMP_IF_TRUE:
2378
54
                switch (target->i_opcode) {
2379
0
                    case JUMP:
2380
0
                    case JUMP_IF_TRUE:
2381
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2382
0
                        continue;
2383
0
                    case JUMP_IF_FALSE:
2384
                        // No need to check for loops here, a block's b_next
2385
                        // cannot point to itself.
2386
0
                        assert(inst->i_target != inst->i_target->b_next);
2387
0
                        inst->i_target = inst->i_target->b_next;
2388
0
                        i--;
2389
0
                        continue;
2390
54
                }
2391
54
                break;
2392
856
            case JUMP:
2393
1.83k
            case JUMP_NO_INTERRUPT:
2394
1.83k
                switch (target->i_opcode) {
2395
0
                    case JUMP:
2396
0
                        i -= jump_thread(bb, inst, target, JUMP);
2397
0
                        continue;
2398
0
                    case JUMP_NO_INTERRUPT:
2399
0
                        i -= jump_thread(bb, inst, target, opcode);
2400
0
                        continue;
2401
1.83k
                }
2402
1.83k
                break;
2403
1.83k
            case FOR_ITER:
2404
445
                if (target->i_opcode == JUMP) {
2405
                    /* This will not work now because the jump (at target) could
2406
                     * be forward or backward and FOR_ITER only jumps forward. We
2407
                     * can re-enable this if ever we implement a backward version
2408
                     * of FOR_ITER.
2409
                     */
2410
                    /*
2411
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2412
                    */
2413
0
                }
2414
445
                break;
2415
4.41k
            case STORE_FAST:
2416
4.41k
                if (opcode == nextop &&
2417
371
                    oparg == bb->b_instr[i+1].i_oparg &&
2418
19
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2419
19
                    bb->b_instr[i].i_opcode = POP_TOP;
2420
19
                    bb->b_instr[i].i_oparg = 0;
2421
19
                }
2422
4.41k
                break;
2423
714
            case SWAP:
2424
714
                if (oparg == 1) {
2425
0
                    INSTR_SET_OP0(inst, NOP);
2426
0
                }
2427
714
                break;
2428
7.03k
            case LOAD_GLOBAL:
2429
7.03k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2430
2.79k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2431
2.79k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2432
2.79k
                }
2433
7.03k
                break;
2434
2.11k
            case COMPARE_OP:
2435
2.11k
                if (nextop == TO_BOOL) {
2436
725
                    INSTR_SET_OP0(inst, NOP);
2437
725
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2438
725
                    continue;
2439
725
                }
2440
1.38k
                break;
2441
1.38k
            case CONTAINS_OP:
2442
1.43k
            case IS_OP:
2443
1.43k
                if (nextop == TO_BOOL) {
2444
690
                    INSTR_SET_OP0(inst, NOP);
2445
690
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2446
690
                    continue;
2447
690
                }
2448
745
                if (nextop == UNARY_NOT) {
2449
0
                    INSTR_SET_OP0(inst, NOP);
2450
0
                    int inverted = oparg ^ 1;
2451
0
                    assert(inverted == 0 || inverted == 1);
2452
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2453
0
                    continue;
2454
0
                }
2455
745
                break;
2456
1.88k
            case TO_BOOL:
2457
1.88k
                if (nextop == TO_BOOL) {
2458
0
                    INSTR_SET_OP0(inst, NOP);
2459
0
                    continue;
2460
0
                }
2461
1.88k
                break;
2462
1.88k
            case UNARY_NOT:
2463
17
                if (nextop == TO_BOOL) {
2464
0
                    INSTR_SET_OP0(inst, NOP);
2465
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2466
0
                    continue;
2467
0
                }
2468
17
                if (nextop == UNARY_NOT) {
2469
0
                    INSTR_SET_OP0(inst, NOP);
2470
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2471
0
                    continue;
2472
0
                }
2473
17
                _Py_FALLTHROUGH;
2474
19
            case UNARY_INVERT:
2475
111
            case UNARY_NEGATIVE:
2476
111
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2477
111
                break;
2478
317
            case CALL_INTRINSIC_1:
2479
317
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2480
48
                    if (nextop == GET_ITER) {
2481
0
                        INSTR_SET_OP0(inst, NOP);
2482
0
                    }
2483
48
                    else {
2484
48
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2485
48
                    }
2486
48
                }
2487
269
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2488
0
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2489
0
                }
2490
317
                break;
2491
1.45k
            case BINARY_OP:
2492
1.45k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2493
1.45k
                break;
2494
189k
        }
2495
189k
    }
2496
2497
210k
    for (int i = 0; i < bb->b_iused; i++) {
2498
189k
        cfg_instr *inst = &bb->b_instr[i];
2499
189k
        if (inst->i_opcode == SWAP) {
2500
566
            if (swaptimize(bb, &i) < 0) {
2501
0
                goto error;
2502
0
            }
2503
566
            apply_static_swaps(bb, i);
2504
566
        }
2505
189k
    }
2506
21.7k
    return SUCCESS;
2507
0
error:
2508
0
    return ERROR;
2509
21.7k
}
2510
2511
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2512
2513
static int
2514
remove_redundant_nops_and_jumps(cfg_builder *g)
2515
9.74k
{
2516
9.74k
    int removed_nops, removed_jumps;
2517
10.6k
    do {
2518
        /* Convergence is guaranteed because the number of
2519
         * redundant jumps and nops only decreases.
2520
         */
2521
10.6k
        removed_nops = remove_redundant_nops(g);
2522
10.6k
        RETURN_IF_ERROR(removed_nops);
2523
10.6k
        removed_jumps = remove_redundant_jumps(g);
2524
10.6k
        RETURN_IF_ERROR(removed_jumps);
2525
10.6k
    } while(removed_nops + removed_jumps > 0);
2526
9.74k
    return SUCCESS;
2527
9.74k
}
2528
2529
/* Perform optimizations on a control flow graph.
2530
   The consts object should still be in list form to allow new constants
2531
   to be appended.
2532
2533
   Code trasnformations that reduce code size initially fill the gaps with
2534
   NOPs.  Later those NOPs are removed.
2535
*/
2536
static int
2537
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2538
4.58k
{
2539
4.58k
    assert(PyDict_CheckExact(const_cache));
2540
4.58k
    RETURN_IF_ERROR(check_cfg(g));
2541
4.58k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2542
4.58k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2543
4.58k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2544
4.58k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2545
26.3k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2546
21.7k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2547
21.7k
    }
2548
4.58k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2549
4.58k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2550
4.58k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2551
4.58k
    assert(no_redundant_jumps(g));
2552
4.58k
    return SUCCESS;
2553
4.58k
}
2554
2555
static void
2556
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2557
4.96k
{
2558
4.96k
    int32_t line1 = inst1->i_loc.lineno;
2559
4.96k
    int32_t line2 = inst2->i_loc.lineno;
2560
    /* Skip if instructions are on different lines */
2561
4.96k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2562
1.65k
        return;
2563
1.65k
    }
2564
3.30k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2565
235
        return;
2566
235
    }
2567
3.07k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2568
3.07k
    INSTR_SET_OP0(inst2, NOP);
2569
3.07k
}
2570
2571
static int
2572
insert_superinstructions(cfg_builder *g)
2573
4.58k
{
2574
26.3k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2575
2576
198k
        for (int i = 0; i < b->b_iused; i++) {
2577
177k
            cfg_instr *inst = &b->b_instr[i];
2578
177k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2579
177k
            switch(inst->i_opcode) {
2580
20.7k
                case LOAD_FAST:
2581
20.7k
                    if (nextop == LOAD_FAST) {
2582
3.04k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2583
3.04k
                    }
2584
20.7k
                    break;
2585
4.15k
                case STORE_FAST:
2586
4.15k
                    switch (nextop) {
2587
1.63k
                        case LOAD_FAST:
2588
1.63k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2589
1.63k
                            break;
2590
288
                        case STORE_FAST:
2591
288
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2592
288
                            break;
2593
4.15k
                    }
2594
4.15k
                    break;
2595
177k
            }
2596
177k
        }
2597
21.7k
    }
2598
4.58k
    int res = remove_redundant_nops(g);
2599
4.58k
    assert(no_redundant_nops(g));
2600
4.58k
    return res;
2601
4.58k
}
2602
2603
#define NOT_LOCAL -1
2604
3.99k
#define DUMMY_INSTR -1
2605
2606
typedef struct {
2607
    // Index of instruction that produced the reference or DUMMY_INSTR.
2608
    int instr;
2609
2610
    // The local to which the reference refers or NOT_LOCAL.
2611
    int local;
2612
} ref;
2613
2614
typedef struct {
2615
    ref *refs;
2616
    Py_ssize_t size;
2617
    Py_ssize_t capacity;
2618
} ref_stack;
2619
2620
static int
2621
ref_stack_push(ref_stack *stack, ref r)
2622
140k
{
2623
140k
    if (stack->size == stack->capacity) {
2624
4.59k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2625
4.59k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2626
4.59k
        if (refs == NULL) {
2627
0
            PyErr_NoMemory();
2628
0
            return -1;
2629
0
        }
2630
4.59k
        stack->refs = refs;
2631
4.59k
        stack->capacity = new_cap;
2632
4.59k
    }
2633
140k
    stack->refs[stack->size] = r;
2634
140k
    stack->size++;
2635
140k
    return 0;
2636
140k
}
2637
2638
static ref
2639
ref_stack_pop(ref_stack *stack)
2640
125k
{
2641
125k
    assert(stack->size > 0);
2642
125k
    stack->size--;
2643
125k
    ref r = stack->refs[stack->size];
2644
125k
    return r;
2645
125k
}
2646
2647
static void
2648
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2649
501
{
2650
501
    Py_ssize_t idx = stack->size - off;
2651
501
    assert(idx >= 0 && idx < stack->size);
2652
501
    ref tmp = stack->refs[idx];
2653
501
    stack->refs[idx] = stack->refs[stack->size - 1];
2654
501
    stack->refs[stack->size - 1] = tmp;
2655
501
}
2656
2657
static ref
2658
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2659
18.7k
{
2660
18.7k
    assert(idx >= 0 && idx < stack->size);
2661
18.7k
    return stack->refs[idx];
2662
18.7k
}
2663
2664
static void
2665
ref_stack_clear(ref_stack *stack)
2666
15.2k
{
2667
15.2k
    stack->size = 0;
2668
15.2k
}
2669
2670
static void
2671
ref_stack_fini(ref_stack *stack)
2672
4.58k
{
2673
4.58k
    if (stack->refs != NULL) {
2674
4.58k
        PyMem_Free(stack->refs);
2675
4.58k
    }
2676
4.58k
    stack->refs = NULL;
2677
4.58k
    stack->capacity = 0;
2678
4.58k
    stack->size = 0;
2679
4.58k
}
2680
2681
typedef enum {
2682
    // The loaded reference is still on the stack when the local is killed
2683
    SUPPORT_KILLED  = 1,
2684
    // The loaded reference is stored into a local
2685
    STORED_AS_LOCAL = 2,
2686
    // The loaded reference is still on the stack at the end of the basic block
2687
    REF_UNCONSUMED  = 4,
2688
} LoadFastInstrFlag;
2689
2690
static void
2691
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2692
4.06k
{
2693
7.20k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2694
3.14k
        ref r = ref_stack_at(refs, i);
2695
3.14k
        if (r.local == local) {
2696
4
            assert(r.instr >= 0);
2697
4
            instr_flags[r.instr] |= SUPPORT_KILLED;
2698
4
        }
2699
3.14k
    }
2700
4.06k
}
2701
2702
static void
2703
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2704
3.99k
{
2705
3.99k
    kill_local(instr_flags, refs, local);
2706
3.99k
    if (r.instr != DUMMY_INSTR) {
2707
3.49k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2708
3.49k
    }
2709
3.99k
}
2710
2711
static void
2712
load_fast_push_block(basicblock ***sp, basicblock *target,
2713
                     Py_ssize_t start_depth)
2714
13.8k
{
2715
13.8k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2716
13.8k
    if (!target->b_visited) {
2717
10.6k
        target->b_visited = 1;
2718
10.6k
        *(*sp)++ = target;
2719
10.6k
    }
2720
13.8k
}
2721
2722
/*
2723
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2724
 * load borrowed references onto the operand stack.
2725
 *
2726
 * This is only safe when we can prove that the reference in the frame outlives
2727
 * the borrowed reference produced by the instruction. We make this tractable
2728
 * by enforcing the following lifetimes:
2729
 *
2730
 * 1. Borrowed references loaded onto the operand stack live until the end of
2731
 *    the instruction that consumes them from the stack. Any borrowed
2732
 *    references that would escape into the heap (e.g. into frame objects or
2733
 *    generators) are converted into new, strong references.
2734
 *
2735
 * 2. Locals live until they are either killed by an instruction
2736
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2737
 *    via `f_locals` is added to a tuple owned by the frame object.
2738
 *
2739
 * To simplify the problem of detecting which supporting references in the
2740
 * frame are killed by instructions that overwrite locals, we only allow
2741
 * borrowed references to be stored as a local in the frame if they were passed
2742
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2743
 * strong references.
2744
 *
2745
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2746
 * that meet the following criteria:
2747
 *
2748
 * 1. The produced reference must be consumed from the stack before the
2749
 *    supporting reference in the frame is killed.
2750
 *
2751
 * 2. The produced reference cannot be stored as a local.
2752
 *
2753
 * We use abstract interpretation to identify instructions that meet these
2754
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2755
 * stack of abstract references and note any instructions that violate the
2756
 * criteria above. Once we've processed all the instructions in a block, any
2757
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2758
 */
2759
static int
2760
optimize_load_fast(cfg_builder *g)
2761
4.58k
{
2762
4.58k
    int status;
2763
4.58k
    ref_stack refs = {0};
2764
4.58k
    int max_instrs = 0;
2765
4.58k
    basicblock *entryblock = g->g_entryblock;
2766
26.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2767
22.1k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2768
22.1k
    }
2769
4.58k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2770
4.58k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2771
4.58k
    if (instr_flags == NULL) {
2772
0
        PyErr_NoMemory();
2773
0
        return ERROR;
2774
0
    }
2775
4.58k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2776
4.58k
    if (blocks == NULL) {
2777
0
        status = ERROR;
2778
0
        goto done;
2779
0
    }
2780
4.58k
    basicblock **sp = blocks;
2781
4.58k
    *sp = entryblock;
2782
4.58k
    sp++;
2783
4.58k
    entryblock->b_startdepth = 0;
2784
4.58k
    entryblock->b_visited = 1;
2785
2786
4.58k
    #define PUSH_REF(instr, local)                \
2787
140k
        do {                                      \
2788
140k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2789
0
                status = ERROR;                   \
2790
0
                goto done;                        \
2791
0
            }                                     \
2792
140k
        } while(0)
2793
2794
19.7k
    while (sp != blocks) {
2795
15.2k
        basicblock *block = *--sp;
2796
15.2k
        assert(block->b_startdepth > -1);
2797
2798
        // Reset per-block state.
2799
15.2k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2800
2801
        // Reset the stack of refs. We don't track references on the stack
2802
        // across basic blocks, but the bytecode will expect their
2803
        // presence. Add dummy references as necessary.
2804
15.2k
        ref_stack_clear(&refs);
2805
24.9k
        for (int i = 0; i < block->b_startdepth; i++) {
2806
9.74k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2807
9.74k
        }
2808
2809
182k
        for (int i = 0; i < block->b_iused; i++) {
2810
167k
            cfg_instr *instr = &block->b_instr[i];
2811
167k
            int opcode = instr->i_opcode;
2812
167k
            int oparg = instr->i_oparg;
2813
167k
            assert(opcode != EXTENDED_ARG);
2814
167k
            switch (opcode) {
2815
                // Opcodes that load and store locals
2816
7
                case DELETE_FAST: {
2817
7
                    kill_local(instr_flags, &refs, oparg);
2818
7
                    break;
2819
0
                }
2820
2821
18.3k
                case LOAD_FAST: {
2822
18.3k
                    PUSH_REF(i, oparg);
2823
18.3k
                    break;
2824
18.3k
                }
2825
2826
18.3k
                case LOAD_FAST_AND_CLEAR: {
2827
63
                    kill_local(instr_flags, &refs, oparg);
2828
63
                    PUSH_REF(i, oparg);
2829
63
                    break;
2830
63
                }
2831
2832
2.69k
                case LOAD_FAST_LOAD_FAST: {
2833
2.69k
                    PUSH_REF(i, oparg >> 4);
2834
2.69k
                    PUSH_REF(i, oparg & 15);
2835
2.69k
                    break;
2836
2.69k
                }
2837
2838
3.48k
                case STORE_FAST: {
2839
3.48k
                    ref r = ref_stack_pop(&refs);
2840
3.48k
                    store_local(instr_flags, &refs, oparg, r);
2841
3.48k
                    break;
2842
2.69k
                }
2843
2844
38
                case STORE_FAST_LOAD_FAST: {
2845
                    // STORE_FAST
2846
38
                    ref r = ref_stack_pop(&refs);
2847
38
                    store_local(instr_flags, &refs, oparg >> 4, r);
2848
                    // LOAD_FAST
2849
38
                    PUSH_REF(i, oparg & 15);
2850
38
                    break;
2851
38
                }
2852
2853
234
                case STORE_FAST_STORE_FAST: {
2854
                    // STORE_FAST
2855
234
                    ref r = ref_stack_pop(&refs);
2856
234
                    store_local(instr_flags, &refs, oparg >> 4, r);
2857
                    // STORE_FAST
2858
234
                    r = ref_stack_pop(&refs);
2859
234
                    store_local(instr_flags, &refs, oparg & 15, r);
2860
234
                    break;
2861
38
                }
2862
2863
                // Opcodes that shuffle values on the stack
2864
858
                case COPY: {
2865
858
                    assert(oparg > 0);
2866
858
                    Py_ssize_t idx = refs.size - oparg;
2867
858
                    ref r = ref_stack_at(&refs, idx);
2868
858
                    PUSH_REF(r.instr, r.local);
2869
858
                    break;
2870
858
                }
2871
2872
858
                case SWAP: {
2873
501
                    assert(oparg >= 2);
2874
501
                    ref_stack_swap_top(&refs, oparg);
2875
501
                    break;
2876
858
                }
2877
2878
                // We treat opcodes that do not consume all of their inputs on
2879
                // a case by case basis, as we have no generic way of knowing
2880
                // how many inputs should be left on the stack.
2881
2882
                // Opcodes that consume no inputs
2883
1.36k
                case FORMAT_SIMPLE:
2884
1.36k
                case GET_ANEXT:
2885
1.77k
                case GET_ITER:
2886
1.77k
                case GET_LEN:
2887
1.79k
                case GET_YIELD_FROM_ITER:
2888
2.13k
                case IMPORT_FROM:
2889
2.13k
                case MATCH_KEYS:
2890
2.13k
                case MATCH_MAPPING:
2891
2.13k
                case MATCH_SEQUENCE:
2892
2.13k
                case WITH_EXCEPT_START: {
2893
2.13k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2894
2.13k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2895
2.13k
                    int net_pushed = num_pushed - num_popped;
2896
2.13k
                    assert(net_pushed >= 0);
2897
2.87k
                    for (int j = 0; j < net_pushed; j++) {
2898
749
                        PUSH_REF(i, NOT_LOCAL);
2899
749
                    }
2900
2.13k
                    break;
2901
2.13k
                }
2902
2903
                // Opcodes that consume some inputs and push no new values
2904
2.13k
                case DICT_MERGE:
2905
486
                case DICT_UPDATE:
2906
1.06k
                case LIST_APPEND:
2907
1.11k
                case LIST_EXTEND:
2908
7.85k
                case MAP_ADD:
2909
7.85k
                case RERAISE:
2910
8.10k
                case SET_ADD:
2911
8.11k
                case SET_UPDATE: {
2912
8.11k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2913
8.11k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2914
8.11k
                    int net_popped = num_popped - num_pushed;
2915
8.11k
                    assert(net_popped > 0);
2916
22.9k
                    for (int i = 0; i < net_popped; i++) {
2917
14.8k
                        ref_stack_pop(&refs);
2918
14.8k
                    }
2919
8.11k
                    break;
2920
8.10k
                }
2921
2922
145
                case END_SEND:
2923
1.52k
                case SET_FUNCTION_ATTRIBUTE: {
2924
1.52k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2925
1.52k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2926
1.52k
                    ref tos = ref_stack_pop(&refs);
2927
1.52k
                    ref_stack_pop(&refs);
2928
1.52k
                    PUSH_REF(tos.instr, tos.local);
2929
1.52k
                    break;
2930
1.52k
                }
2931
2932
                // Opcodes that consume some inputs and push new values
2933
1.52k
                case CHECK_EXC_MATCH: {
2934
0
                    ref_stack_pop(&refs);
2935
0
                    PUSH_REF(i, NOT_LOCAL);
2936
0
                    break;
2937
0
                }
2938
2939
433
                case FOR_ITER: {
2940
433
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2941
433
                    PUSH_REF(i, NOT_LOCAL);
2942
433
                    break;
2943
433
                }
2944
2945
13.2k
                case LOAD_ATTR:
2946
13.3k
                case LOAD_SUPER_ATTR: {
2947
13.3k
                    ref self = ref_stack_pop(&refs);
2948
13.3k
                    if (opcode == LOAD_SUPER_ATTR) {
2949
168
                        ref_stack_pop(&refs);
2950
168
                        ref_stack_pop(&refs);
2951
168
                    }
2952
13.3k
                    PUSH_REF(i, NOT_LOCAL);
2953
13.3k
                    if (oparg & 1) {
2954
                        // A method call; conservatively assume that self is pushed
2955
                        // back onto the stack
2956
4.13k
                        PUSH_REF(self.instr, self.local);
2957
4.13k
                    }
2958
13.3k
                    break;
2959
13.3k
                }
2960
2961
13.3k
                case LOAD_SPECIAL:
2962
238
                case PUSH_EXC_INFO: {
2963
238
                    ref tos = ref_stack_pop(&refs);
2964
238
                    PUSH_REF(i, NOT_LOCAL);
2965
238
                    PUSH_REF(tos.instr, tos.local);
2966
238
                    break;
2967
238
                }
2968
2969
238
                case SEND: {
2970
145
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2971
145
                    ref_stack_pop(&refs);
2972
145
                    PUSH_REF(i, NOT_LOCAL);
2973
145
                    break;
2974
145
                }
2975
2976
                // Opcodes that consume all of their inputs
2977
115k
                default: {
2978
115k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2979
115k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2980
115k
                    if (HAS_TARGET(instr->i_opcode)) {
2981
5.72k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2982
5.72k
                    }
2983
115k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2984
                        // Block push opcodes only affect the stack when jumping
2985
                        // to the target.
2986
204k
                        for (int j = 0; j < num_popped; j++) {
2987
89.4k
                            ref_stack_pop(&refs);
2988
89.4k
                        }
2989
200k
                        for (int j = 0; j < num_pushed; j++) {
2990
84.8k
                            PUSH_REF(i, NOT_LOCAL);
2991
84.8k
                        }
2992
115k
                    }
2993
115k
                    break;
2994
115k
                }
2995
167k
            }
2996
167k
        }
2997
2998
        // Push fallthrough block
2999
15.2k
        if (BB_HAS_FALLTHROUGH(block)) {
3000
7.51k
            assert(block->b_next != NULL);
3001
7.51k
            load_fast_push_block(&sp, block->b_next, refs.size);
3002
7.51k
        }
3003
3004
        // Mark instructions that produce values that are on the stack at the
3005
        // end of the basic block
3006
29.9k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3007
14.7k
            ref r = ref_stack_at(&refs, i);
3008
14.7k
            if (r.instr != -1) {
3009
8.78k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3010
8.78k
            }
3011
14.7k
        }
3012
3013
        // Optimize instructions
3014
182k
        for (int i = 0; i < block->b_iused; i++) {
3015
167k
            if (!instr_flags[i]) {
3016
155k
                cfg_instr *instr = &block->b_instr[i];
3017
155k
                switch (instr->i_opcode) {
3018
17.8k
                    case LOAD_FAST:
3019
17.8k
                        instr->i_opcode = LOAD_FAST_BORROW;
3020
17.8k
                        break;
3021
2.68k
                    case LOAD_FAST_LOAD_FAST:
3022
2.68k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3023
2.68k
                        break;
3024
134k
                    default:
3025
134k
                        break;
3026
155k
                }
3027
155k
            }
3028
167k
        }
3029
15.2k
    }
3030
3031
4.58k
    #undef PUSH_REF
3032
3033
4.58k
    status = SUCCESS;
3034
3035
4.58k
done:
3036
4.58k
    ref_stack_fini(&refs);
3037
4.58k
    PyMem_Free(instr_flags);
3038
4.58k
    PyMem_Free(blocks);
3039
4.58k
    return status;
3040
4.58k
}
3041
3042
// helper functions for add_checks_for_loads_of_unknown_variables
3043
static inline void
3044
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3045
76.9k
{
3046
    // Push b if the unsafe mask is giving us any new information.
3047
    // To avoid overflowing the stack, only allow each block once.
3048
    // Use b->b_visited=1 to mean that b is currently on the stack.
3049
76.9k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3050
76.9k
    if (b->b_unsafe_locals_mask != both) {
3051
9.02k
        b->b_unsafe_locals_mask = both;
3052
        // More work left to do.
3053
9.02k
        if (!b->b_visited) {
3054
            // not on the stack, so push it.
3055
8.95k
            *(*sp)++ = b;
3056
8.95k
            b->b_visited = 1;
3057
8.95k
        }
3058
9.02k
    }
3059
76.9k
}
3060
3061
static void
3062
scan_block_for_locals(basicblock *b, basicblock ***sp)
3063
28.2k
{
3064
    // bit i is set if local i is potentially uninitialized
3065
28.2k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3066
209k
    for (int i = 0; i < b->b_iused; i++) {
3067
181k
        cfg_instr *instr = &b->b_instr[i];
3068
181k
        assert(instr->i_opcode != EXTENDED_ARG);
3069
181k
        if (instr->i_except != NULL) {
3070
48.3k
            maybe_push(instr->i_except, unsafe_mask, sp);
3071
48.3k
        }
3072
181k
        if (instr->i_oparg >= 64) {
3073
9.65k
            continue;
3074
9.65k
        }
3075
181k
        assert(instr->i_oparg >= 0);
3076
171k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3077
171k
        switch (instr->i_opcode) {
3078
403
            case DELETE_FAST:
3079
529
            case LOAD_FAST_AND_CLEAR:
3080
781
            case STORE_FAST_MAYBE_NULL:
3081
781
                unsafe_mask |= bit;
3082
781
                break;
3083
8.47k
            case STORE_FAST:
3084
8.47k
                unsafe_mask &= ~bit;
3085
8.47k
                break;
3086
41
            case LOAD_FAST_CHECK:
3087
                // If this doesn't raise, then the local is defined.
3088
41
                unsafe_mask &= ~bit;
3089
41
                break;
3090
35.2k
            case LOAD_FAST:
3091
35.2k
                if (unsafe_mask & bit) {
3092
41
                    instr->i_opcode = LOAD_FAST_CHECK;
3093
41
                }
3094
35.2k
                unsafe_mask &= ~bit;
3095
35.2k
                break;
3096
171k
        }
3097
171k
    }
3098
28.2k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3099
13.7k
        maybe_push(b->b_next, unsafe_mask, sp);
3100
13.7k
    }
3101
28.2k
    cfg_instr *last = basicblock_last_instr(b);
3102
28.2k
    if (last && is_jump(last)) {
3103
11.7k
        assert(last->i_target != NULL);
3104
11.7k
        maybe_push(last->i_target, unsafe_mask, sp);
3105
11.7k
    }
3106
28.2k
}
3107
3108
static int
3109
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3110
0
{
3111
0
    assert(nlocals > 64);
3112
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3113
0
    if (states == NULL) {
3114
0
        PyErr_NoMemory();
3115
0
        return ERROR;
3116
0
    }
3117
0
    Py_ssize_t blocknum = 0;
3118
    // state[i - 64] == blocknum if local i is guaranteed to
3119
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3120
    // STORE_FAST within that basicblock (not followed by
3121
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3122
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3123
0
        blocknum++;
3124
0
        for (int i = 0; i < b->b_iused; i++) {
3125
0
            cfg_instr *instr = &b->b_instr[i];
3126
0
            assert(instr->i_opcode != EXTENDED_ARG);
3127
0
            int arg = instr->i_oparg;
3128
0
            if (arg < 64) {
3129
0
                continue;
3130
0
            }
3131
0
            assert(arg >= 0);
3132
0
            switch (instr->i_opcode) {
3133
0
                case DELETE_FAST:
3134
0
                case LOAD_FAST_AND_CLEAR:
3135
0
                case STORE_FAST_MAYBE_NULL:
3136
0
                    states[arg - 64] = blocknum - 1;
3137
0
                    break;
3138
0
                case STORE_FAST:
3139
0
                    states[arg - 64] = blocknum;
3140
0
                    break;
3141
0
                case LOAD_FAST:
3142
0
                    if (states[arg - 64] != blocknum) {
3143
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3144
0
                    }
3145
0
                    states[arg - 64] = blocknum;
3146
0
                    break;
3147
0
                    Py_UNREACHABLE();
3148
0
            }
3149
0
        }
3150
0
    }
3151
0
    PyMem_Free(states);
3152
0
    return SUCCESS;
3153
0
}
3154
3155
static int
3156
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3157
4.58k
{
3158
4.58k
    assert(PyList_CheckExact(consts));
3159
4.58k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3160
4.58k
    if (nconsts == 0) {
3161
16
        return SUCCESS;  /* nothing to do */
3162
16
    }
3163
3164
4.57k
    Py_ssize_t *index_map = NULL;
3165
4.57k
    Py_ssize_t *reverse_index_map = NULL;
3166
4.57k
    int err = ERROR;
3167
3168
4.57k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3169
4.57k
    if (index_map == NULL) {
3170
0
        goto end;
3171
0
    }
3172
28.1k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3173
23.5k
        index_map[i] = -1;
3174
23.5k
    }
3175
    // The first constant may be docstring; keep it always.
3176
4.57k
    index_map[0] = 0;
3177
3178
    /* mark used consts */
3179
26.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3180
198k
        for (int i = 0; i < b->b_iused; i++) {
3181
176k
            int opcode = b->b_instr[i].i_opcode;
3182
176k
            if (OPCODE_HAS_CONST(opcode)) {
3183
23.1k
                int index = b->b_instr[i].i_oparg;
3184
23.1k
                index_map[index] = index;
3185
23.1k
            }
3186
176k
        }
3187
21.7k
    }
3188
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3189
    /* condense consts */
3190
4.57k
    Py_ssize_t n_used_consts = 0;
3191
32.6k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3192
28.1k
        if (index_map[i] != -1) {
3193
19.6k
            assert(index_map[i] == i);
3194
19.6k
            index_map[n_used_consts++] = index_map[i];
3195
19.6k
        }
3196
28.1k
    }
3197
4.57k
    if (n_used_consts == nconsts) {
3198
        /* nothing to do */
3199
2.25k
        err = SUCCESS;
3200
2.25k
        goto end;
3201
2.25k
    }
3202
3203
    /* move all used consts to the beginning of the consts list */
3204
4.57k
    assert(n_used_consts < nconsts);
3205
15.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3206
13.3k
        Py_ssize_t old_index = index_map[i];
3207
13.3k
        assert(i <= old_index && old_index < nconsts);
3208
13.3k
        if (i != old_index) {
3209
8.59k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3210
8.59k
            assert(value != NULL);
3211
8.59k
            PyList_SetItem(consts, i, Py_NewRef(value));
3212
8.59k
        }
3213
13.3k
    }
3214
3215
    /* truncate the consts list at its new size */
3216
2.32k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3217
0
        goto end;
3218
0
    }
3219
    /* adjust const indices in the bytecode */
3220
2.32k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3221
2.32k
    if (reverse_index_map == NULL) {
3222
0
        goto end;
3223
0
    }
3224
24.1k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3225
21.8k
        reverse_index_map[i] = -1;
3226
21.8k
    }
3227
15.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3228
13.3k
        assert(index_map[i] != -1);
3229
13.3k
        assert(reverse_index_map[index_map[i]] == -1);
3230
13.3k
        reverse_index_map[index_map[i]] = i;
3231
13.3k
    }
3232
3233
14.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3234
125k
        for (int i = 0; i < b->b_iused; i++) {
3235
113k
            int opcode = b->b_instr[i].i_opcode;
3236
113k
            if (OPCODE_HAS_CONST(opcode)) {
3237
16.2k
                int index = b->b_instr[i].i_oparg;
3238
16.2k
                assert(reverse_index_map[index] >= 0);
3239
16.2k
                assert(reverse_index_map[index] < n_used_consts);
3240
16.2k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3241
16.2k
            }
3242
113k
        }
3243
11.8k
    }
3244
3245
2.32k
    err = SUCCESS;
3246
4.57k
end:
3247
4.57k
    PyMem_Free(index_map);
3248
4.57k
    PyMem_Free(reverse_index_map);
3249
4.57k
    return err;
3250
2.32k
}
3251
3252
3253
3254
static int
3255
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3256
                                                int nlocals,
3257
                                                int nparams)
3258
4.58k
{
3259
4.58k
    if (nlocals == 0) {
3260
1.48k
        return SUCCESS;
3261
1.48k
    }
3262
3.10k
    if (nlocals > 64) {
3263
        // To avoid O(nlocals**2) compilation, locals beyond the first
3264
        // 64 are only analyzed one basicblock at a time: initialization
3265
        // info is not passed between basicblocks.
3266
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3267
0
            return ERROR;
3268
0
        }
3269
0
        nlocals = 64;
3270
0
    }
3271
3.10k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3272
3.10k
    if (stack == NULL) {
3273
0
        return ERROR;
3274
0
    }
3275
3.10k
    basicblock **sp = stack;
3276
3277
    // First origin of being uninitialized:
3278
    // The non-parameter locals in the entry block.
3279
3.10k
    uint64_t start_mask = 0;
3280
6.02k
    for (int i = nparams; i < nlocals; i++) {
3281
2.91k
        start_mask |= (uint64_t)1 << i;
3282
2.91k
    }
3283
3.10k
    maybe_push(entryblock, start_mask, &sp);
3284
3285
    // Second origin of being uninitialized:
3286
    // There could be DELETE_FAST somewhere, so
3287
    // be sure to scan each basicblock at least once.
3288
22.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3289
19.3k
        scan_block_for_locals(b, &sp);
3290
19.3k
    }
3291
    // Now propagate the uncertainty from the origins we found: Use
3292
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3293
12.0k
    while (sp > stack) {
3294
8.95k
        basicblock *b = *--sp;
3295
        // mark as no longer on stack
3296
8.95k
        b->b_visited = 0;
3297
8.95k
        scan_block_for_locals(b, &sp);
3298
8.95k
    }
3299
3.10k
    PyMem_Free(stack);
3300
3.10k
    return SUCCESS;
3301
3.10k
}
3302
3303
3304
static int
3305
3.63k
mark_warm(basicblock *entryblock) {
3306
3.63k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3307
3.63k
    if (stack == NULL) {
3308
0
        return ERROR;
3309
0
    }
3310
3.63k
    basicblock **sp = stack;
3311
3312
3.63k
    *sp++ = entryblock;
3313
3.63k
    entryblock->b_visited = 1;
3314
17.5k
    while (sp > stack) {
3315
13.9k
        basicblock *b = *(--sp);
3316
13.9k
        assert(!b->b_except_handler);
3317
13.9k
        b->b_warm = 1;
3318
13.9k
        basicblock *next = b->b_next;
3319
13.9k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3320
6.11k
            *sp++ = next;
3321
6.11k
            next->b_visited = 1;
3322
6.11k
        }
3323
132k
        for (int i=0; i < b->b_iused; i++) {
3324
118k
            cfg_instr *instr = &b->b_instr[i];
3325
118k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3326
4.18k
                *sp++ = instr->i_target;
3327
4.18k
                instr->i_target->b_visited = 1;
3328
4.18k
            }
3329
118k
        }
3330
13.9k
    }
3331
3.63k
    PyMem_Free(stack);
3332
3.63k
    return SUCCESS;
3333
3.63k
}
3334
3335
static int
3336
3.63k
mark_cold(basicblock *entryblock) {
3337
24.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3338
20.8k
        assert(!b->b_cold && !b->b_warm);
3339
20.8k
    }
3340
3.63k
    if (mark_warm(entryblock) < 0) {
3341
0
        return ERROR;
3342
0
    }
3343
3344
3.63k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3345
3.63k
    if (stack == NULL) {
3346
0
        return ERROR;
3347
0
    }
3348
3349
3.63k
    basicblock **sp = stack;
3350
24.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3351
20.8k
        if (b->b_except_handler) {
3352
1.62k
            assert(!b->b_warm);
3353
1.62k
            *sp++ = b;
3354
1.62k
            b->b_visited = 1;
3355
1.62k
        }
3356
20.8k
    }
3357
3358
6.62k
    while (sp > stack) {
3359
2.99k
        basicblock *b = *(--sp);
3360
2.99k
        b->b_cold = 1;
3361
2.99k
        basicblock *next = b->b_next;
3362
2.99k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3363
904
            if (!next->b_warm && !next->b_visited) {
3364
725
                *sp++ = next;
3365
725
                next->b_visited = 1;
3366
725
            }
3367
904
        }
3368
15.6k
        for (int i = 0; i < b->b_iused; i++) {
3369
12.7k
            cfg_instr *instr = &b->b_instr[i];
3370
12.7k
            if (is_jump(instr)) {
3371
880
                assert(i == b->b_iused - 1);
3372
880
                basicblock *target = b->b_instr[i].i_target;
3373
880
                if (!target->b_warm && !target->b_visited) {
3374
645
                    *sp++ = target;
3375
645
                    target->b_visited = 1;
3376
645
                }
3377
880
            }
3378
12.7k
        }
3379
2.99k
    }
3380
3.63k
    PyMem_Free(stack);
3381
3.63k
    return SUCCESS;
3382
3.63k
}
3383
3384
3385
static int
3386
4.58k
push_cold_blocks_to_end(cfg_builder *g) {
3387
4.58k
    basicblock *entryblock = g->g_entryblock;
3388
4.58k
    if (entryblock->b_next == NULL) {
3389
        /* single basicblock, no need to reorder */
3390
952
        return SUCCESS;
3391
952
    }
3392
3.63k
    RETURN_IF_ERROR(mark_cold(entryblock));
3393
3394
3.63k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3395
3396
    /* If we have a cold block with fallthrough to a warm block, add */
3397
    /* an explicit jump instead of fallthrough */
3398
24.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3399
20.9k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3400
145
            basicblock *explicit_jump = cfg_builder_new_block(g);
3401
145
            if (explicit_jump == NULL) {
3402
0
                return ERROR;
3403
0
            }
3404
145
            if (!IS_LABEL(b->b_next->b_label)) {
3405
0
                b->b_next->b_label.id = next_lbl++;
3406
0
            }
3407
145
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3408
145
                             NO_LOCATION);
3409
145
            explicit_jump->b_cold = 1;
3410
145
            explicit_jump->b_next = b->b_next;
3411
145
            explicit_jump->b_predecessors = 1;
3412
145
            b->b_next = explicit_jump;
3413
3414
            /* set target */
3415
145
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3416
145
            last->i_target = explicit_jump->b_next;
3417
145
        }
3418
20.9k
    }
3419
3420
3.63k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3421
3.63k
    basicblock *cold_blocks = NULL;
3422
3.63k
    basicblock *cold_blocks_tail = NULL;
3423
3424
3.63k
    basicblock *b = entryblock;
3425
4.83k
    while(b->b_next) {
3426
4.83k
        assert(!b->b_cold);
3427
19.0k
        while (b->b_next && !b->b_next->b_cold) {
3428
14.2k
            b = b->b_next;
3429
14.2k
        }
3430
4.83k
        if (b->b_next == NULL) {
3431
            /* no more cold blocks */
3432
3.63k
            break;
3433
3.63k
        }
3434
3435
        /* b->b_next is the beginning of a cold streak */
3436
4.83k
        assert(!b->b_cold && b->b_next->b_cold);
3437
3438
1.20k
        basicblock *b_end = b->b_next;
3439
3.13k
        while (b_end->b_next && b_end->b_next->b_cold) {
3440
1.93k
            b_end = b_end->b_next;
3441
1.93k
        }
3442
3443
        /* b_end is the end of the cold streak */
3444
1.20k
        assert(b_end && b_end->b_cold);
3445
1.20k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3446
3447
1.20k
        if (cold_blocks == NULL) {
3448
572
            cold_blocks = b->b_next;
3449
572
        }
3450
628
        else {
3451
628
            cold_blocks_tail->b_next = b->b_next;
3452
628
        }
3453
1.20k
        cold_blocks_tail = b_end;
3454
1.20k
        b->b_next = b_end->b_next;
3455
1.20k
        b_end->b_next = NULL;
3456
1.20k
    }
3457
3.63k
    assert(b != NULL && b->b_next == NULL);
3458
3.63k
    b->b_next = cold_blocks;
3459
3460
3.63k
    if (cold_blocks != NULL) {
3461
572
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3462
572
    }
3463
3.63k
    return SUCCESS;
3464
3.63k
}
3465
3466
static int
3467
convert_pseudo_conditional_jumps(cfg_builder *g)
3468
4.58k
{
3469
4.58k
    basicblock *entryblock = g->g_entryblock;
3470
26.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3471
195k
        for (int i = 0; i < b->b_iused; i++) {
3472
173k
            cfg_instr *instr = &b->b_instr[i];
3473
173k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3474
559
                assert(i == b->b_iused - 1);
3475
559
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3476
505
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3477
559
                location loc = instr->i_loc;
3478
559
                basicblock *except = instr->i_except;
3479
559
                cfg_instr copy = {
3480
559
                            .i_opcode = COPY,
3481
559
                            .i_oparg = 1,
3482
559
                            .i_loc = loc,
3483
559
                            .i_target = NULL,
3484
559
                            .i_except = except,
3485
559
                };
3486
559
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3487
559
                cfg_instr to_bool = {
3488
559
                            .i_opcode = TO_BOOL,
3489
559
                            .i_oparg = 0,
3490
559
                            .i_loc = loc,
3491
559
                            .i_target = NULL,
3492
559
                            .i_except = except,
3493
559
                };
3494
559
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3495
559
            }
3496
173k
        }
3497
21.9k
    }
3498
4.58k
    return SUCCESS;
3499
4.58k
}
3500
3501
static int
3502
convert_pseudo_ops(cfg_builder *g)
3503
4.58k
{
3504
4.58k
    basicblock *entryblock = g->g_entryblock;
3505
26.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3506
198k
        for (int i = 0; i < b->b_iused; i++) {
3507
176k
            cfg_instr *instr = &b->b_instr[i];
3508
176k
            if (is_block_push(instr)) {
3509
1.62k
                INSTR_SET_OP0(instr, NOP);
3510
1.62k
            }
3511
174k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3512
1.16k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3513
1.16k
                instr->i_opcode = LOAD_FAST;
3514
1.16k
            }
3515
173k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3516
126
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3517
126
                instr->i_opcode = STORE_FAST;
3518
126
            }
3519
176k
        }
3520
21.9k
    }
3521
4.58k
    return remove_redundant_nops_and_jumps(g);
3522
4.58k
}
3523
3524
static inline bool
3525
30.8k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3526
30.8k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3527
18.4k
        return basicblock_has_no_lineno(b);
3528
18.4k
    }
3529
12.3k
    else {
3530
12.3k
        return false;
3531
12.3k
    }
3532
30.8k
}
3533
3534
3535
/* PEP 626 mandates that the f_lineno of a frame is correct
3536
 * after a frame terminates. It would be prohibitively expensive
3537
 * to continuously update the f_lineno field at runtime,
3538
 * so we make sure that all exiting instruction (raises and returns)
3539
 * have a valid line number, allowing us to compute f_lineno lazily.
3540
 * We can do this by duplicating the exit blocks without line number
3541
 * so that none have more than one predecessor. We can then safely
3542
 * copy the line number from the sole predecessor block.
3543
 */
3544
static int
3545
duplicate_exits_without_lineno(cfg_builder *g)
3546
9.17k
{
3547
9.17k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3548
3549
    /* Copy all exit blocks without line number that are targets of a jump.
3550
     */
3551
9.17k
    basicblock *entryblock = g->g_entryblock;
3552
52.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3553
43.7k
        cfg_instr *last = basicblock_last_instr(b);
3554
43.7k
        if (last == NULL) {
3555
7.64k
            continue;
3556
7.64k
        }
3557
36.0k
        if (is_jump(last)) {
3558
14.5k
            basicblock *target = next_nonempty_block(last->i_target);
3559
14.5k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3560
378
                basicblock *new_target = copy_basicblock(g, target);
3561
378
                if (new_target == NULL) {
3562
0
                    return ERROR;
3563
0
                }
3564
378
                new_target->b_instr[0].i_loc = last->i_loc;
3565
378
                last->i_target = new_target;
3566
378
                target->b_predecessors--;
3567
378
                new_target->b_predecessors = 1;
3568
378
                new_target->b_next = target->b_next;
3569
378
                new_target->b_label.id = next_lbl++;
3570
378
                target->b_next = new_target;
3571
378
            }
3572
14.5k
        }
3573
36.0k
    }
3574
3575
    /* Any remaining reachable exit blocks without line number can only be reached by
3576
     * fall through, and thus can only have a single predecessor */
3577
52.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3578
43.7k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3579
16.3k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3580
514
                cfg_instr *last = basicblock_last_instr(b);
3581
514
                assert(last != NULL);
3582
514
                b->b_next->b_instr[0].i_loc = last->i_loc;
3583
514
            }
3584
16.3k
        }
3585
43.7k
    }
3586
9.17k
    return SUCCESS;
3587
9.17k
}
3588
3589
3590
/* If an instruction has no line number, but it's predecessor in the BB does,
3591
 * then copy the line number. If a successor block has no line number, and only
3592
 * one predecessor, then inherit the line number.
3593
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3594
 * Also reduces the size of the line number table,
3595
 * but has no impact on the generated line number events.
3596
 */
3597
3598
static inline void
3599
maybe_propagate_location(basicblock *b, int i, location loc)
3600
381k
{
3601
381k
    assert(b->b_iused > i);
3602
381k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3603
25.1k
         b->b_instr[i].i_loc = loc;
3604
25.1k
    }
3605
381k
}
3606
3607
static void
3608
propagate_line_numbers(basicblock *entryblock)
3609
9.17k
{
3610
52.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3611
43.7k
        cfg_instr *last = basicblock_last_instr(b);
3612
43.7k
        if (last == NULL) {
3613
7.64k
            continue;
3614
7.64k
        }
3615
3616
36.0k
        location prev_location = NO_LOCATION;
3617
399k
        for (int i = 0; i < b->b_iused; i++) {
3618
362k
            maybe_propagate_location(b, i, prev_location);
3619
362k
            prev_location = b->b_instr[i].i_loc;
3620
362k
        }
3621
36.0k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3622
11.8k
            if (b->b_next->b_iused > 0) {
3623
11.8k
                maybe_propagate_location(b->b_next, 0, prev_location);
3624
11.8k
            }
3625
11.8k
        }
3626
36.0k
        if (is_jump(last)) {
3627
14.5k
            basicblock *target = last->i_target;
3628
14.5k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3629
1
                target = target->b_next;
3630
1
            }
3631
14.5k
            if (target->b_predecessors == 1) {
3632
6.56k
                maybe_propagate_location(target, 0, prev_location);
3633
6.56k
            }
3634
14.5k
        }
3635
36.0k
    }
3636
9.17k
}
3637
3638
static int
3639
resolve_line_numbers(cfg_builder *g, int firstlineno)
3640
9.17k
{
3641
9.17k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3642
9.17k
    propagate_line_numbers(g->g_entryblock);
3643
9.17k
    return SUCCESS;
3644
9.17k
}
3645
3646
int
3647
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3648
                        int nlocals, int nparams, int firstlineno)
3649
4.58k
{
3650
4.58k
    assert(cfg_builder_check(g));
3651
    /** Preprocessing **/
3652
    /* Map labels to targets and mark exception handlers */
3653
4.58k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3654
4.58k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3655
4.58k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3656
3657
    /** Optimization **/
3658
4.58k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3659
4.58k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3660
4.58k
    RETURN_IF_ERROR(
3661
4.58k
        add_checks_for_loads_of_uninitialized_variables(
3662
4.58k
            g->g_entryblock, nlocals, nparams));
3663
4.58k
    RETURN_IF_ERROR(insert_superinstructions(g));
3664
3665
4.58k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3666
4.58k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3667
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3668
    // assert(all_exits_have_lineno(g->g_entryblock));
3669
4.58k
    return SUCCESS;
3670
4.58k
}
3671
3672
static int *
3673
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3674
4.58k
{
3675
4.58k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3676
4.58k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3677
4.58k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3678
3679
4.58k
    int noffsets = ncellvars + nfreevars;
3680
4.58k
    int *fixed = PyMem_New(int, noffsets);
3681
4.58k
    if (fixed == NULL) {
3682
0
        PyErr_NoMemory();
3683
0
        return NULL;
3684
0
    }
3685
6.22k
    for (int i = 0; i < noffsets; i++) {
3686
1.64k
        fixed[i] = nlocals + i;
3687
1.64k
    }
3688
3689
4.58k
    PyObject *varname, *cellindex;
3690
4.58k
    Py_ssize_t pos = 0;
3691
5.48k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3692
903
        PyObject *varindex;
3693
903
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3694
0
            goto error;
3695
0
        }
3696
903
        if (varindex == NULL) {
3697
518
            continue;
3698
518
        }
3699
3700
385
        int argoffset = PyLong_AsInt(varindex);
3701
385
        Py_DECREF(varindex);
3702
385
        if (argoffset == -1 && PyErr_Occurred()) {
3703
0
            goto error;
3704
0
        }
3705
3706
385
        int oldindex = PyLong_AsInt(cellindex);
3707
385
        if (oldindex == -1 && PyErr_Occurred()) {
3708
0
            goto error;
3709
0
        }
3710
385
        fixed[oldindex] = argoffset;
3711
385
    }
3712
4.58k
    return fixed;
3713
3714
0
error:
3715
0
    PyMem_Free(fixed);
3716
0
    return NULL;
3717
4.58k
}
3718
3719
#define IS_GENERATOR(CF) \
3720
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3721
3722
static int
3723
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3724
                           int *fixed, int nfreevars)
3725
4.58k
{
3726
4.58k
    assert(umd->u_firstlineno > 0);
3727
3728
    /* Set up cells for any variable that escapes, to be put in a closure. */
3729
4.58k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3730
4.58k
    if (ncellvars) {
3731
        // umd->u_cellvars has the cells out of order so we sort them
3732
        // before adding the MAKE_CELL instructions.  Note that we
3733
        // adjust for arg cells, which come first.
3734
625
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3735
625
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3736
625
        if (sorted == NULL) {
3737
0
            PyErr_NoMemory();
3738
0
            return ERROR;
3739
0
        }
3740
1.52k
        for (int i = 0; i < ncellvars; i++) {
3741
903
            sorted[fixed[i]] = i + 1;
3742
903
        }
3743
2.19k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3744
1.57k
            int oldindex = sorted[i] - 1;
3745
1.57k
            if (oldindex == -1) {
3746
669
                continue;
3747
669
            }
3748
903
            cfg_instr make_cell = {
3749
903
                .i_opcode = MAKE_CELL,
3750
                // This will get fixed in offset_derefs().
3751
903
                .i_oparg = oldindex,
3752
903
                .i_loc = NO_LOCATION,
3753
903
                .i_target = NULL,
3754
903
                .i_except = NULL,
3755
903
            };
3756
903
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3757
0
                PyMem_RawFree(sorted);
3758
0
                return ERROR;
3759
0
            }
3760
903
            ncellsused += 1;
3761
903
        }
3762
625
        PyMem_RawFree(sorted);
3763
625
    }
3764
3765
4.58k
    if (nfreevars) {
3766
489
        cfg_instr copy_frees = {
3767
489
            .i_opcode = COPY_FREE_VARS,
3768
489
            .i_oparg = nfreevars,
3769
489
            .i_loc = NO_LOCATION,
3770
489
            .i_target = NULL,
3771
489
            .i_except = NULL,
3772
489
        };
3773
489
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3774
489
    }
3775
3776
4.58k
    return SUCCESS;
3777
4.58k
}
3778
3779
static int
3780
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3781
4.58k
{
3782
4.58k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3783
4.58k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3784
4.58k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3785
4.58k
    int noffsets = ncellvars + nfreevars;
3786
3787
    // First deal with duplicates (arg cells).
3788
4.58k
    int numdropped = 0;
3789
6.22k
    for (int i = 0; i < noffsets ; i++) {
3790
1.64k
        if (fixedmap[i] == i + nlocals) {
3791
1.25k
            fixedmap[i] -= numdropped;
3792
1.25k
        }
3793
385
        else {
3794
            // It was a duplicate (cell/arg).
3795
385
            numdropped += 1;
3796
385
        }
3797
1.64k
    }
3798
3799
    // Then update offsets, either relative to locals or by cell2arg.
3800
26.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3801
198k
        for (int i = 0; i < b->b_iused; i++) {
3802
176k
            cfg_instr *inst = &b->b_instr[i];
3803
            // This is called before extended args are generated.
3804
176k
            assert(inst->i_opcode != EXTENDED_ARG);
3805
176k
            int oldoffset = inst->i_oparg;
3806
176k
            switch(inst->i_opcode) {
3807
903
                case MAKE_CELL:
3808
2.07k
                case LOAD_CLOSURE:
3809
3.92k
                case LOAD_DEREF:
3810
4.43k
                case STORE_DEREF:
3811
4.44k
                case DELETE_DEREF:
3812
4.44k
                case LOAD_FROM_DICT_OR_DEREF:
3813
4.44k
                    assert(oldoffset >= 0);
3814
4.44k
                    assert(oldoffset < noffsets);
3815
4.44k
                    assert(fixedmap[oldoffset] >= 0);
3816
4.44k
                    inst->i_oparg = fixedmap[oldoffset];
3817
176k
            }
3818
176k
        }
3819
21.9k
    }
3820
3821
4.58k
    return numdropped;
3822
4.58k
}
3823
3824
static int
3825
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
3826
4.58k
{
3827
4.58k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3828
4.58k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3829
4.58k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3830
4.58k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3831
4.58k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3832
4.58k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3833
4.58k
    assert(INT_MAX - nlocals - ncellvars > 0);
3834
4.58k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3835
4.58k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3836
4.58k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3837
4.58k
    if (cellfixedoffsets == NULL) {
3838
0
        return ERROR;
3839
0
    }
3840
3841
    // This must be called before fix_cell_offsets().
3842
4.58k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
3843
0
        PyMem_Free(cellfixedoffsets);
3844
0
        return ERROR;
3845
0
    }
3846
3847
4.58k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3848
4.58k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3849
4.58k
    cellfixedoffsets = NULL;
3850
4.58k
    if (numdropped < 0) {
3851
0
        return ERROR;
3852
0
    }
3853
3854
4.58k
    nlocalsplus -= numdropped;
3855
4.58k
    return nlocalsplus;
3856
4.58k
}
3857
3858
cfg_builder *
3859
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3860
4.58k
{
3861
4.58k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3862
0
        return NULL;
3863
0
    }
3864
4.58k
    cfg_builder *g = _PyCfgBuilder_New();
3865
4.58k
    if (g == NULL) {
3866
0
        return NULL;
3867
0
    }
3868
200k
    for (int i = 0; i < seq->s_used; i++) {
3869
195k
        seq->s_instrs[i].i_target = 0;
3870
195k
    }
3871
200k
    for (int i = 0; i < seq->s_used; i++) {
3872
195k
        _PyInstruction *instr = &seq->s_instrs[i];
3873
195k
        if (HAS_TARGET(instr->i_opcode)) {
3874
9.99k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3875
9.99k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3876
9.99k
        }
3877
195k
    }
3878
4.58k
    int offset = 0;
3879
200k
    for (int i = 0; i < seq->s_used; i++) {
3880
195k
        _PyInstruction *instr = &seq->s_instrs[i];
3881
195k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3882
588
            if (seq->s_annotations_code != NULL) {
3883
0
                assert(seq->s_annotations_code->s_labelmap_size == 0
3884
0
                    && seq->s_annotations_code->s_nested == NULL);
3885
0
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3886
0
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3887
0
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3888
0
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3889
0
                        goto error;
3890
0
                    }
3891
0
                }
3892
0
                offset += seq->s_annotations_code->s_used - 1;
3893
0
            }
3894
588
            else {
3895
588
                offset -= 1;
3896
588
            }
3897
588
            continue;
3898
588
        }
3899
194k
        if (instr->i_target) {
3900
8.09k
            jump_target_label lbl_ = {i + offset};
3901
8.09k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3902
0
                goto error;
3903
0
            }
3904
8.09k
        }
3905
194k
        int opcode = instr->i_opcode;
3906
194k
        int oparg = instr->i_oparg;
3907
194k
        if (HAS_TARGET(opcode)) {
3908
9.99k
            oparg += offset;
3909
9.99k
        }
3910
194k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3911
0
            goto error;
3912
0
        }
3913
194k
    }
3914
4.58k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3915
0
        goto error;
3916
0
    }
3917
4.58k
    return g;
3918
0
error:
3919
0
    _PyCfgBuilder_Free(g);
3920
0
    return NULL;
3921
4.58k
}
3922
3923
int
3924
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3925
4.58k
{
3926
4.58k
    int lbl = 0;
3927
26.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3928
22.1k
        b->b_label = (jump_target_label){lbl};
3929
22.1k
        lbl += 1;
3930
22.1k
    }
3931
26.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3932
22.1k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3933
202k
        for (int i = 0; i < b->b_iused; i++) {
3934
180k
            cfg_instr *instr = &b->b_instr[i];
3935
180k
            if (HAS_TARGET(instr->i_opcode)) {
3936
                /* Set oparg to the label id (it will later be mapped to an offset) */
3937
7.33k
                instr->i_oparg = instr->i_target->b_label.id;
3938
7.33k
            }
3939
180k
            RETURN_IF_ERROR(
3940
180k
                _PyInstructionSequence_Addop(
3941
180k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3942
3943
180k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3944
180k
            if (instr->i_except != NULL) {
3945
26.7k
                hi->h_label = instr->i_except->b_label.id;
3946
26.7k
                hi->h_startdepth = instr->i_except->b_startdepth;
3947
26.7k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3948
26.7k
            }
3949
153k
            else {
3950
153k
                hi->h_label = -1;
3951
153k
            }
3952
180k
        }
3953
22.1k
    }
3954
4.58k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3955
0
        return ERROR;
3956
0
    }
3957
4.58k
    return SUCCESS;
3958
4.58k
}
3959
3960
3961
int
3962
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3963
                                     _PyCompile_CodeUnitMetadata *umd,
3964
                                     int *stackdepth, int *nlocalsplus,
3965
                                     _PyInstructionSequence *seq)
3966
4.58k
{
3967
4.58k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3968
3969
4.58k
    *stackdepth = calculate_stackdepth(g);
3970
4.58k
    if (*stackdepth < 0) {
3971
0
        return ERROR;
3972
0
    }
3973
3974
4.58k
    *nlocalsplus = prepare_localsplus(umd, g);
3975
4.58k
    if (*nlocalsplus < 0) {
3976
0
        return ERROR;
3977
0
    }
3978
3979
4.58k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
3980
3981
    /* Order of basic blocks must have been determined by now */
3982
3983
4.58k
    RETURN_IF_ERROR(normalize_jumps(g));
3984
4.58k
    assert(no_redundant_jumps(g));
3985
3986
    /* Can't modify the bytecode after inserting instructions that produce
3987
     * borrowed references.
3988
     */
3989
4.58k
    RETURN_IF_ERROR(optimize_load_fast(g));
3990
3991
    /* Can't modify the bytecode after computing jump offsets. */
3992
4.58k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
3993
0
        return ERROR;
3994
0
    }
3995
3996
4.58k
    return SUCCESS;
3997
4.58k
}
3998
3999
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4000
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4001
 */
4002
int
4003
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4004
0
{
4005
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4006
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4007
0
    return SUCCESS;
4008
0
}
4009
4010
/* Exported API functions */
4011
4012
int
4013
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4014
0
{
4015
0
    stack_effects effs;
4016
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4017
0
        return PY_INVALID_STACK_EFFECT;
4018
0
    }
4019
0
    return effs.net;
4020
0
}
4021
4022
int
4023
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4024
25
{
4025
25
    stack_effects effs;
4026
25
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4027
0
        return PY_INVALID_STACK_EFFECT;
4028
0
    }
4029
25
    return effs.net;
4030
25
}
4031
4032
/* Access to compiler optimizations for unit tests.
4033
4034
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4035
 * a CFG, optimizes it and converts back to an instruction list.
4036
 */
4037
4038
static PyObject *
4039
cfg_to_instruction_sequence(cfg_builder *g)
4040
0
{
4041
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4042
0
    if (seq == NULL) {
4043
0
        return NULL;
4044
0
    }
4045
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4046
0
        PyInstructionSequence_Fini(seq);
4047
0
        return NULL;
4048
0
    }
4049
0
    return (PyObject*)seq;
4050
0
}
4051
4052
PyObject *
4053
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4054
0
{
4055
0
    if (!_PyInstructionSequence_Check(seq)) {
4056
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4057
0
        return NULL;
4058
0
    }
4059
0
    PyObject *const_cache = PyDict_New();
4060
0
    if (const_cache == NULL) {
4061
0
        return NULL;
4062
0
    }
4063
4064
0
    PyObject *res = NULL;
4065
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4066
0
    if (g == NULL) {
4067
0
        goto error;
4068
0
    }
4069
0
    int nparams = 0, firstlineno = 1;
4070
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4071
0
                                nparams, firstlineno) < 0) {
4072
0
        goto error;
4073
0
    }
4074
4075
0
    if (calculate_stackdepth(g) == ERROR) {
4076
0
        goto error;
4077
0
    }
4078
4079
0
    if (optimize_load_fast(g) != SUCCESS) {
4080
0
        goto error;
4081
0
    }
4082
4083
0
    res = cfg_to_instruction_sequence(g);
4084
0
error:
4085
0
    Py_DECREF(const_cache);
4086
0
    _PyCfgBuilder_Free(g);
4087
0
    return res;
4088
0
}