Coverage Report

Created: 2025-11-24 06:34

/src/cpython3/Python/flowgraph.c
Line    Count    Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
11.7M
#define SUCCESS 0
19
41.8k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
17.8M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
4.05M
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the blocks are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
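
A basicblock is therefore threaded onto two independent singly linked lists: b_list records allocation order and is used only for bookkeeping and freeing, while b_next records layout/control-flow order. A minimal sketch of the distinction, using a toy stand-in type rather than the real basicblock:

/* Toy sketch -- simplified types, not the real basicblock. */
typedef struct toy_block {
    int id;
    struct toy_block *list;   /* next block in allocation order */
    struct toy_block *next;   /* fallthrough successor in the CFG */
} toy_block;

/* Visits every allocated block, including ones no longer linked
 * into the CFG via 'next'. */
static void
visit_all_allocated(toy_block *newest, void (*f)(toy_block *))
{
    for (toy_block *b = newest; b != NULL; b = b->list) {
        f(b);
    }
}

This is why _PyCfgBuilder_Free (further down) walks g_block_list through b_list: blocks unlinked from the b_next chain during optimization must still be released.
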
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
4.17M
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
4.17M
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
3.32k
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
16.5M
{
101
16.5M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
16.5M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
16.5M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
13.9M
{
108
13.9M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
13.9M
}
110
111
/* One arg */
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
938k
    do { \
114
938k
        assert(OPCODE_HAS_ARG(OP)); \
115
938k
        cfg_instr *_instr__ptr_ = (I); \
116
938k
        _instr__ptr_->i_opcode = (OP); \
117
938k
        _instr__ptr_->i_oparg = (ARG); \
118
938k
    } while (0);
119
120
/* No args */
121
#define INSTR_SET_OP0(I, OP) \
122
1.44M
    do { \
123
1.44M
        assert(!OPCODE_HAS_ARG(OP)); \
124
1.44M
        cfg_instr *_instr__ptr_ = (I); \
125
1.44M
        _instr__ptr_->i_opcode = (OP); \
126
1.44M
        _instr__ptr_->i_oparg = 0; \
127
1.44M
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
915k
    do { \
131
915k
        cfg_instr *_instr__ptr_ = (I); \
132
915k
        _instr__ptr_->i_loc = (LOC); \
133
915k
    } while (0);
134
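
All three setter macros bind their I argument to the _instr__ptr_ temporary, so the destination expression is evaluated exactly once no matter how many fields the body assigns, and i_target/i_except are deliberately left untouched. A hypothetical call site:

/* Hypothetical: &b->b_instr[pos] is evaluated once; only the opcode
 * and oparg of the instruction change. */
INSTR_SET_OP1(&b->b_instr[pos], LOAD_SMALL_INT, 42);
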
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
4.05M
{
144
4.05M
    assert(b != NULL);
145
4.05M
    _Py_c_array_t array = {
146
4.05M
        .array = (void*)b->b_instr,
147
4.05M
        .allocated_entries = b->b_ialloc,
148
4.05M
        .item_size = sizeof(cfg_instr),
149
4.05M
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
4.05M
    };
151
152
4.05M
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
4.05M
    b->b_instr = array.array;
154
4.05M
    b->b_ialloc = array.allocated_entries;
155
4.05M
    return b->b_iused++;
156
4.05M
}
157
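
basicblock_next_instr above is the single growth point for a block's instruction array: it reserves room for one more cfg_instr and returns the index of the fresh slot. A self-contained sketch of the same grow-and-return-index pattern, with plain realloc standing in for _Py_CArray_EnsureCapacity:

#include <stdlib.h>

typedef struct { int opcode; int oparg; } toy_instr;
typedef struct { toy_instr *instr; int used; int alloc; } toy_instr_array;

/* Returns the index of a fresh slot, or -1 on allocation failure.
 * (Sketch only: no arithmetic-overflow checks.) */
static int
toy_next_slot(toy_instr_array *b)
{
    if (b->used >= b->alloc) {
        int newalloc = b->alloc ? 2 * b->alloc : 16;  /* cf. DEFAULT_BLOCK_SIZE */
        toy_instr *p = realloc(b->instr, (size_t)newalloc * sizeof(toy_instr));
        if (p == NULL) {
            return -1;
        }
        b->instr = p;
        b->alloc = newalloc;
    }
    return b->used++;
}
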
158
static cfg_instr *
159
13.0M
basicblock_last_instr(const basicblock *b) {
160
13.0M
    assert(b->b_iused >= 0);
161
13.0M
    if (b->b_iused > 0) {
162
12.3M
        assert(b->b_instr != NULL);
163
12.3M
        return &b->b_instr[b->b_iused - 1];
164
12.3M
    }
165
730k
    return NULL;
166
13.0M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
352k
{
175
352k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
352k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with the new block. */
181
352k
    b->b_list = g->g_block_list;
182
352k
    g->g_block_list = b;
183
352k
    b->b_label = NO_LABEL;
184
352k
    return b;
185
352k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
4.00M
{
190
4.00M
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
4.00M
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
4.00M
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
4.00M
    assert(0 <= oparg && oparg < (1 << 30));
194
195
4.00M
    int off = basicblock_next_instr(b);
196
4.00M
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
4.00M
    cfg_instr *i = &b->b_instr[off];
200
4.00M
    i->i_opcode = opcode;
201
4.00M
    i->i_oparg = oparg;
202
4.00M
    i->i_loc = loc;
203
    // memory is already zero initialized
204
4.00M
    assert(i->i_target == NULL);
205
4.00M
    assert(i->i_except == NULL);
206
207
4.00M
    return SUCCESS;
208
4.00M
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
8.63k
{
213
8.63k
    cfg_instr *last = basicblock_last_instr(b);
214
8.63k
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
8.63k
    RETURN_IF_ERROR(
219
8.63k
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
8.63k
    last = basicblock_last_instr(b);
221
8.63k
    assert(last && last->i_opcode == opcode);
222
8.63k
    last->i_target = target;
223
8.63k
    return SUCCESS;
224
8.63k
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
12.2k
{
229
38.4k
    for (int i = 0; i < from->b_iused; i++) {
230
26.1k
        int n = basicblock_next_instr(to);
231
26.1k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
26.1k
        to->b_instr[n] = from->b_instr[i];
235
26.1k
    }
236
12.2k
    return SUCCESS;
237
12.2k
}
238
239
static inline int
240
3.52M
basicblock_nofallthrough(const basicblock *b) {
241
3.52M
    cfg_instr *last = basicblock_last_instr(b);
242
3.52M
    return (last &&
243
3.39M
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
2.60M
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
3.52M
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
5.48M
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
1.62k
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
1.62k
    assert(BB_NO_FALLTHROUGH(block));
257
1.62k
    basicblock *result = cfg_builder_new_block(g);
258
1.62k
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
1.62k
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
1.62k
    return result;
265
1.62k
}
266
267
static int
268
25.8k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
25.8k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
441k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
415k
        block->b_instr[i] = block->b_instr[i-1];
272
415k
    }
273
25.8k
    block->b_instr[pos] = *instr;
274
25.8k
    return SUCCESS;
275
25.8k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
300k
{
343
300k
    assert(block != NULL);
344
300k
    g->g_curblock->b_next = block;
345
300k
    g->g_curblock = block;
346
300k
    return block;
347
300k
}
348
349
static inline int
350
817k
basicblock_exits_scope(const basicblock *b) {
351
817k
    cfg_instr *last = basicblock_last_instr(b);
352
817k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
817k
}
354
355
static inline int
356
629k
basicblock_has_eval_break(const basicblock *b) {
357
3.84M
    for (int i = 0; i < b->b_iused; i++) {
358
3.33M
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
115k
            return true;
360
115k
        }
361
3.33M
    }
362
513k
    return false;
363
629k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
4.05M
{
368
4.05M
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
4.05M
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
240k
        return true;
371
240k
    }
372
3.80M
    if (IS_LABEL(g->g_current_label)) {
373
59.6k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
59.6k
            return true;
375
59.6k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
59.6k
    }
382
3.74M
    return false;
383
3.80M
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
4.05M
{
388
4.05M
    if (cfg_builder_current_block_is_terminated(g)) {
389
300k
        basicblock *b = cfg_builder_new_block(g);
390
300k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
300k
        b->b_label = g->g_current_label;
394
300k
        g->g_current_label = NO_LABEL;
395
300k
        cfg_builder_use_next_block(g, b);
396
300k
    }
397
4.05M
    return SUCCESS;
398
4.05M
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
84.0k
{
404
84.0k
    assert(g->g_entryblock->b_iused > 0);
405
779k
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
695k
        assert(!_PyMem_IsPtrFreed(block));
407
695k
        if (block->b_instr != NULL) {
408
695k
            assert(block->b_ialloc > 0);
409
695k
            assert(block->b_iused >= 0);
410
695k
            assert(block->b_ialloc >= block->b_iused);
411
695k
        }
412
0
        else {
413
0
            assert (block->b_iused == 0);
414
0
            assert (block->b_ialloc == 0);
415
0
        }
416
695k
    }
417
84.0k
    return true;
418
84.0k
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
42.0k
{
424
42.0k
    g->g_block_list = NULL;
425
42.0k
    basicblock *block = cfg_builder_new_block(g);
426
42.0k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
42.0k
    g->g_curblock = g->g_entryblock = block;
430
42.0k
    g->g_current_label = NO_LABEL;
431
42.0k
    return SUCCESS;
432
42.0k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
42.0k
{
437
42.0k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
42.0k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
42.0k
    memset(g, 0, sizeof(cfg_builder));
443
42.0k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
42.0k
    return g;
448
42.0k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
42.0k
{
453
42.0k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
42.0k
    assert(cfg_builder_check(g));
457
42.0k
    basicblock *b = g->g_block_list;
458
394k
    while (b != NULL) {
459
352k
        if (b->b_instr) {
460
352k
            PyMem_Free((void *)b->b_instr);
461
352k
        }
462
352k
        basicblock *next = b->b_list;
463
352k
        PyMem_Free((void *)b);
464
352k
        b = next;
465
352k
    }
466
42.0k
    PyMem_Free(g);
467
42.0k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
42.0k
{
472
42.0k
    int nblocks = 0;
473
384k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
342k
        nblocks++;
475
342k
    }
476
42.0k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
42.0k
    return SUCCESS;
481
42.0k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
154k
{
486
154k
    g->g_current_label = lbl;
487
154k
    return cfg_builder_maybe_start_new_block(g);
488
154k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
3.89M
{
493
3.89M
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
3.89M
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
3.89M
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
1.09M
{
501
1.15M
    while (b && b->b_iused == 0) {
502
52.9k
        b = b->b_next;
503
52.9k
    }
504
1.09M
    return b;
505
1.09M
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
42.0k
no_redundant_nops(cfg_builder *g) {
514
42.0k
    if (remove_redundant_nops(g) != 0) {
515
0
        return false;
516
0
    }
517
42.0k
    return true;
518
42.0k
}
519
520
static bool
521
84.0k
no_redundant_jumps(cfg_builder *g) {
522
780k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
696k
        cfg_instr *last = basicblock_last_instr(b);
524
696k
        if (last != NULL) {
525
619k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
103k
                basicblock *next = next_nonempty_block(b->b_next);
527
103k
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
103k
                if (jump_target == next) {
529
0
                    assert(next);
530
0
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
0
                        assert(0);
532
0
                        return false;
533
0
                    }
534
0
                }
535
103k
            }
536
619k
        }
537
696k
    }
538
84.0k
    return true;
539
84.0k
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
352k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
352k
    cfg_instr *last = basicblock_last_instr(b);
547
352k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
259k
        return SUCCESS;
549
259k
    }
550
352k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
92.9k
    bool is_forward = last->i_target->b_visited == 0;
553
92.9k
    if (is_forward) {
554
91.6k
        RETURN_IF_ERROR(
555
91.6k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
91.6k
        return SUCCESS;
557
91.6k
    }
558
559
1.28k
    int reversed_opcode = 0;
560
1.28k
    switch(last->i_opcode) {
561
0
        case POP_JUMP_IF_NOT_NONE:
562
0
            reversed_opcode = POP_JUMP_IF_NONE;
563
0
            break;
564
0
        case POP_JUMP_IF_NONE:
565
0
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
0
            break;
567
1.27k
        case POP_JUMP_IF_FALSE:
568
1.27k
            reversed_opcode = POP_JUMP_IF_TRUE;
569
1.27k
            break;
570
10
        case POP_JUMP_IF_TRUE:
571
10
            reversed_opcode = POP_JUMP_IF_FALSE;
572
10
            break;
573
1.28k
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
1.28k
    basicblock *target = last->i_target;
579
1.28k
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
1.28k
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
1.28k
    RETURN_IF_ERROR(
584
1.28k
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
1.28k
    RETURN_IF_ERROR(
586
1.28k
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
1.28k
    backwards_jump->b_startdepth = target->b_startdepth;
588
1.28k
    last->i_opcode = reversed_opcode;
589
1.28k
    last->i_target = b->b_next;
590
591
1.28k
    backwards_jump->b_cold = b->b_cold;
592
1.28k
    backwards_jump->b_next = b->b_next;
593
1.28k
    b->b_next = backwards_jump;
594
1.28k
    return SUCCESS;
595
1.28k
}
596
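
Concretely, for a backward POP_JUMP_IF_FALSE the rewrite above looks like this (schematic bytecode; the label names are invented for illustration):

/*
 *   before:                           after:
 *     B:      ...                       B:      ...
 *             POP_JUMP_IF_FALSE T               POP_JUMP_IF_TRUE Bnext
 *     Bnext:  ...                       BJ:     NOT_TAKEN
 *                                               JUMP T   (backward, unconditional)
 *                                       Bnext:  ...
 *
 * The inserted block BJ becomes the new fallthrough of B, so behaviour is
 * unchanged while every remaining backward jump is unconditional.
 */
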
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
42.0k
{
601
42.0k
    basicblock *entryblock = g->g_entryblock;
602
393k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
351k
        b->b_visited = 0;
604
351k
    }
605
394k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
352k
        b->b_visited = 1;
607
352k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
352k
    }
609
42.0k
    return SUCCESS;
610
42.0k
}
611
612
static int
613
42.0k
check_cfg(cfg_builder *g) {
614
384k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if a jump or exit is not the last instruction in the block. */
616
4.23M
        for (int i = 0; i < b->b_iused; i++) {
617
3.89M
            int opcode = b->b_instr[i].i_opcode;
618
3.89M
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
3.89M
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
282k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
282k
            }
625
3.89M
        }
626
342k
    }
627
42.0k
    return SUCCESS;
628
42.0k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
149k
{
633
149k
    int lbl = -1;
634
1.51M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
1.36M
        if (b->b_label.id > lbl) {
636
545k
            lbl = b->b_label.id;
637
545k
        }
638
1.36M
    }
639
149k
    return lbl;
640
149k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
42.0k
{
646
42.0k
    int max_label = get_max_label(entryblock);
647
42.0k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
42.0k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
42.0k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
42.0k
    memset(label2block, 0, mapsize);
654
384k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
342k
        if (b->b_label.id >= 0) {
656
154k
            label2block[b->b_label.id] = b;
657
154k
        }
658
342k
    }
659
384k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
4.23M
        for (int i = 0; i < b->b_iused; i++) {
661
3.89M
            cfg_instr *instr = &b->b_instr[i];
662
3.89M
            assert(instr->i_target == NULL);
663
3.89M
            if (HAS_TARGET(instr->i_opcode)) {
664
228k
                int lbl = instr->i_oparg;
665
228k
                assert(lbl >= 0 && lbl <= max_label);
666
228k
                instr->i_target = label2block[lbl];
667
228k
                assert(instr->i_target != NULL);
668
228k
                assert(instr->i_target->b_label.id == lbl);
669
228k
            }
670
3.89M
        }
671
342k
    }
672
42.0k
    PyMem_Free(label2block);
673
42.0k
    return SUCCESS;
674
42.0k
}
675
676
static int
677
42.0k
mark_except_handlers(basicblock *entryblock) {
678
42.0k
#ifndef NDEBUG
679
384k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
342k
        assert(!b->b_except_handler);
681
342k
    }
682
42.0k
#endif
683
384k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
4.23M
        for (int i=0; i < b->b_iused; i++) {
685
3.89M
            cfg_instr *instr = &b->b_instr[i];
686
3.89M
            if (is_block_push(instr)) {
687
44.7k
                instr->i_target->b_except_handler = 1;
688
44.7k
            }
689
3.89M
        }
690
342k
    }
691
42.0k
    return SUCCESS;
692
42.0k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
44.5k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
44.5k
    assert(is_block_push(setup));
704
44.5k
    int opcode = setup->i_opcode;
705
44.5k
    basicblock * target = setup->i_target;
706
44.5k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
23.1k
        target->b_preserve_lasti = 1;
708
23.1k
    }
709
44.5k
    assert(stack->depth <= CO_MAXBLOCKS);
710
44.5k
    stack->handlers[++stack->depth] = target;
711
44.5k
    return target;
712
44.5k
}
713
714
static basicblock *
715
42.1k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
42.1k
    assert(stack->depth > 0);
717
42.1k
    return stack->handlers[--stack->depth];
718
42.1k
}
719
720
static basicblock *
721
317k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
317k
    return stack->handlers[stack->depth];
723
317k
}
724
725
static struct _PyCfgExceptStack *
726
42.0k
make_except_stack(void) {
727
42.0k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
42.0k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
42.0k
    new->depth = 0;
733
42.0k
    new->handlers[0] = NULL;
734
42.0k
    return new;
735
42.0k
}
736
737
static struct _PyCfgExceptStack *
738
108k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
108k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
108k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
108k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
108k
    return copy;
746
108k
}
747
748
static basicblock**
749
277k
make_cfg_traversal_stack(basicblock *entryblock) {
750
277k
    int nblocks = 0;
751
2.83M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
2.55M
        b->b_visited = 0;
753
2.55M
        nblocks++;
754
2.55M
    }
755
277k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
277k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
277k
    return stack;
760
277k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have a different stack effect when they jump to the target
765
   than when they do not. The 'jump' parameter specifies the case:
766
767
   * 0 -- the jump is not taken
768
   * 1 -- the jump is taken
769
   * -1 -- maximal of the two
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
2.92M
{
779
2.92M
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
2.92M
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
2.92M
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
2.92M
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
2.92M
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
2.92M
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
44.5k
        effects->net = 0;
793
44.5k
        return 0;
794
44.5k
    }
795
2.87M
    effects->net = pushed - popped;
796
2.87M
    return 0;
797
2.92M
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
442k
{
802
442k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
442k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
322k
        assert(b->b_startdepth < 0);
808
322k
        b->b_startdepth = depth;
809
322k
        *(*sp)++ = b;
810
322k
    }
811
442k
    return SUCCESS;
812
442k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
42.0k
{
820
42.0k
    basicblock *entryblock = g->g_entryblock;
821
393k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
351k
        b->b_startdepth = INT_MIN;
823
351k
    }
824
42.0k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
42.0k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
42.0k
    int stackdepth = -1;
831
42.0k
    int maxdepth = 0;
832
42.0k
    basicblock **sp = stack;
833
42.0k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
364k
    while (sp != stack) {
837
322k
        basicblock *b = *--sp;
838
322k
        int depth = b->b_startdepth;
839
322k
        assert(depth >= 0);
840
322k
        basicblock *next = b->b_next;
841
2.90M
        for (int i = 0; i < b->b_iused; i++) {
842
2.70M
            cfg_instr *instr = &b->b_instr[i];
843
2.70M
            stack_effects effects;
844
2.70M
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
2.70M
            int new_depth = depth + effects.net;
851
2.70M
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow");
854
0
                goto error;
855
0
            }
856
2.70M
            maxdepth = Py_MAX(maxdepth, depth);
857
2.70M
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
207k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
207k
                int target_depth = depth + effects.net;
865
207k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
207k
                maxdepth = Py_MAX(maxdepth, depth);
867
207k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
207k
            }
871
2.70M
            depth = new_depth;
872
2.70M
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
2.70M
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
2.65M
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
129k
            {
876
                /* remaining code is dead */
877
129k
                next = NULL;
878
129k
                break;
879
129k
            }
880
2.70M
        }
881
322k
        if (next != NULL) {
882
192k
            assert(BB_HAS_FALLTHROUGH(b));
883
192k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
192k
        }
887
322k
    }
888
42.0k
    stackdepth = maxdepth;
889
42.0k
error:
890
42.0k
    PyMem_Free(stack);
891
42.0k
    return stackdepth;
892
42.0k
}
893
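
As a worked example of the propagation above: a statement like x = a + b produces a single block whose depth trace (schematic opcodes, not necessarily any exact CPython bytecode) is:

/*
 *   LOAD_NAME  a     depth 0 -> 1
 *   LOAD_NAME  b     depth 1 -> 2    <- maxdepth = 2
 *   BINARY_OP  +     depth 2 -> 1
 *   STORE_NAME x     depth 1 -> 0
 *
 * A conditional jump would additionally call stackdepth_push() on its
 * target with depth + effects.net computed for the jump-taken case.
 */
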
894
static int
895
42.0k
label_exception_targets(basicblock *entryblock) {
896
42.0k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
42.0k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
42.0k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
42.0k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
42.0k
    except_stack->depth = 0;
907
42.0k
    todo_stack[0] = entryblock;
908
42.0k
    entryblock->b_visited = 1;
909
42.0k
    entryblock->b_exceptstack = except_stack;
910
42.0k
    basicblock **todo = &todo_stack[1];
911
42.0k
    basicblock *handler = NULL;
912
359k
    while (todo > todo_stack) {
913
317k
        todo--;
914
317k
        basicblock *b = todo[0];
915
317k
        assert(b->b_visited == 1);
916
317k
        except_stack = b->b_exceptstack;
917
317k
        assert(except_stack != NULL);
918
317k
        b->b_exceptstack = NULL;
919
317k
        handler = except_stack_top(except_stack);
920
317k
        int last_yield_except_depth = -1;
921
4.08M
        for (int i = 0; i < b->b_iused; i++) {
922
3.76M
            cfg_instr *instr = &b->b_instr[i];
923
3.76M
            if (is_block_push(instr)) {
924
44.5k
                if (!instr->i_target->b_visited) {
925
44.5k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
44.5k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
44.5k
                    instr->i_target->b_exceptstack = copy;
930
44.5k
                    todo[0] = instr->i_target;
931
44.5k
                    instr->i_target->b_visited = 1;
932
44.5k
                    todo++;
933
44.5k
                }
934
44.5k
                handler = push_except_block(except_stack, instr);
935
44.5k
            }
936
3.72M
            else if (instr->i_opcode == POP_BLOCK) {
937
42.1k
                handler = pop_except_block(except_stack);
938
42.1k
                INSTR_SET_OP0(instr, NOP);
939
42.1k
            }
940
3.68M
            else if (is_jump(instr)) {
941
182k
                instr->i_except = handler;
942
182k
                assert(i == b->b_iused -1);
943
182k
                if (!instr->i_target->b_visited) {
944
87.5k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
64.1k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
64.1k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
64.1k
                        instr->i_target->b_exceptstack = copy;
950
64.1k
                    }
951
23.4k
                    else {
952
23.4k
                        instr->i_target->b_exceptstack = except_stack;
953
23.4k
                        except_stack = NULL;
954
23.4k
                    }
955
87.5k
                    todo[0] = instr->i_target;
956
87.5k
                    instr->i_target->b_visited = 1;
957
87.5k
                    todo++;
958
87.5k
                }
959
182k
            }
960
3.50M
            else if (instr->i_opcode == YIELD_VALUE) {
961
17.4k
                instr->i_except = handler;
962
17.4k
                last_yield_except_depth = except_stack->depth;
963
17.4k
            }
964
3.48M
            else if (instr->i_opcode == RESUME) {
965
59.4k
                instr->i_except = handler;
966
59.4k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
17.4k
                    assert(last_yield_except_depth >= 0);
968
17.4k
                    if (last_yield_except_depth == 1) {
969
1.80k
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
1.80k
                    }
971
17.4k
                    last_yield_except_depth = -1;
972
17.4k
                }
973
59.4k
            }
974
3.42M
            else {
975
3.42M
                instr->i_except = handler;
976
3.42M
            }
977
3.76M
        }
978
317k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
979
143k
            assert(except_stack != NULL);
980
143k
            b->b_next->b_exceptstack = except_stack;
981
143k
            todo[0] = b->b_next;
982
143k
            b->b_next->b_visited = 1;
983
143k
            todo++;
984
143k
        }
985
174k
        else if (except_stack != NULL) {
986
150k
           PyMem_Free(except_stack);
987
150k
        }
988
317k
    }
989
#ifdef Py_DEBUG
990
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991
        assert(b->b_exceptstack == NULL);
992
    }
993
#endif
994
42.0k
    PyMem_Free(todo_stack);
995
42.0k
    return SUCCESS;
996
0
error:
997
0
    PyMem_Free(todo_stack);
998
0
    PyMem_Free(except_stack);
999
0
    return ERROR;
1000
42.0k
}
1001
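
For intuition about what this pass computes, consider a plain try/except, assuming the usual SETUP_FINALLY/POP_BLOCK bracketing (schematic):

/*
 *   try:         SETUP_FINALLY H    push_except_block(): handler = H
 *       f()      CALL f             i_except = H
 *                POP_BLOCK          pop_except_block(); rewritten to NOP
 *   except:
 *       g()   H: ...                the b_except_handler block
 *
 * Instructions after the try statement revert to the enclosing handler,
 * which is NULL at the top level of a code object.
 */
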
1002
/***** CFG optimizations *****/
1003
1004
static int
1005
84.0k
remove_unreachable(basicblock *entryblock) {
1006
770k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1007
686k
        b->b_predecessors = 0;
1008
686k
    }
1009
84.0k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1010
84.0k
    if (stack == NULL) {
1011
0
        return ERROR;
1012
0
    }
1013
84.0k
    basicblock **sp = stack;
1014
84.0k
    entryblock->b_predecessors = 1;
1015
84.0k
    *sp++ = entryblock;
1016
84.0k
    entryblock->b_visited = 1;
1017
705k
    while (sp > stack) {
1018
621k
        basicblock *b = *(--sp);
1019
621k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1020
354k
            if (!b->b_next->b_visited) {
1021
298k
                assert(b->b_next->b_predecessors == 0);
1022
298k
                *sp++ = b->b_next;
1023
298k
                b->b_next->b_visited = 1;
1024
298k
            }
1025
354k
            b->b_next->b_predecessors++;
1026
354k
        }
1027
7.12M
        for (int i = 0; i < b->b_iused; i++) {
1028
6.50M
            basicblock *target;
1029
6.50M
            cfg_instr *instr = &b->b_instr[i];
1030
6.50M
            if (is_jump(instr) || is_block_push(instr)) {
1031
431k
                target = instr->i_target;
1032
431k
                if (!target->b_visited) {
1033
238k
                    *sp++ = target;
1034
238k
                    target->b_visited = 1;
1035
238k
                }
1036
431k
                target->b_predecessors++;
1037
431k
            }
1038
6.50M
        }
1039
621k
    }
1040
84.0k
    PyMem_Free(stack);
1041
1042
    /* Delete unreachable instructions */
1043
770k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1044
686k
       if (b->b_predecessors == 0) {
1045
64.8k
            b->b_iused = 0;
1046
64.8k
            b->b_except_handler = 0;
1047
64.8k
       }
1048
686k
    }
1049
84.0k
    return SUCCESS;
1050
84.0k
}
1051
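
The traversal above is a standard explicit-stack DFS. Reduced to its essentials (toy types; the real pass also follows jump and block-push edges and maintains predecessor counts):

typedef struct tb { struct tb *next; int visited; } tb;

/* Marks everything reachable from entry along fallthrough edges.
 * 'stack' must be large enough to hold every block. */
static void
mark_reachable(tb *entry, tb **stack)
{
    tb **sp = stack;
    entry->visited = 1;
    *sp++ = entry;
    while (sp > stack) {
        tb *b = *--sp;
        if (b->next && !b->next->visited) {
            b->next->visited = 1;
            *sp++ = b->next;
        }
    }
}
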
1052
static int
1053
2.43M
basicblock_remove_redundant_nops(basicblock *bb) {
1054
    /* Remove NOPs when legal to do so. */
1055
2.43M
    int dest = 0;
1056
2.43M
    int prev_lineno = -1;
1057
20.6M
    for (int src = 0; src < bb->b_iused; src++) {
1058
18.2M
        int lineno = bb->b_instr[src].i_loc.lineno;
1059
18.2M
        if (bb->b_instr[src].i_opcode == NOP) {
1060
            /* Eliminate no-op if it doesn't have a line number */
1061
1.11M
            if (lineno < 0) {
1062
912k
                continue;
1063
912k
            }
1064
            /* or, if the previous instruction had the same line number. */
1065
205k
            if (prev_lineno == lineno) {
1066
135k
                continue;
1067
135k
            }
1068
            /* or, if the next instruction has the same line number or no line number */
1069
69.8k
            if (src < bb->b_iused - 1) {
1070
62.0k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1071
62.0k
                if (next_lineno == lineno) {
1072
46.9k
                    continue;
1073
46.9k
                }
1074
15.1k
                if (next_lineno < 0) {
1075
891
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1076
891
                    continue;
1077
891
                }
1078
15.1k
            }
1079
7.72k
            else {
1080
7.72k
                basicblock *next = next_nonempty_block(bb->b_next);
1081
                /* or, if this is the last instruction in the block and the next block has the same line number */
1082
7.72k
                if (next) {
1083
7.72k
                    location next_loc = NO_LOCATION;
1084
10.8k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1085
10.8k
                        cfg_instr *instr = &next->b_instr[next_i];
1086
10.8k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1087
                            /* Skip over NOPs without a location, they will be removed */
1088
3.11k
                            continue;
1089
3.11k
                        }
1090
7.70k
                        next_loc = instr->i_loc;
1091
7.70k
                        break;
1092
10.8k
                    }
1093
7.72k
                    if (lineno == next_loc.lineno) {
1094
2.17k
                        continue;
1095
2.17k
                    }
1096
7.72k
                }
1097
7.72k
            }
1098
1099
69.8k
        }
1100
17.1M
        if (dest != src) {
1101
1.46M
            bb->b_instr[dest] = bb->b_instr[src];
1102
1.46M
        }
1103
17.1M
        dest++;
1104
17.1M
        prev_lineno = lineno;
1105
17.1M
    }
1106
2.43M
    assert(dest <= bb->b_iused);
1107
2.43M
    int num_removed = bb->b_iused - dest;
1108
2.43M
    bb->b_iused = dest;
1109
2.43M
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1110
2.43M
    return num_removed;
1111
2.43M
}
1112
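
The net effect of the rules above is that a NOP survives only when it is the sole remaining carrier of a line number. Schematically:

/*
 *   NOP (lineno -1)                       -> always removed
 *   NOP (lineno 3), neighbour on line 3   -> removed; line 3 still reported
 *   NOP (lineno 3), no neighbour on 3     -> kept, so tracing/monitoring
 *                                            still sees line 3 execute
 */
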
1113
static int
1114
179k
remove_redundant_nops(cfg_builder *g) {
1115
179k
    int changes = 0;
1116
2.25M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1117
2.07M
        int change = basicblock_remove_redundant_nops(b);
1118
2.07M
        RETURN_IF_ERROR(change);
1119
2.07M
        changes += change;
1120
2.07M
    }
1121
179k
    return changes;
1122
179k
}
1123
1124
static int
1125
remove_redundant_nops_and_pairs(basicblock *entryblock)
1126
42.0k
{
1127
42.0k
    bool done = false;
1128
1129
85.4k
    while (! done) {
1130
43.4k
        done = true;
1131
43.4k
        cfg_instr *prev_instr = NULL;
1132
43.4k
        cfg_instr *instr = NULL;
1133
404k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1134
360k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1135
360k
            if (IS_LABEL(b->b_label)) {
1136
                /* this block is a jump target, forget instr */
1137
164k
                instr = NULL;
1138
164k
            }
1139
3.32M
            for (int i = 0; i < b->b_iused; i++) {
1140
2.96M
                prev_instr = instr;
1141
2.96M
                instr = &b->b_instr[i];
1142
2.96M
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1143
2.96M
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1144
2.96M
                int opcode = instr->i_opcode;
1145
2.96M
                bool is_redundant_pair = false;
1146
2.96M
                if (opcode == POP_TOP) {
1147
213k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1148
5.45k
                       is_redundant_pair = true;
1149
5.45k
                   }
1150
208k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1151
554
                       is_redundant_pair = true;
1152
554
                   }
1153
213k
                }
1154
2.96M
                if (is_redundant_pair) {
1155
6.00k
                    INSTR_SET_OP0(prev_instr, NOP);
1156
6.00k
                    INSTR_SET_OP0(instr, NOP);
1157
6.00k
                    done = false;
1158
6.00k
                }
1159
2.96M
            }
1160
360k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1161
266k
                instr = NULL;
1162
266k
            }
1163
360k
        }
1164
43.4k
    }
1165
42.0k
    return SUCCESS;
1166
42.0k
}
1167
1168
static int
1169
95.7k
remove_redundant_jumps(cfg_builder *g) {
1170
    /* If a non-empty block ends with a jump instruction, check if the next
1171
     * non-empty block reached through normal flow control is the target
1172
     * of that jump. If it is, then the jump instruction is redundant and
1173
     * can be deleted.
1174
     *
1175
     * Return the number of changes applied, or -1 on error.
1176
     */
1177
1178
95.7k
    int changes = 0;
1179
1.47M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1180
1.38M
        cfg_instr *last = basicblock_last_instr(b);
1181
1.38M
        if (last == NULL) {
1182
142k
            continue;
1183
142k
        }
1184
1.38M
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1185
1.24M
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1186
271k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1187
271k
            if (jump_target == NULL) {
1188
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1189
0
                return ERROR;
1190
0
            }
1191
271k
            basicblock *next = next_nonempty_block(b->b_next);
1192
271k
            if (jump_target == next) {
1193
10.8k
                changes++;
1194
10.8k
                INSTR_SET_OP0(last, NOP);
1195
10.8k
            }
1196
271k
        }
1197
1.24M
    }
1198
1199
95.7k
    return changes;
1200
95.7k
}
1201
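
A minimal instance of what this pass removes (schematic):

/*
 *   B1:  ...                     B1:  ...
 *        JUMP L          ->           NOP   (dropped later by
 *   L:   ...                     L:   ...    remove_redundant_nops)
 *
 * Here L is the next non-empty block in layout order, so the jump merely
 * restates the fallthrough.
 */
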
1202
static inline bool
1203
303k
basicblock_has_no_lineno(basicblock *b) {
1204
396k
    for (int i = 0; i < b->b_iused; i++) {
1205
364k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1206
271k
            return false;
1207
271k
        }
1208
364k
    }
1209
31.7k
    return true;
1210
303k
}
1211
1212
/* Maximum size of a basic block that should be copied by the optimizer */
1213
7.94k
#define MAX_COPY_SIZE 4
1214
1215
/* If this block ends with an unconditional jump to a small exit block or
1216
 * a block that has no line numbers (and no fallthrough), then
1217
 * remove the jump and extend this block with the target.
1218
 * Returns 1 if extended, 0 if no change, and -1 on error.
1219
 */
1220
static int
1221
574k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1222
574k
    cfg_instr *last = basicblock_last_instr(bb);
1223
574k
    if (last == NULL) {
1224
0
        return 0;
1225
0
    }
1226
574k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1227
460k
        return 0;
1228
460k
    }
1229
113k
    basicblock *target = last->i_target;
1230
113k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1231
7.94k
                             target->b_iused <= MAX_COPY_SIZE);
1232
113k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1233
16.6k
                                     !BB_HAS_FALLTHROUGH(target));
1234
113k
    if (small_exit_block || no_lineno_no_fallthrough) {
1235
10.6k
        assert(is_jump(last));
1236
10.6k
        int removed_jump_opcode = last->i_opcode;
1237
10.6k
        INSTR_SET_OP0(last, NOP);
1238
10.6k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1239
10.6k
        if (no_lineno_no_fallthrough) {
1240
9.49k
            last = basicblock_last_instr(bb);
1241
9.49k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1242
5.01k
                removed_jump_opcode == JUMP)
1243
199
            {
1244
                /* Make sure we don't lose eval breaker checks */
1245
199
                last->i_opcode = JUMP;
1246
199
            }
1247
9.49k
        }
1248
10.6k
        target->b_predecessors--;
1249
10.6k
        return 1;
1250
10.6k
    }
1251
102k
    return 0;
1252
113k
}
1253
1254
static int
1255
42.0k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1256
42.0k
    bool changes;
1257
44.2k
    do {
1258
44.2k
        changes = false;
1259
618k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1260
574k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1261
574k
            RETURN_IF_ERROR(res);
1262
574k
            if (res) {
1263
10.6k
                changes = true;
1264
10.6k
            }
1265
574k
        }
1266
44.2k
    } while(changes); /* every change removes a jump, ensuring convergence */
1267
42.0k
    return changes;
1268
42.0k
}
1269
1270
// Attempt to eliminate jumps to jumps by updating inst to jump to
1271
// target->i_target using the provided opcode. Return whether or not the
1272
// optimization was successful.
1273
static bool
1274
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1275
7.35k
{
1276
7.35k
    assert(is_jump(inst));
1277
7.35k
    assert(is_jump(target));
1278
7.35k
    assert(inst == basicblock_last_instr(bb));
1279
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1280
    // changes (and we fall into an infinite loop):
1281
7.35k
    if (inst->i_target != target->i_target) {
1282
        /* Change inst to NOP and append a jump to target->i_target. The
1283
         * NOP will be removed later if it's not needed for the lineno.
1284
         */
1285
7.35k
        INSTR_SET_OP0(inst, NOP);
1286
1287
7.35k
        RETURN_IF_ERROR(
1288
7.35k
            basicblock_add_jump(
1289
7.35k
                bb, opcode, target->i_target, target->i_loc));
1290
1291
7.35k
        return true;
1292
7.35k
    }
1293
0
    return false;
1294
7.35k
}
1295
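
Schematically, jump threading turns a jump-to-a-jump into a direct jump; the NOP keeps the original location alive for the line-number table until later cleanup:

/*
 *   B:   ...                      B:   ...
 *        JUMP L1          ->           NOP
 *   L1:  JUMP L2                       JUMP L2
 *                                 L1:  JUMP L2
 *
 * The inst->i_target != target->i_target guard keeps a jump that already
 * points at the final target from being "threaded" onto itself forever.
 */
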
1296
static int
1297
loads_const(int opcode)
1298
4.34M
{
1299
4.34M
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1300
4.34M
}
1301
1302
/* Returns new reference */
1303
static PyObject*
1304
get_const_value(int opcode, int oparg, PyObject *co_consts)
1305
1.93M
{
1306
1.93M
    PyObject *constant = NULL;
1307
1.93M
    assert(loads_const(opcode));
1308
1.93M
    if (opcode == LOAD_CONST) {
1309
1.62M
        constant = PyList_GET_ITEM(co_consts, oparg);
1310
1.62M
    }
1311
1.93M
    if (opcode == LOAD_SMALL_INT) {
1312
306k
        return PyLong_FromLong(oparg);
1313
306k
    }
1314
1315
1.62M
    if (constant == NULL) {
1316
0
        PyErr_SetString(PyExc_SystemError,
1317
0
                        "Internal error: failed to get value of a constant");
1318
0
        return NULL;
1319
0
    }
1320
1.62M
    return Py_NewRef(constant);
1321
1.62M
}
1322
1323
// Steals a reference to newconst.
1324
static int
1325
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1326
469k
{
1327
469k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1328
0
        Py_DECREF(newconst);
1329
0
        return -1;
1330
0
    }
1331
1332
469k
    Py_ssize_t index;
1333
36.3M
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1334
72.3M
        if (PyList_GET_ITEM(consts, index) == newconst) {
1335
361k
            break;
1336
361k
        }
1337
36.1M
    }
1338
469k
    if (index == PyList_GET_SIZE(consts)) {
1339
108k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1340
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1341
0
            Py_DECREF(newconst);
1342
0
            return -1;
1343
0
        }
1344
108k
        if (PyList_Append(consts, newconst)) {
1345
0
            Py_DECREF(newconst);
1346
0
            return -1;
1347
0
        }
1348
108k
    }
1349
469k
    Py_DECREF(newconst);
1350
469k
    return (int)index;
1351
469k
}
1352
1353
/*
1354
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1355
  Try to collect "size" consecutive constant-loading instructions into the array "instrs".
1356
  The caller must ensure that "instrs" is large enough to hold at least "size" instructions.
1357
1358
  Return a boolean indicating whether "size" such instructions were found.
1359
*/
1360
static bool
1361
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1362
879k
{
1363
879k
    assert(start < bb->b_iused);
1364
879k
    assert(size >= 0);
1365
879k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1366
1367
2.78M
    for (; start >= 0 && size > 0; start--) {
1368
2.21M
        cfg_instr *instr = &bb->b_instr[start];
1369
2.21M
        if (instr->i_opcode == NOP) {
1370
819k
            continue;
1371
819k
        }
1372
1.39M
        if (!loads_const(instr->i_opcode)) {
1373
311k
            return false;
1374
311k
        }
1375
1.08M
        instrs[--size] = instr;
1376
1.08M
    }
1377
1378
567k
    return size == 0;
1379
879k
}
1380
1381
/*
1382
  Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1383
  The caller must ensure that "instrs" has at least "size" elements.
1384
*/
1385
static void
1386
nop_out(cfg_instr **instrs, int size)
1387
526k
{
1388
1.43M
    for (int i = 0; i < size; i++) {
1389
910k
        cfg_instr *instr = instrs[i];
1390
910k
        assert(instr->i_opcode != NOP);
1391
910k
        INSTR_SET_OP0(instr, NOP);
1392
910k
        INSTR_SET_LOC(instr, NO_LOCATION);
1393
910k
    }
1394
526k
}
1395
1396
/* Does not steal a reference to "newconst".
1397
   Return 1 if the instruction was changed to LOAD_SMALL_INT.
1398
   Return 0 if the instruction could not be changed to LOAD_SMALL_INT.
1399
   Return -1 on error.
1400
*/
1401
static int
1402
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1403
                               PyObject *consts, PyObject *const_cache)
1404
1.43M
{
1405
1.43M
    if (PyLong_CheckExact(newconst)) {
1406
827k
        int overflow;
1407
827k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1408
827k
        if (val == -1 && PyErr_Occurred()) {
1409
0
            return -1;
1410
0
        }
1411
827k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1412
412k
            assert(_Py_IsImmortal(newconst));
1413
412k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1414
412k
            return 1;
1415
412k
        }
1416
827k
    }
1417
1.02M
    return 0;
1418
1.43M
}
1419
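
The conversion above leans on the interpreter's preallocated small-int cache, whose members are immortal, so LOAD_SMALL_INT can carry the value directly in its oparg. A sketch of the range test, assuming the usual CPython cache bounds of -5..256 (the authoritative check is _PY_IS_SMALL_INT in pycore_long.h):

/* Assumed cache bounds; the real test is the _PY_IS_SMALL_INT macro. */
static int
is_small_int_sketch(long v)
{
    return -5 <= v && v <= 256;
}
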
1420
1421
/* Steals reference to "newconst" */
1422
static int
1423
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1424
                      PyObject *consts, PyObject *const_cache)
1425
514k
{
1426
514k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1427
514k
    if (res < 0) {
1428
0
        Py_DECREF(newconst);
1429
0
        return ERROR;
1430
0
    }
1431
514k
    if (res > 0) {
1432
53.5k
        return SUCCESS;
1433
53.5k
    }
1434
461k
    int oparg = add_const(newconst, consts, const_cache);
1435
461k
    RETURN_IF_ERROR(oparg);
1436
461k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1437
461k
    return SUCCESS;
1438
461k
}
1439
1440
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1441
   with    LOAD_CONST (c1, c2, ... cn).
1442
   The consts table must still be in list form so that the
1443
   new constant (c1, c2, ... cn) can be appended.
1444
   Called with "i" being the index of the BUILD_TUPLE instruction.
1445
*/
1446
static int
1447
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1448
74.2k
{
1449
    /* Pre-conditions */
1450
74.2k
    assert(PyDict_CheckExact(const_cache));
1451
74.2k
    assert(PyList_CheckExact(consts));
1452
1453
74.2k
    cfg_instr *instr = &bb->b_instr[i];
1454
74.2k
    assert(instr->i_opcode == BUILD_TUPLE);
1455
1456
74.2k
    int seq_size = instr->i_oparg;
1457
74.2k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1458
27
        return SUCCESS;
1459
27
    }
1460
1461
74.2k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1462
74.2k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1463
        /* not a const sequence */
1464
30.9k
        return SUCCESS;
1465
30.9k
    }
1466
1467
43.3k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1468
43.3k
    if (const_tuple == NULL) {
1469
0
        return ERROR;
1470
0
    }
1471
1472
106k
    for (int i = 0; i < seq_size; i++) {
1473
63.6k
        cfg_instr *inst = const_instrs[i];
1474
63.6k
        assert(loads_const(inst->i_opcode));
1475
63.6k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1476
63.6k
        if (element == NULL) {
1477
0
            Py_DECREF(const_tuple);
1478
0
            return ERROR;
1479
0
        }
1480
63.6k
        PyTuple_SET_ITEM(const_tuple, i, element);
1481
63.6k
    }
1482
1483
43.3k
    nop_out(const_instrs, seq_size);
1484
43.3k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1485
43.3k
}
1486
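
For example, a literal such as (1, 2, 3) compiles (schematically) to three constant loads plus BUILD_TUPLE 3, and the fold rewrites the sequence in place:

/*
 *   LOAD_SMALL_INT 1             NOP
 *   LOAD_SMALL_INT 2      ->     NOP
 *   LOAD_SMALL_INT 3             NOP
 *   BUILD_TUPLE 3                LOAD_CONST (1, 2, 3)
 *
 * The NOPs are given NO_LOCATION by nop_out() and are cleaned up later by
 * remove_redundant_nops.
 */
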
1487
/* Replace:
1488
    BUILD_LIST 0
1489
    LOAD_CONST c1
1490
    LIST_APPEND 1
1491
    LOAD_CONST c2
1492
    LIST_APPEND 1
1493
    ...
1494
    LOAD_CONST cN
1495
    LIST_APPEND 1
1496
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1497
   with:
1498
    LOAD_CONST (c1, c2, ... cN)
1499
*/
1500
static int
1501
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1502
                                      PyObject *consts, PyObject *const_cache)
1503
4.90k
{
1504
4.90k
    assert(PyDict_CheckExact(const_cache));
1505
4.90k
    assert(PyList_CheckExact(consts));
1506
4.90k
    assert(i >= 0);
1507
4.90k
    assert(i < bb->b_iused);
1508
1509
4.90k
    cfg_instr *intrinsic = &bb->b_instr[i];
1510
4.90k
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1511
4.90k
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1512
1513
4.90k
    int consts_found = 0;
1514
4.90k
    bool expect_append = true;
1515
1516
22.7k
    for (int pos = i - 1; pos >= 0; pos--) {
1517
22.6k
        cfg_instr *instr = &bb->b_instr[pos];
1518
22.6k
        int opcode = instr->i_opcode;
1519
22.6k
        int oparg = instr->i_oparg;
1520
1521
22.6k
        if (opcode == NOP) {
1522
3.71k
            continue;
1523
3.71k
        }
1524
1525
18.8k
        if (opcode == BUILD_LIST && oparg == 0) {
1526
48
            if (!expect_append) {
1527
                /* Not a sequence start. */
1528
5
                return SUCCESS;
1529
5
            }
1530
1531
            /* Sequence start, we are done. */
1532
43
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1533
43
            if (newconst == NULL) {
1534
0
                return ERROR;
1535
0
            }
1536
1537
9.25k
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1538
9.21k
                instr = &bb->b_instr[newpos];
1539
9.21k
                if (instr->i_opcode == NOP) {
1540
1.75k
                    continue;
1541
1.75k
                }
1542
7.45k
                if (loads_const(instr->i_opcode)) {
1543
3.70k
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1544
3.70k
                    if (constant == NULL) {
1545
0
                        Py_DECREF(newconst);
1546
0
                        return ERROR;
1547
0
                    }
1548
3.70k
                    assert(consts_found > 0);
1549
3.70k
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1550
3.70k
                }
1551
7.45k
                nop_out(&instr, 1);
1552
7.45k
            }
1553
43
            assert(consts_found == 0);
1554
43
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1555
43
        }
1556
1557
18.8k
        if (expect_append) {
1558
10.7k
            if (opcode != LIST_APPEND || oparg != 1) {
1559
2.61k
                return SUCCESS;
1560
2.61k
            }
1561
10.7k
        }
1562
8.05k
        else {
1563
8.05k
            if (!loads_const(opcode)) {
1564
2.13k
                return SUCCESS;
1565
2.13k
            }
1566
5.92k
            consts_found++;
1567
5.92k
        }
1568
1569
14.0k
        expect_append = !expect_append;
1570
14.0k
    }
1571
1572
    /* Did not find sequence start. */
1573
105
    return SUCCESS;
1574
4.90k
}
1575
1576
39.9k
#define MIN_CONST_SEQUENCE_SIZE 3
1577
/*
1578
Optimize lists and sets for:
1579
    1. "for" loop, comprehension or "in"/"not in" tests:
1580
           Change literal list or set of constants into constant
1581
           tuple or frozenset respectively. Change list of
1582
           non-constants into tuple.
1583
    2. Constant literal lists/sets with length >= MIN_CONST_SEQUENCE_SIZE:
1584
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1585
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1586
           or BUILD_SET & SET_UPDATE respectively.
1587
*/
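/* Both rewrites are easy to observe from Python (illustrative; the exact
   disassembly varies by CPython version):

       import dis

       dis.dis(lambda x: x in [1, 2, 3])  # list becomes a constant tuple
       dis.dis(lambda x: x in {1, 2, 3})  # set becomes a constant frozenset
       dis.dis(lambda: [1, 2, 3, 4])      # built as BUILD_LIST 0,
                                          # LOAD_CONST (1, 2, 3, 4),
                                          # LIST_EXTEND 1
*/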
1588
static int
1589
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1590
                        PyObject *consts, PyObject *const_cache)
1591
19.9k
{
1592
19.9k
    assert(PyDict_CheckExact(const_cache));
1593
19.9k
    assert(PyList_CheckExact(consts));
1594
1595
19.9k
    cfg_instr *instr = &bb->b_instr[i];
1596
19.9k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1597
1598
19.9k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1599
19.9k
    int seq_size = instr->i_oparg;
1600
19.9k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1601
19.9k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1602
14.4k
    {
1603
14.4k
        return SUCCESS;
1604
14.4k
    }
1605
1606
5.46k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1607
5.46k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1608
735
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1609
            /* iterate over a tuple instead of a list */
1610
24
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1611
24
        }
1612
735
        return SUCCESS;
1613
735
    }
1614
1615
4.73k
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1616
4.73k
    if (const_result == NULL) {
1617
0
        return ERROR;
1618
0
    }
1619
1620
36.0k
    for (int i = 0; i < seq_size; i++) {
1621
31.3k
        cfg_instr *inst = const_instrs[i];
1622
31.3k
        assert(loads_const(inst->i_opcode));
1623
31.3k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1624
31.3k
        if (element == NULL) {
1625
0
            Py_DECREF(const_result);
1626
0
            return ERROR;
1627
0
        }
1628
31.3k
        PyTuple_SET_ITEM(const_result, i, element);
1629
31.3k
    }
1630
1631
4.73k
    if (instr->i_opcode == BUILD_SET) {
1632
4.51k
        PyObject *frozenset = PyFrozenSet_New(const_result);
1633
4.51k
        if (frozenset == NULL) {
1634
0
            Py_DECREF(const_result);
1635
0
            return ERROR;
1636
0
        }
1637
4.51k
        Py_SETREF(const_result, frozenset);
1638
4.51k
    }
1639
1640
4.73k
    int index = add_const(const_result, consts, const_cache);
1641
4.73k
    RETURN_IF_ERROR(index);
1642
4.73k
    nop_out(const_instrs, seq_size);
1643
1644
4.73k
    if (contains_or_iter) {
1645
458
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1646
458
    }
1647
4.27k
    else {
1648
4.27k
        assert(i >= 2);
1649
4.27k
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1650
1651
4.27k
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1652
1653
4.27k
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1654
4.27k
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1655
4.27k
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1656
4.27k
    }
1657
4.73k
    return SUCCESS;
1658
4.73k
}
1659
1660
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1661
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1662
 * Used to avoid creating constants which are slow to hash.
1663
 */
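/* A minimal Python sketch of the same recursive budget check (tuples only,
   mirroring the C logic below):

       def check_complexity(obj, limit):
           if isinstance(obj, tuple):
               limit -= len(obj)
               for item in obj:
                   if limit < 0:
                       return limit
                   limit = check_complexity(item, limit)
           return limit

       # Six items in total, so a budget of 100 has 94 left over:
       assert check_complexity(((1, 2), (3, 4)), 100) == 94
*/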
1664
static Py_ssize_t
1665
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1666
173k
{
1667
173k
    if (PyTuple_Check(obj)) {
1668
114k
        Py_ssize_t i;
1669
114k
        limit -= PyTuple_GET_SIZE(obj);
1670
263k
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1671
148k
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1672
148k
            if (limit < 0) {
1673
140
                return limit;
1674
140
            }
1675
148k
        }
1676
114k
    }
1677
173k
    return limit;
1678
173k
}
1679
1680
50.0k
#define MAX_INT_SIZE           128  /* bits */
1681
27.8k
#define MAX_COLLECTION_SIZE    256  /* items */
1682
9.61k
#define MAX_STR_SIZE          4096  /* characters */
1683
25.3k
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
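/* The effect of these limits can be seen from Python (illustrative; the
   thresholds are the ones defined above):

       import dis

       dis.dis(lambda: 2 ** 10)       # folded: result fits the bit limit
       dis.dis(lambda: 2 ** 10000)    # not folded: would exceed MAX_INT_SIZE
       dis.dis(lambda: "ab" * 4)      # folded into the constant 'abababab'
       dis.dis(lambda: "ab" * 10000)  # not folded: exceeds MAX_STR_SIZE
*/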
1684
1685
static PyObject *
1686
const_folding_safe_multiply(PyObject *v, PyObject *w)
1687
128k
{
1688
128k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1689
18.9k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1690
128k
    ) {
1691
14.9k
        int64_t vbits = _PyLong_NumBits(v);
1692
14.9k
        int64_t wbits = _PyLong_NumBits(w);
1693
14.9k
        assert(vbits >= 0);
1694
14.9k
        assert(wbits >= 0);
1695
14.9k
        if (vbits + wbits > MAX_INT_SIZE) {
1696
292
            return NULL;
1697
292
        }
1698
14.9k
    }
1699
113k
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1700
28.4k
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1701
28.4k
        if (size) {
1702
27.9k
            long n = PyLong_AsLong(v);
1703
27.9k
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1704
151
                return NULL;
1705
151
            }
1706
27.7k
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1707
27
                return NULL;
1708
27
            }
1709
27.7k
        }
1710
28.4k
    }
1711
85.4k
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1712
19.3k
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1713
19.3k
                                               PyBytes_GET_SIZE(w);
1714
19.3k
        if (size) {
1715
9.95k
            long n = PyLong_AsLong(v);
1716
9.95k
            if (n < 0 || n > MAX_STR_SIZE / size) {
1717
1.13k
                return NULL;
1718
1.13k
            }
1719
9.95k
        }
1720
19.3k
    }
1721
66.1k
    else if (PyLong_Check(w) &&
1722
55.2k
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1723
45.2k
    {
1724
45.2k
        return const_folding_safe_multiply(w, v);
1725
45.2k
    }
1726
1727
81.8k
    return PyNumber_Multiply(v, w);
1728
128k
}
1729
1730
static PyObject *
1731
const_folding_safe_power(PyObject *v, PyObject *w)
1732
58.2k
{
1733
58.2k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1734
35.2k
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1735
58.2k
    ) {
1736
24.1k
        int64_t vbits = _PyLong_NumBits(v);
1737
24.1k
        size_t wbits = PyLong_AsSize_t(w);
1738
24.1k
        assert(vbits >= 0);
1739
24.1k
        if (wbits == (size_t)-1) {
1740
1.01k
            return NULL;
1741
1.01k
        }
1742
23.1k
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1743
2.82k
            return NULL;
1744
2.82k
        }
1745
23.1k
    }
1746
1747
54.4k
    return PyNumber_Power(v, w, Py_None);
1748
58.2k
}
1749
1750
static PyObject *
1751
const_folding_safe_lshift(PyObject *v, PyObject *w)
1752
6.80k
{
1753
6.80k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1754
6.09k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1755
6.80k
    ) {
1756
4.93k
        int64_t vbits = _PyLong_NumBits(v);
1757
4.93k
        size_t wbits = PyLong_AsSize_t(w);
1758
4.93k
        assert(vbits >= 0);
1759
4.93k
        if (wbits == (size_t)-1) {
1760
567
            return NULL;
1761
567
        }
1762
4.36k
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1763
1.23k
            return NULL;
1764
1.23k
        }
1765
4.36k
    }
1766
1767
5.00k
    return PyNumber_Lshift(v, w);
1768
6.80k
}
1769
1770
static PyObject *
1771
const_folding_safe_mod(PyObject *v, PyObject *w)
1772
27.2k
{
1773
27.2k
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1774
1.50k
        return NULL;
1775
1.50k
    }
1776
1777
25.7k
    return PyNumber_Remainder(v, w);
1778
27.2k
}
1779
1780
static PyObject *
1781
eval_const_binop(PyObject *left, int op, PyObject *right)
1782
382k
{
1783
382k
    assert(left != NULL && right != NULL);
1784
382k
    assert(op >= 0 && op <= NB_OPARG_LAST);
1785
1786
382k
    PyObject *result = NULL;
1787
382k
    switch (op) {
1788
40.9k
        case NB_ADD:
1789
40.9k
            result = PyNumber_Add(left, right);
1790
40.9k
            break;
1791
36.8k
        case NB_SUBTRACT:
1792
36.8k
            result = PyNumber_Subtract(left, right);
1793
36.8k
            break;
1794
83.4k
        case NB_MULTIPLY:
1795
83.4k
            result = const_folding_safe_multiply(left, right);
1796
83.4k
            break;
1797
66.8k
        case NB_TRUE_DIVIDE:
1798
66.8k
            result = PyNumber_TrueDivide(left, right);
1799
66.8k
            break;
1800
19.0k
        case NB_FLOOR_DIVIDE:
1801
19.0k
            result = PyNumber_FloorDivide(left, right);
1802
19.0k
            break;
1803
27.2k
        case NB_REMAINDER:
1804
27.2k
            result = const_folding_safe_mod(left, right);
1805
27.2k
            break;
1806
58.2k
        case NB_POWER:
1807
58.2k
            result = const_folding_safe_power(left, right);
1808
58.2k
            break;
1809
6.80k
        case NB_LSHIFT:
1810
6.80k
            result = const_folding_safe_lshift(left, right);
1811
6.80k
            break;
1812
10.9k
        case NB_RSHIFT:
1813
10.9k
            result = PyNumber_Rshift(left, right);
1814
10.9k
            break;
1815
4.23k
        case NB_OR:
1816
4.23k
            result = PyNumber_Or(left, right);
1817
4.23k
            break;
1818
6.25k
        case NB_XOR:
1819
6.25k
            result = PyNumber_Xor(left, right);
1820
6.25k
            break;
1821
6.35k
        case NB_AND:
1822
6.35k
            result = PyNumber_And(left, right);
1823
6.35k
            break;
1824
14.6k
        case NB_SUBSCR:
1825
14.6k
            result = PyObject_GetItem(left, right);
1826
14.6k
            break;
1827
795
        case NB_MATRIX_MULTIPLY:
1828
            // No builtin constants implement matrix multiplication
1829
795
            break;
1830
0
        default:
1831
0
            Py_UNREACHABLE();
1832
382k
    }
1833
382k
    return result;
1834
382k
}
1835
1836
static int
1837
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1838
582k
{
1839
919k
    #define BINOP_OPERAND_COUNT 2
1840
582k
    assert(PyDict_CheckExact(const_cache));
1841
582k
    assert(PyList_CheckExact(consts));
1842
1843
582k
    cfg_instr *binop = &bb->b_instr[i];
1844
582k
    assert(binop->i_opcode == BINARY_OP);
1845
1846
582k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1847
582k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1848
        /* not a const sequence */
1849
199k
        return SUCCESS;
1850
199k
    }
1851
1852
382k
    cfg_instr *lhs_instr = operands_instrs[0];
1853
382k
    assert(loads_const(lhs_instr->i_opcode));
1854
382k
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1855
382k
    if (lhs == NULL) {
1856
0
        return ERROR;
1857
0
    }
1858
1859
382k
    cfg_instr *rhs_instr = operands_instrs[1];
1860
382k
    assert(loads_const(rhs_instr->i_opcode));
1861
382k
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1862
382k
    if (rhs == NULL) {
1863
0
        Py_DECREF(lhs);
1864
0
        return ERROR;
1865
0
    }
1866
1867
382k
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1868
382k
    Py_DECREF(lhs);
1869
382k
    Py_DECREF(rhs);
1870
382k
    if (newconst == NULL) {
1871
45.7k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1872
0
            return ERROR;
1873
0
        }
1874
45.7k
        PyErr_Clear();
1875
45.7k
        return SUCCESS;
1876
45.7k
    }
1877
1878
337k
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1879
337k
    return instr_make_load_const(binop, newconst, consts, const_cache);
1880
382k
}
1881
1882
static PyObject *
1883
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1884
136k
{
1885
136k
    assert(operand != NULL);
1886
136k
    assert(
1887
136k
        opcode == UNARY_NEGATIVE ||
1888
136k
        opcode == UNARY_INVERT ||
1889
136k
        opcode == UNARY_NOT ||
1890
136k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1891
136k
    );
1892
136k
    PyObject *result;
1893
136k
    switch (opcode) {
1894
81.4k
        case UNARY_NEGATIVE:
1895
81.4k
            result = PyNumber_Negative(operand);
1896
81.4k
            break;
1897
12.5k
        case UNARY_INVERT:
1898
            // XXX: This should be removed once the ~bool deprecation expires.
1899
12.5k
            if (PyBool_Check(operand)) {
1900
86
                return NULL;
1901
86
            }
1902
12.5k
            result = PyNumber_Invert(operand);
1903
12.5k
            break;
1904
336
        case UNARY_NOT: {
1905
336
            int r = PyObject_IsTrue(operand);
1906
336
            if (r < 0) {
1907
0
                return NULL;
1908
0
            }
1909
336
            result = PyBool_FromLong(!r);
1910
336
            break;
1911
336
        }
1912
41.9k
        case CALL_INTRINSIC_1:
1913
41.9k
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1914
0
                Py_UNREACHABLE();
1915
0
            }
1916
41.9k
            result = PyNumber_Positive(operand);
1917
41.9k
            break;
1918
0
        default:
1919
0
            Py_UNREACHABLE();
1920
136k
    }
1921
136k
    return result;
1922
136k
}
1923
1924
static int
1925
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1926
217k
{
1927
351k
    #define UNARYOP_OPERAND_COUNT 1
1928
217k
    assert(PyDict_CheckExact(const_cache));
1929
217k
    assert(PyList_CheckExact(consts));
1930
217k
    cfg_instr *unaryop = &bb->b_instr[i];
1931
1932
217k
    cfg_instr *operand_instr;
1933
217k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1934
        /* not a const */
1935
81.0k
        return SUCCESS;
1936
81.0k
    }
1937
1938
217k
    assert(loads_const(operand_instr->i_opcode));
1939
136k
    PyObject *operand = get_const_value(
1940
136k
        operand_instr->i_opcode,
1941
136k
        operand_instr->i_oparg,
1942
136k
        consts
1943
136k
    );
1944
136k
    if (operand == NULL) {
1945
0
        return ERROR;
1946
0
    }
1947
1948
136k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1949
136k
    Py_DECREF(operand);
1950
136k
    if (newconst == NULL) {
1951
1.86k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1952
0
            return ERROR;
1953
0
        }
1954
1.86k
        PyErr_Clear();
1955
1.86k
        return SUCCESS;
1956
1.86k
    }
1957
1958
134k
    if (unaryop->i_opcode == UNARY_NOT) {
1959
336
        assert(PyBool_Check(newconst));
1960
336
    }
1961
134k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1962
134k
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1963
134k
}
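/* Observable from Python (illustrative; output varies by version):

       import dis

       dis.dis(lambda: -5)     # folded to a single constant load of -5
       dis.dis(lambda: ~True)  # not folded: the fold is skipped so the
                               # runtime ~bool deprecation warning still fires
*/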
1964
1965
131k
#define VISITED (-1)
1966
1967
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1968
// same effect.
1969
static int
1970
swaptimize(basicblock *block, int *ix)
1971
62.5k
{
1972
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1973
    // for this function. Make sure to blow away cached *.pyc files first!
1974
62.5k
    assert(*ix < block->b_iused);
1975
62.5k
    cfg_instr *instructions = &block->b_instr[*ix];
1976
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1977
    // maximum depth of the stack manipulations:
1978
62.5k
    assert(instructions[0].i_opcode == SWAP);
1979
62.5k
    int depth = instructions[0].i_oparg;
1980
62.5k
    int len = 0;
1981
62.5k
    int more = false;
1982
62.5k
    int limit = block->b_iused - *ix;
1983
76.9k
    while (++len < limit) {
1984
76.9k
        int opcode = instructions[len].i_opcode;
1985
76.9k
        if (opcode == SWAP) {
1986
13.1k
            depth = Py_MAX(depth, instructions[len].i_oparg);
1987
13.1k
            more = true;
1988
13.1k
        }
1989
63.8k
        else if (opcode != NOP) {
1990
62.5k
            break;
1991
62.5k
        }
1992
76.9k
    }
1993
    // It's already optimal if there's only one SWAP:
1994
62.5k
    if (!more) {
1995
52.7k
        return SUCCESS;
1996
52.7k
    }
1997
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1998
9.81k
    int *stack = PyMem_Malloc(depth * sizeof(int));
1999
9.81k
    if (stack == NULL) {
2000
0
        PyErr_NoMemory();
2001
0
        return ERROR;
2002
0
    }
2003
40.8k
    for (int i = 0; i < depth; i++) {
2004
31.0k
        stack[i] = i;
2005
31.0k
    }
2006
    // Simulate the combined effect of these instructions by "running" them on
2007
    // our "stack":
2008
34.0k
    for (int i = 0; i < len; i++) {
2009
24.2k
        if (instructions[i].i_opcode == SWAP) {
2010
22.9k
            int oparg = instructions[i].i_oparg;
2011
22.9k
            int top = stack[0];
2012
            // SWAPs are 1-indexed:
2013
22.9k
            stack[0] = stack[oparg - 1];
2014
22.9k
            stack[oparg - 1] = top;
2015
22.9k
        }
2016
24.2k
    }
2017
    // Now we can begin! Our approach here is based on a solution to a closely
2018
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2019
    // think of this algorithm as determining the steps needed to efficiently
2020
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2021
    // though, we can efficiently *shuffle* it! For this reason, we will be
2022
    // replacing instructions starting from the *end* of the run. Since the
2023
    // solution is optimal, we don't need to worry about running out of space:
2024
9.81k
    int current = len - 1;
2025
40.8k
    for (int i = 0; i < depth; i++) {
2026
        // Skip items that have already been visited, or just happen to be in
2027
        // the correct location:
2028
31.0k
        if (stack[i] == VISITED || stack[i] == i) {
2029
21.6k
            continue;
2030
21.6k
        }
2031
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2032
        // with other items; traversing the cycle and swapping each item with
2033
        // the next will put them all in the correct place. The weird
2034
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2035
        // can only swap from that position:
2036
9.37k
        int j = i;
2037
39.4k
        while (true) {
2038
            // Skip the actual swap if our item is zero, since swapping the top
2039
            // item with itself is pointless:
2040
39.4k
            if (j) {
2041
20.7k
                assert(0 <= current);
2042
                // SWAPs are 1-indexed:
2043
20.7k
                instructions[current].i_opcode = SWAP;
2044
20.7k
                instructions[current--].i_oparg = j + 1;
2045
20.7k
            }
2046
39.4k
            if (stack[j] == VISITED) {
2047
                // Completed the cycle:
2048
9.37k
                assert(j == i);
2049
9.37k
                break;
2050
9.37k
            }
2051
30.0k
            int next_j = stack[j];
2052
30.0k
            stack[j] = VISITED;
2053
30.0k
            j = next_j;
2054
30.0k
        }
2055
9.37k
    }
2056
    // NOP out any unused instructions:
2057
13.2k
    while (0 <= current) {
2058
3.46k
        INSTR_SET_OP0(&instructions[current--], NOP);
2059
3.46k
    }
2060
9.81k
    PyMem_Free(stack);
2061
9.81k
    *ix += len - 1;
2062
9.81k
    return SUCCESS;
2063
9.81k
}
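/* The cycle-based rewrite above can be prototyped in a few lines of
   Python. This is an illustrative model only; it returns the minimal SWAP
   opargs in execution order for a desired stack permutation (index 0 is
   the top of the stack, and perm[i] is the index of the item that ends up
   in slot i, exactly like the "stack" array above):

       def minimal_swaps(perm):
           perm = list(perm)
           swaps = []
           for i in range(len(perm)):
               if perm[i] == "VISITED" or perm[i] == i:
                   continue
               j = i
               while True:
                   if j:
                       swaps.append(j + 1)   # SWAPs are 1-indexed
                   if perm[j] == "VISITED":
                       break                 # completed the cycle
                   perm[j], j = "VISITED", perm[j]
           # The C code writes from the end of the run, so the recorded
           # swaps execute in reverse order:
           return swaps[::-1]

       assert minimal_swaps([1, 0]) == [2]        # one SWAP(2)
       assert minimal_swaps([2, 0, 1]) == [2, 3]  # SWAP(2) then SWAP(3)
*/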
2064
2065
2066
// This list is pretty small, since it's only okay to reorder opcodes that:
2067
// - can't affect control flow (like jumping or raising exceptions)
2068
// - can't invoke arbitrary code (besides finalizers)
2069
// - only touch the TOS (and pop it when finished)
2070
#define SWAPPABLE(opcode) \
2071
91.8k
    ((opcode) == STORE_FAST || \
2072
91.8k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2073
91.8k
     (opcode) == POP_TOP)
2074
2075
#define STORES_TO(instr) \
2076
3.19k
    (((instr).i_opcode == STORE_FAST || \
2077
3.19k
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2078
3.19k
     ? (instr).i_oparg : -1)
2079
2080
static int
2081
next_swappable_instruction(basicblock *block, int i, int lineno)
2082
95.3k
{
2083
95.6k
    while (++i < block->b_iused) {
2084
90.8k
        cfg_instr *instruction = &block->b_instr[i];
2085
90.8k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2086
            // Optimizing across this instruction could cause user-visible
2087
            // changes in the names bound between line tracing events!
2088
119
            return -1;
2089
119
        }
2090
90.7k
        if (instruction->i_opcode == NOP) {
2091
323
            continue;
2092
323
        }
2093
90.4k
        if (SWAPPABLE(instruction->i_opcode)) {
2094
34.3k
            return i;
2095
34.3k
        }
2096
56.0k
        return -1;
2097
90.4k
    }
2098
4.77k
    return -1;
2099
95.3k
}
2100
2101
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2102
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2103
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
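/* Observable from Python (illustrative; exact output varies by version):

       import dis

       def f(x, y):
           a, b = x, y
           return a, b

       # The BUILD_TUPLE/UNPACK_SEQUENCE pair becomes SWAP(2) (see
       # optimize_basic_block below), and this pass then removes the SWAP
       # by reordering the two stores, so the disassembly of f typically
       # shows no SWAP at all.
       dis.dis(f)
*/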
2104
static void
2105
apply_static_swaps(basicblock *block, int i)
2106
62.5k
{
2107
    // SWAPs are to our left, and potential swaperands are to our right:
2108
65.8k
    for (; 0 <= i; i--) {
2109
65.6k
        assert(i < block->b_iused);
2110
65.6k
        cfg_instr *swap = &block->b_instr[i];
2111
65.6k
        if (swap->i_opcode != SWAP) {
2112
3.57k
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2113
                // Nope, but we know how to handle these. Keep looking:
2114
2.38k
                continue;
2115
2.38k
            }
2116
            // We can't reason about what this instruction does. Bail:
2117
1.19k
            return;
2118
3.57k
        }
2119
62.1k
        int j = next_swappable_instruction(block, i, -1);
2120
62.1k
        if (j < 0) {
2121
47.5k
            return;
2122
47.5k
        }
2123
14.5k
        int k = j;
2124
14.5k
        int lineno = block->b_instr[j].i_loc.lineno;
2125
34.3k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2126
33.1k
            k = next_swappable_instruction(block, k, lineno);
2127
33.1k
            if (k < 0) {
2128
13.3k
                return;
2129
13.3k
            }
2130
33.1k
        }
2131
        // The reordering is not safe if the two instructions to be swapped
2132
        // store to the same location, or if any intervening instruction stores
2133
        // to the same location as either of them.
2134
1.17k
        int store_j = STORES_TO(block->b_instr[j]);
2135
1.17k
        int store_k = STORES_TO(block->b_instr[k]);
2136
1.17k
        if (store_j >= 0 || store_k >= 0) {
2137
1.16k
            if (store_j == store_k) {
2138
243
                return;
2139
243
            }
2140
1.76k
            for (int idx = j + 1; idx < k; idx++) {
2141
843
                int store_idx = STORES_TO(block->b_instr[idx]);
2142
843
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2143
0
                    return;
2144
0
                }
2145
843
            }
2146
924
        }
2147
2148
        // Success!
2149
935
        INSTR_SET_OP0(swap, NOP);
2150
935
        cfg_instr temp = block->b_instr[j];
2151
935
        block->b_instr[j] = block->b_instr[k];
2152
935
        block->b_instr[k] = temp;
2153
935
    }
2154
62.5k
}
2155
2156
static int
2157
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2158
343k
{
2159
343k
    assert(PyDict_CheckExact(const_cache));
2160
343k
    assert(PyList_CheckExact(consts));
2161
343k
    int opcode = 0;
2162
343k
    int oparg = 0;
2163
4.13M
    for (int i = 0; i < bb->b_iused; i++) {
2164
3.78M
        cfg_instr *inst = &bb->b_instr[i];
2165
3.78M
        if (inst->i_opcode == LOAD_CONST) {
2166
918k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2167
918k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2168
918k
            Py_DECREF(constant);
2169
918k
            if (res < 0) {
2170
0
                return ERROR;
2171
0
            }
2172
918k
        }
2173
3.78M
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2174
542k
                                      inst->i_opcode == COPY &&
2175
397
                                      inst->i_oparg == 1);
2176
3.78M
        if (! is_copy_of_load_const) {
2177
3.78M
            opcode = inst->i_opcode;
2178
3.78M
            oparg = inst->i_oparg;
2179
3.78M
        }
2180
3.78M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2181
3.78M
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2182
2.86M
            continue;
2183
2.86M
        }
2184
918k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2185
918k
        switch(nextop) {
2186
1.41k
            case POP_JUMP_IF_FALSE:
2187
3.50k
            case POP_JUMP_IF_TRUE:
2188
5.00k
            case JUMP_IF_FALSE:
2189
5.63k
            case JUMP_IF_TRUE:
2190
5.63k
            {
2191
                /* Remove LOAD_CONST const; conditional jump */
2192
5.63k
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2193
5.63k
                if (cnt == NULL) {
2194
0
                    return ERROR;
2195
0
                }
2196
5.63k
                int is_true = PyObject_IsTrue(cnt);
2197
5.63k
                Py_DECREF(cnt);
2198
5.63k
                if (is_true == -1) {
2199
0
                    return ERROR;
2200
0
                }
2201
5.63k
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2202
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2203
3.50k
                    INSTR_SET_OP0(inst, NOP);
2204
3.50k
                }
2205
5.63k
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2206
5.63k
                if (is_true == jump_if_true) {
2207
2.94k
                    bb->b_instr[i+1].i_opcode = JUMP;
2208
2.94k
                }
2209
2.69k
                else {
2210
2.69k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2211
2.69k
                }
2212
5.63k
                break;
2213
5.63k
            }
2214
5.63k
            case IS_OP:
2215
2.51k
            {
2216
                // Fold to POP_JUMP_IF_NONE:
2217
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2218
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2219
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2220
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2221
                // Fold to POP_JUMP_IF_NOT_NONE:
2222
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2223
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2224
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2225
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2226
2.51k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2227
2.51k
                if (cnt == NULL) {
2228
0
                    return ERROR;
2229
0
                }
2230
2.51k
                if (!Py_IsNone(cnt)) {
2231
1.30k
                    Py_DECREF(cnt);
2232
1.30k
                    break;
2233
1.30k
                }
2234
1.21k
                if (bb->b_iused <= i + 2) {
2235
0
                    break;
2236
0
                }
2237
1.21k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2238
1.21k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2239
                // Get rid of TO_BOOL regardless:
2240
1.21k
                if (jump_instr->i_opcode == TO_BOOL) {
2241
0
                    INSTR_SET_OP0(jump_instr, NOP);
2242
0
                    if (bb->b_iused <= i + 3) {
2243
0
                        break;
2244
0
                    }
2245
0
                    jump_instr = &bb->b_instr[i + 3];
2246
0
                }
2247
1.21k
                bool invert = is_instr->i_oparg;
2248
1.21k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2249
1.21k
                    invert = !invert;
2250
1.21k
                }
2251
0
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2252
0
                    break;
2253
0
                }
2254
1.21k
                INSTR_SET_OP0(inst, NOP);
2255
1.21k
                INSTR_SET_OP0(is_instr, NOP);
2256
1.21k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2257
1.21k
                                              : POP_JUMP_IF_NONE;
2258
1.21k
                break;
2259
1.21k
            }
2260
3.83k
            case TO_BOOL:
2261
3.83k
            {
2262
3.83k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2263
3.83k
                if (cnt == NULL) {
2264
0
                    return ERROR;
2265
0
                }
2266
3.83k
                int is_true = PyObject_IsTrue(cnt);
2267
3.83k
                Py_DECREF(cnt);
2268
3.83k
                if (is_true == -1) {
2269
0
                    return ERROR;
2270
0
                }
2271
3.83k
                cnt = PyBool_FromLong(is_true);
2272
3.83k
                int index = add_const(cnt, consts, const_cache);
2273
3.83k
                if (index < 0) {
2274
0
                    return ERROR;
2275
0
                }
2276
3.83k
                INSTR_SET_OP0(inst, NOP);
2277
3.83k
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2278
3.83k
                break;
2279
3.83k
            }
2280
918k
        }
2281
918k
    }
2282
343k
    return SUCCESS;
2283
343k
}
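/* The None-test fold above is easy to observe from Python (illustrative):

       import dis

       def f(x):
           if x is None:   # LOAD_CONST None; IS_OP 0; POP_JUMP_IF_FALSE
               return 0
           return 1

       # On recent CPython the three-instruction test collapses into a
       # single POP_JUMP_IF_NOT_NONE.
       dis.dis(f)
*/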
2284
2285
static int
2286
42.0k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2287
385k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2288
343k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2289
343k
    }
2290
42.0k
    return SUCCESS;
2291
42.0k
}
2292
2293
static int
2294
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2295
343k
{
2296
343k
    assert(PyDict_CheckExact(const_cache));
2297
343k
    assert(PyList_CheckExact(consts));
2298
343k
    cfg_instr nop;
2299
343k
    INSTR_SET_OP0(&nop, NOP);
2300
4.14M
    for (int i = 0; i < bb->b_iused; i++) {
2301
3.80M
        cfg_instr *inst = &bb->b_instr[i];
2302
3.80M
        cfg_instr *target;
2303
3.80M
        int opcode = inst->i_opcode;
2304
3.80M
        int oparg = inst->i_oparg;
2305
3.80M
        if (HAS_TARGET(opcode)) {
2306
224k
            assert(inst->i_target->b_iused > 0);
2307
224k
            target = &inst->i_target->b_instr[0];
2308
224k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2309
224k
        }
2310
3.57M
        else {
2311
3.57M
            target = &nop;
2312
3.57M
        }
2313
3.80M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2314
3.80M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2315
3.80M
        switch (opcode) {
2316
            /* Try to fold tuples of constants.
2317
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2318
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2319
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2320
75.5k
            case BUILD_TUPLE:
2321
75.5k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2322
1.30k
                    switch(oparg) {
2323
1
                        case 1:
2324
1
                            INSTR_SET_OP0(inst, NOP);
2325
1
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2326
1
                            continue;
2327
457
                        case 2:
2328
1.26k
                        case 3:
2329
1.26k
                            INSTR_SET_OP0(inst, NOP);
2330
1.26k
                            bb->b_instr[i+1].i_opcode = SWAP;
2331
1.26k
                            continue;
2332
1.30k
                    }
2333
1.30k
                }
2334
74.2k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2335
74.2k
                break;
2336
10.7k
            case BUILD_LIST:
2337
19.9k
            case BUILD_SET:
2338
19.9k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2339
19.9k
                break;
2340
1.81k
            case POP_JUMP_IF_NOT_NONE:
2341
3.65k
            case POP_JUMP_IF_NONE:
2342
3.65k
                switch (target->i_opcode) {
2343
0
                    case JUMP:
2344
0
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2345
3.65k
                }
2346
3.65k
                break;
2347
82.3k
            case POP_JUMP_IF_FALSE:
2348
82.3k
                switch (target->i_opcode) {
2349
1.38k
                    case JUMP:
2350
1.38k
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2351
82.3k
                }
2352
82.3k
                break;
2353
82.3k
            case POP_JUMP_IF_TRUE:
2354
8.81k
                switch (target->i_opcode) {
2355
10
                    case JUMP:
2356
10
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2357
8.81k
                }
2358
8.81k
                break;
2359
8.81k
            case JUMP_IF_FALSE:
2360
1.54k
                switch (target->i_opcode) {
2361
0
                    case JUMP:
2362
8
                    case JUMP_IF_FALSE:
2363
8
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2364
8
                        continue;
2365
168
                    case JUMP_IF_TRUE:
2366
                        // No need to check for loops here, a block's b_next
2367
                        // cannot point to itself.
2368
168
                        assert(inst->i_target != inst->i_target->b_next);
2369
168
                        inst->i_target = inst->i_target->b_next;
2370
168
                        i--;
2371
168
                        continue;
2372
1.54k
                }
2373
1.36k
                break;
2374
6.73k
            case JUMP_IF_TRUE:
2375
6.73k
                switch (target->i_opcode) {
2376
0
                    case JUMP:
2377
5.66k
                    case JUMP_IF_TRUE:
2378
5.66k
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2379
5.66k
                        continue;
2380
0
                    case JUMP_IF_FALSE:
2381
                        // No need to check for loops here, a block's b_next
2382
                        // cannot point to itself.
2383
0
                        assert(inst->i_target != inst->i_target->b_next);
2384
0
                        inst->i_target = inst->i_target->b_next;
2385
0
                        i--;
2386
0
                        continue;
2387
6.73k
                }
2388
1.07k
                break;
2389
17.9k
            case JUMP:
2390
56.2k
            case JUMP_NO_INTERRUPT:
2391
56.2k
                switch (target->i_opcode) {
2392
264
                    case JUMP:
2393
264
                        i -= jump_thread(bb, inst, target, JUMP);
2394
264
                        continue;
2395
31
                    case JUMP_NO_INTERRUPT:
2396
31
                        i -= jump_thread(bb, inst, target, opcode);
2397
31
                        continue;
2398
56.2k
                }
2399
55.9k
                break;
2400
55.9k
            case FOR_ITER:
2401
3.70k
                if (target->i_opcode == JUMP) {
2402
                    /* This will not work now because the jump (at target) could
2403
                     * be forward or backward and FOR_ITER only jumps forward. We
2404
                     * can re-enable this if ever we implement a backward version
2405
                     * of FOR_ITER.
2406
                     */
2407
                    /*
2408
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2409
                    */
2410
0
                }
2411
3.70k
                break;
2412
79.3k
            case STORE_FAST:
2413
79.3k
                if (opcode == nextop &&
2414
30.1k
                    oparg == bb->b_instr[i+1].i_oparg &&
2415
2.60k
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2416
1.99k
                    bb->b_instr[i].i_opcode = POP_TOP;
2417
1.99k
                    bb->b_instr[i].i_oparg = 0;
2418
1.99k
                }
2419
79.3k
                break;
2420
75.6k
            case SWAP:
2421
75.6k
                if (oparg == 1) {
2422
0
                    INSTR_SET_OP0(inst, NOP);
2423
0
                }
2424
75.6k
                break;
2425
90.9k
            case LOAD_GLOBAL:
2426
90.9k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2427
1.42k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2428
1.42k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2429
1.42k
                }
2430
90.9k
                break;
2431
104k
            case COMPARE_OP:
2432
104k
                if (nextop == TO_BOOL) {
2433
31.4k
                    INSTR_SET_OP0(inst, NOP);
2434
31.4k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2435
31.4k
                    continue;
2436
31.4k
                }
2437
73.1k
                break;
2438
73.1k
            case CONTAINS_OP:
2439
17.9k
            case IS_OP:
2440
17.9k
                if (nextop == TO_BOOL) {
2441
1.67k
                    INSTR_SET_OP0(inst, NOP);
2442
1.67k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2443
1.67k
                    continue;
2444
1.67k
                }
2445
16.2k
                if (nextop == UNARY_NOT) {
2446
783
                    INSTR_SET_OP0(inst, NOP);
2447
783
                    int inverted = oparg ^ 1;
2448
783
                    assert(inverted == 0 || inverted == 1);
2449
783
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2450
783
                    continue;
2451
783
                }
2452
15.4k
                break;
2453
39.6k
            case TO_BOOL:
2454
39.6k
                if (nextop == TO_BOOL) {
2455
0
                    INSTR_SET_OP0(inst, NOP);
2456
0
                    continue;
2457
0
                }
2458
39.6k
                break;
2459
39.6k
            case UNARY_NOT:
2460
1.62k
                if (nextop == TO_BOOL) {
2461
227
                    INSTR_SET_OP0(inst, NOP);
2462
227
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2463
227
                    continue;
2464
227
                }
2465
1.39k
                if (nextop == UNARY_NOT) {
2466
227
                    INSTR_SET_OP0(inst, NOP);
2467
227
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2468
227
                    continue;
2469
227
                }
2470
1.17k
                _Py_FALLTHROUGH;
2471
16.0k
            case UNARY_INVERT:
2472
124k
            case UNARY_NEGATIVE:
2473
124k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2474
124k
                break;
2475
122k
            case CALL_INTRINSIC_1:
2476
122k
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2477
4.92k
                    if (nextop == GET_ITER) {
2478
20
                        INSTR_SET_OP0(inst, NOP);
2479
20
                    }
2480
4.90k
                    else {
2481
4.90k
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2482
4.90k
                    }
2483
4.92k
                }
2484
118k
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2485
92.4k
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2486
92.4k
                }
2487
122k
                break;
2488
582k
            case BINARY_OP:
2489
582k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2490
582k
                break;
2491
3.80M
        }
2492
3.80M
    }
2493
2494
4.12M
    for (int i = 0; i < bb->b_iused; i++) {
2495
3.77M
        cfg_instr *inst = &bb->b_instr[i];
2496
3.77M
        if (inst->i_opcode == SWAP) {
2497
62.5k
            if (swaptimize(bb, &i) < 0) {
2498
0
                goto error;
2499
0
            }
2500
62.5k
            apply_static_swaps(bb, i);
2501
62.5k
        }
2502
3.77M
    }
2503
343k
    return SUCCESS;
2504
0
error:
2505
0
    return ERROR;
2506
343k
}
2507
2508
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2509
2510
static int
2511
remove_redundant_nops_and_jumps(cfg_builder *g)
2512
88.7k
{
2513
88.7k
    int removed_nops, removed_jumps;
2514
95.7k
    do {
2515
        /* Convergence is guaranteed because the number of
2516
         * redundant jumps and nops only decreases.
2517
         */
2518
95.7k
        removed_nops = remove_redundant_nops(g);
2519
95.7k
        RETURN_IF_ERROR(removed_nops);
2520
95.7k
        removed_jumps = remove_redundant_jumps(g);
2521
95.7k
        RETURN_IF_ERROR(removed_jumps);
2522
95.7k
    } while(removed_nops + removed_jumps > 0);
2523
88.7k
    return SUCCESS;
2524
88.7k
}
2525
2526
/* Perform optimizations on a control flow graph.
2527
   The consts object should still be in list form to allow new constants
2528
   to be appended.
2529
2530
   Code transformations that reduce code size initially fill the gaps with
2531
   NOPs.  Later those NOPs are removed.
2532
*/
2533
static int
2534
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2535
42.0k
{
2536
42.0k
    assert(PyDict_CheckExact(const_cache));
2537
42.0k
    RETURN_IF_ERROR(check_cfg(g));
2538
42.0k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2539
42.0k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2540
42.0k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2541
42.0k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2542
385k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2543
343k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2544
343k
    }
2545
42.0k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2546
42.0k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2547
42.0k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2548
42.0k
    assert(no_redundant_jumps(g));
2549
42.0k
    return SUCCESS;
2550
42.0k
}
2551
2552
static void
2553
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2554
29.0k
{
2555
29.0k
    int32_t line1 = inst1->i_loc.lineno;
2556
29.0k
    int32_t line2 = inst2->i_loc.lineno;
2557
    /* Skip if instructions are on different lines */
2558
29.0k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2559
821
        return;
2560
821
    }
2561
28.2k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2562
15.8k
        return;
2563
15.8k
    }
2564
12.3k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2565
12.3k
    INSTR_SET_OP0(inst2, NOP);
2566
12.3k
}
2567
2568
static int
2569
insert_superinstructions(cfg_builder *g)
2570
42.0k
{
2571
385k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2572
2573
3.06M
        for (int i = 0; i < b->b_iused; i++) {
2574
2.71M
            cfg_instr *inst = &b->b_instr[i];
2575
2.71M
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2576
2.71M
            switch(inst->i_opcode) {
2577
13.0k
                case LOAD_FAST:
2578
13.0k
                    if (nextop == LOAD_FAST) {
2579
1.71k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2580
1.71k
                    }
2581
13.0k
                    break;
2582
68.9k
                case STORE_FAST:
2583
68.9k
                    switch (nextop) {
2584
4.43k
                        case LOAD_FAST:
2585
4.43k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2586
4.43k
                            break;
2587
22.8k
                        case STORE_FAST:
2588
22.8k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2589
22.8k
                            break;
2590
68.9k
                    }
2591
68.9k
                    break;
2592
2.71M
            }
2593
2.71M
        }
2594
343k
    }
2595
42.0k
    int res = remove_redundant_nops(g);
2596
42.0k
    assert(no_redundant_nops(g));
2597
42.0k
    return res;
2598
42.0k
}
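/* Observable from Python (illustrative; superinstructions appear in
   CPython 3.13+ disassembly):

       import dis

       def f(x, y):
           return x + y   # two adjacent LOAD_FAST instructions

       # The two loads are fused into LOAD_FAST_LOAD_FAST, with both
       # opargs packed into one byte (4 bits each, hence the < 16 check
       # in make_super_instruction above).
       dis.dis(f)
*/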
2599
2600
#define NOT_LOCAL -1
2601
86.4k
#define DUMMY_INSTR -1
2602
2603
typedef struct {
2604
    // Index of instruction that produced the reference or DUMMY_INSTR.
2605
    int instr;
2606
2607
    // The local to which the reference refers or NOT_LOCAL.
2608
    int local;
2609
} ref;
2610
2611
typedef struct {
2612
    ref *refs;
2613
    Py_ssize_t size;
2614
    Py_ssize_t capacity;
2615
} ref_stack;
2616
2617
static int
2618
ref_stack_push(ref_stack *stack, ref r)
2619
2.66M
{
2620
2.66M
    if (stack->size == stack->capacity) {
2621
42.2k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2622
42.2k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2623
42.2k
        if (refs == NULL) {
2624
0
            PyErr_NoMemory();
2625
0
            return -1;
2626
0
        }
2627
42.2k
        stack->refs = refs;
2628
42.2k
        stack->capacity = new_cap;
2629
42.2k
    }
2630
2.66M
    stack->refs[stack->size] = r;
2631
2.66M
    stack->size++;
2632
2.66M
    return 0;
2633
2.66M
}
2634
2635
static ref
2636
ref_stack_pop(ref_stack *stack)
2637
1.62M
{
2638
1.62M
    assert(stack->size > 0);
2639
1.62M
    stack->size--;
2640
1.62M
    ref r = stack->refs[stack->size];
2641
1.62M
    return r;
2642
1.62M
}
2643
2644
static void
2645
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2646
53.0k
{
2647
53.0k
    Py_ssize_t idx = stack->size - off;
2648
53.0k
    assert(idx >= 0 && idx < stack->size);
2649
53.0k
    ref tmp = stack->refs[idx];
2650
53.0k
    stack->refs[idx] = stack->refs[stack->size - 1];
2651
53.0k
    stack->refs[stack->size - 1] = tmp;
2652
53.0k
}
2653
2654
static ref
2655
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2656
7.98M
{
2657
7.98M
    assert(idx >= 0 && idx < stack->size);
2658
7.98M
    return stack->refs[idx];
2659
7.98M
}
2660
2661
static void
2662
ref_stack_clear(ref_stack *stack)
2663
204k
{
2664
204k
    stack->size = 0;
2665
204k
}
2666
2667
static void
2668
ref_stack_fini(ref_stack *stack)
2669
42.0k
{
2670
42.0k
    if (stack->refs != NULL) {
2671
41.9k
        PyMem_Free(stack->refs);
2672
41.9k
    }
2673
42.0k
    stack->refs = NULL;
2674
42.0k
    stack->capacity = 0;
2675
42.0k
    stack->size = 0;
2676
42.0k
}
2677
2678
typedef enum {
2679
    // The loaded reference is still on the stack when the local is killed
2680
    SUPPORT_KILLED  = 1,
2681
    // The loaded reference is stored into a local
2682
    STORED_AS_LOCAL = 2,
2683
    // The loaded reference is still on the stack at the end of the basic block
2684
    REF_UNCONSUMED  = 4,
2685
} LoadFastInstrFlag;
2686
2687
static void
2688
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2689
100k
{
2690
6.91M
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2691
6.81M
        ref r = ref_stack_at(refs, i);
2692
6.81M
        if (r.local == local) {
2693
863
            assert(r.instr >= 0);
2694
863
            instr_flags[r.instr] |= SUPPORT_KILLED;
2695
863
        }
2696
6.81M
    }
2697
100k
}
2698
2699
static void
2700
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2701
86.4k
{
2702
86.4k
    kill_local(instr_flags, refs, local);
2703
86.4k
    if (r.instr != DUMMY_INSTR) {
2704
70.4k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2705
70.4k
    }
2706
86.4k
}
2707
2708
static void
2709
load_fast_push_block(basicblock ***sp, basicblock *target,
2710
                     Py_ssize_t start_depth)
2711
238k
{
2712
238k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2713
238k
    if (!target->b_visited) {
2714
162k
        target->b_visited = 1;
2715
162k
        *(*sp)++ = target;
2716
162k
    }
2717
238k
}
2718
2719
/*
2720
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2721
 * load borrowed references onto the operand stack.
2722
 *
2723
 * This is only safe when we can prove that the reference in the frame outlives
2724
 * the borrowed reference produced by the instruction. We make this tractable
2725
 * by enforcing the following lifetimes:
2726
 *
2727
 * 1. Borrowed references loaded onto the operand stack live until the end of
2728
 *    the instruction that consumes them from the stack. Any borrowed
2729
 *    references that would escape into the heap (e.g. into frame objects or
2730
 *    generators) are converted into new, strong references.
2731
 *
2732
 * 2. Locals live until they are either killed by an instruction
2733
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2734
 *    via `f_locals` is added to a tuple owned by the frame object.
2735
 *
2736
 * To simplify the problem of detecting which supporting references in the
2737
 * frame are killed by instructions that overwrite locals, we only allow
2738
 * borrowed references to be stored as a local in the frame if they were passed
2739
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2740
 * strong references.
2741
 *
2742
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2743
 * that meet the following criteria:
2744
 *
2745
 * 1. The produced reference must be consumed from the stack before the
2746
 *    supporting reference in the frame is killed.
2747
 *
2748
 * 2. The produced reference cannot be stored as a local.
2749
 *
2750
 * We use abstract interpretation to identify instructions that meet these
2751
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2752
 * stack of abstract references and note any instructions that violate the
2753
 * criteria above. Once we've processed all the instructions in a block, any
2754
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2755
 */
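/* An illustrative Python model of the per-block simulation described
   above. It is a sketch only: instructions are (opcode, arg) pairs, only
   a few opcodes are modelled, and the flag names mirror LoadFastInstrFlag
   above.

       SUPPORT_KILLED, STORED_AS_LOCAL, REF_UNCONSUMED = 1, 2, 4

       def analyze_block(instrs, start_depth=0):
           flags = [0] * len(instrs)
           stack = [(-1, None)] * start_depth  # dummy refs, as in the C code
           for i, (op, arg) in enumerate(instrs):
               if op == "LOAD_FAST":
                   stack.append((i, arg))
               elif op == "STORE_FAST":
                   # Killing a local taints any borrow of it still on the
                   # stack (criterion 1) ...
                   for instr, local in stack:
                       if local == arg and instr >= 0:
                           flags[instr] |= SUPPORT_KILLED
                   # ... and the stored value must not be a borrow
                   # (criterion 2).
                   instr, _ = stack.pop()
                   if instr >= 0:
                       flags[instr] |= STORED_AS_LOCAL
               elif op == "POP_TOP":
                   stack.pop()
           for instr, _ in stack:              # leftovers escape the block
               if instr >= 0:
                   flags[instr] |= REF_UNCONSUMED
           return {i for i, (op, _) in enumerate(instrs)
                   if op == "LOAD_FAST" and flags[i] == 0}

       # The load of x is still on the stack when x is overwritten, and the
       # load of y is stored into a local, so neither can become a borrow:
       block = [("LOAD_FAST", "x"), ("LOAD_FAST", "y"),
                ("STORE_FAST", "x"), ("POP_TOP", None)]
       assert analyze_block(block) == set()

       # Here x's borrow is consumed before anything kills it:
       block2 = [("LOAD_FAST", "x"), ("POP_TOP", None),
                 ("LOAD_FAST", "y"), ("STORE_FAST", "x")]
       assert analyze_block(block2) == {0}
*/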
2756
static int
2757
optimize_load_fast(cfg_builder *g)
2758
42.0k
{
2759
42.0k
    int status;
2760
42.0k
    ref_stack refs = {0};
2761
42.0k
    int max_instrs = 0;
2762
42.0k
    basicblock *entryblock = g->g_entryblock;
2763
394k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2764
352k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2765
352k
    }
2766
42.0k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2767
42.0k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2768
42.0k
    if (instr_flags == NULL) {
2769
0
        PyErr_NoMemory();
2770
0
        return ERROR;
2771
0
    }
2772
42.0k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2773
42.0k
    if (blocks == NULL) {
2774
0
        status = ERROR;
2775
0
        goto done;
2776
0
    }
2777
42.0k
    basicblock **sp = blocks;
2778
42.0k
    *sp = entryblock;
2779
42.0k
    sp++;
2780
42.0k
    entryblock->b_startdepth = 0;
2781
42.0k
    entryblock->b_visited = 1;
2782
2783
42.0k
    #define PUSH_REF(instr, local)                \
2784
2.66M
        do {                                      \
2785
2.66M
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2786
0
                status = ERROR;                   \
2787
0
                goto done;                        \
2788
0
            }                                     \
2789
2.66M
        } while(0)
2790
2791
246k
    while (sp != blocks) {
2792
204k
        basicblock *block = *--sp;
2793
204k
        assert(block->b_startdepth > -1);
2794
2795
        // Reset per-block state.
2796
204k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2797
2798
        // Reset the stack of refs. We don't track references on the stack
2799
        // across basic blocks, but the bytecode will expect their
2800
        // presence. Add dummy references as necessary.
2801
204k
        ref_stack_clear(&refs);
2802
1.21M
        for (int i = 0; i < block->b_startdepth; i++) {
2803
1.01M
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2804
1.01M
        }
2805
2806
2.48M
        for (int i = 0; i < block->b_iused; i++) {
2807
2.27M
            cfg_instr *instr = &block->b_instr[i];
2808
2.27M
            int opcode = instr->i_opcode;
2809
2.27M
            int oparg = instr->i_oparg;
2810
2.27M
            assert(opcode != EXTENDED_ARG);
2811
2.27M
            switch (opcode) {
2812
                // Opcodes that load and store locals
2813
3.51k
                case DELETE_FAST: {
2814
3.51k
                    kill_local(instr_flags, &refs, oparg);
2815
3.51k
                    break;
2816
0
                }
2817
2818
23.7k
                case LOAD_FAST: {
2819
23.7k
                    PUSH_REF(i, oparg);
2820
23.7k
                    break;
2821
23.7k
                }
2822
2823
23.7k
                case LOAD_FAST_AND_CLEAR: {
2824
10.1k
                    kill_local(instr_flags, &refs, oparg);
2825
10.1k
                    PUSH_REF(i, oparg);
2826
10.1k
                    break;
2827
10.1k
                }
2828
2829
10.1k
                case LOAD_FAST_LOAD_FAST: {
2830
427
                    PUSH_REF(i, oparg >> 4);
2831
427
                    PUSH_REF(i, oparg & 15);
2832
427
                    break;
2833
427
                }
2834
2835
66.2k
                case STORE_FAST: {
2836
66.2k
                    ref r = ref_stack_pop(&refs);
2837
66.2k
                    store_local(instr_flags, &refs, oparg, r);
2838
66.2k
                    break;
2839
427
                }
2840
2841
3.52k
                case STORE_FAST_LOAD_FAST: {
2842
                    // STORE_FAST
2843
3.52k
                    ref r = ref_stack_pop(&refs);
2844
3.52k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2845
                    // LOAD_FAST
2846
3.52k
                    PUSH_REF(i, oparg & 15);
2847
3.52k
                    break;
2848
3.52k
                }
2849
2850
8.32k
                case STORE_FAST_STORE_FAST: {
2851
                    // STORE_FAST
2852
8.32k
                    ref r = ref_stack_pop(&refs);
2853
8.32k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2854
                    // STORE_FAST
2855
8.32k
                    r = ref_stack_pop(&refs);
2856
8.32k
                    store_local(instr_flags, &refs, oparg & 15, r);
2857
8.32k
                    break;
2858
3.52k
                }
2859
2860
                // Opcodes that shuffle values on the stack
2861
130k
                case COPY: {
2862
130k
                    assert(oparg > 0);
2863
130k
                    Py_ssize_t idx = refs.size - oparg;
2864
130k
                    ref r = ref_stack_at(&refs, idx);
2865
130k
                    PUSH_REF(r.instr, r.local);
2866
130k
                    break;
2867
130k
                }
2868
2869
130k
                case SWAP: {
2870
53.0k
                    assert(oparg >= 2);
2871
53.0k
                    ref_stack_swap_top(&refs, oparg);
2872
53.0k
                    break;
2873
53.0k
                }
2874
2875
                // We treat opcodes that do not consume all of their inputs on
2876
                // a case-by-case basis, as we have no generic way of knowing
2877
                // how many inputs should be left on the stack.
2878
2879
                // Opcodes that consume no inputs
2880
3.53k
                case FORMAT_SIMPLE:
2881
4.01k
                case GET_ANEXT:
2882
7.67k
                case GET_ITER:
2883
33.5k
                case GET_LEN:
2884
33.5k
                case GET_YIELD_FROM_ITER:
2885
39.7k
                case IMPORT_FROM:
2886
39.7k
                case MATCH_KEYS:
2887
39.7k
                case MATCH_MAPPING:
2888
41.1k
                case MATCH_SEQUENCE:
2889
41.1k
                case WITH_EXCEPT_START: {
2890
41.1k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2891
41.1k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2892
41.1k
                    int net_pushed = num_pushed - num_popped;
2893
41.1k
                    assert(net_pushed >= 0);
2894
78.6k
                    for (int j = 0; j < net_pushed; j++) {
2895
37.5k
                        PUSH_REF(i, NOT_LOCAL);
2896
37.5k
                    }
2897
41.1k
                    break;
2898
41.1k
                }
2899
2900
                // Opcodes that consume some inputs and push no new values
2901
41.1k
                case DICT_MERGE:
2902
2.58k
                case DICT_UPDATE:
2903
43.6k
                case LIST_APPEND:
2904
48.0k
                case LIST_EXTEND:
2905
49.0k
                case MAP_ADD:
2906
49.0k
                case RERAISE:
2907
53.6k
                case SET_ADD:
2908
57.9k
                case SET_UPDATE: {
2909
57.9k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2910
57.9k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2911
57.9k
                    int net_popped = num_popped - num_pushed;
2912
57.9k
                    assert(net_popped > 0);
2913
116k
                    for (int i = 0; i < net_popped; i++) {
2914
59.0k
                        ref_stack_pop(&refs);
2915
59.0k
                    }
2916
57.9k
                    break;
2917
57.9k
                }
2918
2919
7.66k
                case END_SEND:
2920
21.8k
                case SET_FUNCTION_ATTRIBUTE: {
2921
21.8k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2922
21.8k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2923
21.8k
                    ref tos = ref_stack_pop(&refs);
2924
21.8k
                    ref_stack_pop(&refs);
2925
21.8k
                    PUSH_REF(tos.instr, tos.local);
2926
21.8k
                    break;
2927
21.8k
                }
2928
2929
                // Opcodes that consume some inputs and push new values
2930
21.8k
                case CHECK_EXC_MATCH: {
2931
0
                    ref_stack_pop(&refs);
2932
0
                    PUSH_REF(i, NOT_LOCAL);
2933
0
                    break;
2934
0
                }
2935
2936
3.66k
                case FOR_ITER: {
2937
3.66k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2938
3.66k
                    PUSH_REF(i, NOT_LOCAL);
2939
3.66k
                    break;
2940
3.66k
                }
2941
2942
11.8k
                case LOAD_ATTR:
2943
12.3k
                case LOAD_SUPER_ATTR: {
2944
12.3k
                    ref self = ref_stack_pop(&refs);
2945
12.3k
                    if (opcode == LOAD_SUPER_ATTR) {
2946
444
                        ref_stack_pop(&refs);
2947
444
                        ref_stack_pop(&refs);
2948
444
                    }
2949
12.3k
                    PUSH_REF(i, NOT_LOCAL);
2950
12.3k
                    if (oparg & 1) {
2951
                        // A method call; conservatively assume that self is pushed
2952
                        // back onto the stack
2953
902
                        PUSH_REF(self.instr, self.local);
2954
902
                    }
2955
12.3k
                    break;
2956
12.3k
                }
2957
2958
13.7k
                case LOAD_SPECIAL:
2959
13.7k
                case PUSH_EXC_INFO: {
2960
13.7k
                    ref tos = ref_stack_pop(&refs);
2961
13.7k
                    PUSH_REF(i, NOT_LOCAL);
2962
13.7k
                    PUSH_REF(tos.instr, tos.local);
2963
13.7k
                    break;
2964
13.7k
                }
2965
2966
13.7k
                case SEND: {
2967
7.66k
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2968
7.66k
                    ref_stack_pop(&refs);
2969
7.66k
                    PUSH_REF(i, NOT_LOCAL);
2970
7.66k
                    break;
2971
7.66k
                }
2972
2973
                // Opcodes that consume all of their inputs
2974
1.81M
                default: {
2975
1.81M
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2976
1.81M
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2977
1.81M
                    if (HAS_TARGET(instr->i_opcode)) {
2978
93.9k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2979
93.9k
                    }
2980
1.81M
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2981
                        // Block push opcodes only affect the stack when jumping
2982
                        // to the target.
2983
3.22M
                        for (int j = 0; j < num_popped; j++) {
2984
1.40M
                            ref_stack_pop(&refs);
2985
1.40M
                        }
2986
3.19M
                        for (int j = 0; j < num_pushed; j++) {
2987
1.37M
                            PUSH_REF(i, NOT_LOCAL);
2988
1.37M
                        }
2989
1.81M
                    }
2990
1.81M
                    break;
2991
1.81M
                }
2992
2.27M
            }
2993
2.27M
        }
2994
2995
        // Push fallthrough block
2996
204k
        if (BB_HAS_FALLTHROUGH(block)) {
2997
133k
            assert(block->b_next != NULL);
2998
133k
            load_fast_push_block(&sp, block->b_next, refs.size);
2999
133k
        }
3000
3001
        // Mark instructions that produce values that are on the stack at the
3002
        // end of the basic block
3003
1.24M
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3004
1.04M
            ref r = ref_stack_at(&refs, i);
3005
1.04M
            if (r.instr != -1) {
3006
166k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3007
166k
            }
3008
1.04M
        }
3009
3010
        // Optimize instructions
3011
2.48M
        for (int i = 0; i < block->b_iused; i++) {
3012
2.27M
            if (!instr_flags[i]) {
3013
2.07M
                cfg_instr *instr = &block->b_instr[i];
3014
2.07M
                switch (instr->i_opcode) {
3015
20.9k
                    case LOAD_FAST:
3016
20.9k
                        instr->i_opcode = LOAD_FAST_BORROW;
3017
20.9k
                        break;
3018
407
                    case LOAD_FAST_LOAD_FAST:
3019
407
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3020
407
                        break;
3021
2.05M
                    default:
3022
2.05M
                        break;
3023
2.07M
                }
3024
2.07M
            }
3025
2.27M
        }
3026
204k
    }
3027
3028
42.0k
    #undef PUSH_REF
3029
3030
42.0k
    status = SUCCESS;
3031
3032
42.0k
done:
3033
42.0k
    ref_stack_fini(&refs);
3034
42.0k
    PyMem_Free(instr_flags);
3035
42.0k
    PyMem_Free(blocks);
3036
42.0k
    return status;
3037
42.0k
}
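
A minimal standalone sketch of the abstract interpretation performed by optimize_load_fast, using a toy three-opcode bytecode instead of CPython's real ref_stack API (all names below are hypothetical): each load pushes a ref tagged with its producing instruction index, consuming opcodes pop, and any ref still live at the end of the block marks its producer as unconsumed, i.e. not eligible to become a borrow.

/* toy_refscan.c -- illustrative only; not CPython's internal API. */
#include <stdio.h>

enum { T_LOAD, T_CONSUME1, T_END };          /* hypothetical toy opcodes */
typedef struct { int opcode; } toy_instr;
typedef struct { int instr; } toy_ref;       /* producing instruction index */

#define REF_UNCONSUMED 1

int
main(void)
{
    toy_instr block[] = { {T_LOAD}, {T_LOAD}, {T_CONSUME1}, {T_END} };
    int n = 4;
    toy_ref stack[16];
    int sp = 0;
    unsigned char flags[16] = {0};

    for (int i = 0; i < n; i++) {
        switch (block[i].opcode) {
            case T_LOAD:                     /* like LOAD_FAST: push a ref */
                stack[sp++] = (toy_ref){i};
                break;
            case T_CONSUME1:                 /* pops its single input */
                sp--;
                break;
            default:
                break;
        }
    }
    /* Refs still on the stack escape the block: their producers must
     * keep owning their reference, so they cannot become borrows. */
    for (int i = 0; i < sp; i++) {
        flags[stack[i].instr] |= REF_UNCONSUMED;
    }
    for (int i = 0; i < n; i++) {
        printf("instr %d: %s\n", i,
               flags[i] ? "keep owning (unconsumed)" : "may borrow");
    }
    return 0;
}

Here instruction 1's ref is consumed inside the block, so it could be rewritten to a borrowing load; instruction 0's value outlives the block and must keep its owned reference.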
3038
3039
// helper functions for add_checks_for_loads_of_unknown_variables
3040
static inline void
3041
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3042
855k
{
3043
    // Push b if the unsafe mask is giving us any new information.
3044
    // To avoid overflowing the stack, only allow each block once.
3045
    // Use b->b_visited=1 to mean that b is currently on the stack.
3046
855k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3047
855k
    if (b->b_unsafe_locals_mask != both) {
3048
55.4k
        b->b_unsafe_locals_mask = both;
3049
        // More work left to do.
3050
55.4k
        if (!b->b_visited) {
3051
            // not on the stack, so push it.
3052
54.9k
            *(*sp)++ = b;
3053
54.9k
            b->b_visited = 1;
3054
54.9k
        }
3055
55.4k
    }
3056
855k
}
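
A small self-contained demonstration of the invariant maybe_push relies on: a block is re-queued only when OR-ing in the incoming mask contributes at least one new bit, so each block can be pushed at most 64 times and the fixpoint loop is guaranteed to terminate. The struct and function below are illustrative stand-ins, not the real basicblock API.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef struct {
    uint64_t mask;      /* like b_unsafe_locals_mask */
    bool on_stack;      /* like b_visited */
} toy_block;

/* Returns true if the block was (re-)queued. */
static bool
toy_maybe_push(toy_block *b, uint64_t incoming)
{
    uint64_t both = b->mask | incoming;
    if (both == b->mask) {
        return false;          /* no new information: don't requeue */
    }
    b->mask = both;
    if (b->on_stack) {
        return false;          /* already queued; mask updated in place */
    }
    b->on_stack = true;
    return true;
}

int
main(void)
{
    toy_block b = {0};
    printf("%d\n", toy_maybe_push(&b, 0x3));   /* 1: bits 0-1 are new */
    printf("%d\n", toy_maybe_push(&b, 0x1));   /* 0: subset, no requeue */
    printf("%d\n", toy_maybe_push(&b, 0x4));   /* 0: new bit, but already queued */
    printf("%#llx\n", (unsigned long long)b.mask);  /* 0x7 */
    return 0;
}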
3057
3058
static void
3059
scan_block_for_locals(basicblock *b, basicblock ***sp)
3060
228k
{
3061
    // bit i is set if local i is potentially uninitialized
3062
228k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3063
1.72M
    for (int i = 0; i < b->b_iused; i++) {
3064
1.49M
        cfg_instr *instr = &b->b_instr[i];
3065
1.49M
        assert(instr->i_opcode != EXTENDED_ARG);
3066
1.49M
        if (instr->i_except != NULL) {
3067
616k
            maybe_push(instr->i_except, unsafe_mask, sp);
3068
616k
        }
3069
1.49M
        if (instr->i_oparg >= 64) {
3070
146k
            continue;
3071
146k
        }
3072
1.49M
        assert(instr->i_oparg >= 0);
3073
1.34M
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3074
1.34M
        switch (instr->i_opcode) {
3075
6.90k
            case DELETE_FAST:
3076
28.2k
            case LOAD_FAST_AND_CLEAR:
3077
70.0k
            case STORE_FAST_MAYBE_NULL:
3078
70.0k
                unsafe_mask |= bit;
3079
70.0k
                break;
3080
150k
            case STORE_FAST:
3081
150k
                unsafe_mask &= ~bit;
3082
150k
                break;
3083
2.51k
            case LOAD_FAST_CHECK:
3084
                // If this doesn't raise, then the local is defined.
3085
2.51k
                unsafe_mask &= ~bit;
3086
2.51k
                break;
3087
27.6k
            case LOAD_FAST:
3088
27.6k
                if (unsafe_mask & bit) {
3089
2.50k
                    instr->i_opcode = LOAD_FAST_CHECK;
3090
2.50k
                }
3091
27.6k
                unsafe_mask &= ~bit;
3092
27.6k
                break;
3093
1.34M
        }
3094
1.34M
    }
3095
228k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3096
118k
        maybe_push(b->b_next, unsafe_mask, sp);
3097
118k
    }
3098
228k
    cfg_instr *last = basicblock_last_instr(b);
3099
228k
    if (last && is_jump(last)) {
3100
100k
        assert(last->i_target != NULL);
3101
100k
        maybe_push(last->i_target, unsafe_mask, sp);
3102
100k
    }
3103
228k
}
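
The switch in scan_block_for_locals is a transfer function over a 64-bit "possibly uninitialized" mask. A compressed sketch of the same bit manipulation, with hypothetical opcode constants standing in for the real ones:

#include <stdint.h>
#include <stdio.h>

enum { OP_STORE, OP_DELETE, OP_LOAD };   /* stand-ins for STORE_FAST etc. */

static uint64_t
transfer(uint64_t unsafe, int opcode, int local)
{
    uint64_t bit = (uint64_t)1 << local;
    switch (opcode) {
        case OP_DELETE: return unsafe | bit;    /* local may now be unbound */
        case OP_STORE:  return unsafe & ~bit;   /* definitely bound after store */
        case OP_LOAD:
            if (unsafe & bit) {
                printf("local %d needs LOAD_FAST_CHECK\n", local);
            }
            return unsafe & ~bit;  /* if the load didn't raise, it's bound */
    }
    return unsafe;
}

int
main(void)
{
    uint64_t unsafe = (uint64_t)1 << 2;         /* local 2 starts uninitialized */
    unsafe = transfer(unsafe, OP_LOAD, 2);      /* prints: needs LOAD_FAST_CHECK */
    unsafe = transfer(unsafe, OP_LOAD, 2);      /* now proven bound: silent */
    unsafe = transfer(unsafe, OP_DELETE, 2);    /* unbound again */
    unsafe = transfer(unsafe, OP_LOAD, 2);      /* prints again */
    return 0;
}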
3104
3105
static int
3106
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3107
63
{
3108
63
    assert(nlocals > 64);
3109
63
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3110
63
    if (states == NULL) {
3111
0
        PyErr_NoMemory();
3112
0
        return ERROR;
3113
0
    }
3114
63
    Py_ssize_t blocknum = 0;
3115
    // state[i - 64] == blocknum if local i is guaranteed to
3116
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3117
    // STORE_FAST within that basicblock (not followed by
3118
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3119
1.00k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3120
940
        blocknum++;
3121
42.7k
        for (int i = 0; i < b->b_iused; i++) {
3122
41.7k
            cfg_instr *instr = &b->b_instr[i];
3123
41.7k
            assert(instr->i_opcode != EXTENDED_ARG);
3124
41.7k
            int arg = instr->i_oparg;
3125
41.7k
            if (arg < 64) {
3126
34.9k
                continue;
3127
34.9k
            }
3128
41.7k
            assert(arg >= 0);
3129
6.82k
            switch (instr->i_opcode) {
3130
57
                case DELETE_FAST:
3131
796
                case LOAD_FAST_AND_CLEAR:
3132
2.27k
                case STORE_FAST_MAYBE_NULL:
3133
2.27k
                    states[arg - 64] = blocknum - 1;
3134
2.27k
                    break;
3135
2.42k
                case STORE_FAST:
3136
2.42k
                    states[arg - 64] = blocknum;
3137
2.42k
                    break;
3138
1.29k
                case LOAD_FAST:
3139
1.29k
                    if (states[arg - 64] != blocknum) {
3140
76
                        instr->i_opcode = LOAD_FAST_CHECK;
3141
76
                    }
3142
1.29k
                    states[arg - 64] = blocknum;
3143
1.29k
                    break;
3144
6.82k
                    Py_UNREACHABLE();
3145
6.82k
            }
3146
6.82k
        }
3147
940
    }
3148
63
    PyMem_Free(states);
3149
63
    return SUCCESS;
3150
63
}
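
For locals beyond the first 64, the states array replaces the bitmask: states[i - 64] == blocknum means "proven bound within the current block". The sketch below (illustrative, tracking a single local) shows why writing blocknum - 1 invalidates a fact, and why simply incrementing blocknum expires all facts from the previous block without a per-block reset:

#include <stdio.h>

int
main(void)
{
    /* states[] is calloc'ed to 0; block numbering starts at 1, so no
     * local is "proven bound" when the scan begins. */
    long states[1] = {0};     /* tracking a single local, index 64 */
    long blocknum = 0;

    blocknum++;               /* enter block 1 */
    states[0] = blocknum;     /* STORE_FAST 64: proven in block 1 */
    printf("%s\n", states[0] == blocknum ? "plain LOAD_FAST"
                                         : "LOAD_FAST_CHECK");

    states[0] = blocknum - 1; /* DELETE_FAST 64: invalidate without memset */

    blocknum++;               /* enter block 2: block 1 facts expire
                               * automatically because blocknum changed */
    printf("%s\n", states[0] == blocknum ? "plain LOAD_FAST"
                                         : "LOAD_FAST_CHECK");
    return 0;
}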
3151
3152
static int
3153
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3154
42.0k
{
3155
42.0k
    assert(PyList_CheckExact(consts));
3156
42.0k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3157
42.0k
    if (nconsts == 0) {
3158
207
        return SUCCESS;  /* nothing to do */
3159
207
    }
3160
3161
41.8k
    Py_ssize_t *index_map = NULL;
3162
41.8k
    Py_ssize_t *reverse_index_map = NULL;
3163
41.8k
    int err = ERROR;
3164
3165
41.8k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3166
41.8k
    if (index_map == NULL) {
3167
0
        goto end;
3168
0
    }
3169
345k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3170
303k
        index_map[i] = -1;
3171
303k
    }
3172
    // The first constant may be the docstring; always keep it.
3173
41.8k
    index_map[0] = 0;
3174
3175
    /* mark used consts */
3176
384k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3177
3.03M
        for (int i = 0; i < b->b_iused; i++) {
3178
2.69M
            int opcode = b->b_instr[i].i_opcode;
3179
2.69M
            if (OPCODE_HAS_CONST(opcode)) {
3180
384k
                int index = b->b_instr[i].i_oparg;
3181
384k
                index_map[index] = index;
3182
384k
            }
3183
2.69M
        }
3184
342k
    }
3185
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3186
    /* condense consts */
3187
41.8k
    Py_ssize_t n_used_consts = 0;
3188
386k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3189
345k
        if (index_map[i] != -1) {
3190
176k
            assert(index_map[i] == i);
3191
176k
            index_map[n_used_consts++] = index_map[i];
3192
176k
        }
3193
345k
    }
3194
41.8k
    if (n_used_consts == nconsts) {
3195
        /* nothing to do */
3196
23.3k
        err = SUCCESS;
3197
23.3k
        goto end;
3198
23.3k
    }
3199
3200
    /* move all used consts to the beginning of the consts list */
3201
41.8k
    assert(n_used_consts < nconsts);
3202
121k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3203
103k
        Py_ssize_t old_index = index_map[i];
3204
103k
        assert(i <= old_index && old_index < nconsts);
3205
103k
        if (i != old_index) {
3206
77.0k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3207
77.0k
            assert(value != NULL);
3208
77.0k
            PyList_SetItem(consts, i, Py_NewRef(value));
3209
77.0k
        }
3210
103k
    }
3211
3212
    /* truncate the consts list at its new size */
3213
18.5k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3214
0
        goto end;
3215
0
    }
3216
    /* adjust const indices in the bytecode */
3217
18.5k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3218
18.5k
    if (reverse_index_map == NULL) {
3219
0
        goto end;
3220
0
    }
3221
290k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3222
272k
        reverse_index_map[i] = -1;
3223
272k
    }
3224
121k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3225
103k
        assert(index_map[i] != -1);
3226
103k
        assert(reverse_index_map[index_map[i]] == -1);
3227
103k
        reverse_index_map[index_map[i]] = i;
3228
103k
    }
3229
3230
215k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3231
1.83M
        for (int i = 0; i < b->b_iused; i++) {
3232
1.63M
            int opcode = b->b_instr[i].i_opcode;
3233
1.63M
            if (OPCODE_HAS_CONST(opcode)) {
3234
266k
                int index = b->b_instr[i].i_oparg;
3235
266k
                assert(reverse_index_map[index] >= 0);
3236
266k
                assert(reverse_index_map[index] < n_used_consts);
3237
266k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3238
266k
            }
3239
1.63M
        }
3240
197k
    }
3241
3242
18.5k
    err = SUCCESS;
3243
41.8k
end:
3244
41.8k
    PyMem_Free(index_map);
3245
41.8k
    PyMem_Free(reverse_index_map);
3246
41.8k
    return err;
3247
18.5k
}
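
A self-contained sketch of the two-map renumbering in remove_unused_consts, with plain ints standing in for PyObject constants: index_map first marks used slots, is then compacted in place into the old indices of the survivors, and reverse_index_map inverts it so each oparg can be rewritten with a single lookup.

#include <stdio.h>

int
main(void)
{
    int used[6]      = {1, 0, 1, 0, 0, 1};  /* consts 0, 2, 5 referenced */
    int index_map[6];
    int reverse[6];
    int n_used = 0;

    for (int i = 0; i < 6; i++) {
        index_map[i] = used[i] ? i : -1;
        reverse[i] = -1;
    }
    /* Compact: index_map[j] = old index of the j-th surviving const. */
    for (int i = 0; i < 6; i++) {
        if (index_map[i] != -1) {
            index_map[n_used++] = index_map[i];
        }
    }
    for (int j = 0; j < n_used; j++) {
        reverse[index_map[j]] = j;          /* old index -> new index */
    }
    /* Rewriting an oparg is then a single lookup: */
    int oparg = 5;
    printf("LOAD_CONST %d becomes LOAD_CONST %d\n", oparg, reverse[oparg]);
    return 0;
}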
3248
3249
3250
3251
static int
3252
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3253
                                                int nlocals,
3254
                                                int nparams)
3255
42.0k
{
3256
42.0k
    if (nlocals == 0) {
3257
22.4k
        return SUCCESS;
3258
22.4k
    }
3259
19.5k
    if (nlocals > 64) {
3260
        // To avoid O(nlocals**2) compilation, locals beyond the first
3261
        // 64 are only analyzed one basicblock at a time: initialization
3262
        // info is not passed between basicblocks.
3263
63
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3264
0
            return ERROR;
3265
0
        }
3266
63
        nlocals = 64;
3267
63
    }
3268
19.5k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3269
19.5k
    if (stack == NULL) {
3270
0
        return ERROR;
3271
0
    }
3272
19.5k
    basicblock **sp = stack;
3273
3274
    // First origin of being uninitialized:
3275
    // The non-parameter locals in the entry block.
3276
19.5k
    uint64_t start_mask = 0;
3277
70.3k
    for (int i = nparams; i < nlocals; i++) {
3278
50.7k
        start_mask |= (uint64_t)1 << i;
3279
50.7k
    }
3280
19.5k
    maybe_push(entryblock, start_mask, &sp);
3281
3282
    // Second origin of being uninitialized:
3283
    // There could be DELETE_FAST somewhere, so
3284
    // be sure to scan each basicblock at least once.
3285
193k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
173k
        scan_block_for_locals(b, &sp);
3287
173k
    }
3288
    // Now propagate the uncertainty from the origins we found: Use
3289
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3290
74.4k
    while (sp > stack) {
3291
54.9k
        basicblock *b = *--sp;
3292
        // mark as no longer on stack
3293
54.9k
        b->b_visited = 0;
3294
54.9k
        scan_block_for_locals(b, &sp);
3295
54.9k
    }
3296
19.5k
    PyMem_Free(stack);
3297
19.5k
    return SUCCESS;
3298
19.5k
}
3299
3300
3301
static int
3302
23.7k
mark_warm(basicblock *entryblock) {
3303
23.7k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3304
23.7k
    if (stack == NULL) {
3305
0
        return ERROR;
3306
0
    }
3307
23.7k
    basicblock **sp = stack;
3308
3309
23.7k
    *sp++ = entryblock;
3310
23.7k
    entryblock->b_visited = 1;
3311
207k
    while (sp > stack) {
3312
183k
        basicblock *b = *(--sp);
3313
183k
        assert(!b->b_except_handler);
3314
183k
        b->b_warm = 1;
3315
183k
        basicblock *next = b->b_next;
3316
183k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3317
112k
            *sp++ = next;
3318
112k
            next->b_visited = 1;
3319
112k
        }
3320
1.64M
        for (int i = 0; i < b->b_iused; i++) {
3321
1.46M
            cfg_instr *instr = &b->b_instr[i];
3322
1.46M
            if (is_jump(instr) && !instr->i_target->b_visited) {
3323
47.8k
                *sp++ = instr->i_target;
3324
47.8k
                instr->i_target->b_visited = 1;
3325
47.8k
            }
3326
1.46M
        }
3327
183k
    }
3328
23.7k
    PyMem_Free(stack);
3329
23.7k
    return SUCCESS;
3330
23.7k
}
3331
3332
static int
3333
23.7k
mark_cold(basicblock *entryblock) {
3334
349k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3335
325k
        assert(!b->b_cold && !b->b_warm);
3336
325k
    }
3337
23.7k
    if (mark_warm(entryblock) < 0) {
3338
0
        return ERROR;
3339
0
    }
3340
3341
23.7k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3342
23.7k
    if (stack == NULL) {
3343
0
        return ERROR;
3344
0
    }
3345
3346
23.7k
    basicblock **sp = stack;
3347
349k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3348
325k
        if (b->b_except_handler) {
3349
44.5k
            assert(!b->b_warm);
3350
44.5k
            *sp++ = b;
3351
44.5k
            b->b_visited = 1;
3352
44.5k
        }
3353
325k
    }
3354
3355
134k
    while (sp > stack) {
3356
111k
        basicblock *b = *(--sp);
3357
111k
        b->b_cold = 1;
3358
111k
        basicblock *next = b->b_next;
3359
111k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3360
59.1k
            if (!next->b_warm && !next->b_visited) {
3361
47.4k
                *sp++ = next;
3362
47.4k
                next->b_visited = 1;
3363
47.4k
            }
3364
59.1k
        }
3365
609k
        for (int i = 0; i < b->b_iused; i++) {
3366
498k
            cfg_instr *instr = &b->b_instr[i];
3367
498k
            if (is_jump(instr)) {
3368
52.5k
                assert(i == b->b_iused - 1);
3369
52.5k
                basicblock *target = b->b_instr[i].i_target;
3370
52.5k
                if (!target->b_warm && !target->b_visited) {
3371
19.2k
                    *sp++ = target;
3372
19.2k
                    target->b_visited = 1;
3373
19.2k
                }
3374
52.5k
            }
3375
498k
        }
3376
111k
    }
3377
23.7k
    PyMem_Free(stack);
3378
23.7k
    return SUCCESS;
3379
23.7k
}
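
Taken together, mark_warm and mark_cold implement a two-phase classification: everything reachable from the entry block without traversing an exception edge is warm, and everything reachable from an exception handler without re-entering warm code is cold. A compact sketch over a toy adjacency matrix (all names hypothetical):

#include <stdio.h>
#include <stdbool.h>

#define N 5

/* edges[i][j] != 0 means block i can transfer control to block j.
 * Block 3 is an exception handler: only exception edges reach it. */
static const int edges[N][N] = {
    {0, 1, 0, 0, 0},   /* 0 -> 1 */
    {0, 0, 1, 0, 0},   /* 1 -> 2 */
    {0, 0, 0, 0, 0},   /* 2 exits */
    {0, 0, 0, 0, 1},   /* 3 -> 4 (handler body) */
    {0, 0, 0, 0, 0},   /* 4 exits */
};

static void
dfs(int start, const bool *blocked, bool *seen)
{
    if (seen[start] || (blocked && blocked[start])) {
        return;
    }
    seen[start] = true;
    for (int j = 0; j < N; j++) {
        if (edges[start][j]) {
            dfs(j, blocked, seen);
        }
    }
}

int
main(void)
{
    bool warm[N] = {false}, cold[N] = {false};
    dfs(0, NULL, warm);      /* phase 1: like mark_warm from the entry block */
    dfs(3, warm, cold);      /* phase 2: like mark_cold, stopping at warm blocks */
    for (int i = 0; i < N; i++) {
        printf("block %d: %s\n", i, warm[i] ? "warm" : "cold");
    }
    return 0;
}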
3380
3381
3382
static int
3383
42.0k
push_cold_blocks_to_end(cfg_builder *g) {
3384
42.0k
    basicblock *entryblock = g->g_entryblock;
3385
42.0k
    if (entryblock->b_next == NULL) {
3386
        /* single basicblock, no need to reorder */
3387
18.2k
        return SUCCESS;
3388
18.2k
    }
3389
23.7k
    RETURN_IF_ERROR(mark_cold(entryblock));
3390
3391
23.7k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3392
3393
    /* If we have a cold block with fallthrough to a warm block, add */
3394
    /* an explicit jump instead of fallthrough */
3395
357k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3396
333k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3397
7.66k
            basicblock *explicit_jump = cfg_builder_new_block(g);
3398
7.66k
            if (explicit_jump == NULL) {
3399
0
                return ERROR;
3400
0
            }
3401
7.66k
            if (!IS_LABEL(b->b_next->b_label)) {
3402
0
                b->b_next->b_label.id = next_lbl++;
3403
0
            }
3404
7.66k
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3405
7.66k
                             NO_LOCATION);
3406
7.66k
            explicit_jump->b_cold = 1;
3407
7.66k
            explicit_jump->b_next = b->b_next;
3408
7.66k
            explicit_jump->b_predecessors = 1;
3409
7.66k
            b->b_next = explicit_jump;
3410
3411
            /* set target */
3412
7.66k
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3413
7.66k
            last->i_target = explicit_jump->b_next;
3414
7.66k
        }
3415
333k
    }
3416
3417
23.7k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3418
23.7k
    basicblock *cold_blocks = NULL;
3419
23.7k
    basicblock *cold_blocks_tail = NULL;
3420
3421
23.7k
    basicblock *b = entryblock;
3422
47.2k
    while (b->b_next) {
3423
47.2k
        assert(!b->b_cold);
3424
238k
        while (b->b_next && !b->b_next->b_cold) {
3425
190k
            b = b->b_next;
3426
190k
        }
3427
47.2k
        if (b->b_next == NULL) {
3428
            /* no more cold blocks */
3429
23.7k
            break;
3430
23.7k
        }
3431
3432
        /* b->b_next is the beginning of a cold streak */
3433
47.2k
        assert(!b->b_cold && b->b_next->b_cold);
3434
3435
23.4k
        basicblock *b_end = b->b_next;
3436
118k
        while (b_end->b_next && b_end->b_next->b_cold) {
3437
95.4k
            b_end = b_end->b_next;
3438
95.4k
        }
3439
3440
        /* b_end is the end of the cold streak */
3441
23.4k
        assert(b_end && b_end->b_cold);
3442
23.4k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3443
3444
23.4k
        if (cold_blocks == NULL) {
3445
4.72k
            cold_blocks = b->b_next;
3446
4.72k
        }
3447
18.7k
        else {
3448
18.7k
            cold_blocks_tail->b_next = b->b_next;
3449
18.7k
        }
3450
23.4k
        cold_blocks_tail = b_end;
3451
23.4k
        b->b_next = b_end->b_next;
3452
23.4k
        b_end->b_next = NULL;
3453
23.4k
    }
3454
23.7k
    assert(b != NULL && b->b_next == NULL);
3455
23.7k
    b->b_next = cold_blocks;
3456
3457
23.7k
    if (cold_blocks != NULL) {
3458
4.72k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3459
4.72k
    }
3460
23.7k
    return SUCCESS;
3461
23.7k
}
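
The block-moving loop above is a classic singly-linked-list splice: find each maximal run of cold blocks, unlink it, and append it to a separate cold list that is finally attached at the tail. The same shape on a toy list of ints (illustrative only):

#include <stdio.h>
#include <stdbool.h>

typedef struct node { int id; bool cold; struct node *next; } node;

int
main(void)
{
    node n4 = {4, false, NULL};
    node n3 = {3, true,  &n4};
    node n2 = {2, true,  &n3};
    node n1 = {1, false, &n2};
    node n0 = {0, false, &n1};   /* 0 -> 1 -> 2c -> 3c -> 4 */

    node *cold_head = NULL, *cold_tail = NULL;
    node *b = &n0;
    while (b->next) {
        while (b->next && !b->next->cold) {
            b = b->next;
        }
        if (b->next == NULL) {
            break;                       /* no more cold streaks */
        }
        node *end = b->next;             /* start of a cold streak */
        while (end->next && end->next->cold) {
            end = end->next;
        }
        if (cold_head == NULL) { cold_head = b->next; }
        else                   { cold_tail->next = b->next; }
        cold_tail = end;
        b->next = end->next;             /* unlink the streak */
        end->next = NULL;
    }
    b->next = cold_head;                 /* append all cold blocks at the end */

    for (node *p = &n0; p; p = p->next) {
        printf("%d ", p->id);            /* prints: 0 1 4 2 3 */
    }
    printf("\n");
    return 0;
}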
3462
3463
static int
3464
convert_pseudo_conditional_jumps(cfg_builder *g)
3465
42.0k
{
3466
42.0k
    basicblock *entryblock = g->g_entryblock;
3467
393k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3468
3.05M
        for (int i = 0; i < b->b_iused; i++) {
3469
2.70M
            cfg_instr *instr = &b->b_instr[i];
3470
2.70M
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3471
2.18k
                assert(i == b->b_iused - 1);
3472
2.18k
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3473
1.29k
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3474
2.18k
                location loc = instr->i_loc;
3475
2.18k
                basicblock *except = instr->i_except;
3476
2.18k
                cfg_instr copy = {
3477
2.18k
                            .i_opcode = COPY,
3478
2.18k
                            .i_oparg = 1,
3479
2.18k
                            .i_loc = loc,
3480
2.18k
                            .i_target = NULL,
3481
2.18k
                            .i_except = except,
3482
2.18k
                };
3483
2.18k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3484
2.18k
                cfg_instr to_bool = {
3485
2.18k
                            .i_opcode = TO_BOOL,
3486
2.18k
                            .i_oparg = 0,
3487
2.18k
                            .i_loc = loc,
3488
2.18k
                            .i_target = NULL,
3489
2.18k
                            .i_except = except,
3490
2.18k
                };
3491
2.18k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3492
2.18k
            }
3493
2.70M
        }
3494
351k
    }
3495
42.0k
    return SUCCESS;
3496
42.0k
}
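
The rewrite above turns each pseudo conditional jump into three real instructions while preserving its "peek without popping" semantics: COPY 1 duplicates the value under test, TO_BOOL coerces the duplicate, and the POP_JUMP variant consumes only the duplicate, leaving the original on the stack. A schematic sketch (strings stand in for real instructions):

#include <stdio.h>
#include <string.h>

int
main(void)
{
    /* before: ...; JUMP_IF_TRUE L      (peeks at TOS, leaves it in place)
     * after:  ...; COPY 1              (duplicate TOS)
     *              TO_BOOL             (coerce the duplicate to a bool)
     *              POP_JUMP_IF_TRUE L  (consume the bool; TOS unchanged) */
    const char *pseudo = "JUMP_IF_TRUE";
    const char *expansion[3];
    expansion[0] = "COPY 1";
    expansion[1] = "TO_BOOL";
    expansion[2] = strcmp(pseudo, "JUMP_IF_TRUE") == 0 ? "POP_JUMP_IF_TRUE L"
                                                       : "POP_JUMP_IF_FALSE L";
    for (int i = 0; i < 3; i++) {
        printf("%s\n", expansion[i]);
    }
    return 0;
}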
3497
3498
static int
3499
convert_pseudo_ops(cfg_builder *g)
3500
42.0k
{
3501
42.0k
    basicblock *entryblock = g->g_entryblock;
3502
393k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3503
3.08M
        for (int i = 0; i < b->b_iused; i++) {
3504
2.72M
            cfg_instr *instr = &b->b_instr[i];
3505
2.72M
            if (is_block_push(instr)) {
3506
44.5k
                INSTR_SET_OP0(instr, NOP);
3507
44.5k
            }
3508
2.68M
            else if (instr->i_opcode == LOAD_CLOSURE) {
3509
11.1k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3510
11.1k
                instr->i_opcode = LOAD_FAST;
3511
11.1k
            }
3512
2.67M
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3513
21.8k
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3514
21.8k
                instr->i_opcode = STORE_FAST;
3515
21.8k
            }
3516
2.72M
        }
3517
351k
    }
3518
42.0k
    return remove_redundant_nops_and_jumps(g);
3519
42.0k
}
3520
3521
static inline bool
3522
703k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3523
703k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3524
189k
        return basicblock_has_no_lineno(b);
3525
189k
    }
3526
513k
    else {
3527
513k
        return false;
3528
513k
    }
3529
703k
}
3530
3531
3532
/* PEP 626 mandates that the f_lineno of a frame is correct
3533
 * after a frame terminates. It would be prohibitively expensive
3534
 * to continuously update the f_lineno field at runtime,
3535
 * so we make sure that all exiting instructions (raises and returns)
3536
 * have a valid line number, allowing us to compute f_lineno lazily.
3537
 * We can do this by duplicating the exit blocks without line number
3538
 * so that none have more than one predecessor. We can then safely
3539
 * copy the line number from the sole predecessor block.
3540
 */
3541
static int
3542
duplicate_exits_without_lineno(cfg_builder *g)
3543
84.0k
{
3544
84.0k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3545
3546
    /* Copy all exit blocks without line number that are targets of a jump.
3547
     */
3548
84.0k
    basicblock *entryblock = g->g_entryblock;
3549
779k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3550
695k
        cfg_instr *last = basicblock_last_instr(b);
3551
695k
        if (last == NULL) {
3552
66.3k
            continue;
3553
66.3k
        }
3554
629k
        if (is_jump(last)) {
3555
339k
            basicblock *target = next_nonempty_block(last->i_target);
3556
339k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3557
1.62k
                basicblock *new_target = copy_basicblock(g, target);
3558
1.62k
                if (new_target == NULL) {
3559
0
                    return ERROR;
3560
0
                }
3561
1.62k
                new_target->b_instr[0].i_loc = last->i_loc;
3562
1.62k
                last->i_target = new_target;
3563
1.62k
                target->b_predecessors--;
3564
1.62k
                new_target->b_predecessors = 1;
3565
1.62k
                new_target->b_next = target->b_next;
3566
1.62k
                new_target->b_label.id = next_lbl++;
3567
1.62k
                target->b_next = new_target;
3568
1.62k
            }
3569
339k
        }
3570
629k
    }
3571
3572
    /* Any remaining reachable exit blocks without line number can only be reached by
3573
     * fall through, and thus can only have a single predecessor */
3574
779k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3575
695k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3576
364k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3577
9.45k
                cfg_instr *last = basicblock_last_instr(b);
3578
9.45k
                assert(last != NULL);
3579
9.45k
                b->b_next->b_instr[0].i_loc = last->i_loc;
3580
9.45k
            }
3581
364k
        }
3582
695k
    }
3583
84.0k
    return SUCCESS;
3584
84.0k
}
3585
3586
3587
/* If an instruction has no line number, but its predecessor in the BB does,
3588
 * then copy the line number. If a successor block has no line number and only
3589
 * one predecessor, then it inherits the line number.
3590
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3591
 * Also reduces the size of the line number table,
3592
 * but has no impact on the generated line number events.
3593
 */
3594
3595
static inline void
3596
maybe_propagate_location(basicblock *b, int i, location loc)
3597
6.80M
{
3598
6.80M
    assert(b->b_iused > i);
3599
6.80M
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3600
449k
         b->b_instr[i].i_loc = loc;
3601
449k
    }
3602
6.80M
}
3603
3604
static void
3605
propagate_line_numbers(basicblock *entryblock)
3606
84.0k
{
3607
779k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3608
695k
        cfg_instr *last = basicblock_last_instr(b);
3609
695k
        if (last == NULL) {
3610
66.3k
            continue;
3611
66.3k
        }
3612
3613
629k
        location prev_location = NO_LOCATION;
3614
7.11M
        for (int i = 0; i < b->b_iused; i++) {
3615
6.49M
            maybe_propagate_location(b, i, prev_location);
3616
6.49M
            prev_location = b->b_instr[i].i_loc;
3617
6.49M
        }
3618
629k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3619
252k
            if (b->b_next->b_iused > 0) {
3620
251k
                maybe_propagate_location(b->b_next, 0, prev_location);
3621
251k
            }
3622
252k
        }
3623
629k
        if (is_jump(last)) {
3624
339k
            basicblock *target = last->i_target;
3625
339k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3626
47
                target = target->b_next;
3627
47
            }
3628
339k
            if (target->b_predecessors == 1) {
3629
64.5k
                maybe_propagate_location(target, 0, prev_location);
3630
64.5k
            }
3631
339k
        }
3632
629k
    }
3633
84.0k
}
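
A minimal model of the intra-block forward pass in propagate_line_numbers: an instruction with no location inherits the location of the instruction before it. Plain ints stand in for the location struct, with -1 playing the role of NO_LOCATION:

#include <stdio.h>

#define NO_LINENO (-1)

int
main(void)
{
    int lineno[6] = {10, NO_LINENO, NO_LINENO, 12, NO_LINENO, NO_LINENO};
    int prev = NO_LINENO;
    for (int i = 0; i < 6; i++) {
        if (lineno[i] == NO_LINENO) {
            lineno[i] = prev;            /* like maybe_propagate_location */
        }
        prev = lineno[i];
    }
    for (int i = 0; i < 6; i++) {
        printf("instr %d: line %d\n", i, lineno[i]);  /* 10 10 10 12 12 12 */
    }
    return 0;
}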
3634
3635
static int
3636
resolve_line_numbers(cfg_builder *g, int firstlineno)
3637
84.0k
{
3638
84.0k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3639
84.0k
    propagate_line_numbers(g->g_entryblock);
3640
84.0k
    return SUCCESS;
3641
84.0k
}
3642
3643
int
3644
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3645
                        int nlocals, int nparams, int firstlineno)
3646
42.0k
{
3647
42.0k
    assert(cfg_builder_check(g));
3648
    /** Preprocessing **/
3649
    /* Map labels to targets and mark exception handlers */
3650
42.0k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3651
42.0k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3652
42.0k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3653
3654
    /** Optimization **/
3655
42.0k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3656
42.0k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3657
42.0k
    RETURN_IF_ERROR(
3658
42.0k
        add_checks_for_loads_of_uninitialized_variables(
3659
42.0k
            g->g_entryblock, nlocals, nparams));
3660
42.0k
    RETURN_IF_ERROR(insert_superinstructions(g));
3661
3662
42.0k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3663
42.0k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3664
    // Assert temporarily removed. See https://github.com/python/cpython/issues/125845
3665
    // assert(all_exits_have_lineno(g->g_entryblock));
3666
42.0k
    return SUCCESS;
3667
42.0k
}
3668
3669
static int *
3670
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3671
42.0k
{
3672
42.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3673
42.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3674
42.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3675
3676
42.0k
    int noffsets = ncellvars + nfreevars;
3677
42.0k
    int *fixed = PyMem_New(int, noffsets);
3678
42.0k
    if (fixed == NULL) {
3679
0
        PyErr_NoMemory();
3680
0
        return NULL;
3681
0
    }
3682
58.7k
    for (int i = 0; i < noffsets; i++) {
3683
16.7k
        fixed[i] = nlocals + i;
3684
16.7k
    }
3685
3686
42.0k
    PyObject *varname, *cellindex;
3687
42.0k
    Py_ssize_t pos = 0;
3688
49.2k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3689
7.19k
        PyObject *varindex;
3690
7.19k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3691
0
            goto error;
3692
0
        }
3693
7.19k
        if (varindex == NULL) {
3694
6.83k
            continue;
3695
6.83k
        }
3696
3697
355
        int argoffset = PyLong_AsInt(varindex);
3698
355
        Py_DECREF(varindex);
3699
355
        if (argoffset == -1 && PyErr_Occurred()) {
3700
0
            goto error;
3701
0
        }
3702
3703
355
        int oldindex = PyLong_AsInt(cellindex);
3704
355
        if (oldindex == -1 && PyErr_Occurred()) {
3705
0
            goto error;
3706
0
        }
3707
355
        fixed[oldindex] = argoffset;
3708
355
    }
3709
42.0k
    return fixed;
3710
3711
0
error:
3712
0
    PyMem_Free(fixed);
3713
0
    return NULL;
3714
42.0k
}
3715
3716
#define IS_GENERATOR(CF) \
3717
42.0k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3718
3719
static int
3720
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3721
                           int *fixed, int nfreevars, int code_flags)
3722
42.0k
{
3723
42.0k
    assert(umd->u_firstlineno > 0);
3724
3725
    /* Add the generator prefix instructions. */
3726
42.0k
    if (IS_GENERATOR(code_flags)) {
3727
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3728
         * of 0. This is because RETURN_GENERATOR pushes an element
3729
         * with _PyFrame_StackPush before switching stacks.
3730
         */
3731
3732
3.32k
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3733
3.32k
        cfg_instr make_gen = {
3734
3.32k
            .i_opcode = RETURN_GENERATOR,
3735
3.32k
            .i_oparg = 0,
3736
3.32k
            .i_loc = loc,
3737
3.32k
            .i_target = NULL,
3738
3.32k
            .i_except = NULL,
3739
3.32k
        };
3740
3.32k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3741
3.32k
        cfg_instr pop_top = {
3742
3.32k
            .i_opcode = POP_TOP,
3743
3.32k
            .i_oparg = 0,
3744
3.32k
            .i_loc = loc,
3745
3.32k
            .i_target = NULL,
3746
3.32k
            .i_except = NULL,
3747
3.32k
        };
3748
3.32k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3749
3.32k
    }
3750
3751
    /* Set up cells for any variable that escapes, to be put in a closure. */
3752
42.0k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3753
42.0k
    if (ncellvars) {
3754
        // umd->u_cellvars has the cells out of order so we sort them
3755
        // before adding the MAKE_CELL instructions.  Note that we
3756
        // adjust for arg cells, which come first.
3757
6.79k
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3758
6.79k
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3759
6.79k
        if (sorted == NULL) {
3760
0
            PyErr_NoMemory();
3761
0
            return ERROR;
3762
0
        }
3763
13.9k
        for (int i = 0; i < ncellvars; i++) {
3764
7.19k
            sorted[fixed[i]] = i + 1;
3765
7.19k
        }
3766
24.2k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3767
17.4k
            int oldindex = sorted[i] - 1;
3768
17.4k
            if (oldindex == -1) {
3769
10.2k
                continue;
3770
10.2k
            }
3771
7.19k
            cfg_instr make_cell = {
3772
7.19k
                .i_opcode = MAKE_CELL,
3773
                // This will get fixed in fix_cell_offsets().
3774
7.19k
                .i_oparg = oldindex,
3775
7.19k
                .i_loc = NO_LOCATION,
3776
7.19k
                .i_target = NULL,
3777
7.19k
                .i_except = NULL,
3778
7.19k
            };
3779
7.19k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3780
0
                PyMem_RawFree(sorted);
3781
0
                return ERROR;
3782
0
            }
3783
7.19k
            ncellsused += 1;
3784
7.19k
        }
3785
6.79k
        PyMem_RawFree(sorted);
3786
6.79k
    }
3787
3788
42.0k
    if (nfreevars) {
3789
7.69k
        cfg_instr copy_frees = {
3790
7.69k
            .i_opcode = COPY_FREE_VARS,
3791
7.69k
            .i_oparg = nfreevars,
3792
7.69k
            .i_loc = NO_LOCATION,
3793
7.69k
            .i_target = NULL,
3794
7.69k
            .i_except = NULL,
3795
7.69k
        };
3796
7.69k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3797
7.69k
    }
3798
3799
42.0k
    return SUCCESS;
3800
42.0k
}
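
The sorted array built above is effectively an inverse permutation: indexing it by frame slot recovers which cell (if any) lives there, so MAKE_CELL instructions come out in frame-slot order. A standalone illustration with a made-up two-cell layout (names and numbers hypothetical):

#include <stdio.h>

int
main(void)
{
    /* Hypothetical layout: 3 frame slots, 2 cells. Cell 0 lives in
     * slot 2; cell 1 is an arg cell living in slot 0. */
    int ncellvars = 2;
    int fixed[2] = {2, 0};
    int sorted[3] = {0};               /* calloc'ed in the real code */

    for (int i = 0; i < ncellvars; i++) {
        sorted[fixed[i]] = i + 1;      /* +1 so 0 can mean "no cell here" */
    }
    /* Walk frame slots in order, emitting one MAKE_CELL per cell found. */
    for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
        int oldindex = sorted[i] - 1;
        if (oldindex == -1) {
            continue;                  /* slot holds a plain local */
        }
        printf("MAKE_CELL %d (frame slot %d)\n", oldindex, i);
        ncellsused += 1;
    }
    return 0;
}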
3801
3802
static int
3803
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3804
42.0k
{
3805
42.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3806
42.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3807
42.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3808
42.0k
    int noffsets = ncellvars + nfreevars;
3809
3810
    // First deal with duplicates (arg cells).
3811
42.0k
    int numdropped = 0;
3812
58.7k
    for (int i = 0; i < noffsets ; i++) {
3813
16.7k
        if (fixedmap[i] == i + nlocals) {
3814
16.3k
            fixedmap[i] -= numdropped;
3815
16.3k
        }
3816
355
        else {
3817
            // It was a duplicate (cell/arg).
3818
355
            numdropped += 1;
3819
355
        }
3820
16.7k
    }
3821
3822
    // Then update offsets, either relative to locals or by cell2arg.
3823
393k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3824
3.08M
        for (int i = 0; i < b->b_iused; i++) {
3825
2.72M
            cfg_instr *inst = &b->b_instr[i];
3826
            // This is called before extended args are generated.
3827
2.72M
            assert(inst->i_opcode != EXTENDED_ARG);
3828
2.72M
            int oldoffset = inst->i_oparg;
3829
2.72M
            switch(inst->i_opcode) {
3830
7.19k
                case MAKE_CELL:
3831
18.3k
                case LOAD_CLOSURE:
3832
26.2k
                case LOAD_DEREF:
3833
30.9k
                case STORE_DEREF:
3834
31.0k
                case DELETE_DEREF:
3835
35.2k
                case LOAD_FROM_DICT_OR_DEREF:
3836
35.2k
                    assert(oldoffset >= 0);
3837
35.2k
                    assert(oldoffset < noffsets);
3838
35.2k
                    assert(fixedmap[oldoffset] >= 0);
3839
35.2k
                    inst->i_oparg = fixedmap[oldoffset];
3840
2.72M
            }
3841
2.72M
        }
3842
351k
    }
3843
3844
42.0k
    return numdropped;
3845
42.0k
}
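
A worked example of the offset arithmetic shared by build_cellfixedoffsets and fix_cell_offsets, under an assumed layout (two locals, one of which doubles as a cell, one plain cell, one freevar): cells and frees initially follow the locals, the arg cell is redirected onto its parameter's slot, and the duplicate slot is then compacted away.

#include <stdio.h>

int
main(void)
{
    /* Hypothetical layout: nlocals = 2; cell 0 doubles as parameter 0
     * (an "arg cell"), cell 1 is a plain cell, plus one freevar. */
    int nlocals = 2, noffsets = 3;
    int fixed[3];

    /* build_cellfixedoffsets: cells/frees start right after the locals... */
    for (int i = 0; i < noffsets; i++) {
        fixed[i] = nlocals + i;          /* {2, 3, 4} */
    }
    fixed[0] = 0;                        /* ...but the arg cell reuses slot 0 */

    /* fix_cell_offsets: compact out the now-unused slot. */
    int numdropped = 0;
    for (int i = 0; i < noffsets; i++) {
        if (fixed[i] == i + nlocals) {
            fixed[i] -= numdropped;
        }
        else {
            numdropped += 1;             /* cell shared with an argument */
        }
    }
    for (int i = 0; i < noffsets; i++) {
        printf("cell/free %d -> frame slot %d\n", i, fixed[i]);
    }
    /* prints: 0 -> 0, 1 -> 2, 2 -> 3 */
    printf("nlocalsplus = %d\n", nlocals + noffsets - numdropped);  /* 4 */
    return 0;
}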
3846
3847
static int
3848
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3849
42.0k
{
3850
42.0k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3851
42.0k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3852
42.0k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3853
42.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3854
42.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3855
42.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3856
42.0k
    assert(INT_MAX - nlocals - ncellvars > 0);
3857
42.0k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3858
42.0k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3859
42.0k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3860
42.0k
    if (cellfixedoffsets == NULL) {
3861
0
        return ERROR;
3862
0
    }
3863
3864
    // This must be called before fix_cell_offsets().
3865
42.0k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3866
0
        PyMem_Free(cellfixedoffsets);
3867
0
        return ERROR;
3868
0
    }
3869
3870
42.0k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3871
42.0k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3872
42.0k
    cellfixedoffsets = NULL;
3873
42.0k
    if (numdropped < 0) {
3874
0
        return ERROR;
3875
0
    }
3876
3877
42.0k
    nlocalsplus -= numdropped;
3878
42.0k
    return nlocalsplus;
3879
42.0k
}
3880
3881
cfg_builder *
3882
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3883
42.0k
{
3884
42.0k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3885
0
        return NULL;
3886
0
    }
3887
42.0k
    cfg_builder *g = _PyCfgBuilder_New();
3888
42.0k
    if (g == NULL) {
3889
0
        return NULL;
3890
0
    }
3891
3.94M
    for (int i = 0; i < seq->s_used; i++) {
3892
3.90M
        seq->s_instrs[i].i_target = 0;
3893
3.90M
    }
3894
3.94M
    for (int i = 0; i < seq->s_used; i++) {
3895
3.90M
        _PyInstruction *instr = &seq->s_instrs[i];
3896
3.90M
        if (HAS_TARGET(instr->i_opcode)) {
3897
228k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3898
228k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3899
228k
        }
3900
3.90M
    }
3901
42.0k
    int offset = 0;
3902
3.94M
    for (int i = 0; i < seq->s_used; i++) {
3903
3.90M
        _PyInstruction *instr = &seq->s_instrs[i];
3904
3.90M
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3905
12.1k
            if (seq->s_annotations_code != NULL) {
3906
629
                assert(seq->s_annotations_code->s_labelmap_size == 0
3907
629
                    && seq->s_annotations_code->s_nested == NULL);
3908
2.51k
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3909
1.88k
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3910
1.88k
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3911
1.88k
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3912
0
                        goto error;
3913
0
                    }
3914
1.88k
                }
3915
629
                offset += seq->s_annotations_code->s_used - 1;
3916
629
            }
3917
11.4k
            else {
3918
11.4k
                offset -= 1;
3919
11.4k
            }
3920
12.1k
            continue;
3921
12.1k
        }
3922
3.89M
        if (instr->i_target) {
3923
154k
            jump_target_label lbl_ = {i + offset};
3924
154k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3925
0
                goto error;
3926
0
            }
3927
154k
        }
3928
3.89M
        int opcode = instr->i_opcode;
3929
3.89M
        int oparg = instr->i_oparg;
3930
3.89M
        if (HAS_TARGET(opcode)) {
3931
228k
            oparg += offset;
3932
228k
        }
3933
3.89M
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3934
0
            goto error;
3935
0
        }
3936
3.89M
    }
3937
42.0k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3938
0
        goto error;
3939
0
    }
3940
42.0k
    return g;
3941
0
error:
3942
0
    _PyCfgBuilder_Free(g);
3943
0
    return NULL;
3944
42.0k
}
3945
3946
int
3947
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3948
42.0k
{
3949
42.0k
    int lbl = 0;
3950
394k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3951
352k
        b->b_label = (jump_target_label){lbl};
3952
352k
        lbl += 1;
3953
352k
    }
3954
394k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3955
352k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3956
3.13M
        for (int i = 0; i < b->b_iused; i++) {
3957
2.77M
            cfg_instr *instr = &b->b_instr[i];
3958
2.77M
            if (HAS_TARGET(instr->i_opcode)) {
3959
                /* Set oparg to the label id (it will later be mapped to an offset) */
3960
165k
                instr->i_oparg = instr->i_target->b_label.id;
3961
165k
            }
3962
2.77M
            RETURN_IF_ERROR(
3963
2.77M
                _PyInstructionSequence_Addop(
3964
2.77M
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3965
3966
2.77M
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3967
2.77M
            if (instr->i_except != NULL) {
3968
715k
                hi->h_label = instr->i_except->b_label.id;
3969
715k
                hi->h_startdepth = instr->i_except->b_startdepth;
3970
715k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3971
715k
            }
3972
2.06M
            else {
3973
2.06M
                hi->h_label = -1;
3974
2.06M
            }
3975
2.77M
        }
3976
352k
    }
3977
42.0k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3978
0
        return ERROR;
3979
0
    }
3980
42.0k
    return SUCCESS;
3981
42.0k
}
3982
3983
3984
int
3985
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3986
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3987
                                     int *stackdepth, int *nlocalsplus,
3988
                                     _PyInstructionSequence *seq)
3989
42.0k
{
3990
42.0k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3991
3992
42.0k
    *stackdepth = calculate_stackdepth(g);
3993
42.0k
    if (*stackdepth < 0) {
3994
0
        return ERROR;
3995
0
    }
3996
3997
    /* prepare_localsplus adds instructions for generators that push
3998
     * and pop an item on the stack. This assertion makes sure there
3999
     * is space on the stack for that.
4000
     * It should always be true, because a generator must have at
4001
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
4002
     * which requires stackspace.
4003
     */
4004
42.0k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
4005
4006
42.0k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
4007
42.0k
    if (*nlocalsplus < 0) {
4008
0
        return ERROR;
4009
0
    }
4010
4011
42.0k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4012
4013
    /* Order of basic blocks must have been determined by now */
4014
4015
42.0k
    RETURN_IF_ERROR(normalize_jumps(g));
4016
42.0k
    assert(no_redundant_jumps(g));
4017
4018
    /* Can't modify the bytecode after inserting instructions that produce
4019
     * borrowed references.
4020
     */
4021
42.0k
    RETURN_IF_ERROR(optimize_load_fast(g));
4022
4023
    /* Can't modify the bytecode after computing jump offsets. */
4024
42.0k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4025
0
        return ERROR;
4026
0
    }
4027
4028
42.0k
    return SUCCESS;
4029
42.0k
}
4030
4031
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4032
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4033
 */
4034
int
4035
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4036
0
{
4037
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4038
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4039
0
    return SUCCESS;
4040
0
}
4041
4042
/* Exported API functions */
4043
4044
int
4045
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4046
0
{
4047
0
    stack_effects effs;
4048
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4049
0
        return PY_INVALID_STACK_EFFECT;
4050
0
    }
4051
0
    return effs.net;
4052
0
}
4053
4054
int
4055
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4056
5.63k
{
4057
5.63k
    stack_effects effs;
4058
5.63k
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4059
0
        return PY_INVALID_STACK_EFFECT;
4060
0
    }
4061
5.63k
    return effs.net;
4062
5.63k
}
4063
4064
/* Access to compiler optimizations for unit tests.
4065
4066
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4067
 * a CFG, optimizes it, and converts it back to an instruction list.
4068
 */
4069
4070
static PyObject *
4071
cfg_to_instruction_sequence(cfg_builder *g)
4072
0
{
4073
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4074
0
    if (seq == NULL) {
4075
0
        return NULL;
4076
0
    }
4077
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4078
0
        PyInstructionSequence_Fini(seq);
4079
0
        return NULL;
4080
0
    }
4081
0
    return (PyObject*)seq;
4082
0
}
4083
4084
PyObject *
4085
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4086
0
{
4087
0
    if (!_PyInstructionSequence_Check(seq)) {
4088
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4089
0
        return NULL;
4090
0
    }
4091
0
    PyObject *const_cache = PyDict_New();
4092
0
    if (const_cache == NULL) {
4093
0
        return NULL;
4094
0
    }
4095
4096
0
    PyObject *res = NULL;
4097
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4098
0
    if (g == NULL) {
4099
0
        goto error;
4100
0
    }
4101
0
    int nparams = 0, firstlineno = 1;
4102
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4103
0
                                nparams, firstlineno) < 0) {
4104
0
        goto error;
4105
0
    }
4106
4107
0
    if (calculate_stackdepth(g) == ERROR) {
4108
0
        goto error;
4109
0
    }
4110
4111
0
    if (optimize_load_fast(g) != SUCCESS) {
4112
0
        goto error;
4113
0
    }
4114
4115
0
    res = cfg_to_instruction_sequence(g);
4116
0
error:
4117
0
    Py_DECREF(const_cache);
4118
0
    _PyCfgBuilder_Free(g);
4119
0
    return res;
4120
0
}