Coverage Report

Created: 2025-08-26 06:57

/src/cpython3/Python/flowgraph.c
Line
Count
Source
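(Counts are cumulative execution counts, abbreviated k = thousands and M = millions; a count of 0 marks a line that was never executed.)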
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
19.9M
#define SUCCESS 0
19
76.8k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
30.7M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
6.72M
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the blocks are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
6.99M
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
6.99M
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
5.69k
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
27.9M
{
101
27.9M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
27.9M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
27.9M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
24.0M
{
108
24.0M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
24.0M
}
110
111
/* One arg */
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
1.47M
    do { \
114
1.47M
        assert(OPCODE_HAS_ARG(OP)); \
115
1.47M
        cfg_instr *_instr__ptr_ = (I); \
116
1.47M
        _instr__ptr_->i_opcode = (OP); \
117
1.47M
        _instr__ptr_->i_oparg = (ARG); \
118
1.47M
    } while (0);
119
120
/* No args */
121
#define INSTR_SET_OP0(I, OP) \
122
2.37M
    do { \
123
2.37M
        assert(!OPCODE_HAS_ARG(OP)); \
124
2.37M
        cfg_instr *_instr__ptr_ = (I); \
125
2.37M
        _instr__ptr_->i_opcode = (OP); \
126
2.37M
        _instr__ptr_->i_oparg = 0; \
127
2.37M
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
1.43M
    do { \
131
1.43M
        cfg_instr *_instr__ptr_ = (I); \
132
1.43M
        _instr__ptr_->i_loc = (LOC); \
133
1.43M
    } while (0);
134
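/* Illustrative use of the macros above (this is how the optimizer passes
 * further down retire an instruction): overwrite it with a NOP and drop
 * its location, e.g.
 *
 *     INSTR_SET_OP0(instr, NOP);
 *     INSTR_SET_LOC(instr, NO_LOCATION);
 *
 * See nop_out() below, which does exactly this.
 */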
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr array as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
6.72M
{
144
6.72M
    assert(b != NULL);
145
6.72M
    _Py_c_array_t array = {
146
6.72M
        .array = (void*)b->b_instr,
147
6.72M
        .allocated_entries = b->b_ialloc,
148
6.72M
        .item_size = sizeof(cfg_instr),
149
6.72M
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
6.72M
    };
151
152
6.72M
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
6.72M
    b->b_instr = array.array;
154
6.72M
    b->b_ialloc = array.allocated_entries;
155
6.72M
    return b->b_iused++;
156
6.72M
}
157
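/* Note: basicblock_next_instr() only reserves a zero-initialized slot and
 * returns its index; callers fill in the instruction afterwards (see
 * basicblock_addop() below, which relies on the zero initialization for
 * i_target and i_except).
 */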
158
static cfg_instr *
159
23.5M
basicblock_last_instr(const basicblock *b) {
160
23.5M
    assert(b->b_iused >= 0);
161
23.5M
    if (b->b_iused > 0) {
162
22.1M
        assert(b->b_instr != NULL);
163
22.1M
        return &b->b_instr[b->b_iused - 1];
164
22.1M
    }
165
1.36M
    return NULL;
166
23.5M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
645k
{
175
645k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
645k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
645k
    b->b_list = g->g_block_list;
182
645k
    g->g_block_list = b;
183
645k
    b->b_label = NO_LABEL;
184
645k
    return b;
185
645k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
6.63M
{
190
6.63M
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
6.63M
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
6.63M
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
6.63M
    assert(0 <= oparg && oparg < (1 << 30));
194
195
6.63M
    int off = basicblock_next_instr(b);
196
6.63M
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
6.63M
    cfg_instr *i = &b->b_instr[off];
200
6.63M
    i->i_opcode = opcode;
201
6.63M
    i->i_oparg = oparg;
202
6.63M
    i->i_loc = loc;
203
    // memory is already zero initialized
204
6.63M
    assert(i->i_target == NULL);
205
6.63M
    assert(i->i_except == NULL);
206
207
6.63M
    return SUCCESS;
208
6.63M
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
4.40k
{
213
4.40k
    cfg_instr *last = basicblock_last_instr(b);
214
4.40k
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
4.40k
    RETURN_IF_ERROR(
219
4.40k
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
4.40k
    last = basicblock_last_instr(b);
221
4.40k
    assert(last && last->i_opcode == opcode);
222
4.40k
    last->i_target = target;
223
4.40k
    return SUCCESS;
224
4.40k
}
225
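/* At this point a jump added via basicblock_add_jump() records both the
 * target's label id (in i_oparg) and the block pointer (in i_target).
 * Jumps emitted through _PyCfgBuilder_Addop() carry only the label id
 * until translate_jump_labels_to_targets() resolves it to a block pointer.
 */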
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
20.8k
{
229
64.6k
    for (int i = 0; i < from->b_iused; i++) {
230
43.7k
        int n = basicblock_next_instr(to);
231
43.7k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
43.7k
        to->b_instr[n] = from->b_instr[i];
235
43.7k
    }
236
20.8k
    return SUCCESS;
237
20.8k
}
238
239
static inline int
240
6.33M
basicblock_nofallthrough(const basicblock *b) {
241
6.33M
    cfg_instr *last = basicblock_last_instr(b);
242
6.33M
    return (last &&
243
6.33M
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
6.10M
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
6.33M
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
9.71M
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
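/* A block falls through into b_next unless its last instruction leaves the
 * block unconditionally (a scope exit such as a return or raise, or an
 * unconditional jump); an empty block always falls through.
 */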
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
2.53k
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
2.53k
    assert(BB_NO_FALLTHROUGH(block));
257
2.53k
    basicblock *result = cfg_builder_new_block(g);
258
2.53k
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
2.53k
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
2.53k
    return result;
265
2.53k
}
266
267
static int
268
52.2k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
52.2k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
1.06M
    for (int i = block->b_iused - 1; i > pos; i--) {
271
1.01M
        block->b_instr[i] = block->b_instr[i-1];
272
1.01M
    }
273
52.2k
    block->b_instr[pos] = *instr;
274
52.2k
    return SUCCESS;
275
52.2k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
548k
{
343
548k
    assert(block != NULL);
344
548k
    g->g_curblock->b_next = block;
345
548k
    g->g_curblock = block;
346
548k
    return block;
347
548k
}
348
349
static inline int
350
1.43M
basicblock_exits_scope(const basicblock *b) {
351
1.43M
    cfg_instr *last = basicblock_last_instr(b);
352
1.43M
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
1.43M
}
354
355
static inline int
356
1.08M
basicblock_has_eval_break(const basicblock *b) {
357
6.58M
    for (int i = 0; i < b->b_iused; i++) {
358
5.71M
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
206k
            return true;
360
206k
        }
361
5.71M
    }
362
875k
    return false;
363
1.08M
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
6.75M
{
368
6.75M
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
6.75M
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
429k
        return true;
371
429k
    }
372
6.32M
    if (IS_LABEL(g->g_current_label)) {
373
118k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
118k
            return true;
375
118k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
118k
    }
382
6.20M
    return false;
383
6.32M
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
6.75M
{
388
6.75M
    if (cfg_builder_current_block_is_terminated(g)) {
389
548k
        basicblock *b = cfg_builder_new_block(g);
390
548k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
548k
        b->b_label = g->g_current_label;
394
548k
        g->g_current_label = NO_LABEL;
395
548k
        cfg_builder_use_next_block(g, b);
396
548k
    }
397
6.75M
    return SUCCESS;
398
6.75M
}
399
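/* In other words: a new block is started when the previous instruction was
 * a terminator, or when a label is about to be attached to a block that
 * already has contents.  An empty, unlabeled current block is simply
 * relabeled instead (see cfg_builder_current_block_is_terminated() above).
 */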
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
154k
{
404
154k
    assert(g->g_entryblock->b_iused > 0);
405
1.42M
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
1.27M
        assert(!_PyMem_IsPtrFreed(block));
407
1.27M
        if (block->b_instr != NULL) {
408
1.27M
            assert(block->b_ialloc > 0);
409
1.27M
            assert(block->b_iused >= 0);
410
1.27M
            assert(block->b_ialloc >= block->b_iused);
411
1.27M
        }
412
0
        else {
413
0
            assert (block->b_iused == 0);
414
0
            assert (block->b_ialloc == 0);
415
0
        }
416
1.27M
    }
417
154k
    return true;
418
154k
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
77.1k
{
424
77.1k
    g->g_block_list = NULL;
425
77.1k
    basicblock *block = cfg_builder_new_block(g);
426
77.1k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
77.1k
    g->g_curblock = g->g_entryblock = block;
430
77.1k
    g->g_current_label = NO_LABEL;
431
77.1k
    return SUCCESS;
432
77.1k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
77.1k
{
437
77.1k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
77.1k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
77.1k
    memset(g, 0, sizeof(cfg_builder));
443
77.1k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
77.1k
    return g;
448
77.1k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
77.1k
{
453
77.1k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
77.1k
    assert(cfg_builder_check(g));
457
77.1k
    basicblock *b = g->g_block_list;
458
722k
    while (b != NULL) {
459
645k
        if (b->b_instr) {
460
645k
            PyMem_Free((void *)b->b_instr);
461
645k
        }
462
645k
        basicblock *next = b->b_list;
463
645k
        PyMem_Free((void *)b);
464
645k
        b = next;
465
645k
    }
466
77.1k
    PyMem_Free(g);
467
77.1k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
77.1k
{
472
77.1k
    int nblocks = 0;
473
702k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
625k
        nblocks++;
475
625k
    }
476
77.1k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
77.1k
    return SUCCESS;
481
77.1k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
296k
{
486
296k
    g->g_current_label = lbl;
487
296k
    return cfg_builder_maybe_start_new_block(g);
488
296k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
6.45M
{
493
6.45M
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
6.45M
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
6.45M
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
2.05M
{
501
2.12M
    while (b && b->b_iused == 0) {
502
69.6k
        b = b->b_next;
503
69.6k
    }
504
2.05M
    return b;
505
2.05M
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
77.1k
no_redundant_nops(cfg_builder *g) {
514
77.1k
    if (remove_redundant_nops(g) != 0) {
515
0
        return false;
516
0
    }
517
77.1k
    return true;
518
77.1k
}
519
520
static bool
521
154k
no_redundant_jumps(cfg_builder *g) {
522
1.42M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
1.27M
        cfg_instr *last = basicblock_last_instr(b);
524
1.27M
        if (last != NULL) {
525
1.13M
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
192k
                basicblock *next = next_nonempty_block(b->b_next);
527
192k
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
192k
                if (jump_target == next) {
529
0
                    assert(next);
530
0
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
0
                        assert(0);
532
0
                        return false;
533
0
                    }
534
0
                }
535
192k
            }
536
1.13M
        }
537
1.27M
    }
538
154k
    return true;
539
154k
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
645k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
645k
    cfg_instr *last = basicblock_last_instr(b);
547
645k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
492k
        return SUCCESS;
549
492k
    }
550
153k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
153k
    bool is_forward = last->i_target->b_visited == 0;
553
153k
    if (is_forward) {
554
151k
        RETURN_IF_ERROR(
555
151k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
151k
        return SUCCESS;
557
151k
    }
558
559
1.82k
    int reversed_opcode = 0;
560
1.82k
    switch(last->i_opcode) {
561
0
        case POP_JUMP_IF_NOT_NONE:
562
0
            reversed_opcode = POP_JUMP_IF_NONE;
563
0
            break;
564
0
        case POP_JUMP_IF_NONE:
565
0
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
0
            break;
567
1.72k
        case POP_JUMP_IF_FALSE:
568
1.72k
            reversed_opcode = POP_JUMP_IF_TRUE;
569
1.72k
            break;
570
98
        case POP_JUMP_IF_TRUE:
571
98
            reversed_opcode = POP_JUMP_IF_FALSE;
572
98
            break;
573
1.82k
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
1.82k
    basicblock *target = last->i_target;
579
1.82k
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
1.82k
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
1.82k
    RETURN_IF_ERROR(
584
1.82k
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
1.82k
    RETURN_IF_ERROR(
586
1.82k
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
1.82k
    backwards_jump->b_startdepth = target->b_startdepth;
588
1.82k
    last->i_opcode = reversed_opcode;
589
1.82k
    last->i_target = b->b_next;
590
591
1.82k
    backwards_jump->b_cold = b->b_cold;
592
1.82k
    backwards_jump->b_next = b->b_next;
593
1.82k
    b->b_next = backwards_jump;
594
1.82k
    return SUCCESS;
595
1.82k
}
596
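/* Illustrative effect of the transformation above on a backward conditional
 * jump (block labels are made up for the example):
 *
 *   before:  ...  POP_JUMP_IF_FALSE L_loop        ; L_loop already visited
 *   after:   ...  POP_JUMP_IF_TRUE  <fallthrough>
 *            new block:  NOT_TAKEN ; JUMP L_loop
 *
 * so the only remaining backward jump is the unconditional JUMP.  Forward
 * conditional jumps just get a NOT_TAKEN instrumentation op.
 */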
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
77.1k
{
601
77.1k
    basicblock *entryblock = g->g_entryblock;
602
721k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
643k
        b->b_visited = 0;
604
643k
    }
605
722k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
645k
        b->b_visited = 1;
607
645k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
645k
    }
609
77.1k
    return SUCCESS;
610
77.1k
}
611
612
static int
613
77.1k
check_cfg(cfg_builder *g) {
614
702k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if a jump or exit is not the last instruction in the block. */
616
7.08M
        for (int i = 0; i < b->b_iused; i++) {
617
6.45M
            int opcode = b->b_instr[i].i_opcode;
618
6.45M
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
6.45M
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
506k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
506k
            }
625
6.45M
        }
626
625k
    }
627
77.1k
    return SUCCESS;
628
77.1k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
280k
{
633
280k
    int lbl = -1;
634
2.77M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
2.49M
        if (b->b_label.id > lbl) {
636
1.04M
            lbl = b->b_label.id;
637
1.04M
        }
638
2.49M
    }
639
280k
    return lbl;
640
280k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
77.1k
{
646
77.1k
    int max_label = get_max_label(entryblock);
647
77.1k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
77.1k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
77.1k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
77.1k
    memset(label2block, 0, mapsize);
654
702k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
625k
        if (b->b_label.id >= 0) {
656
296k
            label2block[b->b_label.id] = b;
657
296k
        }
658
625k
    }
659
702k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
7.08M
        for (int i = 0; i < b->b_iused; i++) {
661
6.45M
            cfg_instr *instr = &b->b_instr[i];
662
6.45M
            assert(instr->i_target == NULL);
663
6.45M
            if (HAS_TARGET(instr->i_opcode)) {
664
398k
                int lbl = instr->i_oparg;
665
398k
                assert(lbl >= 0 && lbl <= max_label);
666
398k
                instr->i_target = label2block[lbl];
667
398k
                assert(instr->i_target != NULL);
668
398k
                assert(instr->i_target->b_label.id == lbl);
669
398k
            }
670
6.45M
        }
671
625k
    }
672
77.1k
    PyMem_Free(label2block);
673
77.1k
    return SUCCESS;
674
77.1k
}
675
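/* After this pass, i_target is the authoritative jump target; the label ids
 * still held in i_oparg are stale and are rewritten later, when the
 * assembler converts targets into instruction offsets.
 */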
676
static int
677
77.1k
mark_except_handlers(basicblock *entryblock) {
678
77.1k
#ifndef NDEBUG
679
702k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
625k
        assert(!b->b_except_handler);
681
625k
    }
682
77.1k
#endif
683
702k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
7.08M
        for (int i=0; i < b->b_iused; i++) {
685
6.45M
            cfg_instr *instr = &b->b_instr[i];
686
6.45M
            if (is_block_push(instr)) {
687
82.7k
                instr->i_target->b_except_handler = 1;
688
82.7k
            }
689
6.45M
        }
690
625k
    }
691
77.1k
    return SUCCESS;
692
77.1k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
82.0k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
82.0k
    assert(is_block_push(setup));
704
82.0k
    int opcode = setup->i_opcode;
705
82.0k
    basicblock * target = setup->i_target;
706
82.0k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
41.3k
        target->b_preserve_lasti = 1;
708
41.3k
    }
709
82.0k
    assert(stack->depth <= CO_MAXBLOCKS);
710
82.0k
    stack->handlers[++stack->depth] = target;
711
82.0k
    return target;
712
82.0k
}
713
714
static basicblock *
715
77.0k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
77.0k
    assert(stack->depth > 0);
717
77.0k
    return stack->handlers[--stack->depth];
718
77.0k
}
719
720
static basicblock *
721
572k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
572k
    return stack->handlers[stack->depth];
723
572k
}
724
725
static struct _PyCfgExceptStack *
726
77.1k
make_except_stack(void) {
727
77.1k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
77.1k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
77.1k
    new->depth = 0;
733
77.1k
    new->handlers[0] = NULL;
734
77.1k
    return new;
735
77.1k
}
736
737
static struct _PyCfgExceptStack *
738
211k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
211k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
211k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
211k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
211k
    return copy;
746
211k
}
747
748
static basicblock**
749
526k
make_cfg_traversal_stack(basicblock *entryblock) {
750
526k
    int nblocks = 0;
751
5.23M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
4.70M
        b->b_visited = 0;
753
4.70M
        nblocks++;
754
4.70M
    }
755
526k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
526k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
526k
    return stack;
760
526k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have a different stack effect depending on whether they
765
   jump to the target or not. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jumping
768
   * 1 -- when jumping
769
   * -1 -- maximal (the larger of the two)
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
5.01M
{
779
5.01M
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
5.01M
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
5.01M
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
5.01M
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
5.01M
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
5.01M
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
82.0k
        effects->net = 0;
793
82.0k
        return 0;
794
82.0k
    }
795
4.93M
    effects->net = pushed - popped;
796
4.93M
    return 0;
797
5.01M
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
784k
{
802
784k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
784k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
586k
        assert(b->b_startdepth < 0);
808
586k
        b->b_startdepth = depth;
809
586k
        *(*sp)++ = b;
810
586k
    }
811
784k
    return SUCCESS;
812
784k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
77.1k
{
820
77.1k
    basicblock *entryblock = g->g_entryblock;
821
721k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
643k
        b->b_startdepth = INT_MIN;
823
643k
    }
824
77.1k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
77.1k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
77.1k
    int stackdepth = -1;
831
77.1k
    int maxdepth = 0;
832
77.1k
    basicblock **sp = stack;
833
77.1k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
663k
    while (sp != stack) {
837
586k
        basicblock *b = *--sp;
838
586k
        int depth = b->b_startdepth;
839
586k
        assert(depth >= 0);
840
586k
        basicblock *next = b->b_next;
841
4.98M
        for (int i = 0; i < b->b_iused; i++) {
842
4.64M
            cfg_instr *instr = &b->b_instr[i];
843
4.64M
            stack_effects effects;
844
4.64M
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
4.64M
            int new_depth = depth + effects.net;
851
4.64M
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow");
854
0
                goto error;
855
0
            }
856
4.64M
            maxdepth = Py_MAX(maxdepth, depth);
857
4.64M
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
366k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
366k
                int target_depth = depth + effects.net;
865
366k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
366k
                maxdepth = Py_MAX(maxdepth, depth);
867
366k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
366k
            }
871
4.64M
            depth = new_depth;
872
4.64M
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
4.64M
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
4.64M
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
245k
            {
876
                /* remaining code is dead */
877
245k
                next = NULL;
878
245k
                break;
879
245k
            }
880
4.64M
        }
881
586k
        if (next != NULL) {
882
340k
            assert(BB_HAS_FALLTHROUGH(b));
883
340k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
340k
        }
887
586k
    }
888
77.1k
    stackdepth = maxdepth;
889
77.1k
error:
890
77.1k
    PyMem_Free(stack);
891
77.1k
    return stackdepth;
892
77.1k
}
893
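/* Worked example for the depth bookkeeping above: "a + b" compiles to
 * roughly
 *
 *     LOAD_NAME a    net +1 -> depth 1
 *     LOAD_NAME b    net +1 -> depth 2
 *     BINARY_OP +    net -1 -> depth 1
 *
 * so maxdepth for that expression is 2.  (Illustrative only; the exact
 * opcodes depend on the surrounding scope.)
 */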
894
static int
895
77.1k
label_exception_targets(basicblock *entryblock) {
896
77.1k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
77.1k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
77.1k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
77.1k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
77.1k
    except_stack->depth = 0;
907
77.1k
    todo_stack[0] = entryblock;
908
77.1k
    entryblock->b_visited = 1;
909
77.1k
    entryblock->b_exceptstack = except_stack;
910
77.1k
    basicblock **todo = &todo_stack[1];
911
77.1k
    basicblock *handler = NULL;
912
649k
    while (todo > todo_stack) {
913
572k
        todo--;
914
572k
        basicblock *b = todo[0];
915
572k
        assert(b->b_visited == 1);
916
572k
        except_stack = b->b_exceptstack;
917
572k
        assert(except_stack != NULL);
918
572k
        b->b_exceptstack = NULL;
919
572k
        handler = except_stack_top(except_stack);
920
572k
        int last_yield_except_depth = -1;
921
6.88M
        for (int i = 0; i < b->b_iused; i++) {
922
6.30M
            cfg_instr *instr = &b->b_instr[i];
923
6.30M
            if (is_block_push(instr)) {
924
82.0k
                if (!instr->i_target->b_visited) {
925
82.0k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
82.0k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
82.0k
                    instr->i_target->b_exceptstack = copy;
930
82.0k
                    todo[0] = instr->i_target;
931
82.0k
                    instr->i_target->b_visited = 1;
932
82.0k
                    todo++;
933
82.0k
                }
934
82.0k
                handler = push_except_block(except_stack, instr);
935
82.0k
            }
936
6.22M
            else if (instr->i_opcode == POP_BLOCK) {
937
77.0k
                handler = pop_except_block(except_stack);
938
77.0k
                INSTR_SET_OP0(instr, NOP);
939
77.0k
            }
940
6.14M
            else if (is_jump(instr)) {
941
312k
                instr->i_except = handler;
942
312k
                assert(i == b->b_iused -1);
943
312k
                if (!instr->i_target->b_visited) {
944
172k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
129k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
129k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
129k
                        instr->i_target->b_exceptstack = copy;
950
129k
                    }
951
42.4k
                    else {
952
42.4k
                        instr->i_target->b_exceptstack = except_stack;
953
42.4k
                        except_stack = NULL;
954
42.4k
                    }
955
172k
                    todo[0] = instr->i_target;
956
172k
                    instr->i_target->b_visited = 1;
957
172k
                    todo++;
958
172k
                }
959
312k
            }
960
5.83M
            else if (instr->i_opcode == YIELD_VALUE) {
961
31.9k
                instr->i_except = handler;
962
31.9k
                last_yield_except_depth = except_stack->depth;
963
31.9k
            }
964
5.80M
            else if (instr->i_opcode == RESUME) {
965
109k
                instr->i_except = handler;
966
109k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
31.9k
                    assert(last_yield_except_depth >= 0);
968
31.9k
                    if (last_yield_except_depth == 1) {
969
1.98k
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
1.98k
                    }
971
31.9k
                    last_yield_except_depth = -1;
972
31.9k
                }
973
109k
            }
974
5.69M
            else {
975
5.69M
                instr->i_except = handler;
976
5.69M
            }
977
6.30M
        }
978
572k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
979
241k
            assert(except_stack != NULL);
980
241k
            b->b_next->b_exceptstack = except_stack;
981
241k
            todo[0] = b->b_next;
982
241k
            b->b_next->b_visited = 1;
983
241k
            todo++;
984
241k
        }
985
331k
        else if (except_stack != NULL) {
986
288k
           PyMem_Free(except_stack);
987
288k
        }
988
572k
    }
989
#ifdef Py_DEBUG
990
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991
        assert(b->b_exceptstack == NULL);
992
    }
993
#endif
994
77.1k
    PyMem_Free(todo_stack);
995
77.1k
    return SUCCESS;
996
0
error:
997
0
    PyMem_Free(todo_stack);
998
0
    PyMem_Free(except_stack);
999
0
    return ERROR;
1000
77.1k
}
1001
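/* Summary of the pass above: it walks the CFG depth-first, simulating the
 * SETUP_* / POP_BLOCK block stack, and stamps every instruction with the
 * innermost active handler in i_except.  The assembler later turns
 * i_except into the code object's exception table.
 */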
1002
/***** CFG optimizations *****/
1003
1004
static int
1005
154k
remove_unreachable(basicblock *entryblock) {
1006
1.40M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1007
1.25M
        b->b_predecessors = 0;
1008
1.25M
    }
1009
154k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1010
154k
    if (stack == NULL) {
1011
0
        return ERROR;
1012
0
    }
1013
154k
    basicblock **sp = stack;
1014
154k
    entryblock->b_predecessors = 1;
1015
154k
    *sp++ = entryblock;
1016
154k
    entryblock->b_visited = 1;
1017
1.28M
    while (sp > stack) {
1018
1.12M
        basicblock *b = *(--sp);
1019
1.12M
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1020
628k
            if (!b->b_next->b_visited) {
1021
542k
                assert(b->b_next->b_predecessors == 0);
1022
542k
                *sp++ = b->b_next;
1023
542k
                b->b_next->b_visited = 1;
1024
542k
            }
1025
628k
            b->b_next->b_predecessors++;
1026
628k
        }
1027
12.1M
        for (int i = 0; i < b->b_iused; i++) {
1028
11.0M
            basicblock *target;
1029
11.0M
            cfg_instr *instr = &b->b_instr[i];
1030
11.0M
            if (is_jump(instr) || is_block_push(instr)) {
1031
755k
                target = instr->i_target;
1032
755k
                if (!target->b_visited) {
1033
430k
                    *sp++ = target;
1034
430k
                    target->b_visited = 1;
1035
430k
                }
1036
755k
                target->b_predecessors++;
1037
755k
            }
1038
11.0M
        }
1039
1.12M
    }
1040
154k
    PyMem_Free(stack);
1041
1042
    /* Delete unreachable instructions */
1043
1.40M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1044
1.25M
       if (b->b_predecessors == 0) {
1045
126k
            b->b_iused = 0;
1046
126k
            b->b_except_handler = 0;
1047
126k
       }
1048
1.25M
    }
1049
154k
    return SUCCESS;
1050
154k
}
1051
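/* Note: unreachable blocks are emptied (b_iused = 0) rather than unlinked,
 * so the b_list / b_next chains stay intact; the pass also leaves
 * b_predecessors populated for the passes that follow.
 */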
1052
static int
1053
4.54M
basicblock_remove_redundant_nops(basicblock *bb) {
1054
    /* Remove NOPs when legal to do so. */
1055
4.54M
    int dest = 0;
1056
4.54M
    int prev_lineno = -1;
1057
36.4M
    for (int src = 0; src < bb->b_iused; src++) {
1058
31.9M
        int lineno = bb->b_instr[src].i_loc.lineno;
1059
31.9M
        if (bb->b_instr[src].i_opcode == NOP) {
1060
            /* Eliminate no-op if it doesn't have a line number */
1061
1.77M
            if (lineno < 0) {
1062
1.42M
                continue;
1063
1.42M
            }
1064
            /* or, if the previous instruction had the same line number. */
1065
348k
            if (prev_lineno == lineno) {
1066
218k
                continue;
1067
218k
            }
1068
            /* or, if the next instruction has same line number or no line number */
1069
130k
            if (src < bb->b_iused - 1) {
1070
117k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1071
117k
                if (next_lineno == lineno) {
1072
80.5k
                    continue;
1073
80.5k
                }
1074
37.2k
                if (next_lineno < 0) {
1075
11.7k
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1076
11.7k
                    continue;
1077
11.7k
                }
1078
37.2k
            }
1079
12.6k
            else {
1080
12.6k
                basicblock *next = next_nonempty_block(bb->b_next);
1081
                /* or if last instruction in BB and next BB has same line number */
1082
12.6k
                if (next) {
1083
12.6k
                    location next_loc = NO_LOCATION;
1084
14.7k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1085
14.6k
                        cfg_instr *instr = &next->b_instr[next_i];
1086
14.6k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1087
                            /* Skip over NOPs without a location, they will be removed */
1088
2.06k
                            continue;
1089
2.06k
                        }
1090
12.6k
                        next_loc = instr->i_loc;
1091
12.6k
                        break;
1092
14.6k
                    }
1093
12.6k
                    if (lineno == next_loc.lineno) {
1094
2.83k
                        continue;
1095
2.83k
                    }
1096
12.6k
                }
1097
12.6k
            }
1098
1099
130k
        }
1100
30.2M
        if (dest != src) {
1101
2.50M
            bb->b_instr[dest] = bb->b_instr[src];
1102
2.50M
        }
1103
30.2M
        dest++;
1104
30.2M
        prev_lineno = lineno;
1105
30.2M
    }
1106
4.54M
    assert(dest <= bb->b_iused);
1107
4.54M
    int num_removed = bb->b_iused - dest;
1108
4.54M
    bb->b_iused = dest;
1109
4.54M
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1110
4.54M
    return num_removed;
1111
4.54M
}
1112
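/* The rules above boil down to: a NOP may be deleted unless it is the only
 * instruction carrying its line number in its neighborhood, since tracing
 * and profiling rely on those line events.
 */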
1113
static int
1114
327k
remove_redundant_nops(cfg_builder *g) {
1115
327k
    int changes = 0;
1116
4.21M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1117
3.89M
        int change = basicblock_remove_redundant_nops(b);
1118
3.89M
        RETURN_IF_ERROR(change);
1119
3.89M
        changes += change;
1120
3.89M
    }
1121
327k
    return changes;
1122
327k
}
1123
1124
static int
1125
remove_redundant_nops_and_pairs(basicblock *entryblock)
1126
77.1k
{
1127
77.1k
    bool done = false;
1128
1129
156k
    while (! done) {
1130
79.0k
        done = true;
1131
79.0k
        cfg_instr *prev_instr = NULL;
1132
79.0k
        cfg_instr *instr = NULL;
1133
735k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1134
656k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1135
656k
            if (IS_LABEL(b->b_label)) {
1136
                /* this block is a jump target, forget instr */
1137
313k
                instr = NULL;
1138
313k
            }
1139
5.70M
            for (int i = 0; i < b->b_iused; i++) {
1140
5.05M
                prev_instr = instr;
1141
5.05M
                instr = &b->b_instr[i];
1142
5.05M
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1143
5.05M
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1144
5.05M
                int opcode = instr->i_opcode;
1145
5.05M
                bool is_redundant_pair = false;
1146
5.05M
                if (opcode == POP_TOP) {
1147
378k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1148
7.10k
                       is_redundant_pair = true;
1149
7.10k
                   }
1150
371k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1151
703
                       is_redundant_pair = true;
1152
703
                   }
1153
378k
                }
1154
5.05M
                if (is_redundant_pair) {
1155
7.80k
                    INSTR_SET_OP0(prev_instr, NOP);
1156
7.80k
                    INSTR_SET_OP0(instr, NOP);
1157
7.80k
                    done = false;
1158
7.80k
                }
1159
5.05M
            }
1160
656k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1161
471k
                instr = NULL;
1162
471k
            }
1163
656k
        }
1164
79.0k
    }
1165
77.1k
    return SUCCESS;
1166
77.1k
}
1167
1168
static int
1169
173k
remove_redundant_jumps(cfg_builder *g) {
1170
    /* If a non-empty block ends with a jump instruction, check if the next
1171
     * non-empty block reached through normal flow control is the target
1172
     * of that jump. If it is, then the jump instruction is redundant and
1173
     * can be deleted.
1174
     *
1175
     * Return the number of changes applied, or -1 on error.
1176
     */
1177
1178
173k
    int changes = 0;
1179
2.80M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1180
2.63M
        cfg_instr *last = basicblock_last_instr(b);
1181
2.63M
        if (last == NULL) {
1182
253k
            continue;
1183
253k
        }
1184
2.38M
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1185
2.38M
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1186
534k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1187
534k
            if (jump_target == NULL) {
1188
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1189
0
                return ERROR;
1190
0
            }
1191
534k
            basicblock *next = next_nonempty_block(b->b_next);
1192
534k
            if (jump_target == next) {
1193
20.1k
                changes++;
1194
20.1k
                INSTR_SET_OP0(last, NOP);
1195
20.1k
            }
1196
534k
        }
1197
2.38M
    }
1198
1199
173k
    return changes;
1200
173k
}
1201
1202
static inline bool
1203
563k
basicblock_has_no_lineno(basicblock *b) {
1204
734k
    for (int i = 0; i < b->b_iused; i++) {
1205
674k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1206
504k
            return false;
1207
504k
        }
1208
674k
    }
1209
59.6k
    return true;
1210
563k
}
1211
1212
/* Maximum size of basic block that should be copied in optimizer */
1213
13.6k
#define MAX_COPY_SIZE 4
1214
1215
/* If this block ends with an unconditional jump to a small exit block or
1216
 * a block that has no line numbers (and no fallthrough), then
1217
 * remove the jump and extend this block with the target.
1218
 * Returns 1 if extended, 0 if no change, and -1 on error.
1219
 */
1220
static int
1221
1.04M
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1222
1.04M
    cfg_instr *last = basicblock_last_instr(bb);
1223
1.04M
    if (last == NULL) {
1224
0
        return 0;
1225
0
    }
1226
1.04M
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1227
836k
        return 0;
1228
836k
    }
1229
206k
    basicblock *target = last->i_target;
1230
206k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1231
206k
                             target->b_iused <= MAX_COPY_SIZE);
1232
206k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1233
206k
                                     !BB_HAS_FALLTHROUGH(target));
1234
206k
    if (small_exit_block || no_lineno_no_fallthrough) {
1235
18.3k
        assert(is_jump(last));
1236
18.3k
        int removed_jump_opcode = last->i_opcode;
1237
18.3k
        INSTR_SET_OP0(last, NOP);
1238
18.3k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1239
18.3k
        if (no_lineno_no_fallthrough) {
1240
16.9k
            last = basicblock_last_instr(bb);
1241
16.9k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1242
16.9k
                removed_jump_opcode == JUMP)
1243
121
            {
1244
                /* Make sure we don't lose eval breaker checks */
1245
121
                last->i_opcode = JUMP;
1246
121
            }
1247
16.9k
        }
1248
18.3k
        target->b_predecessors--;
1249
18.3k
        return 1;
1250
18.3k
    }
1251
187k
    return 0;
1252
206k
}
1253
1254
static int
1255
77.1k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1256
77.1k
    bool changes;
1257
80.2k
    do {
1258
80.2k
        changes = false;
1259
1.12M
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1260
1.04M
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1261
1.04M
            RETURN_IF_ERROR(res);
1262
1.04M
            if (res) {
1263
18.3k
                changes = true;
1264
18.3k
            }
1265
1.04M
        }
1266
80.2k
    } while(changes); /* every change removes a jump, ensuring convergence */
1267
77.1k
    return changes;
1268
77.1k
}
1269
1270
// Attempt to eliminate jumps to jumps by updating inst to jump to
1271
// target->i_target using the provided opcode. Return whether or not the
1272
// optimization was successful.
1273
static bool
1274
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1275
2.57k
{
1276
2.57k
    assert(is_jump(inst));
1277
2.57k
    assert(is_jump(target));
1278
2.57k
    assert(inst == basicblock_last_instr(bb));
1279
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1280
    // changes (and we fall into an infinite loop):
1281
2.57k
    if (inst->i_target != target->i_target) {
1282
        /* Change inst to NOP and append a jump to target->i_target. The
1283
         * NOP will be removed later if it's not needed for the lineno.
1284
         */
1285
2.57k
        INSTR_SET_OP0(inst, NOP);
1286
1287
2.57k
        RETURN_IF_ERROR(
1288
2.57k
            basicblock_add_jump(
1289
2.57k
                bb, opcode, target->i_target, target->i_loc));
1290
1291
2.57k
        return true;
1292
2.57k
    }
1293
0
    return false;
1294
2.57k
}
1295
1296
static int
1297
loads_const(int opcode)
1298
6.77M
{
1299
6.77M
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1300
6.77M
}
1301
1302
/* Returns new reference */
1303
static PyObject*
1304
get_const_value(int opcode, int oparg, PyObject *co_consts)
1305
2.99M
{
1306
2.99M
    PyObject *constant = NULL;
1307
2.99M
    assert(loads_const(opcode));
1308
2.99M
    if (opcode == LOAD_CONST) {
1309
2.51M
        constant = PyList_GET_ITEM(co_consts, oparg);
1310
2.51M
    }
1311
2.99M
    if (opcode == LOAD_SMALL_INT) {
1312
476k
        return PyLong_FromLong(oparg);
1313
476k
    }
1314
1315
2.51M
    if (constant == NULL) {
1316
0
        PyErr_SetString(PyExc_SystemError,
1317
0
                        "Internal error: failed to get value of a constant");
1318
0
        return NULL;
1319
0
    }
1320
2.51M
    return Py_NewRef(constant);
1321
2.51M
}
1322
1323
// Steals a reference to newconst.
1324
static int
1325
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1326
730k
{
1327
730k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1328
0
        Py_DECREF(newconst);
1329
0
        return -1;
1330
0
    }
1331
1332
730k
    Py_ssize_t index;
1333
60.7M
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1334
121M
        if (PyList_GET_ITEM(consts, index) == newconst) {
1335
563k
            break;
1336
563k
        }
1337
60.6M
    }
1338
730k
    if (index == PyList_GET_SIZE(consts)) {
1339
167k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1340
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1341
0
            Py_DECREF(newconst);
1342
0
            return -1;
1343
0
        }
1344
167k
        if (PyList_Append(consts, newconst)) {
1345
0
            Py_DECREF(newconst);
1346
0
            return -1;
1347
0
        }
1348
167k
    }
1349
730k
    Py_DECREF(newconst);
1350
730k
    return (int)index;
1351
730k
}
1352
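/* The identity comparison (==) in the scan above is sound because
 * _PyCompile_ConstCacheMergeOne() has already replaced newconst with the
 * canonical cached object, so equal constants share one pointer.
 */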
1353
/*
1354
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1355
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1356
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1357
1358
  Return boolean indicating whether "size" such instructions were found.
1359
*/
1360
static bool
1361
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1362
1.41M
{
1363
1.41M
    assert(start < bb->b_iused);
1364
1.41M
    assert(size >= 0);
1365
1.41M
    assert(size <= _PY_STACK_USE_GUIDELINE);
1366
1367
4.23M
    for (; start >= 0 && size > 0; start--) {
1368
3.35M
        cfg_instr *instr = &bb->b_instr[start];
1369
3.35M
        if (instr->i_opcode == NOP) {
1370
1.14M
            continue;
1371
1.14M
        }
1372
2.20M
        if (!loads_const(instr->i_opcode)) {
1373
542k
            return false;
1374
542k
        }
1375
1.66M
        instrs[--size] = instr;
1376
1.66M
    }
1377
1378
873k
    return size == 0;
1379
1.41M
}
1380
1381
/*
1382
  Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1383
  The caller must make sure "instrs" has at least "size" elements.
1384
*/
1385
static void
1386
nop_out(cfg_instr **instrs, int size)
1387
822k
{
1388
2.25M
    for (int i = 0; i < size; i++) {
1389
1.42M
        cfg_instr *instr = instrs[i];
1390
1.42M
        assert(instr->i_opcode != NOP);
1391
1.42M
        INSTR_SET_OP0(instr, NOP);
1392
1.42M
        INSTR_SET_LOC(instr, NO_LOCATION);
1393
1.42M
    }
1394
822k
}
1395
1396
/* Does not steal reference to "newconst".
1397
   Return 1 if the instruction was changed to LOAD_SMALL_INT.
1398
   Return 0 if the instruction could not be changed to LOAD_SMALL_INT.
1399
   Return -1 on error.
1400
*/
1401
static int
1402
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1403
                               PyObject *consts, PyObject *const_cache)
1404
2.23M
{
1405
2.23M
    if (PyLong_CheckExact(newconst)) {
1406
1.28M
        int overflow;
1407
1.28M
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1408
1.28M
        if (val == -1 && PyErr_Occurred()) {
1409
0
            return -1;
1410
0
        }
1411
1.28M
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1412
656k
            assert(_Py_IsImmortal(newconst));
1413
656k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1414
656k
            return 1;
1415
656k
        }
1416
1.28M
    }
1417
1.57M
    return 0;
1418
2.23M
}
1419
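/* A successful conversion replaces e.g. LOAD_CONST <5> with
 * LOAD_SMALL_INT 5: the value rides in the oparg, so no consts-table slot
 * is needed.  Only values in the immortal small-int range qualify.
 */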
1420
1421
/* Steals reference to "newconst" */
1422
static int
1423
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1424
                      PyObject *consts, PyObject *const_cache)
1425
800k
{
1426
800k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1427
800k
    if (res < 0) {
1428
0
        Py_DECREF(newconst);
1429
0
        return ERROR;
1430
0
    }
1431
800k
    if (res > 0) {
1432
79.7k
        return SUCCESS;
1433
79.7k
    }
1434
720k
    int oparg = add_const(newconst, consts, const_cache);
1435
720k
    RETURN_IF_ERROR(oparg);
1436
720k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1437
720k
    return SUCCESS;
1438
720k
}
1439
1440
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1441
   with    LOAD_CONST (c1, c2, ... cn).
1442
   The consts table must still be in list form so that the
1443
   new constant (c1, c2, ... cn) can be appended.
1444
   Called with "i" pointing at the BUILD_TUPLE instruction.
1445
*/
1446
static int
1447
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1448
128k
{
1449
    /* Pre-conditions */
1450
128k
    assert(PyDict_CheckExact(const_cache));
1451
128k
    assert(PyList_CheckExact(consts));
1452
1453
128k
    cfg_instr *instr = &bb->b_instr[i];
1454
128k
    assert(instr->i_opcode == BUILD_TUPLE);
1455
1456
128k
    int seq_size = instr->i_oparg;
1457
128k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1458
29
        return SUCCESS;
1459
29
    }
1460
1461
128k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1462
128k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1463
        /* not a const sequence */
1464
63.9k
        return SUCCESS;
1465
63.9k
    }
1466
1467
64.9k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1468
64.9k
    if (const_tuple == NULL) {
1469
0
        return ERROR;
1470
0
    }
1471
1472
163k
    for (int i = 0; i < seq_size; i++) {
1473
98.5k
        cfg_instr *inst = const_instrs[i];
1474
98.5k
        assert(loads_const(inst->i_opcode));
1475
98.5k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1476
98.5k
        if (element == NULL) {
1477
0
            Py_DECREF(const_tuple);
1478
0
            return ERROR;
1479
0
        }
1480
98.5k
        PyTuple_SET_ITEM(const_tuple, i, element);
1481
98.5k
    }
1482
1483
64.9k
    nop_out(const_instrs, seq_size);
1484
64.9k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1485
64.9k
}
1486
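/* Example: "x = (1, 2, 3)" initially compiles to
 *
 *     LOAD_SMALL_INT 1 ; LOAD_SMALL_INT 2 ; LOAD_SMALL_INT 3 ; BUILD_TUPLE 3
 *
 * and is folded here to NOP ; NOP ; NOP ; LOAD_CONST (1, 2, 3), with the
 * NOPs cleaned up later by remove_redundant_nops().
 */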
1487
/* Replace:
1488
    BUILD_LIST 0
1489
    LOAD_CONST c1
1490
    LIST_APPEND 1
1491
    LOAD_CONST c2
1492
    LIST_APPEND 1
1493
    ...
1494
    LOAD_CONST cN
1495
    LIST_APPEND 1
1496
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1497
   with:
1498
    LOAD_CONST (c1, c2, ... cN)
1499
*/
1500
static int
1501
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1502
                                      PyObject *consts, PyObject *const_cache)
1503
9.67k
{
1504
9.67k
    assert(PyDict_CheckExact(const_cache));
1505
9.67k
    assert(PyList_CheckExact(consts));
1506
9.67k
    assert(i >= 0);
1507
9.67k
    assert(i < bb->b_iused);
1508
1509
9.67k
    cfg_instr *intrinsic = &bb->b_instr[i];
1510
9.67k
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1511
9.67k
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1512
1513
9.67k
    int consts_found = 0;
1514
9.67k
    bool expect_append = true;
1515
1516
37.0k
    for (int pos = i - 1; pos >= 0; pos--) {
1517
36.6k
        cfg_instr *instr = &bb->b_instr[pos];
1518
36.6k
        int opcode = instr->i_opcode;
1519
36.6k
        int oparg = instr->i_oparg;
1520
1521
36.6k
        if (opcode == NOP) {
1522
3.18k
            continue;
1523
3.18k
        }
1524
1525
33.4k
        if (opcode == BUILD_LIST && oparg == 0) {
1526
212
            if (!expect_append) {
1527
                /* Not a sequence start. */
1528
68
                return SUCCESS;
1529
68
            }
1530
1531
            /* Sequence start, we are done. */
1532
144
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1533
144
            if (newconst == NULL) {
1534
0
                return ERROR;
1535
0
            }
1536
1537
18.3k
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1538
18.1k
                instr = &bb->b_instr[newpos];
1539
18.1k
                if (instr->i_opcode == NOP) {
1540
988
                    continue;
1541
988
                }
1542
17.1k
                if (loads_const(instr->i_opcode)) {
1543
8.51k
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1544
8.51k
                    if (constant == NULL) {
1545
0
                        Py_DECREF(newconst);
1546
0
                        return ERROR;
1547
0
                    }
1548
8.51k
                    assert(consts_found > 0);
1549
8.51k
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1550
8.51k
                }
1551
17.1k
                nop_out(&instr, 1);
1552
17.1k
            }
1553
144
            assert(consts_found == 0);
1554
144
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1555
144
        }
1556
1557
33.2k
        if (expect_append) {
1558
19.6k
            if (opcode != LIST_APPEND || oparg != 1) {
1559
5.65k
                return SUCCESS;
1560
5.65k
            }
1561
19.6k
        }
1562
13.6k
        else {
1563
13.6k
            if (!loads_const(opcode)) {
1564
3.47k
                return SUCCESS;
1565
3.47k
            }
1566
10.1k
            consts_found++;
1567
10.1k
        }
1568
1569
24.1k
        expect_append = !expect_append;
1570
24.1k
    }
1571
1572
    /* Did not find sequence start. */
1573
345
    return SUCCESS;
1574
9.67k
}
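/* [Editor's note] A worked trace of the backward scan above, for the block
 *   BUILD_LIST 0; LOAD_CONST c1; LIST_APPEND 1; LOAD_CONST c2; LIST_APPEND 1;
 *   CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
 * Starting at the instruction before the intrinsic with expect_append=true:
 *   LIST_APPEND 1  (expect_append true  -> accepted, toggle to false)
 *   LOAD_CONST c2  (expect_append false -> consts_found=1, toggle to true)
 *   LIST_APPEND 1  (accepted, toggle to false)
 *   LOAD_CONST c1  (consts_found=2, toggle to true)
 *   BUILD_LIST 0   (expect_append true -> valid sequence start)
 * The second pass then walks the same range again, filling the new tuple
 * right-to-left, which is why elements are written at index --consts_found. */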
1575
1576
66.5k
#define MIN_CONST_SEQUENCE_SIZE 3
1577
/*
1578
Optimize lists and sets for:
1579
    1. "for" loop, comprehension or "in"/"not in" tests:
1580
           Change literal list or set of constants into constant
1581
           tuple or frozenset respectively. Change list of
1582
           non-constants into tuple.
1583
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1584
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1585
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1586
           or BUILD_SET & SET_UPDATE respectively.
1587
*/
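/* [Editor's illustration] On concrete input: under rule 1, a membership test
 * like
 *   x in {1, 2, 3}
 * compiles its BUILD_SET of constants down to a single
 *   LOAD_CONST frozenset({1, 2, 3})
 * feeding CONTAINS_OP; under rule 2, a literal such as [1, 2, 3] becomes
 *   BUILD_LIST 0; LOAD_CONST (1, 2, 3); LIST_EXTEND 1
 * so the elements are materialized from one cached tuple constant. */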
1588
static int
1589
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1590
                        PyObject *consts, PyObject *const_cache)
1591
33.2k
{
1592
33.2k
    assert(PyDict_CheckExact(const_cache));
1593
33.2k
    assert(PyList_CheckExact(consts));
1594
1595
33.2k
    cfg_instr *instr = &bb->b_instr[i];
1596
33.2k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1597
1598
33.2k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1599
33.2k
    int seq_size = instr->i_oparg;
1600
33.2k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1601
33.2k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1602
26.2k
    {
1603
26.2k
        return SUCCESS;
1604
26.2k
    }
1605
1606
6.97k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1607
6.97k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1608
1.13k
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1609
            /* iterate over a tuple instead of list */
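            /* (see the concrete examples in the comment above the function) */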
1610
8
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1611
8
        }
1612
1.13k
        return SUCCESS;
1613
1.13k
    }
1614
1615
5.84k
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1616
5.84k
    if (const_result == NULL) {
1617
0
        return ERROR;
1618
0
    }
1619
1620
54.7k
    for (int i = 0; i < seq_size; i++) {
1621
48.9k
        cfg_instr *inst = const_instrs[i];
1622
48.9k
        assert(loads_const(inst->i_opcode));
1623
48.9k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1624
48.9k
        if (element == NULL) {
1625
0
            Py_DECREF(const_result);
1626
0
            return ERROR;
1627
0
        }
1628
48.9k
        PyTuple_SET_ITEM(const_result, i, element);
1629
48.9k
    }
1630
1631
5.84k
    if (instr->i_opcode == BUILD_SET) {
1632
5.43k
        PyObject *frozenset = PyFrozenSet_New(const_result);
1633
5.43k
        if (frozenset == NULL) {
1634
0
            Py_DECREF(const_result);
1635
0
            return ERROR;
1636
0
        }
1637
5.43k
        Py_SETREF(const_result, frozenset);
1638
5.43k
    }
1639
1640
5.84k
    int index = add_const(const_result, consts, const_cache);
1641
5.84k
    RETURN_IF_ERROR(index);
1642
5.84k
    nop_out(const_instrs, seq_size);
1643
1644
5.84k
    if (contains_or_iter) {
1645
85
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1646
85
    }
1647
5.75k
    else {
1648
5.75k
        assert(i >= 2);
1649
5.75k
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1650
1651
5.75k
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1652
1653
5.75k
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1654
5.75k
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1655
5.75k
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1656
5.75k
    }
1657
5.84k
    return SUCCESS;
1658
5.84k
}
1659
1660
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1661
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1662
 * Used to avoid creating constants which are slow to hash.
1663
 */
1664
static Py_ssize_t
1665
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1666
713k
{
1667
713k
    if (PyTuple_Check(obj)) {
1668
627k
        Py_ssize_t i;
1669
627k
        limit -= PyTuple_GET_SIZE(obj);
1670
1.29M
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1671
672k
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1672
672k
            if (limit < 0) {
1673
1.67k
                return limit;
1674
1.67k
            }
1675
672k
        }
1676
627k
    }
1677
712k
    return limit;
1678
713k
}
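/* [Editor's note] Worked example: const_folding_check_complexity(obj, 10)
 * with obj = ((1, 2), (3, 4)) proceeds as
 *   limit = 10 - 2 = 8     (outer tuple has 2 items)
 *   recurse on (1, 2): 8 - 2 = 6   (ints are not tuples, so they cost nothing)
 *   recurse on (3, 4): 6 - 2 = 4
 * and returns 4 >= 0, i.e. the collection is small enough to fold. */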
1679
1680
91.4k
#define MAX_INT_SIZE           128  /* bits */
1681
44.4k
#define MAX_COLLECTION_SIZE    256  /* items */
1682
13.8k
#define MAX_STR_SIZE          4096  /* characters */
1683
41.8k
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1684
1685
static PyObject *
1686
const_folding_safe_multiply(PyObject *v, PyObject *w)
1687
234k
{
1688
234k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1689
234k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1690
234k
    ) {
1691
38.5k
        int64_t vbits = _PyLong_NumBits(v);
1692
38.5k
        int64_t wbits = _PyLong_NumBits(w);
1693
38.5k
        assert(vbits >= 0);
1694
38.5k
        assert(wbits >= 0);
1695
38.5k
        if (vbits + wbits > MAX_INT_SIZE) {
1696
236
            return NULL;
1697
236
        }
1698
38.5k
    }
1699
195k
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1700
46.2k
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1701
46.2k
        if (size) {
1702
45.0k
            long n = PyLong_AsLong(v);
1703
45.0k
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1704
717
                return NULL;
1705
717
            }
1706
44.3k
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1707
230
                return NULL;
1708
230
            }
1709
44.3k
        }
1710
46.2k
    }
1711
149k
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1712
36.6k
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1713
36.6k
                                               PyBytes_GET_SIZE(w);
1714
36.6k
        if (size) {
1715
14.9k
            long n = PyLong_AsLong(v);
1716
14.9k
            if (n < 0 || n > MAX_STR_SIZE / size) {
1717
1.59k
                return NULL;
1718
1.59k
            }
1719
14.9k
        }
1720
36.6k
    }
1721
112k
    else if (PyLong_Check(w) &&
1722
112k
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1723
78.9k
    {
1724
78.9k
        return const_folding_safe_multiply(w, v);
1725
78.9k
    }
1726
1727
152k
    return PyNumber_Multiply(v, w);
1728
234k
}
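/* [Editor's note] The integer guard above relies on the identity
 *   bits(v * w) <= bits(v) + bits(w)
 * so rejecting vbits + wbits > MAX_INT_SIZE guarantees any folded product
 * fits in 128 bits. The sequence branches instead bound the result length:
 * n * len(w) may not exceed MAX_COLLECTION_SIZE for tuples, or MAX_STR_SIZE
 * for strings and bytes, since repetition grows the result linearly in n. */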
1729
1730
static PyObject *
1731
const_folding_safe_power(PyObject *v, PyObject *w)
1732
86.4k
{
1733
86.4k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1734
86.4k
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1735
86.4k
    ) {
1736
36.0k
        int64_t vbits = _PyLong_NumBits(v);
1737
36.0k
        size_t wbits = PyLong_AsSize_t(w);
1738
36.0k
        assert(vbits >= 0);
1739
36.0k
        if (wbits == (size_t)-1) {
1740
531
            return NULL;
1741
531
        }
1742
35.5k
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1743
3.79k
            return NULL;
1744
3.79k
        }
1745
35.5k
    }
1746
1747
82.1k
    return PyNumber_Power(v, w, Py_None);
1748
86.4k
}
1749
1750
static PyObject *
1751
const_folding_safe_lshift(PyObject *v, PyObject *w)
1752
8.37k
{
1753
8.37k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1754
8.37k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1755
8.37k
    ) {
1756
6.40k
        int64_t vbits = _PyLong_NumBits(v);
1757
6.40k
        size_t wbits = PyLong_AsSize_t(w);
1758
6.40k
        assert(vbits >= 0);
1759
6.40k
        if (wbits == (size_t)-1) {
1760
328
            return NULL;
1761
328
        }
1762
6.08k
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1763
1.47k
            return NULL;
1764
1.47k
        }
1765
6.08k
    }
1766
1767
6.56k
    return PyNumber_Lshift(v, w);
1768
8.37k
}
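/* [Editor's note] The same style of bound is used above: for v ** w,
 *   bits(v ** w) <= bits(v) * w
 * so the guard vbits > MAX_INT_SIZE / wbits rejects results that could
 * exceed 128 bits; and for v << w (with v nonzero),
 *   bits(v << w) == bits(v) + w
 * which is exactly the vbits > MAX_INT_SIZE - wbits check. */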
1769
1770
static PyObject *
1771
const_folding_safe_mod(PyObject *v, PyObject *w)
1772
43.2k
{
1773
43.2k
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1774
2.33k
        return NULL;
1775
2.33k
    }
1776
1777
40.9k
    return PyNumber_Remainder(v, w);
1778
43.2k
}
1779
1780
static PyObject *
1781
eval_const_binop(PyObject *left, int op, PyObject *right)
1782
592k
{
1783
592k
    assert(left != NULL && right != NULL);
1784
592k
    assert(op >= 0 && op <= NB_OPARG_LAST);
1785
1786
592k
    PyObject *result = NULL;
1787
592k
    switch (op) {
1788
48.7k
        case NB_ADD:
1789
48.7k
            result = PyNumber_Add(left, right);
1790
48.7k
            break;
1791
51.9k
        case NB_SUBTRACT:
1792
51.9k
            result = PyNumber_Subtract(left, right);
1793
51.9k
            break;
1794
155k
        case NB_MULTIPLY:
1795
155k
            result = const_folding_safe_multiply(left, right);
1796
155k
            break;
1797
105k
        case NB_TRUE_DIVIDE:
1798
105k
            result = PyNumber_TrueDivide(left, right);
1799
105k
            break;
1800
26.1k
        case NB_FLOOR_DIVIDE:
1801
26.1k
            result = PyNumber_FloorDivide(left, right);
1802
26.1k
            break;
1803
43.2k
        case NB_REMAINDER:
1804
43.2k
            result = const_folding_safe_mod(left, right);
1805
43.2k
            break;
1806
86.4k
        case NB_POWER:
1807
86.4k
            result = const_folding_safe_power(left, right);
1808
86.4k
            break;
1809
8.37k
        case NB_LSHIFT:
1810
8.37k
            result = const_folding_safe_lshift(left, right);
1811
8.37k
            break;
1812
19.6k
        case NB_RSHIFT:
1813
19.6k
            result = PyNumber_Rshift(left, right);
1814
19.6k
            break;
1815
7.48k
        case NB_OR:
1816
7.48k
            result = PyNumber_Or(left, right);
1817
7.48k
            break;
1818
6.69k
        case NB_XOR:
1819
6.69k
            result = PyNumber_Xor(left, right);
1820
6.69k
            break;
1821
10.6k
        case NB_AND:
1822
10.6k
            result = PyNumber_And(left, right);
1823
10.6k
            break;
1824
21.7k
        case NB_SUBSCR:
1825
21.7k
            result = PyObject_GetItem(left, right);
1826
21.7k
            break;
1827
807
        case NB_MATRIX_MULTIPLY:
1828
            // No builtin constants implement matrix multiplication
1829
807
            break;
1830
0
        default:
1831
0
            Py_UNREACHABLE();
1832
592k
    }
1833
592k
    return result;
1834
592k
}
1835
1836
static int
1837
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1838
865k
{
1839
1.39M
    #define BINOP_OPERAND_COUNT 2
1840
865k
    assert(PyDict_CheckExact(const_cache));
1841
865k
    assert(PyList_CheckExact(consts));
1842
1843
865k
    cfg_instr *binop = &bb->b_instr[i];
1844
865k
    assert(binop->i_opcode == BINARY_OP);
1845
1846
865k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1847
865k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1848
        /* not a const sequence */
1849
273k
        return SUCCESS;
1850
273k
    }
1851
1852
592k
    cfg_instr *lhs_instr = operands_instrs[0];
1853
592k
    assert(loads_const(lhs_instr->i_opcode));
1854
592k
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1855
592k
    if (lhs == NULL) {
1856
0
        return ERROR;
1857
0
    }
1858
1859
592k
    cfg_instr *rhs_instr = operands_instrs[1];
1860
592k
    assert(loads_const(rhs_instr->i_opcode));
1861
592k
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1862
592k
    if (rhs == NULL) {
1863
0
        Py_DECREF(lhs);
1864
0
        return ERROR;
1865
0
    }
1866
1867
592k
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1868
592k
    Py_DECREF(lhs);
1869
592k
    Py_DECREF(rhs);
1870
592k
    if (newconst == NULL) {
1871
63.4k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1872
0
            return ERROR;
1873
0
        }
1874
63.4k
        PyErr_Clear();
1875
63.4k
        return SUCCESS;
1876
63.4k
    }
1877
1878
528k
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1879
528k
    return instr_make_load_const(binop, newconst, consts, const_cache);
1880
592k
}
1881
1882
static PyObject *
1883
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1884
209k
{
1885
209k
    assert(operand != NULL);
1886
209k
    assert(
1887
209k
        opcode == UNARY_NEGATIVE ||
1888
209k
        opcode == UNARY_INVERT ||
1889
209k
        opcode == UNARY_NOT ||
1890
209k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1891
209k
    );
1892
209k
    PyObject *result;
1893
209k
    switch (opcode) {
1894
140k
        case UNARY_NEGATIVE:
1895
140k
            result = PyNumber_Negative(operand);
1896
140k
            break;
1897
14.4k
        case UNARY_INVERT:
1898
            // XXX: This should be removed once the ~bool deprecation expires.
1899
14.4k
            if (PyBool_Check(operand)) {
1900
196
                return NULL;
1901
196
            }
1902
14.2k
            result = PyNumber_Invert(operand);
1903
14.2k
            break;
1904
1.04k
        case UNARY_NOT: {
1905
1.04k
            int r = PyObject_IsTrue(operand);
1906
1.04k
            if (r < 0) {
1907
0
                return NULL;
1908
0
            }
1909
1.04k
            result = PyBool_FromLong(!r);
1910
1.04k
            break;
1911
1.04k
        }
1912
53.3k
        case CALL_INTRINSIC_1:
1913
53.3k
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1914
0
                Py_UNREACHABLE();
1915
0
            }
1916
53.3k
            result = PyNumber_Positive(operand);
1917
53.3k
            break;
1918
0
        default:
1919
0
            Py_UNREACHABLE();
1920
209k
    }
1921
209k
    return result;
1922
209k
}
1923
1924
static int
1925
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1926
413k
{
1927
619k
    #define UNARYOP_OPERAND_COUNT 1
1928
413k
    assert(PyDict_CheckExact(const_cache));
1929
413k
    assert(PyList_CheckExact(consts));
1930
413k
    cfg_instr *unaryop = &bb->b_instr[i];
1931
1932
413k
    cfg_instr *operand_instr;
1933
413k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1934
        /* not a const */
1935
204k
        return SUCCESS;
1936
204k
    }
1937
1938
209k
    assert(loads_const(operand_instr->i_opcode));
1939
209k
    PyObject *operand = get_const_value(
1940
209k
        operand_instr->i_opcode,
1941
209k
        operand_instr->i_oparg,
1942
209k
        consts
1943
209k
    );
1944
209k
    if (operand == NULL) {
1945
0
        return ERROR;
1946
0
    }
1947
1948
209k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1949
209k
    Py_DECREF(operand);
1950
209k
    if (newconst == NULL) {
1951
3.10k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1952
0
            return ERROR;
1953
0
        }
1954
3.10k
        PyErr_Clear();
1955
3.10k
        return SUCCESS;
1956
3.10k
    }
1957
1958
206k
    if (unaryop->i_opcode == UNARY_NOT) {
1959
1.04k
        assert(PyBool_Check(newconst));
1960
1.04k
    }
1961
206k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1962
206k
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1963
206k
}
1964
1965
360k
#define VISITED (-1)
1966
1967
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1968
// same effect.
1969
static int
1970
swaptimize(basicblock *block, int *ix)
1971
112k
{
1972
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1973
    // for this function. Make sure to blow away cached *.pyc files first!
1974
112k
    assert(*ix < block->b_iused);
1975
112k
    cfg_instr *instructions = &block->b_instr[*ix];
1976
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1977
    // maximum depth of the stack manipulations:
1978
112k
    assert(instructions[0].i_opcode == SWAP);
1979
112k
    int depth = instructions[0].i_oparg;
1980
112k
    int len = 0;
1981
112k
    int more = false;
1982
112k
    int limit = block->b_iused - *ix;
1983
169k
    while (++len < limit) {
1984
169k
        int opcode = instructions[len].i_opcode;
1985
169k
        if (opcode == SWAP) {
1986
56.1k
            depth = Py_MAX(depth, instructions[len].i_oparg);
1987
56.1k
            more = true;
1988
56.1k
        }
1989
113k
        else if (opcode != NOP) {
1990
112k
            break;
1991
112k
        }
1992
169k
    }
1993
    // It's already optimal if there's only one SWAP:
1994
112k
    if (!more) {
1995
92.9k
        return SUCCESS;
1996
92.9k
    }
1997
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1998
19.0k
    int *stack = PyMem_Malloc(depth * sizeof(int));
1999
19.0k
    if (stack == NULL) {
2000
0
        PyErr_NoMemory();
2001
0
        return ERROR;
2002
0
    }
2003
106k
    for (int i = 0; i < depth; i++) {
2004
87.6k
        stack[i] = i;
2005
87.6k
    }
2006
    // Simulate the combined effect of these instructions by "running" them on
2007
    // our "stack":
2008
95.6k
    for (int i = 0; i < len; i++) {
2009
76.5k
        if (instructions[i].i_opcode == SWAP) {
2010
75.2k
            int oparg = instructions[i].i_oparg;
2011
75.2k
            int top = stack[0];
2012
            // SWAPs are 1-indexed:
2013
75.2k
            stack[0] = stack[oparg - 1];
2014
75.2k
            stack[oparg - 1] = top;
2015
75.2k
        }
2016
76.5k
    }
2017
    // Now we can begin! Our approach here is based on a solution to a closely
2018
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2019
    // think of this algorithm as determining the steps needed to efficiently
2020
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2021
    // though, we can efficiently *shuffle* it! For this reason, we will be
2022
    // replacing instructions starting from the *end* of the run. Since the
2023
    // solution is optimal, we don't need to worry about running out of space:
2024
19.0k
    int current = len - 1;
2025
106k
    for (int i = 0; i < depth; i++) {
2026
        // Skip items that have already been visited, or just happen to be in
2027
        // the correct location:
2028
87.6k
        if (stack[i] == VISITED || stack[i] == i) {
2029
69.9k
            continue;
2030
69.9k
        }
2031
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2032
        // with other items; traversing the cycle and swapping each item with
2033
        // the next will put them all in the correct place. The weird
2034
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2035
        // can only swap from that position:
2036
17.7k
        int j = i;
2037
101k
        while (true) {
2038
            // Skip the actual swap if our item is zero, since swapping the top
2039
            // item with itself is pointless:
2040
101k
            if (j) {
2041
66.4k
                assert(0 <= current);
2042
                // SWAPs are 1-indexed:
2043
66.4k
                instructions[current].i_opcode = SWAP;
2044
66.4k
                instructions[current--].i_oparg = j + 1;
2045
66.4k
            }
2046
101k
            if (stack[j] == VISITED) {
2047
                // Completed the cycle:
2048
17.7k
                assert(j == i);
2049
17.7k
                break;
2050
17.7k
            }
2051
83.9k
            int next_j = stack[j];
2052
83.9k
            stack[j] = VISITED;
2053
83.9k
            j = next_j;
2054
83.9k
        }
2055
17.7k
    }
2056
    // NOP out any unused instructions:
2057
29.2k
    while (0 <= current) {
2058
10.1k
        INSTR_SET_OP0(&instructions[current--], NOP);
2059
10.1k
    }
2060
19.0k
    PyMem_Free(stack);
2061
19.0k
    *ix += len - 1;
2062
19.0k
    return SUCCESS;
2063
19.0k
}
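/* [Editor's sketch, not part of flowgraph.c] The cycle-walking step above in
 * isolation: given the simulated stack permutation, emit a minimal SWAP
 * sequence by decomposing the permutation into cycles threaded through
 * position 0. "emit" is a hypothetical callback standing in for writing
 * SWAP with oparg j+1 into the run; the real function records these swaps
 * from the *end* of the run backwards (via "current--") so that executing
 * them in program order shuffles rather than un-shuffles the stack. */
static void
toy_unshuffle(int *stack, int depth, void (*emit)(int oparg))
{
    for (int i = 0; i < depth; i++) {
        if (stack[i] == -1 || stack[i] == i) {
            continue;               /* already visited, or already in place */
        }
        int j = i;
        while (1) {
            if (j) {
                emit(j + 1);        /* SWAPs are 1-indexed */
            }
            if (stack[j] == -1) {
                break;              /* cycle closed back at i */
            }
            int next_j = stack[j];
            stack[j] = -1;          /* mark visited (VISITED above) */
            j = next_j;
        }
    }
}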
2064
2065
2066
// This list is pretty small, since it's only okay to reorder opcodes that:
2067
// - can't affect control flow (like jumping or raising exceptions)
2068
// - can't invoke arbitrary code (besides finalizers)
2069
// - only touch the TOS (and pop it when finished)
2070
#define SWAPPABLE(opcode) \
2071
144k
    ((opcode) == STORE_FAST || \
2072
144k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2073
144k
     (opcode) == POP_TOP)
2074
2075
#define STORES_TO(instr) \
2076
7.94k
    (((instr).i_opcode == STORE_FAST || \
2077
7.94k
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2078
7.94k
     ? (instr).i_oparg : -1)
2079
2080
static int
2081
next_swappable_instruction(basicblock *block, int i, int lineno)
2082
151k
{
2083
152k
    while (++i < block->b_iused) {
2084
142k
        cfg_instr *instruction = &block->b_instr[i];
2085
142k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2086
            // Optimizing across this instruction could cause user-visible
2087
            // changes in the names bound between line tracing events!
2088
170
            return -1;
2089
170
        }
2090
142k
        if (instruction->i_opcode == NOP) {
2091
632
            continue;
2092
632
        }
2093
141k
        if (SWAPPABLE(instruction->i_opcode)) {
2094
44.0k
            return i;
2095
44.0k
        }
2096
97.5k
        return -1;
2097
141k
    }
2098
10.1k
    return -1;
2099
151k
}
2100
2101
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2102
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2103
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2104
static void
2105
apply_static_swaps(basicblock *block, int i)
2106
112k
{
2107
    // SWAPs are to our left, and potential swaperands are to our right:
2108
123k
    for (; 0 <= i; i--) {
2109
122k
        assert(i < block->b_iused);
2110
122k
        cfg_instr *swap = &block->b_instr[i];
2111
122k
        if (swap->i_opcode != SWAP) {
2112
11.5k
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2113
                // Nope, but we know how to handle these. Keep looking:
2114
8.61k
                continue;
2115
8.61k
            }
2116
            // We can't reason about what this instruction does. Bail:
2117
2.95k
            return;
2118
11.5k
        }
2119
111k
        int j = next_swappable_instruction(block, i, -1);
2120
111k
        if (j < 0) {
2121
83.1k
            return;
2122
83.1k
        }
2123
27.8k
        int k = j;
2124
27.8k
        int lineno = block->b_instr[j].i_loc.lineno;
2125
44.0k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2126
40.8k
            k = next_swappable_instruction(block, k, lineno);
2127
40.8k
            if (k < 0) {
2128
24.7k
                return;
2129
24.7k
            }
2130
40.8k
        }
2131
        // The reordering is not safe if the two instructions to be swapped
2132
        // store to the same location, or if any intervening instruction stores
2133
        // to the same location as either of them.
2134
3.17k
        int store_j = STORES_TO(block->b_instr[j]);
2135
3.17k
        int store_k = STORES_TO(block->b_instr[k]);
2136
3.17k
        if (store_j >= 0 || store_k >= 0) {
2137
2.76k
            if (store_j == store_k) {
2138
330
                return;
2139
330
            }
2140
4.02k
            for (int idx = j + 1; idx < k; idx++) {
2141
1.59k
                int store_idx = STORES_TO(block->b_instr[idx]);
2142
1.59k
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2143
0
                    return;
2144
0
                }
2145
1.59k
            }
2146
2.43k
        }
2147
2148
        // Success!
2149
2.84k
        INSTR_SET_OP0(swap, NOP);
2150
2.84k
        cfg_instr temp = block->b_instr[j];
2151
2.84k
        block->b_instr[j] = block->b_instr[k];
2152
2.84k
        block->b_instr[k] = temp;
2153
2.84k
    }
2154
112k
}
2155
2156
static int
2157
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2158
628k
{
2159
628k
    assert(PyDict_CheckExact(const_cache));
2160
628k
    assert(PyList_CheckExact(consts));
2161
628k
    int opcode = 0;
2162
628k
    int oparg = 0;
2163
6.96M
    for (int i = 0; i < bb->b_iused; i++) {
2164
6.33M
        cfg_instr *inst = &bb->b_instr[i];
2165
6.33M
        if (inst->i_opcode == LOAD_CONST) {
2166
1.43M
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2167
1.43M
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2168
1.43M
            Py_DECREF(constant);
2169
1.43M
            if (res < 0) {
2170
0
                return ERROR;
2171
0
            }
2172
1.43M
        }
2173
6.33M
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2174
6.33M
                                      inst->i_opcode == COPY &&
2175
6.33M
                                      inst->i_oparg == 1);
2176
6.33M
        if (! is_copy_of_load_const) {
2177
6.33M
            opcode = inst->i_opcode;
2178
6.33M
            oparg = inst->i_oparg;
2179
6.33M
        }
2180
6.33M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2181
6.33M
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2182
4.90M
            continue;
2183
4.90M
        }
2184
1.43M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2185
1.43M
        switch(nextop) {
2186
2.52k
            case POP_JUMP_IF_FALSE:
2187
3.64k
            case POP_JUMP_IF_TRUE:
2188
5.20k
            case JUMP_IF_FALSE:
2189
6.27k
            case JUMP_IF_TRUE:
2190
6.27k
            {
2191
                /* Remove LOAD_CONST const; conditional jump */
2192
6.27k
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2193
6.27k
                if (cnt == NULL) {
2194
0
                    return ERROR;
2195
0
                }
2196
6.27k
                int is_true = PyObject_IsTrue(cnt);
2197
6.27k
                Py_DECREF(cnt);
2198
6.27k
                if (is_true == -1) {
2199
0
                    return ERROR;
2200
0
                }
2201
6.27k
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2202
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2203
3.64k
                    INSTR_SET_OP0(inst, NOP);
2204
3.64k
                }
2205
6.27k
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2206
6.27k
                if (is_true == jump_if_true) {
2207
3.14k
                    bb->b_instr[i+1].i_opcode = JUMP;
2208
3.14k
                }
2209
3.13k
                else {
2210
3.13k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2211
3.13k
                }
2212
6.27k
                break;
2213
6.27k
            }
2214
6.27k
            case IS_OP:
2215
3.48k
            {
2216
                // Fold to POP_JUMP_IF_NONE:
2217
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2218
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2219
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2220
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2221
                // Fold to POP_JUMP_IF_NOT_NONE:
2222
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2223
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2224
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2225
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2226
3.48k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2227
3.48k
                if (cnt == NULL) {
2228
0
                    return ERROR;
2229
0
                }
2230
3.48k
                if (!Py_IsNone(cnt)) {
2231
1.66k
                    Py_DECREF(cnt);
2232
1.66k
                    break;
2233
1.66k
                }
2234
1.81k
                if (bb->b_iused <= i + 2) {
2235
0
                    break;
2236
0
                }
2237
1.81k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2238
1.81k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2239
                // Get rid of TO_BOOL regardless:
2240
1.81k
                if (jump_instr->i_opcode == TO_BOOL) {
2241
0
                    INSTR_SET_OP0(jump_instr, NOP);
2242
0
                    if (bb->b_iused <= i + 3) {
2243
0
                        break;
2244
0
                    }
2245
0
                    jump_instr = &bb->b_instr[i + 3];
2246
0
                }
2247
1.81k
                bool invert = is_instr->i_oparg;
2248
1.81k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2249
1.81k
                    invert = !invert;
2250
1.81k
                }
2251
1
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2252
1
                    break;
2253
1
                }
2254
1.81k
                INSTR_SET_OP0(inst, NOP);
2255
1.81k
                INSTR_SET_OP0(is_instr, NOP);
2256
1.81k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2257
1.81k
                                              : POP_JUMP_IF_NONE;
2258
1.81k
                break;
2259
1.81k
            }
2260
4.70k
            case TO_BOOL:
2261
4.70k
            {
2262
4.70k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2263
4.70k
                if (cnt == NULL) {
2264
0
                    return ERROR;
2265
0
                }
2266
4.70k
                int is_true = PyObject_IsTrue(cnt);
2267
4.70k
                Py_DECREF(cnt);
2268
4.70k
                if (is_true == -1) {
2269
0
                    return ERROR;
2270
0
                }
2271
4.70k
                cnt = PyBool_FromLong(is_true);
2272
4.70k
                int index = add_const(cnt, consts, const_cache);
2273
4.70k
                if (index < 0) {
2274
0
                    return ERROR;
2275
0
                }
2276
4.70k
                INSTR_SET_OP0(inst, NOP);
2277
4.70k
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2278
4.70k
                break;
2279
4.70k
            }
2280
1.43M
        }
2281
1.43M
    }
2282
628k
    return SUCCESS;
2283
628k
}
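/* [Editor's illustration] The rewrites above on concrete input:
 *   LOAD_CONST True; POP_JUMP_IF_TRUE L   ->  NOP; JUMP L
 *   LOAD_CONST x; TO_BOOL                 ->  NOP; LOAD_CONST bool(x)
 * and the None-test folding turns
 *   LOAD_CONST None; IS_OP 0; POP_JUMP_IF_TRUE L
 * into
 *   NOP; NOP; POP_JUMP_IF_NONE L
 */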
2284
2285
static int
2286
77.1k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2287
705k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2288
628k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2289
628k
    }
2290
77.1k
    return SUCCESS;
2291
77.1k
}
2292
2293
static int
2294
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2295
628k
{
2296
628k
    assert(PyDict_CheckExact(const_cache));
2297
628k
    assert(PyList_CheckExact(consts));
2298
628k
    cfg_instr nop;
2299
628k
    INSTR_SET_OP0(&nop, NOP);
2300
6.97M
    for (int i = 0; i < bb->b_iused; i++) {
2301
6.34M
        cfg_instr *inst = &bb->b_instr[i];
2302
6.34M
        cfg_instr *target;
2303
6.34M
        int opcode = inst->i_opcode;
2304
6.34M
        int oparg = inst->i_oparg;
2305
6.34M
        if (HAS_TARGET(opcode)) {
2306
382k
            assert(inst->i_target->b_iused > 0);
2307
382k
            target = &inst->i_target->b_instr[0];
2308
382k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2309
382k
        }
2310
5.96M
        else {
2311
5.96M
            target = &nop;
2312
5.96M
        }
2313
6.34M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2314
6.34M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2315
6.34M
        switch (opcode) {
2316
            /* Try to fold tuples of constants.
2317
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2318
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2319
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2320
131k
            case BUILD_TUPLE:
2321
131k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2322
2.59k
                    switch(oparg) {
2323
67
                        case 1:
2324
67
                            INSTR_SET_OP0(inst, NOP);
2325
67
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2326
67
                            continue;
2327
1.00k
                        case 2:
2328
2.42k
                        case 3:
2329
2.42k
                            INSTR_SET_OP0(inst, NOP);
2330
2.42k
                            bb->b_instr[i+1].i_opcode = SWAP;
2331
2.42k
                            continue;
2332
2.59k
                    }
2333
2.59k
                }
2334
128k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2335
128k
                break;
2336
21.7k
            case BUILD_LIST:
2337
33.2k
            case BUILD_SET:
2338
33.2k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2339
33.2k
                break;
2340
2.94k
            case POP_JUMP_IF_NOT_NONE:
2341
6.41k
            case POP_JUMP_IF_NONE:
2342
6.41k
                switch (target->i_opcode) {
2343
0
                    case JUMP:
2344
0
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2345
6.41k
                }
2346
6.41k
                break;
2347
132k
            case POP_JUMP_IF_FALSE:
2348
132k
                switch (target->i_opcode) {
2349
1.84k
                    case JUMP:
2350
1.84k
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2351
132k
                }
2352
132k
                break;
2353
132k
            case POP_JUMP_IF_TRUE:
2354
17.2k
                switch (target->i_opcode) {
2355
98
                    case JUMP:
2356
98
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2357
17.2k
                }
2358
17.2k
                break;
2359
17.2k
            case JUMP_IF_FALSE:
2360
1.65k
                switch (target->i_opcode) {
2361
0
                    case JUMP:
2362
10
                    case JUMP_IF_FALSE:
2363
10
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2364
10
                        continue;
2365
220
                    case JUMP_IF_TRUE:
2366
                        // No need to check for loops here, a block's b_next
2367
                        // cannot point to itself.
2368
220
                        assert(inst->i_target != inst->i_target->b_next);
2369
220
                        inst->i_target = inst->i_target->b_next;
2370
220
                        i--;
2371
220
                        continue;
2372
1.65k
                }
2373
1.42k
                break;
2374
2.13k
            case JUMP_IF_TRUE:
2375
2.13k
                switch (target->i_opcode) {
2376
0
                    case JUMP:
2377
94
                    case JUMP_IF_TRUE:
2378
94
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2379
94
                        continue;
2380
0
                    case JUMP_IF_FALSE:
2381
                        // No need to check for loops here, a block's b_next
2382
                        // cannot point to itself.
2383
0
                        assert(inst->i_target != inst->i_target->b_next);
2384
0
                        inst->i_target = inst->i_target->b_next;
2385
0
                        i--;
2386
0
                        continue;
2387
2.13k
                }
2388
2.04k
                break;
2389
29.5k
            case JUMP:
2390
102k
            case JUMP_NO_INTERRUPT:
2391
102k
                switch (target->i_opcode) {
2392
362
                    case JUMP:
2393
362
                        i -= jump_thread(bb, inst, target, JUMP);
2394
362
                        continue;
2395
168
                    case JUMP_NO_INTERRUPT:
2396
168
                        i -= jump_thread(bb, inst, target, opcode);
2397
168
                        continue;
2398
102k
                }
2399
101k
                break;
2400
101k
            case FOR_ITER:
2401
6.01k
                if (target->i_opcode == JUMP) {
2402
                    /* This will not work now because the jump (at target) could
2403
                     * be forward or backward and FOR_ITER only jumps forward. We
2404
                     * can re-enable this if ever we implement a backward version
2405
                     * of FOR_ITER.
2406
                     */
2407
                    /*
2408
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2409
                    */
2410
0
                }
2411
6.01k
                break;
2412
165k
            case STORE_FAST:
2413
165k
                if (opcode == nextop &&
2414
165k
                    oparg == bb->b_instr[i+1].i_oparg &&
2415
165k
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2416
2.44k
                    bb->b_instr[i].i_opcode = POP_TOP;
2417
2.44k
                    bb->b_instr[i].i_oparg = 0;
2418
2.44k
                }
2419
165k
                break;
2420
168k
            case SWAP:
2421
168k
                if (oparg == 1) {
2422
0
                    INSTR_SET_OP0(inst, NOP);
2423
0
                }
2424
168k
                break;
2425
168k
            case LOAD_GLOBAL:
2426
151k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2427
2.62k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2428
2.62k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2429
2.62k
                }
2430
151k
                break;
2431
166k
            case COMPARE_OP:
2432
166k
                if (nextop == TO_BOOL) {
2433
40.8k
                    INSTR_SET_OP0(inst, NOP);
2434
40.8k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2435
40.8k
                    continue;
2436
40.8k
                }
2437
126k
                break;
2438
126k
            case CONTAINS_OP:
2439
16.3k
            case IS_OP:
2440
16.3k
                if (nextop == TO_BOOL) {
2441
1.21k
                    INSTR_SET_OP0(inst, NOP);
2442
1.21k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2443
1.21k
                    continue;
2444
1.21k
                }
2445
15.1k
                if (nextop == UNARY_NOT) {
2446
444
                    INSTR_SET_OP0(inst, NOP);
2447
444
                    int inverted = oparg ^ 1;
2448
444
                    assert(inverted == 0 || inverted == 1);
2449
444
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2450
444
                    continue;
2451
444
                }
2452
14.7k
                break;
2453
73.7k
            case TO_BOOL:
2454
73.7k
                if (nextop == TO_BOOL) {
2455
0
                    INSTR_SET_OP0(inst, NOP);
2456
0
                    continue;
2457
0
                }
2458
73.7k
                break;
2459
73.7k
            case UNARY_NOT:
2460
2.02k
                if (nextop == TO_BOOL) {
2461
194
                    INSTR_SET_OP0(inst, NOP);
2462
194
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2463
194
                    continue;
2464
194
                }
2465
1.83k
                if (nextop == UNARY_NOT) {
2466
194
                    INSTR_SET_OP0(inst, NOP);
2467
194
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2468
194
                    continue;
2469
194
                }
2470
1.63k
                _Py_FALLTHROUGH;
2471
20.4k
            case UNARY_INVERT:
2472
207k
            case UNARY_NEGATIVE:
2473
207k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2474
207k
                break;
2475
257k
            case CALL_INTRINSIC_1:
2476
257k
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2477
9.71k
                    if (nextop == GET_ITER) {
2478
34
                        INSTR_SET_OP0(inst, NOP);
2479
34
                    }
2480
9.67k
                    else {
2481
9.67k
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2482
9.67k
                    }
2483
9.71k
                }
2484
247k
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2485
205k
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2486
205k
                }
2487
257k
                break;
2488
865k
            case BINARY_OP:
2489
865k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2490
865k
                break;
2491
6.34M
        }
2492
6.34M
    }
2493
2494
6.91M
    for (int i = 0; i < bb->b_iused; i++) {
2495
6.28M
        cfg_instr *inst = &bb->b_instr[i];
2496
6.28M
        if (inst->i_opcode == SWAP) {
2497
112k
            if (swaptimize(bb, &i) < 0) {
2498
0
                goto error;
2499
0
            }
2500
112k
            apply_static_swaps(bb, i);
2501
112k
        }
2502
6.28M
    }
2503
628k
    return SUCCESS;
2504
0
error:
2505
0
    return ERROR;
2506
628k
}
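/* [Editor's illustration] Two cases from the switch above: the tuple-unpack
 * rewrite turns BUILD_TUPLE 2; UNPACK_SEQUENCE 2 (as emitted for a swap
 * assignment like "a, b = b, a") into NOP; SWAP 2, and a COMPARE_OP followed
 * by TO_BOOL is fused by setting a flag bit in the comparison's oparg
 * (oparg | 16) so the comparison itself produces an exact bool. */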
2507
2508
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2509
2510
static int
2511
remove_redundant_nops_and_jumps(cfg_builder *g)
2512
161k
{
2513
161k
    int removed_nops, removed_jumps;
2514
173k
    do {
2515
        /* Convergence is guaranteed because the number of
2516
         * redundant jumps and nops only decreases.
2517
         */
2518
173k
        removed_nops = remove_redundant_nops(g);
2519
173k
        RETURN_IF_ERROR(removed_nops);
2520
173k
        removed_jumps = remove_redundant_jumps(g);
2521
173k
        RETURN_IF_ERROR(removed_jumps);
2522
173k
    } while(removed_nops + removed_jumps > 0);
2523
161k
    return SUCCESS;
2524
161k
}
2525
2526
/* Perform optimizations on a control flow graph.
2527
   The consts object should still be in list form to allow new constants
2528
   to be appended.
2529
2530
   Code transformations that reduce code size initially fill the gaps with
2531
   NOPs.  Later those NOPs are removed.
2532
*/
2533
static int
2534
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2535
77.1k
{
2536
77.1k
    assert(PyDict_CheckExact(const_cache));
2537
77.1k
    RETURN_IF_ERROR(check_cfg(g));
2538
77.1k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2539
77.1k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2540
77.1k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2541
77.1k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2542
705k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2543
628k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2544
628k
    }
2545
77.1k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2546
77.1k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2547
77.1k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2548
77.1k
    assert(no_redundant_jumps(g));
2549
77.1k
    return SUCCESS;
2550
77.1k
}
2551
2552
static void
2553
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2554
63.4k
{
2555
63.4k
    int32_t line1 = inst1->i_loc.lineno;
2556
63.4k
    int32_t line2 = inst2->i_loc.lineno;
2557
    /* Skip if instructions are on different lines */
2558
63.4k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2559
766
        return;
2560
766
    }
2561
62.7k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2562
34.2k
        return;
2563
34.2k
    }
2564
28.4k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2565
28.4k
    INSTR_SET_OP0(inst2, NOP);
2566
28.4k
}
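/* [Editor's note] Worked example: STORE_FAST 1 followed by STORE_FAST 2 on
 * the same line becomes STORE_FAST_STORE_FAST with oparg
 *   (1 << 4) | 2 == 18
 * which is why both operands must fit in 4 bits (oparg < 16) above. */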
2567
2568
static int
2569
insert_superinstructions(cfg_builder *g)
2570
77.1k
{
2571
705k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2572
2573
5.29M
        for (int i = 0; i < b->b_iused; i++) {
2574
4.66M
            cfg_instr *inst = &b->b_instr[i];
2575
4.66M
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2576
4.66M
            switch(inst->i_opcode) {
2577
29.0k
                case LOAD_FAST:
2578
29.0k
                    if (nextop == LOAD_FAST) {
2579
3.56k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2580
3.56k
                    }
2581
29.0k
                    break;
2582
145k
                case STORE_FAST:
2583
145k
                    switch (nextop) {
2584
10.8k
                        case LOAD_FAST:
2585
10.8k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2586
10.8k
                            break;
2587
49.0k
                        case STORE_FAST:
2588
49.0k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2589
49.0k
                            break;
2590
145k
                    }
2591
145k
                    break;
2592
4.66M
            }
2593
4.66M
        }
2594
628k
    }
2595
77.1k
    int res = remove_redundant_nops(g);
2596
77.1k
    assert(no_redundant_nops(g));
2597
77.1k
    return res;
2598
77.1k
}
2599
2600
#define NOT_LOCAL -1
2601
168k
#define DUMMY_INSTR -1
2602
2603
typedef struct {
2604
    // Index of instruction that produced the reference or DUMMY_INSTR.
2605
    int instr;
2606
2607
    // The local to which the reference refers or NOT_LOCAL.
2608
    int local;
2609
} ref;
2610
2611
typedef struct {
2612
    ref *refs;
2613
    Py_ssize_t size;
2614
    Py_ssize_t capacity;
2615
} ref_stack;
2616
2617
static int
2618
ref_stack_push(ref_stack *stack, ref r)
2619
9.03M
{
2620
9.03M
    if (stack->size == stack->capacity) {
2621
77.4k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2622
77.4k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2623
77.4k
        if (refs == NULL) {
2624
0
            PyErr_NoMemory();
2625
0
            return -1;
2626
0
        }
2627
77.4k
        stack->refs = refs;
2628
77.4k
        stack->capacity = new_cap;
2629
77.4k
    }
2630
9.03M
    stack->refs[stack->size] = r;
2631
9.03M
    stack->size++;
2632
9.03M
    return 0;
2633
9.03M
}
2634
2635
static ref
2636
ref_stack_pop(ref_stack *stack)
2637
2.65M
{
2638
2.65M
    assert(stack->size > 0);
2639
2.65M
    stack->size--;
2640
2.65M
    ref r = stack->refs[stack->size];
2641
2.65M
    return r;
2642
2.65M
}
2643
2644
static void
2645
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2646
114k
{
2647
114k
    Py_ssize_t idx = stack->size - off;
2648
114k
    assert(idx >= 0 && idx < stack->size);
2649
114k
    ref tmp = stack->refs[idx];
2650
114k
    stack->refs[idx] = stack->refs[stack->size - 1];
2651
114k
    stack->refs[stack->size - 1] = tmp;
2652
114k
}
2653
2654
static ref
2655
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2656
8.77M
{
2657
8.77M
    assert(idx >= 0 && idx < stack->size);
2658
8.77M
    return stack->refs[idx];
2659
8.77M
}
2660
2661
static void
2662
ref_stack_clear(ref_stack *stack)
2663
343k
{
2664
343k
    stack->size = 0;
2665
343k
}
2666
2667
static void
2668
ref_stack_fini(ref_stack *stack)
2669
77.1k
{
2670
77.1k
    if (stack->refs != NULL) {
2671
76.9k
        PyMem_Free(stack->refs);
2672
76.9k
    }
2673
77.1k
    stack->refs = NULL;
2674
77.1k
    stack->capacity = 0;
2675
77.1k
    stack->size = 0;
2676
77.1k
}
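/* [Editor's sketch] How the ref_stack helpers above compose; this mirrors
 * the shape of the optimize_load_fast() loop below, assuming it is spliced
 * into this file where the static helpers are visible. Error handling and
 * the per-instruction flag bookkeeping are elided. */
static void
toy_ref_stack_demo(void)
{
    ref_stack refs = {0};
    (void)ref_stack_push(&refs, (ref){0, 3});         /* LOAD_FAST 3 at instr 0 */
    (void)ref_stack_push(&refs, (ref){1, NOT_LOCAL}); /* a non-local value */
    ref_stack_swap_top(&refs, 2);                     /* simulate SWAP 2 */
    ref top = ref_stack_pop(&refs);                   /* now the LOAD_FAST ref */
    assert(top.local == 3);
    ref_stack_clear(&refs);                           /* reuse for next block */
    ref_stack_fini(&refs);
}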
2677
2678
typedef enum {
2679
    // The loaded reference is still on the stack when the local is killed
2680
    SUPPORT_KILLED  = 1,
2681
    // The loaded reference is stored into a local
2682
    STORED_AS_LOCAL = 2,
2683
    // The loaded reference is still on the stack at the end of the basic block
2684
    REF_UNCONSUMED  = 4,
2685
} LoadFastInstrFlag;
2686
2687
static void
2688
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2689
179k
{
2690
2.38M
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2691
2.20M
        ref r = ref_stack_at(refs, i);
2692
2.20M
        if (r.local == local) {
2693
1.33k
            assert(r.instr >= 0);
2694
1.33k
            instr_flags[r.instr] |= SUPPORT_KILLED;
2695
1.33k
        }
2696
2.20M
    }
2697
179k
}
2698
2699
static void
2700
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2701
168k
{
2702
168k
    kill_local(instr_flags, refs, local);
2703
168k
    if (r.instr != DUMMY_INSTR) {
2704
151k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2705
151k
    }
2706
168k
}
2707
2708
static void
2709
load_fast_push_block(basicblock ***sp, basicblock *target,
2710
                     Py_ssize_t start_depth)
2711
376k
{
2712
376k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2713
376k
    if (!target->b_visited) {
2714
266k
        target->b_visited = 1;
2715
266k
        *(*sp)++ = target;
2716
266k
    }
2717
376k
}
2718
2719
/*
2720
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2721
 * load borrowed references onto the operand stack.
2722
 *
2723
 * This is only safe when we can prove that the reference in the frame outlives
2724
 * the borrowed reference produced by the instruction. We make this tractable
2725
 * by enforcing the following lifetimes:
2726
 *
2727
 * 1. Borrowed references loaded onto the operand stack live until the end of
2728
 *    the instruction that consumes them from the stack. Any borrowed
2729
 *    references that would escape into the heap (e.g. into frame objects or
2730
 *    generators) are converted into new, strong references.
2731
 *
2732
 * 2. Locals live until they are either killed by an instruction
2733
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2734
 *    via `f_locals` is added to a tuple owned by the frame object.
2735
 *
2736
 * To simplify the problem of detecting which supporting references in the
2737
 * frame are killed by instructions that overwrite locals, we only allow
2738
 * borrowed references to be stored as a local in the frame if they were passed
2739
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2740
 * strong references.
2741
 *
2742
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2743
 * that meet the following criteria:
2744
 *
2745
 * 1. The produced reference must be consumed from the stack before the
2746
 *    supporting reference in the frame is killed.
2747
 *
2748
 * 2. The produced reference cannot be stored as a local.
2749
 *
2750
 * We use abstract interpretation to identify instructions that meet these
2751
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2752
 * stack of abstract references and note any instructions that violate the
2753
 * criteria above. Once we've processed all the instructions in a block, any
2754
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2755
 */
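/* [Editor's illustration] A plausible pair of examples for the criteria
 * above: in "def f(x): return x + 1" the LOAD_FAST of x is consumed by
 * BINARY_OP while x is still live in the frame, so the load may borrow;
 * but in "def f(x): y = x" the reference produced by LOAD_FAST x is stored
 * into local y, violating criterion 2 (STORED_AS_LOCAL). Similarly, if a
 * DELETE_FAST x executes while a borrowed value of x is still on the stack,
 * kill_local() marks that load SUPPORT_KILLED and it is not optimized. */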
2756
static int
2757
optimize_load_fast(cfg_builder *g)
2758
77.1k
{
2759
77.1k
    int status;
2760
77.1k
    ref_stack refs = {0};
2761
77.1k
    int max_instrs = 0;
2762
77.1k
    basicblock *entryblock = g->g_entryblock;
2763
722k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2764
645k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2765
645k
    }
2766
77.1k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2767
77.1k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2768
77.1k
    if (instr_flags == NULL) {
2769
0
        PyErr_NoMemory();
2770
0
        return ERROR;
2771
0
    }
2772
77.1k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2773
77.1k
    if (blocks == NULL) {
2774
0
        status = ERROR;
2775
0
        goto done;
2776
0
    }
2777
77.1k
    basicblock **sp = blocks;
2778
77.1k
    *sp = entryblock;
2779
77.1k
    sp++;
2780
77.1k
    entryblock->b_startdepth = 0;
2781
77.1k
    entryblock->b_visited = 1;
2782
2783
77.1k
    #define PUSH_REF(instr, local)                \
2784
9.03M
        do {                                      \
2785
9.03M
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2786
0
                status = ERROR;                   \
2787
0
                goto done;                        \
2788
0
            }                                     \
2789
9.03M
        } while(0)
2790
2791
420k
    while (sp != blocks) {
2792
343k
        basicblock *block = *--sp;
2793
343k
        assert(block->b_startdepth > -1);
2794
2795
        // Reset per-block state.
2796
343k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2797
2798
        // Reset the stack of refs. We don't track references on the stack
2799
        // across basic blocks, but the bytecode will expect their
2800
        // presence. Add dummy references as necessary.
2801
343k
        ref_stack_clear(&refs);
2802
6.67M
        for (int i = 0; i < block->b_startdepth; i++) {
2803
6.33M
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2804
6.33M
        }
2805
2806
4.07M
        for (int i = 0; i < block->b_iused; i++) {
2807
3.73M
            cfg_instr *instr = &block->b_instr[i];
2808
3.73M
            int opcode = instr->i_opcode;
2809
3.73M
            int oparg = instr->i_oparg;
2810
3.73M
            assert(opcode != EXTENDED_ARG);
2811
3.73M
            switch (opcode) {
2812
                // Opcodes that load and store locals
2813
4.21k
                case DELETE_FAST: {
2814
4.21k
                    kill_local(instr_flags, &refs, oparg);
2815
4.21k
                    break;
2816
0
                }
2817
2818
55.7k
                case LOAD_FAST: {
2819
55.7k
                    PUSH_REF(i, oparg);
2820
55.7k
                    break;
2821
55.7k
                }
2822
2823
55.7k
                case LOAD_FAST_AND_CLEAR: {
2824
7.41k
                    kill_local(instr_flags, &refs, oparg);
2825
7.41k
                    PUSH_REF(i, oparg);
2826
7.41k
                    break;
2827
7.41k
                }
2828
2829
7.41k
                case LOAD_FAST_LOAD_FAST: {
2830
1.55k
                    PUSH_REF(i, oparg >> 4);
2831
1.55k
                    PUSH_REF(i, oparg & 15);
2832
1.55k
                    break;
2833
1.55k
                }
2834
2835
124k
                case STORE_FAST: {
2836
124k
                    ref r = ref_stack_pop(&refs);
2837
124k
                    store_local(instr_flags, &refs, oparg, r);
2838
124k
                    break;
2839
1.55k
                }
2840
2841
9.20k
                case STORE_FAST_LOAD_FAST: {
2842
                    // STORE_FAST
2843
9.20k
                    ref r = ref_stack_pop(&refs);
2844
9.20k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2845
                    // LOAD_FAST
2846
9.20k
                    PUSH_REF(i, oparg & 15);
2847
9.20k
                    break;
2848
9.20k
                }
2849
2850
17.3k
                case STORE_FAST_STORE_FAST: {
2851
                    // STORE_FAST
2852
17.3k
                    ref r = ref_stack_pop(&refs);
2853
17.3k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2854
                    // STORE_FAST
2855
17.3k
                    r = ref_stack_pop(&refs);
2856
17.3k
                    store_local(instr_flags, &refs, oparg & 15, r);
2857
17.3k
                    break;
2858
9.20k
                }
2859
2860
                // Opcodes that shuffle values on the stack
2861
195k
                case COPY: {
2862
195k
                    assert(oparg > 0);
2863
195k
                    Py_ssize_t idx = refs.size - oparg;
2864
195k
                    ref r = ref_stack_at(&refs, idx);
2865
195k
                    PUSH_REF(r.instr, r.local);
2866
195k
                    break;
2867
195k
                }
2868
2869
195k
                case SWAP: {
2870
114k
                    assert(oparg >= 2);
2871
114k
                    ref_stack_swap_top(&refs, oparg);
2872
114k
                    break;
2873
114k
                }
2874
2875
                // We treat opcodes that do not consume all of their inputs on
2876
                // a case-by-case basis, as we have no generic way of knowing
2877
                // how many inputs should be left on the stack.
2878
2879
                // Opcodes that consume no inputs
2880
3.93k
                case FORMAT_SIMPLE:
2881
4.71k
                case GET_ANEXT:
2882
10.4k
                case GET_ITER:
2883
30.7k
                case GET_LEN:
2884
30.7k
                case GET_YIELD_FROM_ITER:
2885
39.6k
                case IMPORT_FROM:
2886
39.6k
                case MATCH_KEYS:
2887
40.1k
                case MATCH_MAPPING:
2888
44.3k
                case MATCH_SEQUENCE:
2889
44.3k
                case WITH_EXCEPT_START: {
2890
44.3k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2891
44.3k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2892
44.3k
                    int net_pushed = num_pushed - num_popped;
2893
44.3k
                    assert(net_pushed >= 0);
2894
84.7k
                    for (int i = 0; i < net_pushed; i++) {
2895
40.3k
                        PUSH_REF(i, NOT_LOCAL);
2896
40.3k
                    }
2897
44.3k
                    break;
2898
44.3k
                }
2899
2900
                // Opcodes that consume some inputs and push no new values
2901
44.3k
                case DICT_MERGE:
2902
2.81k
                case DICT_UPDATE:
2903
46.4k
                case LIST_APPEND:
2904
54.4k
                case LIST_EXTEND:
2905
55.2k
                case MAP_ADD:
2906
55.2k
                case RERAISE:
2907
67.5k
                case SET_ADD:
2908
73.0k
                case SET_UPDATE: {
2909
73.0k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2910
73.0k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2911
73.0k
                    int net_popped = num_popped - num_pushed;
2912
73.0k
                    assert(net_popped > 0);
2913
146k
                    for (int i = 0; i < net_popped; i++) {
2914
73.8k
                        ref_stack_pop(&refs);
2915
73.8k
                    }
2916
73.0k
                    break;
2917
73.0k
                }
2918
2919
15.6k
                case END_SEND:
2920
51.8k
                case SET_FUNCTION_ATTRIBUTE: {
2921
51.8k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2922
51.8k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2923
51.8k
                    ref tos = ref_stack_pop(&refs);
2924
51.8k
                    ref_stack_pop(&refs);
2925
51.8k
                    PUSH_REF(tos.instr, tos.local);
2926
51.8k
                    break;
2927
51.8k
                }
2928
2929
                // Opcodes that consume some inputs and push new values
2930
51.8k
                case CHECK_EXC_MATCH: {
2931
0
                    ref_stack_pop(&refs);
2932
0
                    PUSH_REF(i, NOT_LOCAL);
2933
0
                    break;
2934
0
                }
2935
2936
5.76k
                case FOR_ITER: {
2937
5.76k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2938
5.76k
                    PUSH_REF(i, NOT_LOCAL);
2939
5.76k
                    break;
2940
5.76k
                }
2941
2942
16.6k
                case LOAD_ATTR:
2943
17.1k
                case LOAD_SUPER_ATTR: {
2944
17.1k
                    ref self = ref_stack_pop(&refs);
2945
17.1k
                    if (opcode == LOAD_SUPER_ATTR) {
2946
499
                        ref_stack_pop(&refs);
2947
499
                        ref_stack_pop(&refs);
2948
499
                    }
2949
17.1k
                    PUSH_REF(i, NOT_LOCAL);
2950
17.1k
                    if (oparg & 1) {
2951
                        // A method call; conservatively assume that self is pushed
2952
                        // back onto the stack
2953
1.22k
                        PUSH_REF(self.instr, self.local);
2954
1.22k
                    }
2955
17.1k
                    break;
2956
17.1k
                }
2957
2958
23.3k
                case LOAD_SPECIAL:
2959
23.3k
                case PUSH_EXC_INFO: {
2960
23.3k
                    ref tos = ref_stack_pop(&refs);
2961
23.3k
                    PUSH_REF(i, NOT_LOCAL);
2962
23.3k
                    PUSH_REF(tos.instr, tos.local);
2963
23.3k
                    break;
2964
23.3k
                }
2965
2966
23.3k
                case SEND: {
2967
15.6k
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2968
15.6k
                    ref_stack_pop(&refs);
2969
15.6k
                    PUSH_REF(i, NOT_LOCAL);
2970
15.6k
                    break;
2971
15.6k
                }
2972
2973
                // Opcodes that consume all of their inputs
2974
2.97M
                default: {
2975
2.97M
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2976
2.97M
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2977
2.97M
                    if (HAS_TARGET(instr->i_opcode)) {
2978
147k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2979
147k
                    }
2980
2.97M
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2981
                        // Block push opcodes only affect the stack when jumping
2982
                        // to the target.
2983
5.22M
                        for (int j = 0; j < num_popped; j++) {
2984
2.25M
                            ref_stack_pop(&refs);
2985
2.25M
                        }
2986
5.22M
                        for (int j = 0; j < num_pushed; j++) {
2987
2.25M
                            PUSH_REF(i, NOT_LOCAL);
2988
2.25M
                        }
2989
2.97M
                    }
2990
2.97M
                    break;
2991
2.97M
                }
2992
3.73M
            }
2993
3.73M
        }
2994
2995
        // Push fallthrough block
2996
343k
        cfg_instr *term = basicblock_last_instr(block);
2997
343k
        if (term != NULL && block->b_next != NULL &&
2998
343k
            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
2999
312k
              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
3000
208k
            assert(BB_HAS_FALLTHROUGH(block));
3001
208k
            load_fast_push_block(&sp, block->b_next, refs.size);
3002
208k
        }
3003
3004
        // Mark instructions that produce values that are on the stack at the
3005
        // end of the basic block
3006
6.71M
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3007
6.37M
            ref r = ref_stack_at(&refs, i);
3008
6.37M
            if (r.instr != -1) {
3009
298k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3010
298k
            }
3011
6.37M
        }
3012
3013
        // Optimize instructions
3014
4.07M
        for (int i = 0; i < block->b_iused; i++) {
3015
3.73M
            if (!instr_flags[i]) {
3016
3.37M
                cfg_instr *instr = &block->b_instr[i];
3017
3.37M
                switch (instr->i_opcode) {
3018
51.5k
                    case LOAD_FAST:
3019
51.5k
                        instr->i_opcode = LOAD_FAST_BORROW;
3020
51.5k
                        break;
3021
1.37k
                    case LOAD_FAST_LOAD_FAST:
3022
1.37k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3023
1.37k
                        break;
3024
3.32M
                    default:
3025
3.32M
                        break;
3026
3.37M
                }
3027
3.37M
            }
3028
3.73M
        }
3029
343k
    }
3030
3031
77.1k
    #undef PUSH_REF
3032
3033
77.1k
    status = SUCCESS;
3034
3035
77.1k
done:
3036
77.1k
    ref_stack_fini(&refs);
3037
77.1k
    PyMem_Free(instr_flags);
3038
77.1k
    PyMem_Free(blocks);
3039
77.1k
    return status;
3040
77.1k
}
3041
3042
// helper functions for add_checks_for_loads_of_uninitialized_variables
3043
static inline void
3044
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3045
1.80M
{
3046
    // Push b if the unsafe mask is giving us any new information.
3047
    // To avoid overflowing the stack, only allow each block once.
3048
    // Use b->b_visited=1 to mean that b is currently on the stack.
3049
1.80M
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3050
1.80M
    if (b->b_unsafe_locals_mask != both) {
3051
139k
        b->b_unsafe_locals_mask = both;
3052
        // More work left to do.
3053
139k
        if (!b->b_visited) {
3054
            // not on the stack, so push it.
3055
137k
            *(*sp)++ = b;
3056
137k
            b->b_visited = 1;
3057
137k
        }
3058
139k
    }
3059
1.80M
}
3060
3061
static void
3062
scan_block_for_locals(basicblock *b, basicblock ***sp)
3063
474k
{
3064
    // bit i is set if local i is potentially uninitialized
3065
474k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3066
3.55M
    for (int i = 0; i < b->b_iused; i++) {
3067
3.08M
        cfg_instr *instr = &b->b_instr[i];
3068
3.08M
        assert(instr->i_opcode != EXTENDED_ARG);
3069
3.08M
        if (instr->i_except != NULL) {
3070
1.32M
            maybe_push(instr->i_except, unsafe_mask, sp);
3071
1.32M
        }
3072
3.08M
        if (instr->i_oparg >= 64) {
3073
281k
            continue;
3074
281k
        }
3075
2.80M
        assert(instr->i_oparg >= 0);
3076
2.80M
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3077
2.80M
        switch (instr->i_opcode) {
3078
7.96k
            case DELETE_FAST:
3079
25.4k
            case LOAD_FAST_AND_CLEAR:
3080
60.7k
            case STORE_FAST_MAYBE_NULL:
3081
60.7k
                unsafe_mask |= bit;
3082
60.7k
                break;
3083
314k
            case STORE_FAST:
3084
314k
                unsafe_mask &= ~bit;
3085
314k
                break;
3086
5.25k
            case LOAD_FAST_CHECK:
3087
                // If this doesn't raise, then the local is defined.
3088
5.25k
                unsafe_mask &= ~bit;
3089
5.25k
                break;
3090
65.9k
            case LOAD_FAST:
3091
65.9k
                if (unsafe_mask & bit) {
3092
5.25k
                    instr->i_opcode = LOAD_FAST_CHECK;
3093
5.25k
                }
3094
65.9k
                unsafe_mask &= ~bit;
3095
65.9k
                break;
3096
2.80M
        }
3097
2.80M
    }
3098
474k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3099
234k
        maybe_push(b->b_next, unsafe_mask, sp);
3100
234k
    }
3101
474k
    cfg_instr *last = basicblock_last_instr(b);
3102
474k
    if (last && is_jump(last)) {
3103
208k
        assert(last->i_target != NULL);
3104
208k
        maybe_push(last->i_target, unsafe_mask, sp);
3105
208k
    }
3106
474k
}
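/* Illustrative sketch (hypothetical toy code, not part of this file) of the
 * bitmask bookkeeping used above: bit i of the mask is set while local i is
 * potentially uninitialized. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t unsafe_mask = (uint64_t)1 << 0;   /* local 0 starts undefined */

    unsafe_mask &= ~((uint64_t)1 << 0);        /* STORE_FAST 0: defined now */
    printf("LOAD_FAST 0 needs a check: %d\n", (int)(unsafe_mask & 1));  /* 0 */

    unsafe_mask |= (uint64_t)1 << 0;           /* DELETE_FAST 0: undefined */
    printf("LOAD_FAST 0 needs a check: %d\n", (int)(unsafe_mask & 1));  /* 1 */
    return 0;
}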
3107
3108
static int
3109
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3110
236
{
3111
236
    assert(nlocals > 64);
3112
236
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3113
236
    if (states == NULL) {
3114
0
        PyErr_NoMemory();
3115
0
        return ERROR;
3116
0
    }
3117
236
    Py_ssize_t blocknum = 0;
3118
    // states[i - 64] == blocknum if local i is guaranteed to
3119
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3120
    // STORE_FAST within that basicblock (not followed by
3121
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3122
5.32k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3123
5.09k
        blocknum++;
3124
103k
        for (int i = 0; i < b->b_iused; i++) {
3125
98.5k
            cfg_instr *instr = &b->b_instr[i];
3126
98.5k
            assert(instr->i_opcode != EXTENDED_ARG);
3127
98.5k
            int arg = instr->i_oparg;
3128
98.5k
            if (arg < 64) {
3129
89.7k
                continue;
3130
89.7k
            }
3131
8.78k
            assert(arg >= 0);
3132
8.78k
            switch (instr->i_opcode) {
3133
244
                case DELETE_FAST:
3134
314
                case LOAD_FAST_AND_CLEAR:
3135
454
                case STORE_FAST_MAYBE_NULL:
3136
454
                    states[arg - 64] = blocknum - 1;
3137
454
                    break;
3138
3.89k
                case STORE_FAST:
3139
3.89k
                    states[arg - 64] = blocknum;
3140
3.89k
                    break;
3141
735
                case LOAD_FAST:
3142
735
                    if (states[arg - 64] != blocknum) {
3143
234
                        instr->i_opcode = LOAD_FAST_CHECK;
3144
234
                    }
3145
735
                    states[arg - 64] = blocknum;
3146
735
                    break;
3148
8.78k
            }
3149
8.78k
        }
3150
5.09k
    }
3151
236
    PyMem_Free(states);
3152
236
    return SUCCESS;
3153
236
}
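/* Illustrative sketch (hypothetical toy code, not part of this file) of the
 * blocknum trick above: instead of clearing the states array at every block
 * boundary, a local counts as defined only if its entry equals the current
 * block number, so incrementing blocknum invalidates all entries in O(1). */
#include <stdio.h>

int main(void)
{
    long states[1] = {0};   /* tracks one local beyond the first 64 */
    long blocknum = 0;

    blocknum++;             /* enter block 1 */
    states[0] = blocknum;   /* STORE_FAST: defined within block 1 */
    printf("load needs a check: %d\n", states[0] != blocknum);  /* 0 */

    blocknum++;             /* enter block 2: the store no longer counts */
    printf("load needs a check: %d\n", states[0] != blocknum);  /* 1 */
    return 0;
}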
3154
3155
static int
3156
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3157
77.1k
{
3158
77.1k
    assert(PyList_CheckExact(consts));
3159
77.1k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3160
77.1k
    if (nconsts == 0) {
3161
259
        return SUCCESS;  /* nothing to do */
3162
259
    }
3163
3164
76.8k
    Py_ssize_t *index_map = NULL;
3165
76.8k
    Py_ssize_t *reverse_index_map = NULL;
3166
76.8k
    int err = ERROR;
3167
3168
76.8k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3169
76.8k
    if (index_map == NULL) {
3170
0
        goto end;
3171
0
    }
3172
545k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3173
468k
        index_map[i] = -1;
3174
468k
    }
3175
    // The first constant may be a docstring; always keep it.
3176
76.8k
    index_map[0] = 0;
3177
3178
    /* mark used consts */
3179
703k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3180
5.26M
        for (int i = 0; i < b->b_iused; i++) {
3181
4.63M
            int opcode = b->b_instr[i].i_opcode;
3182
4.63M
            if (OPCODE_HAS_CONST(opcode)) {
3183
586k
                int index = b->b_instr[i].i_oparg;
3184
586k
                index_map[index] = index;
3185
586k
            }
3186
4.63M
        }
3187
626k
    }
3188
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3189
    /* condense consts */
3190
76.8k
    Py_ssize_t n_used_consts = 0;
3191
622k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3192
545k
        if (index_map[i] != -1) {
3193
276k
            assert(index_map[i] == i);
3194
276k
            index_map[n_used_consts++] = index_map[i];
3195
276k
        }
3196
545k
    }
3197
76.8k
    if (n_used_consts == nconsts) {
3198
        /* nothing to do */
3199
36.3k
        err = SUCCESS;
3200
36.3k
        goto end;
3201
36.3k
    }
3202
3203
    /* move all used consts to the beginning of the consts list */
3204
40.4k
    assert(n_used_consts < nconsts);
3205
191k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3206
151k
        Py_ssize_t old_index = index_map[i];
3207
151k
        assert(i <= old_index && old_index < nconsts);
3208
151k
        if (i != old_index) {
3209
98.0k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3210
0
            assert(value != NULL);
3211
98.0k
            PyList_SetItem(consts, i, Py_NewRef(value));
3212
98.0k
        }
3213
151k
    }
3214
3215
    /* truncate the consts list at its new size */
3216
40.4k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3217
0
        goto end;
3218
0
    }
3219
    /* adjust const indices in the bytecode */
3220
40.4k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3221
40.4k
    if (reverse_index_map == NULL) {
3222
0
        goto end;
3223
0
    }
3224
459k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3225
419k
        reverse_index_map[i] = -1;
3226
419k
    }
3227
191k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3228
151k
        assert(index_map[i] != -1);
3229
151k
        assert(reverse_index_map[index_map[i]] == -1);
3230
151k
        reverse_index_map[index_map[i]] = i;
3231
151k
    }
3232
3233
514k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3234
3.64M
        for (int i = 0; i < b->b_iused; i++) {
3235
3.17M
            int opcode = b->b_instr[i].i_opcode;
3236
3.17M
            if (OPCODE_HAS_CONST(opcode)) {
3237
429k
                int index = b->b_instr[i].i_oparg;
3238
429k
                assert(reverse_index_map[index] >= 0);
3239
429k
                assert(reverse_index_map[index] < n_used_consts);
3240
429k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3241
429k
            }
3242
3.17M
        }
3243
474k
    }
3244
3245
40.4k
    err = SUCCESS;
3246
76.8k
end:
3247
76.8k
    PyMem_Free(index_map);
3248
76.8k
    PyMem_Free(reverse_index_map);
3249
76.8k
    return err;
3250
40.4k
}
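/* A worked sketch of the two index maps above (hypothetical toy code, not
 * part of this file): with 4 consts of which only indices 0 and 2 are used,
 * the used entries are condensed to the front and reverse_index_map gives
 * the new oparg for each old index. */
#include <stdio.h>

int main(void)
{
    long index_map[4] = {0, -1, 2, -1};          /* after the marking pass */
    long reverse_index_map[4] = {-1, -1, -1, -1};

    /* condense: index_map becomes {0, 2, ...} with n_used_consts == 2 */
    long n_used_consts = 0;
    for (long i = 0; i < 4; i++) {
        if (index_map[i] != -1) {
            index_map[n_used_consts++] = index_map[i];
        }
    }
    for (long i = 0; i < n_used_consts; i++) {
        reverse_index_map[index_map[i]] = i;
    }
    /* An instruction that referenced consts[2] now uses oparg 1. */
    printf("old index 2 -> new index %ld\n", reverse_index_map[2]);
    return 0;
}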
3251
3252
3253
3254
static int
3255
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3256
                                                int nlocals,
3257
                                                int nparams)
3258
77.1k
{
3259
77.1k
    if (nlocals == 0) {
3260
34.7k
        return SUCCESS;
3261
34.7k
    }
3262
42.3k
    if (nlocals > 64) {
3263
        // To avoid O(nlocals**2) compilation, locals beyond the first
3264
        // 64 are only analyzed one basicblock at a time: initialization
3265
        // info is not passed between basicblocks.
3266
236
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3267
0
            return ERROR;
3268
0
        }
3269
236
        nlocals = 64;
3270
236
    }
3271
42.3k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3272
42.3k
    if (stack == NULL) {
3273
0
        return ERROR;
3274
0
    }
3275
42.3k
    basicblock **sp = stack;
3276
3277
    // First origin of being uninitialized:
3278
    // The non-parameter locals in the entry block.
3279
42.3k
    uint64_t start_mask = 0;
3280
146k
    for (int i = nparams; i < nlocals; i++) {
3281
103k
        start_mask |= (uint64_t)1 << i;
3282
103k
    }
3283
42.3k
    maybe_push(entryblock, start_mask, &sp);
3284
3285
    // Second origin of being uninitialized:
3286
    // There could be DELETE_FAST somewhere, so
3287
    // be sure to scan each basicblock at least once.
3288
379k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3289
337k
        scan_block_for_locals(b, &sp);
3290
337k
    }
3291
    // Now propagate the uncertainty from the origins we found: Use
3292
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3293
180k
    while (sp > stack) {
3294
137k
        basicblock *b = *--sp;
3295
        // mark as no longer on stack
3296
137k
        b->b_visited = 0;
3297
137k
        scan_block_for_locals(b, &sp);
3298
137k
    }
3299
42.3k
    PyMem_Free(stack);
3300
42.3k
    return SUCCESS;
3301
42.3k
}
3302
3303
3304
static int
3305
49.1k
mark_warm(basicblock *entryblock) {
3306
49.1k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3307
49.1k
    if (stack == NULL) {
3308
0
        return ERROR;
3309
0
    }
3310
49.1k
    basicblock **sp = stack;
3311
3312
49.1k
    *sp++ = entryblock;
3313
49.1k
    entryblock->b_visited = 1;
3314
383k
    while (sp > stack) {
3315
334k
        basicblock *b = *(--sp);
3316
334k
        assert(!b->b_except_handler);
3317
334k
        b->b_warm = 1;
3318
334k
        basicblock *next = b->b_next;
3319
334k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3320
199k
            *sp++ = next;
3321
199k
            next->b_visited = 1;
3322
199k
        }
3323
2.94M
        for (int i = 0; i < b->b_iused; i++) {
3324
2.60M
            cfg_instr *instr = &b->b_instr[i];
3325
2.60M
            if (is_jump(instr) && !instr->i_target->b_visited) {
3326
86.2k
                *sp++ = instr->i_target;
3327
86.2k
                instr->i_target->b_visited = 1;
3328
86.2k
            }
3329
2.60M
        }
3330
334k
    }
3331
49.1k
    PyMem_Free(stack);
3332
49.1k
    return SUCCESS;
3333
49.1k
}
3334
3335
static int
3336
49.1k
mark_cold(basicblock *entryblock) {
3337
649k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3338
600k
        assert(!b->b_cold && !b->b_warm);
3339
600k
    }
3340
49.1k
    if (mark_warm(entryblock) < 0) {
3341
0
        return ERROR;
3342
0
    }
3343
3344
49.1k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3345
49.1k
    if (stack == NULL) {
3346
0
        return ERROR;
3347
0
    }
3348
3349
49.1k
    basicblock **sp = stack;
3350
649k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3351
600k
        if (b->b_except_handler) {
3352
82.0k
            assert(!b->b_warm);
3353
82.0k
            *sp++ = b;
3354
82.0k
            b->b_visited = 1;
3355
82.0k
        }
3356
600k
    }
3357
3358
253k
    while (sp > stack) {
3359
203k
        basicblock *b = *(--sp);
3360
203k
        b->b_cold = 1;
3361
203k
        basicblock *next = b->b_next;
3362
203k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3363
110k
            if (!next->b_warm && !next->b_visited) {
3364
86.7k
                *sp++ = next;
3365
86.7k
                next->b_visited = 1;
3366
86.7k
            }
3367
110k
        }
3368
1.10M
        for (int i = 0; i < b->b_iused; i++) {
3369
899k
            cfg_instr *instr = &b->b_instr[i];
3370
899k
            if (is_jump(instr)) {
3371
95.6k
                assert(i == b->b_iused - 1);
3372
95.6k
                basicblock *target = b->b_instr[i].i_target;
3373
95.6k
                if (!target->b_warm && !target->b_visited) {
3374
35.1k
                    *sp++ = target;
3375
35.1k
                    target->b_visited = 1;
3376
35.1k
                }
3377
95.6k
            }
3378
899k
        }
3379
203k
    }
3380
49.1k
    PyMem_Free(stack);
3381
49.1k
    return SUCCESS;
3382
49.1k
}
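/* Illustrative sketch (hypothetical toy code, not part of this file) of the
 * two-phase marking above, simplified to one successor per block: first
 * flood-fill "warm" from the entry along normal control flow, then
 * flood-fill "cold" from the exception handlers, stopping at warm blocks. */
#include <stdbool.h>
#include <stdio.h>

#define NBLOCKS 4

int main(void)
{
    /* succ[b] is block b's single normal-flow successor (-1 = none);
     * block 2 is an exception handler reachable only via an except edge. */
    int succ[NBLOCKS] = {1, -1, 3, -1};
    bool handler[NBLOCKS] = {false, false, true, false};
    bool warm[NBLOCKS] = {false}, cold[NBLOCKS] = {false};

    for (int b = 0; b != -1; b = succ[b]) {        /* phase 1: mark warm */
        warm[b] = true;
    }
    for (int b = 0; b < NBLOCKS; b++) {            /* phase 2: mark cold */
        if (handler[b]) {
            for (int c = b; c != -1 && !warm[c]; c = succ[c]) {
                cold[c] = true;
            }
        }
    }
    for (int b = 0; b < NBLOCKS; b++) {
        printf("block %d is %s\n", b, cold[b] ? "cold" : "warm");
    }
    return 0;
}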
3383
3384
3385
static int
3386
77.1k
push_cold_blocks_to_end(cfg_builder *g) {
3387
77.1k
    basicblock *entryblock = g->g_entryblock;
3388
77.1k
    if (entryblock->b_next == NULL) {
3389
        /* single basicblock, no need to reorder */
3390
28.0k
        return SUCCESS;
3391
28.0k
    }
3392
49.1k
    RETURN_IF_ERROR(mark_cold(entryblock));
3393
3394
49.1k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3395
3396
    /* If we have a cold block with fallthrough to a warm block, add */
3397
    /* an explicit jump instead of fallthrough */
3398
665k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3399
615k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3400
15.6k
            basicblock *explicit_jump = cfg_builder_new_block(g);
3401
15.6k
            if (explicit_jump == NULL) {
3402
0
                return ERROR;
3403
0
            }
3404
15.6k
            if (!IS_LABEL(b->b_next->b_label)) {
3405
0
                b->b_next->b_label.id = next_lbl++;
3406
0
            }
3407
15.6k
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3408
15.6k
                             NO_LOCATION);
3409
15.6k
            explicit_jump->b_cold = 1;
3410
15.6k
            explicit_jump->b_next = b->b_next;
3411
15.6k
            explicit_jump->b_predecessors = 1;
3412
15.6k
            b->b_next = explicit_jump;
3413
3414
            /* set target */
3415
15.6k
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3416
15.6k
            last->i_target = explicit_jump->b_next;
3417
15.6k
        }
3418
615k
    }
3419
3420
49.1k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3421
49.1k
    basicblock *cold_blocks = NULL;
3422
49.1k
    basicblock *cold_blocks_tail = NULL;
3423
3424
49.1k
    basicblock *b = entryblock;
3425
93.4k
    while (b->b_next) {
3426
93.4k
        assert(!b->b_cold);
3427
440k
        while (b->b_next && !b->b_next->b_cold) {
3428
347k
            b = b->b_next;
3429
347k
        }
3430
93.4k
        if (b->b_next == NULL) {
3431
            /* no more cold blocks */
3432
49.1k
            break;
3433
49.1k
        }
3434
3435
        /* b->b_next is the beginning of a cold streak */
3436
44.3k
        assert(!b->b_cold && b->b_next->b_cold);
3437
3438
44.3k
        basicblock *b_end = b->b_next;
3439
219k
        while (b_end->b_next && b_end->b_next->b_cold) {
3440
175k
            b_end = b_end->b_next;
3441
175k
        }
3442
3443
        /* b_end is the end of the cold streak */
3444
44.3k
        assert(b_end && b_end->b_cold);
3445
44.3k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3446
3447
44.3k
        if (cold_blocks == NULL) {
3448
7.63k
            cold_blocks = b->b_next;
3449
7.63k
        }
3450
36.7k
        else {
3451
36.7k
            cold_blocks_tail->b_next = b->b_next;
3452
36.7k
        }
3453
44.3k
        cold_blocks_tail = b_end;
3454
44.3k
        b->b_next = b_end->b_next;
3455
44.3k
        b_end->b_next = NULL;
3456
44.3k
    }
3457
49.1k
    assert(b != NULL && b->b_next == NULL);
3458
49.1k
    b->b_next = cold_blocks;
3459
3460
49.1k
    if (cold_blocks != NULL) {
3461
7.63k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3462
7.63k
    }
3463
49.1k
    return SUCCESS;
3464
49.1k
}
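/* Illustrative sketch (hypothetical toy code, not part of this file) of the
 * splice above: walk a singly linked list, unlink each run of cold nodes,
 * and append the runs at the tail while preserving their relative order. */
#include <stdbool.h>
#include <stdio.h>

typedef struct node { int id; bool cold; struct node *next; } node;

int main(void)
{
    node d = {3, false, NULL}, c = {2, true, &d}, b = {1, true, &c},
         a = {0, false, &b};
    node *cold_head = NULL, *cold_tail = NULL;

    for (node *n = &a; n->next != NULL; ) {
        if (!n->next->cold) { n = n->next; continue; }
        node *run = n->next, *end = run;          /* run of cold nodes */
        while (end->next && end->next->cold) {
            end = end->next;
        }
        n->next = end->next;                      /* unlink the run */
        end->next = NULL;
        if (cold_head == NULL) { cold_head = run; } else { cold_tail->next = run; }
        cold_tail = end;
    }
    node *tail = &a;
    while (tail->next) { tail = tail->next; }
    tail->next = cold_head;                       /* append cold runs */
    for (node *n = &a; n; n = n->next) { printf("%d ", n->id); }
    printf("\n");                                 /* prints: 0 3 1 2 */
    return 0;
}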
3465
3466
static int
3467
convert_pseudo_conditional_jumps(cfg_builder *g)
3468
77.1k
{
3469
77.1k
    basicblock *entryblock = g->g_entryblock;
3470
721k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3471
5.28M
        for (int i = 0; i < b->b_iused; i++) {
3472
4.63M
            cfg_instr *instr = &b->b_instr[i];
3473
4.63M
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3474
3.10k
                assert(i == b->b_iused - 1);
3475
3.10k
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3476
1.80k
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3477
3.10k
                location loc = instr->i_loc;
3478
3.10k
                basicblock *except = instr->i_except;
3479
3.10k
                cfg_instr copy = {
3480
3.10k
                            .i_opcode = COPY,
3481
3.10k
                            .i_oparg = 1,
3482
3.10k
                            .i_loc = loc,
3483
3.10k
                            .i_target = NULL,
3484
3.10k
                            .i_except = except,
3485
3.10k
                };
3486
3.10k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3487
3.10k
                cfg_instr to_bool = {
3488
3.10k
                            .i_opcode = TO_BOOL,
3489
3.10k
                            .i_oparg = 0,
3490
3.10k
                            .i_loc = loc,
3491
3.10k
                            .i_target = NULL,
3492
3.10k
                            .i_except = except,
3493
3.10k
                };
3494
3.10k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3495
3.10k
            }
3496
4.63M
        }
3497
643k
    }
3498
77.1k
    return SUCCESS;
3499
77.1k
}
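/* Illustrative sketch (hypothetical toy code, not part of this file) of the
 * rewrite above: a pseudo JUMP_IF_FALSE, which must leave the tested value
 * on the stack, is expanded into COPY 1; TO_BOOL; POP_JUMP_IF_FALSE, which
 * has the same net effect. */
#include <stdio.h>

int main(void)
{
    const char *before[] = {"LOAD_FAST 0", "JUMP_IF_FALSE L1"};
    const char *after[]  = {"LOAD_FAST 0", "COPY 1", "TO_BOOL",
                            "POP_JUMP_IF_FALSE L1"};

    puts("before:");
    for (size_t i = 0; i < sizeof(before) / sizeof(before[0]); i++) {
        printf("  %s\n", before[i]);
    }
    puts("after:");
    for (size_t i = 0; i < sizeof(after) / sizeof(after[0]); i++) {
        printf("  %s\n", after[i]);
    }
    return 0;
}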
3500
3501
static int
3502
convert_pseudo_ops(cfg_builder *g)
3503
77.1k
{
3504
77.1k
    basicblock *entryblock = g->g_entryblock;
3505
721k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3506
5.33M
        for (int i = 0; i < b->b_iused; i++) {
3507
4.68M
            cfg_instr *instr = &b->b_instr[i];
3508
4.68M
            if (is_block_push(instr)) {
3509
82.0k
                INSTR_SET_OP0(instr, NOP);
3510
82.0k
            }
3511
4.60M
            else if (instr->i_opcode == LOAD_CLOSURE) {
3512
28.3k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3513
28.3k
                instr->i_opcode = LOAD_FAST;
3514
28.3k
            }
3515
4.57M
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3516
17.7k
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3517
17.7k
                instr->i_opcode = STORE_FAST;
3518
17.7k
            }
3519
4.68M
        }
3520
643k
    }
3521
77.1k
    return remove_redundant_nops_and_jumps(g);
3522
77.1k
}
3523
3524
static inline bool
3525
1.23M
is_exit_or_eval_check_without_lineno(basicblock *b) {
3526
1.23M
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3527
357k
        return basicblock_has_no_lineno(b);
3528
357k
    }
3529
875k
    else {
3530
875k
        return false;
3531
875k
    }
3532
1.23M
}
3533
3534
3535
/* PEP 626 mandates that the f_lineno of a frame is correct
3536
 * after a frame terminates. It would be prohibitively expensive
3537
 * to continuously update the f_lineno field at runtime,
3538
 * so we make sure that all exiting instructions (raises and returns)
3539
 * have a valid line number, allowing us to compute f_lineno lazily.
3540
 * We can do this by duplicating the exit blocks without line number
3541
 * so that none have more than one predecessor. We can then safely
3542
 * copy the line number from the sole predecessor block.
3543
 */
3544
static int
3545
duplicate_exits_without_lineno(cfg_builder *g)
3546
154k
{
3547
154k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3548
3549
    /* Copy all exit blocks without line number that are targets of a jump.
3550
     */
3551
154k
    basicblock *entryblock = g->g_entryblock;
3552
1.42M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3553
1.27M
        cfg_instr *last = basicblock_last_instr(b);
3554
1.27M
        if (last == NULL) {
3555
128k
            continue;
3556
128k
        }
3557
1.14M
        if (is_jump(last)) {
3558
586k
            basicblock *target = next_nonempty_block(last->i_target);
3559
586k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3560
2.53k
                basicblock *new_target = copy_basicblock(g, target);
3561
2.53k
                if (new_target == NULL) {
3562
0
                    return ERROR;
3563
0
                }
3564
2.53k
                new_target->b_instr[0].i_loc = last->i_loc;
3565
2.53k
                last->i_target = new_target;
3566
2.53k
                target->b_predecessors--;
3567
2.53k
                new_target->b_predecessors = 1;
3568
2.53k
                new_target->b_next = target->b_next;
3569
2.53k
                new_target->b_label.id = next_lbl++;
3570
2.53k
                target->b_next = new_target;
3571
2.53k
            }
3572
586k
        }
3573
1.14M
    }
3574
3575
    /* Any remaining reachable exit blocks without line number can only be reached by
3576
     * fall through, and thus can only have a single predecessor */
3577
1.42M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3578
1.27M
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3579
646k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3580
16.2k
                cfg_instr *last = basicblock_last_instr(b);
3581
16.2k
                assert(last != NULL);
3582
16.2k
                b->b_next->b_instr[0].i_loc = last->i_loc;
3583
16.2k
            }
3584
646k
        }
3585
1.27M
    }
3586
154k
    return SUCCESS;
3587
154k
}
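/* Illustrative sketch (hypothetical toy code, not part of this file): two
 * jumps share one exit block that has no line number; after duplication each
 * jump owns its own copy, and each copy inherits its jump's location. */
#include <stdio.h>

int main(void)
{
    int jump_lineno[2] = {3, 7};    /* locations of the two jumps */
    int exit_lineno[2];             /* one duplicated exit per jump */

    for (int i = 0; i < 2; i++) {
        /* the copied exit block takes its predecessor jump's line */
        exit_lineno[i] = jump_lineno[i];
    }
    for (int i = 0; i < 2; i++) {
        printf("exit copy %d reports line %d\n", i, exit_lineno[i]);
    }
    return 0;
}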
3588
3589
3590
/* If an instruction has no line number, but its predecessor in the BB does,
3591
 * then copy the line number. If a successor block has no line number, and only
3592
 * one predecessor, then inherit the line number.
3593
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3594
 * Also reduces the size of the line number table,
3595
 * but has no impact on the generated line number events.
3596
 */
3597
static void
3598
154k
propagate_line_numbers(basicblock *entryblock) {
3599
1.42M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3600
1.27M
        cfg_instr *last = basicblock_last_instr(b);
3601
1.27M
        if (last == NULL) {
3602
128k
            continue;
3603
128k
        }
3604
3605
1.14M
        location prev_location = NO_LOCATION;
3606
12.1M
        for (int i = 0; i < b->b_iused; i++) {
3607
10.9M
            if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3608
740k
                b->b_instr[i].i_loc = prev_location;
3609
740k
            }
3610
10.2M
            else {
3611
10.2M
                prev_location = b->b_instr[i].i_loc;
3612
10.2M
            }
3613
10.9M
        }
3614
1.14M
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3615
422k
            if (b->b_next->b_iused > 0) {
3616
421k
                if (b->b_next->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3617
38.4k
                    b->b_next->b_instr[0].i_loc = prev_location;
3618
38.4k
                }
3619
421k
            }
3620
422k
        }
3621
1.14M
        if (is_jump(last)) {
3622
586k
            basicblock *target = last->i_target;
3623
586k
            if (target->b_predecessors == 1) {
3624
122k
                if (target->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3625
26.3k
                    target->b_instr[0].i_loc = prev_location;
3626
26.3k
                }
3627
122k
            }
3628
586k
        }
3629
1.14M
    }
3630
154k
}
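/* Illustrative sketch (hypothetical toy code, not part of this file) of the
 * intra-block propagation above: instructions without a line number (-1
 * here) inherit the location of the closest preceding instruction that has
 * one. */
#include <stdio.h>

int main(void)
{
    int lineno[] = {10, -1, -1, 12, -1};
    int prev = -1;
    for (int i = 0; i < 5; i++) {
        if (lineno[i] == -1) {
            lineno[i] = prev;
        }
        else {
            prev = lineno[i];
        }
    }
    for (int i = 0; i < 5; i++) {
        printf("%d ", lineno[i]);   /* prints: 10 10 10 12 12 */
    }
    printf("\n");
    return 0;
}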
3631
3632
static int
3633
resolve_line_numbers(cfg_builder *g, int firstlineno)
3634
154k
{
3635
154k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3636
154k
    propagate_line_numbers(g->g_entryblock);
3637
154k
    return SUCCESS;
3638
154k
}
3639
3640
int
3641
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3642
                        int nlocals, int nparams, int firstlineno)
3643
77.1k
{
3644
77.1k
    assert(cfg_builder_check(g));
3645
    /** Preprocessing **/
3646
    /* Map labels to targets and mark exception handlers */
3647
77.1k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3648
77.1k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3649
77.1k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3650
3651
    /** Optimization **/
3652
77.1k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3653
77.1k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3654
77.1k
    RETURN_IF_ERROR(
3655
77.1k
        add_checks_for_loads_of_uninitialized_variables(
3656
77.1k
            g->g_entryblock, nlocals, nparams));
3657
77.1k
    RETURN_IF_ERROR(insert_superinstructions(g));
3658
3659
77.1k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3660
77.1k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3661
    // assert temporarily removed; see https://github.com/python/cpython/issues/125845
3662
    // assert(all_exits_have_lineno(g->g_entryblock));
3663
77.1k
    return SUCCESS;
3664
77.1k
}
3665
3666
static int *
3667
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3668
77.1k
{
3669
77.1k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3670
77.1k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3671
77.1k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3672
3673
77.1k
    int noffsets = ncellvars + nfreevars;
3674
77.1k
    int *fixed = PyMem_New(int, noffsets);
3675
77.1k
    if (fixed == NULL) {
3676
0
        PyErr_NoMemory();
3677
0
        return NULL;
3678
0
    }
3679
118k
    for (int i = 0; i < noffsets; i++) {
3680
40.9k
        fixed[i] = nlocals + i;
3681
40.9k
    }
3682
3683
77.1k
    PyObject *varname, *cellindex;
3684
77.1k
    Py_ssize_t pos = 0;
3685
92.2k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3686
15.1k
        PyObject *varindex;
3687
15.1k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3688
0
            goto error;
3689
0
        }
3690
15.1k
        if (varindex == NULL) {
3691
14.6k
            continue;
3692
14.6k
        }
3693
3694
518
        int argoffset = PyLong_AsInt(varindex);
3695
518
        Py_DECREF(varindex);
3696
518
        if (argoffset == -1 && PyErr_Occurred()) {
3697
0
            goto error;
3698
0
        }
3699
3700
518
        int oldindex = PyLong_AsInt(cellindex);
3701
518
        if (oldindex == -1 && PyErr_Occurred()) {
3702
0
            goto error;
3703
0
        }
3704
518
        fixed[oldindex] = argoffset;
3705
518
    }
3706
77.1k
    return fixed;
3707
3708
0
error:
3709
0
    PyMem_Free(fixed);
3710
0
    return NULL;
3711
77.1k
}
3712
3713
#define IS_GENERATOR(CF) \
3714
77.1k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3715
3716
static int
3717
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3718
                           int *fixed, int nfreevars, int code_flags)
3719
77.1k
{
3720
77.1k
    assert(umd->u_firstlineno > 0);
3721
3722
    /* Add the generator prefix instructions. */
3723
77.1k
    if (IS_GENERATOR(code_flags)) {
3724
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3725
         * of 0. This is because RETURN_GENERATOR pushes an element
3726
         * with _PyFrame_StackPush before switching stacks.
3727
         */
3728
3729
5.69k
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3730
5.69k
        cfg_instr make_gen = {
3731
5.69k
            .i_opcode = RETURN_GENERATOR,
3732
5.69k
            .i_oparg = 0,
3733
5.69k
            .i_loc = loc,
3734
5.69k
            .i_target = NULL,
3735
5.69k
            .i_except = NULL,
3736
5.69k
        };
3737
5.69k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3738
5.69k
        cfg_instr pop_top = {
3739
5.69k
            .i_opcode = POP_TOP,
3740
5.69k
            .i_oparg = 0,
3741
5.69k
            .i_loc = loc,
3742
5.69k
            .i_target = NULL,
3743
5.69k
            .i_except = NULL,
3744
5.69k
        };
3745
5.69k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3746
5.69k
    }
3747
3748
    /* Set up cells for any variable that escapes, to be put in a closure. */
3749
77.1k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3750
77.1k
    if (ncellvars) {
3751
        // umd->u_cellvars has the cells out of order so we sort them
3752
        // before adding the MAKE_CELL instructions.  Note that we
3753
        // adjust for arg cells, which come first.
3754
13.7k
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3755
13.7k
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3756
13.7k
        if (sorted == NULL) {
3757
0
            PyErr_NoMemory();
3758
0
            return ERROR;
3759
0
        }
3760
28.8k
        for (int i = 0; i < ncellvars; i++) {
3761
15.1k
            sorted[fixed[i]] = i + 1;
3762
15.1k
        }
3763
55.2k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3764
41.5k
            int oldindex = sorted[i] - 1;
3765
41.5k
            if (oldindex == -1) {
3766
26.4k
                continue;
3767
26.4k
            }
3768
15.1k
            cfg_instr make_cell = {
3769
15.1k
                .i_opcode = MAKE_CELL,
3770
                // This will get fixed in fix_cell_offsets().
3771
15.1k
                .i_oparg = oldindex,
3772
15.1k
                .i_loc = NO_LOCATION,
3773
15.1k
                .i_target = NULL,
3774
15.1k
                .i_except = NULL,
3775
15.1k
            };
3776
15.1k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3777
0
                PyMem_RawFree(sorted);
3778
0
                return ERROR;
3779
0
            }
3780
15.1k
            ncellsused += 1;
3781
15.1k
        }
3782
13.7k
        PyMem_RawFree(sorted);
3783
13.7k
    }
3784
3785
77.1k
    if (nfreevars) {
3786
19.5k
        cfg_instr copy_frees = {
3787
19.5k
            .i_opcode = COPY_FREE_VARS,
3788
19.5k
            .i_oparg = nfreevars,
3789
19.5k
            .i_loc = NO_LOCATION,
3790
19.5k
            .i_target = NULL,
3791
19.5k
            .i_except = NULL,
3792
19.5k
        };
3793
19.5k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3794
19.5k
    }
3795
3796
77.1k
    return SUCCESS;
3797
77.1k
}
3798
3799
static int
3800
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3801
77.1k
{
3802
77.1k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3803
77.1k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3804
77.1k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3805
77.1k
    int noffsets = ncellvars + nfreevars;
3806
3807
    // First deal with duplicates (arg cells).
3808
77.1k
    int numdropped = 0;
3809
118k
    for (int i = 0; i < noffsets ; i++) {
3810
40.9k
        if (fixedmap[i] == i + nlocals) {
3811
40.4k
            fixedmap[i] -= numdropped;
3812
40.4k
        }
3813
518
        else {
3814
            // It was a duplicate (cell/arg).
3815
518
            numdropped += 1;
3816
518
        }
3817
40.9k
    }
3818
3819
    // Then update offsets, either relative to locals or by cell2arg.
3820
721k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3821
5.33M
        for (int i = 0; i < b->b_iused; i++) {
3822
4.68M
            cfg_instr *inst = &b->b_instr[i];
3823
            // This is called before extended args are generated.
3824
4.68M
            assert(inst->i_opcode != EXTENDED_ARG);
3825
4.68M
            int oldoffset = inst->i_oparg;
3826
4.68M
            switch(inst->i_opcode) {
3827
15.1k
                case MAKE_CELL:
3828
43.5k
                case LOAD_CLOSURE:
3829
66.0k
                case LOAD_DEREF:
3830
77.9k
                case STORE_DEREF:
3831
78.1k
                case DELETE_DEREF:
3832
88.9k
                case LOAD_FROM_DICT_OR_DEREF:
3833
88.9k
                    assert(oldoffset >= 0);
3834
88.9k
                    assert(oldoffset < noffsets);
3835
88.9k
                    assert(fixedmap[oldoffset] >= 0);
3836
88.9k
                    inst->i_oparg = fixedmap[oldoffset];
3837
4.68M
            }
3838
4.68M
        }
3839
643k
    }
3840
3841
77.1k
    return numdropped;
3842
77.1k
}
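/* Illustrative sketch (hypothetical toy code, not part of this file) of the
 * offset fixup above: with 2 locals and 2 cells, where cell 0 doubles as
 * argument 1, the duplicate is dropped and the remaining cell slides down by
 * one slot. */
#include <stdio.h>

int main(void)
{
    int nlocals = 2;
    int fixedmap[2] = {1, 3};   /* cell 0 -> arg slot 1; cell 1 -> slot 3 */
    int numdropped = 0;

    for (int i = 0; i < 2; i++) {
        if (fixedmap[i] == i + nlocals) {
            fixedmap[i] -= numdropped;   /* ordinary cell: compact downward */
        }
        else {
            numdropped += 1;             /* duplicate of an argument slot */
        }
    }
    printf("cell 0 -> %d, cell 1 -> %d, dropped %d\n",
           fixedmap[0], fixedmap[1], numdropped);   /* prints: 1, 2, 1 */
    return 0;
}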
3843
3844
static int
3845
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3846
77.1k
{
3847
77.1k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3848
77.1k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3849
77.1k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3850
77.1k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3851
77.1k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3852
77.1k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3853
77.1k
    assert(INT_MAX - nlocals - ncellvars > 0);
3854
77.1k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3855
77.1k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3856
77.1k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3857
77.1k
    if (cellfixedoffsets == NULL) {
3858
0
        return ERROR;
3859
0
    }
3860
3861
    // This must be called before fix_cell_offsets().
3862
77.1k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3863
0
        PyMem_Free(cellfixedoffsets);
3864
0
        return ERROR;
3865
0
    }
3866
3867
77.1k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3868
77.1k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3869
77.1k
    cellfixedoffsets = NULL;
3870
77.1k
    if (numdropped < 0) {
3871
0
        return ERROR;
3872
0
    }
3873
3874
77.1k
    nlocalsplus -= numdropped;
3875
77.1k
    return nlocalsplus;
3876
77.1k
}
3877
3878
cfg_builder *
3879
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3880
77.1k
{
3881
77.1k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3882
0
        return NULL;
3883
0
    }
3884
77.1k
    cfg_builder *g = _PyCfgBuilder_New();
3885
77.1k
    if (g == NULL) {
3886
0
        return NULL;
3887
0
    }
3888
6.54M
    for (int i = 0; i < seq->s_used; i++) {
3889
6.47M
        seq->s_instrs[i].i_target = 0;
3890
6.47M
    }
3891
6.54M
    for (int i = 0; i < seq->s_used; i++) {
3892
6.47M
        _PyInstruction *instr = &seq->s_instrs[i];
3893
6.47M
        if (HAS_TARGET(instr->i_opcode)) {
3894
398k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3895
398k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3896
398k
        }
3897
6.47M
    }
3898
77.1k
    int offset = 0;
3899
6.54M
    for (int i = 0; i < seq->s_used; i++) {
3900
6.47M
        _PyInstruction *instr = &seq->s_instrs[i];
3901
6.47M
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3902
15.8k
            if (seq->s_annotations_code != NULL) {
3903
914
                assert(seq->s_annotations_code->s_labelmap_size == 0
3904
914
                    && seq->s_annotations_code->s_nested == NULL);
3905
3.65k
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3906
2.74k
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3907
2.74k
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3908
2.74k
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3909
0
                        goto error;
3910
0
                    }
3911
2.74k
                }
3912
914
                offset += seq->s_annotations_code->s_used - 1;
3913
914
            }
3914
14.9k
            else {
3915
14.9k
                offset -= 1;
3916
14.9k
            }
3917
15.8k
            continue;
3918
15.8k
        }
3919
6.45M
        if (instr->i_target) {
3920
296k
            jump_target_label lbl_ = {i + offset};
3921
296k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3922
0
                goto error;
3923
0
            }
3924
296k
        }
3925
6.45M
        int opcode = instr->i_opcode;
3926
6.45M
        int oparg = instr->i_oparg;
3927
6.45M
        if (HAS_TARGET(opcode)) {
3928
398k
            oparg += offset;
3929
398k
        }
3930
6.45M
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3931
0
            goto error;
3932
0
        }
3933
6.45M
    }
3934
77.1k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3935
0
        goto error;
3936
0
    }
3937
77.1k
    return g;
3938
0
error:
3939
0
    _PyCfgBuilder_Free(g);
3940
0
    return NULL;
3941
77.1k
}
3942
3943
int
3944
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3945
77.1k
{
3946
77.1k
    int lbl = 0;
3947
722k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3948
645k
        b->b_label = (jump_target_label){lbl};
3949
645k
        lbl += 1;
3950
645k
    }
3951
722k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3952
645k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3953
5.40M
        for (int i = 0; i < b->b_iused; i++) {
3954
4.76M
            cfg_instr *instr = &b->b_instr[i];
3955
4.76M
            if (HAS_TARGET(instr->i_opcode)) {
3956
                /* Set oparg to the label id (it will later be mapped to an offset) */
3957
288k
                instr->i_oparg = instr->i_target->b_label.id;
3958
288k
            }
3959
4.76M
            RETURN_IF_ERROR(
3960
4.76M
                _PyInstructionSequence_Addop(
3961
4.76M
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3962
3963
4.76M
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3964
4.76M
            if (instr->i_except != NULL) {
3965
1.34M
                hi->h_label = instr->i_except->b_label.id;
3966
1.34M
                hi->h_startdepth = instr->i_except->b_startdepth;
3967
1.34M
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3968
1.34M
            }
3969
3.42M
            else {
3970
3.42M
                hi->h_label = -1;
3971
3.42M
            }
3972
4.76M
        }
3973
645k
    }
3974
77.1k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3975
0
        return ERROR;
3976
0
    }
3977
77.1k
    return SUCCESS;
3978
77.1k
}
3979
3980
3981
int
3982
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3983
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3984
                                     int *stackdepth, int *nlocalsplus,
3985
                                     _PyInstructionSequence *seq)
3986
77.1k
{
3987
77.1k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3988
3989
77.1k
    *stackdepth = calculate_stackdepth(g);
3990
77.1k
    if (*stackdepth < 0) {
3991
0
        return ERROR;
3992
0
    }
3993
3994
    /* prepare_localsplus adds instructions for generators that push
3995
     * and pop an item on the stack. This assertion makes sure there
3996
     * is space on the stack for that.
3997
     * It should always be true, because a generator must have at
3998
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
3999
     * which requires stack space.
4000
     */
4001
77.1k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
4002
4003
77.1k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
4004
77.1k
    if (*nlocalsplus < 0) {
4005
0
        return ERROR;
4006
0
    }
4007
4008
77.1k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4009
4010
    /* Order of basic blocks must have been determined by now */
4011
4012
77.1k
    RETURN_IF_ERROR(normalize_jumps(g));
4013
77.1k
    assert(no_redundant_jumps(g));
4014
4015
    /* Can't modify the bytecode after inserting instructions that produce
4016
     * borrowed references.
4017
     */
4018
77.1k
    RETURN_IF_ERROR(optimize_load_fast(g));
4019
4020
    /* Can't modify the bytecode after computing jump offsets. */
4021
77.1k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4022
0
        return ERROR;
4023
0
    }
4024
4025
77.1k
    return SUCCESS;
4026
77.1k
}
4027
4028
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4029
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4030
 */
4031
int
4032
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4033
0
{
4034
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4035
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4036
0
    return SUCCESS;
4037
0
}
4038
4039
/* Exported API functions */
4040
4041
int
4042
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4043
0
{
4044
0
    stack_effects effs;
4045
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4046
0
        return PY_INVALID_STACK_EFFECT;
4047
0
    }
4048
0
    return effs.net;
4049
0
}
4050
4051
int
4052
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4053
6.27k
{
4054
6.27k
    stack_effects effs;
4055
6.27k
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4056
0
        return PY_INVALID_STACK_EFFECT;
4057
0
    }
4058
6.27k
    return effs.net;
4059
6.27k
}
4060
4061
/* Access to compiler optimizations for unit tests.
4062
4063
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4064
 * a CFG, optimizes it, and converts it back to an instruction list.
4065
 */
4066
4067
static PyObject *
4068
cfg_to_instruction_sequence(cfg_builder *g)
4069
0
{
4070
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4071
0
    if (seq == NULL) {
4072
0
        return NULL;
4073
0
    }
4074
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4075
0
        PyInstructionSequence_Fini(seq);
4076
0
        return NULL;
4077
0
    }
4078
0
    return (PyObject*)seq;
4079
0
}
4080
4081
PyObject *
4082
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4083
0
{
4084
0
    if (!_PyInstructionSequence_Check(seq)) {
4085
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4086
0
        return NULL;
4087
0
    }
4088
0
    PyObject *const_cache = PyDict_New();
4089
0
    if (const_cache == NULL) {
4090
0
        return NULL;
4091
0
    }
4092
4093
0
    PyObject *res = NULL;
4094
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4095
0
    if (g == NULL) {
4096
0
        goto error;
4097
0
    }
4098
0
    int nparams = 0, firstlineno = 1;
4099
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4100
0
                                nparams, firstlineno) < 0) {
4101
0
        goto error;
4102
0
    }
4103
4104
0
    if (calculate_stackdepth(g) == ERROR) {
4105
0
        goto error;
4106
0
    }
4107
4108
0
    if (optimize_load_fast(g) != SUCCESS) {
4109
0
        goto error;
4110
0
    }
4111
4112
0
    res = cfg_to_instruction_sequence(g);
4113
0
error:
4114
0
    Py_DECREF(const_cache);
4115
0
    _PyCfgBuilder_Free(g);
4116
0
    return res;
4117
0
}