Coverage Report

Created: 2025-07-12 07:00

/src/cpython3/Python/flowgraph.c
Line | Count | Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
17.7M
#define SUCCESS 0
19
66.0k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
27.4M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
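
Note on the convention: functions in this file return SUCCESS (0) or ERROR (-1), and RETURN_IF_ERROR propagates a failure up the call chain. A minimal sketch of the idiom (might_fail and caller are hypothetical helpers, not part of flowgraph.c; note also that the macro expands to a bare if statement, so a call site must not be followed by a dangling else):

static int
might_fail(int x)  /* hypothetical helper following the 0/-1 convention */
{
    return x < 0 ? ERROR : SUCCESS;
}

static int
caller(int x)
{
    RETURN_IF_ERROR(might_fail(x));  /* returns ERROR early on failure */
    return SUCCESS;
}
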
25
26
5.99M
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the blocks are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
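
The struct carries two independent links: b_list chains every allocated block in reverse allocation order (used for bookkeeping such as freeing), while b_next follows normal control flow. A minimal traversal sketch, assuming a populated cfg_builder g as defined just below:

/* allocation order: reaches every block ever created, newest first */
for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
    /* e.g. validate or free b */
}

/* control-flow order: blocks reached by fallthrough from the entry */
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
    /* e.g. optimize b */
}
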
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
6.22M
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
6.22M
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
4.53k
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
25.1M
{
101
25.1M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
25.1M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
25.1M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
21.5M
{
108
21.5M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
21.5M
}
110
111
/* One arg */
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
1.24M
    do { \
114
1.24M
        assert(OPCODE_HAS_ARG(OP)); \
115
1.24M
        cfg_instr *_instr__ptr_ = (I); \
116
1.24M
        _instr__ptr_->i_opcode = (OP); \
117
1.24M
        _instr__ptr_->i_oparg = (ARG); \
118
1.24M
    } while (0);
119
120
/* No args */
121
#define INSTR_SET_OP0(I, OP) \
122
2.01M
    do { \
123
2.01M
        assert(!OPCODE_HAS_ARG(OP)); \
124
2.01M
        cfg_instr *_instr__ptr_ = (I); \
125
2.01M
        _instr__ptr_->i_opcode = (OP); \
126
2.01M
        _instr__ptr_->i_oparg = 0; \
127
2.01M
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
1.17M
    do { \
131
1.17M
        cfg_instr *_instr__ptr_ = (I); \
132
1.17M
        _instr__ptr_->i_loc = (LOC); \
133
1.17M
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
5.99M
{
144
5.99M
    assert(b != NULL);
145
5.99M
    _Py_c_array_t array = {
146
5.99M
        .array = (void*)b->b_instr,
147
5.99M
        .allocated_entries = b->b_ialloc,
148
5.99M
        .item_size = sizeof(cfg_instr),
149
5.99M
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
5.99M
    };
151
152
5.99M
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
5.99M
    b->b_instr = array.array;
154
5.99M
    b->b_ialloc = array.allocated_entries;
155
5.99M
    return b->b_iused++;
156
5.99M
}
157
158
static cfg_instr *
159
21.1M
basicblock_last_instr(const basicblock *b) {
160
21.1M
    assert(b->b_iused >= 0);
161
21.1M
    if (b->b_iused > 0) {
162
19.8M
        assert(b->b_instr != NULL);
163
19.8M
        return &b->b_instr[b->b_iused - 1];
164
19.8M
    }
165
1.26M
    return NULL;
166
21.1M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
576k
{
175
576k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
576k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
576k
    b->b_list = g->g_block_list;
182
576k
    g->g_block_list = b;
183
576k
    b->b_label = NO_LABEL;
184
576k
    return b;
185
576k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
5.90M
{
190
5.90M
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
5.90M
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
5.90M
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
5.90M
    assert(0 <= oparg && oparg < (1 << 30));
194
195
5.90M
    int off = basicblock_next_instr(b);
196
5.90M
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
5.90M
    cfg_instr *i = &b->b_instr[off];
200
5.90M
    i->i_opcode = opcode;
201
5.90M
    i->i_oparg = oparg;
202
5.90M
    i->i_target = NULL;
203
5.90M
    i->i_loc = loc;
204
205
5.90M
    return SUCCESS;
206
5.90M
}
207
208
static int
209
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
210
2.49k
{
211
2.49k
    cfg_instr *last = basicblock_last_instr(b);
212
2.49k
    if (last && is_jump(last)) {
213
0
        return ERROR;
214
0
    }
215
216
2.49k
    RETURN_IF_ERROR(
217
2.49k
        basicblock_addop(b, opcode, target->b_label.id, loc));
218
2.49k
    last = basicblock_last_instr(b);
219
2.49k
    assert(last && last->i_opcode == opcode);
220
2.49k
    last->i_target = target;
221
2.49k
    return SUCCESS;
222
2.49k
}
223
224
static inline int
225
basicblock_append_instructions(basicblock *to, basicblock *from)
226
17.5k
{
227
55.9k
    for (int i = 0; i < from->b_iused; i++) {
228
38.4k
        int n = basicblock_next_instr(to);
229
38.4k
        if (n < 0) {
230
0
            return ERROR;
231
0
        }
232
38.4k
        to->b_instr[n] = from->b_instr[i];
233
38.4k
    }
234
17.5k
    return SUCCESS;
235
17.5k
}
236
237
static inline int
238
5.70M
basicblock_nofallthrough(const basicblock *b) {
239
5.70M
    cfg_instr *last = basicblock_last_instr(b);
240
5.70M
    return (last &&
241
5.70M
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
242
5.50M
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
243
5.70M
}
244
245
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
246
8.73M
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
247
248
static basicblock *
249
copy_basicblock(cfg_builder *g, basicblock *block)
250
2.02k
{
251
    /* Cannot copy a block if it has a fallthrough, since
252
     * a block can only have one fallthrough predecessor.
253
     */
254
2.02k
    assert(BB_NO_FALLTHROUGH(block));
255
2.02k
    basicblock *result = cfg_builder_new_block(g);
256
2.02k
    if (result == NULL) {
257
0
        return NULL;
258
0
    }
259
2.02k
    if (basicblock_append_instructions(result, block) < 0) {
260
0
        return NULL;
261
0
    }
262
2.02k
    return result;
263
2.02k
}
264
265
static int
266
45.4k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
267
45.4k
    RETURN_IF_ERROR(basicblock_next_instr(block));
268
916k
    for (int i = block->b_iused - 1; i > pos; i--) {
269
870k
        block->b_instr[i] = block->b_instr[i-1];
270
870k
    }
271
45.4k
    block->b_instr[pos] = *instr;
272
45.4k
    return SUCCESS;
273
45.4k
}
274
275
/* For debugging purposes only */
276
#if 0
277
static void
278
dump_instr(cfg_instr *i)
279
{
280
    const char *jump = is_jump(i) ? "jump " : "";
281
282
    char arg[128];
283
284
    *arg = '\0';
285
    if (OPCODE_HAS_ARG(i->i_opcode)) {
286
        sprintf(arg, "arg: %d ", i->i_oparg);
287
    }
288
    if (HAS_TARGET(i->i_opcode)) {
289
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
290
    }
291
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
292
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
293
}
294
295
static inline int
296
basicblock_returns(const basicblock *b) {
297
    cfg_instr *last = basicblock_last_instr(b);
298
    return last && IS_RETURN_OPCODE(last->i_opcode);
299
}
300
301
static void
302
dump_basicblock(const basicblock *b, bool highlight)
303
{
304
    const char *b_return = basicblock_returns(b) ? "return " : "";
305
    if (highlight) {
306
        fprintf(stderr, ">>> ");
307
    }
308
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
309
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
310
        b->b_startdepth, b->b_predecessors, b_return);
311
    int depth = b->b_startdepth;
312
    if (b->b_instr) {
313
        int i;
314
        for (i = 0; i < b->b_iused; i++) {
315
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
316
            dump_instr(b->b_instr + i);
317
318
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
319
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
320
            depth += (pushed - popped);
321
        }
322
    }
323
}
324
325
void
326
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
327
{
328
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
329
        dump_basicblock(b, b == mark);
330
    }
331
}
332
333
#endif
334
335
336
/***** CFG construction and modification *****/
337
338
static basicblock *
339
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
340
495k
{
341
495k
    assert(block != NULL);
342
495k
    g->g_curblock->b_next = block;
343
495k
    g->g_curblock = block;
344
495k
    return block;
345
495k
}
346
347
static inline int
348
1.31M
basicblock_exits_scope(const basicblock *b) {
349
1.31M
    cfg_instr *last = basicblock_last_instr(b);
350
1.31M
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
351
1.31M
}
352
353
static inline int
354
995k
basicblock_has_eval_break(const basicblock *b) {
355
6.09M
    for (int i = 0; i < b->b_iused; i++) {
356
5.26M
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
357
169k
            return true;
358
169k
        }
359
5.26M
    }
360
826k
    return false;
361
995k
}
362
363
static bool
364
cfg_builder_current_block_is_terminated(cfg_builder *g)
365
6.01M
{
366
6.01M
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
367
6.01M
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
368
384k
        return true;
369
384k
    }
370
5.63M
    if (IS_LABEL(g->g_current_label)) {
371
110k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
372
110k
            return true;
373
110k
        }
374
0
        else {
375
            /* current block is empty, label it */
376
0
            g->g_curblock->b_label = g->g_current_label;
377
0
            g->g_current_label = NO_LABEL;
378
0
        }
379
110k
    }
380
5.52M
    return false;
381
5.63M
}
382
383
static int
384
cfg_builder_maybe_start_new_block(cfg_builder *g)
385
6.01M
{
386
6.01M
    if (cfg_builder_current_block_is_terminated(g)) {
387
495k
        basicblock *b = cfg_builder_new_block(g);
388
495k
        if (b == NULL) {
389
0
            return ERROR;
390
0
        }
391
495k
        b->b_label = g->g_current_label;
392
495k
        g->g_current_label = NO_LABEL;
393
495k
        cfg_builder_use_next_block(g, b);
394
495k
    }
395
6.01M
    return SUCCESS;
396
6.01M
}
397
398
#ifndef NDEBUG
399
static bool
400
cfg_builder_check(cfg_builder *g)
401
132k
{
402
132k
    assert(g->g_entryblock->b_iused > 0);
403
1.27M
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
404
1.13M
        assert(!_PyMem_IsPtrFreed(block));
405
1.13M
        if (block->b_instr != NULL) {
406
1.13M
            assert(block->b_ialloc > 0);
407
1.13M
            assert(block->b_iused >= 0);
408
1.13M
            assert(block->b_ialloc >= block->b_iused);
409
1.13M
        }
410
0
        else {
411
0
            assert (block->b_iused == 0);
412
0
            assert (block->b_ialloc == 0);
413
0
        }
414
1.13M
    }
415
132k
    return true;
416
132k
}
417
#endif
418
419
static int
420
init_cfg_builder(cfg_builder *g)
421
66.2k
{
422
66.2k
    g->g_block_list = NULL;
423
66.2k
    basicblock *block = cfg_builder_new_block(g);
424
66.2k
    if (block == NULL) {
425
0
        return ERROR;
426
0
    }
427
66.2k
    g->g_curblock = g->g_entryblock = block;
428
66.2k
    g->g_current_label = NO_LABEL;
429
66.2k
    return SUCCESS;
430
66.2k
}
431
432
cfg_builder *
433
_PyCfgBuilder_New(void)
434
66.2k
{
435
66.2k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
436
66.2k
    if (g == NULL) {
437
0
        PyErr_NoMemory();
438
0
        return NULL;
439
0
    }
440
66.2k
    memset(g, 0, sizeof(cfg_builder));
441
66.2k
    if (init_cfg_builder(g) < 0) {
442
0
        PyMem_Free(g);
443
0
        return NULL;
444
0
    }
445
66.2k
    return g;
446
66.2k
}
447
448
void
449
_PyCfgBuilder_Free(cfg_builder *g)
450
66.2k
{
451
66.2k
    if (g == NULL) {
452
0
        return;
453
0
    }
454
66.2k
    assert(cfg_builder_check(g));
455
66.2k
    basicblock *b = g->g_block_list;
456
642k
    while (b != NULL) {
457
576k
        if (b->b_instr) {
458
576k
            PyMem_Free((void *)b->b_instr);
459
576k
        }
460
576k
        basicblock *next = b->b_list;
461
576k
        PyMem_Free((void *)b);
462
576k
        b = next;
463
576k
    }
464
66.2k
    PyMem_Free(g);
465
66.2k
}
466
467
int
468
_PyCfgBuilder_CheckSize(cfg_builder *g)
469
66.2k
{
470
66.2k
    int nblocks = 0;
471
627k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
472
561k
        nblocks++;
473
561k
    }
474
66.2k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
475
0
        PyErr_NoMemory();
476
0
        return ERROR;
477
0
    }
478
66.2k
    return SUCCESS;
479
66.2k
}
480
481
int
482
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
483
264k
{
484
264k
    g->g_current_label = lbl;
485
264k
    return cfg_builder_maybe_start_new_block(g);
486
264k
}
487
488
int
489
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
490
5.75M
{
491
5.75M
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
492
5.75M
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
493
5.75M
}
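
Together with _PyCfgBuilder_New and _PyCfgBuilder_UseLabel above, this is the whole construction interface: the compiler front end emits labels and opcodes linearly, and new basic blocks are started implicitly whenever the current block is terminated or a label is placed. A hedged sketch of the calling pattern (the opcodes, the location loc, and the label lbl are illustrative; the real caller is the compiler's instruction-sequence conversion, not shown here):

cfg_builder *g = _PyCfgBuilder_New();
if (g == NULL) {
    return ERROR;
}
RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, LOAD_SMALL_INT, 1, loc));
RETURN_IF_ERROR(_PyCfgBuilder_UseLabel(g, lbl));  /* next opcode starts a new block */
RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, RETURN_VALUE, 0, loc));
_PyCfgBuilder_Free(g);
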
494
495
496
static basicblock *
497
next_nonempty_block(basicblock *b)
498
1.81M
{
499
1.88M
    while (b && b->b_iused == 0) {
500
72.0k
        b = b->b_next;
501
72.0k
    }
502
1.81M
    return b;
503
1.81M
}
504
505
/***** debugging helpers *****/
506
507
#ifndef NDEBUG
508
static int remove_redundant_nops(cfg_builder *g);
509
510
static bool
511
66.2k
no_redundant_nops(cfg_builder *g) {
512
66.2k
    if (remove_redundant_nops(g) != 0) {
513
0
        return false;
514
0
    }
515
66.2k
    return true;
516
66.2k
}
517
518
static bool
519
132k
no_redundant_jumps(cfg_builder *g) {
520
1.27M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
521
1.13M
        cfg_instr *last = basicblock_last_instr(b);
522
1.13M
        if (last != NULL) {
523
1.00M
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
524
167k
                basicblock *next = next_nonempty_block(b->b_next);
525
167k
                basicblock *jump_target = next_nonempty_block(last->i_target);
526
167k
                if (jump_target == next) {
527
0
                    assert(next);
528
0
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
529
0
                        assert(0);
530
0
                        return false;
531
0
                    }
532
0
                }
533
167k
            }
534
1.00M
        }
535
1.13M
    }
536
132k
    return true;
537
132k
}
538
#endif
539
540
/***** CFG preprocessing (jump targets and exceptions) *****/
541
542
static int
543
576k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
544
576k
    cfg_instr *last = basicblock_last_instr(b);
545
576k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
546
433k
        return SUCCESS;
547
433k
    }
548
142k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
549
550
142k
    bool is_forward = last->i_target->b_visited == 0;
551
142k
    if (is_forward) {
552
141k
        RETURN_IF_ERROR(
553
141k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
554
141k
        return SUCCESS;
555
141k
    }
556
557
866
    int reversed_opcode = 0;
558
866
    switch(last->i_opcode) {
559
0
        case POP_JUMP_IF_NOT_NONE:
560
0
            reversed_opcode = POP_JUMP_IF_NONE;
561
0
            break;
562
0
        case POP_JUMP_IF_NONE:
563
0
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
564
0
            break;
565
772
        case POP_JUMP_IF_FALSE:
566
772
            reversed_opcode = POP_JUMP_IF_TRUE;
567
772
            break;
568
94
        case POP_JUMP_IF_TRUE:
569
94
            reversed_opcode = POP_JUMP_IF_FALSE;
570
94
            break;
571
866
    }
572
    /* transform 'conditional jump T' to
573
     * 'reversed_jump b_next' followed by 'jump_backwards T'
574
     */
575
576
866
    basicblock *target = last->i_target;
577
866
    basicblock *backwards_jump = cfg_builder_new_block(g);
578
866
    if (backwards_jump == NULL) {
579
0
        return ERROR;
580
0
    }
581
866
    RETURN_IF_ERROR(
582
866
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
583
866
    RETURN_IF_ERROR(
584
866
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
585
866
    backwards_jump->b_startdepth = target->b_startdepth;
586
866
    last->i_opcode = reversed_opcode;
587
866
    last->i_target = b->b_next;
588
589
866
    backwards_jump->b_cold = b->b_cold;
590
866
    backwards_jump->b_next = b->b_next;
591
866
    b->b_next = backwards_jump;
592
866
    return SUCCESS;
593
866
}
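
The rewrite above, sketched on a backward conditional jump (block names are illustrative):

/*
 * before:                         after:
 *   B:    ...                       B:    ...
 *         POP_JUMP_IF_FALSE L             POP_JUMP_IF_TRUE next  (reversed, now forward)
 *   next: ...                       BJ:   NOT_TAKEN
 *                                         JUMP L                 (unconditional, backward)
 *                                   next: ...
 *
 * BJ is the new backwards_jump block spliced between B and next.
 */
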
594
595
596
static int
597
normalize_jumps(cfg_builder *g)
598
66.2k
{
599
66.2k
    basicblock *entryblock = g->g_entryblock;
600
641k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
601
575k
        b->b_visited = 0;
602
575k
    }
603
642k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
604
576k
        b->b_visited = 1;
605
576k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
606
576k
    }
607
66.2k
    return SUCCESS;
608
66.2k
}
609
610
static int
611
66.2k
check_cfg(cfg_builder *g) {
612
627k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
613
        /* Raise SystemError if jump or exit is not last instruction in the block. */
614
6.31M
        for (int i = 0; i < b->b_iused; i++) {
615
5.75M
            int opcode = b->b_instr[i].i_opcode;
616
5.75M
            assert(!IS_ASSEMBLER_OPCODE(opcode));
617
5.75M
            if (IS_TERMINATOR_OPCODE(opcode)) {
618
450k
                if (i != b->b_iused - 1) {
619
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
620
0
                    return ERROR;
621
0
                }
622
450k
            }
623
5.75M
        }
624
561k
    }
625
66.2k
    return SUCCESS;
626
66.2k
}
627
628
static int
629
get_max_label(basicblock *entryblock)
630
242k
{
631
242k
    int lbl = -1;
632
2.48M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
633
2.23M
        if (b->b_label.id > lbl) {
634
933k
            lbl = b->b_label.id;
635
933k
        }
636
2.23M
    }
637
242k
    return lbl;
638
242k
}
639
640
/* Calculate the actual jump target from the target_label */
641
static int
642
translate_jump_labels_to_targets(basicblock *entryblock)
643
66.2k
{
644
66.2k
    int max_label = get_max_label(entryblock);
645
66.2k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
646
66.2k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
647
66.2k
    if (!label2block) {
648
0
        PyErr_NoMemory();
649
0
        return ERROR;
650
0
    }
651
66.2k
    memset(label2block, 0, mapsize);
652
627k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
653
561k
        if (b->b_label.id >= 0) {
654
264k
            label2block[b->b_label.id] = b;
655
264k
        }
656
561k
    }
657
627k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
658
6.31M
        for (int i = 0; i < b->b_iused; i++) {
659
5.75M
            cfg_instr *instr = &b->b_instr[i];
660
5.75M
            assert(instr->i_target == NULL);
661
5.75M
            if (HAS_TARGET(instr->i_opcode)) {
662
356k
                int lbl = instr->i_oparg;
663
356k
                assert(lbl >= 0 && lbl <= max_label);
664
356k
                instr->i_target = label2block[lbl];
665
356k
                assert(instr->i_target != NULL);
666
356k
                assert(instr->i_target->b_label.id == lbl);
667
356k
            }
668
5.75M
        }
669
561k
    }
670
66.2k
    PyMem_Free(label2block);
671
66.2k
    return SUCCESS;
672
66.2k
}
673
674
static int
675
66.2k
mark_except_handlers(basicblock *entryblock) {
676
66.2k
#ifndef NDEBUG
677
627k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
678
561k
        assert(!b->b_except_handler);
679
561k
    }
680
66.2k
#endif
681
627k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
682
6.31M
        for (int i=0; i < b->b_iused; i++) {
683
5.75M
            cfg_instr *instr = &b->b_instr[i];
684
5.75M
            if (is_block_push(instr)) {
685
69.2k
                instr->i_target->b_except_handler = 1;
686
69.2k
            }
687
5.75M
        }
688
561k
    }
689
66.2k
    return SUCCESS;
690
66.2k
}
691
692
693
struct _PyCfgExceptStack {
694
    basicblock *handlers[CO_MAXBLOCKS+2];
695
    int depth;
696
};
697
698
699
static basicblock *
700
68.6k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
701
68.6k
    assert(is_block_push(setup));
702
68.6k
    int opcode = setup->i_opcode;
703
68.6k
    basicblock * target = setup->i_target;
704
68.6k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
705
35.1k
        target->b_preserve_lasti = 1;
706
35.1k
    }
707
68.6k
    assert(stack->depth <= CO_MAXBLOCKS);
708
68.6k
    stack->handlers[++stack->depth] = target;
709
68.6k
    return target;
710
68.6k
}
711
712
static basicblock *
713
64.6k
pop_except_block(struct _PyCfgExceptStack *stack) {
714
64.6k
    assert(stack->depth > 0);
715
64.6k
    return stack->handlers[--stack->depth];
716
64.6k
}
717
718
static basicblock *
719
515k
except_stack_top(struct _PyCfgExceptStack *stack) {
720
515k
    return stack->handlers[stack->depth];
721
515k
}
722
723
static struct _PyCfgExceptStack *
724
66.2k
make_except_stack(void) {
725
66.2k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
726
66.2k
    if (new == NULL) {
727
0
        PyErr_NoMemory();
728
0
        return NULL;
729
0
    }
730
66.2k
    new->depth = 0;
731
66.2k
    new->handlers[0] = NULL;
732
66.2k
    return new;
733
66.2k
}
734
735
static struct _PyCfgExceptStack *
736
187k
copy_except_stack(struct _PyCfgExceptStack *stack) {
737
187k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
738
187k
    if (copy == NULL) {
739
0
        PyErr_NoMemory();
740
0
        return NULL;
741
0
    }
742
187k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
743
187k
    return copy;
744
187k
}
745
746
static basicblock**
747
455k
make_cfg_traversal_stack(basicblock *entryblock) {
748
455k
    int nblocks = 0;
749
4.66M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
750
4.21M
        b->b_visited = 0;
751
4.21M
        nblocks++;
752
4.21M
    }
753
455k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
754
455k
    if (!stack) {
755
0
        PyErr_NoMemory();
756
0
    }
757
455k
    return stack;
758
455k
}
759
760
/* Compute the stack effects of opcode with argument oparg.
761
762
   Some opcodes have a different stack effect when jumping to the target
763
   than when not jumping. The 'jump' parameter specifies the case:
764
765
   * 0 -- when not jumping
766
   * 1 -- when jumping
767
   * -1 -- maximal of the two
768
 */
769
typedef struct {
770
    /* The stack effect of the instruction. */
771
    int net;
772
} stack_effects;
773
774
Py_LOCAL(int)
775
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
776
4.55M
{
777
4.55M
    if (opcode < 0) {
778
0
        return -1;
779
0
    }
780
4.55M
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
781
        // Specialized instructions are not supported.
782
0
        return -1;
783
0
    }
784
4.55M
    int popped = _PyOpcode_num_popped(opcode, oparg);
785
4.55M
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
786
4.55M
    if (popped < 0 || pushed < 0) {
787
0
        return -1;
788
0
    }
789
4.55M
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
790
68.5k
        effects->net = 0;
791
68.5k
        return 0;
792
68.5k
    }
793
4.48M
    effects->net = pushed - popped;
794
4.48M
    return 0;
795
4.55M
}
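
A worked example of the net effects this computes, under the current metadata tables (a sketch; the exact values come from _PyOpcode_num_popped/_PyOpcode_num_pushed):

stack_effects e;
(void)get_stack_effects(BINARY_OP, 0, /*jump=*/0, &e);      /* pops 2, pushes 1: e.net == -1 */
(void)get_stack_effects(SETUP_FINALLY, 0, /*jump=*/0, &e);  /* block push, not taken: e.net == 0 */
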
796
797
Py_LOCAL_INLINE(int)
798
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
799
702k
{
800
702k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
801
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
802
0
        return ERROR;
803
0
    }
804
702k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
805
522k
        assert(b->b_startdepth < 0);
806
522k
        b->b_startdepth = depth;
807
522k
        *(*sp)++ = b;
808
522k
    }
809
702k
    return SUCCESS;
810
702k
}
811
812
/* Find the flow path that needs the largest stack.  We assume that
813
 * cycles in the flow graph have no net effect on the stack depth.
814
 */
815
static int
816
calculate_stackdepth(cfg_builder *g)
817
66.2k
{
818
66.2k
    basicblock *entryblock = g->g_entryblock;
819
641k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
820
575k
        b->b_startdepth = INT_MIN;
821
575k
    }
822
66.2k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
823
66.2k
    if (!stack) {
824
0
        return ERROR;
825
0
    }
826
827
828
66.2k
    int stackdepth = -1;
829
66.2k
    int maxdepth = 0;
830
66.2k
    basicblock **sp = stack;
831
66.2k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
832
0
        goto error;
833
0
    }
834
588k
    while (sp != stack) {
835
522k
        basicblock *b = *--sp;
836
522k
        int depth = b->b_startdepth;
837
522k
        assert(depth >= 0);
838
522k
        basicblock *next = b->b_next;
839
4.53M
        for (int i = 0; i < b->b_iused; i++) {
840
4.22M
            cfg_instr *instr = &b->b_instr[i];
841
4.22M
            stack_effects effects;
842
4.22M
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
843
0
                PyErr_Format(PyExc_SystemError,
844
0
                             "Invalid stack effect for opcode=%d, arg=%i",
845
0
                             instr->i_opcode, instr->i_oparg);
846
0
                goto error;
847
0
            }
848
4.22M
            int new_depth = depth + effects.net;
849
4.22M
            if (new_depth < 0) {
850
0
                PyErr_Format(PyExc_ValueError,
851
0
                             "Invalid CFG, stack underflow");
852
0
                goto error;
853
0
            }
854
4.22M
            maxdepth = Py_MAX(maxdepth, depth);
855
4.22M
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
856
323k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
857
0
                    PyErr_Format(PyExc_SystemError,
858
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
859
0
                                 instr->i_opcode, instr->i_oparg);
860
0
                    goto error;
861
0
                }
862
323k
                int target_depth = depth + effects.net;
863
323k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
864
323k
                maxdepth = Py_MAX(maxdepth, depth);
865
323k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
866
0
                    goto error;
867
0
                }
868
323k
            }
869
4.22M
            depth = new_depth;
870
4.22M
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
871
4.22M
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
872
4.22M
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
873
210k
            {
874
                /* remaining code is dead */
875
210k
                next = NULL;
876
210k
                break;
877
210k
            }
878
4.22M
        }
879
522k
        if (next != NULL) {
880
312k
            assert(BB_HAS_FALLTHROUGH(b));
881
312k
            if (stackdepth_push(&sp, next, depth) < 0) {
882
0
                goto error;
883
0
            }
884
312k
        }
885
522k
    }
886
66.2k
    stackdepth = maxdepth;
887
66.2k
error:
888
66.2k
    PyMem_Free(stack);
889
66.2k
    return stackdepth;
890
66.2k
}
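
A worked example of the propagation, assuming a single block compiled from "return a + b" (opcode names per the current bytecode; depths shown before -> after each instruction):

/*
 *   LOAD_FAST a      0 -> 1
 *   LOAD_FAST b      1 -> 2      maxdepth becomes 2
 *   BINARY_OP (+)    2 -> 1
 *   RETURN_VALUE     1 -> 0      scope exit: any remaining code is dead
 */
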
891
892
static int
893
66.2k
label_exception_targets(basicblock *entryblock) {
894
66.2k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
895
66.2k
    if (todo_stack == NULL) {
896
0
        return ERROR;
897
0
    }
898
66.2k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
899
66.2k
    if (except_stack == NULL) {
900
0
        PyMem_Free(todo_stack);
901
0
        PyErr_NoMemory();
902
0
        return ERROR;
903
0
    }
904
66.2k
    except_stack->depth = 0;
905
66.2k
    todo_stack[0] = entryblock;
906
66.2k
    entryblock->b_visited = 1;
907
66.2k
    entryblock->b_exceptstack = except_stack;
908
66.2k
    basicblock **todo = &todo_stack[1];
909
66.2k
    basicblock *handler = NULL;
910
581k
    while (todo > todo_stack) {
911
515k
        todo--;
912
515k
        basicblock *b = todo[0];
913
515k
        assert(b->b_visited == 1);
914
515k
        except_stack = b->b_exceptstack;
915
515k
        assert(except_stack != NULL);
916
515k
        b->b_exceptstack = NULL;
917
515k
        handler = except_stack_top(except_stack);
918
515k
        int last_yield_except_depth = -1;
919
6.15M
        for (int i = 0; i < b->b_iused; i++) {
920
5.63M
            cfg_instr *instr = &b->b_instr[i];
921
5.63M
            if (is_block_push(instr)) {
922
68.6k
                if (!instr->i_target->b_visited) {
923
68.6k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
924
68.6k
                    if (copy == NULL) {
925
0
                        goto error;
926
0
                    }
927
68.6k
                    instr->i_target->b_exceptstack = copy;
928
68.6k
                    todo[0] = instr->i_target;
929
68.6k
                    instr->i_target->b_visited = 1;
930
68.6k
                    todo++;
931
68.6k
                }
932
68.6k
                handler = push_except_block(except_stack, instr);
933
68.6k
            }
934
5.56M
            else if (instr->i_opcode == POP_BLOCK) {
935
64.6k
                handler = pop_except_block(except_stack);
936
64.6k
                INSTR_SET_OP0(instr, NOP);
937
64.6k
            }
938
5.50M
            else if (is_jump(instr)) {
939
284k
                instr->i_except = handler;
940
284k
                assert(i == b->b_iused -1);
941
284k
                if (!instr->i_target->b_visited) {
942
160k
                    if (BB_HAS_FALLTHROUGH(b)) {
943
118k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
944
118k
                        if (copy == NULL) {
945
0
                            goto error;
946
0
                        }
947
118k
                        instr->i_target->b_exceptstack = copy;
948
118k
                    }
949
41.7k
                    else {
950
41.7k
                        instr->i_target->b_exceptstack = except_stack;
951
41.7k
                        except_stack = NULL;
952
41.7k
                    }
953
160k
                    todo[0] = instr->i_target;
954
160k
                    instr->i_target->b_visited = 1;
955
160k
                    todo++;
956
160k
                }
957
284k
            }
958
5.22M
            else if (instr->i_opcode == YIELD_VALUE) {
959
26.7k
                instr->i_except = handler;
960
26.7k
                last_yield_except_depth = except_stack->depth;
961
26.7k
            }
962
5.19M
            else if (instr->i_opcode == RESUME) {
963
93.0k
                instr->i_except = handler;
964
93.0k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
965
26.7k
                    assert(last_yield_except_depth >= 0);
966
26.7k
                    if (last_yield_except_depth == 1) {
967
1.46k
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
968
1.46k
                    }
969
26.7k
                    last_yield_except_depth = -1;
970
26.7k
                }
971
93.0k
            }
972
5.10M
            else {
973
5.10M
                instr->i_except = handler;
974
5.10M
            }
975
5.63M
        }
976
515k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
977
219k
            assert(except_stack != NULL);
978
219k
            b->b_next->b_exceptstack = except_stack;
979
219k
            todo[0] = b->b_next;
980
219k
            b->b_next->b_visited = 1;
981
219k
            todo++;
982
219k
        }
983
295k
        else if (except_stack != NULL) {
984
253k
           PyMem_Free(except_stack);
985
253k
        }
986
515k
    }
987
#ifdef Py_DEBUG
988
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
989
        assert(b->b_exceptstack == NULL);
990
    }
991
#endif
992
66.2k
    PyMem_Free(todo_stack);
993
66.2k
    return SUCCESS;
994
0
error:
995
0
    PyMem_Free(todo_stack);
996
0
    PyMem_Free(except_stack);
997
0
    return ERROR;
998
66.2k
}
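
What the pass records, sketched for a body of the shape "try: f() finally: ..." (block names are illustrative): between the block push and its POP_BLOCK, every instruction's i_except points at the handler; outside, it points at the enclosing handler, or NULL at top level.

/*
 *   SETUP_FINALLY H      pushes H onto the except stack
 *   CALL f               i_except = H
 *   POP_BLOCK            pops H; rewritten to NOP above
 *   ...                  i_except = NULL (no enclosing handler)
 *   H: ...               handler block (b_except_handler was set earlier)
 */
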
999
1000
/***** CFG optimizations *****/
1001
1002
static int
1003
132k
remove_unreachable(basicblock *entryblock) {
1004
1.25M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1005
1.12M
        b->b_predecessors = 0;
1006
1.12M
    }
1007
132k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1008
132k
    if (stack == NULL) {
1009
0
        return ERROR;
1010
0
    }
1011
132k
    basicblock **sp = stack;
1012
132k
    entryblock->b_predecessors = 1;
1013
132k
    *sp++ = entryblock;
1014
132k
    entryblock->b_visited = 1;
1015
1.14M
    while (sp > stack) {
1016
1.01M
        basicblock *b = *(--sp);
1017
1.01M
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1018
580k
            if (!b->b_next->b_visited) {
1019
505k
                assert(b->b_next->b_predecessors == 0);
1020
505k
                *sp++ = b->b_next;
1021
505k
                b->b_next->b_visited = 1;
1022
505k
            }
1023
580k
            b->b_next->b_predecessors++;
1024
580k
        }
1025
10.9M
        for (int i = 0; i < b->b_iused; i++) {
1026
9.91M
            basicblock *target;
1027
9.91M
            cfg_instr *instr = &b->b_instr[i];
1028
9.91M
            if (is_jump(instr) || is_block_push(instr)) {
1029
672k
                target = instr->i_target;
1030
672k
                if (!target->b_visited) {
1031
372k
                    *sp++ = target;
1032
372k
                    target->b_visited = 1;
1033
372k
                }
1034
672k
                target->b_predecessors++;
1035
672k
            }
1036
9.91M
        }
1037
1.01M
    }
1038
132k
    PyMem_Free(stack);
1039
1040
    /* Delete unreachable instructions */
1041
1.25M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1042
1.12M
       if (b->b_predecessors == 0) {
1043
114k
            b->b_iused = 0;
1044
114k
            b->b_except_handler = 0;
1045
114k
       }
1046
1.12M
    }
1047
132k
    return SUCCESS;
1048
132k
}
1049
1050
static int
1051
4.07M
basicblock_remove_redundant_nops(basicblock *bb) {
1052
    /* Remove NOPs when legal to do so. */
1053
4.07M
    int dest = 0;
1054
4.07M
    int prev_lineno = -1;
1055
32.9M
    for (int src = 0; src < bb->b_iused; src++) {
1056
28.8M
        int lineno = bb->b_instr[src].i_loc.lineno;
1057
28.8M
        if (bb->b_instr[src].i_opcode == NOP) {
1058
            /* Eliminate no-op if it doesn't have a line number */
1059
1.46M
            if (lineno < 0) {
1060
1.17M
                continue;
1061
1.17M
            }
1062
            /* or, if the previous instruction had the same line number. */
1063
291k
            if (prev_lineno == lineno) {
1064
189k
                continue;
1065
189k
            }
1066
            /* or, if the next instruction has the same line number or no line number */
1067
102k
            if (src < bb->b_iused - 1) {
1068
93.5k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1069
93.5k
                if (next_lineno == lineno) {
1070
67.5k
                    continue;
1071
67.5k
                }
1072
26.0k
                if (next_lineno < 0) {
1073
2.13k
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1074
2.13k
                    continue;
1075
2.13k
                }
1076
26.0k
            }
1077
8.60k
            else {
1078
8.60k
                basicblock *next = next_nonempty_block(bb->b_next);
1079
                /* or, if this is the last instruction in the BB and the next BB has the same line number */
1080
8.60k
                if (next) {
1081
8.60k
                    location next_loc = NO_LOCATION;
1082
9.08k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1083
9.07k
                        cfg_instr *instr = &next->b_instr[next_i];
1084
9.07k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1085
                            /* Skip over NOPs without a location, they will be removed */
1086
471
                            continue;
1087
471
                        }
1088
8.60k
                        next_loc = instr->i_loc;
1089
8.60k
                        break;
1090
9.07k
                    }
1091
8.60k
                    if (lineno == next_loc.lineno) {
1092
3.02k
                        continue;
1093
3.02k
                    }
1094
8.60k
                }
1095
8.60k
            }
1096
1097
102k
        }
1098
27.4M
        if (dest != src) {
1099
2.18M
            bb->b_instr[dest] = bb->b_instr[src];
1100
2.18M
        }
1101
27.4M
        dest++;
1102
27.4M
        prev_lineno = lineno;
1103
27.4M
    }
1104
4.07M
    assert(dest <= bb->b_iused);
1105
4.07M
    int num_removed = bb->b_iused - dest;
1106
4.07M
    bb->b_iused = dest;
1107
4.07M
    return num_removed;
1108
4.07M
}
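
A worked example of the rules above (lineno -1 means "no location"):

/*
 *   NOP        lineno -1    removed: no line number
 *   LOAD_FAST  lineno 3
 *   NOP        lineno 3     removed: previous instruction has the same line
 *   NOP        lineno 4     kept: the next line number differs and is valid
 *   LOAD_FAST  lineno 5
 */
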
1109
1110
static int
1111
279k
remove_redundant_nops(cfg_builder *g) {
1112
279k
    int changes = 0;
1113
3.76M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1114
3.48M
        int change = basicblock_remove_redundant_nops(b);
1115
3.48M
        RETURN_IF_ERROR(change);
1116
3.48M
        changes += change;
1117
3.48M
    }
1118
279k
    return changes;
1119
279k
}
1120
1121
static int
1122
remove_redundant_nops_and_pairs(basicblock *entryblock)
1123
66.2k
{
1124
66.2k
    bool done = false;
1125
1126
134k
    while (! done) {
1127
67.8k
        done = true;
1128
67.8k
        cfg_instr *prev_instr = NULL;
1129
67.8k
        cfg_instr *instr = NULL;
1130
653k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1131
585k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1132
585k
            if (IS_LABEL(b->b_label)) {
1133
                /* this block is a jump target, forget instr */
1134
276k
                instr = NULL;
1135
276k
            }
1136
5.26M
            for (int i = 0; i < b->b_iused; i++) {
1137
4.67M
                prev_instr = instr;
1138
4.67M
                instr = &b->b_instr[i];
1139
4.67M
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1140
4.67M
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1141
4.67M
                int opcode = instr->i_opcode;
1142
4.67M
                bool is_redundant_pair = false;
1143
4.67M
                if (opcode == POP_TOP) {
1144
377k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1145
5.93k
                       is_redundant_pair = true;
1146
5.93k
                   }
1147
371k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1148
547
                       is_redundant_pair = true;
1149
547
                   }
1150
377k
                }
1151
4.67M
                if (is_redundant_pair) {
1152
6.47k
                    INSTR_SET_OP0(prev_instr, NOP);
1153
6.47k
                    INSTR_SET_OP0(instr, NOP);
1154
6.47k
                    done = false;
1155
6.47k
                }
1156
4.67M
            }
1157
585k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1158
416k
                instr = NULL;
1159
416k
            }
1160
585k
        }
1161
67.8k
    }
1162
66.2k
    return SUCCESS;
1163
66.2k
}
1164
1165
static int
1166
147k
remove_redundant_jumps(cfg_builder *g) {
1167
    /* If a non-empty block ends with a jump instruction, check if the next
1168
     * non-empty block reached through normal flow control is the target
1169
     * of that jump. If it is, then the jump instruction is redundant and
1170
     * can be deleted.
1171
     *
1172
     * Return the number of changes applied, or -1 on error.
1173
     */
1174
1175
147k
    int changes = 0;
1176
2.50M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1177
2.35M
        cfg_instr *last = basicblock_last_instr(b);
1178
2.35M
        if (last == NULL) {
1179
256k
            continue;
1180
256k
        }
1181
2.10M
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1182
2.10M
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1183
469k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1184
469k
            if (jump_target == NULL) {
1185
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1186
0
                return ERROR;
1187
0
            }
1188
469k
            basicblock *next = next_nonempty_block(b->b_next);
1189
469k
            if (jump_target == next) {
1190
16.2k
                changes++;
1191
16.2k
                INSTR_SET_OP0(last, NOP);
1192
16.2k
            }
1193
469k
        }
1194
2.10M
    }
1195
1196
147k
    return changes;
1197
147k
}
1198
1199
static inline bool
1200
487k
basicblock_has_no_lineno(basicblock *b) {
1201
637k
    for (int i = 0; i < b->b_iused; i++) {
1202
585k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1203
435k
            return false;
1204
435k
        }
1205
585k
    }
1206
52.0k
    return true;
1207
487k
}
1208
1209
/* Maximum size of basic block that should be copied in optimizer */
1210
12.1k
#define MAX_COPY_SIZE 4
1211
1212
/* If this block ends with an unconditional jump to a small exit block or
1213
 * a block that has no line numbers (and no fallthrough), then
1214
 * remove the jump and extend this block with the target.
1215
 * Returns 1 if extended, 0 if no change, and -1 on error.
1216
 */
1217
static int
1218
955k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1219
955k
    cfg_instr *last = basicblock_last_instr(bb);
1220
955k
    if (last == NULL) {
1221
0
        return 0;
1222
0
    }
1223
955k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1224
768k
        return 0;
1225
768k
    }
1226
187k
    basicblock *target = last->i_target;
1227
187k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1228
187k
                             target->b_iused <= MAX_COPY_SIZE);
1229
187k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1230
187k
                                     !BB_HAS_FALLTHROUGH(target));
1231
187k
    if (small_exit_block || no_lineno_no_fallthrough) {
1232
15.4k
        assert(is_jump(last));
1233
15.4k
        int removed_jump_opcode = last->i_opcode;
1234
15.4k
        INSTR_SET_OP0(last, NOP);
1235
15.4k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1236
15.4k
        if (no_lineno_no_fallthrough) {
1237
13.7k
            last = basicblock_last_instr(bb);
1238
13.7k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1239
13.7k
                removed_jump_opcode == JUMP)
1240
93
            {
1241
                /* Make sure we don't lose eval breaker checks */
1242
93
                last->i_opcode = JUMP;
1243
93
            }
1244
13.7k
        }
1245
15.4k
        target->b_predecessors--;
1246
15.4k
        return 1;
1247
15.4k
    }
1248
172k
    return 0;
1249
187k
}
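
The inlining, sketched for a small exit block (block names are illustrative; E exits scope and has at most MAX_COPY_SIZE instructions):

/*
 * before:   B: ...; JUMP E           E: LOAD_CONST None; RETURN_VALUE
 * after:    B: ...; NOP; LOAD_CONST None; RETURN_VALUE
 *
 * E keeps its own copy of the instructions for any remaining
 * predecessors; only its predecessor count drops.
 */
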
1250
1251
static int
1252
66.2k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1253
66.2k
    bool changes;
1254
68.8k
    do {
1255
68.8k
        changes = false;
1256
1.02M
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1257
955k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1258
955k
            RETURN_IF_ERROR(res);
1259
955k
            if (res) {
1260
15.4k
                changes = true;
1261
15.4k
            }
1262
955k
        }
1263
68.8k
    } while(changes); /* every change removes a jump, ensuring convergence */
1264
66.2k
    return changes;
1265
66.2k
}
1266
1267
// Attempt to eliminate jumps to jumps by updating inst to jump to
1268
// target->i_target using the provided opcode. Return whether or not the
1269
// optimization was successful.
1270
static bool
1271
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1272
1.62k
{
1273
1.62k
    assert(is_jump(inst));
1274
1.62k
    assert(is_jump(target));
1275
1.62k
    assert(inst == basicblock_last_instr(bb));
1276
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1277
    // changes (and we fall into an infinite loop):
1278
1.62k
    if (inst->i_target != target->i_target) {
1279
        /* Change inst to NOP and append a jump to target->i_target. The
1280
         * NOP will be removed later if it's not needed for the lineno.
1281
         */
1282
1.62k
        INSTR_SET_OP0(inst, NOP);
1283
1284
1.62k
        RETURN_IF_ERROR(
1285
1.62k
            basicblock_add_jump(
1286
1.62k
                bb, opcode, target->i_target, target->i_loc));
1287
1288
1.62k
        return true;
1289
1.62k
    }
1290
0
    return false;
1291
1.62k
}
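
The threading itself, sketched (block names are illustrative). If block A ends with a jump whose target instruction is itself an unconditional jump to C, the caller hands both instructions to jump_thread and A is retargeted directly at C:

/*
 * before:   A: ...; POP_JUMP_IF_TRUE B        B: JUMP C
 * after:    A: ...; NOP; POP_JUMP_IF_TRUE C   (B is now bypassed)
 */
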
1292
1293
static int
1294
loads_const(int opcode)
1295
5.70M
{
1296
5.70M
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1297
5.70M
}
1298
1299
/* Returns new reference */
1300
static PyObject*
1301
get_const_value(int opcode, int oparg, PyObject *co_consts)
1302
2.46M
{
1303
2.46M
    PyObject *constant = NULL;
1304
2.46M
    assert(loads_const(opcode));
1305
2.46M
    if (opcode == LOAD_CONST) {
1306
2.06M
        constant = PyList_GET_ITEM(co_consts, oparg);
1307
2.06M
    }
1308
2.46M
    if (opcode == LOAD_SMALL_INT) {
1309
403k
        return PyLong_FromLong(oparg);
1310
403k
    }
1311
1312
2.06M
    if (constant == NULL) {
1313
0
        PyErr_SetString(PyExc_SystemError,
1314
0
                        "Internal error: failed to get value of a constant");
1315
0
        return NULL;
1316
0
    }
1317
2.06M
    return Py_NewRef(constant);
1318
2.06M
}
1319
1320
// Steals a reference to newconst.
1321
static int
1322
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1323
603k
{
1324
603k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1325
0
        Py_DECREF(newconst);
1326
0
        return -1;
1327
0
    }
1328
1329
603k
    Py_ssize_t index;
1330
54.1M
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1331
108M
        if (PyList_GET_ITEM(consts, index) == newconst) {
1332
467k
            break;
1333
467k
        }
1334
54.0M
    }
1335
603k
    if (index == PyList_GET_SIZE(consts)) {
1336
136k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1337
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1338
0
            Py_DECREF(newconst);
1339
0
            return -1;
1340
0
        }
1341
136k
        if (PyList_Append(consts, newconst)) {
1342
0
            Py_DECREF(newconst);
1343
0
            return -1;
1344
0
        }
1345
136k
    }
1346
603k
    Py_DECREF(newconst);
1347
603k
    return (int)index;
1348
603k
}
1349
1350
/*
1351
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1352
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1353
  Caller must make sure that "instrs" is large enough to hold at least "size" instructions.
1354
1355
  Return boolean indicating whether "size" such instructions were found.
1356
*/
1357
static bool
1358
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1359
1.30M
{
1360
1.30M
    assert(start < bb->b_iused);
1361
1.30M
    assert(size >= 0);
1362
1.30M
    assert(size <= _PY_STACK_USE_GUIDELINE);
1363
1364
3.62M
    for (; start >= 0 && size > 0; start--) {
1365
2.89M
        cfg_instr *instr = &bb->b_instr[start];
1366
2.89M
        if (instr->i_opcode == NOP) {
1367
949k
            continue;
1368
949k
        }
1369
1.94M
        if (!loads_const(instr->i_opcode)) {
1370
572k
            return false;
1371
572k
        }
1372
1.37M
        instrs[--size] = instr;
1373
1.37M
    }
1374
1375
732k
    return size == 0;
1376
1.30M
}
1377
1378
/*
1379
  Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1380
  Caller must make sure "instrs" has at least "size" elements.
1381
*/
1382
static void
1383
nop_out(cfg_instr **instrs, int size)
1384
693k
{
1385
1.86M
    for (int i = 0; i < size; i++) {
1386
1.17M
        cfg_instr *instr = instrs[i];
1387
1.17M
        assert(instr->i_opcode != NOP);
1388
1.17M
        INSTR_SET_OP0(instr, NOP);
1389
1.17M
        INSTR_SET_LOC(instr, NO_LOCATION);
1390
1.17M
    }
1391
693k
}
1392
1393
/* Does not steal reference to "newconst".
1394
   Return 1 if the instruction was changed to LOAD_SMALL_INT.
1395
   Return 0 if the instruction could not be changed to LOAD_SMALL_INT.
1396
   Return -1 on error.
1397
*/
1398
static int
1399
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1400
                               PyObject *consts, PyObject *const_cache)
1401
1.85M
{
1402
1.85M
    if (PyLong_CheckExact(newconst)) {
1403
1.11M
        int overflow;
1404
1.11M
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1405
1.11M
        if (val == -1 && PyErr_Occurred()) {
1406
0
            return -1;
1407
0
        }
1408
1.11M
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1409
564k
            assert(_Py_IsImmortal(newconst));
1410
564k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1411
564k
            return 1;
1412
564k
        }
1413
1.11M
    }
1414
1.29M
    return 0;
1415
1.85M
}
1416
1417
1418
/* Steals reference to "newconst" */
1419
static int
1420
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1421
                      PyObject *consts, PyObject *const_cache)
1422
672k
{
1423
672k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1424
672k
    if (res < 0) {
1425
0
        Py_DECREF(newconst);
1426
0
        return ERROR;
1427
0
    }
1428
672k
    if (res > 0) {
1429
78.2k
        return SUCCESS;
1430
78.2k
    }
1431
594k
    int oparg = add_const(newconst, consts, const_cache);
1432
594k
    RETURN_IF_ERROR(oparg);
1433
594k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1434
594k
    return SUCCESS;
1435
594k
}
1436
1437
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1438
   with    LOAD_CONST (c1, c2, ... cn).
1439
   The consts table must still be in list form so that the
1440
   new constant (c1, c2, ... cn) can be appended.
1441
   Called with codestr pointing to the first LOAD_CONST.
1442
*/
1443
static int
1444
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1445
102k
{
1446
    /* Pre-conditions */
1447
102k
    assert(PyDict_CheckExact(const_cache));
1448
102k
    assert(PyList_CheckExact(consts));
1449
1450
102k
    cfg_instr *instr = &bb->b_instr[i];
1451
102k
    assert(instr->i_opcode == BUILD_TUPLE);
1452
1453
102k
    int seq_size = instr->i_oparg;
1454
102k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1455
21
        return SUCCESS;
1456
21
    }
1457
1458
102k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1459
102k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1460
        /* not a const sequence */
1461
56.3k
        return SUCCESS;
1462
56.3k
    }
1463
1464
46.0k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1465
46.0k
    if (const_tuple == NULL) {
1466
0
        return ERROR;
1467
0
    }
1468
1469
111k
    for (int i = 0; i < seq_size; i++) {
1470
65.8k
        cfg_instr *inst = const_instrs[i];
1471
65.8k
        assert(loads_const(inst->i_opcode));
1472
65.8k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1473
65.8k
        if (element == NULL) {
1474
0
            Py_DECREF(const_tuple);
1475
0
            return ERROR;
1476
0
        }
1477
65.8k
        PyTuple_SET_ITEM(const_tuple, i, element);
1478
65.8k
    }
1479
1480
46.0k
    nop_out(const_instrs, seq_size);
1481
46.0k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1482
46.0k
}
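
The effect on the instruction stream, for a literal tuple like (1, 2, 3) (a sketch; small ints load via LOAD_SMALL_INT, and the folded loads become location-less NOPs that a later pass removes):

/*
 * before:   LOAD_SMALL_INT 1; LOAD_SMALL_INT 2; LOAD_SMALL_INT 3; BUILD_TUPLE 3
 * after:    NOP; NOP; NOP; LOAD_CONST (1, 2, 3)
 */
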
1483
1484
/* Replace:
1485
    BUILD_LIST 0
1486
    LOAD_CONST c1
1487
    LIST_APPEND 1
1488
    LOAD_CONST c2
1489
    LIST_APPEND 1
1490
    ...
1491
    LOAD_CONST cN
1492
    LIST_APPEND 1
1493
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1494
   with:
1495
    LOAD_CONST (c1, c2, ... cN)
1496
*/
1497
static int
1498
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1499
                                      PyObject *consts, PyObject *const_cache)
1500
7.11k
{
1501
7.11k
    assert(PyDict_CheckExact(const_cache));
1502
7.11k
    assert(PyList_CheckExact(consts));
1503
7.11k
    assert(i >= 0);
1504
7.11k
    assert(i < bb->b_iused);
1505
1506
7.11k
    cfg_instr *intrinsic = &bb->b_instr[i];
1507
7.11k
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1508
7.11k
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1509
1510
7.11k
    int consts_found = 0;
1511
7.11k
    bool expect_append = true;
1512
1513
36.0k
    for (int pos = i - 1; pos >= 0; pos--) {
1514
35.6k
        cfg_instr *instr = &bb->b_instr[pos];
1515
35.6k
        int opcode = instr->i_opcode;
1516
35.6k
        int oparg = instr->i_oparg;
1517
1518
35.6k
        if (opcode == NOP) {
1519
7.10k
            continue;
1520
7.10k
        }
1521
1522
28.5k
        if (opcode == BUILD_LIST && oparg == 0) {
1523
258
            if (!expect_append) {
1524
                /* Not a sequence start. */
1525
132
                return SUCCESS;
1526
132
            }
1527
1528
            /* Sequence start, we are done. */
1529
126
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1530
126
            if (newconst == NULL) {
1531
0
                return ERROR;
1532
0
            }
1533
1534
21.5k
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1535
21.4k
                instr = &bb->b_instr[newpos];
1536
21.4k
                if (instr->i_opcode == NOP) {
1537
4.99k
                    continue;
1538
4.99k
                }
1539
16.4k
                if (loads_const(instr->i_opcode)) {
1540
8.14k
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1541
8.14k
                    if (constant == NULL) {
1542
0
                        Py_DECREF(newconst);
1543
0
                        return ERROR;
1544
0
                    }
1545
8.14k
                    assert(consts_found > 0);
1546
8.14k
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1547
8.14k
                }
1548
16.4k
                nop_out(&instr, 1);
1549
16.4k
            }
1550
126
            assert(consts_found == 0);
1551
126
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1552
126
        }
1553
1554
28.2k
        if (expect_append) {
1555
16.5k
            if (opcode != LIST_APPEND || oparg != 1) {
1556
4.27k
                return SUCCESS;
1557
4.27k
            }
1558
16.5k
        }
1559
11.7k
        else {
1560
11.7k
            if (!loads_const(opcode)) {
1561
2.21k
                return SUCCESS;
1562
2.21k
            }
1563
9.54k
            consts_found++;
1564
9.54k
        }
1565
1566
21.7k
        expect_append = !expect_append;
1567
21.7k
    }
1568
1569
    /* Did not find sequence start. */
1570
365
    return SUCCESS;
1571
7.11k
}
1572
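The backwards scan above alternates between expecting a LIST_APPEND 1 and a constant load (skipping NOPs) until it reaches the BUILD_LIST 0 that starts the sequence. The same state machine in isolation, over a toy opcode array (the enum values are stand-ins, not CPython's):

#include <stdbool.h>
#include <stdio.h>

/* Toy opcode values, stand-ins for the real ones. */
enum { NOP_OP, BUILD_LIST_OP, LOAD_CONST_OP, LIST_APPEND_OP };

static bool is_const_sequence(const int *code, int intrinsic_pos)
{
    bool expect_append = true;
    for (int pos = intrinsic_pos - 1; pos >= 0; pos--) {
        int op = code[pos];
        if (op == NOP_OP) {
            continue;
        }
        if (op == BUILD_LIST_OP) {
            return expect_append;        /* start found: valid iff pairs lined up */
        }
        if (expect_append ? op != LIST_APPEND_OP : op != LOAD_CONST_OP) {
            return false;
        }
        expect_append = !expect_append;
    }
    return false;                        /* never saw the BUILD_LIST 0 */
}

int main(void)
{
    int code[] = {BUILD_LIST_OP, LOAD_CONST_OP, LIST_APPEND_OP,
                  LOAD_CONST_OP, LIST_APPEND_OP};
    printf("%d\n", is_const_sequence(code, 5));   /* prints 1 */
    return 0;
}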
1573
52.2k
#define MIN_CONST_SEQUENCE_SIZE 3
1574
/*
1575
Optimize lists and sets for:
1576
    1. "for" loop, comprehension or "in"/"not in" tests:
1577
           Change literal list or set of constants into constant
1578
           tuple or frozenset respectively. Change list of
1579
           non-constants into tuple.
1580
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1581
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1582
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1583
           or BUILD_SET & SET_UPDATE respectively.
1584
*/
1585
static int
1586
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1587
                        PyObject *consts, PyObject *const_cache)
1588
26.1k
{
1589
26.1k
    assert(PyDict_CheckExact(const_cache));
1590
26.1k
    assert(PyList_CheckExact(consts));
1591
1592
26.1k
    cfg_instr *instr = &bb->b_instr[i];
1593
26.1k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1594
1595
26.1k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1596
26.1k
    int seq_size = instr->i_oparg;
1597
26.1k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1598
26.1k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1599
20.0k
    {
1600
20.0k
        return SUCCESS;
1601
20.0k
    }
1602
1603
6.08k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1604
6.08k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1605
1.39k
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1606
            /* iterate over a tuple instead of list */
1607
8
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1608
8
        }
1609
1.39k
        return SUCCESS;
1610
1.39k
    }
1611
1612
4.69k
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1613
4.69k
    if (const_result == NULL) {
1614
0
        return ERROR;
1615
0
    }
1616
1617
50.6k
    for (int i = 0; i < seq_size; i++) {
1618
45.9k
        cfg_instr *inst = const_instrs[i];
1619
45.9k
        assert(loads_const(inst->i_opcode));
1620
45.9k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1621
45.9k
        if (element == NULL) {
1622
0
            Py_DECREF(const_result);
1623
0
            return ERROR;
1624
0
        }
1625
45.9k
        PyTuple_SET_ITEM(const_result, i, element);
1626
45.9k
    }
1627
1628
4.69k
    if (instr->i_opcode == BUILD_SET) {
1629
4.47k
        PyObject *frozenset = PyFrozenSet_New(const_result);
1630
4.47k
        if (frozenset == NULL) {
1631
0
            Py_DECREF(const_result);
1632
0
            return ERROR;
1633
0
        }
1634
4.47k
        Py_SETREF(const_result, frozenset);
1635
4.47k
    }
1636
1637
4.69k
    int index = add_const(const_result, consts, const_cache);
1638
4.69k
    RETURN_IF_ERROR(index);
1639
4.69k
    nop_out(const_instrs, seq_size);
1640
1641
4.69k
    if (contains_or_iter) {
1642
206
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1643
206
    }
1644
4.48k
    else {
1645
4.48k
        assert(i >= 2);
1646
4.48k
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1647
1648
4.48k
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1649
1650
4.48k
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1651
4.48k
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1652
4.48k
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1653
4.48k
    }
1654
4.69k
    return SUCCESS;
1655
4.69k
}
1656
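For case 2 above, the rewritten sequence trades n element loads at run time for one constant tuple built at compile time. The equivalent operations performed through the C API (a sketch; error handling abbreviated):

#include <Python.h>

int main(void)
{
    Py_Initialize();
    PyObject *konst = Py_BuildValue("(iii)", 1, 2, 3);   /* the folded constant */
    PyObject *list = konst ? PyList_New(0) : NULL;       /* BUILD_LIST 0 */
    PyObject *res = list ? PyObject_CallMethod(list, "extend", "O", konst)
                         : NULL;                         /* LIST_EXTEND 1 */
    if (res == NULL) {
        PyErr_Print();
        return 1;
    }
    Py_DECREF(res);
    PyObject_Print(list, stdout, 0);                     /* [1, 2, 3] */
    printf("\n");
    Py_DECREF(list);
    Py_DECREF(konst);
    return Py_FinalizeEx();
}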
1657
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1658
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1659
 * Used to avoid creating constants which are slow to hash.
1660
 */
1661
static Py_ssize_t
1662
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1663
586k
{
1664
586k
    if (PyTuple_Check(obj)) {
1665
527k
        Py_ssize_t i;
1666
527k
        limit -= PyTuple_GET_SIZE(obj);
1667
1.08M
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1668
557k
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1669
557k
            if (limit < 0) {
1670
1.80k
                return limit;
1671
1.80k
            }
1672
557k
        }
1673
527k
    }
1674
584k
    return limit;
1675
586k
}
1676
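The recursion threads a single budget through the nested collection: every (sub)tuple first subtracts its own length, and the walk stops as soon as the budget goes negative. The same shape on a toy node type; for a nest like ((1, 2), (3, 4)) against a limit of 10 the result is 10 - 2 - 2 - 2 = 4:

#include <stdio.h>

typedef struct node { int nchildren; const struct node *children; } node;

/* Same budget threading as const_folding_check_complexity(): a negative
 * return anywhere means "too complex to fold". */
static long check(const node *n, long limit)
{
    limit -= n->nchildren;
    for (int i = 0; limit >= 0 && i < n->nchildren; i++) {
        limit = check(&n->children[i], limit);
    }
    return limit;
}

int main(void)
{
    node kids[2] = {{0, 0}, {0, 0}};
    node inner[2] = {{2, kids}, {2, kids}};
    node outer = {2, inner};
    printf("%ld\n", check(&outer, 10));   /* prints 4 */
    return 0;
}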
1677
74.8k
#define MAX_INT_SIZE           128  /* bits */
1678
29.0k
#define MAX_COLLECTION_SIZE    256  /* items */
1679
9.97k
#define MAX_STR_SIZE          4096  /* characters */
1680
28.0k
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1681
1682
static PyObject *
1683
const_folding_safe_multiply(PyObject *v, PyObject *w)
1684
166k
{
1685
166k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1686
166k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1687
166k
    ) {
1688
32.7k
        int64_t vbits = _PyLong_NumBits(v);
1689
32.7k
        int64_t wbits = _PyLong_NumBits(w);
1690
32.7k
        assert(vbits >= 0);
1691
32.7k
        assert(wbits >= 0);
1692
32.7k
        if (vbits + wbits > MAX_INT_SIZE) {
1693
323
            return NULL;
1694
323
        }
1695
32.7k
    }
1696
133k
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1697
30.5k
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1698
30.5k
        if (size) {
1699
29.5k
            long n = PyLong_AsLong(v);
1700
29.5k
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1701
601
                return NULL;
1702
601
            }
1703
28.9k
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1704
280
                return NULL;
1705
280
            }
1706
28.9k
        }
1707
30.5k
    }
1708
103k
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1709
22.9k
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1710
22.9k
                                               PyBytes_GET_SIZE(w);
1711
22.9k
        if (size) {
1712
10.7k
            long n = PyLong_AsLong(v);
1713
10.7k
            if (n < 0 || n > MAX_STR_SIZE / size) {
1714
1.37k
                return NULL;
1715
1.37k
            }
1716
10.7k
        }
1717
22.9k
    }
1718
80.1k
    else if (PyLong_Check(w) &&
1719
80.1k
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1720
50.7k
    {
1721
50.7k
        return const_folding_safe_multiply(w, v);
1722
50.7k
    }
1723
1724
113k
    return PyNumber_Multiply(v, w);
1725
166k
}
1726
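The integer guard above relies on bits(v*w) <= bits(v) + bits(w), so rejecting when the sum exceeds MAX_INT_SIZE bounds the folded constant at 128 bits. The same check on plain machine words (a sketch; __builtin_clzll assumes GCC/Clang and nonzero operands):

#include <stdint.h>
#include <stdio.h>

/* The product of a vbits-bit and a wbits-bit integer needs at most
 * vbits + wbits bits, mirroring the MAX_INT_SIZE test above. */
static int fold_would_overflow(uint64_t v, uint64_t w, int max_bits)
{
    int vbits = 64 - __builtin_clzll(v);   /* v, w must be nonzero */
    int wbits = 64 - __builtin_clzll(w);
    return vbits + wbits > max_bits;
}

int main(void)
{
    printf("%d\n", fold_would_overflow(1ULL << 40, 1ULL << 40, 128));  /* 0: fold */
    printf("%d\n", fold_would_overflow(1ULL << 40, 1ULL << 40, 64));   /* 1: refuse */
    return 0;
}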
1727
static PyObject *
1728
const_folding_safe_power(PyObject *v, PyObject *w)
1729
72.1k
{
1730
72.1k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1731
72.1k
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1732
72.1k
    ) {
1733
25.5k
        int64_t vbits = _PyLong_NumBits(v);
1734
25.5k
        size_t wbits = PyLong_AsSize_t(w);
1735
25.5k
        assert(vbits >= 0);
1736
25.5k
        if (wbits == (size_t)-1) {
1737
439
            return NULL;
1738
439
        }
1739
25.0k
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1740
3.69k
            return NULL;
1741
3.69k
        }
1742
25.0k
    }
1743
1744
67.9k
    return PyNumber_Power(v, w, Py_None);
1745
72.1k
}
1746
1747
static PyObject *
1748
const_folding_safe_lshift(PyObject *v, PyObject *w)
1749
8.08k
{
1750
8.08k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1751
8.08k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1752
8.08k
    ) {
1753
6.52k
        int64_t vbits = _PyLong_NumBits(v);
1754
6.52k
        size_t wbits = PyLong_AsSize_t(w);
1755
6.52k
        assert(vbits >= 0);
1756
6.52k
        if (wbits == (size_t)-1) {
1757
400
            return NULL;
1758
400
        }
1759
6.12k
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1760
1.84k
            return NULL;
1761
1.84k
        }
1762
6.12k
    }
1763
1764
5.84k
    return PyNumber_Lshift(v, w);
1765
8.08k
}
1766
1767
static PyObject *
1768
const_folding_safe_mod(PyObject *v, PyObject *w)
1769
32.9k
{
1770
32.9k
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1771
1.42k
        return NULL;
1772
1.42k
    }
1773
1774
31.4k
    return PyNumber_Remainder(v, w);
1775
32.9k
}
1776
1777
static PyObject *
1778
eval_const_binop(PyObject *left, int op, PyObject *right)
1779
467k
{
1780
467k
    assert(left != NULL && right != NULL);
1781
467k
    assert(op >= 0 && op <= NB_OPARG_LAST);
1782
1783
467k
    PyObject *result = NULL;
1784
467k
    switch (op) {
1785
44.1k
        case NB_ADD:
1786
44.1k
            result = PyNumber_Add(left, right);
1787
44.1k
            break;
1788
51.5k
        case NB_SUBTRACT:
1789
51.5k
            result = PyNumber_Subtract(left, right);
1790
51.5k
            break;
1791
115k
        case NB_MULTIPLY:
1792
115k
            result = const_folding_safe_multiply(left, right);
1793
115k
            break;
1794
64.4k
        case NB_TRUE_DIVIDE:
1795
64.4k
            result = PyNumber_TrueDivide(left, right);
1796
64.4k
            break;
1797
23.4k
        case NB_FLOOR_DIVIDE:
1798
23.4k
            result = PyNumber_FloorDivide(left, right);
1799
23.4k
            break;
1800
32.9k
        case NB_REMAINDER:
1801
32.9k
            result = const_folding_safe_mod(left, right);
1802
32.9k
            break;
1803
72.1k
        case NB_POWER:
1804
72.1k
            result = const_folding_safe_power(left, right);
1805
72.1k
            break;
1806
8.08k
        case NB_LSHIFT:
1807
8.08k
            result = const_folding_safe_lshift(left, right);
1808
8.08k
            break;
1809
16.0k
        case NB_RSHIFT:
1810
16.0k
            result = PyNumber_Rshift(left, right);
1811
16.0k
            break;
1812
6.06k
        case NB_OR:
1813
6.06k
            result = PyNumber_Or(left, right);
1814
6.06k
            break;
1815
6.78k
        case NB_XOR:
1816
6.78k
            result = PyNumber_Xor(left, right);
1817
6.78k
            break;
1818
9.76k
        case NB_AND:
1819
9.76k
            result = PyNumber_And(left, right);
1820
9.76k
            break;
1821
15.4k
        case NB_SUBSCR:
1822
15.4k
            result = PyObject_GetItem(left, right);
1823
15.4k
            break;
1824
761
        case NB_MATRIX_MULTIPLY:
1825
            // No builtin constants implement matrix multiplication
1826
761
            break;
1827
0
        default:
1828
0
            Py_UNREACHABLE();
1829
467k
    }
1830
467k
    return result;
1831
467k
}
1832
1833
static int
1834
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1835
700k
{
1836
1.11M
    #define BINOP_OPERAND_COUNT 2
1837
700k
    assert(PyDict_CheckExact(const_cache));
1838
700k
    assert(PyList_CheckExact(consts));
1839
1840
700k
    cfg_instr *binop = &bb->b_instr[i];
1841
700k
    assert(binop->i_opcode == BINARY_OP);
1842
1843
700k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1844
700k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1845
        /* not a const sequence */
1846
233k
        return SUCCESS;
1847
233k
    }
1848
1849
467k
    cfg_instr *lhs_instr = operands_instrs[0];
1850
467k
    assert(loads_const(lhs_instr->i_opcode));
1851
467k
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1852
467k
    if (lhs == NULL) {
1853
0
        return ERROR;
1854
0
    }
1855
1856
467k
    cfg_instr *rhs_instr = operands_instrs[1];
1857
467k
    assert(loads_const(rhs_instr->i_opcode));
1858
467k
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1859
467k
    if (rhs == NULL) {
1860
0
        Py_DECREF(lhs);
1861
0
        return ERROR;
1862
0
    }
1863
1864
467k
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1865
467k
    Py_DECREF(lhs);
1866
467k
    Py_DECREF(rhs);
1867
467k
    if (newconst == NULL) {
1868
51.4k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1869
0
            return ERROR;
1870
0
        }
1871
51.4k
        PyErr_Clear();
1872
51.4k
        return SUCCESS;
1873
51.4k
    }
1874
1875
415k
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1876
415k
    return instr_make_load_const(binop, newconst, consts, const_cache);
1877
467k
}
1878
1879
static PyObject *
1880
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1881
213k
{
1882
213k
    assert(operand != NULL);
1883
213k
    assert(
1884
213k
        opcode == UNARY_NEGATIVE ||
1885
213k
        opcode == UNARY_INVERT ||
1886
213k
        opcode == UNARY_NOT ||
1887
213k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1888
213k
    );
1889
213k
    PyObject *result;
1890
213k
    switch (opcode) {
1891
151k
        case UNARY_NEGATIVE:
1892
151k
            result = PyNumber_Negative(operand);
1893
151k
            break;
1894
10.6k
        case UNARY_INVERT:
1895
            // XXX: This should be removed once the ~bool deprecation expires.
1896
10.6k
            if (PyBool_Check(operand)) {
1897
222
                return NULL;
1898
222
            }
1899
10.4k
            result = PyNumber_Invert(operand);
1900
10.4k
            break;
1901
808
        case UNARY_NOT: {
1902
808
            int r = PyObject_IsTrue(operand);
1903
808
            if (r < 0) {
1904
0
                return NULL;
1905
0
            }
1906
808
            result = PyBool_FromLong(!r);
1907
808
            break;
1908
808
        }
1909
50.2k
        case CALL_INTRINSIC_1:
1910
50.2k
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1911
0
                Py_UNREACHABLE();
1912
0
            }
1913
50.2k
            result = PyNumber_Positive(operand);
1914
50.2k
            break;
1915
0
        default:
1916
0
            Py_UNREACHABLE();
1917
213k
    }
1918
213k
    return result;
1919
213k
}
1920
1921
static int
1922
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1923
496k
{
1924
706k
    #define UNARYOP_OPERAND_COUNT 1
1925
496k
    assert(PyDict_CheckExact(const_cache));
1926
496k
    assert(PyList_CheckExact(consts));
1927
496k
    cfg_instr *unaryop = &bb->b_instr[i];
1928
1929
496k
    cfg_instr *operand_instr;
1930
496k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1931
        /* not a const */
1932
282k
        return SUCCESS;
1933
282k
    }
1934
1935
213k
    assert(loads_const(operand_instr->i_opcode));
1936
213k
    PyObject *operand = get_const_value(
1937
213k
        operand_instr->i_opcode,
1938
213k
        operand_instr->i_oparg,
1939
213k
        consts
1940
213k
    );
1941
213k
    if (operand == NULL) {
1942
0
        return ERROR;
1943
0
    }
1944
1945
213k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1946
213k
    Py_DECREF(operand);
1947
213k
    if (newconst == NULL) {
1948
2.93k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1949
0
            return ERROR;
1950
0
        }
1951
2.93k
        PyErr_Clear();
1952
2.93k
        return SUCCESS;
1953
2.93k
    }
1954
1955
210k
    if (unaryop->i_opcode == UNARY_NOT) {
1956
808
        assert(PyBool_Check(newconst));
1957
808
    }
1958
210k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1959
210k
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1960
210k
}
1961
1962
326k
#define VISITED (-1)
1963
1964
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1965
// same effect.
1966
static int
1967
swaptimize(basicblock *block, int *ix)
1968
112k
{
1969
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1970
    // for this function. Make sure to blow away cached *.pyc files first!
1971
112k
    assert(*ix < block->b_iused);
1972
112k
    cfg_instr *instructions = &block->b_instr[*ix];
1973
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1974
    // maximum depth of the stack manipulations:
1975
112k
    assert(instructions[0].i_opcode == SWAP);
1976
112k
    int depth = instructions[0].i_oparg;
1977
112k
    int len = 0;
1978
112k
    int more = false;
1979
112k
    int limit = block->b_iused - *ix;
1980
165k
    while (++len < limit) {
1981
165k
        int opcode = instructions[len].i_opcode;
1982
165k
        if (opcode == SWAP) {
1983
52.5k
            depth = Py_MAX(depth, instructions[len].i_oparg);
1984
52.5k
            more = true;
1985
52.5k
        }
1986
113k
        else if (opcode != NOP) {
1987
112k
            break;
1988
112k
        }
1989
165k
    }
1990
    // It's already optimal if there's only one SWAP:
1991
112k
    if (!more) {
1992
96.7k
        return SUCCESS;
1993
96.7k
    }
1994
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1995
16.0k
    int *stack = PyMem_Malloc(depth * sizeof(int));
1996
16.0k
    if (stack == NULL) {
1997
0
        PyErr_NoMemory();
1998
0
        return ERROR;
1999
0
    }
2000
95.5k
    for (int i = 0; i < depth; i++) {
2001
79.5k
        stack[i] = i;
2002
79.5k
    }
2003
    // Simulate the combined effect of these instructions by "running" them on
2004
    // our "stack":
2005
85.1k
    for (int i = 0; i < len; i++) {
2006
69.1k
        if (instructions[i].i_opcode == SWAP) {
2007
68.5k
            int oparg = instructions[i].i_oparg;
2008
68.5k
            int top = stack[0];
2009
            // SWAPs are 1-indexed:
2010
68.5k
            stack[0] = stack[oparg - 1];
2011
68.5k
            stack[oparg - 1] = top;
2012
68.5k
        }
2013
69.1k
    }
2014
    // Now we can begin! Our approach here is based on a solution to a closely
2015
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2016
    // think of this algorithm as determining the steps needed to efficiently
2017
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2018
    // though, we can efficiently *shuffle* it! For this reason, we will be
2019
    // replacing instructions starting from the *end* of the run. Since the
2020
    // solution is optimal, we don't need to worry about running out of space:
2021
16.0k
    int current = len - 1;
2022
95.5k
    for (int i = 0; i < depth; i++) {
2023
        // Skip items that have already been visited, or just happen to be in
2024
        // the correct location:
2025
79.5k
        if (stack[i] == VISITED || stack[i] == i) {
2026
64.6k
            continue;
2027
64.6k
        }
2028
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2029
        // with other items; traversing the cycle and swapping each item with
2030
        // the next will put them all in the correct place. The weird
2031
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2032
        // can only swap from that position:
2033
14.8k
        int j = i;
2034
91.0k
        while (true) {
2035
            // Skip the actual swap if our item is zero, since swapping the top
2036
            // item with itself is pointless:
2037
91.0k
            if (j) {
2038
61.4k
                assert(0 <= current);
2039
                // SWAPs are 1-indexed:
2040
61.4k
                instructions[current].i_opcode = SWAP;
2041
61.4k
                instructions[current--].i_oparg = j + 1;
2042
61.4k
            }
2043
91.0k
            if (stack[j] == VISITED) {
2044
                // Completed the cycle:
2045
14.8k
                assert(j == i);
2046
14.8k
                break;
2047
14.8k
            }
2048
76.1k
            int next_j = stack[j];
2049
76.1k
            stack[j] = VISITED;
2050
76.1k
            j = next_j;
2051
76.1k
        }
2052
14.8k
    }
2053
    // NOP out any unused instructions:
2054
23.7k
    while (0 <= current) {
2055
7.70k
        INSTR_SET_OP0(&instructions[current--], NOP);
2056
7.70k
    }
2057
16.0k
    PyMem_Free(stack);
2058
16.0k
    *ix += len - 1;
2059
16.0k
    return SUCCESS;
2060
16.0k
}
2061
2062
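The cycle-based core of swaptimize(), in isolation: decompose the simulated permutation into cycles and emit one SWAP per step, routing every cycle through slot 0 since SWAP can only exchange the top with a deeper slot. The real pass writes these operands into the instruction array from the end, which turns this un-shuffle recipe into the desired shuffle. A standalone sketch:

#include <stdio.h>

#define VISITED (-1)

/* stack[i] == item currently sitting in slot i (0 is the top). Prints the
 * SWAPs that sort it; marking with VISITED caps the work per cycle. */
static void emit_swaps(int *stack, int depth)
{
    for (int i = 0; i < depth; i++) {
        if (stack[i] == VISITED || stack[i] == i) {
            continue;                        /* placed already */
        }
        int j = i;
        while (stack[j] != VISITED) {
            if (j) {
                printf("SWAP %d\n", j + 1);  /* 1-indexed, like the bytecode */
            }
            int next = stack[j];
            stack[j] = VISITED;
            j = next;
        }
        if (j) {
            printf("SWAP %d\n", j + 1);      /* close the cycle through 0 */
        }
    }
}

int main(void)
{
    int stack[] = {2, 0, 1};                 /* a rotation of the top three */
    emit_swaps(stack, 3);                    /* prints SWAP 3, then SWAP 2 */
    return 0;
}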
2063
// This list is pretty small, since it's only okay to reorder opcodes that:
2064
// - can't affect control flow (like jumping or raising exceptions)
2065
// - can't invoke arbitrary code (besides finalizers)
2066
// - only touch the TOS (and pop it when finished)
2067
#define SWAPPABLE(opcode) \
2068
132k
    ((opcode) == STORE_FAST || \
2069
132k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2070
132k
     (opcode) == POP_TOP)
2071
2072
#define STORES_TO(instr) \
2073
5.97k
    (((instr).i_opcode == STORE_FAST || \
2074
5.97k
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2075
5.97k
     ? (instr).i_oparg : -1)
2076
2077
static int
2078
next_swappable_instruction(basicblock *block, int i, int lineno)
2079
144k
{
2080
144k
    while (++i < block->b_iused) {
2081
129k
        cfg_instr *instruction = &block->b_instr[i];
2082
129k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2083
            // Optimizing across this instruction could cause user-visible
2084
            // changes in the names bound between line tracing events!
2085
73
            return -1;
2086
73
        }
2087
129k
        if (instruction->i_opcode == NOP) {
2088
270
            continue;
2089
270
        }
2090
129k
        if (SWAPPABLE(instruction->i_opcode)) {
2091
34.6k
            return i;
2092
34.6k
        }
2093
94.9k
        return -1;
2094
129k
    }
2095
14.3k
    return -1;
2096
144k
}
2097
2098
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2099
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2100
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2101
static void
2102
apply_static_swaps(basicblock *block, int i)
2103
112k
{
2104
    // SWAPs are to our left, and potential swaperands are to our right:
2105
123k
    for (; 0 <= i; i--) {
2106
122k
        assert(i < block->b_iused);
2107
122k
        cfg_instr *swap = &block->b_instr[i];
2108
122k
        if (swap->i_opcode != SWAP) {
2109
10.7k
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2110
                // Nope, but we know how to handle these. Keep looking:
2111
8.49k
                continue;
2112
8.49k
            }
2113
            // We can't reason about what this instruction does. Bail:
2114
2.28k
            return;
2115
10.7k
        }
2116
111k
        int j = next_swappable_instruction(block, i, -1);
2117
111k
        if (j < 0) {
2118
84.2k
            return;
2119
84.2k
        }
2120
27.5k
        int k = j;
2121
27.5k
        int lineno = block->b_instr[j].i_loc.lineno;
2122
34.6k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2123
32.3k
            k = next_swappable_instruction(block, k, lineno);
2124
32.3k
            if (k < 0) {
2125
25.1k
                return;
2126
25.1k
            }
2127
32.3k
        }
2128
        // The reordering is not safe if the two instructions to be swapped
2129
        // store to the same location, or if any intervening instruction stores
2130
        // to the same location as either of them.
2131
2.30k
        int store_j = STORES_TO(block->b_instr[j]);
2132
2.30k
        int store_k = STORES_TO(block->b_instr[k]);
2133
2.30k
        if (store_j >= 0 || store_k >= 0) {
2134
2.20k
            if (store_j == store_k) {
2135
355
                return;
2136
355
            }
2137
3.20k
            for (int idx = j + 1; idx < k; idx++) {
2138
1.36k
                int store_idx = STORES_TO(block->b_instr[idx]);
2139
1.36k
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2140
0
                    return;
2141
0
                }
2142
1.36k
            }
2143
1.84k
        }
2144
2145
        // Success!
2146
1.95k
        INSTR_SET_OP0(swap, NOP);
2147
1.95k
        cfg_instr temp = block->b_instr[j];
2148
1.95k
        block->b_instr[j] = block->b_instr[k];
2149
1.95k
        block->b_instr[k] = temp;
2150
1.95k
    }
2151
112k
}
2152
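The safety argument for apply_static_swaps() can be checked in miniature: on a stack holding a below b, SWAP(2); POP_TOP; STORE(x) and STORE(x); POP_TOP both bind x to b and leave the same stack depth, which is exactly why the SWAP can be NOPed out. A toy simulation:

#include <assert.h>
#include <stdio.h>

/* st is the stack (grows upward), sp the number of live slots. */
static void swap2(int *st, int *sp) { int t = st[*sp - 1]; st[*sp - 1] = st[*sp - 2]; st[*sp - 2] = t; }
static void pop_top(int *sp)        { (*sp)--; }
static void store(int *st, int *sp, int *local) { *local = st[--*sp]; }

int main(void)
{
    int st1[2] = {7, 9}, sp1 = 2, x1 = 0;     /* a = 7 below, b = 9 on top */
    swap2(st1, &sp1);                         /* SWAP(2) */
    pop_top(&sp1);                            /* POP_TOP drops a */
    store(st1, &sp1, &x1);                    /* STORE_FAST x <- b */

    int st2[2] = {7, 9}, sp2 = 2, x2 = 0;
    store(st2, &sp2, &x2);                    /* STORE_FAST x <- b */
    pop_top(&sp2);                            /* POP_TOP drops a */

    assert(x1 == x2 && sp1 == sp2);           /* identical outcome */
    printf("x = %d\n", x1);                   /* prints x = 9 */
    return 0;
}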
2153
static int
2154
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2155
563k
{
2156
563k
    assert(PyDict_CheckExact(const_cache));
2157
563k
    assert(PyList_CheckExact(consts));
2158
563k
    int opcode = 0;
2159
563k
    int oparg = 0;
2160
6.22M
    for (int i = 0; i < bb->b_iused; i++) {
2161
5.66M
        cfg_instr *inst = &bb->b_instr[i];
2162
5.66M
        if (inst->i_opcode == LOAD_CONST) {
2163
1.18M
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2164
1.18M
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2165
1.18M
            Py_DECREF(constant);
2166
1.18M
            if (res < 0) {
2167
0
                return ERROR;
2168
0
            }
2169
1.18M
        }
2170
5.66M
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2171
5.66M
                                      inst->i_opcode == COPY &&
2172
5.66M
                                      inst->i_oparg == 1);
2173
5.66M
        if (! is_copy_of_load_const) {
2174
5.66M
            opcode = inst->i_opcode;
2175
5.66M
            oparg = inst->i_oparg;
2176
5.66M
        }
2177
5.66M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2178
5.66M
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2179
4.47M
            continue;
2180
4.47M
        }
2181
1.18M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2182
1.18M
        switch(nextop) {
2183
3.00k
            case POP_JUMP_IF_FALSE:
2184
3.96k
            case POP_JUMP_IF_TRUE:
2185
5.37k
            case JUMP_IF_FALSE:
2186
6.17k
            case JUMP_IF_TRUE:
2187
6.17k
            {
2188
                /* Remove LOAD_CONST const; conditional jump */
2189
6.17k
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2190
6.17k
                if (cnt == NULL) {
2191
0
                    return ERROR;
2192
0
                }
2193
6.17k
                int is_true = PyObject_IsTrue(cnt);
2194
6.17k
                Py_DECREF(cnt);
2195
6.17k
                if (is_true == -1) {
2196
0
                    return ERROR;
2197
0
                }
2198
6.17k
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2199
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2200
3.96k
                    INSTR_SET_OP0(inst, NOP);
2201
3.96k
                }
2202
6.17k
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2203
6.17k
                if (is_true == jump_if_true) {
2204
2.50k
                    bb->b_instr[i+1].i_opcode = JUMP;
2205
2.50k
                }
2206
3.67k
                else {
2207
3.67k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2208
3.67k
                }
2209
6.17k
                break;
2210
6.17k
            }
2211
6.17k
            case IS_OP:
2212
2.44k
            {
2213
                // Fold to POP_JUMP_IF_NONE:
2214
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2215
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2216
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2217
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2218
                // Fold to POP_JUMP_IF_NOT_NONE:
2219
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2220
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2221
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2222
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2223
2.44k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2224
2.44k
                if (cnt == NULL) {
2225
0
                    return ERROR;
2226
0
                }
2227
2.44k
                if (!Py_IsNone(cnt)) {
2228
1.40k
                    Py_DECREF(cnt);
2229
1.40k
                    break;
2230
1.40k
                }
2231
1.04k
                if (bb->b_iused <= i + 2) {
2232
0
                    break;
2233
0
                }
2234
1.04k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2235
1.04k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2236
                // Get rid of TO_BOOL regardless:
2237
1.04k
                if (jump_instr->i_opcode == TO_BOOL) {
2238
0
                    INSTR_SET_OP0(jump_instr, NOP);
2239
0
                    if (bb->b_iused <= i + 3) {
2240
0
                        break;
2241
0
                    }
2242
0
                    jump_instr = &bb->b_instr[i + 3];
2243
0
                }
2244
1.04k
                bool invert = is_instr->i_oparg;
2245
1.04k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2246
1.04k
                    invert = !invert;
2247
1.04k
                }
2248
1
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2249
1
                    break;
2250
1
                }
2251
1.04k
                INSTR_SET_OP0(inst, NOP);
2252
1.04k
                INSTR_SET_OP0(is_instr, NOP);
2253
1.04k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2254
1.04k
                                              : POP_JUMP_IF_NONE;
2255
1.04k
                break;
2256
1.04k
            }
2257
4.77k
            case TO_BOOL:
2258
4.77k
            {
2259
4.77k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2260
4.77k
                if (cnt == NULL) {
2261
0
                    return ERROR;
2262
0
                }
2263
4.77k
                int is_true = PyObject_IsTrue(cnt);
2264
4.77k
                Py_DECREF(cnt);
2265
4.77k
                if (is_true == -1) {
2266
0
                    return ERROR;
2267
0
                }
2268
4.77k
                cnt = PyBool_FromLong(is_true);
2269
4.77k
                int index = add_const(cnt, consts, const_cache);
2270
4.77k
                if (index < 0) {
2271
0
                    return ERROR;
2272
0
                }
2273
4.77k
                INSTR_SET_OP0(inst, NOP);
2274
4.77k
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2275
4.77k
                break;
2276
4.77k
            }
2277
1.18M
        }
2278
1.18M
    }
2279
563k
    return SUCCESS;
2280
563k
}
2281
2282
static int
2283
66.2k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2284
629k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2285
563k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2286
563k
    }
2287
66.2k
    return SUCCESS;
2288
66.2k
}
2289
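The IS_OP folding table above can be observed from Python: compiling a None test under a branch should produce POP_JUMP_IF_NOT_NONE (or POP_JUMP_IF_NONE) with the LOAD_CONST and IS_OP NOPed away. A sketch, built like the earlier embedding example:

#include <Python.h>

int main(void)
{
    Py_Initialize();
    PyRun_SimpleString(
        "import dis\n"
        "dis.dis(compile('if x is None:\\n    y = 1', '<demo>', 'exec'))\n");
    return Py_FinalizeEx();
}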
2290
static int
2291
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2292
563k
{
2293
563k
    assert(PyDict_CheckExact(const_cache));
2294
563k
    assert(PyList_CheckExact(consts));
2295
563k
    cfg_instr nop;
2296
563k
    INSTR_SET_OP0(&nop, NOP);
2297
6.23M
    for (int i = 0; i < bb->b_iused; i++) {
2298
5.66M
        cfg_instr *inst = &bb->b_instr[i];
2299
5.66M
        cfg_instr *target;
2300
5.66M
        int opcode = inst->i_opcode;
2301
5.66M
        int oparg = inst->i_oparg;
2302
5.66M
        if (HAS_TARGET(opcode)) {
2303
341k
            assert(inst->i_target->b_iused > 0);
2304
341k
            target = &inst->i_target->b_instr[0];
2305
341k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2306
341k
        }
2307
5.32M
        else {
2308
5.32M
            target = &nop;
2309
5.32M
        }
2310
5.66M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2311
5.66M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2312
5.66M
        switch (opcode) {
2313
            /* Try to fold tuples of constants.
2314
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2315
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2316
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2317
104k
            case BUILD_TUPLE:
2318
104k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2319
2.35k
                    switch(oparg) {
2320
68
                        case 1:
2321
68
                            INSTR_SET_OP0(inst, NOP);
2322
68
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2323
68
                            continue;
2324
566
                        case 2:
2325
2.08k
                        case 3:
2326
2.08k
                            INSTR_SET_OP0(inst, NOP);
2327
2.08k
                            bb->b_instr[i+1].i_opcode = SWAP;
2328
2.08k
                            continue;
2329
2.35k
                    }
2330
2.35k
                }
2331
102k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2332
102k
                break;
2333
16.4k
            case BUILD_LIST:
2334
26.1k
            case BUILD_SET:
2335
26.1k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2336
26.1k
                break;
2337
2.15k
            case POP_JUMP_IF_NOT_NONE:
2338
5.19k
            case POP_JUMP_IF_NONE:
2339
5.19k
                switch (target->i_opcode) {
2340
0
                    case JUMP:
2341
0
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2342
5.19k
                }
2343
5.19k
                break;
2344
127k
            case POP_JUMP_IF_FALSE:
2345
127k
                switch (target->i_opcode) {
2346
1.04k
                    case JUMP:
2347
1.04k
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2348
127k
                }
2349
127k
                break;
2350
127k
            case POP_JUMP_IF_TRUE:
2351
14.5k
                switch (target->i_opcode) {
2352
94
                    case JUMP:
2353
94
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2354
14.5k
                }
2355
14.5k
                break;
2356
14.5k
            case JUMP_IF_FALSE:
2357
1.78k
                switch (target->i_opcode) {
2358
0
                    case JUMP:
2359
0
                    case JUMP_IF_FALSE:
2360
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2361
0
                        continue;
2362
89
                    case JUMP_IF_TRUE:
2363
                        // No need to check for loops here, a block's b_next
2364
                        // cannot point to itself.
2365
89
                        assert(inst->i_target != inst->i_target->b_next);
2366
89
                        inst->i_target = inst->i_target->b_next;
2367
89
                        i--;
2368
89
                        continue;
2369
1.78k
                }
2370
1.69k
                break;
2371
1.96k
            case JUMP_IF_TRUE:
2372
1.96k
                switch (target->i_opcode) {
2373
0
                    case JUMP:
2374
76
                    case JUMP_IF_TRUE:
2375
76
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2376
76
                        continue;
2377
0
                    case JUMP_IF_FALSE:
2378
                        // No need to check for loops here, a block's b_next
2379
                        // cannot point to itself.
2380
0
                        assert(inst->i_target != inst->i_target->b_next);
2381
0
                        inst->i_target = inst->i_target->b_next;
2382
0
                        i--;
2383
0
                        continue;
2384
1.96k
                }
2385
1.88k
                break;
2386
22.6k
            case JUMP:
2387
89.9k
            case JUMP_NO_INTERRUPT:
2388
89.9k
                switch (target->i_opcode) {
2389
378
                    case JUMP:
2390
378
                        i -= jump_thread(bb, inst, target, JUMP);
2391
378
                        continue;
2392
34
                    case JUMP_NO_INTERRUPT:
2393
34
                        i -= jump_thread(bb, inst, target, opcode);
2394
34
                        continue;
2395
89.9k
                }
2396
89.5k
                break;
2397
89.5k
            case FOR_ITER:
2398
4.82k
                if (target->i_opcode == JUMP) {
2399
                    /* This will not work now because the jump (at target) could
2400
                     * be forward or backward and FOR_ITER only jumps forward. We
2401
                     * can re-enable this if ever we implement a backward version
2402
                     * of FOR_ITER.
2403
                     */
2404
                    /*
2405
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2406
                    */
2407
0
                }
2408
4.82k
                break;
2409
151k
            case STORE_FAST:
2410
151k
                if (opcode == nextop &&
2411
151k
                    oparg == bb->b_instr[i+1].i_oparg &&
2412
151k
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2413
1.76k
                    bb->b_instr[i].i_opcode = POP_TOP;
2414
1.76k
                    bb->b_instr[i].i_oparg = 0;
2415
1.76k
                }
2416
151k
                break;
2417
165k
            case SWAP:
2418
165k
                if (oparg == 1) {
2419
0
                    INSTR_SET_OP0(inst, NOP);
2420
0
                }
2421
165k
                break;
2422
165k
            case LOAD_GLOBAL:
2423
134k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2424
1.58k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2425
1.58k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2426
1.58k
                }
2427
134k
                break;
2428
168k
            case COMPARE_OP:
2429
168k
                if (nextop == TO_BOOL) {
2430
38.7k
                    INSTR_SET_OP0(inst, NOP);
2431
38.7k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2432
38.7k
                    continue;
2433
38.7k
                }
2434
130k
                break;
2435
130k
            case CONTAINS_OP:
2436
14.1k
            case IS_OP:
2437
14.1k
                if (nextop == TO_BOOL) {
2438
1.22k
                    INSTR_SET_OP0(inst, NOP);
2439
1.22k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2440
1.22k
                    continue;
2441
1.22k
                }
2442
12.9k
                if (nextop == UNARY_NOT) {
2443
262
                    INSTR_SET_OP0(inst, NOP);
2444
262
                    int inverted = oparg ^ 1;
2445
262
                    assert(inverted == 0 || inverted == 1);
2446
262
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2447
262
                    continue;
2448
262
                }
2449
12.7k
                break;
2450
74.8k
            case TO_BOOL:
2451
74.8k
                if (nextop == TO_BOOL) {
2452
0
                    INSTR_SET_OP0(inst, NOP);
2453
0
                    continue;
2454
0
                }
2455
74.8k
                break;
2456
74.8k
            case UNARY_NOT:
2457
1.88k
                if (nextop == TO_BOOL) {
2458
158
                    INSTR_SET_OP0(inst, NOP);
2459
158
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2460
158
                    continue;
2461
158
                }
2462
1.72k
                if (nextop == UNARY_NOT) {
2463
158
                    INSTR_SET_OP0(inst, NOP);
2464
158
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2465
158
                    continue;
2466
158
                }
2467
1.56k
                _Py_FALLTHROUGH;
2468
18.2k
            case UNARY_INVERT:
2469
257k
            case UNARY_NEGATIVE:
2470
257k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2471
257k
                break;
2472
282k
            case CALL_INTRINSIC_1:
2473
282k
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2474
7.13k
                    if (nextop == GET_ITER) {
2475
21
                        INSTR_SET_OP0(inst, NOP);
2476
21
                    }
2477
7.11k
                    else {
2478
7.11k
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2479
7.11k
                    }
2480
7.13k
                }
2481
275k
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2482
238k
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2483
238k
                }
2484
282k
                break;
2485
700k
            case BINARY_OP:
2486
700k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2487
700k
                break;
2488
5.66M
        }
2489
5.66M
    }
2490
2491
6.17M
    for (int i = 0; i < bb->b_iused; i++) {
2492
5.61M
        cfg_instr *inst = &bb->b_instr[i];
2493
5.61M
        if (inst->i_opcode == SWAP) {
2494
112k
            if (swaptimize(bb, &i) < 0) {
2495
0
                goto error;
2496
0
            }
2497
112k
            apply_static_swaps(bb, i);
2498
112k
        }
2499
5.61M
    }
2500
563k
    return SUCCESS;
2501
0
error:
2502
0
    return ERROR;
2503
563k
}
2504
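The BUILD_TUPLE/UNPACK_SEQUENCE rewrite at the top of the switch is easy to observe: on a recent CPython, a swap assignment should disassemble with a single SWAP 2 where the unoptimized code would build and immediately unpack a 2-tuple. A sketch, built like the earlier embedding examples:

#include <Python.h>

int main(void)
{
    Py_Initialize();
    PyRun_SimpleString(
        "import dis\n"
        "dis.dis(compile('a, b = b, a', '<demo>', 'exec'))\n");
    return Py_FinalizeEx();
}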
2505
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2506
2507
static int
2508
remove_redundant_nops_and_jumps(cfg_builder *g)
2509
138k
{
2510
138k
    int removed_nops, removed_jumps;
2511
147k
    do {
2512
        /* Convergence is guaranteed because the number of
2513
         * redundant jumps and nops only decreases.
2514
         */
2515
147k
        removed_nops = remove_redundant_nops(g);
2516
147k
        RETURN_IF_ERROR(removed_nops);
2517
147k
        removed_jumps = remove_redundant_jumps(g);
2518
147k
        RETURN_IF_ERROR(removed_jumps);
2519
147k
    } while(removed_nops + removed_jumps > 0);
2520
138k
    return SUCCESS;
2521
138k
}
2522
2523
/* Perform optimizations on a control flow graph.
2524
   The consts object should still be in list form to allow new constants
2525
   to be appended.
2526
2527
   Code transformations that reduce code size initially fill the gaps with
2528
   NOPs.  Later those NOPs are removed.
2529
*/
2530
static int
2531
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2532
66.2k
{
2533
66.2k
    assert(PyDict_CheckExact(const_cache));
2534
66.2k
    RETURN_IF_ERROR(check_cfg(g));
2535
66.2k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2536
66.2k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2537
66.2k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2538
66.2k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2539
629k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2540
563k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2541
563k
    }
2542
66.2k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2543
66.2k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2544
66.2k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2545
66.2k
    assert(no_redundant_jumps(g));
2546
66.2k
    return SUCCESS;
2547
66.2k
}
2548
2549
static void
2550
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2551
60.4k
{
2552
60.4k
    int32_t line1 = inst1->i_loc.lineno;
2553
60.4k
    int32_t line2 = inst2->i_loc.lineno;
2554
    /* Skip if instructions are on different lines */
2555
60.4k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2556
608
        return;
2557
608
    }
2558
59.8k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2559
32.0k
        return;
2560
32.0k
    }
2561
27.7k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2562
27.7k
    INSTR_SET_OP0(inst2, NOP);
2563
27.7k
}
2564
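The oparg check above exists because a superinstruction carries both operands in one oparg: four bits each, so both must be below 16. The packing and its decode (the same >> 4 / & 15 split used when the analysis below unpacks LOAD_FAST_LOAD_FAST), standalone:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    int oparg1 = 3, oparg2 = 11;             /* both must be < 16 */
    int packed = (oparg1 << 4) | oparg2;     /* what the superinstruction carries */
    assert((packed >> 4) == oparg1 && (packed & 15) == oparg2);
    printf("packed oparg = %d\n", packed);   /* prints 59 */
    return 0;
}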
2565
static int
2566
insert_superinstructions(cfg_builder *g)
2567
66.2k
{
2568
629k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2569
2570
4.81M
        for (int i = 0; i < b->b_iused; i++) {
2571
4.25M
            cfg_instr *inst = &b->b_instr[i];
2572
4.25M
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2573
4.25M
            switch(inst->i_opcode) {
2574
25.3k
                case LOAD_FAST:
2575
25.3k
                    if (nextop == LOAD_FAST) {
2576
3.74k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2577
3.74k
                    }
2578
25.3k
                    break;
2579
132k
                case STORE_FAST:
2580
132k
                    switch (nextop) {
2581
9.85k
                        case LOAD_FAST:
2582
9.85k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2583
9.85k
                            break;
2584
46.8k
                        case STORE_FAST:
2585
46.8k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2586
46.8k
                            break;
2587
132k
                    }
2588
132k
                    break;
2589
4.25M
            }
2590
4.25M
        }
2591
563k
    }
2592
66.2k
    int res = remove_redundant_nops(g);
2593
66.2k
    assert(no_redundant_nops(g));
2594
66.2k
    return res;
2595
66.2k
}
2596
2597
#define NOT_LOCAL -1
2598
150k
#define DUMMY_INSTR -1
2599
2600
typedef struct {
2601
    // Index of instruction that produced the reference or DUMMY_INSTR.
2602
    int instr;
2603
2604
    // The local to which the reference refers or NOT_LOCAL.
2605
    int local;
2606
} ref;
2607
2608
typedef struct {
2609
    ref *refs;
2610
    Py_ssize_t size;
2611
    Py_ssize_t capacity;
2612
} ref_stack;
2613
2614
static int
2615
ref_stack_push(ref_stack *stack, ref r)
2616
15.3M
{
2617
15.3M
    if (stack->size == stack->capacity) {
2618
66.3k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2619
66.3k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2620
66.3k
        if (refs == NULL) {
2621
0
            PyErr_NoMemory();
2622
0
            return -1;
2623
0
        }
2624
66.3k
        stack->refs = refs;
2625
66.3k
        stack->capacity = new_cap;
2626
66.3k
    }
2627
15.3M
    stack->refs[stack->size] = r;
2628
15.3M
    stack->size++;
2629
15.3M
    return 0;
2630
15.3M
}
2631
2632
static ref
2633
ref_stack_pop(ref_stack *stack)
2634
2.47M
{
2635
2.47M
    assert(stack->size > 0);
2636
2.47M
    stack->size--;
2637
2.47M
    ref r = stack->refs[stack->size];
2638
2.47M
    return r;
2639
2.47M
}
2640
2641
static void
2642
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2643
109k
{
2644
109k
    Py_ssize_t idx = stack->size - off;
2645
109k
    assert(idx >= 0 && idx < stack->size);
2646
109k
    ref tmp = stack->refs[idx];
2647
109k
    stack->refs[idx] = stack->refs[stack->size - 1];
2648
109k
    stack->refs[stack->size - 1] = tmp;
2649
109k
}
2650
2651
static ref
2652
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2653
13.6M
{
2654
13.6M
    assert(idx >= 0 && idx < stack->size);
2655
13.6M
    return stack->refs[idx];
2656
13.6M
}
2657
2658
static void
2659
ref_stack_clear(ref_stack *stack)
2660
302k
{
2661
302k
    stack->size = 0;
2662
302k
}
2663
2664
static void
2665
ref_stack_fini(ref_stack *stack)
2666
66.2k
{
2667
66.2k
    if (stack->refs != NULL) {
2668
66.1k
        PyMem_Free(stack->refs);
2669
66.1k
    }
2670
66.2k
    stack->refs = NULL;
2671
66.2k
    stack->capacity = 0;
2672
66.2k
    stack->size = 0;
2673
66.2k
}
2674
2675
typedef enum {
2676
    // The loaded reference is still on the stack when the local is killed
2677
    SUPPORT_KILLED  = 1,
2678
    // The loaded reference is stored into a local
2679
    STORED_AS_LOCAL = 2,
2680
    // The loaded reference is still on the stack at the end of the basic block
2681
    REF_UNCONSUMED  = 4,
2682
} LoadFastInstrFlag;
2683
2684
static void
2685
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2686
158k
{
2687
744k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2688
586k
        ref r = ref_stack_at(refs, i);
2689
586k
        if (r.local == local) {
2690
800
            assert(r.instr >= 0);
2691
800
            instr_flags[r.instr] |= SUPPORT_KILLED;
2692
800
        }
2693
586k
    }
2694
158k
}
2695
2696
static void
2697
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2698
150k
{
2699
150k
    kill_local(instr_flags, refs, local);
2700
150k
    if (r.instr != DUMMY_INSTR) {
2701
139k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2702
139k
    }
2703
150k
}
2704
2705
static void
2706
load_fast_push_block(basicblock ***sp, basicblock *target,
2707
                     Py_ssize_t start_depth)
2708
338k
{
2709
338k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2710
338k
    if (!target->b_visited) {
2711
236k
        target->b_visited = 1;
2712
236k
        *(*sp)++ = target;
2713
236k
    }
2714
338k
}
2715
2716
/*
2717
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2718
 * load borrowed references onto the operand stack.
2719
 *
2720
 * This is only safe when we can prove that the reference in the frame outlives
2721
 * the borrowed reference produced by the instruction. We make this tractable
2722
 * by enforcing the following lifetimes:
2723
 *
2724
 * 1. Borrowed references loaded onto the operand stack live until the end of
2725
 *    the instruction that consumes them from the stack. Any borrowed
2726
 *    references that would escape into the heap (e.g. into frame objects or
2727
 *    generators) are converted into new, strong references.
2728
 *
2729
 * 2. Locals live until they are either killed by an instruction
2730
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2731
 *    via `f_locals` is added to a tuple owned by the frame object.
2732
 *
2733
 * To simplify the problem of detecting which supporting references in the
2734
 * frame are killed by instructions that overwrite locals, we only allow
2735
 * borrowed references to be stored as a local in the frame if they were passed
2736
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2737
 * strong references.
2738
 *
2739
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2740
 * that meet the following criteria:
2741
 *
2742
 * 1. The produced reference must be consumed from the stack before the
2743
 *    supporting reference in the frame is killed.
2744
 *
2745
 * 2. The produced reference cannot be stored as a local.
2746
 *
2747
 * We use abstract interpretation to identify instructions that meet these
2748
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2749
 * stack of abstract references and note any instructions that violate the
2750
 * criteria above. Once we've processed all the instructions in a block, any
2751
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2752
 */
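Criterion 1 in miniature: if a local is killed while a borrowed reference loaded from it is still on the abstract stack, that load must keep its owning form. A toy version of the kill_local() marking step (the flag value mirrors SUPPORT_KILLED in spirit only):

#include <stdio.h>

#define TOY_SUPPORT_KILLED 1

typedef struct { int instr; int local; } toy_ref;

int main(void)
{
    unsigned char flags[2] = {0};
    toy_ref stack[2];
    int size = 0;

    stack[size++] = (toy_ref){0, 0};   /* instr 0: LOAD_FAST 0 pushes a borrowed ref */

    /* instr 1: DELETE_FAST 0 kills local 0 while that ref is still live,
     * so the load is flagged and cannot be strength-reduced. */
    for (int i = 0; i < size; i++) {
        if (stack[i].local == 0) {
            flags[stack[i].instr] |= TOY_SUPPORT_KILLED;
        }
    }
    printf("instr 0 flags: %d\n", flags[0]);   /* prints 1 */
    return 0;
}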
2753
static int
2754
optimize_load_fast(cfg_builder *g)
2755
66.2k
{
2756
66.2k
    int status;
2757
66.2k
    ref_stack refs = {0};
2758
66.2k
    int max_instrs = 0;
2759
66.2k
    basicblock *entryblock = g->g_entryblock;
2760
642k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2761
576k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2762
576k
    }
2763
66.2k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2764
66.2k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2765
66.2k
    if (instr_flags == NULL) {
2766
0
        PyErr_NoMemory();
2767
0
        return ERROR;
2768
0
    }
2769
66.2k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2770
66.2k
    if (blocks == NULL) {
2771
0
        status = ERROR;
2772
0
        goto done;
2773
0
    }
2774
66.2k
    basicblock **sp = blocks;
2775
66.2k
    *sp = entryblock;
2776
66.2k
    sp++;
2777
66.2k
    entryblock->b_startdepth = 0;
2778
66.2k
    entryblock->b_visited = 1;
2779
2780
66.2k
    #define PUSH_REF(instr, local)                \
2781
15.3M
        do {                                      \
2782
15.3M
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2783
0
                status = ERROR;                   \
2784
0
                goto done;                        \
2785
0
            }                                     \
2786
15.3M
        } while(0)
2787
2788
368k
    while (sp != blocks) {
2789
302k
        basicblock *block = *--sp;
2790
302k
        assert(block->b_startdepth > -1);
2791
2792
        // Reset per-block state.
2793
302k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2794
2795
        // Reset the stack of refs. We don't track references on the stack
2796
        // across basic blocks, but the bytecode will expect their
2797
        // presence. Add dummy references as necessary.
2798
302k
        ref_stack_clear(&refs);
2799
13.1M
        for (int i = 0; i < block->b_startdepth; i++) {
2800
12.8M
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2801
12.8M
        }
2802
2803
3.72M
        for (int i = 0; i < block->b_iused; i++) {
2804
3.42M
            cfg_instr *instr = &block->b_instr[i];
2805
3.42M
            int opcode = instr->i_opcode;
2806
3.42M
            int oparg = instr->i_oparg;
2807
3.42M
            assert(opcode != EXTENDED_ARG);
2808
3.42M
            switch (opcode) {
2809
                // Opcodes that load and store locals
2810
4.62k
                case DELETE_FAST: {
2811
4.62k
                    kill_local(instr_flags, &refs, oparg);
2812
4.62k
                    break;
2813
0
                }
2814
2815
53.5k
                case LOAD_FAST: {
2816
53.5k
                    PUSH_REF(i, oparg);
2817
53.5k
                    break;
2818
53.5k
                }
2819
2820
53.5k
                case LOAD_FAST_AND_CLEAR: {
2821
2.89k
                    kill_local(instr_flags, &refs, oparg);
2822
2.89k
                    PUSH_REF(i, oparg);
2823
2.89k
                    break;
2824
2.89k
                }
2825
2826
2.89k
                case LOAD_FAST_LOAD_FAST: {
2827
1.85k
                    PUSH_REF(i, oparg >> 4);
2828
1.85k
                    PUSH_REF(i, oparg & 15);
2829
1.85k
                    break;
2830
1.85k
                }
2831
2832
107k
                case STORE_FAST: {
2833
107k
                    ref r = ref_stack_pop(&refs);
2834
107k
                    store_local(instr_flags, &refs, oparg, r);
2835
107k
                    break;
2836
1.85k
                }
2837
2838
7.72k
                case STORE_FAST_LOAD_FAST: {
2839
                    // STORE_FAST
2840
7.72k
                    ref r = ref_stack_pop(&refs);
2841
7.72k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2842
                    // LOAD_FAST
2843
7.72k
                    PUSH_REF(i, oparg & 15);
2844
7.72k
                    break;
2845
7.72k
                }
2846
2847
17.7k
                case STORE_FAST_STORE_FAST: {
2848
                    // STORE_FAST
2849
17.7k
                    ref r = ref_stack_pop(&refs);
2850
17.7k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2851
                    // STORE_FAST
2852
17.7k
                    r = ref_stack_pop(&refs);
2853
17.7k
                    store_local(instr_flags, &refs, oparg & 15, r);
2854
17.7k
                    break;
2855
7.72k
                }
2856
2857
                // Opcodes that shuffle values on the stack
2858
184k
                case COPY: {
2859
184k
                    assert(oparg > 0);
2860
184k
                    Py_ssize_t idx = refs.size - oparg;
2861
184k
                    ref r = ref_stack_at(&refs, idx);
2862
184k
                    PUSH_REF(r.instr, r.local);
2863
184k
                    break;
2864
184k
                }
2865
2866
184k
                case SWAP: {
2867
109k
                    assert(oparg >= 2);
2868
109k
                    ref_stack_swap_top(&refs, oparg);
2869
109k
                    break;
2870
109k
                }
2871
2872
                // We treat opcodes that do not consume all of their inputs on
2873
                // a case-by-case basis, as we have no generic way of knowing
2874
                // how many inputs should be left on the stack.
2875
2876
                // Opcodes that consume no inputs
2877
3.62k
                case FORMAT_SIMPLE:
2878
4.05k
                case GET_ANEXT:
2879
8.49k
                case GET_ITER:
2880
24.8k
                case GET_LEN:
2881
24.8k
                case GET_YIELD_FROM_ITER:
2882
31.8k
                case IMPORT_FROM:
2883
31.8k
                case MATCH_KEYS:
2884
32.5k
                case MATCH_MAPPING:
2885
35.7k
                case MATCH_SEQUENCE:
2886
35.7k
                case WITH_EXCEPT_START: {
2887
35.7k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2888
35.7k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2889
35.7k
                    int net_pushed = num_pushed - num_popped;
2890
35.7k
                    assert(net_pushed >= 0);
2891
67.8k
                    for (int i = 0; i < net_pushed; i++) {
2892
32.0k
                        PUSH_REF(i, NOT_LOCAL);
2893
32.0k
                    }
2894
35.7k
                    break;
2895
35.7k
                }
2896
2897
                // Opcodes that consume some inputs and push no new values
2898
35.7k
                case DICT_MERGE:
2899
1.27k
                case DICT_UPDATE:
2900
36.9k
                case LIST_APPEND:
2901
44.2k
                case LIST_EXTEND:
2902
45.1k
                case MAP_ADD:
2903
45.1k
                case RERAISE:
2904
58.7k
                case SET_ADD:
2905
63.0k
                case SET_UPDATE: {
2906
63.0k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2907
63.0k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2908
63.0k
                    int net_popped = num_popped - num_pushed;
2909
63.0k
                    assert(net_popped > 0);
2910
127k
                    for (int i = 0; i < net_popped; i++) {
2911
63.9k
                        ref_stack_pop(&refs);
2912
63.9k
                    }
2913
63.0k
                    break;
2914
63.0k
                }
2915
2916
11.8k
                case END_SEND:
2917
43.8k
                case SET_FUNCTION_ATTRIBUTE: {
2918
43.8k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2919
43.8k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2920
43.8k
                    ref tos = ref_stack_pop(&refs);
2921
43.8k
                    ref_stack_pop(&refs);
2922
43.8k
                    PUSH_REF(tos.instr, tos.local);
2923
43.8k
                    break;
2924
43.8k
                }
2925
2926
                // Opcodes that consume some inputs and push new values
2927
43.8k
                case CHECK_EXC_MATCH: {
2928
0
                    ref_stack_pop(&refs);
2929
0
                    PUSH_REF(i, NOT_LOCAL);
2930
0
                    break;
2931
0
                }
2932
2933
4.43k
                case FOR_ITER: {
2934
4.43k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2935
4.43k
                    PUSH_REF(i, NOT_LOCAL);
2936
4.43k
                    break;
2937
4.43k
                }
2938
2939
13.0k
                case LOAD_ATTR:
2940
13.8k
                case LOAD_SUPER_ATTR: {
2941
13.8k
                    ref self = ref_stack_pop(&refs);
2942
13.8k
                    if (opcode == LOAD_SUPER_ATTR) {
2943
805
                        ref_stack_pop(&refs);
2944
805
                        ref_stack_pop(&refs);
2945
805
                    }
2946
13.8k
                    PUSH_REF(i, NOT_LOCAL);
2947
13.8k
                    if (oparg & 1) {
2948
                        // A method call; conservatively assume that self is pushed
2949
                        // back onto the stack
2950
1.16k
                        PUSH_REF(self.instr, self.local);
2951
1.16k
                    }
2952
13.8k
                    break;
2953
13.8k
                }
2954
2955
17.9k
                case LOAD_SPECIAL:
2956
17.9k
                case PUSH_EXC_INFO: {
2957
17.9k
                    ref tos = ref_stack_pop(&refs);
2958
17.9k
                    PUSH_REF(i, NOT_LOCAL);
2959
17.9k
                    PUSH_REF(tos.instr, tos.local);
2960
17.9k
                    break;
2961
17.9k
                }
2962
2963
17.9k
                case SEND: {
2964
11.8k
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2965
11.8k
                    ref_stack_pop(&refs);
2966
11.8k
                    PUSH_REF(i, NOT_LOCAL);
2967
11.8k
                    break;
2968
11.8k
                }
2969
2970
                // Opcodes that consume all of their inputs
2971
2.74M
                default: {
2972
2.74M
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2973
2.74M
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2974
2.74M
                    if (HAS_TARGET(instr->i_opcode)) {
2975
133k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2976
133k
                    }
2977
2.74M
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2978
                        // Block push opcodes only affect the stack when jumping
2979
                        // to the target.
2980
4.87M
                        for (int j = 0; j < num_popped; j++) {
2981
2.12M
                            ref_stack_pop(&refs);
2982
2.12M
                        }
2983
4.85M
                        for (int j = 0; j < num_pushed; j++) {
2984
2.10M
                            PUSH_REF(i, NOT_LOCAL);
2985
2.10M
                        }
2986
2.74M
                    }
2987
2.74M
                    break;
2988
2.74M
                }
2989
3.42M
            }
2990
3.42M
        }
2991
2992
        // Push fallthrough block
2993
302k
        cfg_instr *term = basicblock_last_instr(block);
2994
302k
        if (term != NULL && block->b_next != NULL &&
2995
302k
            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
2996
276k
              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
2997
188k
            assert(BB_HAS_FALLTHROUGH(block));
2998
188k
            load_fast_push_block(&sp, block->b_next, refs.size);
2999
188k
        }
3000
3001
        // Mark instructions that produce values that are on the stack at the
3002
        // end of the basic block
3003
13.2M
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3004
12.9M
            ref r = ref_stack_at(&refs, i);
3005
12.9M
            if (r.instr != -1) {
3006
264k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3007
264k
            }
3008
12.9M
        }
3009
3010
        // Optimize instructions
3011
3.72M
        for (int i = 0; i < block->b_iused; i++) {
3012
3.42M
            if (!instr_flags[i]) {
3013
3.11M
                cfg_instr *instr = &block->b_instr[i];
3014
3.11M
                switch (instr->i_opcode) {
3015
50.5k
                    case LOAD_FAST:
3016
50.5k
                        instr->i_opcode = LOAD_FAST_BORROW;
3017
50.5k
                        break;
3018
1.15k
                    case LOAD_FAST_LOAD_FAST:
3019
1.15k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3020
1.15k
                        break;
3021
3.06M
                    default:
3022
3.06M
                        break;
3023
3.11M
                }
3024
3.11M
            }
3025
3.42M
        }
3026
302k
    }
3027
3028
66.2k
    #undef PUSH_REF
3029
3030
66.2k
    status = SUCCESS;
3031
3032
66.2k
done:
3033
66.2k
    ref_stack_fini(&refs);
3034
66.2k
    PyMem_Free(instr_flags);
3035
66.2k
    PyMem_Free(blocks);
3036
66.2k
    return status;
3037
66.2k
}
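
/* A minimal, standalone sketch of the ref-stack bookkeeping that
 * optimize_load_fast() performs above (editorial illustration; sketch_ref
 * and the other names are invented for the example and are not CPython
 * APIs). Loads push a ref tagged with the producing instruction;
 * consumers pop; producers of values still on the stack at the end of
 * the block must keep owning their reference. The real pass additionally
 * kills refs when their local is stored or deleted, which this sketch
 * omits. */

#include <stdio.h>

#define SKETCH_NOT_LOCAL (-1)
#define SKETCH_UNCONSUMED 1u

typedef struct { int instr; int local; } sketch_ref;

int
main(void)
{
    sketch_ref stack[8];
    int sp = 0;
    unsigned flags[3] = {0};

    /* i0: LOAD_FAST x  -> push a ref produced by instruction 0 */
    stack[sp++] = (sketch_ref){0, 0};
    /* i1: LOAD_FAST y  -> push a ref produced by instruction 1 */
    stack[sp++] = (sketch_ref){1, 1};
    /* i2: BINARY_OP    -> pop both inputs, push an anonymous result */
    sp -= 2;
    stack[sp++] = (sketch_ref){2, SKETCH_NOT_LOCAL};

    /* End of block: flag the producers of values still on the stack. */
    for (int i = 0; i < sp; i++) {
        flags[stack[i].instr] |= SKETCH_UNCONSUMED;
    }
    for (int i = 0; i < 3; i++) {
        /* Prints: instructions 0 and 1 "may borrow" (their refs were
         * consumed within the block), instruction 2 "must own". */
        printf("instr %d: %s\n", i,
               (flags[i] & SKETCH_UNCONSUMED) ? "must own" : "may borrow");
    }
    return 0;
}
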
3038
3039
// helper functions for add_checks_for_loads_of_uninitialized_variables
3040
static inline void
3041
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3042
1.74M
{
3043
    // Push b if the unsafe mask is giving us any new information.
3044
    // To avoid overflowing the stack, only allow each block once.
3045
    // Use b->b_visited=1 to mean that b is currently on the stack.
3046
1.74M
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3047
1.74M
    if (b->b_unsafe_locals_mask != both) {
3048
159k
        b->b_unsafe_locals_mask = both;
3049
        // More work left to do.
3050
159k
        if (!b->b_visited) {
3051
            // not on the stack, so push it.
3052
157k
            *(*sp)++ = b;
3053
157k
            b->b_visited = 1;
3054
157k
        }
3055
159k
    }
3056
1.74M
}
3057
3058
static void
3059
scan_block_for_locals(basicblock *b, basicblock ***sp)
3060
449k
{
3061
    // bit i is set if local i is potentially uninitialized
3062
449k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3063
3.27M
    for (int i = 0; i < b->b_iused; i++) {
3064
2.82M
        cfg_instr *instr = &b->b_instr[i];
3065
2.82M
        assert(instr->i_opcode != EXTENDED_ARG);
3066
2.82M
        if (instr->i_except != NULL) {
3067
1.27M
            maybe_push(instr->i_except, unsafe_mask, sp);
3068
1.27M
        }
3069
2.82M
        if (instr->i_oparg >= 64) {
3070
278k
            continue;
3071
278k
        }
3072
2.54M
        assert(instr->i_oparg >= 0);
3073
2.54M
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3074
2.54M
        switch (instr->i_opcode) {
3075
9.08k
            case DELETE_FAST:
3076
17.0k
            case LOAD_FAST_AND_CLEAR:
3077
33.0k
            case STORE_FAST_MAYBE_NULL:
3078
33.0k
                unsafe_mask |= bit;
3079
33.0k
                break;
3080
289k
            case STORE_FAST:
3081
289k
                unsafe_mask &= ~bit;
3082
289k
                break;
3083
6.83k
            case LOAD_FAST_CHECK:
3084
                // If this doesn't raise, then the local is defined.
3085
6.83k
                unsafe_mask &= ~bit;
3086
6.83k
                break;
3087
62.2k
            case LOAD_FAST:
3088
62.2k
                if (unsafe_mask & bit) {
3089
6.83k
                    instr->i_opcode = LOAD_FAST_CHECK;
3090
6.83k
                }
3091
62.2k
                unsafe_mask &= ~bit;
3092
62.2k
                break;
3093
2.54M
        }
3094
2.54M
    }
3095
449k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3096
227k
        maybe_push(b->b_next, unsafe_mask, sp);
3097
227k
    }
3098
449k
    cfg_instr *last = basicblock_last_instr(b);
3099
449k
    if (last && is_jump(last)) {
3100
198k
        assert(last->i_target != NULL);
3101
198k
        maybe_push(last->i_target, unsafe_mask, sp);
3102
198k
    }
3103
449k
}
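
/* Editorial sketch of the per-block mask updates above, in isolation
 * (toy_instr and the T_* opcodes are invented for the example). A set
 * bit means "maybe uninitialized": stores clear it, deletes set it, and
 * a load made while the bit is set must become LOAD_FAST_CHECK. */

#include <stdint.h>
#include <stdio.h>

enum { T_STORE, T_DELETE, T_LOAD };
typedef struct { int op; int arg; } toy_instr;

int
main(void)
{
    /* Locals 0 and 1; both start out "maybe uninitialized". */
    uint64_t unsafe = ((uint64_t)1 << 0) | ((uint64_t)1 << 1);
    toy_instr block[4] = {
        {T_STORE, 0},    /* x = ...   -> local 0 now safe          */
        {T_LOAD, 0},     /* use x     -> plain LOAD_FAST           */
        {T_DELETE, 0},   /* del x     -> local 0 unsafe again      */
        {T_LOAD, 1},     /* use y     -> needs LOAD_FAST_CHECK     */
    };
    for (int i = 0; i < 4; i++) {
        uint64_t bit = (uint64_t)1 << block[i].arg;
        switch (block[i].op) {
            case T_STORE:  unsafe &= ~bit; break;
            case T_DELETE: unsafe |= bit;  break;
            case T_LOAD:
                printf("load of local %d -> %s\n", block[i].arg,
                       (unsafe & bit) ? "LOAD_FAST_CHECK" : "LOAD_FAST");
                unsafe &= ~bit;  /* if the load didn't raise, it's defined */
                break;
        }
    }
    return 0;
}
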
3104
3105
static int
3106
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3107
203
{
3108
203
    assert(nlocals > 64);
3109
203
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3110
203
    if (states == NULL) {
3111
0
        PyErr_NoMemory();
3112
0
        return ERROR;
3113
0
    }
3114
203
    Py_ssize_t blocknum = 0;
3115
    // state[i - 64] == blocknum if local i is guaranteed to
3116
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3117
    // STORE_FAST within that basicblock (not followed by
3118
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3119
5.43k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3120
5.23k
        blocknum++;
3121
98.0k
        for (int i = 0; i < b->b_iused; i++) {
3122
92.8k
            cfg_instr *instr = &b->b_instr[i];
3123
92.8k
            assert(instr->i_opcode != EXTENDED_ARG);
3124
92.8k
            int arg = instr->i_oparg;
3125
92.8k
            if (arg < 64) {
3126
84.9k
                continue;
3127
84.9k
            }
3128
7.90k
            assert(arg >= 0);
3129
7.90k
            switch (instr->i_opcode) {
3130
129
                case DELETE_FAST:
3131
178
                case LOAD_FAST_AND_CLEAR:
3132
276
                case STORE_FAST_MAYBE_NULL:
3133
276
                    states[arg - 64] = blocknum - 1;
3134
276
                    break;
3135
3.19k
                case STORE_FAST:
3136
3.19k
                    states[arg - 64] = blocknum;
3137
3.19k
                    break;
3138
509
                case LOAD_FAST:
3139
509
                    if (states[arg - 64] != blocknum) {
3140
282
                        instr->i_opcode = LOAD_FAST_CHECK;
3141
282
                    }
3142
509
                    states[arg - 64] = blocknum;
3143
509
                    break;
3145
7.90k
            }
3146
7.90k
        }
3147
5.23k
    }
3148
203
    PyMem_Free(states);
3149
203
    return SUCCESS;
3150
203
}
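
/* The timestamp trick above in isolation (editorial sketch; the names
 * are illustrative). states[k] records the number of the last block in
 * which high local k was known-initialized; a stamp from any earlier
 * block means the load must be checked, because initialization info is
 * deliberately not carried between blocks. */

#include <stdio.h>

int
main(void)
{
    int states[1] = {0};    /* one local above 64, never stamped yet */
    int blocknum = 0;

    blocknum++;             /* block 1: store, then load            */
    states[0] = blocknum;   /* the store stamps the local           */
    printf("block 1 load: %s\n",
           states[0] == blocknum ? "safe" : "LOAD_FAST_CHECK");

    blocknum++;             /* block 2: load with no store here     */
    printf("block 2 load: %s\n",   /* stale stamp -> must be checked */
           states[0] == blocknum ? "safe" : "LOAD_FAST_CHECK");
    return 0;
}
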
3151
3152
static int
3153
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3154
66.2k
{
3155
66.2k
    assert(PyList_CheckExact(consts));
3156
66.2k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3157
66.2k
    if (nconsts == 0) {
3158
170
        return SUCCESS;  /* nothing to do */
3159
170
    }
3160
3161
66.0k
    Py_ssize_t *index_map = NULL;
3162
66.0k
    Py_ssize_t *reverse_index_map = NULL;
3163
66.0k
    int err = ERROR;
3164
3165
66.0k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3166
66.0k
    if (index_map == NULL) {
3167
0
        goto end;
3168
0
    }
3169
456k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3170
390k
        index_map[i] = -1;
3171
390k
    }
3172
    // The first constant may be the docstring; always keep it.
3173
66.0k
    index_map[0] = 0;
3174
3175
    /* mark used consts */
3176
629k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3177
4.80M
        for (int i = 0; i < b->b_iused; i++) {
3178
4.24M
            int opcode = b->b_instr[i].i_opcode;
3179
4.24M
            if (OPCODE_HAS_CONST(opcode)) {
3180
495k
                int index = b->b_instr[i].i_oparg;
3181
495k
                index_map[index] = index;
3182
495k
            }
3183
4.24M
        }
3184
562k
    }
3185
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3186
    /* condense consts */
3187
66.0k
    Py_ssize_t n_used_consts = 0;
3188
522k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3189
456k
        if (index_map[i] != -1) {
3190
237k
            assert(index_map[i] == i);
3191
237k
            index_map[n_used_consts++] = index_map[i];
3192
237k
        }
3193
456k
    }
3194
66.0k
    if (n_used_consts == nconsts) {
3195
        /* nothing to do */
3196
30.9k
        err = SUCCESS;
3197
30.9k
        goto end;
3198
30.9k
    }
3199
3200
    /* move all used consts to the beginning of the consts list */
3201
35.1k
    assert(n_used_consts < nconsts);
3202
164k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3203
129k
        Py_ssize_t old_index = index_map[i];
3204
129k
        assert(i <= old_index && old_index < nconsts);
3205
129k
        if (i != old_index) {
3206
84.3k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3207
0
            assert(value != NULL);
3208
84.3k
            PyList_SetItem(consts, i, Py_NewRef(value));
3209
84.3k
        }
3210
129k
    }
3211
3212
    /* truncate the consts list at its new size */
3213
35.1k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3214
0
        goto end;
3215
0
    }
3216
    /* adjust const indices in the bytecode */
3217
35.1k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3218
35.1k
    if (reverse_index_map == NULL) {
3219
0
        goto end;
3220
0
    }
3221
383k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3222
348k
        reverse_index_map[i] = -1;
3223
348k
    }
3224
164k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3225
129k
        assert(index_map[i] != -1);
3226
129k
        assert(reverse_index_map[index_map[i]] == -1);
3227
129k
        reverse_index_map[index_map[i]] = i;
3228
129k
    }
3229
3230
474k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3231
3.38M
        for (int i = 0; i < b->b_iused; i++) {
3232
2.94M
            int opcode = b->b_instr[i].i_opcode;
3233
2.94M
            if (OPCODE_HAS_CONST(opcode)) {
3234
362k
                int index = b->b_instr[i].i_oparg;
3235
362k
                assert(reverse_index_map[index] >= 0);
3236
362k
                assert(reverse_index_map[index] < n_used_consts);
3237
362k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3238
362k
            }
3239
2.94M
        }
3240
439k
    }
3241
3242
35.1k
    err = SUCCESS;
3243
66.0k
end:
3244
66.0k
    PyMem_Free(index_map);
3245
66.0k
    PyMem_Free(reverse_index_map);
3246
66.0k
    return err;
3247
35.1k
}
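
/* The forward/reverse index-map pattern above, in isolation (editorial
 * sketch with generic values). The forward map compacts the surviving
 * slots to the front; the reverse map then rewrites old opargs to their
 * new positions. */

#include <stdio.h>

int
main(void)
{
    /* used[i] == i if slot i is referenced, -1 otherwise
     * (slot 0 is always kept, like the possible docstring above). */
    int used[5] = {0, -1, 2, -1, 4};
    int fwd[5], rev[5];
    int n_used = 0;

    for (int i = 0; i < 5; i++) {
        rev[i] = -1;
        if (used[i] != -1) {
            fwd[n_used++] = i;   /* old index of the n-th survivor */
        }
    }
    for (int i = 0; i < n_used; i++) {
        rev[fwd[i]] = i;         /* old index -> new index */
    }
    /* An instruction whose oparg was 4 is rewritten to rev[4] == 2. */
    printf("old index 4 -> new index %d\n", rev[4]);
    return 0;
}
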
3248
3249
3250
3251
static int
3252
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3253
                                                int nlocals,
3254
                                                int nparams)
3255
66.2k
{
3256
66.2k
    if (nlocals == 0) {
3257
28.8k
        return SUCCESS;
3258
28.8k
    }
3259
37.3k
    if (nlocals > 64) {
3260
        // To avoid O(nlocals**2) compilation, locals beyond the first
3261
        // 64 are only analyzed one basicblock at a time: initialization
3262
        // info is not passed between basicblocks.
3263
203
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3264
0
            return ERROR;
3265
0
        }
3266
203
        nlocals = 64;
3267
203
    }
3268
37.3k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3269
37.3k
    if (stack == NULL) {
3270
0
        return ERROR;
3271
0
    }
3272
37.3k
    basicblock **sp = stack;
3273
3274
    // First origin of being uninitialized:
3275
    // The non-parameter locals in the entry block.
3276
37.3k
    uint64_t start_mask = 0;
3277
134k
    for (int i = nparams; i < nlocals; i++) {
3278
96.8k
        start_mask |= (uint64_t)1 << i;
3279
96.8k
    }
3280
37.3k
    maybe_push(entryblock, start_mask, &sp);
3281
3282
    // Second origin of being uninitialized:
3283
    // There could be DELETE_FAST somewhere, so
3284
    // be sure to scan each basicblock at least once.
3285
329k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
291k
        scan_block_for_locals(b, &sp);
3287
291k
    }
3288
    // Now propagate the uncertainty from the origins we found: Use
3289
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3290
194k
    while (sp > stack) {
3291
157k
        basicblock *b = *--sp;
3292
        // mark as no longer on stack
3293
157k
        b->b_visited = 0;
3294
157k
        scan_block_for_locals(b, &sp);
3295
157k
    }
3296
37.3k
    PyMem_Free(stack);
3297
37.3k
    return SUCCESS;
3298
37.3k
}
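
/* Editorial sketch of the start-mask construction above, with example
 * values: parameters are always bound on entry, so only the plain
 * locals are seeded as "maybe uninitialized". */

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    int nparams = 2, nlocals = 5;    /* two args, three plain locals */
    uint64_t start_mask = 0;
    for (int i = nparams; i < nlocals; i++) {
        start_mask |= (uint64_t)1 << i;
    }
    /* Bits 2..4 are set: prints start_mask = 0x1c */
    printf("start_mask = 0x%llx\n", (unsigned long long)start_mask);
    return 0;
}
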
3299
3300
3301
static int
3302
43.3k
mark_warm(basicblock *entryblock) {
3303
43.3k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3304
43.3k
    if (stack == NULL) {
3305
0
        return ERROR;
3306
0
    }
3307
43.3k
    basicblock **sp = stack;
3308
3309
43.3k
    *sp++ = entryblock;
3310
43.3k
    entryblock->b_visited = 1;
3311
348k
    while (sp > stack) {
3312
305k
        basicblock *b = *(--sp);
3313
305k
        assert(!b->b_except_handler);
3314
305k
        b->b_warm = 1;
3315
305k
        basicblock *next = b->b_next;
3316
305k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3317
187k
            *sp++ = next;
3318
187k
            next->b_visited = 1;
3319
187k
        }
3320
2.69M
        for (int i = 0; i < b->b_iused; i++) {
3321
2.39M
            cfg_instr *instr = &b->b_instr[i];
3322
2.39M
            if (is_jump(instr) && !instr->i_target->b_visited) {
3323
73.9k
                *sp++ = instr->i_target;
3324
73.9k
                instr->i_target->b_visited = 1;
3325
73.9k
            }
3326
2.39M
        }
3327
305k
    }
3328
43.3k
    PyMem_Free(stack);
3329
43.3k
    return SUCCESS;
3330
43.3k
}
3331
3332
static int
3333
43.3k
mark_cold(basicblock *entryblock) {
3334
584k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3335
540k
        assert(!b->b_cold && !b->b_warm);
3336
540k
    }
3337
43.3k
    if (mark_warm(entryblock) < 0) {
3338
0
        return ERROR;
3339
0
    }
3340
3341
43.3k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3342
43.3k
    if (stack == NULL) {
3343
0
        return ERROR;
3344
0
    }
3345
3346
43.3k
    basicblock **sp = stack;
3347
584k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3348
540k
        if (b->b_except_handler) {
3349
68.5k
            assert(!b->b_warm);
3350
68.5k
            *sp++ = b;
3351
68.5k
            b->b_visited = 1;
3352
68.5k
        }
3353
540k
    }
3354
3355
222k
    while (sp > stack) {
3356
179k
        basicblock *b = *(--sp);
3357
179k
        b->b_cold = 1;
3358
179k
        basicblock *next = b->b_next;
3359
179k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3360
96.8k
            if (!next->b_warm && !next->b_visited) {
3361
79.8k
                *sp++ = next;
3362
79.8k
                next->b_visited = 1;
3363
79.8k
            }
3364
96.8k
        }
3365
920k
        for (int i = 0; i < b->b_iused; i++) {
3366
741k
            cfg_instr *instr = &b->b_instr[i];
3367
741k
            if (is_jump(instr)) {
3368
83.9k
                assert(i == b->b_iused - 1);
3369
83.9k
                basicblock *target = b->b_instr[i].i_target;
3370
83.9k
                if (!target->b_warm && !target->b_visited) {
3371
30.6k
                    *sp++ = target;
3372
30.6k
                    target->b_visited = 1;
3373
30.6k
                }
3374
83.9k
            }
3375
741k
        }
3376
179k
    }
3377
43.3k
    PyMem_Free(stack);
3378
43.3k
    return SUCCESS;
3379
43.3k
}
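
/* The two-pass warm/cold marking above, reduced to a toy graph
 * (editorial sketch; succ, is_handler and flood() are invented for the
 * example). Pass 1 floods from the entry along normal edges and marks
 * blocks warm; pass 2 floods from the exception handlers, refusing to
 * cross into warm blocks, and marks the rest cold. */

#include <stdbool.h>
#include <stdio.h>

/* Tiny CFG: 0 -> 1 by fallthrough; handler 2 -> 3 by fallthrough. */
enum { NBLOCKS = 4 };
static const int succ[NBLOCKS][2] = {{1, -1}, {-1, -1}, {3, -1}, {-1, -1}};
static const bool is_handler[NBLOCKS] = {false, false, true, false};

static void
flood(int start, bool *seen, bool *mark, const bool *stop_at)
{
    int stack[NBLOCKS], sp = 0;
    stack[sp++] = start;
    seen[start] = true;
    while (sp) {
        int b = stack[--sp];
        mark[b] = true;
        for (int j = 0; j < 2 && succ[b][j] != -1; j++) {
            int n = succ[b][j];
            if (!seen[n] && !(stop_at && stop_at[n])) {
                seen[n] = true;
                stack[sp++] = n;
            }
        }
    }
}

int
main(void)
{
    bool warm[NBLOCKS] = {0}, cold[NBLOCKS] = {0};
    bool seen1[NBLOCKS] = {0}, seen2[NBLOCKS] = {0};
    flood(0, seen1, warm, NULL);           /* pass 1: warm from entry */
    for (int b = 0; b < NBLOCKS; b++) {    /* pass 2: cold from handlers */
        if (is_handler[b] && !warm[b]) {
            flood(b, seen2, cold, warm);
        }
    }
    for (int b = 0; b < NBLOCKS; b++) {    /* prints: warm warm cold cold */
        printf("block %d: %s\n", b,
               warm[b] ? "warm" : cold[b] ? "cold" : "unreachable");
    }
    return 0;
}
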
3380
3381
3382
static int
3383
66.2k
push_cold_blocks_to_end(cfg_builder *g) {
3384
66.2k
    basicblock *entryblock = g->g_entryblock;
3385
66.2k
    if (entryblock->b_next == NULL) {
3386
        /* single basicblock, no need to reorder */
3387
22.8k
        return SUCCESS;
3388
22.8k
    }
3389
43.3k
    RETURN_IF_ERROR(mark_cold(entryblock));
3390
3391
43.3k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3392
3393
    /* If we have a cold block with fallthrough to a warm block, add */
3394
    /* an explicit jump instead of fallthrough */
3395
595k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3396
552k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3397
11.8k
            basicblock *explicit_jump = cfg_builder_new_block(g);
3398
11.8k
            if (explicit_jump == NULL) {
3399
0
                return ERROR;
3400
0
            }
3401
11.8k
            if (!IS_LABEL(b->b_next->b_label)) {
3402
0
                b->b_next->b_label.id = next_lbl++;
3403
0
            }
3404
11.8k
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3405
11.8k
                             NO_LOCATION);
3406
11.8k
            explicit_jump->b_cold = 1;
3407
11.8k
            explicit_jump->b_next = b->b_next;
3408
11.8k
            explicit_jump->b_predecessors = 1;
3409
11.8k
            b->b_next = explicit_jump;
3410
3411
            /* set target */
3412
11.8k
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3413
11.8k
            last->i_target = explicit_jump->b_next;
3414
11.8k
        }
3415
552k
    }
3416
3417
43.3k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3418
43.3k
    basicblock *cold_blocks = NULL;
3419
43.3k
    basicblock *cold_blocks_tail = NULL;
3420
3421
43.3k
    basicblock *b = entryblock;
3422
78.9k
    while (b->b_next) {
3423
78.9k
        assert(!b->b_cold);
3424
397k
        while (b->b_next && !b->b_next->b_cold) {
3425
318k
            b = b->b_next;
3426
318k
        }
3427
78.9k
        if (b->b_next == NULL) {
3428
            /* no more cold blocks */
3429
43.3k
            break;
3430
43.3k
        }
3431
3432
        /* b->b_next is the beginning of a cold streak */
3433
35.5k
        assert(!b->b_cold && b->b_next->b_cold);
3434
3435
35.5k
        basicblock *b_end = b->b_next;
3436
190k
        while (b_end->b_next && b_end->b_next->b_cold) {
3437
155k
            b_end = b_end->b_next;
3438
155k
        }
3439
3440
        /* b_end is the end of the cold streak */
3441
35.5k
        assert(b_end && b_end->b_cold);
3442
35.5k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3443
3444
35.5k
        if (cold_blocks == NULL) {
3445
5.95k
            cold_blocks = b->b_next;
3446
5.95k
        }
3447
29.6k
        else {
3448
29.6k
            cold_blocks_tail->b_next = b->b_next;
3449
29.6k
        }
3450
35.5k
        cold_blocks_tail = b_end;
3451
35.5k
        b->b_next = b_end->b_next;
3452
35.5k
        b_end->b_next = NULL;
3453
35.5k
    }
3454
43.3k
    assert(b != NULL && b->b_next == NULL);
3455
43.3k
    b->b_next = cold_blocks;
3456
3457
43.3k
    if (cold_blocks != NULL) {
3458
5.95k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3459
5.95k
    }
3460
43.3k
    return SUCCESS;
3461
43.3k
}
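
/* The list surgery above on a toy singly linked list (editorial sketch;
 * node is invented for the example). Each maximal cold streak is
 * unlinked in place and all streaks are appended after the last warm
 * block, preserving their internal order. */

#include <stdbool.h>
#include <stdio.h>

typedef struct node { int id; bool cold; struct node *next; } node;

int
main(void)
{
    /* 0(warm) -> 1(cold) -> 2(cold) -> 3(warm) */
    node n3 = {3, false, NULL}, n2 = {2, true, &n3};
    node n1 = {1, true, &n2},  n0 = {0, false, &n1};
    node *head = &n0, *cold_head = NULL, *cold_tail = NULL;

    node *b = head;
    while (b->next) {
        while (b->next && !b->next->cold) {
            b = b->next;                  /* skip the warm prefix */
        }
        if (b->next == NULL) {
            break;                        /* no more cold streaks */
        }
        node *run = b->next, *end = run;  /* [run..end] is one streak */
        while (end->next && end->next->cold) {
            end = end->next;
        }
        if (cold_head == NULL) { cold_head = run; }
        else                   { cold_tail->next = run; }
        cold_tail = end;
        b->next = end->next;              /* unlink the streak */
        end->next = NULL;
    }
    b->next = cold_head;                  /* append all cold streaks */

    for (node *p = head; p != NULL; p = p->next) {
        printf("%d ", p->id);             /* prints: 0 3 1 2 */
    }
    printf("\n");
    return 0;
}
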
3462
3463
static int
3464
convert_pseudo_conditional_jumps(cfg_builder *g)
3465
66.2k
{
3466
66.2k
    basicblock *entryblock = g->g_entryblock;
3467
641k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3468
4.79M
        for (int i = 0; i < b->b_iused; i++) {
3469
4.22M
            cfg_instr *instr = &b->b_instr[i];
3470
4.22M
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3471
3.19k
                assert(i == b->b_iused - 1);
3472
3.19k
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3473
1.68k
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3474
3.19k
                location loc = instr->i_loc;
3475
3.19k
                cfg_instr copy = {
3476
3.19k
                            .i_opcode = COPY,
3477
3.19k
                            .i_oparg = 1,
3478
3.19k
                            .i_loc = loc,
3479
3.19k
                            .i_target = NULL,
3480
3.19k
                };
3481
3.19k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3482
3.19k
                cfg_instr to_bool = {
3483
3.19k
                            .i_opcode = TO_BOOL,
3484
3.19k
                            .i_oparg = 0,
3485
3.19k
                            .i_loc = loc,
3486
3.19k
                            .i_target = NULL,
3487
3.19k
                };
3488
3.19k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3489
3.19k
            }
3490
4.22M
        }
3491
575k
    }
3492
66.2k
    return SUCCESS;
3493
66.2k
}
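
/* Concretely, the rewrite above turns each pseudo conditional jump such
 * as
 *
 *     JUMP_IF_FALSE  L1
 *
 * into three real instructions at the same source location:
 *
 *     COPY 1                (keep the tested value on the stack)
 *     TO_BOOL               (coerce the copy to an exact bool)
 *     POP_JUMP_IF_FALSE L1  (pop the bool and branch)
 *
 * so the tested value survives on the stack whether or not the branch
 * is taken, matching the pseudo op's semantics. */
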
3494
3495
static int
3496
convert_pseudo_ops(cfg_builder *g)
3497
66.2k
{
3498
66.2k
    basicblock *entryblock = g->g_entryblock;
3499
641k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3500
4.84M
        for (int i = 0; i < b->b_iused; i++) {
3501
4.26M
            cfg_instr *instr = &b->b_instr[i];
3502
4.26M
            if (is_block_push(instr)) {
3503
68.5k
                INSTR_SET_OP0(instr, NOP);
3504
68.5k
            }
3505
4.19M
            else if (instr->i_opcode == LOAD_CLOSURE) {
3506
30.3k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3507
30.3k
                instr->i_opcode = LOAD_FAST;
3508
30.3k
            }
3509
4.16M
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3510
8.09k
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3511
8.09k
                instr->i_opcode = STORE_FAST;
3512
8.09k
            }
3513
4.26M
        }
3514
575k
    }
3515
66.2k
    return remove_redundant_nops_and_jumps(g);
3516
66.2k
}
3517
3518
static inline bool
3519
1.12M
is_exit_or_eval_check_without_lineno(basicblock *b) {
3520
1.12M
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3521
299k
        return basicblock_has_no_lineno(b);
3522
299k
    }
3523
826k
    else {
3524
826k
        return false;
3525
826k
    }
3526
1.12M
}
3527
3528
3529
/* PEP 626 mandates that the f_lineno of a frame is correct
3530
 * after a frame terminates. It would be prohibitively expensive
3531
 * to continuously update the f_lineno field at runtime,
3532
 * so we make sure that all exiting instructions (raises and returns)
3533
 * have a valid line number, allowing us to compute f_lineno lazily.
3534
 * We can do this by duplicating the exit blocks without a line number
3535
 * so that none have more than one predecessor. We can then safely
3536
 * copy the line number from the sole predecessor block.
3537
 */
3538
static int
3539
duplicate_exits_without_lineno(cfg_builder *g)
3540
132k
{
3541
132k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3542
3543
    /* Copy all exit blocks without a line number that are targets of a jump.
3544
     */
3545
132k
    basicblock *entryblock = g->g_entryblock;
3546
1.27M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3547
1.13M
        cfg_instr *last = basicblock_last_instr(b);
3548
1.13M
        if (last == NULL) {
3549
116k
            continue;
3550
116k
        }
3551
1.02M
        if (is_jump(last)) {
3552
531k
            basicblock *target = next_nonempty_block(last->i_target);
3553
531k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3554
2.02k
                basicblock *new_target = copy_basicblock(g, target);
3555
2.02k
                if (new_target == NULL) {
3556
0
                    return ERROR;
3557
0
                }
3558
2.02k
                new_target->b_instr[0].i_loc = last->i_loc;
3559
2.02k
                last->i_target = new_target;
3560
2.02k
                target->b_predecessors--;
3561
2.02k
                new_target->b_predecessors = 1;
3562
2.02k
                new_target->b_next = target->b_next;
3563
2.02k
                new_target->b_label.id = next_lbl++;
3564
2.02k
                target->b_next = new_target;
3565
2.02k
            }
3566
531k
        }
3567
1.02M
    }
3568
3569
    /* Any remaining reachable exit blocks without a line number can only be reached by
3570
     * fall-through, and thus can only have a single predecessor */
3571
1.27M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3572
1.13M
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3573
594k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3574
13.8k
                cfg_instr *last = basicblock_last_instr(b);
3575
13.8k
                assert(last != NULL);
3576
13.8k
                b->b_next->b_instr[0].i_loc = last->i_loc;
3577
13.8k
            }
3578
594k
        }
3579
1.13M
    }
3580
132k
    return SUCCESS;
3581
132k
}
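
/* Illustration of the duplication above. Given two jumps into a shared
 * exit block E that has no line number,
 *
 *     B1 --jump--> E                B1 --jump--> E
 *     B2 --jump--> E      ==>       B2 --jump--> E'
 *
 * E' is a copy of E whose first instruction takes the location of B2's
 * jump. Every such exit then has a single predecessor, so a valid
 * location can safely be copied from it. */
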
3582
3583
3584
/* If an instruction has no line number, but its predecessor in the BB does,
3585
 * then copy the line number. If a successor block has no line number, and only
3586
 * one predecessor, then inherit the line number.
3587
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3588
 * Also reduces the size of the line number table,
3589
 * but has no impact on the generated line number events.
3590
 */
3591
static void
3592
132k
propagate_line_numbers(basicblock *entryblock) {
3593
1.27M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3594
1.13M
        cfg_instr *last = basicblock_last_instr(b);
3595
1.13M
        if (last == NULL) {
3596
116k
            continue;
3597
116k
        }
3598
3599
1.02M
        location prev_location = NO_LOCATION;
3600
10.9M
        for (int i = 0; i < b->b_iused; i++) {
3601
9.88M
            if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3602
649k
                b->b_instr[i].i_loc = prev_location;
3603
649k
            }
3604
9.23M
            else {
3605
9.23M
                prev_location = b->b_instr[i].i_loc;
3606
9.23M
            }
3607
9.88M
        }
3608
1.02M
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3609
385k
            if (b->b_next->b_iused > 0) {
3610
384k
                if (b->b_next->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3611
31.5k
                    b->b_next->b_instr[0].i_loc = prev_location;
3612
31.5k
                }
3613
384k
            }
3614
385k
        }
3615
1.02M
        if (is_jump(last)) {
3616
531k
            basicblock *target = last->i_target;
3617
531k
            if (target->b_predecessors == 1) {
3618
103k
                if (target->b_instr[0].i_loc.lineno == NO_LOCATION.lineno) {
3619
22.0k
                    target->b_instr[0].i_loc = prev_location;
3620
22.0k
                }
3621
103k
            }
3622
531k
        }
3623
1.02M
    }
3624
132k
}
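
/* The intra-block propagation above in isolation (editorial sketch;
 * NO_LINENO stands in for NO_LOCATION). Instructions without a location
 * inherit the most recent one seen before them. */

#include <stdio.h>

#define NO_LINENO (-1)

int
main(void)
{
    int lineno[6] = {10, NO_LINENO, NO_LINENO, 12, NO_LINENO, NO_LINENO};
    int prev = NO_LINENO;
    for (int i = 0; i < 6; i++) {
        if (lineno[i] == NO_LINENO) {
            lineno[i] = prev;       /* inherit from the predecessor */
        }
        else {
            prev = lineno[i];
        }
        printf("%d ", lineno[i]);   /* prints: 10 10 10 12 12 12 */
    }
    printf("\n");
    return 0;
}
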
3625
3626
static int
3627
resolve_line_numbers(cfg_builder *g, int firstlineno)
3628
132k
{
3629
132k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3630
132k
    propagate_line_numbers(g->g_entryblock);
3631
132k
    return SUCCESS;
3632
132k
}
3633
3634
int
3635
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3636
                        int nlocals, int nparams, int firstlineno)
3637
66.2k
{
3638
66.2k
    assert(cfg_builder_check(g));
3639
    /** Preprocessing **/
3640
    /* Map labels to targets and mark exception handlers */
3641
66.2k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3642
66.2k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3643
66.2k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3644
3645
    /** Optimization **/
3646
66.2k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3647
66.2k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3648
66.2k
    RETURN_IF_ERROR(
3649
66.2k
        add_checks_for_loads_of_uninitialized_variables(
3650
66.2k
            g->g_entryblock, nlocals, nparams));
3651
66.2k
    RETURN_IF_ERROR(insert_superinstructions(g));
3652
3653
66.2k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3654
66.2k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3655
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3656
    // assert(all_exits_have_lineno(g->g_entryblock));
3657
66.2k
    return SUCCESS;
3658
66.2k
}
3659
3660
static int *
3661
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3662
66.2k
{
3663
66.2k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3664
66.2k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3665
66.2k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3666
3667
66.2k
    int noffsets = ncellvars + nfreevars;
3668
66.2k
    int *fixed = PyMem_New(int, noffsets);
3669
66.2k
    if (fixed == NULL) {
3670
0
        PyErr_NoMemory();
3671
0
        return NULL;
3672
0
    }
3673
107k
    for (int i = 0; i < noffsets; i++) {
3674
40.7k
        fixed[i] = nlocals + i;
3675
40.7k
    }
3676
3677
66.2k
    PyObject *varname, *cellindex;
3678
66.2k
    Py_ssize_t pos = 0;
3679
78.5k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3680
12.3k
        PyObject *varindex;
3681
12.3k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3682
0
            goto error;
3683
0
        }
3684
12.3k
        if (varindex == NULL) {
3685
11.5k
            continue;
3686
11.5k
        }
3687
3688
775
        int argoffset = PyLong_AsInt(varindex);
3689
775
        Py_DECREF(varindex);
3690
775
        if (argoffset == -1 && PyErr_Occurred()) {
3691
0
            goto error;
3692
0
        }
3693
3694
775
        int oldindex = PyLong_AsInt(cellindex);
3695
775
        if (oldindex == -1 && PyErr_Occurred()) {
3696
0
            goto error;
3697
0
        }
3698
775
        fixed[oldindex] = argoffset;
3699
775
    }
3700
66.2k
    return fixed;
3701
3702
0
error:
3703
0
    PyMem_Free(fixed);
3704
0
    return NULL;
3705
66.2k
}
3706
3707
#define IS_GENERATOR(CF) \
3708
66.2k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3709
3710
static int
3711
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3712
                           int *fixed, int nfreevars, int code_flags)
3713
66.2k
{
3714
66.2k
    assert(umd->u_firstlineno > 0);
3715
3716
    /* Add the generator prefix instructions. */
3717
66.2k
    if (IS_GENERATOR(code_flags)) {
3718
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3719
         * of 0. This is because RETURN_GENERATOR pushes an element
3720
         * with _PyFrame_StackPush before switching stacks.
3721
         */
3722
3723
4.53k
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3724
4.53k
        cfg_instr make_gen = {
3725
4.53k
            .i_opcode = RETURN_GENERATOR,
3726
4.53k
            .i_oparg = 0,
3727
4.53k
            .i_loc = loc,
3728
4.53k
            .i_target = NULL,
3729
4.53k
        };
3730
4.53k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3731
4.53k
        cfg_instr pop_top = {
3732
4.53k
            .i_opcode = POP_TOP,
3733
4.53k
            .i_oparg = 0,
3734
4.53k
            .i_loc = loc,
3735
4.53k
            .i_target = NULL,
3736
4.53k
        };
3737
4.53k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3738
4.53k
    }
3739
3740
    /* Set up cells for any variable that escapes, to be put in a closure. */
3741
66.2k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3742
66.2k
    if (ncellvars) {
3743
        // umd->u_cellvars has the cells out of order, so we sort them
3744
        // before adding the MAKE_CELL instructions.  Note that we
3745
        // adjust for arg cells, which come first.
3746
11.4k
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3747
11.4k
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3748
11.4k
        if (sorted == NULL) {
3749
0
            PyErr_NoMemory();
3750
0
            return ERROR;
3751
0
        }
3752
23.7k
        for (int i = 0; i < ncellvars; i++) {
3753
12.3k
            sorted[fixed[i]] = i + 1;
3754
12.3k
        }
3755
46.5k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3756
35.1k
            int oldindex = sorted[i] - 1;
3757
35.1k
            if (oldindex == -1) {
3758
22.7k
                continue;
3759
22.7k
            }
3760
12.3k
            cfg_instr make_cell = {
3761
12.3k
                .i_opcode = MAKE_CELL,
3762
                // This will get fixed in offset_derefs().
3763
12.3k
                .i_oparg = oldindex,
3764
12.3k
                .i_loc = NO_LOCATION,
3765
12.3k
                .i_target = NULL,
3766
12.3k
            };
3767
12.3k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3768
0
                PyMem_RawFree(sorted);
3769
0
                return ERROR;
3770
0
            }
3771
12.3k
            ncellsused += 1;
3772
12.3k
        }
3773
11.4k
        PyMem_RawFree(sorted);
3774
11.4k
    }
3775
3776
66.2k
    if (nfreevars) {
3777
17.6k
        cfg_instr copy_frees = {
3778
17.6k
            .i_opcode = COPY_FREE_VARS,
3779
17.6k
            .i_oparg = nfreevars,
3780
17.6k
            .i_loc = NO_LOCATION,
3781
17.6k
            .i_target = NULL,
3782
17.6k
        };
3783
17.6k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3784
17.6k
    }
3785
3786
66.2k
    return SUCCESS;
3787
66.2k
}
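
/* The MAKE_CELL ordering trick above in isolation (editorial sketch with
 * example values). Cells arrive in dict order but must be created in the
 * order of their final localsplus offsets; bucketing each cell at
 * sorted[offset], with +1 so that zero can mean "empty", lets a single
 * ordered scan emit them correctly. */

#include <stdio.h>

int
main(void)
{
    int ncells = 3;
    int fixed[3] = {4, 2, 3};    /* cell i lives at offset fixed[i] */
    int sorted[5] = {0};         /* 0 means "no cell at this offset" */

    for (int i = 0; i < ncells; i++) {
        sorted[fixed[i]] = i + 1;
    }
    /* Walk offsets in order; emit a MAKE_CELL per occupied slot. */
    for (int i = 0, used = 0; used < ncells; i++) {
        int oldindex = sorted[i] - 1;
        if (oldindex == -1) {
            continue;
        }
        printf("MAKE_CELL %d\n", oldindex);   /* prints: 1, 2, 0 */
        used++;
    }
    return 0;
}
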
3788
3789
static int
3790
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3791
66.2k
{
3792
66.2k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3793
66.2k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3794
66.2k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3795
66.2k
    int noffsets = ncellvars + nfreevars;
3796
3797
    // First deal with duplicates (arg cells).
3798
66.2k
    int numdropped = 0;
3799
107k
    for (int i = 0; i < noffsets ; i++) {
3800
40.7k
        if (fixedmap[i] == i + nlocals) {
3801
40.0k
            fixedmap[i] -= numdropped;
3802
40.0k
        }
3803
775
        else {
3804
            // It was a duplicate (cell/arg).
3805
775
            numdropped += 1;
3806
775
        }
3807
40.7k
    }
3808
3809
    // Then update offsets, either relative to locals or by cell2arg.
3810
641k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3811
4.84M
        for (int i = 0; i < b->b_iused; i++) {
3812
4.26M
            cfg_instr *inst = &b->b_instr[i];
3813
            // This is called before extended args are generated.
3814
4.26M
            assert(inst->i_opcode != EXTENDED_ARG);
3815
4.26M
            int oldoffset = inst->i_oparg;
3816
4.26M
            switch(inst->i_opcode) {
3817
12.3k
                case MAKE_CELL:
3818
42.6k
                case LOAD_CLOSURE:
3819
60.2k
                case LOAD_DEREF:
3820
69.4k
                case STORE_DEREF:
3821
69.4k
                case DELETE_DEREF:
3822
77.7k
                case LOAD_FROM_DICT_OR_DEREF:
3823
77.7k
                    assert(oldoffset >= 0);
3824
77.7k
                    assert(oldoffset < noffsets);
3825
77.7k
                    assert(fixedmap[oldoffset] >= 0);
3826
77.7k
                    inst->i_oparg = fixedmap[oldoffset];
3827
4.26M
            }
3828
4.26M
        }
3829
575k
    }
3830
3831
66.2k
    return numdropped;
3832
66.2k
}
3833
3834
static int
3835
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3836
66.2k
{
3837
66.2k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3838
66.2k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3839
66.2k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3840
66.2k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3841
66.2k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3842
66.2k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3843
66.2k
    assert(INT_MAX - nlocals - ncellvars > 0);
3844
66.2k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3845
66.2k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3846
66.2k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3847
66.2k
    if (cellfixedoffsets == NULL) {
3848
0
        return ERROR;
3849
0
    }
3850
3851
    // This must be called before fix_cell_offsets().
3852
66.2k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3853
0
        PyMem_Free(cellfixedoffsets);
3854
0
        return ERROR;
3855
0
    }
3856
3857
66.2k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3858
66.2k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3859
66.2k
    cellfixedoffsets = NULL;
3860
66.2k
    if (numdropped < 0) {
3861
0
        return ERROR;
3862
0
    }
3863
3864
66.2k
    nlocalsplus -= numdropped;
3865
66.2k
    return nlocalsplus;
3866
66.2k
}
3867
3868
cfg_builder *
3869
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3870
66.2k
{
3871
66.2k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3872
0
        return NULL;
3873
0
    }
3874
66.2k
    cfg_builder *g = _PyCfgBuilder_New();
3875
66.2k
    if (g == NULL) {
3876
0
        return NULL;
3877
0
    }
3878
5.82M
    for (int i = 0; i < seq->s_used; i++) {
3879
5.76M
        seq->s_instrs[i].i_target = 0;
3880
5.76M
    }
3881
5.82M
    for (int i = 0; i < seq->s_used; i++) {
3882
5.76M
        _PyInstruction *instr = &seq->s_instrs[i];
3883
5.76M
        if (HAS_TARGET(instr->i_opcode)) {
3884
356k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3885
356k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3886
356k
        }
3887
5.76M
    }
3888
66.2k
    int offset = 0;
3889
5.82M
    for (int i = 0; i < seq->s_used; i++) {
3890
5.76M
        _PyInstruction *instr = &seq->s_instrs[i];
3891
5.76M
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3892
13.2k
            if (seq->s_annotations_code != NULL) {
3893
841
                assert(seq->s_annotations_code->s_labelmap_size == 0
3894
841
                    && seq->s_annotations_code->s_nested == NULL);
3895
3.36k
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3896
2.52k
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3897
2.52k
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3898
2.52k
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3899
0
                        goto error;
3900
0
                    }
3901
2.52k
                }
3902
841
                offset += seq->s_annotations_code->s_used - 1;
3903
841
            }
3904
12.4k
            else {
3905
12.4k
                offset -= 1;
3906
12.4k
            }
3907
13.2k
            continue;
3908
13.2k
        }
3909
5.74M
        if (instr->i_target) {
3910
264k
            jump_target_label lbl_ = {i + offset};
3911
264k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3912
0
                goto error;
3913
0
            }
3914
264k
        }
3915
5.74M
        int opcode = instr->i_opcode;
3916
5.74M
        int oparg = instr->i_oparg;
3917
5.74M
        if (HAS_TARGET(opcode)) {
3918
356k
            oparg += offset;
3919
356k
        }
3920
5.74M
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3921
0
            goto error;
3922
0
        }
3923
5.74M
    }
3924
66.2k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3925
0
        goto error;
3926
0
    }
3927
66.2k
    return g;
3928
0
error:
3929
0
    _PyCfgBuilder_Free(g);
3930
0
    return NULL;
3931
66.2k
}
3932
3933
int
3934
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3935
66.2k
{
3936
66.2k
    int lbl = 0;
3937
642k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3938
576k
        b->b_label = (jump_target_label){lbl};
3939
576k
        lbl += 1;
3940
576k
    }
3941
642k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3942
576k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3943
4.91M
        for (int i = 0; i < b->b_iused; i++) {
3944
4.34M
            cfg_instr *instr = &b->b_instr[i];
3945
4.34M
            if (HAS_TARGET(instr->i_opcode)) {
3946
                /* Set oparg to the label id (it will later be mapped to an offset) */
3947
257k
                instr->i_oparg = instr->i_target->b_label.id;
3948
257k
            }
3949
4.34M
            RETURN_IF_ERROR(
3950
4.34M
                _PyInstructionSequence_Addop(
3951
4.34M
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3952
3953
4.34M
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3954
4.34M
            if (instr->i_except != NULL) {
3955
1.12M
                hi->h_label = instr->i_except->b_label.id;
3956
1.12M
                hi->h_startdepth = instr->i_except->b_startdepth;
3957
1.12M
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3958
1.12M
            }
3959
3.21M
            else {
3960
3.21M
                hi->h_label = -1;
3961
3.21M
            }
3962
4.34M
        }
3963
576k
    }
3964
66.2k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3965
0
        return ERROR;
3966
0
    }
3967
66.2k
    return SUCCESS;
3968
66.2k
}
3969
3970
3971
int
3972
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3973
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3974
                                     int *stackdepth, int *nlocalsplus,
3975
                                     _PyInstructionSequence *seq)
3976
66.2k
{
3977
66.2k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3978
3979
66.2k
    *stackdepth = calculate_stackdepth(g);
3980
66.2k
    if (*stackdepth < 0) {
3981
0
        return ERROR;
3982
0
    }
3983
3984
    /* prepare_localsplus adds instructions for generators that push
3985
     * and pop an item on the stack. This assertion makes sure there
3986
     * is space on the stack for that.
3987
     * It should always be true, because a generator must have at
3988
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
3989
     * which requires stack space.
3990
     */
3991
66.2k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
3992
3993
66.2k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
3994
66.2k
    if (*nlocalsplus < 0) {
3995
0
        return ERROR;
3996
0
    }
3997
3998
66.2k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
3999
4000
    /* Order of basic blocks must have been determined by now */
4001
4002
66.2k
    RETURN_IF_ERROR(normalize_jumps(g));
4003
66.2k
    assert(no_redundant_jumps(g));
4004
4005
    /* Can't modify the bytecode after inserting instructions that produce
4006
     * borrowed references.
4007
     */
4008
66.2k
    RETURN_IF_ERROR(optimize_load_fast(g));
4009
4010
    /* Can't modify the bytecode after computing jump offsets. */
4011
66.2k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4012
0
        return ERROR;
4013
0
    }
4014
4015
66.2k
    return SUCCESS;
4016
66.2k
}
4017
4018
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4019
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4020
 */
4021
int
4022
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4023
0
{
4024
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4025
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4026
0
    return SUCCESS;
4027
0
}
4028
4029
/* Exported API functions */
4030
4031
int
4032
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4033
0
{
4034
0
    stack_effects effs;
4035
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4036
0
        return PY_INVALID_STACK_EFFECT;
4037
0
    }
4038
0
    return effs.net;
4039
0
}
4040
4041
int
4042
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4043
6.17k
{
4044
6.17k
    stack_effects effs;
4045
6.17k
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4046
0
        return PY_INVALID_STACK_EFFECT;
4047
0
    }
4048
6.17k
    return effs.net;
4049
6.17k
}
4050
4051
/* Access to compiler optimizations for unit tests.
4052
4053
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4054
 * a CFG, optimizes it and converts back to an instruction list.
4055
 */
4056
4057
static PyObject *
4058
cfg_to_instruction_sequence(cfg_builder *g)
4059
0
{
4060
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4061
0
    if (seq == NULL) {
4062
0
        return NULL;
4063
0
    }
4064
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4065
0
        PyInstructionSequence_Fini(seq);
4066
0
        return NULL;
4067
0
    }
4068
0
    return (PyObject*)seq;
4069
0
}
4070
4071
PyObject *
4072
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4073
0
{
4074
0
    if (!_PyInstructionSequence_Check(seq)) {
4075
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4076
0
        return NULL;
4077
0
    }
4078
0
    PyObject *const_cache = PyDict_New();
4079
0
    if (const_cache == NULL) {
4080
0
        return NULL;
4081
0
    }
4082
4083
0
    PyObject *res = NULL;
4084
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4085
0
    if (g == NULL) {
4086
0
        goto error;
4087
0
    }
4088
0
    int nparams = 0, firstlineno = 1;
4089
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4090
0
                                nparams, firstlineno) < 0) {
4091
0
        goto error;
4092
0
    }
4093
4094
0
    if (calculate_stackdepth(g) == ERROR) {
4095
0
        goto error;
4096
0
    }
4097
4098
0
    if (optimize_load_fast(g) != SUCCESS) {
4099
0
        goto error;
4100
0
    }
4101
4102
0
    res = cfg_to_instruction_sequence(g);
4103
0
error:
4104
0
    Py_DECREF(const_cache);
4105
0
    _PyCfgBuilder_Free(g);
4106
0
    return res;
4107
0
}