Coverage Report

Created: 2025-11-11 06:44

/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
1.42M
#define SUCCESS 0
19
7.90k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
2.23M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
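Every fallible pass in this file follows the SUCCESS/ERROR convention above; a minimal sketch of the idiom (some_fallible_step is a hypothetical helper with the same -1-on-failure contract):

    static int
    hypothetical_pass(void)
    {
        /* RETURN_IF_ERROR propagates a -1 result to the caller. */
        RETURN_IF_ERROR(some_fallible_step());
        return SUCCESS;
    }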
25
26
476k
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the blocks are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by the assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links. */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
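A sketch of the two traversal orders these links provide, assuming a populated builder g (both loops mirror code later in this file):

    /* b_list: every allocated block, newest first, reachable or not. */
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
        /* e.g. bulk bookkeeping, as in _PyCfgBuilder_Free() */
    }
    /* b_next: blocks in control-flow (emission) order from the entry. */
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
        /* e.g. the optimization passes */
    }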
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
488k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
488k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
401
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
2.07M
{
101
2.07M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
2.07M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
2.07M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
1.91M
{
108
1.91M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
1.91M
}
110
111
/* One arg */
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
47.9k
    do { \
114
47.9k
        assert(OPCODE_HAS_ARG(OP)); \
115
47.9k
        cfg_instr *_instr__ptr_ = (I); \
116
47.9k
        _instr__ptr_->i_opcode = (OP); \
117
47.9k
        _instr__ptr_->i_oparg = (ARG); \
118
47.9k
    } while (0);
119
120
/* No args */
121
#define INSTR_SET_OP0(I, OP) \
122
102k
    do { \
123
102k
        assert(!OPCODE_HAS_ARG(OP)); \
124
102k
        cfg_instr *_instr__ptr_ = (I); \
125
102k
        _instr__ptr_->i_opcode = (OP); \
126
102k
        _instr__ptr_->i_oparg = 0; \
127
102k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
5.84k
    do { \
131
5.84k
        cfg_instr *_instr__ptr_ = (I); \
132
5.84k
        _instr__ptr_->i_loc = (LOC); \
133
5.84k
    } while (0);
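A minimal sketch of how the passes below use these macros to erase an instruction in place (instr is assumed to point into some block's b_instr array):

    INSTR_SET_OP0(instr, NOP);          /* opcode becomes NOP, oparg 0 */
    INSTR_SET_LOC(instr, NO_LOCATION);  /* and its location is dropped */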
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
476k
{
144
476k
    assert(b != NULL);
145
476k
    _Py_c_array_t array = {
146
476k
        .array = (void*)b->b_instr,
147
476k
        .allocated_entries = b->b_ialloc,
148
476k
        .item_size = sizeof(cfg_instr),
149
476k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
476k
    };
151
152
476k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
476k
    b->b_instr = array.array;
154
476k
    b->b_ialloc = array.allocated_entries;
155
476k
    return b->b_iused++;
156
476k
}
157
158
static cfg_instr *
159
1.81M
basicblock_last_instr(const basicblock *b) {
160
1.81M
    assert(b->b_iused >= 0);
161
1.81M
    if (b->b_iused > 0) {
162
1.67M
        assert(b->b_instr != NULL);
163
1.67M
        return &b->b_instr[b->b_iused - 1];
164
1.67M
    }
165
138k
    return NULL;
166
1.81M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
56.8k
{
175
56.8k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
56.8k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
56.8k
    b->b_list = g->g_block_list;
182
56.8k
    g->g_block_list = b;
183
56.8k
    b->b_label = NO_LABEL;
184
56.8k
    return b;
185
56.8k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
466k
{
190
466k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
466k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
466k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
466k
    assert(0 <= oparg && oparg < (1 << 30));
194
195
466k
    int off = basicblock_next_instr(b);
196
466k
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
466k
    cfg_instr *i = &b->b_instr[off];
200
466k
    i->i_opcode = opcode;
201
466k
    i->i_oparg = oparg;
202
466k
    i->i_loc = loc;
203
    // memory is already zero initialized
204
466k
    assert(i->i_target == NULL);
205
466k
    assert(i->i_except == NULL);
206
207
466k
    return SUCCESS;
208
466k
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
1.66k
{
213
1.66k
    cfg_instr *last = basicblock_last_instr(b);
214
1.66k
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
1.66k
    RETURN_IF_ERROR(
219
1.66k
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
1.66k
    last = basicblock_last_instr(b);
221
1.66k
    assert(last && last->i_opcode == opcode);
222
1.66k
    last->i_target = target;
223
1.66k
    return SUCCESS;
224
1.66k
}
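A sketch of populating a block with these two helpers; the opcodes, loc, and target are illustrative:

    /* Emit a small-int load, then wire an unconditional jump to target.
     * basicblock_add_jump stores target's label id as the oparg and the
     * block pointer in i_target. */
    RETURN_IF_ERROR(basicblock_addop(b, LOAD_SMALL_INT, 1, loc));
    RETURN_IF_ERROR(basicblock_add_jump(b, JUMP, target, loc));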
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
4.15k
{
229
10.9k
    for (int i = 0; i < from->b_iused; i++) {
230
6.83k
        int n = basicblock_next_instr(to);
231
6.83k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
6.83k
        to->b_instr[n] = from->b_instr[i];
235
6.83k
    }
236
4.15k
    return SUCCESS;
237
4.15k
}
238
239
static inline int
240
569k
basicblock_nofallthrough(const basicblock *b) {
241
569k
    cfg_instr *last = basicblock_last_instr(b);
242
569k
    return (last &&
243
540k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
355k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
569k
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
918k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
729
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
729
    assert(BB_NO_FALLTHROUGH(block));
257
729
    basicblock *result = cfg_builder_new_block(g);
258
729
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
729
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
729
    return result;
265
729
}
266
267
static int
268
3.99k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
3.99k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
66.2k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
62.2k
        block->b_instr[i] = block->b_instr[i-1];
272
62.2k
    }
273
3.99k
    block->b_instr[pos] = *instr;
274
3.99k
    return SUCCESS;
275
3.99k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
47.3k
{
343
47.3k
    assert(block != NULL);
344
47.3k
    g->g_curblock->b_next = block;
345
47.3k
    g->g_curblock = block;
346
47.3k
    return block;
347
47.3k
}
348
349
static inline int
350
110k
basicblock_exits_scope(const basicblock *b) {
351
110k
    cfg_instr *last = basicblock_last_instr(b);
352
110k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
110k
}
354
355
static inline int
356
69.9k
basicblock_has_eval_break(const basicblock *b) {
357
349k
    for (int i = 0; i < b->b_iused; i++) {
358
307k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
28.2k
            return true;
360
28.2k
        }
361
307k
    }
362
41.7k
    return false;
363
69.9k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
473k
{
368
473k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
473k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
40.3k
        return true;
371
40.3k
    }
372
432k
    if (IS_LABEL(g->g_current_label)) {
373
6.95k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
6.95k
            return true;
375
6.95k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
6.95k
    }
382
426k
    return false;
383
432k
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
473k
{
388
473k
    if (cfg_builder_current_block_is_terminated(g)) {
389
47.3k
        basicblock *b = cfg_builder_new_block(g);
390
47.3k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
47.3k
        b->b_label = g->g_current_label;
394
47.3k
        g->g_current_label = NO_LABEL;
395
47.3k
        cfg_builder_use_next_block(g, b);
396
47.3k
    }
397
473k
    return SUCCESS;
398
473k
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
{
404
    assert(g->g_entryblock->b_iused > 0);
405
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
        assert(!_PyMem_IsPtrFreed(block));
407
        if (block->b_instr != NULL) {
408
            assert(block->b_ialloc > 0);
409
            assert(block->b_iused >= 0);
410
            assert(block->b_ialloc >= block->b_iused);
411
        }
412
        else {
413
            assert (block->b_iused == 0);
414
            assert (block->b_ialloc == 0);
415
        }
416
    }
417
    return true;
418
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
7.90k
{
424
7.90k
    g->g_block_list = NULL;
425
7.90k
    basicblock *block = cfg_builder_new_block(g);
426
7.90k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
7.90k
    g->g_curblock = g->g_entryblock = block;
430
7.90k
    g->g_current_label = NO_LABEL;
431
7.90k
    return SUCCESS;
432
7.90k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
7.90k
{
437
7.90k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
7.90k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
7.90k
    memset(g, 0, sizeof(cfg_builder));
443
7.90k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
7.90k
    return g;
448
7.90k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
7.90k
{
453
7.90k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
7.90k
    assert(cfg_builder_check(g));
457
7.90k
    basicblock *b = g->g_block_list;
458
64.8k
    while (b != NULL) {
459
56.8k
        if (b->b_instr) {
460
56.8k
            PyMem_Free((void *)b->b_instr);
461
56.8k
        }
462
56.8k
        basicblock *next = b->b_list;
463
56.8k
        PyMem_Free((void *)b);
464
56.8k
        b = next;
465
56.8k
    }
466
7.90k
    PyMem_Free(g);
467
7.90k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
7.90k
{
472
7.90k
    int nblocks = 0;
473
63.1k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
55.2k
        nblocks++;
475
55.2k
    }
476
7.90k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
7.90k
    return SUCCESS;
481
7.90k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
23.8k
{
486
23.8k
    g->g_current_label = lbl;
487
23.8k
    return cfg_builder_maybe_start_new_block(g);
488
23.8k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
449k
{
493
449k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
449k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
449k
}
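A sketch of how a front end drives this pair of entry points; the label id and loc are illustrative (a real label comes from the compiler's label allocator):

    jump_target_label lbl = {7};   /* hypothetical label id */
    RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, JUMP, lbl.id, loc));
    RETURN_IF_ERROR(_PyCfgBuilder_UseLabel(g, lbl));   /* jump lands here */
    RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, NOP, 0, loc));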
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
95.7k
{
501
99.6k
    while (b && b->b_iused == 0) {
502
3.91k
        b = b->b_next;
503
3.91k
    }
504
95.7k
    return b;
505
95.7k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
no_redundant_nops(cfg_builder *g) {
514
    if (remove_redundant_nops(g) != 0) {
515
        return false;
516
    }
517
    return true;
518
}
519
520
static bool
521
no_redundant_jumps(cfg_builder *g) {
522
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
        cfg_instr *last = basicblock_last_instr(b);
524
        if (last != NULL) {
525
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
                basicblock *next = next_nonempty_block(b->b_next);
527
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
                if (jump_target == next) {
529
                    assert(next);
530
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
                        assert(0);
532
                        return false;
533
                    }
534
                }
535
            }
536
        }
537
    }
538
    return true;
539
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
56.8k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
56.8k
    cfg_instr *last = basicblock_last_instr(b);
547
56.8k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
42.1k
        return SUCCESS;
549
42.1k
    }
550
56.8k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
14.7k
    bool is_forward = last->i_target->b_visited == 0;
553
14.7k
    if (is_forward) {
554
13.9k
        RETURN_IF_ERROR(
555
13.9k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
13.9k
        return SUCCESS;
557
13.9k
    }
558
559
857
    int reversed_opcode = 0;
560
857
    switch(last->i_opcode) {
561
37
        case POP_JUMP_IF_NOT_NONE:
562
37
            reversed_opcode = POP_JUMP_IF_NONE;
563
37
            break;
564
39
        case POP_JUMP_IF_NONE:
565
39
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
39
            break;
567
707
        case POP_JUMP_IF_FALSE:
568
707
            reversed_opcode = POP_JUMP_IF_TRUE;
569
707
            break;
570
74
        case POP_JUMP_IF_TRUE:
571
74
            reversed_opcode = POP_JUMP_IF_FALSE;
572
74
            break;
573
857
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
857
    basicblock *target = last->i_target;
579
857
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
857
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
857
    RETURN_IF_ERROR(
584
857
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
857
    RETURN_IF_ERROR(
586
857
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
857
    backwards_jump->b_startdepth = target->b_startdepth;
588
857
    last->i_opcode = reversed_opcode;
589
857
    last->i_target = b->b_next;
590
591
857
    backwards_jump->b_cold = b->b_cold;
592
857
    backwards_jump->b_next = b->b_next;
593
857
    b->b_next = backwards_jump;
594
857
    return SUCCESS;
595
857
}
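For example, when block b ends with a backward POP_JUMP_IF_FALSE to T, the pass rewrites it as POP_JUMP_IF_TRUE targeting b's original successor and splices in a new block holding NOT_TAKEN followed by JUMP T, so the only remaining backward edge is an unconditional jump.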
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
7.90k
{
601
7.90k
    basicblock *entryblock = g->g_entryblock;
602
63.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
56.0k
        b->b_visited = 0;
604
56.0k
    }
605
64.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
56.8k
        b->b_visited = 1;
607
56.8k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
56.8k
    }
609
7.90k
    return SUCCESS;
610
7.90k
}
611
612
static int
613
7.90k
check_cfg(cfg_builder *g) {
614
63.1k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if a jump or exit is not the last instruction in the block. */
616
504k
        for (int i = 0; i < b->b_iused; i++) {
617
449k
            int opcode = b->b_instr[i].i_opcode;
618
449k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
449k
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
48.3k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
48.3k
            }
625
449k
        }
626
55.2k
    }
627
7.90k
    return SUCCESS;
628
7.90k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
30.7k
{
633
30.7k
    int lbl = -1;
634
252k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
221k
        if (b->b_label.id > lbl) {
636
86.5k
            lbl = b->b_label.id;
637
86.5k
        }
638
221k
    }
639
30.7k
    return lbl;
640
30.7k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
7.90k
{
646
7.90k
    int max_label = get_max_label(entryblock);
647
7.90k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
7.90k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
7.90k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
7.90k
    memset(label2block, 0, mapsize);
654
63.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
55.2k
        if (b->b_label.id >= 0) {
656
23.8k
            label2block[b->b_label.id] = b;
657
23.8k
        }
658
55.2k
    }
659
63.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
504k
        for (int i = 0; i < b->b_iused; i++) {
661
449k
            cfg_instr *instr = &b->b_instr[i];
662
449k
            assert(instr->i_target == NULL);
663
449k
            if (HAS_TARGET(instr->i_opcode)) {
664
29.6k
                int lbl = instr->i_oparg;
665
29.6k
                assert(lbl >= 0 && lbl <= max_label);
666
29.6k
                instr->i_target = label2block[lbl];
667
29.6k
                assert(instr->i_target != NULL);
668
29.6k
                assert(instr->i_target->b_label.id == lbl);
669
29.6k
            }
670
449k
        }
671
55.2k
    }
672
7.90k
    PyMem_Free(label2block);
673
7.90k
    return SUCCESS;
674
7.90k
}
675
676
static int
677
7.90k
mark_except_handlers(basicblock *entryblock) {
678
#ifndef NDEBUG
679
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
        assert(!b->b_except_handler);
681
    }
682
#endif
683
63.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
504k
        for (int i=0; i < b->b_iused; i++) {
685
449k
            cfg_instr *instr = &b->b_instr[i];
686
449k
            if (is_block_push(instr)) {
687
3.39k
                instr->i_target->b_except_handler = 1;
688
3.39k
            }
689
449k
        }
690
55.2k
    }
691
7.90k
    return SUCCESS;
692
7.90k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
3.38k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
3.38k
    assert(is_block_push(setup));
704
3.38k
    int opcode = setup->i_opcode;
705
3.38k
    basicblock * target = setup->i_target;
706
3.38k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
1.98k
        target->b_preserve_lasti = 1;
708
1.98k
    }
709
3.38k
    assert(stack->depth <= CO_MAXBLOCKS);
710
3.38k
    stack->handlers[++stack->depth] = target;
711
3.38k
    return target;
712
3.38k
}
713
714
static basicblock *
715
2.76k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
2.76k
    assert(stack->depth > 0);
717
2.76k
    return stack->handlers[--stack->depth];
718
2.76k
}
719
720
static basicblock *
721
47.5k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
47.5k
    return stack->handlers[stack->depth];
723
47.5k
}
724
725
static struct _PyCfgExceptStack *
726
7.90k
make_except_stack(void) {
727
7.90k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
7.90k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
7.90k
    new->depth = 0;
733
7.90k
    new->handlers[0] = NULL;
734
7.90k
    return new;
735
7.90k
}
736
737
static struct _PyCfgExceptStack *
738
18.0k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
18.0k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
18.0k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
18.0k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
18.0k
    return copy;
746
18.0k
}
747
748
static basicblock**
749
59.6k
make_cfg_traversal_stack(basicblock *entryblock) {
750
59.6k
    int nblocks = 0;
751
501k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
441k
        b->b_visited = 0;
753
441k
        nblocks++;
754
441k
    }
755
59.6k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
59.6k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
59.6k
    return stack;
760
59.6k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have a different stack effect when jumping to the target than
765
   when not jumping. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jumping
768
   * 1 -- when jumping
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
418k
{
779
418k
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
418k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
418k
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
418k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
418k
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
418k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
3.38k
        effects->net = 0;
793
3.38k
        return 0;
794
3.38k
    }
795
415k
    effects->net = pushed - popped;
796
415k
    return 0;
797
418k
}
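A worked example of the net effect, using the opcode metadata's popped/pushed counts:

    stack_effects e;
    /* A binary operation pops two operands and pushes one result,
     * so the net effect is -1. */
    if (get_stack_effects(BINARY_OP, 0, /*jump=*/0, &e) == 0) {
        assert(e.net == -1);
    }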
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
58.5k
{
802
58.5k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
58.5k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
47.4k
        assert(b->b_startdepth < 0);
808
47.4k
        b->b_startdepth = depth;
809
47.4k
        *(*sp)++ = b;
810
47.4k
    }
811
58.5k
    return SUCCESS;
812
58.5k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
7.90k
{
820
7.90k
    basicblock *entryblock = g->g_entryblock;
821
63.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
56.0k
        b->b_startdepth = INT_MIN;
823
56.0k
    }
824
7.90k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
7.90k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
7.90k
    int stackdepth = -1;
831
7.90k
    int maxdepth = 0;
832
7.90k
    basicblock **sp = stack;
833
7.90k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
55.3k
    while (sp != stack) {
837
47.4k
        basicblock *b = *--sp;
838
47.4k
        int depth = b->b_startdepth;
839
47.4k
        assert(depth >= 0);
840
47.4k
        basicblock *next = b->b_next;
841
418k
        for (int i = 0; i < b->b_iused; i++) {
842
393k
            cfg_instr *instr = &b->b_instr[i];
843
393k
            stack_effects effects;
844
393k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
393k
            int new_depth = depth + effects.net;
851
393k
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow");
854
0
                goto error;
855
0
            }
856
393k
            maxdepth = Py_MAX(maxdepth, depth);
857
393k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
25.7k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
25.7k
                int target_depth = depth + effects.net;
865
25.7k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
25.7k
                maxdepth = Py_MAX(maxdepth, depth);
867
25.7k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
25.7k
            }
871
393k
            depth = new_depth;
872
393k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
393k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
387k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
22.5k
            {
876
                /* remaining code is dead */
877
22.5k
                next = NULL;
878
22.5k
                break;
879
22.5k
            }
880
393k
        }
881
47.4k
        if (next != NULL) {
882
24.9k
            assert(BB_HAS_FALLTHROUGH(b));
883
24.9k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
24.9k
        }
887
47.4k
    }
888
7.90k
    stackdepth = maxdepth;
889
7.90k
error:
890
7.90k
    PyMem_Free(stack);
891
7.90k
    return stackdepth;
892
7.90k
}
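As a concrete illustration: a block entered at depth 0 containing LOAD_SMALL_INT, LOAD_SMALL_INT, BINARY_OP, RETURN_VALUE passes through depths 1, 2, 1, 0, so it contributes a maxdepth of 2, and because RETURN_VALUE is a scope exit no fallthrough successor is pushed.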
893
894
static int
895
7.90k
label_exception_targets(basicblock *entryblock) {
896
7.90k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
7.90k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
7.90k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
7.90k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
7.90k
    except_stack->depth = 0;
907
7.90k
    todo_stack[0] = entryblock;
908
7.90k
    entryblock->b_visited = 1;
909
7.90k
    entryblock->b_exceptstack = except_stack;
910
7.90k
    basicblock **todo = &todo_stack[1];
911
7.90k
    basicblock *handler = NULL;
912
55.4k
    while (todo > todo_stack) {
913
47.5k
        todo--;
914
47.5k
        basicblock *b = todo[0];
915
47.5k
        assert(b->b_visited == 1);
916
47.5k
        except_stack = b->b_exceptstack;
917
47.5k
        assert(except_stack != NULL);
918
47.5k
        b->b_exceptstack = NULL;
919
47.5k
        handler = except_stack_top(except_stack);
920
47.5k
        int last_yield_except_depth = -1;
921
481k
        for (int i = 0; i < b->b_iused; i++) {
922
433k
            cfg_instr *instr = &b->b_instr[i];
923
433k
            if (is_block_push(instr)) {
924
3.38k
                if (!instr->i_target->b_visited) {
925
3.38k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
3.38k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
3.38k
                    instr->i_target->b_exceptstack = copy;
930
3.38k
                    todo[0] = instr->i_target;
931
3.38k
                    instr->i_target->b_visited = 1;
932
3.38k
                    todo++;
933
3.38k
                }
934
3.38k
                handler = push_except_block(except_stack, instr);
935
3.38k
            }
936
430k
            else if (instr->i_opcode == POP_BLOCK) {
937
2.76k
                handler = pop_except_block(except_stack);
938
2.76k
                INSTR_SET_OP0(instr, NOP);
939
2.76k
            }
940
427k
            else if (is_jump(instr)) {
941
24.6k
                instr->i_except = handler;
942
24.6k
                assert(i == b->b_iused -1);
943
24.6k
                if (!instr->i_target->b_visited) {
944
17.8k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
14.6k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
14.6k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
14.6k
                        instr->i_target->b_exceptstack = copy;
950
14.6k
                    }
951
3.16k
                    else {
952
3.16k
                        instr->i_target->b_exceptstack = except_stack;
953
3.16k
                        except_stack = NULL;
954
3.16k
                    }
955
17.8k
                    todo[0] = instr->i_target;
956
17.8k
                    instr->i_target->b_visited = 1;
957
17.8k
                    todo++;
958
17.8k
                }
959
24.6k
            }
960
402k
            else if (instr->i_opcode == YIELD_VALUE) {
961
536
                instr->i_except = handler;
962
536
                last_yield_except_depth = except_stack->depth;
963
536
            }
964
402k
            else if (instr->i_opcode == RESUME) {
965
8.44k
                instr->i_except = handler;
966
8.44k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
536
                    assert(last_yield_except_depth >= 0);
968
536
                    if (last_yield_except_depth == 1) {
969
459
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
459
                    }
971
536
                    last_yield_except_depth = -1;
972
536
                }
973
8.44k
            }
974
393k
            else {
975
393k
                instr->i_except = handler;
976
393k
            }
977
433k
        }
978
47.5k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
979
18.4k
            assert(except_stack != NULL);
980
18.4k
            b->b_next->b_exceptstack = except_stack;
981
18.4k
            todo[0] = b->b_next;
982
18.4k
            b->b_next->b_visited = 1;
983
18.4k
            todo++;
984
18.4k
        }
985
29.0k
        else if (except_stack != NULL) {
986
25.9k
           PyMem_Free(except_stack);
987
25.9k
        }
988
47.5k
    }
989
#ifdef Py_DEBUG
990
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991
        assert(b->b_exceptstack == NULL);
992
    }
993
#endif
994
7.90k
    PyMem_Free(todo_stack);
995
7.90k
    return SUCCESS;
996
0
error:
997
0
    PyMem_Free(todo_stack);
998
0
    PyMem_Free(except_stack);
999
0
    return ERROR;
1000
7.90k
}
1001
1002
/***** CFG optimizations *****/
1003
1004
static int
1005
15.8k
remove_unreachable(basicblock *entryblock) {
1006
127k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1007
111k
        b->b_predecessors = 0;
1008
111k
    }
1009
15.8k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1010
15.8k
    if (stack == NULL) {
1011
0
        return ERROR;
1012
0
    }
1013
15.8k
    basicblock **sp = stack;
1014
15.8k
    entryblock->b_predecessors = 1;
1015
15.8k
    *sp++ = entryblock;
1016
15.8k
    entryblock->b_visited = 1;
1017
109k
    while (sp > stack) {
1018
94.0k
        basicblock *b = *(--sp);
1019
94.0k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1020
47.4k
            if (!b->b_next->b_visited) {
1021
40.5k
                assert(b->b_next->b_predecessors == 0);
1022
40.5k
                *sp++ = b->b_next;
1023
40.5k
                b->b_next->b_visited = 1;
1024
40.5k
            }
1025
47.4k
            b->b_next->b_predecessors++;
1026
47.4k
        }
1027
931k
        for (int i = 0; i < b->b_iused; i++) {
1028
837k
            basicblock *target;
1029
837k
            cfg_instr *instr = &b->b_instr[i];
1030
837k
            if (is_jump(instr) || is_block_push(instr)) {
1031
53.3k
                target = instr->i_target;
1032
53.3k
                if (!target->b_visited) {
1033
37.6k
                    *sp++ = target;
1034
37.6k
                    target->b_visited = 1;
1035
37.6k
                }
1036
53.3k
                target->b_predecessors++;
1037
53.3k
            }
1038
837k
        }
1039
94.0k
    }
1040
15.8k
    PyMem_Free(stack);
1041
1042
    /* Delete unreachable instructions */
1043
127k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1044
111k
       if (b->b_predecessors == 0) {
1045
17.1k
            b->b_iused = 0;
1046
17.1k
            b->b_except_handler = 0;
1047
17.1k
       }
1048
111k
    }
1049
15.8k
    return SUCCESS;
1050
15.8k
}
1051
1052
static int
1053
310k
basicblock_remove_redundant_nops(basicblock *bb) {
1054
    /* Remove NOPs when legal to do so. */
1055
310k
    int dest = 0;
1056
310k
    int prev_lineno = -1;
1057
2.43M
    for (int src = 0; src < bb->b_iused; src++) {
1058
2.12M
        int lineno = bb->b_instr[src].i_loc.lineno;
1059
2.12M
        if (bb->b_instr[src].i_opcode == NOP) {
1060
            /* Eliminate no-op if it doesn't have a line number */
1061
51.9k
            if (lineno < 0) {
1062
7.10k
                continue;
1063
7.10k
            }
1064
            /* or, if the previous instruction had the same line number. */
1065
44.8k
            if (prev_lineno == lineno) {
1066
36.0k
                continue;
1067
36.0k
            }
1068
            /* or, if the next instruction has the same line number or no line number */
1069
8.77k
            if (src < bb->b_iused - 1) {
1070
7.49k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1071
7.49k
                if (next_lineno == lineno) {
1072
4.95k
                    continue;
1073
4.95k
                }
1074
2.53k
                if (next_lineno < 0) {
1075
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1076
0
                    continue;
1077
0
                }
1078
2.53k
            }
1079
1.27k
            else {
1080
1.27k
                basicblock *next = next_nonempty_block(bb->b_next);
1081
                /* or if this is the last instruction in the BB and the next BB has the same line number */
1082
1.27k
                if (next) {
1083
1.27k
                    location next_loc = NO_LOCATION;
1084
1.27k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1085
1.27k
                        cfg_instr *instr = &next->b_instr[next_i];
1086
1.27k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1087
                            /* Skip over NOPs without a location; they will be removed */
1088
0
                            continue;
1089
0
                        }
1090
1.27k
                        next_loc = instr->i_loc;
1091
1.27k
                        break;
1092
1.27k
                    }
1093
1.27k
                    if (lineno == next_loc.lineno) {
1094
9
                        continue;
1095
9
                    }
1096
1.27k
                }
1097
1.27k
            }
1098
1099
8.77k
        }
1100
2.07M
        if (dest != src) {
1101
196k
            bb->b_instr[dest] = bb->b_instr[src];
1102
196k
        }
1103
2.07M
        dest++;
1104
2.07M
        prev_lineno = lineno;
1105
2.07M
    }
1106
310k
    assert(dest <= bb->b_iused);
1107
310k
    int num_removed = bb->b_iused - dest;
1108
310k
    bb->b_iused = dest;
1109
310k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1110
310k
    return num_removed;
1111
310k
}
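For example, in a block whose instructions carry line numbers 3, 3 (a NOP), none (a NOP), 5, both NOPs are removed: the first repeats the previous instruction's line and the second has no location. A NOP that is the sole carrier of its line number is kept so the line remains traceable.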
1112
1113
static int
1114
27.4k
remove_redundant_nops(cfg_builder *g) {
1115
27.4k
    int changes = 0;
1116
282k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1117
254k
        int change = basicblock_remove_redundant_nops(b);
1118
254k
        RETURN_IF_ERROR(change);
1119
254k
        changes += change;
1120
254k
    }
1121
27.4k
    return changes;
1122
27.4k
}
1123
1124
static int
1125
remove_redundant_nops_and_pairs(basicblock *entryblock)
1126
7.90k
{
1127
7.90k
    bool done = false;
1128
1129
15.8k
    while (! done) {
1130
7.90k
        done = true;
1131
7.90k
        cfg_instr *prev_instr = NULL;
1132
7.90k
        cfg_instr *instr = NULL;
1133
63.8k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1134
55.9k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1135
55.9k
            if (IS_LABEL(b->b_label)) {
1136
                /* this block is a jump target, forget instr */
1137
24.5k
                instr = NULL;
1138
24.5k
            }
1139
458k
            for (int i = 0; i < b->b_iused; i++) {
1140
402k
                prev_instr = instr;
1141
402k
                instr = &b->b_instr[i];
1142
402k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1143
402k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1144
402k
                int opcode = instr->i_opcode;
1145
402k
                bool is_redundant_pair = false;
1146
402k
                if (opcode == POP_TOP) {
1147
9.87k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1148
0
                       is_redundant_pair = true;
1149
0
                   }
1150
9.87k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1151
0
                       is_redundant_pair = true;
1152
0
                   }
1153
9.87k
                }
1154
402k
                if (is_redundant_pair) {
1155
0
                    INSTR_SET_OP0(prev_instr, NOP);
1156
0
                    INSTR_SET_OP0(instr, NOP);
1157
0
                    done = false;
1158
0
                }
1159
402k
            }
1160
55.9k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1161
40.6k
                instr = NULL;
1162
40.6k
            }
1163
55.9k
        }
1164
7.90k
    }
1165
7.90k
    return SUCCESS;
1166
7.90k
}
1167
1168
static int
1169
19.5k
remove_redundant_jumps(cfg_builder *g) {
1170
    /* If a non-empty block ends with a jump instruction, check if the next
1171
     * non-empty block reached through normal flow control is the target
1172
     * of that jump. If it is, then the jump instruction is redundant and
1173
     * can be deleted.
1174
     *
1175
     * Return the number of changes applied, or -1 on error.
1176
     */
1177
1178
19.5k
    int changes = 0;
1179
218k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1180
198k
        cfg_instr *last = basicblock_last_instr(b);
1181
198k
        if (last == NULL) {
1182
27.0k
            continue;
1183
27.0k
        }
1184
198k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1185
171k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1186
24.0k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1187
24.0k
            if (jump_target == NULL) {
1188
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1189
0
                return ERROR;
1190
0
            }
1191
24.0k
            basicblock *next = next_nonempty_block(b->b_next);
1192
24.0k
            if (jump_target == next) {
1193
811
                changes++;
1194
811
                INSTR_SET_OP0(last, NOP);
1195
811
            }
1196
24.0k
        }
1197
171k
    }
1198
1199
19.5k
    return changes;
1200
19.5k
}
1201
1202
static inline bool
1203
68.7k
basicblock_has_no_lineno(basicblock *b) {
1204
82.4k
    for (int i = 0; i < b->b_iused; i++) {
1205
75.6k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1206
62.0k
            return false;
1207
62.0k
        }
1208
75.6k
    }
1209
6.72k
    return true;
1210
68.7k
}
1211
1212
/* Maximum size of basic block that should be copied in optimizer */
1213
2.95k
#define MAX_COPY_SIZE 4
1214
1215
/* If this block ends with an unconditional jump to a small exit block or
1216
 * a block that has no line numbers (and no fallthrough), then
1217
 * remove the jump and extend this block with the target.
1218
 * Returns 1 if extended, 0 if no change, and -1 on error.
1219
 */
1220
static int
1221
88.6k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1222
88.6k
    cfg_instr *last = basicblock_last_instr(bb);
1223
88.6k
    if (last == NULL) {
1224
0
        return 0;
1225
0
    }
1226
88.6k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1227
72.7k
        return 0;
1228
72.7k
    }
1229
15.9k
    basicblock *target = last->i_target;
1230
15.9k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1231
2.95k
                             target->b_iused <= MAX_COPY_SIZE);
1232
15.9k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1233
2.95k
                                     !BB_HAS_FALLTHROUGH(target));
1234
15.9k
    if (small_exit_block || no_lineno_no_fallthrough) {
1235
3.42k
        assert(is_jump(last));
1236
3.42k
        int removed_jump_opcode = last->i_opcode;
1237
3.42k
        INSTR_SET_OP0(last, NOP);
1238
3.42k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1239
3.42k
        if (no_lineno_no_fallthrough) {
1240
2.95k
            last = basicblock_last_instr(bb);
1241
2.95k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1242
1.72k
                removed_jump_opcode == JUMP)
1243
75
            {
1244
                /* Make sure we don't lose eval breaker checks */
1245
75
                last->i_opcode = JUMP;
1246
75
            }
1247
2.95k
        }
1248
3.42k
        target->b_predecessors--;
1249
3.42k
        return 1;
1250
3.42k
    }
1251
12.4k
    return 0;
1252
15.9k
}
1253
1254
static int
1255
7.90k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1256
7.90k
    bool changes;
1257
9.29k
    do {
1258
9.29k
        changes = false;
1259
97.9k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1260
88.6k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1261
88.6k
            RETURN_IF_ERROR(res);
1262
88.6k
            if (res) {
1263
3.42k
                changes = true;
1264
3.42k
            }
1265
88.6k
        }
1266
9.29k
    } while(changes); /* every change removes a jump, ensuring convergence */
1267
7.90k
    return changes;
1268
7.90k
}
1269
1270
// Attempt to eliminate jumps to jumps by updating inst to jump to
1271
// target->i_target using the provided opcode. Return whether or not the
1272
// optimization was successful.
1273
static bool
1274
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1275
812
{
1276
812
    assert(is_jump(inst));
1277
812
    assert(is_jump(target));
1278
812
    assert(inst == basicblock_last_instr(bb));
1279
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1280
    // changes (and we fall into an infinite loop):
1281
812
    if (inst->i_target != target->i_target) {
1282
        /* Change inst to NOP and append a jump to target->i_target. The
1283
         * NOP will be removed later if it's not needed for the lineno.
1284
         */
1285
812
        INSTR_SET_OP0(inst, NOP);
1286
1287
812
        RETURN_IF_ERROR(
1288
812
            basicblock_add_jump(
1289
812
                bb, opcode, target->i_target, target->i_loc));
1290
1291
812
        return true;
1292
812
    }
1293
0
    return false;
1294
812
}
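For example, if bb ends with a jump to a block that immediately jumps to T, threading NOPs bb's jump and appends a direct jump to T, eliminating the intermediate hop; the bpo-45773 guard skips the degenerate case where the hop already lands on T.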
1295
1296
static int
1297
loads_const(int opcode)
1298
22.3k
{
1299
22.3k
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1300
22.3k
}
1301
1302
/* Returns new reference */
1303
static PyObject*
1304
get_const_value(int opcode, int oparg, PyObject *co_consts)
1305
83.5k
{
1306
83.5k
    PyObject *constant = NULL;
1307
83.5k
    assert(loads_const(opcode));
1308
83.5k
    if (opcode == LOAD_CONST) {
1309
82.4k
        constant = PyList_GET_ITEM(co_consts, oparg);
1310
82.4k
    }
1311
83.5k
    if (opcode == LOAD_SMALL_INT) {
1312
1.09k
        return PyLong_FromLong(oparg);
1313
1.09k
    }
1314
1315
82.4k
    if (constant == NULL) {
1316
0
        PyErr_SetString(PyExc_SystemError,
1317
0
                        "Internal error: failed to get value of a constant");
1318
0
        return NULL;
1319
0
    }
1320
82.4k
    return Py_NewRef(constant);
1321
82.4k
}
1322
1323
// Steals a reference to newconst.
1324
static int
1325
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1326
2.76k
{
1327
2.76k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1328
0
        Py_DECREF(newconst);
1329
0
        return -1;
1330
0
    }
1331
1332
2.76k
    Py_ssize_t index;
1333
117k
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1334
115k
        if (PyList_GET_ITEM(consts, index) == newconst) {
1335
817
            break;
1336
817
        }
1337
115k
    }
1338
2.76k
    if (index == PyList_GET_SIZE(consts)) {
1339
1.94k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1340
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1341
0
            Py_DECREF(newconst);
1342
0
            return -1;
1343
0
        }
1344
1.94k
        if (PyList_Append(consts, newconst)) {
1345
0
            Py_DECREF(newconst);
1346
0
            return -1;
1347
0
        }
1348
1.94k
    }
1349
2.76k
    Py_DECREF(newconst);
1350
2.76k
    return (int)index;
1351
2.76k
}
1352
1353
/*
1354
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1355
  Try to collect "size" consecutive instructions that load constants into the array "instrs".
1356
  Caller must make sure that the length of "instrs" is sufficient to hold at least "size" instructions.
1357
1358
  Return a boolean indicating whether "size" such instructions were found.
1359
*/
1360
static bool
1361
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1362
14.7k
{
1363
14.7k
    assert(start < bb->b_iused);
1364
14.7k
    assert(size >= 0);
1365
14.7k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1366
1367
25.0k
    for (; start >= 0 && size > 0; start--) {
1368
22.3k
        cfg_instr *instr = &bb->b_instr[start];
1369
22.3k
        if (instr->i_opcode == NOP) {
1370
332
            continue;
1371
332
        }
1372
22.0k
        if (!loads_const(instr->i_opcode)) {
1373
12.0k
            return false;
1374
12.0k
        }
1375
9.95k
        instrs[--size] = instr;
1376
9.95k
    }
1377
1378
2.66k
    return size == 0;
1379
14.7k
}
1380
1381
/*
1382
  Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1383
  Caller must make sure "instrs" has at least "size" elements.
1384
*/
1385
static void
1386
nop_out(cfg_instr **instrs, int size)
1387
2.80k
{
1388
8.51k
    for (int i = 0; i < size; i++) {
1389
5.71k
        cfg_instr *instr = instrs[i];
1390
5.71k
        assert(instr->i_opcode != NOP);
1391
5.71k
        INSTR_SET_OP0(instr, NOP);
1392
5.71k
        INSTR_SET_LOC(instr, NO_LOCATION);
1393
5.71k
    }
1394
2.80k
}
1395
1396
/* Does not steal reference to "newconst".
1397
   Return 1 if the instruction was changed to LOAD_SMALL_INT.
1398
   Return 0 if the instruction could not be changed to LOAD_SMALL_INT.
1399
   Return -1 on error.
1400
*/
1401
static int
1402
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1403
                               PyObject *consts, PyObject *const_cache)
1404
78.3k
{
1405
78.3k
    if (PyLong_CheckExact(newconst)) {
1406
26.3k
        int overflow;
1407
26.3k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1408
26.3k
        if (val == -1 && PyErr_Occurred()) {
1409
0
            return -1;
1410
0
        }
1411
26.3k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1412
20.6k
            assert(_Py_IsImmortal(newconst));
1413
20.6k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1414
20.6k
            return 1;
1415
20.6k
        }
1416
26.3k
    }
1417
57.6k
    return 0;
1418
78.3k
}
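For example, a constant such as 5 lies in the interpreter's cached small-int range (the _PY_IS_SMALL_INT check), so the load is rewritten to LOAD_SMALL_INT 5 and needs no slot in the consts table; for anything else the function returns 0 and the caller falls back to add_const().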
1419
1420
1421
/* Steals reference to "newconst" */
1422
static int
1423
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1424
                      PyObject *consts, PyObject *const_cache)
1425
2.43k
{
1426
2.43k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1427
2.43k
    if (res < 0) {
1428
0
        Py_DECREF(newconst);
1429
0
        return ERROR;
1430
0
    }
1431
2.43k
    if (res > 0) {
1432
16
        return SUCCESS;
1433
16
    }
1434
2.41k
    int oparg = add_const(newconst, consts, const_cache);
1435
2.41k
    RETURN_IF_ERROR(oparg);
1436
2.41k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1437
2.41k
    return SUCCESS;
1438
2.41k
}
1439
1440
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1441
   with    LOAD_CONST (c1, c2, ... cn).
1442
   The consts table must still be in list form so that the
1443
   new constant (c1, c2, ... cn) can be appended.
1444
   Called with "i" as the index of the BUILD_TUPLE instruction.
1445
*/
1446
static int
1447
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1448
4.61k
{
1449
    /* Pre-conditions */
1450
4.61k
    assert(PyDict_CheckExact(const_cache));
1451
4.61k
    assert(PyList_CheckExact(consts));
1452
1453
4.61k
    cfg_instr *instr = &bb->b_instr[i];
1454
4.61k
    assert(instr->i_opcode == BUILD_TUPLE);
1455
1456
4.61k
    int seq_size = instr->i_oparg;
1457
4.61k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1458
0
        return SUCCESS;
1459
0
    }
1460
1461
4.61k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1462
4.61k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1463
        /* not a const sequence */
1464
2.93k
        return SUCCESS;
1465
2.93k
    }
1466
1467
1.67k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1468
1.67k
    if (const_tuple == NULL) {
1469
0
        return ERROR;
1470
0
    }
1471
1472
4.71k
    for (int i = 0; i < seq_size; i++) {
1473
3.04k
        cfg_instr *inst = const_instrs[i];
1474
3.04k
        assert(loads_const(inst->i_opcode));
1475
3.04k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1476
3.04k
        if (element == NULL) {
1477
0
            Py_DECREF(const_tuple);
1478
0
            return ERROR;
1479
0
        }
1480
3.04k
        PyTuple_SET_ITEM(const_tuple, i, element);
1481
3.04k
    }
1482
1483
1.67k
    nop_out(const_instrs, seq_size);
1484
1.67k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1485
1.67k
}
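For example, the literal (1, 2, 3) is emitted as LOAD_SMALL_INT 1, LOAD_SMALL_INT 2, LOAD_SMALL_INT 3, BUILD_TUPLE 3; this pass NOPs the three loads and rewrites the BUILD_TUPLE into a single LOAD_CONST of the completed tuple.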
1486
1487
/* Replace:
1488
    BUILD_LIST 0
1489
    LOAD_CONST c1
1490
    LIST_APPEND 1
1491
    LOAD_CONST c2
1492
    LIST_APPEND 1
1493
    ...
1494
    LOAD_CONST cN
1495
    LIST_APPEND 1
1496
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1497
   with:
1498
    LOAD_CONST (c1, c2, ... cN)
1499
*/
1500
static int
1501
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1502
                                      PyObject *consts, PyObject *const_cache)
1503
118
{
1504
118
    assert(PyDict_CheckExact(const_cache));
1505
118
    assert(PyList_CheckExact(consts));
1506
118
    assert(i >= 0);
1507
118
    assert(i < bb->b_iused);
1508
1509
118
    cfg_instr *intrinsic = &bb->b_instr[i];
1510
118
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1511
118
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1512
1513
118
    int consts_found = 0;
1514
118
    bool expect_append = true;
1515
1516
304
    for (int pos = i - 1; pos >= 0; pos--) {
1517
304
        cfg_instr *instr = &bb->b_instr[pos];
1518
304
        int opcode = instr->i_opcode;
1519
304
        int oparg = instr->i_oparg;
1520
1521
304
        if (opcode == NOP) {
1522
0
            continue;
1523
0
        }
1524
1525
304
        if (opcode == BUILD_LIST && oparg == 0) {
1526
2
            if (!expect_append) {
1527
                /* Not a sequence start. */
1528
0
                return SUCCESS;
1529
0
            }
1530
1531
            /* Sequence start, we are done. */
1532
2
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1533
2
            if (newconst == NULL) {
1534
0
                return ERROR;
1535
0
            }
1536
1537
184
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1538
182
                instr = &bb->b_instr[newpos];
1539
182
                if (instr->i_opcode == NOP) {
1540
0
                    continue;
1541
0
                }
1542
182
                if (loads_const(instr->i_opcode)) {
1543
90
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1544
90
                    if (constant == NULL) {
1545
0
                        Py_DECREF(newconst);
1546
0
                        return ERROR;
1547
0
                    }
1548
90
                    assert(consts_found > 0);
1549
90
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1550
90
                }
1551
182
                nop_out(&instr, 1);
1552
182
            }
1553
2
            assert(consts_found == 0);
1554
2
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1555
2
        }
1556
1557
302
        if (expect_append) {
1558
208
            if (opcode != LIST_APPEND || oparg != 1) {
1559
114
                return SUCCESS;
1560
114
            }
1561
208
        }
1562
94
        else {
1563
94
            if (!loads_const(opcode)) {
1564
2
                return SUCCESS;
1565
2
            }
1566
92
            consts_found++;
1567
92
        }
1568
1569
186
        expect_append = !expect_append;
1570
186
    }
1571
1572
    /* Did not find sequence start. */
1573
0
    return SUCCESS;
1574
118
}
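
The backward scan above alternates between expecting a LIST_APPEND 1 and a const load until it reaches the BUILD_LIST 0 that starts the sequence. A standalone recognizer with the same expect_append toggle, using invented toy opcodes; it returns the number of constants found, or -1 on a mismatch:

    #include <stdio.h>

    enum { BUILD_LIST, LOAD_CONST, LIST_APPEND };
    typedef struct { int op, arg; } Instr;

    /* i is the index where the list-to-tuple intrinsic would sit. */
    static int match_list_to_tuple(const Instr *code, int i) {
        int consts_found = 0;
        int expect_append = 1;            /* scan order: append, const, ... */
        for (int pos = i - 1; pos >= 0; pos--) {
            if (code[pos].op == BUILD_LIST && code[pos].arg == 0)
                return expect_append ? consts_found : -1;
            if (expect_append) {
                if (code[pos].op != LIST_APPEND || code[pos].arg != 1)
                    return -1;
            }
            else if (code[pos].op != LOAD_CONST)
                return -1;
            else
                consts_found++;
            expect_append = !expect_append;
        }
        return -1;                        /* no sequence start found */
    }

    int main(void) {
        Instr code[] = {{BUILD_LIST,0},{LOAD_CONST,0},{LIST_APPEND,1},
                        {LOAD_CONST,1},{LIST_APPEND,1}};
        printf("%d\n", match_list_to_tuple(code, 5));   /* prints 2 */
        return 0;
    }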
1575
1576
3.02k
#define MIN_CONST_SEQUENCE_SIZE 3
1577
/*
1578
Optimize lists and sets for:
1579
    1. "for" loop, comprehension or "in"/"not in" tests:
1580
           Change literal list or set of constants into constant
1581
           tuple or frozenset respectively. Change list of
1582
           non-constants into tuple.
1583
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1584
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1585
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1586
           or BUILD_SET & SET_UPDATE respectively.
1587
*/
1588
static int
1589
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1590
                        PyObject *consts, PyObject *const_cache)
1591
1.51k
{
1592
1.51k
    assert(PyDict_CheckExact(const_cache));
1593
1.51k
    assert(PyList_CheckExact(consts));
1594
1595
1.51k
    cfg_instr *instr = &bb->b_instr[i];
1596
1.51k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1597
1598
1.51k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1599
1.51k
    int seq_size = instr->i_oparg;
1600
1.51k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1601
1.51k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1602
1.25k
    {
1603
1.25k
        return SUCCESS;
1604
1.25k
    }
1605
1606
258
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1607
258
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1608
68
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1609
            /* iterate over a tuple instead of list */
1610
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1611
0
        }
1612
68
        return SUCCESS;
1613
68
    }
1614
1615
190
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1616
190
    if (const_result == NULL) {
1617
0
        return ERROR;
1618
0
    }
1619
1620
1.82k
    for (int i = 0; i < seq_size; i++) {
1621
1.63k
        cfg_instr *inst = const_instrs[i];
1622
1.63k
        assert(loads_const(inst->i_opcode));
1623
1.63k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1624
1.63k
        if (element == NULL) {
1625
0
            Py_DECREF(const_result);
1626
0
            return ERROR;
1627
0
        }
1628
1.63k
        PyTuple_SET_ITEM(const_result, i, element);
1629
1.63k
    }
1630
1631
190
    if (instr->i_opcode == BUILD_SET) {
1632
56
        PyObject *frozenset = PyFrozenSet_New(const_result);
1633
56
        if (frozenset == NULL) {
1634
0
            Py_DECREF(const_result);
1635
0
            return ERROR;
1636
0
        }
1637
56
        Py_SETREF(const_result, frozenset);
1638
56
    }
1639
1640
190
    int index = add_const(const_result, consts, const_cache);
1641
190
    RETURN_IF_ERROR(index);
1642
190
    nop_out(const_instrs, seq_size);
1643
1644
190
    if (contains_or_iter) {
1645
55
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1646
55
    }
1647
135
    else {
1648
135
        assert(i >= 2);
1649
135
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1650
1651
135
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1652
1653
135
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1654
135
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1655
135
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1656
135
    }
1657
190
    return SUCCESS;
1658
190
}
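
For the non-iteration case, the rewrite keeps exactly three instructions no matter how long the literal is: BUILD_LIST 0, one LOAD_CONST of the folded tuple, and LIST_EXTEND 1 (or BUILD_SET and SET_UPDATE). A toy version of that in-place rewrite, assuming adjacent const loads and a caller-supplied folded-constant index:

    #include <stdio.h>

    enum { NOP, LOAD_CONST, BUILD_LIST, LIST_EXTEND };
    typedef struct { int op, arg; } Instr;

    /* i indexes the BUILD_LIST; folded_index is the new tuple constant. */
    static void rewrite_list(Instr *code, int i, int folded_index) {
        int n = code[i].arg;
        for (int j = i - n; j < i - 2; j++) {
            code[j].op = NOP;                 /* nop_out the extra loads */
        }
        code[i - 2] = (Instr){BUILD_LIST, 0};
        code[i - 1] = (Instr){LOAD_CONST, folded_index};
        code[i]     = (Instr){LIST_EXTEND, 1};
    }

    int main(void) {
        Instr code[] = {{LOAD_CONST,0},{LOAD_CONST,1},{LOAD_CONST,2},{BUILD_LIST,3}};
        rewrite_list(code, 3, 9);
        for (int j = 0; j < 4; j++) printf("%d %d\n", code[j].op, code[j].arg);
        return 0;
    }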
1659
1660
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1661
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1662
 * Used to avoid creating constants which are slow to hash.
1663
 */
1664
static Py_ssize_t
1665
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1666
0
{
1667
0
    if (PyTuple_Check(obj)) {
1668
0
        Py_ssize_t i;
1669
0
        limit -= PyTuple_GET_SIZE(obj);
1670
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1671
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1672
0
            if (limit < 0) {
1673
0
                return limit;
1674
0
            }
1675
0
        }
1676
0
    }
1677
0
    return limit;
1678
0
}
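
The check threads a shrinking budget through the recursion and stops as soon as it goes negative, so deeply nested constants are rejected after at most O(limit) work. The same shape on a toy tree type:

    #include <stdio.h>

    typedef struct Node { int nchildren; struct Node *children; } Node;

    static long check(const Node *n, long limit) {
        limit -= n->nchildren;
        for (int i = 0; limit >= 0 && i < n->nchildren; i++) {
            limit = check(&n->children[i], limit);
        }
        return limit;   /* negative: too complex, give up on folding */
    }

    int main(void) {
        Node leaves[3] = {{0, NULL}, {0, NULL}, {0, NULL}};
        Node root = {3, leaves};
        printf("%ld\n", check(&root, 2));   /* prints -1: over budget */
        return 0;
    }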
1679
1680
139
#define MAX_INT_SIZE           128  /* bits */
1681
0
#define MAX_COLLECTION_SIZE    256  /* items */
1682
15
#define MAX_STR_SIZE          4096  /* characters */
1683
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1684
1685
static PyObject *
1686
const_folding_safe_multiply(PyObject *v, PyObject *w)
1687
36
{
1688
36
    if (PyLong_Check(v) && PyLong_Check(w) &&
1689
6
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1690
36
    ) {
1691
6
        int64_t vbits = _PyLong_NumBits(v);
1692
6
        int64_t wbits = _PyLong_NumBits(w);
1693
6
        assert(vbits >= 0);
1694
6
        assert(wbits >= 0);
1695
6
        if (vbits + wbits > MAX_INT_SIZE) {
1696
0
            return NULL;
1697
0
        }
1698
6
    }
1699
30
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1700
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1701
0
        if (size) {
1702
0
            long n = PyLong_AsLong(v);
1703
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1704
0
                return NULL;
1705
0
            }
1706
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1707
0
                return NULL;
1708
0
            }
1709
0
        }
1710
0
    }
1711
30
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1712
15
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1713
15
                                               PyBytes_GET_SIZE(w);
1714
15
        if (size) {
1715
15
            long n = PyLong_AsLong(v);
1716
15
            if (n < 0 || n > MAX_STR_SIZE / size) {
1717
5
                return NULL;
1718
5
            }
1719
15
        }
1720
15
    }
1721
15
    else if (PyLong_Check(w) &&
1722
15
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1723
15
    {
1724
15
        return const_folding_safe_multiply(w, v);
1725
15
    }
1726
1727
16
    return PyNumber_Multiply(v, w);
1728
36
}
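
The integer guard works because the product of a v-bit and a w-bit number needs at most v+w bits, so the result size can be bounded without computing the product. The same test for plain uint64_t, as a standalone sketch (MAX_BITS is an arbitrary cap chosen for the example):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_BITS 32

    static int bits(uint64_t x) {             /* position of highest set bit */
        int n = 0;
        while (x) { n++; x >>= 1; }
        return n;
    }

    static int safe_mul(uint64_t v, uint64_t w, uint64_t *out) {
        if (v && w && bits(v) + bits(w) > MAX_BITS) {
            return -1;                        /* result may exceed the cap */
        }
        *out = v * w;
        return 0;
    }

    int main(void) {
        uint64_t r;
        printf("%d\n", safe_mul(1u << 20, 1u << 20, &r));  /* -1: too big */
        printf("%d\n", safe_mul(12345, 678, &r));          /* 0: folded   */
        return 0;
    }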
1729
1730
static PyObject *
1731
const_folding_safe_power(PyObject *v, PyObject *w)
1732
13
{
1733
13
    if (PyLong_Check(v) && PyLong_Check(w) &&
1734
13
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1735
13
    ) {
1736
13
        int64_t vbits = _PyLong_NumBits(v);
1737
13
        size_t wbits = PyLong_AsSize_t(w);
1738
13
        assert(vbits >= 0);
1739
13
        if (wbits == (size_t)-1) {
1740
0
            return NULL;
1741
0
        }
1742
13
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1743
0
            return NULL;
1744
0
        }
1745
13
    }
1746
1747
13
    return PyNumber_Power(v, w, Py_None);
1748
13
}
1749
1750
static PyObject *
1751
const_folding_safe_lshift(PyObject *v, PyObject *w)
1752
41
{
1753
41
    if (PyLong_Check(v) && PyLong_Check(w) &&
1754
41
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1755
41
    ) {
1756
40
        int64_t vbits = _PyLong_NumBits(v);
1757
40
        size_t wbits = PyLong_AsSize_t(w);
1758
40
        assert(vbits >= 0);
1759
40
        if (wbits == (size_t)-1) {
1760
0
            return NULL;
1761
0
        }
1762
40
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1763
0
            return NULL;
1764
0
        }
1765
40
    }
1766
1767
41
    return PyNumber_Lshift(v, w);
1768
41
}
1769
1770
static PyObject *
1771
const_folding_safe_mod(PyObject *v, PyObject *w)
1772
0
{
1773
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1774
0
        return NULL;
1775
0
    }
1776
1777
0
    return PyNumber_Remainder(v, w);
1778
0
}
1779
1780
static PyObject *
1781
eval_const_binop(PyObject *left, int op, PyObject *right)
1782
101
{
1783
101
    assert(left != NULL && right != NULL);
1784
101
    assert(op >= 0 && op <= NB_OPARG_LAST);
1785
1786
101
    PyObject *result = NULL;
1787
101
    switch (op) {
1788
11
        case NB_ADD:
1789
11
            result = PyNumber_Add(left, right);
1790
11
            break;
1791
4
        case NB_SUBTRACT:
1792
4
            result = PyNumber_Subtract(left, right);
1793
4
            break;
1794
21
        case NB_MULTIPLY:
1795
21
            result = const_folding_safe_multiply(left, right);
1796
21
            break;
1797
1
        case NB_TRUE_DIVIDE:
1798
1
            result = PyNumber_TrueDivide(left, right);
1799
1
            break;
1800
0
        case NB_FLOOR_DIVIDE:
1801
0
            result = PyNumber_FloorDivide(left, right);
1802
0
            break;
1803
0
        case NB_REMAINDER:
1804
0
            result = const_folding_safe_mod(left, right);
1805
0
            break;
1806
13
        case NB_POWER:
1807
13
            result = const_folding_safe_power(left, right);
1808
13
            break;
1809
41
        case NB_LSHIFT:
1810
41
            result = const_folding_safe_lshift(left, right);
1811
41
            break;
1812
0
        case NB_RSHIFT:
1813
0
            result = PyNumber_Rshift(left, right);
1814
0
            break;
1815
0
        case NB_OR:
1816
0
            result = PyNumber_Or(left, right);
1817
0
            break;
1818
0
        case NB_XOR:
1819
0
            result = PyNumber_Xor(left, right);
1820
0
            break;
1821
0
        case NB_AND:
1822
0
            result = PyNumber_And(left, right);
1823
0
            break;
1824
10
        case NB_SUBSCR:
1825
10
            result = PyObject_GetItem(left, right);
1826
10
            break;
1827
0
        case NB_MATRIX_MULTIPLY:
1828
            // No builtin constants implement matrix multiplication
1829
0
            break;
1830
0
        default:
1831
0
            Py_UNREACHABLE();
1832
101
    }
1833
101
    return result;
1834
101
}
1835
1836
static int
1837
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1838
9.04k
{
1839
9.14k
    #define BINOP_OPERAND_COUNT 2
1840
9.04k
    assert(PyDict_CheckExact(const_cache));
1841
9.04k
    assert(PyList_CheckExact(consts));
1842
1843
9.04k
    cfg_instr *binop = &bb->b_instr[i];
1844
9.04k
    assert(binop->i_opcode == BINARY_OP);
1845
1846
9.04k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1847
9.04k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1848
        /* not a const sequence */
1849
8.94k
        return SUCCESS;
1850
8.94k
    }
1851
1852
101
    cfg_instr *lhs_instr = operands_instrs[0];
1853
101
    assert(loads_const(lhs_instr->i_opcode));
1854
101
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1855
101
    if (lhs == NULL) {
1856
0
        return ERROR;
1857
0
    }
1858
1859
101
    cfg_instr *rhs_instr = operands_instrs[1];
1860
101
    assert(loads_const(rhs_instr->i_opcode));
1861
101
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1862
101
    if (rhs == NULL) {
1863
0
        Py_DECREF(lhs);
1864
0
        return ERROR;
1865
0
    }
1866
1867
101
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1868
101
    Py_DECREF(lhs);
1869
101
    Py_DECREF(rhs);
1870
101
    if (newconst == NULL) {
1871
5
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1872
0
            return ERROR;
1873
0
        }
1874
5
        PyErr_Clear();
1875
5
        return SUCCESS;
1876
5
    }
1877
1878
96
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1879
96
    return instr_make_load_const(binop, newconst, consts, const_cache);
1880
101
}
1881
1882
static PyObject *
1883
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1884
663
{
1885
663
    assert(operand != NULL);
1886
663
    assert(
1887
663
        opcode == UNARY_NEGATIVE ||
1888
663
        opcode == UNARY_INVERT ||
1889
663
        opcode == UNARY_NOT ||
1890
663
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1891
663
    );
1892
663
    PyObject *result;
1893
663
    switch (opcode) {
1894
662
        case UNARY_NEGATIVE:
1895
662
            result = PyNumber_Negative(operand);
1896
662
            break;
1897
1
        case UNARY_INVERT:
1898
            // XXX: This should be removed once the ~bool deprecation expires.
1899
1
            if (PyBool_Check(operand)) {
1900
0
                return NULL;
1901
0
            }
1902
1
            result = PyNumber_Invert(operand);
1903
1
            break;
1904
0
        case UNARY_NOT: {
1905
0
            int r = PyObject_IsTrue(operand);
1906
0
            if (r < 0) {
1907
0
                return NULL;
1908
0
            }
1909
0
            result = PyBool_FromLong(!r);
1910
0
            break;
1911
0
        }
1912
0
        case CALL_INTRINSIC_1:
1913
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1914
0
                Py_UNREACHABLE();
1915
0
            }
1916
0
            result = PyNumber_Positive(operand);
1917
0
            break;
1918
0
        default:
1919
0
            Py_UNREACHABLE();
1920
663
    }
1921
663
    return result;
1922
663
}
1923
1924
static int
1925
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1926
843
{
1927
1.50k
    #define UNARYOP_OPERAND_COUNT 1
1928
843
    assert(PyDict_CheckExact(const_cache));
1929
843
    assert(PyList_CheckExact(consts));
1930
843
    cfg_instr *unaryop = &bb->b_instr[i];
1931
1932
843
    cfg_instr *operand_instr;
1933
843
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1934
        /* not a const */
1935
180
        return SUCCESS;
1936
180
    }
1937
1938
843
    assert(loads_const(operand_instr->i_opcode));
1939
663
    PyObject *operand = get_const_value(
1940
663
        operand_instr->i_opcode,
1941
663
        operand_instr->i_oparg,
1942
663
        consts
1943
663
    );
1944
663
    if (operand == NULL) {
1945
0
        return ERROR;
1946
0
    }
1947
1948
663
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1949
663
    Py_DECREF(operand);
1950
663
    if (newconst == NULL) {
1951
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1952
0
            return ERROR;
1953
0
        }
1954
0
        PyErr_Clear();
1955
0
        return SUCCESS;
1956
0
    }
1957
1958
663
    if (unaryop->i_opcode == UNARY_NOT) {
1959
0
        assert(PyBool_Check(newconst));
1960
0
    }
1961
663
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1962
663
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1963
663
}
1964
1965
2.62k
#define VISITED (-1)
1966
1967
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1968
// same effect.
1969
static int
1970
swaptimize(basicblock *block, int *ix)
1971
1.98k
{
1972
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1973
    // for this function. Make sure to blow away cached *.pyc files first!
1974
1.98k
    assert(*ix < block->b_iused);
1975
1.98k
    cfg_instr *instructions = &block->b_instr[*ix];
1976
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1977
    // maximum depth of the stack manipulations:
1978
1.98k
    assert(instructions[0].i_opcode == SWAP);
1979
1.98k
    int depth = instructions[0].i_oparg;
1980
1.98k
    int len = 0;
1981
1.98k
    int more = false;
1982
1.98k
    int limit = block->b_iused - *ix;
1983
2.29k
    while (++len < limit) {
1984
2.29k
        int opcode = instructions[len].i_opcode;
1985
2.29k
        if (opcode == SWAP) {
1986
202
            depth = Py_MAX(depth, instructions[len].i_oparg);
1987
202
            more = true;
1988
202
        }
1989
2.08k
        else if (opcode != NOP) {
1990
1.98k
            break;
1991
1.98k
        }
1992
2.29k
    }
1993
    // It's already optimal if there's only one SWAP:
1994
1.98k
    if (!more) {
1995
1.78k
        return SUCCESS;
1996
1.78k
    }
1997
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1998
202
    int *stack = PyMem_Malloc(depth * sizeof(int));
1999
202
    if (stack == NULL) {
2000
0
        PyErr_NoMemory();
2001
0
        return ERROR;
2002
0
    }
2003
808
    for (int i = 0; i < depth; i++) {
2004
606
        stack[i] = i;
2005
606
    }
2006
    // Simulate the combined effect of these instructions by "running" them on
2007
    // our "stack":
2008
606
    for (int i = 0; i < len; i++) {
2009
404
        if (instructions[i].i_opcode == SWAP) {
2010
404
            int oparg = instructions[i].i_oparg;
2011
404
            int top = stack[0];
2012
            // SWAPs are 1-indexed:
2013
404
            stack[0] = stack[oparg - 1];
2014
404
            stack[oparg - 1] = top;
2015
404
        }
2016
404
    }
2017
    // Now we can begin! Our approach here is based on a solution to a closely
2018
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2019
    // think of this algorithm as determining the steps needed to efficiently
2020
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2021
    // though, we can efficiently *shuffle* it! For this reason, we will be
2022
    // replacing instructions starting from the *end* of the run. Since the
2023
    // solution is optimal, we don't need to worry about running out of space:
2024
202
    int current = len - 1;
2025
808
    for (int i = 0; i < depth; i++) {
2026
        // Skip items that have already been visited, or just happen to be in
2027
        // the correct location:
2028
606
        if (stack[i] == VISITED || stack[i] == i) {
2029
404
            continue;
2030
404
        }
2031
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2032
        // with other items; traversing the cycle and swapping each item with
2033
        // the next will put them all in the correct place. The weird
2034
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2035
        // can only swap from that position:
2036
202
        int j = i;
2037
808
        while (true) {
2038
            // Skip the actual swap if our item is zero, since swapping the top
2039
            // item with itself is pointless:
2040
808
            if (j) {
2041
404
                assert(0 <= current);
2042
                // SWAPs are 1-indexed:
2043
404
                instructions[current].i_opcode = SWAP;
2044
404
                instructions[current--].i_oparg = j + 1;
2045
404
            }
2046
808
            if (stack[j] == VISITED) {
2047
                // Completed the cycle:
2048
202
                assert(j == i);
2049
202
                break;
2050
202
            }
2051
606
            int next_j = stack[j];
2052
606
            stack[j] = VISITED;
2053
606
            j = next_j;
2054
606
        }
2055
202
    }
2056
    // NOP out any unused instructions:
2057
202
    while (0 <= current) {
2058
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2059
0
    }
2060
202
    PyMem_Free(stack);
2061
202
    *ix += len - 1;
2062
202
    return SUCCESS;
2063
202
}
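
A standalone version of the cycle walk: given the net permutation that a run of SWAPs applies to the top `depth` stack slots, it emits one SWAP oparg per cycle step, threading every cycle through slot 0 because SWAP can only exchange with the top. As in the function, the emitted opargs must be applied in reverse order to reproduce the shuffle, and identity permutations emit nothing:

    #include <stdio.h>

    #define VISITED (-1)

    static int emit_swaps(int *stack, int depth, int *out) {
        int n = 0;
        for (int i = 0; i < depth; i++) {
            if (stack[i] == VISITED || stack[i] == i) {
                continue;                  /* already in place */
            }
            int j = i;
            while (1) {
                if (j) {
                    out[n++] = j + 1;      /* SWAP opargs are 1-indexed */
                }
                if (stack[j] == VISITED) {
                    break;                 /* cycle closed back at i */
                }
                int next = stack[j];
                stack[j] = VISITED;
                j = next;
            }
        }
        return n;
    }

    int main(void) {
        /* Net effect of SWAP(2); SWAP(3): slot 0 holds item 2,
           slot 1 holds item 0, slot 2 holds item 1. */
        int stack[] = {2, 0, 1};
        int out[8];
        int n = emit_swaps(stack, 3, out);
        for (int i = n - 1; i >= 0; i--) {  /* reversed = the shuffle */
            printf("SWAP %d\n", out[i]);    /* prints SWAP 2, SWAP 3 */
        }
        return 0;
    }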
2064
2065
2066
// This list is pretty small, since it's only okay to reorder opcodes that:
2067
// - can't affect control flow (like jumping or raising exceptions)
2068
// - can't invoke arbitrary code (besides finalizers)
2069
// - only touch the TOS (and pop it when finished)
2070
#define SWAPPABLE(opcode) \
2071
3.27k
    ((opcode) == STORE_FAST || \
2072
3.27k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2073
3.27k
     (opcode) == POP_TOP)
2074
2075
#define STORES_TO(instr) \
2076
463
    (((instr).i_opcode == STORE_FAST || \
2077
463
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2078
463
     ? (instr).i_oparg : -1)
2079
2080
static int
2081
next_swappable_instruction(basicblock *block, int i, int lineno)
2082
3.21k
{
2083
3.31k
    while (++i < block->b_iused) {
2084
3.29k
        cfg_instr *instruction = &block->b_instr[i];
2085
3.29k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2086
            // Optimizing across this instruction could cause user-visible
2087
            // changes in the names bound between line tracing events!
2088
14
            return -1;
2089
14
        }
2090
3.27k
        if (instruction->i_opcode == NOP) {
2091
104
            continue;
2092
104
        }
2093
3.17k
        if (SWAPPABLE(instruction->i_opcode)) {
2094
1.43k
            return i;
2095
1.43k
        }
2096
1.73k
        return -1;
2097
3.17k
    }
2098
24
    return -1;
2099
3.21k
}
2100
2101
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2102
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2103
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2104
static void
2105
apply_static_swaps(basicblock *block, int i)
2106
1.98k
{
2107
    // SWAPs are to our left, and potential swaperands are to our right:
2108
2.29k
    for (; 0 <= i; i--) {
2109
2.18k
        assert(i < block->b_iused);
2110
2.18k
        cfg_instr *swap = &block->b_instr[i];
2111
2.18k
        if (swap->i_opcode != SWAP) {
2112
198
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2113
                // Nope, but we know how to handle these. Keep looking:
2114
99
                continue;
2115
99
            }
2116
            // We can't reason about what this instruction does. Bail:
2117
99
            return;
2118
198
        }
2119
1.98k
        int j = next_swappable_instruction(block, i, -1);
2120
1.98k
        if (j < 0) {
2121
1.02k
            return;
2122
1.02k
        }
2123
966
        int k = j;
2124
966
        int lineno = block->b_instr[j].i_loc.lineno;
2125
1.43k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2126
1.22k
            k = next_swappable_instruction(block, k, lineno);
2127
1.22k
            if (k < 0) {
2128
754
                return;
2129
754
            }
2130
1.22k
        }
2131
        // The reordering is not safe if the two instructions to be swapped
2132
        // store to the same location, or if any intervening instruction stores
2133
        // to the same location as either of them.
2134
212
        int store_j = STORES_TO(block->b_instr[j]);
2135
212
        int store_k = STORES_TO(block->b_instr[k]);
2136
212
        if (store_j >= 0 || store_k >= 0) {
2137
212
            if (store_j == store_k) {
2138
0
                return;
2139
0
            }
2140
251
            for (int idx = j + 1; idx < k; idx++) {
2141
39
                int store_idx = STORES_TO(block->b_instr[idx]);
2142
39
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2143
0
                    return;
2144
0
                }
2145
39
            }
2146
212
        }
2147
2148
        // Success!
2149
212
        INSTR_SET_OP0(swap, NOP);
2150
212
        cfg_instr temp = block->b_instr[j];
2151
212
        block->b_instr[j] = block->b_instr[k];
2152
212
        block->b_instr[k] = temp;
2153
212
    }
2154
1.98k
}
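
The key safety condition is the store check: exchanging the two consumers is unsound if they, or anything between them, write the same local. A toy SWAP(2) case with adjacent consumers, so the intervening-instruction scan from the real code is elided here:

    #include <stdio.h>

    enum { NOP, SWAP, POP_TOP, STORE_FAST };
    typedef struct { int op, arg; } Instr;

    static int stores_to(Instr in) {
        return in.op == STORE_FAST ? in.arg : -1;
    }

    /* The SWAP is at i; j and k are its two swappable consumers, j < k. */
    static void apply_static_swap(Instr *code, int i, int j, int k) {
        int sj = stores_to(code[j]), sk = stores_to(code[k]);
        if ((sj >= 0 || sk >= 0) && sj == sk) {
            return;                      /* both store one local: unsafe */
        }
        code[i].op = NOP;                /* drop the runtime SWAP */
        Instr tmp = code[j];
        code[j] = code[k];
        code[k] = tmp;
    }

    int main(void) {
        Instr code[] = {{SWAP, 2}, {POP_TOP, 0}, {STORE_FAST, 42}};
        apply_static_swap(code, 0, 1, 2);
        /* Result: NOP, STORE_FAST 42, POP_TOP */
        for (int i = 0; i < 3; i++) printf("%d %d\n", code[i].op, code[i].arg);
        return 0;
    }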
2155
2156
static int
2157
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2158
55.9k
{
2159
55.9k
    assert(PyDict_CheckExact(const_cache));
2160
55.9k
    assert(PyList_CheckExact(consts));
2161
55.9k
    int opcode = 0;
2162
55.9k
    int oparg = 0;
2163
493k
    for (int i = 0; i < bb->b_iused; i++) {
2164
437k
        cfg_instr *inst = &bb->b_instr[i];
2165
437k
        if (inst->i_opcode == LOAD_CONST) {
2166
75.8k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2167
75.8k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2168
75.8k
            Py_DECREF(constant);
2169
75.8k
            if (res < 0) {
2170
0
                return ERROR;
2171
0
            }
2172
75.8k
        }
2173
437k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2174
55.0k
                                      inst->i_opcode == COPY &&
2175
42
                                      inst->i_oparg == 1);
2176
437k
        if (! is_copy_of_load_const) {
2177
437k
            opcode = inst->i_opcode;
2178
437k
            oparg = inst->i_oparg;
2179
437k
        }
2180
437k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2181
437k
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2182
361k
            continue;
2183
361k
        }
2184
75.9k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2185
75.9k
        switch(nextop) {
2186
155
            case POP_JUMP_IF_FALSE:
2187
158
            case POP_JUMP_IF_TRUE:
2188
158
            case JUMP_IF_FALSE:
2189
158
            case JUMP_IF_TRUE:
2190
158
            {
2191
                /* Remove LOAD_CONST const; conditional jump */
2192
158
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2193
158
                if (cnt == NULL) {
2194
0
                    return ERROR;
2195
0
                }
2196
158
                int is_true = PyObject_IsTrue(cnt);
2197
158
                Py_DECREF(cnt);
2198
158
                if (is_true == -1) {
2199
0
                    return ERROR;
2200
0
                }
2201
158
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2202
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2203
158
                    INSTR_SET_OP0(inst, NOP);
2204
158
                }
2205
158
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2206
158
                if (is_true == jump_if_true) {
2207
1
                    bb->b_instr[i+1].i_opcode = JUMP;
2208
1
                }
2209
157
                else {
2210
157
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2211
157
                }
2212
158
                break;
2213
158
            }
2214
1.73k
            case IS_OP:
2215
1.73k
            {
2216
                // Fold to POP_JUMP_IF_NONE:
2217
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2218
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2219
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2220
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2221
                // Fold to POP_JUMP_IF_NOT_NONE:
2222
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2223
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2224
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2225
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2226
1.73k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2227
1.73k
                if (cnt == NULL) {
2228
0
                    return ERROR;
2229
0
                }
2230
1.73k
                if (!Py_IsNone(cnt)) {
2231
22
                    Py_DECREF(cnt);
2232
22
                    break;
2233
22
                }
2234
1.71k
                if (bb->b_iused <= i + 2) {
2235
10
                    break;
2236
10
                }
2237
1.70k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2238
1.70k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2239
                // Get rid of TO_BOOL regardless:
2240
1.70k
                if (jump_instr->i_opcode == TO_BOOL) {
2241
1.66k
                    INSTR_SET_OP0(jump_instr, NOP);
2242
1.66k
                    if (bb->b_iused <= i + 3) {
2243
0
                        break;
2244
0
                    }
2245
1.66k
                    jump_instr = &bb->b_instr[i + 3];
2246
1.66k
                }
2247
1.70k
                bool invert = is_instr->i_oparg;
2248
1.70k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2249
1.60k
                    invert = !invert;
2250
1.60k
                }
2251
105
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2252
38
                    break;
2253
38
                }
2254
1.66k
                INSTR_SET_OP0(inst, NOP);
2255
1.66k
                INSTR_SET_OP0(is_instr, NOP);
2256
1.66k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2257
1.66k
                                              : POP_JUMP_IF_NONE;
2258
1.66k
                break;
2259
1.70k
            }
2260
158
            case TO_BOOL:
2261
158
            {
2262
158
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2263
158
                if (cnt == NULL) {
2264
0
                    return ERROR;
2265
0
                }
2266
158
                int is_true = PyObject_IsTrue(cnt);
2267
158
                Py_DECREF(cnt);
2268
158
                if (is_true == -1) {
2269
0
                    return ERROR;
2270
0
                }
2271
158
                cnt = PyBool_FromLong(is_true);
2272
158
                int index = add_const(cnt, consts, const_cache);
2273
158
                if (index < 0) {
2274
0
                    return ERROR;
2275
0
                }
2276
158
                INSTR_SET_OP0(inst, NOP);
2277
158
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2278
158
                break;
2279
158
            }
2280
75.9k
        }
2281
75.9k
    }
2282
55.9k
    return SUCCESS;
2283
55.9k
}
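
The conditional-jump case above is ordinary branch elimination: once the condition is a known constant, the LOAD_CONST/jump pair becomes either an unconditional JUMP or two NOPs. A toy rendering with invented opcodes:

    #include <stdio.h>

    enum { NOP, JUMP, LOAD_CONST, POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE };
    typedef struct { int op, arg; } Instr;

    /* is_true: compile-time truthiness of the loaded constant. */
    static void fold_const_jump(Instr *load, Instr *jump, int is_true) {
        int jump_if_true = (jump->op == POP_JUMP_IF_TRUE);
        load->op = NOP;                     /* the constant is never pushed */
        if (is_true == jump_if_true) {
            jump->op = JUMP;                /* always taken */
        }
        else {
            jump->op = NOP;                 /* never taken */
        }
    }

    int main(void) {
        Instr code[] = {{LOAD_CONST, 0}, {POP_JUMP_IF_FALSE, 7}};
        fold_const_jump(&code[0], &code[1], /*is_true=*/1);
        printf("%d %d\n", code[0].op, code[1].op);   /* 0 0: both NOPs */
        return 0;
    }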
2284
2285
static int
2286
7.90k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2287
63.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2288
55.9k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2289
55.9k
    }
2290
7.90k
    return SUCCESS;
2291
7.90k
}
2292
2293
static int
2294
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2295
55.9k
{
2296
55.9k
    assert(PyDict_CheckExact(const_cache));
2297
55.9k
    assert(PyList_CheckExact(consts));
2298
55.9k
    cfg_instr nop;
2299
55.9k
    INSTR_SET_OP0(&nop, NOP);
2300
495k
    for (int i = 0; i < bb->b_iused; i++) {
2301
439k
        cfg_instr *inst = &bb->b_instr[i];
2302
439k
        cfg_instr *target;
2303
439k
        int opcode = inst->i_opcode;
2304
439k
        int oparg = inst->i_oparg;
2305
439k
        if (HAS_TARGET(opcode)) {
2306
28.0k
            assert(inst->i_target->b_iused > 0);
2307
28.0k
            target = &inst->i_target->b_instr[0];
2308
28.0k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2309
28.0k
        }
2310
411k
        else {
2311
411k
            target = &nop;
2312
411k
        }
2313
439k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2314
439k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2315
439k
        switch (opcode) {
2316
            /* Try to fold tuples of constants.
2317
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2318
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2319
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2320
4.71k
            case BUILD_TUPLE:
2321
4.71k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2322
109
                    switch(oparg) {
2323
0
                        case 1:
2324
0
                            INSTR_SET_OP0(inst, NOP);
2325
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2326
0
                            continue;
2327
94
                        case 2:
2328
106
                        case 3:
2329
106
                            INSTR_SET_OP0(inst, NOP);
2330
106
                            bb->b_instr[i+1].i_opcode = SWAP;
2331
106
                            continue;
2332
109
                    }
2333
109
                }
2334
4.61k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2335
4.61k
                break;
2336
1.39k
            case BUILD_LIST:
2337
1.51k
            case BUILD_SET:
2338
1.51k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2339
1.51k
                break;
2340
882
            case POP_JUMP_IF_NOT_NONE:
2341
1.74k
            case POP_JUMP_IF_NONE:
2342
1.74k
                switch (target->i_opcode) {
2343
79
                    case JUMP:
2344
79
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2345
1.74k
                }
2346
1.74k
                break;
2347
11.0k
            case POP_JUMP_IF_FALSE:
2348
11.0k
                switch (target->i_opcode) {
2349
674
                    case JUMP:
2350
674
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2351
11.0k
                }
2352
11.0k
                break;
2353
11.0k
            case POP_JUMP_IF_TRUE:
2354
2.14k
                switch (target->i_opcode) {
2355
59
                    case JUMP:
2356
59
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2357
2.14k
                }
2358
2.14k
                break;
2359
2.14k
            case JUMP_IF_FALSE:
2360
323
                switch (target->i_opcode) {
2361
0
                    case JUMP:
2362
0
                    case JUMP_IF_FALSE:
2363
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2364
0
                        continue;
2365
3
                    case JUMP_IF_TRUE:
2366
                        // No need to check for loops here, a block's b_next
2367
                        // cannot point to itself.
2368
3
                        assert(inst->i_target != inst->i_target->b_next);
2369
3
                        inst->i_target = inst->i_target->b_next;
2370
3
                        i--;
2371
3
                        continue;
2372
323
                }
2373
320
                break;
2374
329
            case JUMP_IF_TRUE:
2375
329
                switch (target->i_opcode) {
2376
0
                    case JUMP:
2377
0
                    case JUMP_IF_TRUE:
2378
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2379
0
                        continue;
2380
7
                    case JUMP_IF_FALSE:
2381
                        // No need to check for loops here, a block's b_next
2382
                        // cannot point to itself.
2383
7
                        assert(inst->i_target != inst->i_target->b_next);
2384
7
                        inst->i_target = inst->i_target->b_next;
2385
7
                        i--;
2386
7
                        continue;
2387
329
                }
2388
322
                break;
2389
4.19k
            case JUMP:
2390
7.16k
            case JUMP_NO_INTERRUPT:
2391
7.16k
                switch (target->i_opcode) {
2392
0
                    case JUMP:
2393
0
                        i -= jump_thread(bb, inst, target, JUMP);
2394
0
                        continue;
2395
0
                    case JUMP_NO_INTERRUPT:
2396
0
                        i -= jump_thread(bb, inst, target, opcode);
2397
0
                        continue;
2398
7.16k
                }
2399
7.16k
                break;
2400
7.16k
            case FOR_ITER:
2401
1.81k
                if (target->i_opcode == JUMP) {
2402
                    /* This will not work now because the jump (at target) could
2403
                     * be forward or backward and FOR_ITER only jumps forward. We
2404
                     * can re-enable this if ever we implement a backward version
2405
                     * of FOR_ITER.
2406
                     */
2407
                    /*
2408
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2409
                    */
2410
0
                }
2411
1.81k
                break;
2412
16.0k
            case STORE_FAST:
2413
16.0k
                if (opcode == nextop &&
2414
1.91k
                    oparg == bb->b_instr[i+1].i_oparg &&
2415
31
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2416
31
                    bb->b_instr[i].i_opcode = POP_TOP;
2417
31
                    bb->b_instr[i].i_oparg = 0;
2418
31
                }
2419
16.0k
                break;
2420
2.18k
            case SWAP:
2421
2.18k
                if (oparg == 1) {
2422
0
                    INSTR_SET_OP0(inst, NOP);
2423
0
                }
2424
2.18k
                break;
2425
20.4k
            case LOAD_GLOBAL:
2426
20.4k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2427
9.99k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2428
9.99k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2429
9.99k
                }
2430
20.4k
                break;
2431
7.29k
            case COMPARE_OP:
2432
7.29k
                if (nextop == TO_BOOL) {
2433
3.37k
                    INSTR_SET_OP0(inst, NOP);
2434
3.37k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2435
3.37k
                    continue;
2436
3.37k
                }
2437
3.92k
                break;
2438
3.92k
            case CONTAINS_OP:
2439
4.74k
            case IS_OP:
2440
4.74k
                if (nextop == TO_BOOL) {
2441
2.20k
                    INSTR_SET_OP0(inst, NOP);
2442
2.20k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2443
2.20k
                    continue;
2444
2.20k
                }
2445
2.53k
                if (nextop == UNARY_NOT) {
2446
0
                    INSTR_SET_OP0(inst, NOP);
2447
0
                    int inverted = oparg ^ 1;
2448
0
                    assert(inverted == 0 || inverted == 1);
2449
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2450
0
                    continue;
2451
0
                }
2452
2.53k
                break;
2453
5.74k
            case TO_BOOL:
2454
5.74k
                if (nextop == TO_BOOL) {
2455
0
                    INSTR_SET_OP0(inst, NOP);
2456
0
                    continue;
2457
0
                }
2458
5.74k
                break;
2459
5.74k
            case UNARY_NOT:
2460
69
                if (nextop == TO_BOOL) {
2461
0
                    INSTR_SET_OP0(inst, NOP);
2462
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2463
0
                    continue;
2464
0
                }
2465
69
                if (nextop == UNARY_NOT) {
2466
0
                    INSTR_SET_OP0(inst, NOP);
2467
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2468
0
                    continue;
2469
0
                }
2470
69
                _Py_FALLTHROUGH;
2471
121
            case UNARY_INVERT:
2472
841
            case UNARY_NEGATIVE:
2473
841
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2474
841
                break;
2475
550
            case CALL_INTRINSIC_1:
2476
550
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2477
118
                    if (nextop == GET_ITER) {
2478
0
                        INSTR_SET_OP0(inst, NOP);
2479
0
                    }
2480
118
                    else {
2481
118
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2482
118
                    }
2483
118
                }
2484
432
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2485
2
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2486
2
                }
2487
550
                break;
2488
9.04k
            case BINARY_OP:
2489
9.04k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2490
9.04k
                break;
2491
439k
        }
2492
439k
    }
2493
2494
494k
    for (int i = 0; i < bb->b_iused; i++) {
2495
438k
        cfg_instr *inst = &bb->b_instr[i];
2496
438k
        if (inst->i_opcode == SWAP) {
2497
1.98k
            if (swaptimize(bb, &i) < 0) {
2498
0
                goto error;
2499
0
            }
2500
1.98k
            apply_static_swaps(bb, i);
2501
1.98k
        }
2502
438k
    }
2503
55.9k
    return SUCCESS;
2504
0
error:
2505
0
    return ERROR;
2506
55.9k
}
2507
2508
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2509
2510
static int
2511
remove_redundant_nops_and_jumps(cfg_builder *g)
2512
17.1k
{
2513
17.1k
    int removed_nops, removed_jumps;
2514
19.5k
    do {
2515
        /* Convergence is guaranteed because the number of
2516
         * redundant jumps and nops only decreases.
2517
         */
2518
19.5k
        removed_nops = remove_redundant_nops(g);
2519
19.5k
        RETURN_IF_ERROR(removed_nops);
2520
19.5k
        removed_jumps = remove_redundant_jumps(g);
2521
19.5k
        RETURN_IF_ERROR(removed_jumps);
2522
19.5k
    } while(removed_nops + removed_jumps > 0);
2523
17.1k
    return SUCCESS;
2524
17.1k
}
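
The do/while is a fixpoint loop: each pass reports how many rewrites it made, and iteration stops after the first round that changes nothing. Termination is guaranteed because those counts can only shrink. The generic shape, with stand-in passes:

    #include <stdio.h>

    static int state = 5;
    static int pass_a(void) { if (state > 2) { state--; return 1; } return 0; }
    static int pass_b(void) { if (state % 2) { state--; return 1; } return 0; }

    int main(void) {
        int changed;
        do {
            changed = pass_a() + pass_b();   /* rewrite counts only decrease */
        } while (changed > 0);
        printf("fixpoint: %d\n", state);     /* prints fixpoint: 2 */
        return 0;
    }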
2525
2526
/* Perform optimizations on a control flow graph.
2527
   The consts object should still be in list form to allow new constants
2528
   to be appended.
2529
2530
   Code transformations that reduce code size initially fill the gaps with
2531
   NOPs.  Later those NOPs are removed.
2532
*/
2533
static int
2534
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2535
7.90k
{
2536
7.90k
    assert(PyDict_CheckExact(const_cache));
2537
7.90k
    RETURN_IF_ERROR(check_cfg(g));
2538
7.90k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2539
7.90k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2540
7.90k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2541
7.90k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2542
63.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2543
55.9k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2544
55.9k
    }
2545
7.90k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2546
7.90k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2547
7.90k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2548
7.90k
    assert(no_redundant_jumps(g));
2549
7.90k
    return SUCCESS;
2550
7.90k
}
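
optimize_cfg is a straight-line pipeline in which any failing pass aborts the whole optimization, which is exactly what RETURN_IF_ERROR expands to. The same shape with a function-pointer table (toy passes, invented names):

    #include <stdio.h>

    #define ERROR -1
    #define SUCCESS 0

    typedef int (*pass_fn)(int *cfg);

    static int pass_one(int *cfg) { *cfg += 1; return SUCCESS; }
    static int pass_two(int *cfg) { return (*cfg > 10) ? ERROR : SUCCESS; }

    static int run_pipeline(int *cfg) {
        pass_fn passes[] = { pass_one, pass_two, pass_one };
        for (size_t i = 0; i < sizeof(passes) / sizeof(passes[0]); i++) {
            if (passes[i](cfg) == ERROR) {
                return ERROR;              /* abort on the first failure */
            }
        }
        return SUCCESS;
    }

    int main(void) {
        int cfg = 0;
        printf("%d %d\n", run_pipeline(&cfg), cfg);   /* prints 0 2 */
        return 0;
    }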
2551
2552
static void
2553
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2554
15.2k
{
2555
15.2k
    int32_t line1 = inst1->i_loc.lineno;
2556
15.2k
    int32_t line2 = inst2->i_loc.lineno;
2557
    /* Skip if instructions are on different lines */
2558
15.2k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2559
5.52k
        return;
2560
5.52k
    }
2561
9.68k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2562
958
        return;
2563
958
    }
2564
8.72k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2565
8.72k
    INSTR_SET_OP0(inst2, NOP);
2566
8.72k
}
2567
2568
static int
2569
insert_superinstructions(cfg_builder *g)
2570
7.90k
{
2571
63.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2572
2573
457k
        for (int i = 0; i < b->b_iused; i++) {
2574
401k
            cfg_instr *inst = &b->b_instr[i];
2575
401k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2576
401k
            switch(inst->i_opcode) {
2577
53.8k
                case LOAD_FAST:
2578
53.8k
                    if (nextop == LOAD_FAST) {
2579
8.09k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2580
8.09k
                    }
2581
53.8k
                    break;
2582
14.7k
                case STORE_FAST:
2583
14.7k
                    switch (nextop) {
2584
5.55k
                        case LOAD_FAST:
2585
5.55k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2586
5.55k
                            break;
2587
1.55k
                        case STORE_FAST:
2588
1.55k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2589
1.55k
                            break;
2590
14.7k
                    }
2591
14.7k
                    break;
2592
401k
            }
2593
401k
        }
2594
55.9k
    }
2595
7.90k
    int res = remove_redundant_nops(g);
2596
7.90k
    assert(no_redundant_nops(g));
2597
7.90k
    return res;
2598
7.90k
}
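
Superinstructions pack both opargs into a single operand as (a << 4) | b, which is why make_super_instruction rejects opargs of 16 or more. A small pack/unpack demo:

    #include <stdio.h>

    static int pack(int a, int b)   { return (a << 4) | b; }  /* a, b < 16 */
    static int first(int arg)       { return arg >> 4; }
    static int second(int arg)      { return arg & 15; }

    int main(void) {
        int arg = pack(3, 12);
        printf("%d %d %d\n", arg, first(arg), second(arg));   /* 60 3 12 */
        return 0;
    }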
2599
2600
#define NOT_LOCAL -1
2601
15.4k
#define DUMMY_INSTR -1
2602
2603
typedef struct {
2604
    // Index of instruction that produced the reference or DUMMY_INSTR.
2605
    int instr;
2606
2607
    // The local to which the reference refers or NOT_LOCAL.
2608
    int local;
2609
} ref;
2610
2611
typedef struct {
2612
    ref *refs;
2613
    Py_ssize_t size;
2614
    Py_ssize_t capacity;
2615
} ref_stack;
2616
2617
static int
2618
ref_stack_push(ref_stack *stack, ref r)
2619
335k
{
2620
335k
    if (stack->size == stack->capacity) {
2621
7.90k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2622
7.90k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2623
7.90k
        if (refs == NULL) {
2624
0
            PyErr_NoMemory();
2625
0
            return -1;
2626
0
        }
2627
7.90k
        stack->refs = refs;
2628
7.90k
        stack->capacity = new_cap;
2629
7.90k
    }
2630
335k
    stack->refs[stack->size] = r;
2631
335k
    stack->size++;
2632
335k
    return 0;
2633
335k
}
2634
2635
static ref
2636
ref_stack_pop(ref_stack *stack)
2637
290k
{
2638
290k
    assert(stack->size > 0);
2639
290k
    stack->size--;
2640
290k
    ref r = stack->refs[stack->size];
2641
290k
    return r;
2642
290k
}
2643
2644
static void
2645
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2646
1.35k
{
2647
1.35k
    Py_ssize_t idx = stack->size - off;
2648
1.35k
    assert(idx >= 0 && idx < stack->size);
2649
1.35k
    ref tmp = stack->refs[idx];
2650
1.35k
    stack->refs[idx] = stack->refs[stack->size - 1];
2651
1.35k
    stack->refs[stack->size - 1] = tmp;
2652
1.35k
}
2653
2654
static ref
2655
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2656
61.4k
{
2657
61.4k
    assert(idx >= 0 && idx < stack->size);
2658
61.4k
    return stack->refs[idx];
2659
61.4k
}
2660
2661
static void
2662
ref_stack_clear(ref_stack *stack)
2663
41.4k
{
2664
41.4k
    stack->size = 0;
2665
41.4k
}
2666
2667
static void
2668
ref_stack_fini(ref_stack *stack)
2669
7.90k
{
2670
7.90k
    if (stack->refs != NULL) {
2671
7.90k
        PyMem_Free(stack->refs);
2672
7.90k
    }
2673
7.90k
    stack->refs = NULL;
2674
7.90k
    stack->capacity = 0;
2675
7.90k
    stack->size = 0;
2676
7.90k
}
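
Usage shape of the ref stack above: an amortized-doubling array used as an abstract operand stack, with the same 1-indexed swap-from-top semantics as SWAP. A self-contained re-implementation for illustration (int payload instead of ref):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { int *v; size_t size, cap; } stack_t;

    static int push(stack_t *s, int x) {
        if (s->size == s->cap) {
            size_t ncap = s->cap < 16 ? 32 : s->cap * 2;  /* Py_MAX(32, 2*cap) */
            int *nv = realloc(s->v, ncap * sizeof(int));
            if (nv == NULL) return -1;
            s->v = nv; s->cap = ncap;
        }
        s->v[s->size++] = x;
        return 0;
    }

    static int pop(stack_t *s) { return s->v[--s->size]; }

    static void swap_top(stack_t *s, size_t off) {   /* off is 1-indexed */
        int tmp = s->v[s->size - off];
        s->v[s->size - off] = s->v[s->size - 1];
        s->v[s->size - 1] = tmp;
    }

    int main(void) {
        stack_t s = {0};
        for (int i = 1; i <= 3; i++) push(&s, i);    /* 1 2 3, top = 3 */
        swap_top(&s, 3);                             /* like SWAP(3): 3 2 1 */
        int a = pop(&s), b = pop(&s), c = pop(&s);
        printf("%d %d %d\n", a, b, c);               /* prints 1 2 3 */
        free(s.v);
        return 0;
    }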
2677
2678
typedef enum {
2679
    // The loaded reference is still on the stack when the local is killed
2680
    SUPPORT_KILLED  = 1,
2681
    // The loaded reference is stored into a local
2682
    STORED_AS_LOCAL = 2,
2683
    // The loaded reference is still on the stack at the end of the basic block
2684
    REF_UNCONSUMED  = 4,
2685
} LoadFastInstrFlag;
2686
2687
static void
2688
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2689
15.7k
{
2690
30.5k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2691
14.8k
        ref r = ref_stack_at(refs, i);
2692
14.8k
        if (r.local == local) {
2693
14
            assert(r.instr >= 0);
2694
14
            instr_flags[r.instr] |= SUPPORT_KILLED;
2695
14
        }
2696
14.8k
    }
2697
15.7k
}
2698
2699
static void
2700
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2701
15.4k
{
2702
15.4k
    kill_local(instr_flags, refs, local);
2703
15.4k
    if (r.instr != DUMMY_INSTR) {
2704
13.5k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2705
13.5k
    }
2706
15.4k
}
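
kill_local and store_local do the bookkeeping for the borrowed-reference analysis below: when a local dies, every stack entry still borrowing from it has its producing instruction flagged (SUPPORT_KILLED in the real code), which disqualifies that load from the optimization. A toy fixed-size version:

    #include <stdio.h>

    #define KILLED 1

    typedef struct { int instr, local; } ref;

    static ref stack[8];
    static int size = 0;
    static int flags[8];                /* per-instruction violation flags */

    static void kill_local_toy(int local) {
        for (int i = 0; i < size; i++) {
            if (stack[i].local == local) {
                flags[stack[i].instr] |= KILLED;   /* borrow outlives local */
            }
        }
    }

    int main(void) {
        /* instr 0: LOAD_FAST x  -> borrowed ref to local 0 on the stack */
        stack[size++] = (ref){0, 0};
        /* instr 1: STORE_FAST x -> kills local 0 while the ref is live  */
        kill_local_toy(0);
        printf("instr 0 flags: %d\n", flags[0]);   /* 1: must not borrow */
        return 0;
    }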
2707
2708
static void
2709
load_fast_push_block(basicblock ***sp, basicblock *target,
2710
                     Py_ssize_t start_depth)
2711
43.8k
{
2712
43.8k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2713
43.8k
    if (!target->b_visited) {
2714
33.5k
        target->b_visited = 1;
2715
33.5k
        *(*sp)++ = target;
2716
33.5k
    }
2717
43.8k
}
2718
2719
/*
2720
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2721
 * load borrowed references onto the operand stack.
2722
 *
2723
 * This is only safe when we can prove that the reference in the frame outlives
2724
 * the borrowed reference produced by the instruction. We make this tractable
2725
 * by enforcing the following lifetimes:
2726
 *
2727
 * 1. Borrowed references loaded onto the operand stack live until the end of
2728
 *    the instruction that consumes them from the stack. Any borrowed
2729
 *    references that would escape into the heap (e.g. into frame objects or
2730
 *    generators) are converted into new, strong references.
2731
 *
2732
 * 2. Locals live until they are either killed by an instruction
2733
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2734
 *    via `f_locals` is added to a tuple owned by the frame object.
2735
 *
2736
 * To simplify the problem of detecting which supporting references in the
2737
 * frame are killed by instructions that overwrite locals, we only allow
2738
 * borrowed references to be stored as a local in the frame if they were passed
2739
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2740
 * strong references.
2741
 *
2742
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2743
 * that meet the following criteria:
2744
 *
2745
 * 1. The produced reference must be consumed from the stack before the
2746
 *    supporting reference in the frame is killed.
2747
 *
2748
 * 2. The produced reference cannot be stored as a local.
2749
 *
2750
 * We use abstract interpretation to identify instructions that meet these
2751
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2752
 * stack of abstract references and note any instructions that violate the
2753
 * criteria above. Once we've processed all the instructions in a block, any
2754
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2755
 */
2756
static int
2757
optimize_load_fast(cfg_builder *g)
2758
7.90k
{
2759
7.90k
    int status;
2760
7.90k
    ref_stack refs = {0};
2761
7.90k
    int max_instrs = 0;
2762
7.90k
    basicblock *entryblock = g->g_entryblock;
2763
64.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2764
56.8k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2765
56.8k
    }
2766
7.90k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2767
7.90k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2768
7.90k
    if (instr_flags == NULL) {
2769
0
        PyErr_NoMemory();
2770
0
        return ERROR;
2771
0
    }
2772
7.90k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2773
7.90k
    if (blocks == NULL) {
2774
0
        status = ERROR;
2775
0
        goto done;
2776
0
    }
2777
7.90k
    basicblock **sp = blocks;
2778
7.90k
    *sp = entryblock;
2779
7.90k
    sp++;
2780
7.90k
    entryblock->b_startdepth = 0;
2781
7.90k
    entryblock->b_visited = 1;
2782
2783
7.90k
    #define PUSH_REF(instr, local)                \
2784
335k
        do {                                      \
2785
335k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2786
0
                status = ERROR;                   \
2787
0
                goto done;                        \
2788
0
            }                                     \
2789
335k
        } while(0)
2790
2791
49.3k
    while (sp != blocks) {
2792
41.4k
        basicblock *block = *--sp;
2793
41.4k
        assert(block->b_startdepth > -1);
2794
2795
        // Reset per-block state.
2796
41.4k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2797
2798
        // Reset the stack of refs. We don't track references on the stack
2799
        // across basic blocks, but the bytecode will expect their
2800
        // presence. Add dummy references as necessary.
2801
41.4k
        ref_stack_clear(&refs);
2802
78.4k
        for (int i = 0; i < block->b_startdepth; i++) {
2803
37.0k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2804
37.0k
        }
2805
2806
421k
        for (int i = 0; i < block->b_iused; i++) {
2807
379k
            cfg_instr *instr = &block->b_instr[i];
2808
379k
            int opcode = instr->i_opcode;
2809
379k
            int oparg = instr->i_oparg;
2810
379k
            assert(opcode != EXTENDED_ARG);
2811
379k
            switch (opcode) {
2812
                // Opcodes that load and store locals
2813
2
                case DELETE_FAST: {
2814
2
                    kill_local(instr_flags, &refs, oparg);
2815
2
                    break;
2816
0
                }
2817
2818
46.6k
                case LOAD_FAST: {
2819
46.6k
                    PUSH_REF(i, oparg);
2820
46.6k
                    break;
2821
46.6k
                }
2822
2823
46.6k
                case LOAD_FAST_AND_CLEAR: {
2824
317
                    kill_local(instr_flags, &refs, oparg);
2825
317
                    PUSH_REF(i, oparg);
2826
317
                    break;
2827
317
                }
2828
2829
6.96k
                case LOAD_FAST_LOAD_FAST: {
2830
6.96k
                    PUSH_REF(i, oparg >> 4);
2831
6.96k
                    PUSH_REF(i, oparg & 15);
2832
6.96k
                    break;
2833
6.96k
                }
2834
2835
12.6k
                case STORE_FAST: {
2836
12.6k
                    ref r = ref_stack_pop(&refs);
2837
12.6k
                    store_local(instr_flags, &refs, oparg, r);
2838
12.6k
                    break;
2839
6.96k
                }
2840
2841
239
                case STORE_FAST_LOAD_FAST: {
2842
                    // STORE_FAST
2843
239
                    ref r = ref_stack_pop(&refs);
2844
239
                    store_local(instr_flags, &refs, oparg >> 4, r);
2845
                    // LOAD_FAST
2846
239
                    PUSH_REF(i, oparg & 15);
2847
239
                    break;
2848
239
                }
2849
2850
1.30k
                case STORE_FAST_STORE_FAST: {
2851
                    // STORE_FAST
2852
1.30k
                    ref r = ref_stack_pop(&refs);
2853
1.30k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2854
                    // STORE_FAST
2855
1.30k
                    r = ref_stack_pop(&refs);
2856
1.30k
                    store_local(instr_flags, &refs, oparg & 15, r);
2857
1.30k
                    break;
2858
239
                }
2859
2860
                // Opcodes that shuffle values on the stack
2861
1.53k
                case COPY: {
2862
1.53k
                    assert(oparg > 0);
2863
1.53k
                    Py_ssize_t idx = refs.size - oparg;
2864
1.53k
                    ref r = ref_stack_at(&refs, idx);
2865
1.53k
                    PUSH_REF(r.instr, r.local);
2866
1.53k
                    break;
2867
1.53k
                }
2868
2869
1.53k
                case SWAP: {
2870
1.35k
                    assert(oparg >= 2);
2871
1.35k
                    ref_stack_swap_top(&refs, oparg);
2872
1.35k
                    break;
2873
1.53k
                }
2874
2875
                // We treat opcodes that do not consume all of their inputs on
2876
                // a case-by-case basis, as we have no generic way of knowing
2877
                // how many inputs should be left on the stack.
2878
2879
                // Opcodes that consume no inputs
2880
1.56k
                case FORMAT_SIMPLE:
2881
1.56k
                case GET_ANEXT:
2882
3.27k
                case GET_ITER:
2883
3.27k
                case GET_LEN:
2884
3.31k
                case GET_YIELD_FROM_ITER:
2885
3.95k
                case IMPORT_FROM:
2886
3.95k
                case MATCH_KEYS:
2887
3.95k
                case MATCH_MAPPING:
2888
3.95k
                case MATCH_SEQUENCE:
2889
3.95k
                case WITH_EXCEPT_START: {
2890
3.95k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2891
3.95k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2892
3.95k
                    int net_pushed = num_pushed - num_popped;
2893
3.95k
                    assert(net_pushed >= 0);
2894
6.29k
                    for (int j = 0; j < net_pushed; j++) {
2895
2.34k
                        PUSH_REF(i, NOT_LOCAL);
2896
2.34k
                    }
2897
3.95k
                    break;
2898
3.95k
                }
2899
2900
                // Opcodes that consume some inputs and push no new values
2901
3.95k
                case DICT_MERGE:
2902
781
                case DICT_UPDATE:
2903
1.55k
                case LIST_APPEND:
2904
1.77k
                case LIST_EXTEND:
2905
12.5k
                case MAP_ADD:
2906
12.5k
                case RERAISE:
2907
12.7k
                case SET_ADD:
2908
12.7k
                case SET_UPDATE: {
2909
12.7k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2910
12.7k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2911
12.7k
                    int net_popped = num_popped - num_pushed;
2912
12.7k
                    assert(net_popped > 0);
2913
36.2k
                    for (int i = 0; i < net_popped; i++) {
2914
23.5k
                        ref_stack_pop(&refs);
2915
23.5k
                    }
2916
12.7k
                    break;
2917
12.7k
                }
2918
2919
54
                case END_SEND:
2920
2.10k
                case SET_FUNCTION_ATTRIBUTE: {
2921
2.10k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2922
2.10k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2923
2.10k
                    ref tos = ref_stack_pop(&refs);
2924
2.10k
                    ref_stack_pop(&refs);
2925
2.10k
                    PUSH_REF(tos.instr, tos.local);
2926
2.10k
                    break;
2927
2.10k
                }
2928
2929
                // Opcodes that consume some inputs and push new values
2930
2.10k
                case CHECK_EXC_MATCH: {
2931
0
                    ref_stack_pop(&refs);
2932
0
                    PUSH_REF(i, NOT_LOCAL);
2933
0
                    break;
2934
0
                }
2935
2936
1.77k
                case FOR_ITER: {
2937
1.77k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2938
1.77k
                    PUSH_REF(i, NOT_LOCAL);
2939
1.77k
                    break;
2940
1.77k
                }
2941
2942
24.5k
                case LOAD_ATTR:
2943
24.7k
                case LOAD_SUPER_ATTR: {
2944
24.7k
                    ref self = ref_stack_pop(&refs);
2945
24.7k
                    if (opcode == LOAD_SUPER_ATTR) {
2946
241
                        ref_stack_pop(&refs);
2947
241
                        ref_stack_pop(&refs);
2948
241
                    }
2949
24.7k
                    PUSH_REF(i, NOT_LOCAL);
2950
24.7k
                    if (oparg & 1) {
2951
                        // A method call; conservatively assume that self is pushed
2952
                        // back onto the stack
2953
9.74k
                        PUSH_REF(self.instr, self.local);
2954
9.74k
                    }
2955
24.7k
                    break;
2956
24.7k
                }
2957
2958
24.7k
                case LOAD_SPECIAL:
2959
306
                case PUSH_EXC_INFO: {
2960
306
                    ref tos = ref_stack_pop(&refs);
2961
306
                    PUSH_REF(i, NOT_LOCAL);
2962
306
                    PUSH_REF(tos.instr, tos.local);
2963
306
                    break;
2964
306
                }
2965
2966
306
                case SEND: {
2967
54
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2968
54
                    ref_stack_pop(&refs);
2969
54
                    PUSH_REF(i, NOT_LOCAL);
2970
54
                    break;
2971
54
                }
2972
2973
                // Opcodes that consume all of their inputs
2974
262k
                default: {
2975
262k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2976
262k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2977
262k
                    if (HAS_TARGET(instr->i_opcode)) {
2978
18.9k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2979
18.9k
                    }
2980
262k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2981
                        // Block push opcodes only affect the stack when jumping
2982
                        // to the target.
2983
484k
                        for (int j = 0; j < num_popped; j++) {
2984
221k
                            ref_stack_pop(&refs);
2985
221k
                        }
2986
457k
                        for (int j = 0; j < num_pushed; j++) {
2987
194k
                            PUSH_REF(i, NOT_LOCAL);
2988
194k
                        }
2989
262k
                    }
2990
262k
                    break;
2991
262k
                }
2992
379k
            }
2993
379k
        }
2994
2995
        // Push fallthrough block
2996
41.4k
        if (BB_HAS_FALLTHROUGH(block)) {
2997
23.0k
            assert(block->b_next != NULL);
2998
23.0k
            load_fast_push_block(&sp, block->b_next, refs.size);
2999
23.0k
        }
3000
3001
        // Mark instructions that produce values that are on the stack at the
3002
        // end of the basic block
3003
86.5k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3004
45.0k
            ref r = ref_stack_at(&refs, i);
3005
45.0k
            if (r.instr != -1) {
3006
19.1k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3007
19.1k
            }
3008
45.0k
        }
3009
3010
        // Optimize instructions
3011
421k
        for (int i = 0; i < block->b_iused; i++) {
3012
379k
            if (!instr_flags[i]) {
3013
348k
                cfg_instr *instr = &block->b_instr[i];
3014
348k
                switch (instr->i_opcode) {
3015
45.0k
                    case LOAD_FAST:
3016
45.0k
                        instr->i_opcode = LOAD_FAST_BORROW;
3017
45.0k
                        break;
3018
6.88k
                    case LOAD_FAST_LOAD_FAST:
3019
6.88k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3020
6.88k
                        break;
3021
296k
                    default:
3022
296k
                        break;
3023
348k
                }
3024
348k
            }
3025
379k
        }
3026
41.4k
    }
3027
3028
7.90k
    #undef PUSH_REF
3029
3030
7.90k
    status = SUCCESS;
3031
3032
7.90k
done:
3033
7.90k
    ref_stack_fini(&refs);
3034
7.90k
    PyMem_Free(instr_flags);
3035
7.90k
    PyMem_Free(blocks);
3036
7.90k
    return status;
3037
7.90k
}
3038
3039
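/* A minimal, self-contained sketch (not part of flowgraph.c) of the marking
 * idea used by the pass above: a LOAD_FAST may borrow its local only if the
 * reference it pushes is consumed before the end of its basic block;
 * whatever is still on the simulated stack at block end gets flagged.  All
 * names below (mini_ref, MINI_UNCONSUMED) are hypothetical. */
#if 0   /* illustrative only */
#include <stdio.h>

#define MINI_UNCONSUMED 1u

typedef struct { int producer; } mini_ref;  /* producing instr index, -1 if unknown */

int main(void)
{
    /* Simulated block: 0: LOAD_FAST x, 1: LOAD_FAST y, 2: BINARY_OP,
     * 3: RETURN_VALUE.  Both loads are consumed inside the block, so
     * neither is flagged and both could be rewritten to borrows. */
    unsigned flags[4] = {0};
    mini_ref stack[4];
    int sp = 0;

    stack[sp++] = (mini_ref){0};   /* LOAD_FAST x pushes a ref */
    stack[sp++] = (mini_ref){1};   /* LOAD_FAST y pushes a ref */
    sp -= 2;
    stack[sp++] = (mini_ref){2};   /* BINARY_OP pops two, pushes one */
    sp -= 1;                       /* RETURN_VALUE pops the result */

    for (int i = 0; i < sp; i++) { /* flag producers of surviving refs */
        if (stack[i].producer != -1) {
            flags[stack[i].producer] |= MINI_UNCONSUMED;
        }
    }
    printf("load of x flagged: %u\n", flags[0] & MINI_UNCONSUMED);  /* 0 */
    return 0;
}
#endif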
// helper functions for add_checks_for_loads_of_unknown_variables
3040
static inline void
3041
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3042
191k
{
3043
    // Push b if the unsafe mask is giving us any new information.
3044
    // To avoid overflowing the stack, only allow each block once.
3045
    // Use b->b_visited=1 to mean that b is currently on the stack.
3046
191k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3047
191k
    if (b->b_unsafe_locals_mask != both) {
3048
31.8k
        b->b_unsafe_locals_mask = both;
3049
        // More work left to do.
3050
31.8k
        if (!b->b_visited) {
3051
            // not on the stack, so push it.
3052
31.6k
            *(*sp)++ = b;
3053
31.6k
            b->b_visited = 1;
3054
31.6k
        }
3055
31.8k
    }
3056
191k
}
3057
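/* A hedged worked example (not part of flowgraph.c) of the merge test in
 * maybe_push() above.  A block is (re)pushed only when the incoming mask
 * contributes at least one new bit:
 *
 *     b->b_unsafe_locals_mask = 0b0110
 *     incoming unsafe_mask    = 0b0100  -> both == 0b0110, nothing new, skip
 *     incoming unsafe_mask    = 0b1000  -> both == 0b1110, push b
 *
 * Masks only ever gain bits and there are at most 64 of them per block, so
 * the worklist loop in the caller terminates. */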
3058
static void
3059
scan_block_for_locals(basicblock *b, basicblock ***sp)
3060
83.7k
{
3061
    // bit i is set if local i is potentially uninitialized
3062
83.7k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3063
586k
    for (int i = 0; i < b->b_iused; i++) {
3064
502k
        cfg_instr *instr = &b->b_instr[i];
3065
502k
        assert(instr->i_opcode != EXTENDED_ARG);
3066
502k
        if (instr->i_except != NULL) {
3067
99.3k
            maybe_push(instr->i_except, unsafe_mask, sp);
3068
99.3k
        }
3069
502k
        if (instr->i_oparg >= 64) {
3070
35.4k
            continue;
3071
35.4k
        }
3072
502k
        assert(instr->i_oparg >= 0);
3073
467k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3074
467k
        switch (instr->i_opcode) {
3075
482
            case DELETE_FAST:
3076
1.12k
            case LOAD_FAST_AND_CLEAR:
3077
2.41k
            case STORE_FAST_MAYBE_NULL:
3078
2.41k
                unsafe_mask |= bit;
3079
2.41k
                break;
3080
31.1k
            case STORE_FAST:
3081
31.1k
                unsafe_mask &= ~bit;
3082
31.1k
                break;
3083
137
            case LOAD_FAST_CHECK:
3084
                // If this doesn't raise, then the local is defined.
3085
137
                unsafe_mask &= ~bit;
3086
137
                break;
3087
104k
            case LOAD_FAST:
3088
104k
                if (unsafe_mask & bit) {
3089
137
                    instr->i_opcode = LOAD_FAST_CHECK;
3090
137
                }
3091
104k
                unsafe_mask &= ~bit;
3092
104k
                break;
3093
467k
        }
3094
467k
    }
3095
83.7k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3096
44.7k
        maybe_push(b->b_next, unsafe_mask, sp);
3097
44.7k
    }
3098
83.7k
    cfg_instr *last = basicblock_last_instr(b);
3099
83.7k
    if (last && is_jump(last)) {
3100
40.9k
        assert(last->i_target != NULL);
3101
40.9k
        maybe_push(last->i_target, unsafe_mask, sp);
3102
40.9k
    }
3103
83.7k
}
3104
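/* A tiny runnable sketch (not part of flowgraph.c) of the per-opcode mask
 * transitions above, specialized to a single local: */
#if 0   /* illustrative only */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    const uint64_t bit = UINT64_C(1) << 0;  /* local 0 */
    uint64_t unsafe = bit;                  /* possibly unbound on entry */

    unsafe &= ~bit;        /* STORE_FAST 0: definitely bound   */
    unsafe |= bit;         /* DELETE_FAST 0: may be unbound    */
    assert(unsafe & bit);  /* a LOAD_FAST 0 here would be rewritten to
                              LOAD_FAST_CHECK ...              */
    unsafe &= ~bit;        /* ... and if the check passes, the local
                              is known to be bound afterwards  */
    assert(unsafe == 0);
    return 0;
}
#endif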
3105
static int
3106
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3107
0
{
3108
0
    assert(nlocals > 64);
3109
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3110
0
    if (states == NULL) {
3111
0
        PyErr_NoMemory();
3112
0
        return ERROR;
3113
0
    }
3114
0
    Py_ssize_t blocknum = 0;
3115
    // states[i - 64] == blocknum if local i is guaranteed to
3116
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3117
    // STORE_FAST within that basicblock (not followed by
3118
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3119
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3120
0
        blocknum++;
3121
0
        for (int i = 0; i < b->b_iused; i++) {
3122
0
            cfg_instr *instr = &b->b_instr[i];
3123
0
            assert(instr->i_opcode != EXTENDED_ARG);
3124
0
            int arg = instr->i_oparg;
3125
0
            if (arg < 64) {
3126
0
                continue;
3127
0
            }
3128
0
            assert(arg >= 0);
3129
0
            switch (instr->i_opcode) {
3130
0
                case DELETE_FAST:
3131
0
                case LOAD_FAST_AND_CLEAR:
3132
0
                case STORE_FAST_MAYBE_NULL:
3133
0
                    states[arg - 64] = blocknum - 1;
3134
0
                    break;
3135
0
                case STORE_FAST:
3136
0
                    states[arg - 64] = blocknum;
3137
0
                    break;
3138
0
                case LOAD_FAST:
3139
0
                    if (states[arg - 64] != blocknum) {
3140
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3141
0
                    }
3142
0
                    states[arg - 64] = blocknum;
3143
0
                    break;
3144
0
                    Py_UNREACHABLE();
3145
0
            }
3146
0
        }
3147
0
    }
3148
0
    PyMem_Free(states);
3149
0
    return SUCCESS;
3150
0
}
3151
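/* A hedged worked example (not part of flowgraph.c) of the blocknum trick
 * above.  states[arg - 64] records the basic block in which local `arg` was
 * last proven bound; storing blocknum - 1 on DELETE_FAST /
 * LOAD_FAST_AND_CLEAR / STORE_FAST_MAYBE_NULL invalidates that proof
 * without needing a second array:
 *
 *     block #3:  STORE_FAST 70  -> states[6] = 3
 *                LOAD_FAST 70   -> states[6] == 3, safe, stays LOAD_FAST
 *     block #4:  LOAD_FAST 70   -> states[6] == 3 != 4, rewritten to
 *                                  LOAD_FAST_CHECK
 */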
3152
static int
3153
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3154
7.90k
{
3155
7.90k
    assert(PyList_CheckExact(consts));
3156
7.90k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3157
7.90k
    if (nconsts == 0) {
3158
0
        return SUCCESS;  /* nothing to do */
3159
0
    }
3160
3161
7.90k
    Py_ssize_t *index_map = NULL;
3162
7.90k
    Py_ssize_t *reverse_index_map = NULL;
3163
7.90k
    int err = ERROR;
3164
3165
7.90k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3166
7.90k
    if (index_map == NULL) {
3167
0
        goto end;
3168
0
    }
3169
58.7k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3170
50.8k
        index_map[i] = -1;
3171
50.8k
    }
3172
    // The first constant may be the docstring; always keep it.
3173
7.90k
    index_map[0] = 0;
3174
3175
    /* mark used consts */
3176
63.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3177
457k
        for (int i = 0; i < b->b_iused; i++) {
3178
401k
            int opcode = b->b_instr[i].i_opcode;
3179
401k
            if (OPCODE_HAS_CONST(opcode)) {
3180
51.3k
                int index = b->b_instr[i].i_oparg;
3181
51.3k
                index_map[index] = index;
3182
51.3k
            }
3183
401k
        }
3184
55.9k
    }
3185
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3186
    /* condense consts */
3187
7.90k
    Py_ssize_t n_used_consts = 0;
3188
66.6k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3189
58.7k
        if (index_map[i] != -1) {
3190
44.2k
            assert(index_map[i] == i);
3191
44.2k
            index_map[n_used_consts++] = index_map[i];
3192
44.2k
        }
3193
58.7k
    }
3194
7.90k
    if (n_used_consts == nconsts) {
3195
        /* nothing to do */
3196
3.23k
        err = SUCCESS;
3197
3.23k
        goto end;
3198
3.23k
    }
3199
3200
    /* move all used consts to the beginning of the consts list */
3201
7.90k
    assert(n_used_consts < nconsts);
3202
39.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3203
35.0k
        Py_ssize_t old_index = index_map[i];
3204
35.0k
        assert(i <= old_index && old_index < nconsts);
3205
35.0k
        if (i != old_index) {
3206
25.2k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3207
25.2k
            assert(value != NULL);
3208
25.2k
            PyList_SetItem(consts, i, Py_NewRef(value));
3209
25.2k
        }
3210
35.0k
    }
3211
3212
    /* truncate the consts list at its new size */
3213
4.66k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3214
0
        goto end;
3215
0
    }
3216
    /* adjust const indices in the bytecode */
3217
4.66k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3218
4.66k
    if (reverse_index_map == NULL) {
3219
0
        goto end;
3220
0
    }
3221
54.2k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3222
49.5k
        reverse_index_map[i] = -1;
3223
49.5k
    }
3224
39.7k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3225
35.0k
        assert(index_map[i] != -1);
3226
35.0k
        assert(reverse_index_map[index_map[i]] == -1);
3227
35.0k
        reverse_index_map[index_map[i]] = i;
3228
35.0k
    }
3229
3230
45.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3231
351k
        for (int i = 0; i < b->b_iused; i++) {
3232
311k
            int opcode = b->b_instr[i].i_opcode;
3233
311k
            if (OPCODE_HAS_CONST(opcode)) {
3234
41.7k
                int index = b->b_instr[i].i_oparg;
3235
41.7k
                assert(reverse_index_map[index] >= 0);
3236
41.7k
                assert(reverse_index_map[index] < n_used_consts);
3237
41.7k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3238
41.7k
            }
3239
311k
        }
3240
40.3k
    }
3241
3242
4.66k
    err = SUCCESS;
3243
7.90k
end:
3244
7.90k
    PyMem_Free(index_map);
3245
7.90k
    PyMem_Free(reverse_index_map);
3246
7.90k
    return err;
3247
4.66k
}
3248
3249
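/* A hedged worked example (not part of flowgraph.c) of the two maps built
 * above.  With consts = [c0, c1, c2, c3] and only c0 and c2 used:
 *
 *     after marking:    index_map         = { 0, -1,  2, -1 }
 *     after condensing: index_map[0..1]   = { 0,  2 }   (n_used_consts == 2)
 *     reverse map:      reverse_index_map = { 0, -1,  1, -1 }
 *
 * consts is compacted to [c0, c2] and every const-loading oparg i is
 * rewritten to reverse_index_map[i]. */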
3250
3251
static int
3252
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3253
                                                int nlocals,
3254
                                                int nparams)
3255
7.90k
{
3256
7.90k
    if (nlocals == 0) {
3257
1.84k
        return SUCCESS;
3258
1.84k
    }
3259
6.06k
    if (nlocals > 64) {
3260
        // To avoid O(nlocals**2) compilation, locals beyond the first
3261
        // 64 are only analyzed one basicblock at a time: initialization
3262
        // info is not passed between basicblocks.
3263
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3264
0
            return ERROR;
3265
0
        }
3266
0
        nlocals = 64;
3267
0
    }
3268
6.06k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3269
6.06k
    if (stack == NULL) {
3270
0
        return ERROR;
3271
0
    }
3272
6.06k
    basicblock **sp = stack;
3273
3274
    // First origin of being uninitialized:
3275
    // The non-parameter locals in the entry block.
3276
6.06k
    uint64_t start_mask = 0;
3277
15.5k
    for (int i = nparams; i < nlocals; i++) {
3278
9.47k
        start_mask |= (uint64_t)1 << i;
3279
9.47k
    }
3280
6.06k
    maybe_push(entryblock, start_mask, &sp);
3281
3282
    // Second origin of being uninitialized:
3283
    // There could be DELETE_FAST somewhere, so
3284
    // be sure to scan each basicblock at least once.
3285
58.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
52.0k
        scan_block_for_locals(b, &sp);
3287
52.0k
    }
3288
    // Now propagate the uncertainty from the origins we found: Use
3289
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3290
37.7k
    while (sp > stack) {
3291
31.6k
        basicblock *b = *--sp;
3292
        // mark as no longer on stack
3293
31.6k
        b->b_visited = 0;
3294
31.6k
        scan_block_for_locals(b, &sp);
3295
31.6k
    }
3296
6.06k
    PyMem_Free(stack);
3297
6.06k
    return SUCCESS;
3298
6.06k
}
3299
3300
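/* A hedged example (not part of flowgraph.c) of the start mask built above.
 * With nparams == 2 and nlocals == 5, only the non-parameter locals begin
 * life unbound:
 *
 *     start_mask = (1 << 2) | (1 << 3) | (1 << 4) = 0b11100
 *
 * Parameters occupy bits 0 and 1 and are always bound on entry, so their
 * bits stay clear. */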
3301
static int
3302
7.00k
mark_warm(basicblock *entryblock) {
3303
7.00k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3304
7.00k
    if (stack == NULL) {
3305
0
        return ERROR;
3306
0
    }
3307
7.00k
    basicblock **sp = stack;
3308
3309
7.00k
    *sp++ = entryblock;
3310
7.00k
    entryblock->b_visited = 1;
3311
46.3k
    while (sp > stack) {
3312
39.3k
        basicblock *b = *(--sp);
3313
39.3k
        assert(!b->b_except_handler);
3314
39.3k
        b->b_warm = 1;
3315
39.3k
        basicblock *next = b->b_next;
3316
39.3k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3317
18.9k
            *sp++ = next;
3318
18.9k
            next->b_visited = 1;
3319
18.9k
        }
3320
348k
        for (int i=0; i < b->b_iused; i++) {
3321
308k
            cfg_instr *instr = &b->b_instr[i];
3322
308k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3323
13.4k
                *sp++ = instr->i_target;
3324
13.4k
                instr->i_target->b_visited = 1;
3325
13.4k
            }
3326
308k
        }
3327
39.3k
    }
3328
7.00k
    PyMem_Free(stack);
3329
7.00k
    return SUCCESS;
3330
7.00k
}
3331
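/* Hedged reading of the pass above: mark_warm() is a plain depth-first
 * search from the entry block along fallthrough edges and jump targets, so
 * "warm" means reachable without traversing an exception edge.  Exception
 * handlers are never pushed (see the !b->b_except_handler assert), which is
 * what lets mark_cold() below use them as cold seeds. */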
3332
static int
3333
7.00k
mark_cold(basicblock *entryblock) {
3334
62.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3335
55.0k
        assert(!b->b_cold && !b->b_warm);
3336
55.0k
    }
3337
7.00k
    if (mark_warm(entryblock) < 0) {
3338
0
        return ERROR;
3339
0
    }
3340
3341
7.00k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3342
7.00k
    if (stack == NULL) {
3343
0
        return ERROR;
3344
0
    }
3345
3346
7.00k
    basicblock **sp = stack;
3347
62.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3348
55.0k
        if (b->b_except_handler) {
3349
3.38k
            assert(!b->b_warm);
3350
3.38k
            *sp++ = b;
3351
3.38k
            b->b_visited = 1;
3352
3.38k
        }
3353
55.0k
    }
3354
3355
13.7k
    while (sp > stack) {
3356
6.75k
        basicblock *b = *(--sp);
3357
6.75k
        b->b_cold = 1;
3358
6.75k
        basicblock *next = b->b_next;
3359
6.75k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3360
1.87k
            if (!next->b_warm && !next->b_visited) {
3361
1.74k
                *sp++ = next;
3362
1.74k
                next->b_visited = 1;
3363
1.74k
            }
3364
1.87k
        }
3365
35.9k
        for (int i = 0; i < b->b_iused; i++) {
3366
29.2k
            cfg_instr *instr = &b->b_instr[i];
3367
29.2k
            if (is_jump(instr)) {
3368
2.32k
                assert(i == b->b_iused - 1);
3369
2.32k
                basicblock *target = b->b_instr[i].i_target;
3370
2.32k
                if (!target->b_warm && !target->b_visited) {
3371
1.63k
                    *sp++ = target;
3372
1.63k
                    target->b_visited = 1;
3373
1.63k
                }
3374
2.32k
            }
3375
29.2k
        }
3376
6.75k
    }
3377
7.00k
    PyMem_Free(stack);
3378
7.00k
    return SUCCESS;
3379
7.00k
}
3380
3381
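/* A hedged example (not part of flowgraph.c) of the flood fill above;
 * "cold" means reachable from an exception handler without passing through
 * a warm block:
 *
 *     entry -> B1 -> return          (entry, B1 marked warm)
 *     handler H -> C1 -> jump B1     (H, C1 marked cold; the walk stops at
 *                                     B1 because b_warm is already set)
 */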
3382
static int
3383
7.90k
push_cold_blocks_to_end(cfg_builder *g) {
3384
7.90k
    basicblock *entryblock = g->g_entryblock;
3385
7.90k
    if (entryblock->b_next == NULL) {
3386
        /* single basicblock, no need to reorder */
3387
903
        return SUCCESS;
3388
903
    }
3389
7.00k
    RETURN_IF_ERROR(mark_cold(entryblock));
3390
3391
7.00k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3392
3393
    /* If we have a cold block with fallthrough to a warm block, add */
3394
    /* an explicit jump instead of fallthrough */
3395
62.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3396
55.1k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3397
54
            basicblock *explicit_jump = cfg_builder_new_block(g);
3398
54
            if (explicit_jump == NULL) {
3399
0
                return ERROR;
3400
0
            }
3401
54
            if (!IS_LABEL(b->b_next->b_label)) {
3402
0
                b->b_next->b_label.id = next_lbl++;
3403
0
            }
3404
54
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3405
54
                             NO_LOCATION);
3406
54
            explicit_jump->b_cold = 1;
3407
54
            explicit_jump->b_next = b->b_next;
3408
54
            explicit_jump->b_predecessors = 1;
3409
54
            b->b_next = explicit_jump;
3410
3411
            /* set target */
3412
54
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3413
54
            last->i_target = explicit_jump->b_next;
3414
54
        }
3415
55.1k
    }
3416
3417
7.00k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3418
7.00k
    basicblock *cold_blocks = NULL;
3419
7.00k
    basicblock *cold_blocks_tail = NULL;
3420
3421
7.00k
    basicblock *b = entryblock;
3422
9.56k
    while(b->b_next) {
3423
9.56k
        assert(!b->b_cold);
3424
50.8k
        while (b->b_next && !b->b_next->b_cold) {
3425
41.3k
            b = b->b_next;
3426
41.3k
        }
3427
9.56k
        if (b->b_next == NULL) {
3428
            /* no more cold blocks */
3429
7.00k
            break;
3430
7.00k
        }
3431
3432
        /* b->b_next is the beginning of a cold streak */
3433
9.56k
        assert(!b->b_cold && b->b_next->b_cold);
3434
3435
2.56k
        basicblock *b_end = b->b_next;
3436
6.80k
        while (b_end->b_next && b_end->b_next->b_cold) {
3437
4.24k
            b_end = b_end->b_next;
3438
4.24k
        }
3439
3440
        /* b_end is the end of the cold streak */
3441
2.56k
        assert(b_end && b_end->b_cold);
3442
2.56k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3443
3444
2.56k
        if (cold_blocks == NULL) {
3445
1.32k
            cold_blocks = b->b_next;
3446
1.32k
        }
3447
1.23k
        else {
3448
1.23k
            cold_blocks_tail->b_next = b->b_next;
3449
1.23k
        }
3450
2.56k
        cold_blocks_tail = b_end;
3451
2.56k
        b->b_next = b_end->b_next;
3452
2.56k
        b_end->b_next = NULL;
3453
2.56k
    }
3454
7.00k
    assert(b != NULL && b->b_next == NULL);
3455
7.00k
    b->b_next = cold_blocks;
3456
3457
7.00k
    if (cold_blocks != NULL) {
3458
1.32k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3459
1.32k
    }
3460
7.00k
    return SUCCESS;
3461
7.00k
}
3462
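/* A hedged sketch (not part of flowgraph.c) of the splice above.  With
 * W = warm and C = cold, the b_next chain
 *
 *     W1 -> C1 -> C2 -> W2 -> C3 -> W3
 *
 * becomes
 *
 *     W1 -> W2 -> W3 -> C1 -> C2 -> C3
 *
 * after each cold streak is unlinked and appended at the end; the explicit
 * JUMP_NO_INTERRUPT inserted earlier preserves any cold->warm fallthrough
 * that the reordering would otherwise break. */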
3463
static int
3464
convert_pseudo_conditional_jumps(cfg_builder *g)
3465
7.90k
{
3466
7.90k
    basicblock *entryblock = g->g_entryblock;
3467
63.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3468
447k
        for (int i = 0; i < b->b_iused; i++) {
3469
391k
            cfg_instr *instr = &b->b_instr[i];
3470
391k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3471
642
                assert(i == b->b_iused - 1);
3472
642
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3473
322
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3474
642
                location loc = instr->i_loc;
3475
642
                basicblock *except = instr->i_except;
3476
642
                cfg_instr copy = {
3477
642
                            .i_opcode = COPY,
3478
642
                            .i_oparg = 1,
3479
642
                            .i_loc = loc,
3480
642
                            .i_target = NULL,
3481
642
                            .i_except = except,
3482
642
                };
3483
642
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3484
642
                cfg_instr to_bool = {
3485
642
                            .i_opcode = TO_BOOL,
3486
642
                            .i_oparg = 0,
3487
642
                            .i_loc = loc,
3488
642
                            .i_target = NULL,
3489
642
                            .i_except = except,
3490
642
                };
3491
642
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3492
642
            }
3493
391k
        }
3494
56.0k
    }
3495
7.90k
    return SUCCESS;
3496
7.90k
}
3497
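/* A hedged before/after (not part of flowgraph.c) of the lowering above.
 * The pseudo instruction JUMP_IF_TRUE must leave the tested value on the
 * stack, while POP_JUMP_IF_TRUE pops a bool, hence:
 *
 *     JUMP_IF_TRUE L      ==>     COPY 1
 *                                 TO_BOOL
 *                                 POP_JUMP_IF_TRUE L
 *
 * (and symmetrically for JUMP_IF_FALSE). */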
3498
static int
3499
convert_pseudo_ops(cfg_builder *g)
3500
7.90k
{
3501
7.90k
    basicblock *entryblock = g->g_entryblock;
3502
63.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3503
451k
        for (int i = 0; i < b->b_iused; i++) {
3504
395k
            cfg_instr *instr = &b->b_instr[i];
3505
395k
            if (is_block_push(instr)) {
3506
3.38k
                INSTR_SET_OP0(instr, NOP);
3507
3.38k
            }
3508
392k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3509
1.79k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3510
1.79k
                instr->i_opcode = LOAD_FAST;
3511
1.79k
            }
3512
390k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3513
644
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3514
644
                instr->i_opcode = STORE_FAST;
3515
644
            }
3516
395k
        }
3517
56.0k
    }
3518
7.90k
    return remove_redundant_nops_and_jumps(g);
3519
7.90k
}
3520
3521
static inline bool
3522
94.5k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3523
94.5k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3524
52.8k
        return basicblock_has_no_lineno(b);
3525
52.8k
    }
3526
41.7k
    else {
3527
41.7k
        return false;
3528
41.7k
    }
3529
94.5k
}
3530
3531
3532
/* PEP 626 mandates that the f_lineno of a frame is correct
3533
 * after a frame terminates. It would be prohibitively expensive
3534
 * to continuously update the f_lineno field at runtime,
3535
 * so we make sure that all exiting instructions (raises and returns)
3536
 * have a valid line number, allowing us to compute f_lineno lazily.
3537
 * We can do this by duplicating the exit blocks without a line number
3538
 * so that none have more than one predecessor. We can then safely
3539
 * copy the line number from the sole predecessor block.
3540
 */
3541
static int
3542
duplicate_exits_without_lineno(cfg_builder *g)
3543
15.8k
{
3544
15.8k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3545
3546
    /* Copy all exit blocks without line number that are targets of a jump.
3547
     */
3548
15.8k
    basicblock *entryblock = g->g_entryblock;
3549
127k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3550
112k
        cfg_instr *last = basicblock_last_instr(b);
3551
112k
        if (last == NULL) {
3552
17.2k
            continue;
3553
17.2k
        }
3554
94.8k
        if (is_jump(last)) {
3555
46.3k
            basicblock *target = next_nonempty_block(last->i_target);
3556
46.3k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3557
729
                basicblock *new_target = copy_basicblock(g, target);
3558
729
                if (new_target == NULL) {
3559
0
                    return ERROR;
3560
0
                }
3561
729
                new_target->b_instr[0].i_loc = last->i_loc;
3562
729
                last->i_target = new_target;
3563
729
                target->b_predecessors--;
3564
729
                new_target->b_predecessors = 1;
3565
729
                new_target->b_next = target->b_next;
3566
729
                new_target->b_label.id = next_lbl++;
3567
729
                target->b_next = new_target;
3568
729
            }
3569
46.3k
        }
3570
94.8k
    }
3571
3572
    /* Any remaining reachable exit blocks without a line number can only be reached by
3573
     * fall through, and thus can only have a single predecessor */
3574
127k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3575
112k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3576
48.2k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3577
994
                cfg_instr *last = basicblock_last_instr(b);
3578
994
                assert(last != NULL);
3579
994
                b->b_next->b_instr[0].i_loc = last->i_loc;
3580
994
            }
3581
48.2k
        }
3582
112k
    }
3583
15.8k
    return SUCCESS;
3584
15.8k
}
3585
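/* A hedged example (not part of flowgraph.c) of the duplication above.  If
 * two jumps share an exit block E that has no line number,
 *
 *     B1 --jump--> E              B1 --jump--> E   (E gets B1's location)
 *     B2 --jump--> E      ==>     B2 --jump--> E'  (copy of E, gets B2's)
 *
 * each exit now has exactly one predecessor and can safely inherit that
 * predecessor's line number. */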
3586
3587
/* If an instruction has no line number, but its predecessor in the BB does,
3588
 * then copy the line number. If a successor block has no line number, and only
3589
 * one predecessor, then inherit the line number.
3590
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3591
 * Also reduces the size of the line number table,
3592
 * but has no impact on the generated line number events.
3593
 */
3594
3595
static inline void
3596
maybe_propagate_location(basicblock *b, int i, location loc)
3597
885k
{
3598
885k
    assert(b->b_iused > i);
3599
885k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3600
52.8k
         b->b_instr[i].i_loc = loc;
3601
52.8k
    }
3602
885k
}
3603
3604
static void
3605
propagate_line_numbers(basicblock *entryblock)
3606
15.8k
{
3607
127k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3608
112k
        cfg_instr *last = basicblock_last_instr(b);
3609
112k
        if (last == NULL) {
3610
17.2k
            continue;
3611
17.2k
        }
3612
3613
94.8k
        location prev_location = NO_LOCATION;
3614
924k
        for (int i = 0; i < b->b_iused; i++) {
3615
829k
            maybe_propagate_location(b, i, prev_location);
3616
829k
            prev_location = b->b_instr[i].i_loc;
3617
829k
        }
3618
94.8k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3619
34.8k
            if (b->b_next->b_iused > 0) {
3620
34.8k
                maybe_propagate_location(b->b_next, 0, prev_location);
3621
34.8k
            }
3622
34.8k
        }
3623
94.8k
        if (is_jump(last)) {
3624
46.3k
            basicblock *target = last->i_target;
3625
46.3k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3626
3
                target = target->b_next;
3627
3
            }
3628
46.3k
            if (target->b_predecessors == 1) {
3629
21.4k
                maybe_propagate_location(target, 0, prev_location);
3630
21.4k
            }
3631
46.3k
        }
3632
94.8k
    }
3633
15.8k
}
3634
3635
static int
3636
resolve_line_numbers(cfg_builder *g, int firstlineno)
3637
15.8k
{
3638
15.8k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3639
15.8k
    propagate_line_numbers(g->g_entryblock);
3640
15.8k
    return SUCCESS;
3641
15.8k
}
3642
3643
int
3644
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3645
                        int nlocals, int nparams, int firstlineno)
3646
7.90k
{
3647
7.90k
    assert(cfg_builder_check(g));
3648
    /** Preprocessing **/
3649
    /* Map labels to targets and mark exception handlers */
3650
7.90k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3651
7.90k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3652
7.90k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3653
3654
    /** Optimization **/
3655
7.90k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3656
7.90k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3657
7.90k
    RETURN_IF_ERROR(
3658
7.90k
        add_checks_for_loads_of_uninitialized_variables(
3659
7.90k
            g->g_entryblock, nlocals, nparams));
3660
7.90k
    RETURN_IF_ERROR(insert_superinstructions(g));
3661
3662
7.90k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3663
7.90k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3664
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3665
    // assert(all_exits_have_lineno(g->g_entryblock));
3666
7.90k
    return SUCCESS;
3667
7.90k
}
3668
3669
static int *
3670
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3671
7.90k
{
3672
7.90k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3673
7.90k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3674
7.90k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3675
3676
7.90k
    int noffsets = ncellvars + nfreevars;
3677
7.90k
    int *fixed = PyMem_New(int, noffsets);
3678
7.90k
    if (fixed == NULL) {
3679
0
        PyErr_NoMemory();
3680
0
        return NULL;
3681
0
    }
3682
10.1k
    for (int i = 0; i < noffsets; i++) {
3683
2.28k
        fixed[i] = nlocals + i;
3684
2.28k
    }
3685
3686
7.90k
    PyObject *varname, *cellindex;
3687
7.90k
    Py_ssize_t pos = 0;
3688
9.24k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3689
1.33k
        PyObject *varindex;
3690
1.33k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3691
0
            goto error;
3692
0
        }
3693
1.33k
        if (varindex == NULL) {
3694
1.03k
            continue;
3695
1.03k
        }
3696
3697
299
        int argoffset = PyLong_AsInt(varindex);
3698
299
        Py_DECREF(varindex);
3699
299
        if (argoffset == -1 && PyErr_Occurred()) {
3700
0
            goto error;
3701
0
        }
3702
3703
299
        int oldindex = PyLong_AsInt(cellindex);
3704
299
        if (oldindex == -1 && PyErr_Occurred()) {
3705
0
            goto error;
3706
0
        }
3707
299
        fixed[oldindex] = argoffset;
3708
299
    }
3709
7.90k
    return fixed;
3710
3711
0
error:
3712
0
    PyMem_Free(fixed);
3713
0
    return NULL;
3714
7.90k
}
3715
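/* A hedged worked example (not part of flowgraph.c) of the mapping built
 * above.  With nlocals == 3 and cellvars [a, b] where `a` is also
 * argument 0:
 *
 *     default layout:   fixed = { 3, 4 }      (cells placed after locals)
 *     `a` is an arg:    fixed[0] = 0          (cell shares the arg's slot)
 *
 * fix_cell_offsets() below rewrites MAKE_CELL/LOAD_DEREF/... opargs through
 * this table and drops the duplicated slot from nlocalsplus. */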
3716
#define IS_GENERATOR(CF) \
3717
7.90k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3718
3719
static int
3720
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3721
                           int *fixed, int nfreevars, int code_flags)
3722
7.90k
{
3723
7.90k
    assert(umd->u_firstlineno > 0);
3724
3725
    /* Add the generator prefix instructions. */
3726
7.90k
    if (IS_GENERATOR(code_flags)) {
3727
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3728
         * of 0. This is because RETURN_GENERATOR pushes an element
3729
         * with _PyFrame_StackPush before switching stacks.
3730
         */
3731
3732
401
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3733
401
        cfg_instr make_gen = {
3734
401
            .i_opcode = RETURN_GENERATOR,
3735
401
            .i_oparg = 0,
3736
401
            .i_loc = loc,
3737
401
            .i_target = NULL,
3738
401
            .i_except = NULL,
3739
401
        };
3740
401
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3741
401
        cfg_instr pop_top = {
3742
401
            .i_opcode = POP_TOP,
3743
401
            .i_oparg = 0,
3744
401
            .i_loc = loc,
3745
401
            .i_target = NULL,
3746
401
            .i_except = NULL,
3747
401
        };
3748
401
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3749
401
    }
3750
3751
    /* Set up cells for any variable that escapes, to be put in a closure. */
3752
7.90k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3753
7.90k
    if (ncellvars) {
3754
        // umd->u_cellvars has the cells out of order so we sort them
3755
        // before adding the MAKE_CELL instructions.  Note that we
3756
        // adjust for arg cells, which come first.
3757
949
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3758
949
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3759
949
        if (sorted == NULL) {
3760
0
            PyErr_NoMemory();
3761
0
            return ERROR;
3762
0
        }
3763
2.28k
        for (int i = 0; i < ncellvars; i++) {
3764
1.33k
            sorted[fixed[i]] = i + 1;
3765
1.33k
        }
3766
2.94k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3767
1.99k
            int oldindex = sorted[i] - 1;
3768
1.99k
            if (oldindex == -1) {
3769
656
                continue;
3770
656
            }
3771
1.33k
            cfg_instr make_cell = {
3772
1.33k
                .i_opcode = MAKE_CELL,
3773
                // This will get fixed in fix_cell_offsets().
3774
1.33k
                .i_oparg = oldindex,
3775
1.33k
                .i_loc = NO_LOCATION,
3776
1.33k
                .i_target = NULL,
3777
1.33k
                .i_except = NULL,
3778
1.33k
            };
3779
1.33k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3780
0
                PyMem_RawFree(sorted);
3781
0
                return ERROR;
3782
0
            }
3783
1.33k
            ncellsused += 1;
3784
1.33k
        }
3785
949
        PyMem_RawFree(sorted);
3786
949
    }
3787
3788
7.90k
    if (nfreevars) {
3789
569
        cfg_instr copy_frees = {
3790
569
            .i_opcode = COPY_FREE_VARS,
3791
569
            .i_oparg = nfreevars,
3792
569
            .i_loc = NO_LOCATION,
3793
569
            .i_target = NULL,
3794
569
            .i_except = NULL,
3795
569
        };
3796
569
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3797
569
    }
3798
3799
7.90k
    return SUCCESS;
3800
7.90k
}
3801
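/* A hedged summary (not part of flowgraph.c) of the entry prefix built
 * above.  Because each group is inserted at the front of the entry block,
 * the runtime order for a generator closure with cells ends up as:
 *
 *     COPY_FREE_VARS n     (inserted last, so it executes first)
 *     MAKE_CELL ...        (one per cell, in slot order)
 *     RETURN_GENERATOR
 *     POP_TOP
 *     <original code>
 */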
3802
static int
3803
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3804
7.90k
{
3805
7.90k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3806
7.90k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3807
7.90k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3808
7.90k
    int noffsets = ncellvars + nfreevars;
3809
3810
    // First deal with duplicates (arg cells).
3811
7.90k
    int numdropped = 0;
3812
10.1k
    for (int i = 0; i < noffsets ; i++) {
3813
2.28k
        if (fixedmap[i] == i + nlocals) {
3814
1.98k
            fixedmap[i] -= numdropped;
3815
1.98k
        }
3816
299
        else {
3817
            // It was a duplicate (cell/arg).
3818
299
            numdropped += 1;
3819
299
        }
3820
2.28k
    }
3821
3822
    // Then update offsets, either relative to locals or by cell2arg.
3823
63.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3824
451k
        for (int i = 0; i < b->b_iused; i++) {
3825
395k
            cfg_instr *inst = &b->b_instr[i];
3826
            // This is called before extended args are generated.
3827
395k
            assert(inst->i_opcode != EXTENDED_ARG);
3828
395k
            int oldoffset = inst->i_oparg;
3829
395k
            switch(inst->i_opcode) {
3830
1.33k
                case MAKE_CELL:
3831
3.12k
                case LOAD_CLOSURE:
3832
5.37k
                case LOAD_DEREF:
3833
6.44k
                case STORE_DEREF:
3834
6.44k
                case DELETE_DEREF:
3835
6.44k
                case LOAD_FROM_DICT_OR_DEREF:
3836
6.44k
                    assert(oldoffset >= 0);
3837
6.44k
                    assert(oldoffset < noffsets);
3838
6.44k
                    assert(fixedmap[oldoffset] >= 0);
3839
6.44k
                    inst->i_oparg = fixedmap[oldoffset];
3840
395k
            }
3841
395k
        }
3842
56.0k
    }
3843
3844
7.90k
    return numdropped;
3845
7.90k
}
3846
3847
static int
3848
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3849
7.90k
{
3850
7.90k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3851
7.90k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3852
7.90k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3853
7.90k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3854
7.90k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3855
7.90k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3856
7.90k
    assert(INT_MAX - nlocals - ncellvars > 0);
3857
7.90k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3858
7.90k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3859
7.90k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3860
7.90k
    if (cellfixedoffsets == NULL) {
3861
0
        return ERROR;
3862
0
    }
3863
3864
    // This must be called before fix_cell_offsets().
3865
7.90k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3866
0
        PyMem_Free(cellfixedoffsets);
3867
0
        return ERROR;
3868
0
    }
3869
3870
7.90k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3871
7.90k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3872
7.90k
    cellfixedoffsets = NULL;
3873
7.90k
    if (numdropped < 0) {
3874
0
        return ERROR;
3875
0
    }
3876
3877
7.90k
    nlocalsplus -= numdropped;
3878
7.90k
    return nlocalsplus;
3879
7.90k
}
3880
3881
cfg_builder *
3882
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3883
7.90k
{
3884
7.90k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3885
0
        return NULL;
3886
0
    }
3887
7.90k
    cfg_builder *g = _PyCfgBuilder_New();
3888
7.90k
    if (g == NULL) {
3889
0
        return NULL;
3890
0
    }
3891
457k
    for (int i = 0; i < seq->s_used; i++) {
3892
449k
        seq->s_instrs[i].i_target = 0;
3893
449k
    }
3894
457k
    for (int i = 0; i < seq->s_used; i++) {
3895
449k
        _PyInstruction *instr = &seq->s_instrs[i];
3896
449k
        if (HAS_TARGET(instr->i_opcode)) {
3897
29.6k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3898
29.6k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3899
29.6k
        }
3900
449k
    }
3901
7.90k
    int offset = 0;
3902
457k
    for (int i = 0; i < seq->s_used; i++) {
3903
449k
        _PyInstruction *instr = &seq->s_instrs[i];
3904
449k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3905
413
            if (seq->s_annotations_code != NULL) {
3906
1
                assert(seq->s_annotations_code->s_labelmap_size == 0
3907
1
                    && seq->s_annotations_code->s_nested == NULL);
3908
4
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3909
3
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3910
3
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3911
3
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3912
0
                        goto error;
3913
0
                    }
3914
3
                }
3915
1
                offset += seq->s_annotations_code->s_used - 1;
3916
1
            }
3917
412
            else {
3918
412
                offset -= 1;
3919
412
            }
3920
413
            continue;
3921
413
        }
3922
449k
        if (instr->i_target) {
3923
23.8k
            jump_target_label lbl_ = {i + offset};
3924
23.8k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3925
0
                goto error;
3926
0
            }
3927
23.8k
        }
3928
449k
        int opcode = instr->i_opcode;
3929
449k
        int oparg = instr->i_oparg;
3930
449k
        if (HAS_TARGET(opcode)) {
3931
29.6k
            oparg += offset;
3932
29.6k
        }
3933
449k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3934
0
            goto error;
3935
0
        }
3936
449k
    }
3937
7.90k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3938
0
        goto error;
3939
0
    }
3940
7.90k
    return g;
3941
0
error:
3942
0
    _PyCfgBuilder_Free(g);
3943
0
    return NULL;
3944
7.90k
}
3945
3946
int
3947
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3948
7.90k
{
3949
7.90k
    int lbl = 0;
3950
64.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3951
56.8k
        b->b_label = (jump_target_label){lbl};
3952
56.8k
        lbl += 1;
3953
56.8k
    }
3954
64.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3955
56.8k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3956
466k
        for (int i = 0; i < b->b_iused; i++) {
3957
409k
            cfg_instr *instr = &b->b_instr[i];
3958
409k
            if (HAS_TARGET(instr->i_opcode)) {
3959
                /* Set oparg to the label id (it will later be mapped to an offset) */
3960
23.1k
                instr->i_oparg = instr->i_target->b_label.id;
3961
23.1k
            }
3962
409k
            RETURN_IF_ERROR(
3963
409k
                _PyInstructionSequence_Addop(
3964
409k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3965
3966
409k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3967
409k
            if (instr->i_except != NULL) {
3968
53.5k
                hi->h_label = instr->i_except->b_label.id;
3969
53.5k
                hi->h_startdepth = instr->i_except->b_startdepth;
3970
53.5k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3971
53.5k
            }
3972
355k
            else {
3973
355k
                hi->h_label = -1;
3974
355k
            }
3975
409k
        }
3976
56.8k
    }
3977
7.90k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3978
0
        return ERROR;
3979
0
    }
3980
7.90k
    return SUCCESS;
3981
7.90k
}
3982
3983
3984
int
3985
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3986
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3987
                                     int *stackdepth, int *nlocalsplus,
3988
                                     _PyInstructionSequence *seq)
3989
7.90k
{
3990
7.90k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3991
3992
7.90k
    *stackdepth = calculate_stackdepth(g);
3993
7.90k
    if (*stackdepth < 0) {
3994
0
        return ERROR;
3995
0
    }
3996
3997
    /* prepare_localsplus adds instructions for generators that push
3998
     * and pop an item on the stack. This assertion makes sure there
3999
     * is space on the stack for that.
4000
     * It should always be true, because a generator must have at
4001
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
4002
 * which requires stack space.
4003
     */
4004
7.90k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
4005
4006
7.90k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
4007
7.90k
    if (*nlocalsplus < 0) {
4008
0
        return ERROR;
4009
0
    }
4010
4011
7.90k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4012
4013
    /* Order of basic blocks must have been determined by now */
4014
4015
7.90k
    RETURN_IF_ERROR(normalize_jumps(g));
4016
7.90k
    assert(no_redundant_jumps(g));
4017
4018
    /* Can't modify the bytecode after inserting instructions that produce
4019
     * borrowed references.
4020
     */
4021
7.90k
    RETURN_IF_ERROR(optimize_load_fast(g));
4022
4023
    /* Can't modify the bytecode after computing jump offsets. */
4024
7.90k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4025
0
        return ERROR;
4026
0
    }
4027
4028
7.90k
    return SUCCESS;
4029
7.90k
}
4030
4031
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4032
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4033
 */
4034
int
4035
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4036
0
{
4037
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4038
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4039
0
    return SUCCESS;
4040
0
}
4041
4042
/* Exported API functions */
4043
4044
int
4045
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4046
0
{
4047
0
    stack_effects effs;
4048
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4049
0
        return PY_INVALID_STACK_EFFECT;
4050
0
    }
4051
0
    return effs.net;
4052
0
}
4053
4054
int
4055
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4056
158
{
4057
158
    stack_effects effs;
4058
158
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4059
0
        return PY_INVALID_STACK_EFFECT;
4060
0
    }
4061
158
    return effs.net;
4062
158
}
4063
4064
/* Access to compiler optimizations for unit tests.
4065
4066
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4067
 * a CFG, optimizes it, and converts it back to an instruction list.
4068
 */
4069
4070
static PyObject *
4071
cfg_to_instruction_sequence(cfg_builder *g)
4072
0
{
4073
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4074
0
    if (seq == NULL) {
4075
0
        return NULL;
4076
0
    }
4077
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4078
0
        PyInstructionSequence_Fini(seq);
4079
0
        return NULL;
4080
0
    }
4081
0
    return (PyObject*)seq;
4082
0
}
4083
4084
PyObject *
4085
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4086
0
{
4087
0
    if (!_PyInstructionSequence_Check(seq)) {
4088
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4089
0
        return NULL;
4090
0
    }
4091
0
    PyObject *const_cache = PyDict_New();
4092
0
    if (const_cache == NULL) {
4093
0
        return NULL;
4094
0
    }
4095
4096
0
    PyObject *res = NULL;
4097
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4098
0
    if (g == NULL) {
4099
0
        goto error;
4100
0
    }
4101
0
    int nparams = 0, firstlineno = 1;
4102
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4103
0
                                nparams, firstlineno) < 0) {
4104
0
        goto error;
4105
0
    }
4106
4107
0
    if (calculate_stackdepth(g) == ERROR) {
4108
0
        goto error;
4109
0
    }
4110
4111
0
    if (optimize_load_fast(g) != SUCCESS) {
4112
0
        goto error;
4113
0
    }
4114
4115
0
    res = cfg_to_instruction_sequence(g);
4116
0
error:
4117
0
    Py_DECREF(const_cache);
4118
0
    _PyCfgBuilder_Free(g);
4119
0
    return res;
4120
0
}