Coverage Report

Created: 2025-10-12 06:48

/src/cpython/Python/flowgraph.c
Line | Count | Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
1.22M
#define SUCCESS 0
19
6.90k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
1.92M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
411k
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the blocks are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
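
The struct above threads every block onto two independent lists: b_list in reverse allocation order and b_next in control-flow/layout order. A minimal sketch (not part of flowgraph.c) of walking both:

static void
walk_blocks_sketch(basicblock *block_list, basicblock *entry)
{
    /* Reverse allocation order: visits every block, even ones no longer
     * reachable from the entry block. */
    for (basicblock *b = block_list; b != NULL; b = b->b_list) {
        assert(b->b_iused >= 0);
    }
    /* Layout order: only blocks still linked into the CFG. */
    for (basicblock *b = entry; b != NULL; b = b->b_next) {
        assert(b->b_iused >= 0);
    }
}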
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
421k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
421k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
380
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
1.78M
{
101
1.78M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
1.78M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
1.78M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
1.65M
{
108
1.65M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
1.65M
}
110
111
/* One arg */
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
42.2k
    do { \
114
42.2k
        assert(OPCODE_HAS_ARG(OP)); \
115
42.2k
        cfg_instr *_instr__ptr_ = (I); \
116
42.2k
        _instr__ptr_->i_opcode = (OP); \
117
42.2k
        _instr__ptr_->i_oparg = (ARG); \
118
42.2k
    } while (0);
119
120
/* No args */
121
#define INSTR_SET_OP0(I, OP) \
122
87.8k
    do { \
123
87.8k
        assert(!OPCODE_HAS_ARG(OP)); \
124
87.8k
        cfg_instr *_instr__ptr_ = (I); \
125
87.8k
        _instr__ptr_->i_opcode = (OP); \
126
87.8k
        _instr__ptr_->i_oparg = 0; \
127
87.8k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
4.99k
    do { \
131
4.99k
        cfg_instr *_instr__ptr_ = (I); \
132
4.99k
        _instr__ptr_->i_loc = (LOC); \
133
4.99k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the array as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
411k
{
144
411k
    assert(b != NULL);
145
411k
    _Py_c_array_t array = {
146
411k
        .array = (void*)b->b_instr,
147
411k
        .allocated_entries = b->b_ialloc,
148
411k
        .item_size = sizeof(cfg_instr),
149
411k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
411k
    };
151
152
411k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
411k
    b->b_instr = array.array;
154
411k
    b->b_ialloc = array.allocated_entries;
155
411k
    return b->b_iused++;
156
411k
}
157
158
static cfg_instr *
159
1.55M
basicblock_last_instr(const basicblock *b) {
160
1.55M
    assert(b->b_iused >= 0);
161
1.55M
    if (b->b_iused > 0) {
162
1.43M
        assert(b->b_instr != NULL);
163
1.43M
        return &b->b_instr[b->b_iused - 1];
164
1.43M
    }
165
120k
    return NULL;
166
1.55M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
48.8k
{
175
48.8k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
48.8k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with the new block. */
181
48.8k
    b->b_list = g->g_block_list;
182
48.8k
    g->g_block_list = b;
183
48.8k
    b->b_label = NO_LABEL;
184
48.8k
    return b;
185
48.8k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
402k
{
190
402k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
402k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
402k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
402k
    assert(0 <= oparg && oparg < (1 << 30));
194
195
402k
    int off = basicblock_next_instr(b);
196
402k
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
402k
    cfg_instr *i = &b->b_instr[off];
200
402k
    i->i_opcode = opcode;
201
402k
    i->i_oparg = oparg;
202
402k
    i->i_loc = loc;
203
    // memory is already zero initialized
204
402k
    assert(i->i_target == NULL);
205
402k
    assert(i->i_except == NULL);
206
207
402k
    return SUCCESS;
208
402k
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
1.58k
{
213
1.58k
    cfg_instr *last = basicblock_last_instr(b);
214
1.58k
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
1.58k
    RETURN_IF_ERROR(
219
1.58k
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
1.58k
    last = basicblock_last_instr(b);
221
1.58k
    assert(last && last->i_opcode == opcode);
222
1.58k
    last->i_target = target;
223
1.58k
    return SUCCESS;
224
1.58k
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
3.36k
{
229
8.80k
    for (int i = 0; i < from->b_iused; i++) {
230
5.43k
        int n = basicblock_next_instr(to);
231
5.43k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
5.43k
        to->b_instr[n] = from->b_instr[i];
235
5.43k
    }
236
3.36k
    return SUCCESS;
237
3.36k
}
238
239
static inline int
240
485k
basicblock_nofallthrough(const basicblock *b) {
241
485k
    cfg_instr *last = basicblock_last_instr(b);
242
485k
    return (last &&
243
461k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
304k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
485k
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
784k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
648
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
648
    assert(BB_NO_FALLTHROUGH(block));
257
648
    basicblock *result = cfg_builder_new_block(g);
258
648
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
648
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
648
    return result;
265
648
}
266
267
static int
268
3.93k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
3.93k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
73.3k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
69.4k
        block->b_instr[i] = block->b_instr[i-1];
272
69.4k
    }
273
3.93k
    block->b_instr[pos] = *instr;
274
3.93k
    return SUCCESS;
275
3.93k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
40.3k
{
343
40.3k
    assert(block != NULL);
344
40.3k
    g->g_curblock->b_next = block;
345
40.3k
    g->g_curblock = block;
346
40.3k
    return block;
347
40.3k
}
348
349
static inline int
350
94.0k
basicblock_exits_scope(const basicblock *b) {
351
94.0k
    cfg_instr *last = basicblock_last_instr(b);
352
94.0k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
94.0k
}
354
355
static inline int
356
59.9k
basicblock_has_eval_break(const basicblock *b) {
357
293k
    for (int i = 0; i < b->b_iused; i++) {
358
257k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
24.1k
            return true;
360
24.1k
        }
361
257k
    }
362
35.8k
    return false;
363
59.9k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
408k
{
368
408k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
408k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
34.3k
        return true;
371
34.3k
    }
372
373k
    if (IS_LABEL(g->g_current_label)) {
373
6.01k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
6.01k
            return true;
375
6.01k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
6.01k
    }
382
367k
    return false;
383
373k
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
408k
{
388
408k
    if (cfg_builder_current_block_is_terminated(g)) {
389
40.3k
        basicblock *b = cfg_builder_new_block(g);
390
40.3k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
40.3k
        b->b_label = g->g_current_label;
394
40.3k
        g->g_current_label = NO_LABEL;
395
40.3k
        cfg_builder_use_next_block(g, b);
396
40.3k
    }
397
408k
    return SUCCESS;
398
408k
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
{
404
    assert(g->g_entryblock->b_iused > 0);
405
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
        assert(!_PyMem_IsPtrFreed(block));
407
        if (block->b_instr != NULL) {
408
            assert(block->b_ialloc > 0);
409
            assert(block->b_iused >= 0);
410
            assert(block->b_ialloc >= block->b_iused);
411
        }
412
        else {
413
            assert (block->b_iused == 0);
414
            assert (block->b_ialloc == 0);
415
        }
416
    }
417
    return true;
418
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
6.90k
{
424
6.90k
    g->g_block_list = NULL;
425
6.90k
    basicblock *block = cfg_builder_new_block(g);
426
6.90k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
6.90k
    g->g_curblock = g->g_entryblock = block;
430
6.90k
    g->g_current_label = NO_LABEL;
431
6.90k
    return SUCCESS;
432
6.90k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
6.90k
{
437
6.90k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
6.90k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
6.90k
    memset(g, 0, sizeof(cfg_builder));
443
6.90k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
6.90k
    return g;
448
6.90k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
6.90k
{
453
6.90k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
6.90k
    assert(cfg_builder_check(g));
457
6.90k
    basicblock *b = g->g_block_list;
458
55.7k
    while (b != NULL) {
459
48.8k
        if (b->b_instr) {
460
48.8k
            PyMem_Free((void *)b->b_instr);
461
48.8k
        }
462
48.8k
        basicblock *next = b->b_list;
463
48.8k
        PyMem_Free((void *)b);
464
48.8k
        b = next;
465
48.8k
    }
466
6.90k
    PyMem_Free(g);
467
6.90k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
6.90k
{
472
6.90k
    int nblocks = 0;
473
54.2k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
47.2k
        nblocks++;
475
47.2k
    }
476
6.90k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
6.90k
    return SUCCESS;
481
6.90k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
20.2k
{
486
20.2k
    g->g_current_label = lbl;
487
20.2k
    return cfg_builder_maybe_start_new_block(g);
488
20.2k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
388k
{
493
388k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
388k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
388k
}
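
A hedged sketch of how a caller might drive the three entry points above to build a two-block CFG. The label value and the LOAD_CONST index are hypothetical; real labels come from the compiler's label allocator, and oparg still holds the label id here (it is resolved later by translate_jump_labels_to_targets()):

static int
build_tiny_cfg_sketch(cfg_builder *g, jump_target_label done)
{
    /* Block 1: push 1 and conditionally jump to the label. */
    RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, LOAD_SMALL_INT, 1, NO_LOCATION));
    RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, POP_JUMP_IF_TRUE, done.id, NO_LOCATION));
    /* Using the label terminates the current block and starts block 2. */
    RETURN_IF_ERROR(_PyCfgBuilder_UseLabel(g, done));
    /* Block 2: the jump (and fallthrough) target. */
    RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, LOAD_CONST, 0, NO_LOCATION));
    return _PyCfgBuilder_Addop(g, RETURN_VALUE, 0, NO_LOCATION);
}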
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
82.5k
{
501
86.1k
    while (b && b->b_iused == 0) {
502
3.59k
        b = b->b_next;
503
3.59k
    }
504
82.5k
    return b;
505
82.5k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
no_redundant_nops(cfg_builder *g) {
514
    if (remove_redundant_nops(g) != 0) {
515
        return false;
516
    }
517
    return true;
518
}
519
520
static bool
521
no_redundant_jumps(cfg_builder *g) {
522
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
        cfg_instr *last = basicblock_last_instr(b);
524
        if (last != NULL) {
525
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
                basicblock *next = next_nonempty_block(b->b_next);
527
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
                if (jump_target == next) {
529
                    assert(next);
530
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
                        assert(0);
532
                        return false;
533
                    }
534
                }
535
            }
536
        }
537
    }
538
    return true;
539
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
48.8k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
48.8k
    cfg_instr *last = basicblock_last_instr(b);
547
48.8k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
36.4k
        return SUCCESS;
549
36.4k
    }
550
48.8k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
12.3k
    bool is_forward = last->i_target->b_visited == 0;
553
12.3k
    if (is_forward) {
554
11.5k
        RETURN_IF_ERROR(
555
11.5k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
11.5k
        return SUCCESS;
557
11.5k
    }
558
559
818
    int reversed_opcode = 0;
560
818
    switch(last->i_opcode) {
561
30
        case POP_JUMP_IF_NOT_NONE:
562
30
            reversed_opcode = POP_JUMP_IF_NONE;
563
30
            break;
564
36
        case POP_JUMP_IF_NONE:
565
36
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
36
            break;
567
676
        case POP_JUMP_IF_FALSE:
568
676
            reversed_opcode = POP_JUMP_IF_TRUE;
569
676
            break;
570
76
        case POP_JUMP_IF_TRUE:
571
76
            reversed_opcode = POP_JUMP_IF_FALSE;
572
76
            break;
573
818
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
818
    basicblock *target = last->i_target;
579
818
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
818
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
818
    RETURN_IF_ERROR(
584
818
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
818
    RETURN_IF_ERROR(
586
818
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
818
    backwards_jump->b_startdepth = target->b_startdepth;
588
818
    last->i_opcode = reversed_opcode;
589
818
    last->i_target = b->b_next;
590
591
818
    backwards_jump->b_cold = b->b_cold;
592
818
    backwards_jump->b_next = b->b_next;
593
818
    b->b_next = backwards_jump;
594
818
    return SUCCESS;
595
818
}
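
The effect of the reversal branch above, sketched with illustrative block names (T is the backward target, N the original fallthrough successor):

/* Before:
 *     B:  ...; POP_JUMP_IF_FALSE -> T    (backward: T already visited)
 *     N:  ...
 * After:
 *     B:  ...; POP_JUMP_IF_TRUE -> N     (forward, reversed condition)
 *     B': NOT_TAKEN; JUMP -> T           (backward, now unconditional)
 *     N:  ...
 * B' is spliced between B and N, so every remaining backward jump in the
 * graph is unconditional. */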
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
6.90k
{
601
6.90k
    basicblock *entryblock = g->g_entryblock;
602
54.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
47.9k
        b->b_visited = 0;
604
47.9k
    }
605
55.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
48.8k
        b->b_visited = 1;
607
48.8k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
48.8k
    }
609
6.90k
    return SUCCESS;
610
6.90k
}
611
612
static int
613
6.90k
check_cfg(cfg_builder *g) {
614
54.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if a jump or exit is not the last instruction in the block. */
616
435k
        for (int i = 0; i < b->b_iused; i++) {
617
388k
            int opcode = b->b_instr[i].i_opcode;
618
388k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
388k
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
41.2k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
41.2k
            }
625
388k
        }
626
47.2k
    }
627
6.90k
    return SUCCESS;
628
6.90k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
26.8k
{
633
26.8k
    int lbl = -1;
634
216k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
189k
        if (b->b_label.id > lbl) {
636
73.5k
            lbl = b->b_label.id;
637
73.5k
        }
638
189k
    }
639
26.8k
    return lbl;
640
26.8k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
6.90k
{
646
6.90k
    int max_label = get_max_label(entryblock);
647
6.90k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
6.90k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
6.90k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
6.90k
    memset(label2block, 0, mapsize);
654
54.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
47.2k
        if (b->b_label.id >= 0) {
656
20.2k
            label2block[b->b_label.id] = b;
657
20.2k
        }
658
47.2k
    }
659
54.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
435k
        for (int i = 0; i < b->b_iused; i++) {
661
388k
            cfg_instr *instr = &b->b_instr[i];
662
388k
            assert(instr->i_target == NULL);
663
388k
            if (HAS_TARGET(instr->i_opcode)) {
664
25.0k
                int lbl = instr->i_oparg;
665
25.0k
                assert(lbl >= 0 && lbl <= max_label);
666
25.0k
                instr->i_target = label2block[lbl];
667
25.0k
                assert(instr->i_target != NULL);
668
25.0k
                assert(instr->i_target->b_label.id == lbl);
669
25.0k
            }
670
388k
        }
671
47.2k
    }
672
6.90k
    PyMem_Free(label2block);
673
6.90k
    return SUCCESS;
674
6.90k
}
675
676
static int
677
6.90k
mark_except_handlers(basicblock *entryblock) {
678
#ifndef NDEBUG
679
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
        assert(!b->b_except_handler);
681
    }
682
#endif
683
54.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
435k
        for (int i=0; i < b->b_iused; i++) {
685
388k
            cfg_instr *instr = &b->b_instr[i];
686
388k
            if (is_block_push(instr)) {
687
2.83k
                instr->i_target->b_except_handler = 1;
688
2.83k
            }
689
388k
        }
690
47.2k
    }
691
6.90k
    return SUCCESS;
692
6.90k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
2.82k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
2.82k
    assert(is_block_push(setup));
704
2.82k
    int opcode = setup->i_opcode;
705
2.82k
    basicblock * target = setup->i_target;
706
2.82k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
1.62k
        target->b_preserve_lasti = 1;
708
1.62k
    }
709
2.82k
    assert(stack->depth <= CO_MAXBLOCKS);
710
2.82k
    stack->handlers[++stack->depth] = target;
711
2.82k
    return target;
712
2.82k
}
713
714
static basicblock *
715
2.28k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
2.28k
    assert(stack->depth > 0);
717
2.28k
    return stack->handlers[--stack->depth];
718
2.28k
}
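
An illustrative sketch (not from the original file) of the bracketing these two helpers implement:

/* For a region compiled as
 *     SETUP_FINALLY -> H
 *     <body>
 *     POP_BLOCK
 * push_except_block() makes H the handler recorded in i_except for every
 * <body> instruction, and pop_except_block() restores the enclosing
 * handler (or NULL at depth 0) for whatever follows. */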
719
720
static basicblock *
721
40.5k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
40.5k
    return stack->handlers[stack->depth];
723
40.5k
}
724
725
static struct _PyCfgExceptStack *
726
6.90k
make_except_stack(void) {
727
6.90k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
6.90k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
6.90k
    new->depth = 0;
733
6.90k
    new->handlers[0] = NULL;
734
6.90k
    return new;
735
6.90k
}
736
737
static struct _PyCfgExceptStack *
738
15.1k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
15.1k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
15.1k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
15.1k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
15.1k
    return copy;
746
15.1k
}
747
748
static basicblock**
749
52.1k
make_cfg_traversal_stack(basicblock *entryblock) {
750
52.1k
    int nblocks = 0;
751
430k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
378k
        b->b_visited = 0;
753
378k
        nblocks++;
754
378k
    }
755
52.1k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
52.1k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
52.1k
    return stack;
760
52.1k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have different stack effect when jump to the target and
765
   when not jump. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jump
768
   * 1 -- when jump
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
361k
{
779
361k
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
361k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
361k
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
361k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
361k
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
361k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
2.82k
        effects->net = 0;
793
2.82k
        return 0;
794
2.82k
    }
795
358k
    effects->net = pushed - popped;
796
358k
    return 0;
797
361k
}
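
A minimal usage sketch (illustrative, not part of the original file) querying both the taken and not-taken effect of a conditional jump:

static int
stack_effects_sketch(void)
{
    stack_effects taken, not_taken;
    RETURN_IF_ERROR(get_stack_effects(POP_JUMP_IF_TRUE, 0, 1, &taken));
    RETURN_IF_ERROR(get_stack_effects(POP_JUMP_IF_TRUE, 0, 0, &not_taken));
    /* POP_JUMP_IF_TRUE pops its operand either way, so both nets are -1;
     * a block push such as SETUP_FINALLY would report 0 when not jumping. */
    assert(taken.net == -1 && not_taken.net == -1);
    return SUCCESS;
}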
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
49.9k
{
802
49.9k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
49.9k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
40.4k
        assert(b->b_startdepth < 0);
808
40.4k
        b->b_startdepth = depth;
809
40.4k
        *(*sp)++ = b;
810
40.4k
    }
811
49.9k
    return SUCCESS;
812
49.9k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
6.90k
{
820
6.90k
    basicblock *entryblock = g->g_entryblock;
821
54.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
47.9k
        b->b_startdepth = INT_MIN;
823
47.9k
    }
824
6.90k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
6.90k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
6.90k
    int stackdepth = -1;
831
6.90k
    int maxdepth = 0;
832
6.90k
    basicblock **sp = stack;
833
6.90k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
47.3k
    while (sp != stack) {
837
40.4k
        basicblock *b = *--sp;
838
40.4k
        int depth = b->b_startdepth;
839
40.4k
        assert(depth >= 0);
840
40.4k
        basicblock *next = b->b_next;
841
360k
        for (int i = 0; i < b->b_iused; i++) {
842
339k
            cfg_instr *instr = &b->b_instr[i];
843
339k
            stack_effects effects;
844
339k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
339k
            int new_depth = depth + effects.net;
851
339k
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow");
854
0
                goto error;
855
0
            }
856
339k
            maxdepth = Py_MAX(maxdepth, depth);
857
339k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
21.8k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
21.8k
                int target_depth = depth + effects.net;
865
21.8k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
21.8k
                maxdepth = Py_MAX(maxdepth, depth);
867
21.8k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
21.8k
            }
871
339k
            depth = new_depth;
872
339k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
339k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
334k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
19.1k
            {
876
                /* remaining code is dead */
877
19.1k
                next = NULL;
878
19.1k
                break;
879
19.1k
            }
880
339k
        }
881
40.4k
        if (next != NULL) {
882
21.2k
            assert(BB_HAS_FALLTHROUGH(b));
883
21.2k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
21.2k
        }
887
40.4k
    }
888
6.90k
    stackdepth = maxdepth;
889
6.90k
error:
890
6.90k
    PyMem_Free(stack);
891
6.90k
    return stackdepth;
892
6.90k
}
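
A worked example of the walk above (illustrative): for a body equivalent to "return a + b" the per-instruction depths are:

/*     LOAD_FAST a      0 -> 1
 *     LOAD_FAST b      1 -> 2    <- deepest point
 *     BINARY_OP +      2 -> 1
 *     RETURN_VALUE     1 -> 0
 * so calculate_stackdepth() returns 2 for this code object. */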
893
894
static int
895
6.90k
label_exception_targets(basicblock *entryblock) {
896
6.90k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
6.90k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
6.90k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
6.90k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
6.90k
    except_stack->depth = 0;
907
6.90k
    todo_stack[0] = entryblock;
908
6.90k
    entryblock->b_visited = 1;
909
6.90k
    entryblock->b_exceptstack = except_stack;
910
6.90k
    basicblock **todo = &todo_stack[1];
911
6.90k
    basicblock *handler = NULL;
912
47.5k
    while (todo > todo_stack) {
913
40.5k
        todo--;
914
40.5k
        basicblock *b = todo[0];
915
40.5k
        assert(b->b_visited == 1);
916
40.5k
        except_stack = b->b_exceptstack;
917
40.5k
        assert(except_stack != NULL);
918
40.5k
        b->b_exceptstack = NULL;
919
40.5k
        handler = except_stack_top(except_stack);
920
40.5k
        int last_yield_except_depth = -1;
921
414k
        for (int i = 0; i < b->b_iused; i++) {
922
374k
            cfg_instr *instr = &b->b_instr[i];
923
374k
            if (is_block_push(instr)) {
924
2.82k
                if (!instr->i_target->b_visited) {
925
2.82k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
2.82k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
2.82k
                    instr->i_target->b_exceptstack = copy;
930
2.82k
                    todo[0] = instr->i_target;
931
2.82k
                    instr->i_target->b_visited = 1;
932
2.82k
                    todo++;
933
2.82k
                }
934
2.82k
                handler = push_except_block(except_stack, instr);
935
2.82k
            }
936
371k
            else if (instr->i_opcode == POP_BLOCK) {
937
2.28k
                handler = pop_except_block(except_stack);
938
2.28k
                INSTR_SET_OP0(instr, NOP);
939
2.28k
            }
940
369k
            else if (is_jump(instr)) {
941
20.9k
                instr->i_except = handler;
942
20.9k
                assert(i == b->b_iused -1);
943
20.9k
                if (!instr->i_target->b_visited) {
944
15.0k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
12.3k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
12.3k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
12.3k
                        instr->i_target->b_exceptstack = copy;
950
12.3k
                    }
951
2.66k
                    else {
952
2.66k
                        instr->i_target->b_exceptstack = except_stack;
953
2.66k
                        except_stack = NULL;
954
2.66k
                    }
955
15.0k
                    todo[0] = instr->i_target;
956
15.0k
                    instr->i_target->b_visited = 1;
957
15.0k
                    todo++;
958
15.0k
                }
959
20.9k
            }
960
348k
            else if (instr->i_opcode == YIELD_VALUE) {
961
500
                instr->i_except = handler;
962
500
                last_yield_except_depth = except_stack->depth;
963
500
            }
964
347k
            else if (instr->i_opcode == RESUME) {
965
7.40k
                instr->i_except = handler;
966
7.40k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
500
                    assert(last_yield_except_depth >= 0);
968
500
                    if (last_yield_except_depth == 1) {
969
435
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
435
                    }
971
500
                    last_yield_except_depth = -1;
972
500
                }
973
7.40k
            }
974
340k
            else {
975
340k
                instr->i_except = handler;
976
340k
            }
977
374k
        }
978
40.5k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
979
15.8k
            assert(except_stack != NULL);
980
15.8k
            b->b_next->b_exceptstack = except_stack;
981
15.8k
            todo[0] = b->b_next;
982
15.8k
            b->b_next->b_visited = 1;
983
15.8k
            todo++;
984
15.8k
        }
985
24.7k
        else if (except_stack != NULL) {
986
22.0k
           PyMem_Free(except_stack);
987
22.0k
        }
988
40.5k
    }
989
#ifdef Py_DEBUG
990
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
991
        assert(b->b_exceptstack == NULL);
992
    }
993
#endif
994
6.90k
    PyMem_Free(todo_stack);
995
6.90k
    return SUCCESS;
996
0
error:
997
0
    PyMem_Free(todo_stack);
998
0
    PyMem_Free(except_stack);
999
0
    return ERROR;
1000
6.90k
}
1001
1002
/***** CFG optimizations *****/
1003
1004
static int
1005
13.8k
remove_unreachable(basicblock *entryblock) {
1006
109k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1007
95.2k
        b->b_predecessors = 0;
1008
95.2k
    }
1009
13.8k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1010
13.8k
    if (stack == NULL) {
1011
0
        return ERROR;
1012
0
    }
1013
13.8k
    basicblock **sp = stack;
1014
13.8k
    entryblock->b_predecessors = 1;
1015
13.8k
    *sp++ = entryblock;
1016
13.8k
    entryblock->b_visited = 1;
1017
94.0k
    while (sp > stack) {
1018
80.2k
        basicblock *b = *(--sp);
1019
80.2k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1020
40.4k
            if (!b->b_next->b_visited) {
1021
34.7k
                assert(b->b_next->b_predecessors == 0);
1022
34.7k
                *sp++ = b->b_next;
1023
34.7k
                b->b_next->b_visited = 1;
1024
34.7k
            }
1025
40.4k
            b->b_next->b_predecessors++;
1026
40.4k
        }
1027
803k
        for (int i = 0; i < b->b_iused; i++) {
1028
723k
            basicblock *target;
1029
723k
            cfg_instr *instr = &b->b_instr[i];
1030
723k
            if (is_jump(instr) || is_block_push(instr)) {
1031
45.3k
                target = instr->i_target;
1032
45.3k
                if (!target->b_visited) {
1033
31.6k
                    *sp++ = target;
1034
31.6k
                    target->b_visited = 1;
1035
31.6k
                }
1036
45.3k
                target->b_predecessors++;
1037
45.3k
            }
1038
723k
        }
1039
80.2k
    }
1040
13.8k
    PyMem_Free(stack);
1041
1042
    /* Delete unreachable instructions */
1043
109k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1044
95.2k
       if (b->b_predecessors == 0) {
1045
15.0k
            b->b_iused = 0;
1046
15.0k
            b->b_except_handler = 0;
1047
15.0k
       }
1048
95.2k
    }
1049
13.8k
    return SUCCESS;
1050
13.8k
}
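
A note on the deletion strategy above (block names illustrative):

/* Unreachable blocks are emptied in place rather than unlinked:
 *     B1: ...; RETURN_VALUE
 *     B2: <b_predecessors == 0>
 * B2 stays on the b_next chain with b_iused = 0, so later passes can
 * still traverse the layout without rewriting any links. */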
1051
1052
static int
1053
264k
basicblock_remove_redundant_nops(basicblock *bb) {
1054
    /* Remove NOPs when legal to do so. */
1055
264k
    int dest = 0;
1056
264k
    int prev_lineno = -1;
1057
2.08M
    for (int src = 0; src < bb->b_iused; src++) {
1058
1.82M
        int lineno = bb->b_instr[src].i_loc.lineno;
1059
1.82M
        if (bb->b_instr[src].i_opcode == NOP) {
1060
            /* Eliminate no-op if it doesn't have a line number */
1061
44.0k
            if (lineno < 0) {
1062
6.00k
                continue;
1063
6.00k
            }
1064
            /* or, if the previous instruction had the same line number. */
1065
38.0k
            if (prev_lineno == lineno) {
1066
30.7k
                continue;
1067
30.7k
            }
1068
            /* or, if the next instruction has the same line number or no line number */
1069
7.30k
            if (src < bb->b_iused - 1) {
1070
6.25k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1071
6.25k
                if (next_lineno == lineno) {
1072
4.19k
                    continue;
1073
4.19k
                }
1074
2.06k
                if (next_lineno < 0) {
1075
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1076
0
                    continue;
1077
0
                }
1078
2.06k
            }
1079
1.04k
            else {
1080
1.04k
                basicblock *next = next_nonempty_block(bb->b_next);
1081
                /* or, if this is the last instruction in the block and the next block has the same line number */
1082
1.04k
                if (next) {
1083
1.04k
                    location next_loc = NO_LOCATION;
1084
1.04k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1085
1.04k
                        cfg_instr *instr = &next->b_instr[next_i];
1086
1.04k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1087
                            /* Skip over NOPs without a location; they will be removed */
1088
0
                            continue;
1089
0
                        }
1090
1.04k
                        next_loc = instr->i_loc;
1091
1.04k
                        break;
1092
1.04k
                    }
1093
1.04k
                    if (lineno == next_loc.lineno) {
1094
7
                        continue;
1095
7
                    }
1096
1.04k
                }
1097
1.04k
            }
1098
1099
7.30k
        }
1100
1.78M
        if (dest != src) {
1101
169k
            bb->b_instr[dest] = bb->b_instr[src];
1102
169k
        }
1103
1.78M
        dest++;
1104
1.78M
        prev_lineno = lineno;
1105
1.78M
    }
1106
264k
    assert(dest <= bb->b_iused);
1107
264k
    int num_removed = bb->b_iused - dest;
1108
264k
    bb->b_iused = dest;
1109
264k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1110
264k
    return num_removed;
1111
264k
}
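
An illustration of the retention rule above (hypothetical line numbers):

/* A NOP survives only while it is the sole carrier of its line number:
 *     NOP          (line 3)    kept: no neighbour covers line 3
 *     LOAD_FAST x  (line 4)
 * If LOAD_FAST were also on line 3, or the NOP had no location at all,
 * the loop above would skip it and compact the array. */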
1112
1113
static int
1114
23.9k
remove_redundant_nops(cfg_builder *g) {
1115
23.9k
    int changes = 0;
1116
240k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1117
217k
        int change = basicblock_remove_redundant_nops(b);
1118
217k
        RETURN_IF_ERROR(change);
1119
217k
        changes += change;
1120
217k
    }
1121
23.9k
    return changes;
1122
23.9k
}
1123
1124
static int
1125
remove_redundant_nops_and_pairs(basicblock *entryblock)
1126
6.90k
{
1127
6.90k
    bool done = false;
1128
1129
13.8k
    while (! done) {
1130
6.90k
        done = true;
1131
6.90k
        cfg_instr *prev_instr = NULL;
1132
6.90k
        cfg_instr *instr = NULL;
1133
54.8k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1134
47.9k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1135
47.9k
            if (IS_LABEL(b->b_label)) {
1136
                /* this block is a jump target, forget instr */
1137
20.8k
                instr = NULL;
1138
20.8k
            }
1139
395k
            for (int i = 0; i < b->b_iused; i++) {
1140
347k
                prev_instr = instr;
1141
347k
                instr = &b->b_instr[i];
1142
347k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1143
347k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1144
347k
                int opcode = instr->i_opcode;
1145
347k
                bool is_redundant_pair = false;
1146
347k
                if (opcode == POP_TOP) {
1147
8.21k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1148
0
                       is_redundant_pair = true;
1149
0
                   }
1150
8.21k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1151
0
                       is_redundant_pair = true;
1152
0
                   }
1153
8.21k
                }
1154
347k
                if (is_redundant_pair) {
1155
0
                    INSTR_SET_OP0(prev_instr, NOP);
1156
0
                    INSTR_SET_OP0(instr, NOP);
1157
0
                    done = false;
1158
0
                }
1159
347k
            }
1160
47.9k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1161
34.6k
                instr = NULL;
1162
34.6k
            }
1163
47.9k
        }
1164
6.90k
    }
1165
6.90k
    return SUCCESS;
1166
6.90k
}
1167
1168
static int
1169
17.0k
remove_redundant_jumps(cfg_builder *g) {
1170
    /* If a non-empty block ends with a jump instruction, check if the next
1171
     * non-empty block reached through normal flow control is the target
1172
     * of that jump. If it is, then the jump instruction is redundant and
1173
     * can be deleted.
1174
     *
1175
     * Return the number of changes applied, or -1 on error.
1176
     */
1177
1178
17.0k
    int changes = 0;
1179
186k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1180
169k
        cfg_instr *last = basicblock_last_instr(b);
1181
169k
        if (last == NULL) {
1182
23.4k
            continue;
1183
23.4k
        }
1184
169k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1185
145k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1186
20.9k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1187
20.9k
            if (jump_target == NULL) {
1188
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1189
0
                return ERROR;
1190
0
            }
1191
20.9k
            basicblock *next = next_nonempty_block(b->b_next);
1192
20.9k
            if (jump_target == next) {
1193
699
                changes++;
1194
699
                INSTR_SET_OP0(last, NOP);
1195
699
            }
1196
20.9k
        }
1197
145k
    }
1198
1199
17.0k
    return changes;
1200
17.0k
}
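
The rewrite above, sketched on a two-block layout (block names illustrative):

/*     B1: ...; JUMP -> L2
 *     B2 (label L2): ...
 * B2 is the next non-empty block in layout order, so the JUMP is
 * rewritten to NOP here and deleted later by remove_redundant_nops(). */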
1201
1202
static inline bool
1203
58.2k
basicblock_has_no_lineno(basicblock *b) {
1204
69.3k
    for (int i = 0; i < b->b_iused; i++) {
1205
63.8k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1206
52.6k
            return false;
1207
52.6k
        }
1208
63.8k
    }
1209
5.52k
    return true;
1210
58.2k
}
1211
1212
/* Maximum size of a basic block that should be copied by the optimizer */
1213
2.36k
#define MAX_COPY_SIZE 4
1214
1215
/* If this block ends with an unconditional jump to a small exit block or
1216
 * a block that has no line numbers (and no fallthrough), then
1217
 * remove the jump and extend this block with the target.
1218
 * Returns 1 if extended, 0 if no change, and -1 on error.
1219
 */
1220
static int
1221
74.3k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1222
74.3k
    cfg_instr *last = basicblock_last_instr(bb);
1223
74.3k
    if (last == NULL) {
1224
0
        return 0;
1225
0
    }
1226
74.3k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1227
61.0k
        return 0;
1228
61.0k
    }
1229
13.3k
    basicblock *target = last->i_target;
1230
13.3k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1231
2.36k
                             target->b_iused <= MAX_COPY_SIZE);
1232
13.3k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1233
2.31k
                                     !BB_HAS_FALLTHROUGH(target));
1234
13.3k
    if (small_exit_block || no_lineno_no_fallthrough) {
1235
2.72k
        assert(is_jump(last));
1236
2.72k
        int removed_jump_opcode = last->i_opcode;
1237
2.72k
        INSTR_SET_OP0(last, NOP);
1238
2.72k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1239
2.72k
        if (no_lineno_no_fallthrough) {
1240
2.31k
            last = basicblock_last_instr(bb);
1241
2.31k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1242
1.43k
                removed_jump_opcode == JUMP)
1243
59
            {
1244
                /* Make sure we don't lose eval breaker checks */
1245
59
                last->i_opcode = JUMP;
1246
59
            }
1247
2.31k
        }
1248
2.72k
        target->b_predecessors--;
1249
2.72k
        return 1;
1250
2.72k
    }
1251
10.6k
    return 0;
1252
13.3k
}
1253
1254
static int
1255
6.90k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1256
6.90k
    bool changes;
1257
8.05k
    do {
1258
8.05k
        changes = false;
1259
82.4k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1260
74.3k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1261
74.3k
            RETURN_IF_ERROR(res);
1262
74.3k
            if (res) {
1263
2.72k
                changes = true;
1264
2.72k
            }
1265
74.3k
        }
1266
8.05k
    } while(changes); /* every change removes a jump, ensuring convergence */
1267
6.90k
    return changes;
1268
6.90k
}
1269
1270
// Attempt to eliminate jumps to jumps by updating inst to jump to
1271
// target->i_target using the provided opcode. Return whether or not the
1272
// optimization was successful.
1273
static bool
1274
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1275
767
{
1276
767
    assert(is_jump(inst));
1277
767
    assert(is_jump(target));
1278
767
    assert(inst == basicblock_last_instr(bb));
1279
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1280
    // changes (and we fall into an infinite loop):
1281
767
    if (inst->i_target != target->i_target) {
1282
        /* Change inst to NOP and append a jump to target->i_target. The
1283
         * NOP will be removed later if it's not needed for the lineno.
1284
         */
1285
767
        INSTR_SET_OP0(inst, NOP);
1286
1287
767
        RETURN_IF_ERROR(
1288
767
            basicblock_add_jump(
1289
767
                bb, opcode, target->i_target, target->i_loc));
1290
1291
767
        return true;
1292
767
    }
1293
0
    return false;
1294
767
}
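
A sketch of the threading above (illustrative block names):

/*     B1: ...; JUMP -> B2          B2: JUMP -> B3
 * becomes
 *     B1: ...; NOP; JUMP -> B3
 * The bpo-45773 guard keeps a pair whose targets already coincide from
 * being rewritten forever. */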
1295
1296
static int
1297
loads_const(int opcode)
1298
19.0k
{
1299
19.0k
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1300
19.0k
}
1301
1302
/* Returns new reference */
1303
static PyObject*
1304
get_const_value(int opcode, int oparg, PyObject *co_consts)
1305
73.7k
{
1306
73.7k
    PyObject *constant = NULL;
1307
73.7k
    assert(loads_const(opcode));
1308
73.7k
    if (opcode == LOAD_CONST) {
1309
72.9k
        constant = PyList_GET_ITEM(co_consts, oparg);
1310
72.9k
    }
1311
73.7k
    if (opcode == LOAD_SMALL_INT) {
1312
811
        return PyLong_FromLong(oparg);
1313
811
    }
1314
1315
72.9k
    if (constant == NULL) {
1316
0
        PyErr_SetString(PyExc_SystemError,
1317
0
                        "Internal error: failed to get value of a constant");
1318
0
        return NULL;
1319
0
    }
1320
72.9k
    return Py_NewRef(constant);
1321
72.9k
}
1322
1323
// Steals a reference to newconst.
1324
static int
1325
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1326
2.34k
{
1327
2.34k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1328
0
        Py_DECREF(newconst);
1329
0
        return -1;
1330
0
    }
1331
1332
2.34k
    Py_ssize_t index;
1333
101k
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1334
99.9k
        if (PyList_GET_ITEM(consts, index) == newconst) {
1335
683
            break;
1336
683
        }
1337
99.9k
    }
1338
2.34k
    if (index == PyList_GET_SIZE(consts)) {
1339
1.65k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1340
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1341
0
            Py_DECREF(newconst);
1342
0
            return -1;
1343
0
        }
1344
1.65k
        if (PyList_Append(consts, newconst)) {
1345
0
            Py_DECREF(newconst);
1346
0
            return -1;
1347
0
        }
1348
1.65k
    }
1349
2.34k
    Py_DECREF(newconst);
1350
2.34k
    return (int)index;
1351
2.34k
}
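
A usage note on the interning above (the concrete constant is illustrative):

/* If an earlier fold already appended the tuple (1, 2), a later
 * add_const() of an equal tuple is first merged with the cached object
 * by _PyCompile_ConstCacheMergeOne(), so the scan above finds the
 * existing entry and returns its index instead of appending a copy. */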
1352
1353
/*
1354
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1355
  Try to collect "size" consecutive constant-loading instructions into the array "instrs".
1356
  The caller must ensure that "instrs" is large enough to hold at least "size" instructions.
1357
1358
  Returns a boolean indicating whether "size" such instructions were found.
1359
*/
1360
static bool
1361
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1362
12.6k
{
1363
12.6k
    assert(start < bb->b_iused);
1364
12.6k
    assert(size >= 0);
1365
12.6k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1366
1367
21.3k
    for (; start >= 0 && size > 0; start--) {
1368
19.0k
        cfg_instr *instr = &bb->b_instr[start];
1369
19.0k
        if (instr->i_opcode == NOP) {
1370
273
            continue;
1371
273
        }
1372
18.8k
        if (!loads_const(instr->i_opcode)) {
1373
10.3k
            return false;
1374
10.3k
        }
1375
8.46k
        instrs[--size] = instr;
1376
8.46k
    }
1377
1378
2.26k
    return size == 0;
1379
12.6k
}
1380
1381
/*
1382
  Change every instruction in "instrs" to NOP and set its location to NO_LOCATION.
1383
  The caller must ensure that "instrs" has at least "size" elements.
1384
*/
1385
static void
1386
nop_out(cfg_instr **instrs, int size)
1387
2.40k
{
1388
7.27k
    for (int i = 0; i < size; i++) {
1389
4.87k
        cfg_instr *instr = instrs[i];
1390
4.87k
        assert(instr->i_opcode != NOP);
1391
4.87k
        INSTR_SET_OP0(instr, NOP);
1392
4.87k
        INSTR_SET_LOC(instr, NO_LOCATION);
1393
4.87k
    }
1394
2.40k
}
1395
1396
/* Does not steal reference to "newconst".
1397
   Return 1 if the instruction was changed to LOAD_SMALL_INT.
1398
   Return 0 if the instruction could not be changed to LOAD_SMALL_INT.
1399
   Return -1 on error.
1400
*/
1401
static int
1402
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1403
                               PyObject *consts, PyObject *const_cache)
1404
69.2k
{
1405
69.2k
    if (PyLong_CheckExact(newconst)) {
1406
24.3k
        int overflow;
1407
24.3k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1408
24.3k
        if (val == -1 && PyErr_Occurred()) {
1409
0
            return -1;
1410
0
        }
1411
24.3k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1412
19.0k
            assert(_Py_IsImmortal(newconst));
1413
19.0k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1414
19.0k
            return 1;
1415
19.0k
        }
1416
24.3k
    }
1417
50.2k
    return 0;
1418
69.2k
}
1419
1420
1421
/* Steals reference to "newconst" */
1422
static int
1423
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1424
                      PyObject *consts, PyObject *const_cache)
1425
2.03k
{
1426
2.03k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1427
2.03k
    if (res < 0) {
1428
0
        Py_DECREF(newconst);
1429
0
        return ERROR;
1430
0
    }
1431
2.03k
    if (res > 0) {
1432
9
        return SUCCESS;
1433
9
    }
1434
2.03k
    int oparg = add_const(newconst, consts, const_cache);
1435
2.03k
    RETURN_IF_ERROR(oparg);
1436
2.03k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1437
2.03k
    return SUCCESS;
1438
2.03k
}
1439
1440
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1441
   with    LOAD_CONST (c1, c2, ... cn).
1442
   The consts table must still be in list form so that the
1443
   new constant (c1, c2, ... cn) can be appended.
1444
   Called with "i" pointing at the BUILD_TUPLE instruction.
1445
*/
1446
static int
1447
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1448
4.09k
{
1449
    /* Pre-conditions */
1450
4.09k
    assert(PyDict_CheckExact(const_cache));
1451
4.09k
    assert(PyList_CheckExact(consts));
1452
1453
4.09k
    cfg_instr *instr = &bb->b_instr[i];
1454
4.09k
    assert(instr->i_opcode == BUILD_TUPLE);
1455
1456
4.09k
    int seq_size = instr->i_oparg;
1457
4.09k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1458
0
        return SUCCESS;
1459
0
    }
1460
1461
4.09k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1462
4.09k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1463
        /* not a const sequence */
1464
2.65k
        return SUCCESS;
1465
2.65k
    }
1466
1467
1.44k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1468
1.44k
    if (const_tuple == NULL) {
1469
0
        return ERROR;
1470
0
    }
1471
1472
3.93k
    for (int i = 0; i < seq_size; i++) {
1473
2.49k
        cfg_instr *inst = const_instrs[i];
1474
2.49k
        assert(loads_const(inst->i_opcode));
1475
2.49k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1476
2.49k
        if (element == NULL) {
1477
0
            Py_DECREF(const_tuple);
1478
0
            return ERROR;
1479
0
        }
1480
2.49k
        PyTuple_SET_ITEM(const_tuple, i, element);
1481
2.49k
    }
1482
1483
1.44k
    nop_out(const_instrs, seq_size);
1484
1.44k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1485
1.44k
}
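
An end-to-end example of the fold (illustrative): the literal (1, 2, 3) compiles to a constant-loading run that this pass collapses:

/* Before:  LOAD_SMALL_INT 1; LOAD_SMALL_INT 2; LOAD_SMALL_INT 3; BUILD_TUPLE 3
 * After:   NOP; NOP; NOP; LOAD_CONST (1, 2, 3)
 * The NOPs carry NO_LOCATION and are removed by remove_redundant_nops(). */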
1486
1487
/* Replace:
1488
    BUILD_LIST 0
1489
    LOAD_CONST c1
1490
    LIST_APPEND 1
1491
    LOAD_CONST c2
1492
    LIST_APPEND 1
1493
    ...
1494
    LOAD_CONST cN
1495
    LIST_APPEND 1
1496
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1497
   with:
1498
    LOAD_CONST (c1, c2, ... cN)
1499
*/
1500
static int
1501
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1502
                                      PyObject *consts, PyObject *const_cache)
1503
115
{
1504
115
    assert(PyDict_CheckExact(const_cache));
1505
115
    assert(PyList_CheckExact(consts));
1506
115
    assert(i >= 0);
1507
115
    assert(i < bb->b_iused);
1508
1509
115
    cfg_instr *intrinsic = &bb->b_instr[i];
1510
115
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1511
115
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1512
1513
115
    int consts_found = 0;
1514
115
    bool expect_append = true;
1515
1516
298
    for (int pos = i - 1; pos >= 0; pos--) {
1517
298
        cfg_instr *instr = &bb->b_instr[pos];
1518
298
        int opcode = instr->i_opcode;
1519
298
        int oparg = instr->i_oparg;
1520
1521
298
        if (opcode == NOP) {
1522
0
            continue;
1523
0
        }
1524
1525
298
        if (opcode == BUILD_LIST && oparg == 0) {
1526
2
            if (!expect_append) {
1527
                /* Not a sequence start. */
1528
0
                return SUCCESS;
1529
0
            }
1530
1531
            /* Sequence start, we are done. */
1532
2
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1533
2
            if (newconst == NULL) {
1534
0
                return ERROR;
1535
0
            }
1536
1537
184
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1538
182
                instr = &bb->b_instr[newpos];
1539
182
                if (instr->i_opcode == NOP) {
1540
0
                    continue;
1541
0
                }
1542
182
                if (loads_const(instr->i_opcode)) {
1543
90
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1544
90
                    if (constant == NULL) {
1545
0
                        Py_DECREF(newconst);
1546
0
                        return ERROR;
1547
0
                    }
1548
90
                    assert(consts_found > 0);
1549
90
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1550
90
                }
1551
182
                nop_out(&instr, 1);
1552
182
            }
1553
2
            assert(consts_found == 0);
1554
2
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1555
2
        }
1556
1557
296
        if (expect_append) {
1558
204
            if (opcode != LIST_APPEND || oparg != 1) {
1559
112
                return SUCCESS;
1560
112
            }
1561
204
        }
1562
92
        else {
1563
92
            if (!loads_const(opcode)) {
1564
1
                return SUCCESS;
1565
1
            }
1566
91
            consts_found++;
1567
91
        }
1568
1569
183
        expect_append = !expect_append;
1570
183
    }
1571
1572
    /* Did not find sequence start. */
1573
0
    return SUCCESS;
1574
115
}
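
/* A standalone sketch of the alternating backward scan above (NOP skipping
 * omitted): walking down from just below CALL_INTRINSIC_1, the block must
 * read as LIST_APPEND 1 / constant-load pairs all the way to a BUILD_LIST 0.
 * Toy opcodes -- illustration only. */

#include <stdbool.h>
#include <stdio.h>

enum { T_BUILD_LIST0, T_LOAD_CONST, T_LIST_APPEND1, T_OTHER };

/* Return the number of constants if ops[0..end] is a foldable run, else -1. */
static int
scan_list_to_tuple(const int *ops, int end)
{
    int consts_found = 0;
    bool expect_append = true;
    for (int pos = end; pos >= 0; pos--) {
        if (ops[pos] == T_BUILD_LIST0) {
            /* only a sequence start if a LIST_APPEND was expected next */
            return expect_append ? consts_found : -1;
        }
        if (expect_append) {
            if (ops[pos] != T_LIST_APPEND1) {
                return -1;
            }
        }
        else {
            if (ops[pos] != T_LOAD_CONST) {
                return -1;
            }
            consts_found++;
        }
        expect_append = !expect_append;
    }
    return -1;      /* never reached the BUILD_LIST 0 */
}

int main(void)
{
    int ops[] = {T_BUILD_LIST0, T_LOAD_CONST, T_LIST_APPEND1,
                 T_LOAD_CONST, T_LIST_APPEND1};
    printf("%d\n", scan_list_to_tuple(ops, 4));     /* prints 2 */
    return 0;
}
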
1575
1576
2.89k
#define MIN_CONST_SEQUENCE_SIZE 3
1577
/*
1578
Optimize lists and sets for:
1579
    1. "for" loop, comprehension or "in"/"not in" tests:
1580
           Change literal list or set of constants into constant
1581
           tuple or frozenset respectively. Change list of
1582
           non-constants into tuple.
1583
    2. Constant literal lists/sets with length >= MIN_CONST_SEQUENCE_SIZE:
1584
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1585
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1586
           or BUILD_SET & SET_UPDATE respectively.
1587
*/
1588
static int
1589
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1590
                        PyObject *consts, PyObject *const_cache)
1591
1.44k
{
1592
1.44k
    assert(PyDict_CheckExact(const_cache));
1593
1.44k
    assert(PyList_CheckExact(consts));
1594
1595
1.44k
    cfg_instr *instr = &bb->b_instr[i];
1596
1.44k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1597
1598
1.44k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1599
1.44k
    int seq_size = instr->i_oparg;
1600
1.44k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1601
1.44k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1602
1.17k
    {
1603
1.17k
        return SUCCESS;
1604
1.17k
    }
1605
1606
270
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1607
270
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1608
88
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1609
            /* iterate over a tuple instead of list */
1610
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1611
0
        }
1612
88
        return SUCCESS;
1613
88
    }
1614
1615
182
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1616
182
    if (const_result == NULL) {
1617
0
        return ERROR;
1618
0
    }
1619
1620
1.74k
    for (int i = 0; i < seq_size; i++) {
1621
1.55k
        cfg_instr *inst = const_instrs[i];
1622
1.55k
        assert(loads_const(inst->i_opcode));
1623
1.55k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1624
1.55k
        if (element == NULL) {
1625
0
            Py_DECREF(const_result);
1626
0
            return ERROR;
1627
0
        }
1628
1.55k
        PyTuple_SET_ITEM(const_result, i, element);
1629
1.55k
    }
1630
1631
182
    if (instr->i_opcode == BUILD_SET) {
1632
55
        PyObject *frozenset = PyFrozenSet_New(const_result);
1633
55
        if (frozenset == NULL) {
1634
0
            Py_DECREF(const_result);
1635
0
            return ERROR;
1636
0
        }
1637
55
        Py_SETREF(const_result, frozenset);
1638
55
    }
1639
1640
182
    int index = add_const(const_result, consts, const_cache);
1641
182
    RETURN_IF_ERROR(index);
1642
182
    nop_out(const_instrs, seq_size);
1643
1644
182
    if (contains_or_iter) {
1645
55
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1646
55
    }
1647
127
    else {
1648
127
        assert(i >= 2);
1649
127
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1650
1651
127
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1652
1653
127
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1654
127
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1655
127
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1656
127
    }
1657
182
    return SUCCESS;
1658
182
}
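
/* A standalone sketch of the in-place rewrite above for the non-iteration
 * case: because seq_size >= MIN_CONST_SEQUENCE_SIZE, at least three slots of
 * the now-NOPed constant-load run precede the BUILD_LIST/BUILD_SET, so the
 * three replacement instructions reuse slots i-2..i and nothing has to be
 * inserted. Toy instruction type -- illustration only. */

#include <stdio.h>

typedef struct { const char *op; int arg; } demo_instr;

static void
demo_rewrite_const_list(demo_instr *instr, int i, int const_index)
{
    /* the constant loads at i-seq_size .. i-1 were already NOPed out */
    instr[i-2] = (demo_instr){"BUILD_LIST", 0};
    instr[i-1] = (demo_instr){"LOAD_CONST", const_index};
    instr[i]   = (demo_instr){"LIST_EXTEND", 1};
}

int main(void)
{
    /* was: LOAD_CONST c0; LOAD_CONST c1; LOAD_CONST c2; BUILD_LIST 3 */
    demo_instr code[] = {{"NOP", 0}, {"NOP", 0}, {"NOP", 0},
                         {"BUILD_LIST", 3}};
    demo_rewrite_const_list(code, 3, 7 /* index of (c0, c1, c2) in consts */);
    /* prints: NOP; BUILD_LIST 0; LOAD_CONST 7; LIST_EXTEND 1 */
    for (int k = 0; k < 4; k++) {
        printf("%s %d\n", code[k].op, code[k].arg);
    }
    return 0;
}
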
1659
1660
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1661
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1662
 * Used to avoid creating constants which are slow to hash.
1663
 */
1664
static Py_ssize_t
1665
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1666
0
{
1667
0
    if (PyTuple_Check(obj)) {
1668
0
        Py_ssize_t i;
1669
0
        limit -= PyTuple_GET_SIZE(obj);
1670
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1671
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1672
0
            if (limit < 0) {
1673
0
                return limit;
1674
0
            }
1675
0
        }
1676
0
    }
1677
0
    return limit;
1678
0
}
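
/* A standalone sketch of the budget-passing recursion above on a toy tree
 * type instead of PyObject, to make the convergence concrete: the remaining
 * budget only ever decreases, and the walk stops as soon as it goes
 * negative. Hypothetical types -- illustration only. */

typedef struct toy_node {
    int nchildren;
    struct toy_node **children;
} toy_node;

static int
toy_check_complexity(const toy_node *n, int limit)
{
    limit -= n->nchildren;              /* charge this collection's items */
    for (int i = 0; limit >= 0 && i < n->nchildren; i++) {
        limit = toy_check_complexity(n->children[i], limit);
        if (limit < 0) {
            return limit;               /* over budget: stop immediately */
        }
    }
    return limit;                       /* >= 0 means within budget */
}

int main(void)
{
    toy_node leaf = {0, 0};
    toy_node *kids[2] = {&leaf, &leaf};
    toy_node root = {2, kids};
    return toy_check_complexity(&root, 1) < 0;  /* 2 items exceed a budget of 1 */
}
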
1679
1680
38
#define MAX_INT_SIZE           128  /* bits */
1681
0
#define MAX_COLLECTION_SIZE    256  /* items */
1682
12
#define MAX_STR_SIZE          4096  /* characters */
1683
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1684
1685
static PyObject *
1686
const_folding_safe_multiply(PyObject *v, PyObject *w)
1687
27
{
1688
27
    if (PyLong_Check(v) && PyLong_Check(w) &&
1689
3
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1690
27
    ) {
1691
3
        int64_t vbits = _PyLong_NumBits(v);
1692
3
        int64_t wbits = _PyLong_NumBits(w);
1693
3
        assert(vbits >= 0);
1694
3
        assert(wbits >= 0);
1695
3
        if (vbits + wbits > MAX_INT_SIZE) {
1696
0
            return NULL;
1697
0
        }
1698
3
    }
1699
24
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1700
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1701
0
        if (size) {
1702
0
            long n = PyLong_AsLong(v);
1703
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1704
0
                return NULL;
1705
0
            }
1706
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1707
0
                return NULL;
1708
0
            }
1709
0
        }
1710
0
    }
1711
24
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1712
12
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1713
12
                                               PyBytes_GET_SIZE(w);
1714
12
        if (size) {
1715
12
            long n = PyLong_AsLong(v);
1716
12
            if (n < 0 || n > MAX_STR_SIZE / size) {
1717
4
                return NULL;
1718
4
            }
1719
12
        }
1720
12
    }
1721
12
    else if (PyLong_Check(w) &&
1722
12
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1723
12
    {
1724
12
        return const_folding_safe_multiply(w, v);
1725
12
    }
1726
1727
11
    return PyNumber_Multiply(v, w);
1728
27
}
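
/* The guard above (and those in the power and lshift helpers below) rests on
 * simple bit-length arithmetic: bits(v*w) <= bits(v) + bits(w), bits(v**w)
 * is at most bits(v) * w, and bits(v << s) == bits(v) + s. A quick
 * self-contained check of the first and last identities: */

#include <assert.h>
#include <stdint.h>

static int bit_length(uint64_t x)
{
    int n = 0;
    while (x) {
        n++;
        x >>= 1;
    }
    return n;
}

int main(void)
{
    uint64_t v = 1000003, w = 999983;               /* arbitrary operands */
    assert(bit_length(v * w) <= bit_length(v) + bit_length(w));
    assert(bit_length(v << 7) == bit_length(v) + 7);
    return 0;
}
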
1729
1730
static PyObject *
1731
const_folding_safe_power(PyObject *v, PyObject *w)
1732
8
{
1733
8
    if (PyLong_Check(v) && PyLong_Check(w) &&
1734
8
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1735
8
    ) {
1736
8
        int64_t vbits = _PyLong_NumBits(v);
1737
8
        size_t wbits = PyLong_AsSize_t(w);
1738
8
        assert(vbits >= 0);
1739
8
        if (wbits == (size_t)-1) {
1740
0
            return NULL;
1741
0
        }
1742
8
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1743
0
            return NULL;
1744
0
        }
1745
8
    }
1746
1747
8
    return PyNumber_Power(v, w, Py_None);
1748
8
}
1749
1750
static PyObject *
1751
const_folding_safe_lshift(PyObject *v, PyObject *w)
1752
9
{
1753
9
    if (PyLong_Check(v) && PyLong_Check(w) &&
1754
9
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1755
9
    ) {
1756
9
        int64_t vbits = _PyLong_NumBits(v);
1757
9
        size_t wbits = PyLong_AsSize_t(w);
1758
9
        assert(vbits >= 0);
1759
9
        if (wbits == (size_t)-1) {
1760
0
            return NULL;
1761
0
        }
1762
9
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1763
0
            return NULL;
1764
0
        }
1765
9
    }
1766
1767
9
    return PyNumber_Lshift(v, w);
1768
9
}
1769
1770
static PyObject *
1771
const_folding_safe_mod(PyObject *v, PyObject *w)
1772
0
{
1773
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1774
0
        return NULL;
1775
0
    }
1776
1777
0
    return PyNumber_Remainder(v, w);
1778
0
}
1779
1780
static PyObject *
1781
eval_const_binop(PyObject *left, int op, PyObject *right)
1782
47
{
1783
47
    assert(left != NULL && right != NULL);
1784
47
    assert(op >= 0 && op <= NB_OPARG_LAST);
1785
1786
47
    PyObject *result = NULL;
1787
47
    switch (op) {
1788
5
        case NB_ADD:
1789
5
            result = PyNumber_Add(left, right);
1790
5
            break;
1791
0
        case NB_SUBTRACT:
1792
0
            result = PyNumber_Subtract(left, right);
1793
0
            break;
1794
15
        case NB_MULTIPLY:
1795
15
            result = const_folding_safe_multiply(left, right);
1796
15
            break;
1797
1
        case NB_TRUE_DIVIDE:
1798
1
            result = PyNumber_TrueDivide(left, right);
1799
1
            break;
1800
0
        case NB_FLOOR_DIVIDE:
1801
0
            result = PyNumber_FloorDivide(left, right);
1802
0
            break;
1803
0
        case NB_REMAINDER:
1804
0
            result = const_folding_safe_mod(left, right);
1805
0
            break;
1806
8
        case NB_POWER:
1807
8
            result = const_folding_safe_power(left, right);
1808
8
            break;
1809
9
        case NB_LSHIFT:
1810
9
            result = const_folding_safe_lshift(left, right);
1811
9
            break;
1812
0
        case NB_RSHIFT:
1813
0
            result = PyNumber_Rshift(left, right);
1814
0
            break;
1815
0
        case NB_OR:
1816
0
            result = PyNumber_Or(left, right);
1817
0
            break;
1818
0
        case NB_XOR:
1819
0
            result = PyNumber_Xor(left, right);
1820
0
            break;
1821
0
        case NB_AND:
1822
0
            result = PyNumber_And(left, right);
1823
0
            break;
1824
9
        case NB_SUBSCR:
1825
9
            result = PyObject_GetItem(left, right);
1826
9
            break;
1827
0
        case NB_MATRIX_MULTIPLY:
1828
            // No builtin constants implement matrix multiplication
1829
0
            break;
1830
0
        default:
1831
0
            Py_UNREACHABLE();
1832
47
    }
1833
47
    return result;
1834
47
}
1835
1836
static int
1837
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1838
7.52k
{
1839
7.56k
    #define BINOP_OPERAND_COUNT 2
1840
7.52k
    assert(PyDict_CheckExact(const_cache));
1841
7.52k
    assert(PyList_CheckExact(consts));
1842
1843
7.52k
    cfg_instr *binop = &bb->b_instr[i];
1844
7.52k
    assert(binop->i_opcode == BINARY_OP);
1845
1846
7.52k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1847
7.52k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1848
        /* not a const sequence */
1849
7.47k
        return SUCCESS;
1850
7.47k
    }
1851
1852
47
    cfg_instr *lhs_instr = operands_instrs[0];
1853
47
    assert(loads_const(lhs_instr->i_opcode));
1854
47
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1855
47
    if (lhs == NULL) {
1856
0
        return ERROR;
1857
0
    }
1858
1859
47
    cfg_instr *rhs_instr = operands_instrs[1];
1860
47
    assert(loads_const(rhs_instr->i_opcode));
1861
47
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1862
47
    if (rhs == NULL) {
1863
0
        Py_DECREF(lhs);
1864
0
        return ERROR;
1865
0
    }
1866
1867
47
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1868
47
    Py_DECREF(lhs);
1869
47
    Py_DECREF(rhs);
1870
47
    if (newconst == NULL) {
1871
4
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1872
0
            return ERROR;
1873
0
        }
1874
4
        PyErr_Clear();
1875
4
        return SUCCESS;
1876
4
    }
1877
1878
43
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1879
43
    return instr_make_load_const(binop, newconst, consts, const_cache);
1880
47
}
1881
1882
static PyObject *
1883
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1884
551
{
1885
551
    assert(operand != NULL);
1886
551
    assert(
1887
551
        opcode == UNARY_NEGATIVE ||
1888
551
        opcode == UNARY_INVERT ||
1889
551
        opcode == UNARY_NOT ||
1890
551
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1891
551
    );
1892
551
    PyObject *result;
1893
551
    switch (opcode) {
1894
551
        case UNARY_NEGATIVE:
1895
551
            result = PyNumber_Negative(operand);
1896
551
            break;
1897
0
        case UNARY_INVERT:
1898
            // XXX: This should be removed once the ~bool deprecation expires.
1899
0
            if (PyBool_Check(operand)) {
1900
0
                return NULL;
1901
0
            }
1902
0
            result = PyNumber_Invert(operand);
1903
0
            break;
1904
0
        case UNARY_NOT: {
1905
0
            int r = PyObject_IsTrue(operand);
1906
0
            if (r < 0) {
1907
0
                return NULL;
1908
0
            }
1909
0
            result = PyBool_FromLong(!r);
1910
0
            break;
1911
0
        }
1912
0
        case CALL_INTRINSIC_1:
1913
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1914
0
                Py_UNREACHABLE();
1915
0
            }
1916
0
            result = PyNumber_Positive(operand);
1917
0
            break;
1918
0
        default:
1919
0
            Py_UNREACHABLE();
1920
551
    }
1921
551
    return result;
1922
551
}
1923
1924
static int
1925
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1926
739
{
1927
1.29k
    #define UNARYOP_OPERAND_COUNT 1
1928
739
    assert(PyDict_CheckExact(const_cache));
1929
739
    assert(PyList_CheckExact(consts));
1930
739
    cfg_instr *unaryop = &bb->b_instr[i];
1931
1932
739
    cfg_instr *operand_instr;
1933
739
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1934
        /* not a const */
1935
188
        return SUCCESS;
1936
188
    }
1937
1938
739
    assert(loads_const(operand_instr->i_opcode));
1939
551
    PyObject *operand = get_const_value(
1940
551
        operand_instr->i_opcode,
1941
551
        operand_instr->i_oparg,
1942
551
        consts
1943
551
    );
1944
551
    if (operand == NULL) {
1945
0
        return ERROR;
1946
0
    }
1947
1948
551
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1949
551
    Py_DECREF(operand);
1950
551
    if (newconst == NULL) {
1951
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1952
0
            return ERROR;
1953
0
        }
1954
0
        PyErr_Clear();
1955
0
        return SUCCESS;
1956
0
    }
1957
1958
551
    if (unaryop->i_opcode == UNARY_NOT) {
1959
0
        assert(PyBool_Check(newconst));
1960
0
    }
1961
551
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1962
551
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1963
551
}
1964
1965
1.95k
#define VISITED (-1)
1966
1967
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1968
// same effect.
1969
static int
1970
swaptimize(basicblock *block, int *ix)
1971
1.85k
{
1972
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1973
    // for this function. Make sure to blow away cached *.pyc files first!
1974
1.85k
    assert(*ix < block->b_iused);
1975
1.85k
    cfg_instr *instructions = &block->b_instr[*ix];
1976
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1977
    // maximum depth of the stack manipulations:
1978
1.85k
    assert(instructions[0].i_opcode == SWAP);
1979
1.85k
    int depth = instructions[0].i_oparg;
1980
1.85k
    int len = 0;
1981
1.85k
    int more = false;
1982
1.85k
    int limit = block->b_iused - *ix;
1983
2.07k
    while (++len < limit) {
1984
2.07k
        int opcode = instructions[len].i_opcode;
1985
2.07k
        if (opcode == SWAP) {
1986
150
            depth = Py_MAX(depth, instructions[len].i_oparg);
1987
150
            more = true;
1988
150
        }
1989
1.92k
        else if (opcode != NOP) {
1990
1.85k
            break;
1991
1.85k
        }
1992
2.07k
    }
1993
    // It's already optimal if there's only one SWAP:
1994
1.85k
    if (!more) {
1995
1.70k
        return SUCCESS;
1996
1.70k
    }
1997
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
1998
150
    int *stack = PyMem_Malloc(depth * sizeof(int));
1999
150
    if (stack == NULL) {
2000
0
        PyErr_NoMemory();
2001
0
        return ERROR;
2002
0
    }
2003
600
    for (int i = 0; i < depth; i++) {
2004
450
        stack[i] = i;
2005
450
    }
2006
    // Simulate the combined effect of these instructions by "running" them on
2007
    // our "stack":
2008
450
    for (int i = 0; i < len; i++) {
2009
300
        if (instructions[i].i_opcode == SWAP) {
2010
300
            int oparg = instructions[i].i_oparg;
2011
300
            int top = stack[0];
2012
            // SWAPs are 1-indexed:
2013
300
            stack[0] = stack[oparg - 1];
2014
300
            stack[oparg - 1] = top;
2015
300
        }
2016
300
    }
2017
    // Now we can begin! Our approach here is based on a solution to a closely
2018
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2019
    // think of this algorithm as determining the steps needed to efficiently
2020
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2021
    // though, we can efficiently *shuffle* it! For this reason, we will be
2022
    // replacing instructions starting from the *end* of the run. Since the
2023
    // solution is optimal, we don't need to worry about running out of space:
2024
150
    int current = len - 1;
2025
600
    for (int i = 0; i < depth; i++) {
2026
        // Skip items that have already been visited, or just happen to be in
2027
        // the correct location:
2028
450
        if (stack[i] == VISITED || stack[i] == i) {
2029
300
            continue;
2030
300
        }
2031
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2032
        // with other items; traversing the cycle and swapping each item with
2033
        // the next will put them all in the correct place. The weird
2034
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2035
        // can only swap from that position:
2036
150
        int j = i;
2037
600
        while (true) {
2038
            // Skip the actual swap if our item is zero, since swapping the top
2039
            // item with itself is pointless:
2040
600
            if (j) {
2041
300
                assert(0 <= current);
2042
                // SWAPs are 1-indexed:
2043
300
                instructions[current].i_opcode = SWAP;
2044
300
                instructions[current--].i_oparg = j + 1;
2045
300
            }
2046
600
            if (stack[j] == VISITED) {
2047
                // Completed the cycle:
2048
150
                assert(j == i);
2049
150
                break;
2050
150
            }
2051
450
            int next_j = stack[j];
2052
450
            stack[j] = VISITED;
2053
450
            j = next_j;
2054
450
        }
2055
150
    }
2056
    // NOP out any unused instructions:
2057
150
    while (0 <= current) {
2058
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2059
0
    }
2060
150
    PyMem_Free(stack);
2061
150
    *ix += len - 1;
2062
150
    return SUCCESS;
2063
150
}
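
/* A standalone sketch of the cycle walk above: simulate the net permutation,
 * then walk each cycle through position 0 and emit the SWAPs needed to
 * realize it. Note that the original stores the emitted SWAPs from the end
 * of the run, so this prints them in reverse execution order. Toy code --
 * illustration only. */

#include <stdio.h>

#define TOY_VISITED (-1)

static void
emit_swaps(int *stack, int depth)
{
    for (int i = 0; i < depth; i++) {
        if (stack[i] == TOY_VISITED || stack[i] == i) {
            continue;
        }
        int j = i;
        while (1) {
            if (j) {
                printf("SWAP %d\n", j + 1);     /* SWAPs are 1-indexed */
            }
            if (stack[j] == TOY_VISITED) {
                break;                          /* completed the cycle */
            }
            int next_j = stack[j];
            stack[j] = TOY_VISITED;
            j = next_j;
        }
    }
}

int main(void)
{
    /* Simulate the run SWAP 3; SWAP 2; SWAP 3; SWAP 2 on {0, 1, 2}: */
    int stack[3] = {0, 1, 2};
    int run[] = {3, 2, 3, 2};
    for (int k = 0; k < 4; k++) {
        int top = stack[0];
        stack[0] = stack[run[k] - 1];
        stack[run[k] - 1] = top;
    }
    emit_swaps(stack, 3);   /* the four SWAPs collapse to two */
    return 0;
}
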
2064
2065
2066
// This list is pretty small, since it's only okay to reorder opcodes that:
2067
// - can't affect control flow (like jumping or raising exceptions)
2068
// - can't invoke arbitrary code (besides finalizers)
2069
// - only touch the TOS (and pop it when finished)
2070
#define SWAPPABLE(opcode) \
2071
3.10k
    ((opcode) == STORE_FAST || \
2072
3.10k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2073
3.10k
     (opcode) == POP_TOP)
2074
2075
#define STORES_TO(instr) \
2076
445
    (((instr).i_opcode == STORE_FAST || \
2077
445
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2078
445
     ? (instr).i_oparg : -1)
2079
2080
static int
2081
next_swappable_instruction(basicblock *block, int i, int lineno)
2082
3.04k
{
2083
3.12k
    while (++i < block->b_iused) {
2084
3.10k
        cfg_instr *instruction = &block->b_instr[i];
2085
3.10k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2086
            // Optimizing across this instruction could cause user-visible
2087
            // changes in the names bound between line tracing events!
2088
12
            return -1;
2089
12
        }
2090
3.09k
        if (instruction->i_opcode == NOP) {
2091
75
            continue;
2092
75
        }
2093
3.01k
        if (SWAPPABLE(instruction->i_opcode)) {
2094
1.40k
            return i;
2095
1.40k
        }
2096
1.61k
        return -1;
2097
3.01k
    }
2098
20
    return -1;
2099
3.04k
}
2100
2101
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2102
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2103
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2104
static void
2105
apply_static_swaps(basicblock *block, int i)
2106
1.85k
{
2107
    // SWAPs are to our left, and potential swaperands are to our right:
2108
2.14k
    for (; 0 <= i; i--) {
2109
2.02k
        assert(i < block->b_iused);
2110
2.02k
        cfg_instr *swap = &block->b_instr[i];
2111
2.02k
        if (swap->i_opcode != SWAP) {
2112
172
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2113
                // Nope, but we know how to handle these. Keep looking:
2114
86
                continue;
2115
86
            }
2116
            // We can't reason about what this instruction does. Bail:
2117
86
            return;
2118
172
        }
2119
1.85k
        int j = next_swappable_instruction(block, i, -1);
2120
1.85k
        if (j < 0) {
2121
893
            return;
2122
893
        }
2123
961
        int k = j;
2124
961
        int lineno = block->b_instr[j].i_loc.lineno;
2125
1.40k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2126
1.19k
            k = next_swappable_instruction(block, k, lineno);
2127
1.19k
            if (k < 0) {
2128
756
                return;
2129
756
            }
2130
1.19k
        }
2131
        // The reordering is not safe if the two instructions to be swapped
2132
        // store to the same location, or if any intervening instruction stores
2133
        // to the same location as either of them.
2134
205
        int store_j = STORES_TO(block->b_instr[j]);
2135
205
        int store_k = STORES_TO(block->b_instr[k]);
2136
205
        if (store_j >= 0 || store_k >= 0) {
2137
205
            if (store_j == store_k) {
2138
0
                return;
2139
0
            }
2140
240
            for (int idx = j + 1; idx < k; idx++) {
2141
35
                int store_idx = STORES_TO(block->b_instr[idx]);
2142
35
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2143
0
                    return;
2144
0
                }
2145
35
            }
2146
205
        }
2147
2148
        // Success!
2149
205
        INSTR_SET_OP0(swap, NOP);
2150
205
        cfg_instr temp = block->b_instr[j];
2151
205
        block->b_instr[j] = block->b_instr[k];
2152
205
        block->b_instr[k] = temp;
2153
205
    }
2154
1.85k
}
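
/* A standalone sketch of the transformation described above: SWAP 2 followed
 * by two swappable instructions has the same effect as running those two
 * instructions in the opposite order, so the SWAP becomes a NOP and the
 * instructions trade places. Toy types -- illustration only. */

#include <stdio.h>

typedef struct { const char *op; int arg; } sdemo_instr;

int main(void)
{
    sdemo_instr code[] = {{"SWAP", 2}, {"POP_TOP", 0}, {"STORE_FAST", 42}};

    /* apply the SWAP statically */
    code[0] = (sdemo_instr){"NOP", 0};
    sdemo_instr tmp = code[1];
    code[1] = code[2];
    code[2] = tmp;

    /* prints: NOP; STORE_FAST 42; POP_TOP */
    for (int k = 0; k < 3; k++) {
        printf("%s %d\n", code[k].op, code[k].arg);
    }
    return 0;
}
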
2155
2156
static int
2157
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2158
47.9k
{
2159
47.9k
    assert(PyDict_CheckExact(const_cache));
2160
47.9k
    assert(PyList_CheckExact(consts));
2161
47.9k
    int opcode = 0;
2162
47.9k
    int oparg = 0;
2163
425k
    for (int i = 0; i < bb->b_iused; i++) {
2164
377k
        cfg_instr *inst = &bb->b_instr[i];
2165
377k
        if (inst->i_opcode == LOAD_CONST) {
2166
67.1k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2167
67.1k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2168
67.1k
            Py_DECREF(constant);
2169
67.1k
            if (res < 0) {
2170
0
                return ERROR;
2171
0
            }
2172
67.1k
        }
2173
377k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2174
47.9k
                                      inst->i_opcode == COPY &&
2175
40
                                      inst->i_oparg == 1);
2176
377k
        if (! is_copy_of_load_const) {
2177
377k
            opcode = inst->i_opcode;
2178
377k
            oparg = inst->i_oparg;
2179
377k
        }
2180
377k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2181
377k
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2182
310k
            continue;
2183
310k
        }
2184
67.2k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2185
67.2k
        switch(nextop) {
2186
127
            case POP_JUMP_IF_FALSE:
2187
130
            case POP_JUMP_IF_TRUE:
2188
130
            case JUMP_IF_FALSE:
2189
130
            case JUMP_IF_TRUE:
2190
130
            {
2191
                /* Remove LOAD_CONST const; conditional jump */
2192
130
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2193
130
                if (cnt == NULL) {
2194
0
                    return ERROR;
2195
0
                }
2196
130
                int is_true = PyObject_IsTrue(cnt);
2197
130
                Py_DECREF(cnt);
2198
130
                if (is_true == -1) {
2199
0
                    return ERROR;
2200
0
                }
2201
130
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2202
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2203
130
                    INSTR_SET_OP0(inst, NOP);
2204
130
                }
2205
130
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2206
130
                if (is_true == jump_if_true) {
2207
1
                    bb->b_instr[i+1].i_opcode = JUMP;
2208
1
                }
2209
129
                else {
2210
129
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2211
129
                }
2212
130
                break;
2213
130
            }
2214
1.49k
            case IS_OP:
2215
1.49k
            {
2216
                // Fold to POP_JUMP_IF_NONE:
2217
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2218
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2219
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2220
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2221
                // Fold to POP_JUMP_IF_NOT_NONE:
2222
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2223
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2224
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2225
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2226
1.49k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2227
1.49k
                if (cnt == NULL) {
2228
0
                    return ERROR;
2229
0
                }
2230
1.49k
                if (!Py_IsNone(cnt)) {
2231
18
                    Py_DECREF(cnt);
2232
18
                    break;
2233
18
                }
2234
1.47k
                if (bb->b_iused <= i + 2) {
2235
8
                    break;
2236
8
                }
2237
1.46k
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2238
1.46k
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2239
                // Get rid of TO_BOOL regardless:
2240
1.46k
                if (jump_instr->i_opcode == TO_BOOL) {
2241
1.43k
                    INSTR_SET_OP0(jump_instr, NOP);
2242
1.43k
                    if (bb->b_iused <= i + 3) {
2243
0
                        break;
2244
0
                    }
2245
1.43k
                    jump_instr = &bb->b_instr[i + 3];
2246
1.43k
                }
2247
1.46k
                bool invert = is_instr->i_oparg;
2248
1.46k
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2249
1.38k
                    invert = !invert;
2250
1.38k
                }
2251
77
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2252
26
                    break;
2253
26
                }
2254
1.43k
                INSTR_SET_OP0(inst, NOP);
2255
1.43k
                INSTR_SET_OP0(is_instr, NOP);
2256
1.43k
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2257
1.43k
                                              : POP_JUMP_IF_NONE;
2258
1.43k
                break;
2259
1.46k
            }
2260
130
            case TO_BOOL:
2261
130
            {
2262
130
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2263
130
                if (cnt == NULL) {
2264
0
                    return ERROR;
2265
0
                }
2266
130
                int is_true = PyObject_IsTrue(cnt);
2267
130
                Py_DECREF(cnt);
2268
130
                if (is_true == -1) {
2269
0
                    return ERROR;
2270
0
                }
2271
130
                cnt = PyBool_FromLong(is_true);
2272
130
                int index = add_const(cnt, consts, const_cache);
2273
130
                if (index < 0) {
2274
0
                    return ERROR;
2275
0
                }
2276
130
                INSTR_SET_OP0(inst, NOP);
2277
130
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2278
130
                break;
2279
130
            }
2280
67.2k
        }
2281
67.2k
    }
2282
47.9k
    return SUCCESS;
2283
47.9k
}
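
/* A standalone sketch of the invert bookkeeping in the IS_OP case above,
 * enumerating the four IS_OP/jump combinations listed in that comment.
 * Toy opcode values -- illustration only. */

#include <stdbool.h>
#include <stdio.h>

enum { J_POP_JUMP_IF_FALSE, J_POP_JUMP_IF_TRUE };

static const char *
folded_none_jump(int is_op_arg, int jump_opcode)
{
    bool invert = is_op_arg;                    /* IS_OP(1) means "is not" */
    if (jump_opcode == J_POP_JUMP_IF_FALSE) {
        invert = !invert;
    }
    return invert ? "POP_JUMP_IF_NOT_NONE" : "POP_JUMP_IF_NONE";
}

int main(void)
{
    for (int arg = 0; arg <= 1; arg++) {
        for (int jmp = 0; jmp <= 1; jmp++) {
            printf("LOAD_CONST(None) IS_OP(%d) %s -> %s\n", arg,
                   jmp == J_POP_JUMP_IF_TRUE ? "POP_JUMP_IF_TRUE"
                                             : "POP_JUMP_IF_FALSE",
                   folded_none_jump(arg, jmp));
        }
    }
    return 0;
}
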
2284
2285
static int
2286
6.90k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2287
54.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2288
47.9k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2289
47.9k
    }
2290
6.90k
    return SUCCESS;
2291
6.90k
}
2292
2293
static int
2294
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2295
47.9k
{
2296
47.9k
    assert(PyDict_CheckExact(const_cache));
2297
47.9k
    assert(PyList_CheckExact(consts));
2298
47.9k
    cfg_instr nop;
2299
47.9k
    INSTR_SET_OP0(&nop, NOP);
2300
427k
    for (int i = 0; i < bb->b_iused; i++) {
2301
379k
        cfg_instr *inst = &bb->b_instr[i];
2302
379k
        cfg_instr *target;
2303
379k
        int opcode = inst->i_opcode;
2304
379k
        int oparg = inst->i_oparg;
2305
379k
        if (HAS_TARGET(opcode)) {
2306
23.9k
            assert(inst->i_target->b_iused > 0);
2307
23.9k
            target = &inst->i_target->b_instr[0];
2308
23.9k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2309
23.9k
        }
2310
355k
        else {
2311
355k
            target = &nop;
2312
355k
        }
2313
379k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2314
379k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2315
379k
        switch (opcode) {
2316
            /* Try to fold tuples of constants.
2317
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2318
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2319
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2320
4.19k
            case BUILD_TUPLE:
2321
4.19k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2322
104
                    switch(oparg) {
2323
0
                        case 1:
2324
0
                            INSTR_SET_OP0(inst, NOP);
2325
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2326
0
                            continue;
2327
87
                        case 2:
2328
97
                        case 3:
2329
97
                            INSTR_SET_OP0(inst, NOP);
2330
97
                            bb->b_instr[i+1].i_opcode = SWAP;
2331
97
                            continue;
2332
104
                    }
2333
104
                }
2334
4.09k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2335
4.09k
                break;
2336
1.33k
            case BUILD_LIST:
2337
1.44k
            case BUILD_SET:
2338
1.44k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2339
1.44k
                break;
2340
770
            case POP_JUMP_IF_NOT_NONE:
2341
1.50k
            case POP_JUMP_IF_NONE:
2342
1.50k
                switch (target->i_opcode) {
2343
69
                    case JUMP:
2344
69
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2345
1.50k
                }
2346
1.50k
                break;
2347
9.23k
            case POP_JUMP_IF_FALSE:
2348
9.23k
                switch (target->i_opcode) {
2349
641
                    case JUMP:
2350
641
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2351
9.23k
                }
2352
9.23k
                break;
2353
9.23k
            case POP_JUMP_IF_TRUE:
2354
1.77k
                switch (target->i_opcode) {
2355
57
                    case JUMP:
2356
57
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2357
1.77k
                }
2358
1.77k
                break;
2359
1.77k
            case JUMP_IF_FALSE:
2360
318
                switch (target->i_opcode) {
2361
0
                    case JUMP:
2362
0
                    case JUMP_IF_FALSE:
2363
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2364
0
                        continue;
2365
3
                    case JUMP_IF_TRUE:
2366
                        // No need to check for loops here; a block's b_next
2367
                        // cannot point to itself.
2368
3
                        assert(inst->i_target != inst->i_target->b_next);
2369
3
                        inst->i_target = inst->i_target->b_next;
2370
3
                        i--;
2371
3
                        continue;
2372
318
                }
2373
315
                break;
2374
315
            case JUMP_IF_TRUE:
2375
280
                switch (target->i_opcode) {
2376
0
                    case JUMP:
2377
0
                    case JUMP_IF_TRUE:
2378
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2379
0
                        continue;
2380
6
                    case JUMP_IF_FALSE:
2381
                        // No need to check for loops here; a block's b_next
2382
                        // cannot point to itself.
2383
6
                        assert(inst->i_target != inst->i_target->b_next);
2384
6
                        inst->i_target = inst->i_target->b_next;
2385
6
                        i--;
2386
6
                        continue;
2387
280
                }
2388
274
                break;
2389
3.78k
            case JUMP:
2390
6.25k
            case JUMP_NO_INTERRUPT:
2391
6.25k
                switch (target->i_opcode) {
2392
0
                    case JUMP:
2393
0
                        i -= jump_thread(bb, inst, target, JUMP);
2394
0
                        continue;
2395
0
                    case JUMP_NO_INTERRUPT:
2396
0
                        i -= jump_thread(bb, inst, target, opcode);
2397
0
                        continue;
2398
6.25k
                }
2399
6.25k
                break;
2400
6.25k
            case FOR_ITER:
2401
1.71k
                if (target->i_opcode == JUMP) {
2402
                    /* This will not work now because the jump (at target) could
2403
                     * be forward or backward and FOR_ITER only jumps forward. We
2404
                     * can re-enable this if ever we implement a backward version
2405
                     * of FOR_ITER.
2406
                     */
2407
                    /*
2408
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2409
                    */
2410
0
                }
2411
1.71k
                break;
2412
13.8k
            case STORE_FAST:
2413
13.8k
                if (opcode == nextop &&
2414
1.61k
                    oparg == bb->b_instr[i+1].i_oparg &&
2415
27
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2416
27
                    bb->b_instr[i].i_opcode = POP_TOP;
2417
27
                    bb->b_instr[i].i_oparg = 0;
2418
27
                }
2419
13.8k
                break;
2420
2.00k
            case SWAP:
2421
2.00k
                if (oparg == 1) {
2422
0
                    INSTR_SET_OP0(inst, NOP);
2423
0
                }
2424
2.00k
                break;
2425
17.2k
            case LOAD_GLOBAL:
2426
17.2k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2427
8.61k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2428
8.61k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2429
8.61k
                }
2430
17.2k
                break;
2431
5.96k
            case COMPARE_OP:
2432
5.96k
                if (nextop == TO_BOOL) {
2433
2.73k
                    INSTR_SET_OP0(inst, NOP);
2434
2.73k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2435
2.73k
                    continue;
2436
2.73k
                }
2437
3.22k
                break;
2438
3.22k
            case CONTAINS_OP:
2439
4.20k
            case IS_OP:
2440
4.20k
                if (nextop == TO_BOOL) {
2441
1.95k
                    INSTR_SET_OP0(inst, NOP);
2442
1.95k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2443
1.95k
                    continue;
2444
1.95k
                }
2445
2.24k
                if (nextop == UNARY_NOT) {
2446
0
                    INSTR_SET_OP0(inst, NOP);
2447
0
                    int inverted = oparg ^ 1;
2448
0
                    assert(inverted == 0 || inverted == 1);
2449
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2450
0
                    continue;
2451
0
                }
2452
2.24k
                break;
2453
4.73k
            case TO_BOOL:
2454
4.73k
                if (nextop == TO_BOOL) {
2455
0
                    INSTR_SET_OP0(inst, NOP);
2456
0
                    continue;
2457
0
                }
2458
4.73k
                break;
2459
4.73k
            case UNARY_NOT:
2460
97
                if (nextop == TO_BOOL) {
2461
0
                    INSTR_SET_OP0(inst, NOP);
2462
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2463
0
                    continue;
2464
0
                }
2465
97
                if (nextop == UNARY_NOT) {
2466
0
                    INSTR_SET_OP0(inst, NOP);
2467
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2468
0
                    continue;
2469
0
                }
2470
97
                _Py_FALLTHROUGH;
2471
139
            case UNARY_INVERT:
2472
737
            case UNARY_NEGATIVE:
2473
737
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2474
737
                break;
2475
520
            case CALL_INTRINSIC_1:
2476
520
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2477
115
                    if (nextop == GET_ITER) {
2478
0
                        INSTR_SET_OP0(inst, NOP);
2479
0
                    }
2480
115
                    else {
2481
115
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2482
115
                    }
2483
115
                }
2484
405
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2485
2
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2486
2
                }
2487
520
                break;
2488
7.52k
            case BINARY_OP:
2489
7.52k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2490
7.52k
                break;
2491
379k
        }
2492
379k
    }
2493
2494
426k
    for (int i = 0; i < bb->b_iused; i++) {
2495
378k
        cfg_instr *inst = &bb->b_instr[i];
2496
378k
        if (inst->i_opcode == SWAP) {
2497
1.85k
            if (swaptimize(bb, &i) < 0) {
2498
0
                goto error;
2499
0
            }
2500
1.85k
            apply_static_swaps(bb, i);
2501
1.85k
        }
2502
378k
    }
2503
47.9k
    return SUCCESS;
2504
0
error:
2505
0
    return ERROR;
2506
47.9k
}
2507
2508
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2509
2510
static int
2511
remove_redundant_nops_and_jumps(cfg_builder *g)
2512
14.9k
{
2513
14.9k
    int removed_nops, removed_jumps;
2514
17.0k
    do {
2515
        /* Convergence is guaranteed because the number of
2516
         * redundant jumps and nops only decreases.
2517
         */
2518
17.0k
        removed_nops = remove_redundant_nops(g);
2519
17.0k
        RETURN_IF_ERROR(removed_nops);
2520
17.0k
        removed_jumps = remove_redundant_jumps(g);
2521
17.0k
        RETURN_IF_ERROR(removed_jumps);
2522
17.0k
    } while(removed_nops + removed_jumps > 0);
2523
14.9k
    return SUCCESS;
2524
14.9k
}
2525
2526
/* Perform optimizations on a control flow graph.
2527
   The consts object should still be in list form to allow new constants
2528
   to be appended.
2529
2530
   Code transformations that reduce code size initially fill the gaps with
2531
   NOPs.  Later those NOPs are removed.
2532
*/
2533
static int
2534
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2535
6.90k
{
2536
6.90k
    assert(PyDict_CheckExact(const_cache));
2537
6.90k
    RETURN_IF_ERROR(check_cfg(g));
2538
6.90k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2539
6.90k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2540
6.90k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2541
6.90k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2542
54.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2543
47.9k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2544
47.9k
    }
2545
6.90k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2546
6.90k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2547
6.90k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2548
6.90k
    assert(no_redundant_jumps(g));
2549
6.90k
    return SUCCESS;
2550
6.90k
}
2551
2552
static void
2553
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2554
12.9k
{
2555
12.9k
    int32_t line1 = inst1->i_loc.lineno;
2556
12.9k
    int32_t line2 = inst2->i_loc.lineno;
2557
    /* Skip if instructions are on different lines */
2558
12.9k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2559
4.73k
        return;
2560
4.73k
    }
2561
8.24k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2562
845
        return;
2563
845
    }
2564
7.39k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2565
7.39k
    INSTR_SET_OP0(inst2, NOP);
2566
7.39k
}
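
/* A quick standalone check of the oparg packing above: a superinstruction
 * carries two 4-bit arguments, which is why both operands must be < 16 and
 * why consumers later unpack them with `oparg >> 4` and `oparg & 15`. */

#include <assert.h>

int main(void)
{
    int arg1 = 3, arg2 = 11;            /* both < 16 */
    int packed = (arg1 << 4) | arg2;    /* what make_super_instruction stores */
    assert((packed >> 4) == arg1);      /* first operand */
    assert((packed & 15) == arg2);      /* second operand */
    return 0;
}
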
2567
2568
static int
2569
insert_superinstructions(cfg_builder *g)
2570
6.90k
{
2571
54.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2572
2573
394k
        for (int i = 0; i < b->b_iused; i++) {
2574
346k
            cfg_instr *inst = &b->b_instr[i];
2575
346k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2576
346k
            switch(inst->i_opcode) {
2577
44.7k
                case LOAD_FAST:
2578
44.7k
                    if (nextop == LOAD_FAST) {
2579
6.85k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2580
6.85k
                    }
2581
44.7k
                    break;
2582
12.6k
                case STORE_FAST:
2583
12.6k
                    switch (nextop) {
2584
4.78k
                        case LOAD_FAST:
2585
4.78k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2586
4.78k
                            break;
2587
1.33k
                        case STORE_FAST:
2588
1.33k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2589
1.33k
                            break;
2590
12.6k
                    }
2591
12.6k
                    break;
2592
346k
            }
2593
346k
        }
2594
47.9k
    }
2595
6.90k
    int res = remove_redundant_nops(g);
2596
6.90k
    assert(no_redundant_nops(g));
2597
6.90k
    return res;
2598
6.90k
}
2599
2600
#define NOT_LOCAL -1
2601
13.3k
#define DUMMY_INSTR -1
2602
2603
typedef struct {
2604
    // Index of instruction that produced the reference or DUMMY_INSTR.
2605
    int instr;
2606
2607
    // The local to which the reference refers or NOT_LOCAL.
2608
    int local;
2609
} ref;
2610
2611
typedef struct {
2612
    ref *refs;
2613
    Py_ssize_t size;
2614
    Py_ssize_t capacity;
2615
} ref_stack;
2616
2617
static int
2618
ref_stack_push(ref_stack *stack, ref r)
2619
291k
{
2620
291k
    if (stack->size == stack->capacity) {
2621
6.90k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2622
6.90k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2623
6.90k
        if (refs == NULL) {
2624
0
            PyErr_NoMemory();
2625
0
            return -1;
2626
0
        }
2627
6.90k
        stack->refs = refs;
2628
6.90k
        stack->capacity = new_cap;
2629
6.90k
    }
2630
291k
    stack->refs[stack->size] = r;
2631
291k
    stack->size++;
2632
291k
    return 0;
2633
291k
}
2634
2635
static ref
2636
ref_stack_pop(ref_stack *stack)
2637
250k
{
2638
250k
    assert(stack->size > 0);
2639
250k
    stack->size--;
2640
250k
    ref r = stack->refs[stack->size];
2641
250k
    return r;
2642
250k
}
2643
2644
static void
2645
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2646
1.20k
{
2647
1.20k
    Py_ssize_t idx = stack->size - off;
2648
1.20k
    assert(idx >= 0 && idx < stack->size);
2649
1.20k
    ref tmp = stack->refs[idx];
2650
1.20k
    stack->refs[idx] = stack->refs[stack->size - 1];
2651
1.20k
    stack->refs[stack->size - 1] = tmp;
2652
1.20k
}
2653
2654
static ref
2655
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2656
55.7k
{
2657
55.7k
    assert(idx >= 0 && idx < stack->size);
2658
55.7k
    return stack->refs[idx];
2659
55.7k
}
2660
2661
static void
2662
ref_stack_clear(ref_stack *stack)
2663
35.6k
{
2664
35.6k
    stack->size = 0;
2665
35.6k
}
2666
2667
static void
2668
ref_stack_fini(ref_stack *stack)
2669
6.90k
{
2670
6.90k
    if (stack->refs != NULL) {
2671
6.90k
        PyMem_Free(stack->refs);
2672
6.90k
    }
2673
6.90k
    stack->refs = NULL;
2674
6.90k
    stack->capacity = 0;
2675
6.90k
    stack->size = 0;
2676
6.90k
}
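
/* A unit-test-style sketch of the ref_stack API above, written as if it
 * lived in this file (the functions are static): a SWAP 2 in the bytecode
 * becomes ref_stack_swap_top(&s, 2). Error handling elided -- illustration
 * only. */
static void
ref_stack_demo(void)
{
    ref_stack s = {0};
    (void)ref_stack_push(&s, (ref){0, NOT_LOCAL}); /* instr 0: non-local value */
    (void)ref_stack_push(&s, (ref){1, 3});         /* instr 1: borrow of local 3 */
    ref_stack_swap_top(&s, 2);                     /* model a SWAP 2 */
    ref top = ref_stack_pop(&s);
    assert(top.instr == 0);                 /* instr 0's ref is now on top */
    assert(ref_stack_at(&s, 0).local == 3); /* instr 1's borrow remains below */
    ref_stack_fini(&s);
}
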
2677
2678
typedef enum {
2679
    // The loaded reference is still on the stack when the local is killed
2680
    SUPPORT_KILLED  = 1,
2681
    // The loaded reference is stored into a local
2682
    STORED_AS_LOCAL = 2,
2683
    // The loaded reference is still on the stack at the end of the basic block
2684
    REF_UNCONSUMED  = 4,
2685
} LoadFastInstrFlag;
2686
2687
static void
2688
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2689
13.6k
{
2690
26.9k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2691
13.2k
        ref r = ref_stack_at(refs, i);
2692
13.2k
        if (r.local == local) {
2693
12
            assert(r.instr >= 0);
2694
12
            instr_flags[r.instr] |= SUPPORT_KILLED;
2695
12
        }
2696
13.2k
    }
2697
13.6k
}
2698
2699
static void
2700
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2701
13.3k
{
2702
13.3k
    kill_local(instr_flags, refs, local);
2703
13.3k
    if (r.instr != DUMMY_INSTR) {
2704
11.5k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2705
11.5k
    }
2706
13.3k
}
2707
2708
static void
2709
load_fast_push_block(basicblock ***sp, basicblock *target,
2710
                     Py_ssize_t start_depth)
2711
37.6k
{
2712
37.6k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2713
37.6k
    if (!target->b_visited) {
2714
28.7k
        target->b_visited = 1;
2715
28.7k
        *(*sp)++ = target;
2716
28.7k
    }
2717
37.6k
}
2718
2719
/*
2720
 * Strength-reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2721
 * load borrowed references onto the operand stack.
2722
 *
2723
 * This is only safe when we can prove that the reference in the frame outlives
2724
 * the borrowed reference produced by the instruction. We make this tractable
2725
 * by enforcing the following lifetimes:
2726
 *
2727
 * 1. Borrowed references loaded onto the operand stack live until the end of
2728
 *    the instruction that consumes them from the stack. Any borrowed
2729
 *    references that would escape into the heap (e.g. into frame objects or
2730
 *    generators) are converted into new, strong references.
2731
 *
2732
 * 2. Locals live until they are either killed by an instruction
2733
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2734
 *    via `f_locals` is added to a tuple owned by the frame object.
2735
 *
2736
 * To simplify the problem of detecting which supporting references in the
2737
 * frame are killed by instructions that overwrite locals, we only allow
2738
 * borrowed references to be stored as a local in the frame if they were passed
2739
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2740
 * strong references.
2741
 *
2742
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2743
 * that meet the following criteria:
2744
 *
2745
 * 1. The produced reference must be consumed from the stack before the
2746
 *    supporting reference in the frame is killed.
2747
 *
2748
 * 2. The produced reference cannot be stored as a local.
2749
 *
2750
 * We use abstract interpretation to identify instructions that meet these
2751
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2752
 * stack of abstract references and note any instructions that violate the
2753
 * criteria above. Once we've processed all the instructions in a block, any
2754
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2755
 */
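
/* A small sketch (again as if it lived in this file) of how the pass detects
 * a violation of criterion 1 above: the borrow produced by a LOAD_FAST is
 * still on the abstract stack when its local is overwritten, so the load is
 * flagged SUPPORT_KILLED and will not be strength-reduced. Illustration
 * only. */
static void
load_fast_analysis_demo(void)
{
    uint8_t instr_flags[3] = {0, 0, 0};
    ref_stack refs = {0};
    /* 0: LOAD_FAST x   -- borrow of local 0 pushed */
    (void)ref_stack_push(&refs, (ref){0, 0});
    /* 1: LOAD_CONST c  -- no local behind it */
    (void)ref_stack_push(&refs, (ref){1, NOT_LOCAL});
    /* 2: STORE_FAST x  -- pops the const and kills local 0 while the borrow
     *    from instruction 0 is still on the stack */
    ref r = ref_stack_pop(&refs);
    store_local(instr_flags, &refs, 0, r);
    assert(instr_flags[0] & SUPPORT_KILLED);    /* LOAD_FAST 0 stays a strong load */
    assert(instr_flags[1] & STORED_AS_LOCAL);   /* the const was stored to a local */
    ref_stack_fini(&refs);
}
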
2756
static int
2757
optimize_load_fast(cfg_builder *g)
2758
6.90k
{
2759
6.90k
    int status;
2760
6.90k
    ref_stack refs = {0};
2761
6.90k
    int max_instrs = 0;
2762
6.90k
    basicblock *entryblock = g->g_entryblock;
2763
55.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2764
48.8k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2765
48.8k
    }
2766
6.90k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2767
6.90k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2768
6.90k
    if (instr_flags == NULL) {
2769
0
        PyErr_NoMemory();
2770
0
        return ERROR;
2771
0
    }
2772
6.90k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2773
6.90k
    if (blocks == NULL) {
2774
0
        status = ERROR;
2775
0
        goto done;
2776
0
    }
2777
6.90k
    basicblock **sp = blocks;
2778
6.90k
    *sp = entryblock;
2779
6.90k
    sp++;
2780
6.90k
    entryblock->b_startdepth = 0;
2781
6.90k
    entryblock->b_visited = 1;
2782
2783
6.90k
    #define PUSH_REF(instr, local)                \
2784
291k
        do {                                      \
2785
291k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2786
0
                status = ERROR;                   \
2787
0
                goto done;                        \
2788
0
            }                                     \
2789
291k
        } while(0)
2790
2791
42.5k
    while (sp != blocks) {
2792
35.6k
        basicblock *block = *--sp;
2793
35.6k
        assert(block->b_startdepth > -1);
2794
2795
        // Reset per-block state.
2796
35.6k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2797
2798
        // Reset the stack of refs. We don't track references on the stack
2799
        // across basic blocks, but the bytecode will expect their
2800
        // presence. Add dummy references as necessary.
2801
35.6k
        ref_stack_clear(&refs);
2802
70.0k
        for (int i = 0; i < block->b_startdepth; i++) {
2803
34.3k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2804
34.3k
        }
2805
2806
365k
        for (int i = 0; i < block->b_iused; i++) {
2807
329k
            cfg_instr *instr = &block->b_instr[i];
2808
329k
            int opcode = instr->i_opcode;
2809
329k
            int oparg = instr->i_oparg;
2810
329k
            assert(opcode != EXTENDED_ARG);
2811
329k
            switch (opcode) {
2812
                // Opcodes that load and store locals
2813
2
                case DELETE_FAST: {
2814
2
                    kill_local(instr_flags, &refs, oparg);
2815
2
                    break;
2816
0
                }
2817
2818
39.1k
                case LOAD_FAST: {
2819
39.1k
                    PUSH_REF(i, oparg);
2820
39.1k
                    break;
2821
39.1k
                }
2822
2823
39.1k
                case LOAD_FAST_AND_CLEAR: {
2824
319
                    kill_local(instr_flags, &refs, oparg);
2825
319
                    PUSH_REF(i, oparg);
2826
319
                    break;
2827
319
                }
2828
2829
5.87k
                case LOAD_FAST_LOAD_FAST: {
2830
5.87k
                    PUSH_REF(i, oparg >> 4);
2831
5.87k
                    PUSH_REF(i, oparg & 15);
2832
5.87k
                    break;
2833
5.87k
                }
2834
2835
10.8k
                case STORE_FAST: {
2836
10.8k
                    ref r = ref_stack_pop(&refs);
2837
10.8k
                    store_local(instr_flags, &refs, oparg, r);
2838
10.8k
                    break;
2839
5.87k
                }
2840
2841
221
                case STORE_FAST_LOAD_FAST: {
2842
                    // STORE_FAST
2843
221
                    ref r = ref_stack_pop(&refs);
2844
221
                    store_local(instr_flags, &refs, oparg >> 4, r);
2845
                    // LOAD_FAST
2846
221
                    PUSH_REF(i, oparg & 15);
2847
221
                    break;
2848
221
                }
2849
2850
1.12k
                case STORE_FAST_STORE_FAST: {
2851
                    // STORE_FAST
2852
1.12k
                    ref r = ref_stack_pop(&refs);
2853
1.12k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2854
                    // STORE_FAST
2855
1.12k
                    r = ref_stack_pop(&refs);
2856
1.12k
                    store_local(instr_flags, &refs, oparg & 15, r);
2857
1.12k
                    break;
2858
221
                }
2859
2860
                // Opcodes that shuffle values on the stack
2861
1.34k
                case COPY: {
2862
1.34k
                    assert(oparg > 0);
2863
1.34k
                    Py_ssize_t idx = refs.size - oparg;
2864
1.34k
                    ref r = ref_stack_at(&refs, idx);
2865
1.34k
                    PUSH_REF(r.instr, r.local);
2866
1.34k
                    break;
2867
1.34k
                }
2868
2869
1.34k
                case SWAP: {
2870
1.20k
                    assert(oparg >= 2);
2871
1.20k
                    ref_stack_swap_top(&refs, oparg);
2872
1.20k
                    break;
2873
1.34k
                }
2874
2875
                // We treat opcodes that do not consume all of their inputs on
2876
                // a case-by-case basis, as we have no generic way of knowing
2877
                // how many inputs should be left on the stack.
2878
2879
                // Opcodes that consume no inputs
2880
1.38k
                case FORMAT_SIMPLE:
2881
1.38k
                case GET_ANEXT:
2882
2.98k
                case GET_ITER:
2883
2.98k
                case GET_LEN:
2884
3.01k
                case GET_YIELD_FROM_ITER:
2885
3.62k
                case IMPORT_FROM:
2886
3.62k
                case MATCH_KEYS:
2887
3.62k
                case MATCH_MAPPING:
2888
3.62k
                case MATCH_SEQUENCE:
2889
3.62k
                case WITH_EXCEPT_START: {
2890
3.62k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2891
3.62k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2892
3.62k
                    int net_pushed = num_pushed - num_popped;
2893
3.62k
                    assert(net_pushed >= 0);
2894
5.83k
                    for (int j = 0; j < net_pushed; j++) {
2895
2.20k
                        PUSH_REF(i, NOT_LOCAL);
2896
2.20k
                    }
2897
3.62k
                    break;
2898
3.62k
                }
2899
2900
                // Opcodes that consume some inputs and push no new values
2901
3.62k
                case DICT_MERGE:
2902
806
                case DICT_UPDATE:
2903
1.37k
                case LIST_APPEND:
2904
1.59k
                case LIST_EXTEND:
2905
12.3k
                case MAP_ADD:
2906
12.3k
                case RERAISE:
2907
12.5k
                case SET_ADD:
2908
12.5k
                case SET_UPDATE: {
2909
12.5k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2910
12.5k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2911
12.5k
                    int net_popped = num_popped - num_pushed;
2912
12.5k
                    assert(net_popped > 0);
2913
35.7k
                    for (int i = 0; i < net_popped; i++) {
2914
23.2k
                        ref_stack_pop(&refs);
2915
23.2k
                    }
2916
12.5k
                    break;
2917
12.5k
                }
2918
2919
44
                case END_SEND:
2920
1.90k
                case SET_FUNCTION_ATTRIBUTE: {
2921
1.90k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2922
1.90k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2923
1.90k
                    ref tos = ref_stack_pop(&refs);
2924
1.90k
                    ref_stack_pop(&refs);
2925
1.90k
                    PUSH_REF(tos.instr, tos.local);
2926
1.90k
                    break;
2927
1.90k
                }
2928
2929
                // Opcodes that consume some inputs and push new values
2930
1.90k
                case CHECK_EXC_MATCH: {
2931
0
                    ref_stack_pop(&refs);
2932
0
                    PUSH_REF(i, NOT_LOCAL);
2933
0
                    break;
2934
0
                }
2935
2936
1.67k
                case FOR_ITER: {
2937
1.67k
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2938
1.67k
                    PUSH_REF(i, NOT_LOCAL);
2939
1.67k
                    break;
2940
1.67k
                }
2941
2942
19.8k
                case LOAD_ATTR:
2943
20.0k
                case LOAD_SUPER_ATTR: {
2944
20.0k
                    ref self = ref_stack_pop(&refs);
2945
20.0k
                    if (opcode == LOAD_SUPER_ATTR) {
2946
207
                        ref_stack_pop(&refs);
2947
207
                        ref_stack_pop(&refs);
2948
207
                    }
2949
20.0k
                    PUSH_REF(i, NOT_LOCAL);
2950
20.0k
                    if (oparg & 1) {
2951
                        // A method call; conservatively assume that self is pushed
2952
                        // back onto the stack
2953
7.84k
                        PUSH_REF(self.instr, self.local);
2954
7.84k
                    }
2955
20.0k
                    break;
2956
20.0k
                }
2957
2958
20.0k
                case LOAD_SPECIAL:
2959
220
                case PUSH_EXC_INFO: {
2960
220
                    ref tos = ref_stack_pop(&refs);
2961
220
                    PUSH_REF(i, NOT_LOCAL);
2962
220
                    PUSH_REF(tos.instr, tos.local);
2963
220
                    break;
2964
220
                }
2965
2966
220
                case SEND: {
2967
44
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2968
44
                    ref_stack_pop(&refs);
2969
44
                    PUSH_REF(i, NOT_LOCAL);
2970
44
                    break;
2971
44
                }
2972
2973
                // Opcodes that consume all of their inputs
2974
229k
                default: {
2975
229k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2976
229k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2977
229k
                    if (HAS_TARGET(instr->i_opcode)) {
2978
16.1k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2979
16.1k
                    }
2980
229k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2981
                        // Block push opcodes only affect the stack when jumping
2982
                        // to the target.
2983
418k
                        for (int j = 0; j < num_popped; j++) {
2984
189k
                            ref_stack_pop(&refs);
2985
189k
                        }
2986
399k
                        for (int j = 0; j < num_pushed; j++) {
2987
170k
                            PUSH_REF(i, NOT_LOCAL);
2988
170k
                        }
2989
229k
                    }
2990
229k
                    break;
2991
229k
                }
2992
329k
            }
2993
329k
        }
2994
2995
        // Push fallthrough block
2996
35.6k
        if (BB_HAS_FALLTHROUGH(block)) {
2997
19.7k
            assert(block->b_next != NULL);
2998
19.7k
            load_fast_push_block(&sp, block->b_next, refs.size);
2999
19.7k
        }
3000
3001
        // Mark instructions that produce values that are on the stack at the
3002
        // end of the basic block
3003
76.7k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3004
41.1k
            ref r = ref_stack_at(&refs, i);
3005
41.1k
            if (r.instr != -1) {
3006
17.1k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3007
17.1k
            }
3008
41.1k
        }
3009
3010
        // Optimize instructions
3011
365k
        for (int i = 0; i < block->b_iused; i++) {
3012
329k
            if (!instr_flags[i]) {
3013
302k
                cfg_instr *instr = &block->b_instr[i];
3014
302k
                switch (instr->i_opcode) {
3015
37.7k
                    case LOAD_FAST:
3016
37.7k
                        instr->i_opcode = LOAD_FAST_BORROW;
3017
37.7k
                        break;
3018
5.80k
                    case LOAD_FAST_LOAD_FAST:
3019
5.80k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3020
5.80k
                        break;
3021
258k
                    default:
3022
258k
                        break;
3023
302k
                }
3024
302k
            }
3025
329k
        }
3026
35.6k
    }
3027
3028
6.90k
    #undef PUSH_REF
3029
3030
6.90k
    status = SUCCESS;
3031
3032
6.90k
done:
3033
6.90k
    ref_stack_fini(&refs);
3034
6.90k
    PyMem_Free(instr_flags);
3035
6.90k
    PyMem_Free(blocks);
3036
6.90k
    return status;
3037
6.90k
}
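/* The pass above can only downgrade a LOAD_FAST to a borrowing variant
 * when the reference it pushes is consumed within the same basic block
 * and the local is not killed in between. A minimal standalone sketch
 * of that bookkeeping, assuming a single block and a plain array for
 * the ref stack (toy names, not part of flowgraph.c):
 */
#include <stdio.h>

#define TOY_NOT_LOCAL (-1)

typedef struct { int instr; int local; } toy_ref;

int main(void)
{
    /* Toy block: LOAD_FAST x; LOAD_FAST y; BINARY_OP; RETURN_VALUE. */
    toy_ref stack[8];
    int sp = 0;
    unsigned char unconsumed[4] = {0};

    stack[sp++] = (toy_ref){0, 0};             /* 0: LOAD_FAST x            */
    stack[sp++] = (toy_ref){1, 1};             /* 1: LOAD_FAST y            */
    sp -= 2;                                   /* 2: BINARY_OP pops both... */
    stack[sp++] = (toy_ref){2, TOY_NOT_LOCAL}; /*    ...and pushes a result */
    sp -= 1;                                   /* 3: RETURN_VALUE pops it   */

    /* Producers whose values are still on the stack at block end must
     * keep a strong reference; everything else may borrow. */
    for (int i = 0; i < sp; i++) {
        unconsumed[stack[i].instr] = 1;
    }
    for (int i = 0; i < 4; i++) {
        printf("instr %d: %s\n", i,
               unconsumed[i] ? "keep strong ref" : "borrow ok");
    }
    return 0;
}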
3038
3039
// helper functions for add_checks_for_loads_of_unknown_variables
3040
static inline void
3041
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3042
162k
{
3043
    // Push b if the unsafe mask is giving us any new information.
3044
    // To avoid overflowing the stack, only allow each block once.
3045
    // Use b->b_visited=1 to mean that b is currently on the stack.
3046
162k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3047
162k
    if (b->b_unsafe_locals_mask != both) {
3048
27.2k
        b->b_unsafe_locals_mask = both;
3049
        // More work left to do.
3050
27.2k
        if (!b->b_visited) {
3051
            // not on the stack, so push it.
3052
27.0k
            *(*sp)++ = b;
3053
27.0k
            b->b_visited = 1;
3054
27.0k
        }
3055
27.2k
    }
3056
162k
}
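/* A standalone sketch of the merge step above (hypothetical name, not
 * part of flowgraph.c): a block is only worth re-visiting when the
 * incoming mask contributes at least one new "maybe unbound" bit.
 */
#include <stdint.h>
#include <stdio.h>

static int merge_unsafe_mask(uint64_t *block_mask, uint64_t incoming)
{
    uint64_t both = *block_mask | incoming;
    if (*block_mask == both) {
        return 0;                /* no new information: don't push */
    }
    *block_mask = both;
    return 1;                    /* new bits: more work to do */
}

int main(void)
{
    uint64_t mask = 0x3;                            /* locals 0,1 unsafe */
    printf("%d\n", merge_unsafe_mask(&mask, 0x2));  /* 0: already known  */
    printf("%d\n", merge_unsafe_mask(&mask, 0x4));  /* 1: local 2 is new */
    return 0;
}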
3057
3058
static void
3059
scan_block_for_locals(basicblock *b, basicblock ***sp)
3060
71.5k
{
3061
    // bit i is set if local i is potentially uninitialized
3062
71.5k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3063
497k
    for (int i = 0; i < b->b_iused; i++) {
3064
425k
        cfg_instr *instr = &b->b_instr[i];
3065
425k
        assert(instr->i_opcode != EXTENDED_ARG);
3066
425k
        if (instr->i_except != NULL) {
3067
84.1k
            maybe_push(instr->i_except, unsafe_mask, sp);
3068
84.1k
        }
3069
425k
        if (instr->i_oparg >= 64) {
3070
29.2k
            continue;
3071
29.2k
        }
3072
425k
        assert(instr->i_oparg >= 0);
3073
396k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3074
396k
        switch (instr->i_opcode) {
3075
345
            case DELETE_FAST:
3076
993
            case LOAD_FAST_AND_CLEAR:
3077
2.28k
            case STORE_FAST_MAYBE_NULL:
3078
2.28k
                unsafe_mask |= bit;
3079
2.28k
                break;
3080
26.7k
            case STORE_FAST:
3081
26.7k
                unsafe_mask &= ~bit;
3082
26.7k
                break;
3083
124
            case LOAD_FAST_CHECK:
3084
                // If this doesn't raise, then the local is defined.
3085
124
                unsafe_mask &= ~bit;
3086
124
                break;
3087
87.4k
            case LOAD_FAST:
3088
87.4k
                if (unsafe_mask & bit) {
3089
124
                    instr->i_opcode = LOAD_FAST_CHECK;
3090
124
                }
3091
87.4k
                unsafe_mask &= ~bit;
3092
87.4k
                break;
3093
396k
        }
3094
396k
    }
3095
71.5k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3096
38.1k
        maybe_push(b->b_next, unsafe_mask, sp);
3097
38.1k
    }
3098
71.5k
    cfg_instr *last = basicblock_last_instr(b);
3099
71.5k
    if (last && is_jump(last)) {
3100
35.0k
        assert(last->i_target != NULL);
3101
35.0k
        maybe_push(last->i_target, unsafe_mask, sp);
3102
35.0k
    }
3103
71.5k
}
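/* A standalone sketch of the per-opcode mask updates above (toy names,
 * not part of flowgraph.c): kills set the "maybe unbound" bit, stores
 * clear it, and a load of a maybe-unbound local is the one that must
 * become LOAD_FAST_CHECK.
 */
#include <stdint.h>
#include <stdio.h>

enum toy_op { TOY_STORE, TOY_DELETE, TOY_LOAD };

int main(void)
{
    uint64_t unsafe = 1;    /* local 0 starts maybe-unbound */
    struct { enum toy_op op; int arg; } code[] = {
        { TOY_STORE,  0 },  /* defines local 0               */
        { TOY_LOAD,   0 },  /* safe: stays LOAD_FAST         */
        { TOY_DELETE, 0 },  /* local 0 maybe-unbound again   */
        { TOY_LOAD,   0 },  /* unsafe: needs LOAD_FAST_CHECK */
    };
    for (size_t i = 0; i < sizeof(code) / sizeof(code[0]); i++) {
        uint64_t bit = (uint64_t)1 << code[i].arg;
        switch (code[i].op) {
            case TOY_STORE:  unsafe &= ~bit; break;
            case TOY_DELETE: unsafe |= bit;  break;
            case TOY_LOAD:
                printf("load at %zu: %s\n", i,
                       (unsafe & bit) ? "LOAD_FAST_CHECK" : "LOAD_FAST");
                unsafe &= ~bit;  /* a load that didn't raise proves binding */
                break;
        }
    }
    return 0;
}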
3104
3105
static int
3106
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3107
0
{
3108
0
    assert(nlocals > 64);
3109
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3110
0
    if (states == NULL) {
3111
0
        PyErr_NoMemory();
3112
0
        return ERROR;
3113
0
    }
3114
0
    Py_ssize_t blocknum = 0;
3115
    // states[i - 64] == blocknum if local i is guaranteed to
3116
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3117
    // STORE_FAST within that basicblock (not followed by
3118
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3119
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3120
0
        blocknum++;
3121
0
        for (int i = 0; i < b->b_iused; i++) {
3122
0
            cfg_instr *instr = &b->b_instr[i];
3123
0
            assert(instr->i_opcode != EXTENDED_ARG);
3124
0
            int arg = instr->i_oparg;
3125
0
            if (arg < 64) {
3126
0
                continue;
3127
0
            }
3128
0
            assert(arg >= 0);
3129
0
            switch (instr->i_opcode) {
3130
0
                case DELETE_FAST:
3131
0
                case LOAD_FAST_AND_CLEAR:
3132
0
                case STORE_FAST_MAYBE_NULL:
3133
0
                    states[arg - 64] = blocknum - 1;
3134
0
                    break;
3135
0
                case STORE_FAST:
3136
0
                    states[arg - 64] = blocknum;
3137
0
                    break;
3138
0
                case LOAD_FAST:
3139
0
                    if (states[arg - 64] != blocknum) {
3140
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3141
0
                    }
3142
0
                    states[arg - 64] = blocknum;
3143
0
                    break;
3144
0
                    Py_UNREACHABLE();
3145
0
            }
3146
0
        }
3147
0
    }
3148
0
    PyMem_Free(states);
3149
0
    return SUCCESS;
3150
0
}
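/* The blocknum trick above is a constant-time reset: states[] is never
 * cleared between blocks; a slot only means "defined in the current
 * block" while it holds the current block number. A tiny standalone
 * sketch (toy setup, not part of flowgraph.c):
 */
#include <stdio.h>

int main(void)
{
    long states[1] = {0};     /* 0 == not defined in any block yet     */
    long blocknum = 0;

    blocknum++;               /* enter block 1                         */
    states[0] = blocknum;     /* STORE_FAST: defined within block 1    */

    blocknum++;               /* enter block 2: no memset() required;  */
                              /* the stale value 1 != 2 reads "unknown" */
    printf("%s\n", states[0] == blocknum ? "LOAD_FAST" : "LOAD_FAST_CHECK");
    return 0;
}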
3151
3152
static int
3153
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3154
6.90k
{
3155
6.90k
    assert(PyList_CheckExact(consts));
3156
6.90k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3157
6.90k
    if (nconsts == 0) {
3158
0
        return SUCCESS;  /* nothing to do */
3159
0
    }
3160
3161
6.90k
    Py_ssize_t *index_map = NULL;
3162
6.90k
    Py_ssize_t *reverse_index_map = NULL;
3163
6.90k
    int err = ERROR;
3164
3165
6.90k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3166
6.90k
    if (index_map == NULL) {
3167
0
        goto end;
3168
0
    }
3169
51.9k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3170
45.0k
        index_map[i] = -1;
3171
45.0k
    }
3172
    // The first constant may be the docstring; always keep it.
3173
6.90k
    index_map[0] = 0;
3174
3175
    /* mark used consts */
3176
54.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3177
394k
        for (int i = 0; i < b->b_iused; i++) {
3178
346k
            int opcode = b->b_instr[i].i_opcode;
3179
346k
            if (OPCODE_HAS_CONST(opcode)) {
3180
44.7k
                int index = b->b_instr[i].i_oparg;
3181
44.7k
                index_map[index] = index;
3182
44.7k
            }
3183
346k
        }
3184
47.9k
    }
3185
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3186
    /* condense consts */
3187
6.90k
    Py_ssize_t n_used_consts = 0;
3188
58.8k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3189
51.9k
        if (index_map[i] != -1) {
3190
38.6k
            assert(index_map[i] == i);
3191
38.6k
            index_map[n_used_consts++] = index_map[i];
3192
38.6k
        }
3193
51.9k
    }
3194
6.90k
    if (n_used_consts == nconsts) {
3195
        /* nothing to do */
3196
2.75k
        err = SUCCESS;
3197
2.75k
        goto end;
3198
2.75k
    }
3199
3200
    /* move all used consts to the beginning of the consts list */
3201
6.90k
    assert(n_used_consts < nconsts);
3202
35.0k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3203
30.8k
        Py_ssize_t old_index = index_map[i];
3204
30.8k
        assert(i <= old_index && old_index < nconsts);
3205
30.8k
        if (i != old_index) {
3206
22.8k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3207
22.8k
            assert(value != NULL);
3208
22.8k
            PyList_SetItem(consts, i, Py_NewRef(value));
3209
22.8k
        }
3210
30.8k
    }
3211
3212
    /* truncate the consts list at its new size */
3213
4.15k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3214
0
        goto end;
3215
0
    }
3216
    /* adjust const indices in the bytecode */
3217
4.15k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3218
4.15k
    if (reverse_index_map == NULL) {
3219
0
        goto end;
3220
0
    }
3221
48.3k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3222
44.2k
        reverse_index_map[i] = -1;
3223
44.2k
    }
3224
35.0k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3225
30.8k
        assert(index_map[i] != -1);
3226
30.8k
        assert(reverse_index_map[index_map[i]] == -1);
3227
30.8k
        reverse_index_map[index_map[i]] = i;
3228
30.8k
    }
3229
3230
38.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3231
306k
        for (int i = 0; i < b->b_iused; i++) {
3232
271k
            int opcode = b->b_instr[i].i_opcode;
3233
271k
            if (OPCODE_HAS_CONST(opcode)) {
3234
36.7k
                int index = b->b_instr[i].i_oparg;
3235
36.7k
                assert(reverse_index_map[index] >= 0);
3236
36.7k
                assert(reverse_index_map[index] < n_used_consts);
3237
36.7k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3238
36.7k
            }
3239
271k
        }
3240
34.7k
    }
3241
3242
4.15k
    err = SUCCESS;
3243
6.90k
end:
3244
6.90k
    PyMem_Free(index_map);
3245
6.90k
    PyMem_Free(reverse_index_map);
3246
6.90k
    return err;
3247
4.15k
}
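/* A standalone sketch of the two-map compaction above: index_map marks
 * and then condenses the used slots; reverse_index_map renumbers the
 * opargs (toy data, not part of flowgraph.c).
 */
#include <stdio.h>

int main(void)
{
    /* consts 0..4; only 0, 2 and 4 are referenced by the bytecode. */
    long index_map[5] = {0, -1, 2, -1, 4};
    long reverse[5]   = {-1, -1, -1, -1, -1};

    long n_used = 0;
    for (int i = 0; i < 5; i++) {
        if (index_map[i] != -1) {
            index_map[n_used++] = index_map[i];   /* condense */
        }
    }
    for (long i = 0; i < n_used; i++) {
        reverse[index_map[i]] = i;                /* old -> new */
    }
    /* An instruction with oparg 4 now loads the const at index 2. */
    printf("old 4 -> new %ld\n", reverse[4]);
    return 0;
}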
3248
3249
3250
3251
static int
3252
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3253
                                                int nlocals,
3254
                                                int nparams)
3255
6.90k
{
3256
6.90k
    if (nlocals == 0) {
3257
1.66k
        return SUCCESS;
3258
1.66k
    }
3259
5.24k
    if (nlocals > 64) {
3260
        // To avoid O(nlocals**2) compilation, locals beyond the first
3261
        // 64 are only analyzed one basicblock at a time: initialization
3262
        // info is not passed between basicblocks.
3263
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3264
0
            return ERROR;
3265
0
        }
3266
0
        nlocals = 64;
3267
0
    }
3268
5.24k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3269
5.24k
    if (stack == NULL) {
3270
0
        return ERROR;
3271
0
    }
3272
5.24k
    basicblock **sp = stack;
3273
3274
    // First origin of being uninitialized:
3275
    // The non-parameter locals in the entry block.
3276
5.24k
    uint64_t start_mask = 0;
3277
13.4k
    for (int i = nparams; i < nlocals; i++) {
3278
8.21k
        start_mask |= (uint64_t)1 << i;
3279
8.21k
    }
3280
5.24k
    maybe_push(entryblock, start_mask, &sp);
3281
3282
    // Second origin of being uninitialized:
3283
    // There could be DELETE_FAST somewhere, so
3284
    // be sure to scan each basicblock at least once.
3285
49.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
44.4k
        scan_block_for_locals(b, &sp);
3287
44.4k
    }
3288
    // Now propagate the uncertainty from the origins we found: Use
3289
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3290
32.3k
    while (sp > stack) {
3291
27.0k
        basicblock *b = *--sp;
3292
        // mark as no longer on stack
3293
27.0k
        b->b_visited = 0;
3294
27.0k
        scan_block_for_locals(b, &sp);
3295
27.0k
    }
3296
5.24k
    PyMem_Free(stack);
3297
5.24k
    return SUCCESS;
3298
5.24k
}
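/* The canonical case this pass exists for (illustrative Python, shown
 * as a comment):
 *
 *     def f(cond):
 *         if cond:
 *             x = 1
 *         return x
 *
 * The entry block marks x as maybe-unbound, the fixpoint loop carries
 * that fact around the conditional store, and the final load of x
 * becomes LOAD_FAST_CHECK so it raises UnboundLocalError instead of
 * reading a NULL local slot. */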
3299
3300
3301
static int
3302
6.17k
mark_warm(basicblock *entryblock) {
3303
6.17k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3304
6.17k
    if (stack == NULL) {
3305
0
        return ERROR;
3306
0
    }
3307
6.17k
    basicblock **sp = stack;
3308
3309
6.17k
    *sp++ = entryblock;
3310
6.17k
    entryblock->b_visited = 1;
3311
40.0k
    while (sp > stack) {
3312
33.8k
        basicblock *b = *(--sp);
3313
33.8k
        assert(!b->b_except_handler);
3314
33.8k
        b->b_warm = 1;
3315
33.8k
        basicblock *next = b->b_next;
3316
33.8k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3317
16.3k
            *sp++ = next;
3318
16.3k
            next->b_visited = 1;
3319
16.3k
        }
3320
299k
        for (int i=0; i < b->b_iused; i++) {
3321
265k
            cfg_instr *instr = &b->b_instr[i];
3322
265k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3323
11.3k
                *sp++ = instr->i_target;
3324
11.3k
                instr->i_target->b_visited = 1;
3325
11.3k
            }
3326
265k
        }
3327
33.8k
    }
3328
6.17k
    PyMem_Free(stack);
3329
6.17k
    return SUCCESS;
3330
6.17k
}
3331
3332
static int
3333
6.17k
mark_cold(basicblock *entryblock) {
3334
53.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3335
47.2k
        assert(!b->b_cold && !b->b_warm);
3336
47.2k
    }
3337
6.17k
    if (mark_warm(entryblock) < 0) {
3338
0
        return ERROR;
3339
0
    }
3340
3341
6.17k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3342
6.17k
    if (stack == NULL) {
3343
0
        return ERROR;
3344
0
    }
3345
3346
6.17k
    basicblock **sp = stack;
3347
53.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3348
47.2k
        if (b->b_except_handler) {
3349
2.82k
            assert(!b->b_warm);
3350
2.82k
            *sp++ = b;
3351
2.82k
            b->b_visited = 1;
3352
2.82k
        }
3353
47.2k
    }
3354
3355
11.6k
    while (sp > stack) {
3356
5.52k
        basicblock *b = *(--sp);
3357
5.52k
        b->b_cold = 1;
3358
5.52k
        basicblock *next = b->b_next;
3359
5.52k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3360
1.49k
            if (!next->b_warm && !next->b_visited) {
3361
1.38k
                *sp++ = next;
3362
1.38k
                next->b_visited = 1;
3363
1.38k
            }
3364
1.49k
        }
3365
29.5k
        for (int i = 0; i < b->b_iused; i++) {
3366
24.0k
            cfg_instr *instr = &b->b_instr[i];
3367
24.0k
            if (is_jump(instr)) {
3368
1.89k
                assert(i == b->b_iused - 1);
3369
1.89k
                basicblock *target = b->b_instr[i].i_target;
3370
1.89k
                if (!target->b_warm && !target->b_visited) {
3371
1.30k
                    *sp++ = target;
3372
1.30k
                    target->b_visited = 1;
3373
1.30k
                }
3374
1.89k
            }
3375
24.0k
        }
3376
5.52k
    }
3377
6.17k
    PyMem_Free(stack);
3378
6.17k
    return SUCCESS;
3379
6.17k
}
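/* Net result of the two passes: "warm" is everything reachable from
 * the entry via normal control flow (which never enters an exception
 * handler directly), and "cold" is everything reachable from an
 * exception handler without crossing back into warm code. A block
 * reachable both ways stays warm, so only genuinely exceptional paths
 * get moved out of line by push_cold_blocks_to_end() below. */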
3380
3381
3382
static int
3383
6.90k
push_cold_blocks_to_end(cfg_builder *g) {
3384
6.90k
    basicblock *entryblock = g->g_entryblock;
3385
6.90k
    if (entryblock->b_next == NULL) {
3386
        /* single basicblock, no need to reorder */
3387
735
        return SUCCESS;
3388
735
    }
3389
6.17k
    RETURN_IF_ERROR(mark_cold(entryblock));
3390
3391
6.17k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3392
3393
    /* If we have a cold block with fallthrough to a warm block, add */
3394
    /* an explicit jump instead of fallthrough */
3395
53.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3396
47.2k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3397
44
            basicblock *explicit_jump = cfg_builder_new_block(g);
3398
44
            if (explicit_jump == NULL) {
3399
0
                return ERROR;
3400
0
            }
3401
44
            if (!IS_LABEL(b->b_next->b_label)) {
3402
0
                b->b_next->b_label.id = next_lbl++;
3403
0
            }
3404
44
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3405
44
                             NO_LOCATION);
3406
44
            explicit_jump->b_cold = 1;
3407
44
            explicit_jump->b_next = b->b_next;
3408
44
            explicit_jump->b_predecessors = 1;
3409
44
            b->b_next = explicit_jump;
3410
3411
            /* set target */
3412
44
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3413
44
            last->i_target = explicit_jump->b_next;
3414
44
        }
3415
47.2k
    }
3416
3417
6.17k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3418
6.17k
    basicblock *cold_blocks = NULL;
3419
6.17k
    basicblock *cold_blocks_tail = NULL;
3420
3421
6.17k
    basicblock *b = entryblock;
3422
8.30k
    while (b->b_next) {
3423
8.30k
        assert(!b->b_cold);
3424
43.8k
        while (b->b_next && !b->b_next->b_cold) {
3425
35.5k
            b = b->b_next;
3426
35.5k
        }
3427
8.30k
        if (b->b_next == NULL) {
3428
            /* no more cold blocks */
3429
6.17k
            break;
3430
6.17k
        }
3431
3432
        /* b->b_next is the beginning of a cold streak */
3433
8.30k
        assert(!b->b_cold && b->b_next->b_cold);
3434
3435
2.13k
        basicblock *b_end = b->b_next;
3436
5.56k
        while (b_end->b_next && b_end->b_next->b_cold) {
3437
3.43k
            b_end = b_end->b_next;
3438
3.43k
        }
3439
3440
        /* b_end is the end of the cold streak */
3441
2.13k
        assert(b_end && b_end->b_cold);
3442
2.13k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3443
3444
2.13k
        if (cold_blocks == NULL) {
3445
1.14k
            cold_blocks = b->b_next;
3446
1.14k
        }
3447
985
        else {
3448
985
            cold_blocks_tail->b_next = b->b_next;
3449
985
        }
3450
2.13k
        cold_blocks_tail = b_end;
3451
2.13k
        b->b_next = b_end->b_next;
3452
2.13k
        b_end->b_next = NULL;
3453
2.13k
    }
3454
6.17k
    assert(b != NULL && b->b_next == NULL);
3455
6.17k
    b->b_next = cold_blocks;
3456
3457
6.17k
    if (cold_blocks != NULL) {
3458
1.14k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3459
1.14k
    }
3460
6.17k
    return SUCCESS;
3461
6.17k
}
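/* A standalone sketch of the splice loop above: detach each maximal
 * run of cold nodes and append the runs, in their original order,
 * after the last warm node (toy types, not part of flowgraph.c).
 */
#include <stdio.h>

typedef struct toy_block { int cold; struct toy_block *next; } toy_block;

int main(void)
{
    toy_block d = {1, NULL}, c = {0, &d}, b = {1, &c}, a = {0, &b};
    toy_block *head = &a;          /* warm, cold, warm, cold */

    toy_block *cold_head = NULL, *cold_tail = NULL;
    toy_block *n = head;           /* the entry is never cold */
    while (n->next) {
        while (n->next && !n->next->cold) {
            n = n->next;
        }
        if (n->next == NULL) {
            break;                 /* no more cold runs */
        }
        toy_block *run = n->next, *end = run;
        while (end->next && end->next->cold) {
            end = end->next;
        }
        if (cold_head == NULL) { cold_head = run; }
        else                   { cold_tail->next = run; }
        cold_tail = end;
        n->next = end->next;       /* unlink the cold run */
        end->next = NULL;
    }
    n->next = cold_head;           /* all cold runs go to the tail */

    for (toy_block *p = head; p != NULL; p = p->next) {
        printf("%s ", p->cold ? "cold" : "warm");
    }
    printf("\n");                  /* prints: warm warm cold cold */
    return 0;
}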
3462
3463
static int
3464
convert_pseudo_conditional_jumps(cfg_builder *g)
3465
6.90k
{
3466
6.90k
    basicblock *entryblock = g->g_entryblock;
3467
54.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3468
386k
        for (int i = 0; i < b->b_iused; i++) {
3469
338k
            cfg_instr *instr = &b->b_instr[i];
3470
338k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3471
589
                assert(i == b->b_iused - 1);
3472
589
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3473
315
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3474
589
                location loc = instr->i_loc;
3475
589
                basicblock *except = instr->i_except;
3476
589
                cfg_instr copy = {
3477
589
                            .i_opcode = COPY,
3478
589
                            .i_oparg = 1,
3479
589
                            .i_loc = loc,
3480
589
                            .i_target = NULL,
3481
589
                            .i_except = except,
3482
589
                };
3483
589
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3484
589
                cfg_instr to_bool = {
3485
589
                            .i_opcode = TO_BOOL,
3486
589
                            .i_oparg = 0,
3487
589
                            .i_loc = loc,
3488
589
                            .i_target = NULL,
3489
589
                            .i_except = except,
3490
589
                };
3491
589
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3492
589
            }
3493
338k
        }
3494
47.9k
    }
3495
6.90k
    return SUCCESS;
3496
6.90k
}
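/* Net effect of the rewrite above, at the instruction level (the
 * COPY/TO_BOOL pair is inserted in front of the converted jump):
 *
 *     before:  JUMP_IF_FALSE  L        (pseudo op; value stays on stack)
 *     after:   COPY 1; TO_BOOL; POP_JUMP_IF_FALSE  L
 *
 * The tested value survives on the stack; only its boolean copy is
 * consumed by the real conditional jump. */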
3497
3498
static int
3499
convert_pseudo_ops(cfg_builder *g)
3500
6.90k
{
3501
6.90k
    basicblock *entryblock = g->g_entryblock;
3502
54.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3503
390k
        for (int i = 0; i < b->b_iused; i++) {
3504
342k
            cfg_instr *instr = &b->b_instr[i];
3505
342k
            if (is_block_push(instr)) {
3506
2.82k
                INSTR_SET_OP0(instr, NOP);
3507
2.82k
            }
3508
339k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3509
1.93k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3510
1.93k
                instr->i_opcode = LOAD_FAST;
3511
1.93k
            }
3512
337k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3513
648
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3514
648
                instr->i_opcode = STORE_FAST;
3515
648
            }
3516
342k
        }
3517
47.9k
    }
3518
6.90k
    return remove_redundant_nops_and_jumps(g);
3519
6.90k
}
3520
3521
static inline bool
3522
80.6k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3523
80.6k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3524
44.8k
        return basicblock_has_no_lineno(b);
3525
44.8k
    }
3526
35.8k
    else {
3527
35.8k
        return false;
3528
35.8k
    }
3529
80.6k
}
3530
3531
3532
/* PEP 626 mandates that the f_lineno of a frame is correct
3533
 * after a frame terminates. It would be prohibitively expensive
3534
 * to continuously update the f_lineno field at runtime,
3535
 * so we make sure that all exiting instructions (raises and returns)
3536
 * have a valid line number, allowing us to compute f_lineno lazily.
3537
 * We can do this by duplicating the exit blocks without line number
3538
 * so that none have more than one predecessor. We can then safely
3539
 * copy the line number from the sole predecessor block.
3540
 */
3541
static int
3542
duplicate_exits_without_lineno(cfg_builder *g)
3543
13.8k
{
3544
13.8k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3545
3546
    /* Copy all exit blocks without line number that are targets of a jump.
3547
     */
3548
13.8k
    basicblock *entryblock = g->g_entryblock;
3549
109k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3550
95.9k
        cfg_instr *last = basicblock_last_instr(b);
3551
95.9k
        if (last == NULL) {
3552
15.0k
            continue;
3553
15.0k
        }
3554
80.9k
        if (is_jump(last)) {
3555
39.5k
            basicblock *target = next_nonempty_block(last->i_target);
3556
39.5k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3557
648
                basicblock *new_target = copy_basicblock(g, target);
3558
648
                if (new_target == NULL) {
3559
0
                    return ERROR;
3560
0
                }
3561
648
                new_target->b_instr[0].i_loc = last->i_loc;
3562
648
                last->i_target = new_target;
3563
648
                target->b_predecessors--;
3564
648
                new_target->b_predecessors = 1;
3565
648
                new_target->b_next = target->b_next;
3566
648
                new_target->b_label.id = next_lbl++;
3567
648
                target->b_next = new_target;
3568
648
            }
3569
39.5k
        }
3570
80.9k
    }
3571
3572
    /* Any remaining reachable exit blocks without line number can only be reached by
3573
     * fall through, and thus can only have a single predecessor */
3574
109k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3575
95.9k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3576
41.1k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3577
829
                cfg_instr *last = basicblock_last_instr(b);
3578
829
                assert(last != NULL);
3579
829
                b->b_next->b_instr[0].i_loc = last->i_loc;
3580
829
            }
3581
41.1k
        }
3582
95.9k
    }
3583
13.8k
    return SUCCESS;
3584
13.8k
}
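/* Concretely: when two jumps share one RETURN-style target that has no
 * line number, the target is cloned so that each jump owns a private
 * copy, and each copy inherits the location of the jump that reaches
 * it. Exit blocks reached only by fallthrough keep a single
 * predecessor, so the second loop can copy the location directly. */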
3585
3586
3587
/* If an instruction has no line number, but its predecessor in the BB does,
3588
 * then copy the line number. If a successor block has no line number, and only
3589
 * one predecessor, then inherit the line number.
3590
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3591
 * Also reduces the size of the line number table,
3592
 * but has no impact on the generated line number events.
3593
 */
3594
3595
static inline void
3596
maybe_propagate_location(basicblock *b, int i, location loc)
3597
763k
{
3598
763k
    assert(b->b_iused > i);
3599
763k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3600
44.3k
         b->b_instr[i].i_loc = loc;
3601
44.3k
    }
3602
763k
}
3603
3604
static void
3605
propagate_line_numbers(basicblock *entryblock)
3606
13.8k
{
3607
109k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3608
95.9k
        cfg_instr *last = basicblock_last_instr(b);
3609
95.9k
        if (last == NULL) {
3610
15.0k
            continue;
3611
15.0k
        }
3612
3613
80.9k
        location prev_location = NO_LOCATION;
3614
796k
        for (int i = 0; i < b->b_iused; i++) {
3615
716k
            maybe_propagate_location(b, i, prev_location);
3616
716k
            prev_location = b->b_instr[i].i_loc;
3617
716k
        }
3618
80.9k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3619
29.6k
            if (b->b_next->b_iused > 0) {
3620
29.6k
                maybe_propagate_location(b->b_next, 0, prev_location);
3621
29.6k
            }
3622
29.6k
        }
3623
80.9k
        if (is_jump(last)) {
3624
39.5k
            basicblock *target = last->i_target;
3625
39.5k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3626
2
                target = target->b_next;
3627
2
            }
3628
39.5k
            if (target->b_predecessors == 1) {
3629
18.2k
                maybe_propagate_location(target, 0, prev_location);
3630
18.2k
            }
3631
39.5k
        }
3632
80.9k
    }
3633
13.8k
}
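/* A standalone sketch of the intra-block propagation above: a missing
 * line number (-1 here, NO_LOCATION in the real code) inherits the
 * location of the nearest earlier instruction in the block.
 */
#include <stdio.h>

int main(void)
{
    int lineno[5] = {10, -1, -1, 12, -1};
    int prev = -1;
    for (int i = 0; i < 5; i++) {
        if (lineno[i] == -1) {
            lineno[i] = prev;     /* maybe_propagate_location() analogue */
        }
        prev = lineno[i];
        printf("%d ", lineno[i]); /* prints: 10 10 10 12 12 */
    }
    printf("\n");
    return 0;
}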
3634
3635
static int
3636
resolve_line_numbers(cfg_builder *g, int firstlineno)
3637
13.8k
{
3638
13.8k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3639
13.8k
    propagate_line_numbers(g->g_entryblock);
3640
13.8k
    return SUCCESS;
3641
13.8k
}
3642
3643
int
3644
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3645
                        int nlocals, int nparams, int firstlineno)
3646
6.90k
{
3647
6.90k
    assert(cfg_builder_check(g));
3648
    /** Preprocessing **/
3649
    /* Map labels to targets and mark exception handlers */
3650
6.90k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3651
6.90k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3652
6.90k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3653
3654
    /** Optimization **/
3655
6.90k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3656
6.90k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3657
6.90k
    RETURN_IF_ERROR(
3658
6.90k
        add_checks_for_loads_of_uninitialized_variables(
3659
6.90k
            g->g_entryblock, nlocals, nparams));
3660
6.90k
    RETURN_IF_ERROR(insert_superinstructions(g));
3661
3662
6.90k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3663
6.90k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3664
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3665
    // assert(all_exits_have_lineno(g->g_entryblock));
3666
6.90k
    return SUCCESS;
3667
6.90k
}
3668
3669
static int *
3670
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3671
6.90k
{
3672
6.90k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3673
6.90k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3674
6.90k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3675
3676
6.90k
    int noffsets = ncellvars + nfreevars;
3677
6.90k
    int *fixed = PyMem_New(int, noffsets);
3678
6.90k
    if (fixed == NULL) {
3679
0
        PyErr_NoMemory();
3680
0
        return NULL;
3681
0
    }
3682
9.50k
    for (int i = 0; i < noffsets; i++) {
3683
2.59k
        fixed[i] = nlocals + i;
3684
2.59k
    }
3685
3686
6.90k
    PyObject *varname, *cellindex;
3687
6.90k
    Py_ssize_t pos = 0;
3688
8.30k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3689
1.39k
        PyObject *varindex;
3690
1.39k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3691
0
            goto error;
3692
0
        }
3693
1.39k
        if (varindex == NULL) {
3694
973
            continue;
3695
973
        }
3696
3697
419
        int argoffset = PyLong_AsInt(varindex);
3698
419
        Py_DECREF(varindex);
3699
419
        if (argoffset == -1 && PyErr_Occurred()) {
3700
0
            goto error;
3701
0
        }
3702
3703
419
        int oldindex = PyLong_AsInt(cellindex);
3704
419
        if (oldindex == -1 && PyErr_Occurred()) {
3705
0
            goto error;
3706
0
        }
3707
419
        fixed[oldindex] = argoffset;
3708
419
    }
3709
6.90k
    return fixed;
3710
3711
0
error:
3712
0
    PyMem_Free(fixed);
3713
0
    return NULL;
3714
6.90k
}
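/* Example of the resulting map (worked by hand from the code above):
 * with nlocals == 2 for arguments a and b, one cell variable that is
 * also argument b, and one free variable, the default "locals, cells,
 * frees" layout would assign offsets 2 and 3; the cell for b is
 * redirected to argument slot 1 instead, and fix_cell_offsets() below
 * compacts away the now-unused duplicate slot. */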
3715
3716
#define IS_GENERATOR(CF) \
3717
6.90k
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3718
3719
static int
3720
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3721
                           int *fixed, int nfreevars, int code_flags)
3722
6.90k
{
3723
6.90k
    assert(umd->u_firstlineno > 0);
3724
3725
    /* Add the generator prefix instructions. */
3726
6.90k
    if (IS_GENERATOR(code_flags)) {
3727
        /* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
3728
         * of 0. This is because RETURN_GENERATOR pushes an element
3729
         * with _PyFrame_StackPush before switching stacks.
3730
         */
3731
3732
380
        location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
3733
380
        cfg_instr make_gen = {
3734
380
            .i_opcode = RETURN_GENERATOR,
3735
380
            .i_oparg = 0,
3736
380
            .i_loc = loc,
3737
380
            .i_target = NULL,
3738
380
            .i_except = NULL,
3739
380
        };
3740
380
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
3741
380
        cfg_instr pop_top = {
3742
380
            .i_opcode = POP_TOP,
3743
380
            .i_oparg = 0,
3744
380
            .i_loc = loc,
3745
380
            .i_target = NULL,
3746
380
            .i_except = NULL,
3747
380
        };
3748
380
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
3749
380
    }
3750
3751
    /* Set up cells for any variable that escapes, to be put in a closure. */
3752
6.90k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3753
6.90k
    if (ncellvars) {
3754
        // umd->u_cellvars has the cells out of order, so we sort them
3755
        // before adding the MAKE_CELL instructions.  Note that we
3756
        // adjust for arg cells, which come first.
3757
866
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3758
866
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3759
866
        if (sorted == NULL) {
3760
0
            PyErr_NoMemory();
3761
0
            return ERROR;
3762
0
        }
3763
2.25k
        for (int i = 0; i < ncellvars; i++) {
3764
1.39k
            sorted[fixed[i]] = i + 1;
3765
1.39k
        }
3766
2.94k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3767
2.08k
            int oldindex = sorted[i] - 1;
3768
2.08k
            if (oldindex == -1) {
3769
690
                continue;
3770
690
            }
3771
1.39k
            cfg_instr make_cell = {
3772
1.39k
                .i_opcode = MAKE_CELL,
3773
                // This will get fixed in fix_cell_offsets().
3774
1.39k
                .i_oparg = oldindex,
3775
1.39k
                .i_loc = NO_LOCATION,
3776
1.39k
                .i_target = NULL,
3777
1.39k
                .i_except = NULL,
3778
1.39k
            };
3779
1.39k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3780
0
                PyMem_RawFree(sorted);
3781
0
                return ERROR;
3782
0
            }
3783
1.39k
            ncellsused += 1;
3784
1.39k
        }
3785
866
        PyMem_RawFree(sorted);
3786
866
    }
3787
3788
6.90k
    if (nfreevars) {
3789
609
        cfg_instr copy_frees = {
3790
609
            .i_opcode = COPY_FREE_VARS,
3791
609
            .i_oparg = nfreevars,
3792
609
            .i_loc = NO_LOCATION,
3793
609
            .i_target = NULL,
3794
609
            .i_except = NULL,
3795
609
        };
3796
609
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3797
609
    }
3798
3799
6.90k
    return SUCCESS;
3800
6.90k
}
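/* Taken together, the inserts above yield the standard prefix, in this
 * order (each piece only when applicable):
 *
 *     COPY_FREE_VARS n; MAKE_CELL c0 .. ck; RETURN_GENERATOR; POP_TOP
 *
 * COPY_FREE_VARS comes first because it is inserted at index 0 last,
 * after the MAKE_CELLs and the generator pair are already in place. */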
3801
3802
static int
3803
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3804
6.90k
{
3805
6.90k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3806
6.90k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3807
6.90k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3808
6.90k
    int noffsets = ncellvars + nfreevars;
3809
3810
    // First deal with duplicates (arg cells).
3811
6.90k
    int numdropped = 0;
3812
9.50k
    for (int i = 0; i < noffsets ; i++) {
3813
2.59k
        if (fixedmap[i] == i + nlocals) {
3814
2.17k
            fixedmap[i] -= numdropped;
3815
2.17k
        }
3816
419
        else {
3817
            // It was a duplicate (cell/arg).
3818
419
            numdropped += 1;
3819
419
        }
3820
2.59k
    }
3821
3822
    // Then update offsets, either relative to locals or by cell2arg.
3823
54.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3824
390k
        for (int i = 0; i < b->b_iused; i++) {
3825
342k
            cfg_instr *inst = &b->b_instr[i];
3826
            // This is called before extended args are generated.
3827
342k
            assert(inst->i_opcode != EXTENDED_ARG);
3828
342k
            int oldoffset = inst->i_oparg;
3829
342k
            switch(inst->i_opcode) {
3830
1.39k
                case MAKE_CELL:
3831
3.32k
                case LOAD_CLOSURE:
3832
5.96k
                case LOAD_DEREF:
3833
7.05k
                case STORE_DEREF:
3834
7.05k
                case DELETE_DEREF:
3835
7.05k
                case LOAD_FROM_DICT_OR_DEREF:
3836
7.05k
                    assert(oldoffset >= 0);
3837
7.05k
                    assert(oldoffset < noffsets);
3838
7.05k
                    assert(fixedmap[oldoffset] >= 0);
3839
7.05k
                    inst->i_oparg = fixedmap[oldoffset];
3840
342k
            }
3841
342k
        }
3842
47.9k
    }
3843
3844
6.90k
    return numdropped;
3845
6.90k
}
3846
3847
static int
3848
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
3849
6.90k
{
3850
6.90k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3851
6.90k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3852
6.90k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3853
6.90k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3854
6.90k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3855
6.90k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3856
6.90k
    assert(INT_MAX - nlocals - ncellvars > 0);
3857
6.90k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3858
6.90k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3859
6.90k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3860
6.90k
    if (cellfixedoffsets == NULL) {
3861
0
        return ERROR;
3862
0
    }
3863
3864
    // This must be called before fix_cell_offsets().
3865
6.90k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
3866
0
        PyMem_Free(cellfixedoffsets);
3867
0
        return ERROR;
3868
0
    }
3869
3870
6.90k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3871
6.90k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3872
6.90k
    cellfixedoffsets = NULL;
3873
6.90k
    if (numdropped < 0) {
3874
0
        return ERROR;
3875
0
    }
3876
3877
6.90k
    nlocalsplus -= numdropped;
3878
6.90k
    return nlocalsplus;
3879
6.90k
}
3880
3881
cfg_builder *
3882
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3883
6.90k
{
3884
6.90k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3885
0
        return NULL;
3886
0
    }
3887
6.90k
    cfg_builder *g = _PyCfgBuilder_New();
3888
6.90k
    if (g == NULL) {
3889
0
        return NULL;
3890
0
    }
3891
395k
    for (int i = 0; i < seq->s_used; i++) {
3892
388k
        seq->s_instrs[i].i_target = 0;
3893
388k
    }
3894
395k
    for (int i = 0; i < seq->s_used; i++) {
3895
388k
        _PyInstruction *instr = &seq->s_instrs[i];
3896
388k
        if (HAS_TARGET(instr->i_opcode)) {
3897
25.0k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3898
25.0k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3899
25.0k
        }
3900
388k
    }
3901
6.90k
    int offset = 0;
3902
395k
    for (int i = 0; i < seq->s_used; i++) {
3903
388k
        _PyInstruction *instr = &seq->s_instrs[i];
3904
388k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3905
366
            if (seq->s_annotations_code != NULL) {
3906
1
                assert(seq->s_annotations_code->s_labelmap_size == 0
3907
1
                    && seq->s_annotations_code->s_nested == NULL);
3908
4
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3909
3
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3910
3
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3911
3
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3912
0
                        goto error;
3913
0
                    }
3914
3
                }
3915
1
                offset += seq->s_annotations_code->s_used - 1;
3916
1
            }
3917
365
            else {
3918
365
                offset -= 1;
3919
365
            }
3920
366
            continue;
3921
366
        }
3922
388k
        if (instr->i_target) {
3923
20.2k
            jump_target_label lbl_ = {i + offset};
3924
20.2k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3925
0
                goto error;
3926
0
            }
3927
20.2k
        }
3928
388k
        int opcode = instr->i_opcode;
3929
388k
        int oparg = instr->i_oparg;
3930
388k
        if (HAS_TARGET(opcode)) {
3931
25.0k
            oparg += offset;
3932
25.0k
        }
3933
388k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3934
0
            goto error;
3935
0
        }
3936
388k
    }
3937
6.90k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3938
0
        goto error;
3939
0
    }
3940
6.90k
    return g;
3941
0
error:
3942
0
    _PyCfgBuilder_Free(g);
3943
0
    return NULL;
3944
6.90k
}
3945
3946
int
3947
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3948
6.90k
{
3949
6.90k
    int lbl = 0;
3950
55.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3951
48.8k
        b->b_label = (jump_target_label){lbl};
3952
48.8k
        lbl += 1;
3953
48.8k
    }
3954
55.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3955
48.8k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3956
402k
        for (int i = 0; i < b->b_iused; i++) {
3957
353k
            cfg_instr *instr = &b->b_instr[i];
3958
353k
            if (HAS_TARGET(instr->i_opcode)) {
3959
                /* Set oparg to the label id (it will later be mapped to an offset) */
3960
19.8k
                instr->i_oparg = instr->i_target->b_label.id;
3961
19.8k
            }
3962
353k
            RETURN_IF_ERROR(
3963
353k
                _PyInstructionSequence_Addop(
3964
353k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3965
3966
353k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3967
353k
            if (instr->i_except != NULL) {
3968
45.1k
                hi->h_label = instr->i_except->b_label.id;
3969
45.1k
                hi->h_startdepth = instr->i_except->b_startdepth;
3970
45.1k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3971
45.1k
            }
3972
308k
            else {
3973
308k
                hi->h_label = -1;
3974
308k
            }
3975
353k
        }
3976
48.8k
    }
3977
6.90k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3978
0
        return ERROR;
3979
0
    }
3980
6.90k
    return SUCCESS;
3981
6.90k
}
3982
3983
3984
int
3985
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3986
                                     _PyCompile_CodeUnitMetadata *umd, int code_flags,
3987
                                     int *stackdepth, int *nlocalsplus,
3988
                                     _PyInstructionSequence *seq)
3989
6.90k
{
3990
6.90k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3991
3992
6.90k
    *stackdepth = calculate_stackdepth(g);
3993
6.90k
    if (*stackdepth < 0) {
3994
0
        return ERROR;
3995
0
    }
3996
3997
    /* prepare_localsplus adds instructions for generators that push
3998
     * and pop an item on the stack. This assertion makes sure there
3999
     * is space on the stack for that.
4000
     * It should always be true, because a generator must have at
4001
     * least one expression or call to INTRINSIC_STOPITERATION_ERROR,
4002
     * which requires stackspace.
4003
     */
4004
6.90k
    assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
4005
4006
6.90k
    *nlocalsplus = prepare_localsplus(umd, g, code_flags);
4007
6.90k
    if (*nlocalsplus < 0) {
4008
0
        return ERROR;
4009
0
    }
4010
4011
6.90k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4012
4013
    /* Order of basic blocks must have been determined by now */
4014
4015
6.90k
    RETURN_IF_ERROR(normalize_jumps(g));
4016
6.90k
    assert(no_redundant_jumps(g));
4017
4018
    /* Can't modify the bytecode after inserting instructions that produce
4019
     * borrowed references.
4020
     */
4021
6.90k
    RETURN_IF_ERROR(optimize_load_fast(g));
4022
4023
    /* Can't modify the bytecode after computing jump offsets. */
4024
6.90k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4025
0
        return ERROR;
4026
0
    }
4027
4028
6.90k
    return SUCCESS;
4029
6.90k
}
4030
4031
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4032
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4033
 */
4034
int
4035
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4036
0
{
4037
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4038
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4039
0
    return SUCCESS;
4040
0
}
4041
4042
/* Exported API functions */
4043
4044
int
4045
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4046
0
{
4047
0
    stack_effects effs;
4048
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4049
0
        return PY_INVALID_STACK_EFFECT;
4050
0
    }
4051
0
    return effs.net;
4052
0
}
4053
4054
int
4055
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4056
130
{
4057
130
    stack_effects effs;
4058
130
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4059
0
        return PY_INVALID_STACK_EFFECT;
4060
0
    }
4061
130
    return effs.net;
4062
130
}
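/* A hedged usage sketch for the exported helper above: query the net
 * stack effect of an opcode and check the error sentinel. Assumes the
 * opcode constants from opcode.h; illustrative only, not part of
 * flowgraph.c.
 */
#include "Python.h"
#include "opcode.h"
#include <stdio.h>

static void show_net_effect(int opcode, int oparg)
{
    int net = PyCompile_OpcodeStackEffect(opcode, oparg);
    if (net == PY_INVALID_STACK_EFFECT) {
        fprintf(stderr, "unknown opcode/oparg combination\n");
        return;
    }
    printf("opcode %d: net stack effect %d\n", opcode, net);
}
/* e.g. show_net_effect(POP_TOP, 0) would report a net effect of -1. */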
4063
4064
/* Access to compiler optimizations for unit tests.
4065
4066
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4067
 * a CFG, optimizes it and converts back to an instruction list.
4068
 */
4069
4070
static PyObject *
4071
cfg_to_instruction_sequence(cfg_builder *g)
4072
0
{
4073
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4074
0
    if (seq == NULL) {
4075
0
        return NULL;
4076
0
    }
4077
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4078
0
        PyInstructionSequence_Fini(seq);
4079
0
        return NULL;
4080
0
    }
4081
0
    return (PyObject*)seq;
4082
0
}
4083
4084
PyObject *
4085
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4086
0
{
4087
0
    if (!_PyInstructionSequence_Check(seq)) {
4088
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4089
0
        return NULL;
4090
0
    }
4091
0
    PyObject *const_cache = PyDict_New();
4092
0
    if (const_cache == NULL) {
4093
0
        return NULL;
4094
0
    }
4095
4096
0
    PyObject *res = NULL;
4097
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4098
0
    if (g == NULL) {
4099
0
        goto error;
4100
0
    }
4101
0
    int nparams = 0, firstlineno = 1;
4102
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4103
0
                                nparams, firstlineno) < 0) {
4104
0
        goto error;
4105
0
    }
4106
4107
0
    if (calculate_stackdepth(g) == ERROR) {
4108
0
        goto error;
4109
0
    }
4110
4111
0
    if (optimize_load_fast(g) != SUCCESS) {
4112
0
        goto error;
4113
0
    }
4114
4115
0
    res = cfg_to_instruction_sequence(g);
4116
0
error:
4117
0
    Py_DECREF(const_cache);
4118
0
    _PyCfgBuilder_Free(g);
4119
0
    return res;
4120
0
}