Coverage Report

Created: 2026-04-20 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
742k
#define SUCCESS 0
19
5.61k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
1.12M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
240k
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the block are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
244k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
244k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
1.06M
{
101
1.06M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
1.06M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
1.06M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
925k
{
108
925k
    return OPCODE_HAS_JUMP(i->i_opcode);
109
925k
}
110
111
/* One arg*/
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
23.1k
    do { \
114
23.1k
        assert(OPCODE_HAS_ARG(OP)); \
115
23.1k
        cfg_instr *_instr__ptr_ = (I); \
116
23.1k
        _instr__ptr_->i_opcode = (OP); \
117
23.1k
        _instr__ptr_->i_oparg = (ARG); \
118
23.1k
    } while (0);
119
120
/* No args*/
121
#define INSTR_SET_OP0(I, OP) \
122
43.2k
    do { \
123
43.2k
        assert(!OPCODE_HAS_ARG(OP)); \
124
43.2k
        cfg_instr *_instr__ptr_ = (I); \
125
43.2k
        _instr__ptr_->i_opcode = (OP); \
126
43.2k
        _instr__ptr_->i_oparg = 0; \
127
43.2k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
3.02k
    do { \
131
3.02k
        cfg_instr *_instr__ptr_ = (I); \
132
3.02k
        _instr__ptr_->i_loc = (LOC); \
133
3.02k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
240k
{
144
240k
    assert(b != NULL);
145
240k
    _Py_c_array_t array = {
146
240k
        .array = (void*)b->b_instr,
147
240k
        .allocated_entries = b->b_ialloc,
148
240k
        .item_size = sizeof(cfg_instr),
149
240k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
240k
    };
151
152
240k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
240k
    b->b_instr = array.array;
154
240k
    b->b_ialloc = array.allocated_entries;
155
240k
    return b->b_iused++;
156
240k
}
157
158
static cfg_instr *
159
783k
basicblock_last_instr(const basicblock *b) {
160
783k
    assert(b->b_iused >= 0);
161
783k
    if (b->b_iused > 0) {
162
717k
        assert(b->b_instr != NULL);
163
717k
        return &b->b_instr[b->b_iused - 1];
164
717k
    }
165
66.7k
    return NULL;
166
783k
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
25.2k
{
175
25.2k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
25.2k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
25.2k
    b->b_list = g->g_block_list;
182
25.2k
    g->g_block_list = b;
183
25.2k
    b->b_label = NO_LABEL;
184
25.2k
    return b;
185
25.2k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
233k
{
190
233k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
233k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
233k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
233k
    assert(0 <= oparg && oparg < (1 << 30));
194
195
233k
    int off = basicblock_next_instr(b);
196
233k
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
233k
    cfg_instr *i = &b->b_instr[off];
200
233k
    i->i_opcode = opcode;
201
233k
    i->i_oparg = oparg;
202
233k
    i->i_loc = loc;
203
    // memory is already zero initialized
204
233k
    assert(i->i_target == NULL);
205
233k
    assert(i->i_except == NULL);
206
207
233k
    return SUCCESS;
208
233k
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
379
{
213
379
    cfg_instr *last = basicblock_last_instr(b);
214
379
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
379
    RETURN_IF_ERROR(
219
379
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
379
    last = basicblock_last_instr(b);
221
379
    assert(last && last->i_opcode == opcode);
222
379
    last->i_target = target;
223
379
    return SUCCESS;
224
379
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
1.48k
{
229
4.26k
    for (int i = 0; i < from->b_iused; i++) {
230
2.78k
        int n = basicblock_next_instr(to);
231
2.78k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
2.78k
        to->b_instr[n] = from->b_instr[i];
235
2.78k
    }
236
1.48k
    return SUCCESS;
237
1.48k
}
238
239
static inline int
240
238k
basicblock_nofallthrough(const basicblock *b) {
241
238k
    cfg_instr *last = basicblock_last_instr(b);
242
238k
    return (last &&
243
224k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
125k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
238k
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
389k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
376
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
376
    assert(BB_NO_FALLTHROUGH(block));
257
376
    basicblock *result = cfg_builder_new_block(g);
258
376
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
376
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
376
    return result;
265
376
}
266
267
static int
268
4.16k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
4.16k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
65.6k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
61.4k
        block->b_instr[i] = block->b_instr[i-1];
272
61.4k
    }
273
4.16k
    block->b_instr[pos] = *instr;
274
4.16k
    return SUCCESS;
275
4.16k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
18.9k
{
343
18.9k
    assert(block != NULL);
344
18.9k
    g->g_curblock->b_next = block;
345
18.9k
    g->g_curblock = block;
346
18.9k
    return block;
347
18.9k
}
348
349
static inline int
350
39.4k
basicblock_exits_scope(const basicblock *b) {
351
39.4k
    cfg_instr *last = basicblock_last_instr(b);
352
39.4k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
39.4k
}
354
355
static inline int
356
22.4k
basicblock_has_eval_break(const basicblock *b) {
357
120k
    for (int i = 0; i < b->b_iused; i++) {
358
106k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
8.28k
            return true;
360
8.28k
        }
361
106k
    }
362
14.1k
    return false;
363
22.4k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
235k
{
368
235k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
235k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
16.2k
        return true;
371
16.2k
    }
372
219k
    if (IS_LABEL(g->g_current_label)) {
373
2.60k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
2.60k
            return true;
375
2.60k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
2.60k
    }
382
216k
    return false;
383
219k
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
235k
{
388
235k
    if (cfg_builder_current_block_is_terminated(g)) {
389
18.9k
        basicblock *b = cfg_builder_new_block(g);
390
18.9k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
18.9k
        b->b_label = g->g_current_label;
394
18.9k
        g->g_current_label = NO_LABEL;
395
18.9k
        cfg_builder_use_next_block(g, b);
396
18.9k
    }
397
235k
    return SUCCESS;
398
235k
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
{
404
    assert(g->g_entryblock->b_iused > 0);
405
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
        assert(!_PyMem_IsPtrFreed(block));
407
        if (block->b_instr != NULL) {
408
            assert(block->b_ialloc > 0);
409
            assert(block->b_iused >= 0);
410
            assert(block->b_ialloc >= block->b_iused);
411
        }
412
        else {
413
            assert (block->b_iused == 0);
414
            assert (block->b_ialloc == 0);
415
        }
416
    }
417
    return true;
418
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
5.64k
{
424
5.64k
    g->g_block_list = NULL;
425
5.64k
    basicblock *block = cfg_builder_new_block(g);
426
5.64k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
5.64k
    g->g_curblock = g->g_entryblock = block;
430
5.64k
    g->g_current_label = NO_LABEL;
431
5.64k
    return SUCCESS;
432
5.64k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
5.64k
{
437
5.64k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
5.64k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
5.64k
    memset(g, 0, sizeof(cfg_builder));
443
5.64k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
5.64k
    return g;
448
5.64k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
5.64k
{
453
5.64k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
5.64k
    assert(cfg_builder_check(g));
457
5.64k
    basicblock *b = g->g_block_list;
458
30.9k
    while (b != NULL) {
459
25.2k
        if (b->b_instr) {
460
25.2k
            PyMem_Free((void *)b->b_instr);
461
25.2k
        }
462
25.2k
        basicblock *next = b->b_list;
463
25.2k
        PyMem_Free((void *)b);
464
25.2k
        b = next;
465
25.2k
    }
466
5.64k
    PyMem_Free(g);
467
5.64k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
5.64k
{
472
5.64k
    int nblocks = 0;
473
30.1k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
24.5k
        nblocks++;
475
24.5k
    }
476
5.64k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
5.64k
    return SUCCESS;
481
5.64k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
8.70k
{
486
8.70k
    g->g_current_label = lbl;
487
8.70k
    return cfg_builder_maybe_start_new_block(g);
488
8.70k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
226k
{
493
226k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
226k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
226k
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
30.6k
{
501
31.7k
    while (b && b->b_iused == 0) {
502
1.06k
        b = b->b_next;
503
1.06k
    }
504
30.6k
    return b;
505
30.6k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
no_redundant_nops(cfg_builder *g) {
514
    if (remove_redundant_nops(g) != 0) {
515
        return false;
516
    }
517
    return true;
518
}
519
520
static bool
521
no_redundant_jumps(cfg_builder *g) {
522
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
        cfg_instr *last = basicblock_last_instr(b);
524
        if (last != NULL) {
525
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
                basicblock *next = next_nonempty_block(b->b_next);
527
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
                if (jump_target == next) {
529
                    assert(next);
530
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
                        assert(0);
532
                        return false;
533
                    }
534
                }
535
            }
536
        }
537
    }
538
    return true;
539
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
25.2k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
25.2k
    cfg_instr *last = basicblock_last_instr(b);
547
25.2k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
19.3k
        return SUCCESS;
549
19.3k
    }
550
25.2k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
5.93k
    bool is_forward = last->i_target->b_visited == 0;
553
5.93k
    if (is_forward) {
554
5.74k
        RETURN_IF_ERROR(
555
5.74k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
5.74k
        return SUCCESS;
557
5.74k
    }
558
559
194
    int reversed_opcode = 0;
560
194
    switch(last->i_opcode) {
561
0
        case POP_JUMP_IF_NOT_NONE:
562
0
            reversed_opcode = POP_JUMP_IF_NONE;
563
0
            break;
564
16
        case POP_JUMP_IF_NONE:
565
16
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
16
            break;
567
140
        case POP_JUMP_IF_FALSE:
568
140
            reversed_opcode = POP_JUMP_IF_TRUE;
569
140
            break;
570
38
        case POP_JUMP_IF_TRUE:
571
38
            reversed_opcode = POP_JUMP_IF_FALSE;
572
38
            break;
573
194
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
194
    basicblock *target = last->i_target;
579
194
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
194
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
194
    RETURN_IF_ERROR(
584
194
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
194
    RETURN_IF_ERROR(
586
194
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
194
    backwards_jump->b_startdepth = target->b_startdepth;
588
194
    last->i_opcode = reversed_opcode;
589
194
    last->i_target = b->b_next;
590
591
194
    backwards_jump->b_cold = b->b_cold;
592
194
    backwards_jump->b_next = b->b_next;
593
194
    b->b_next = backwards_jump;
594
194
    return SUCCESS;
595
194
}
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
5.64k
{
601
5.64k
    basicblock *entryblock = g->g_entryblock;
602
30.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
25.0k
        b->b_visited = 0;
604
25.0k
    }
605
30.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
25.2k
        b->b_visited = 1;
607
25.2k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
25.2k
    }
609
5.64k
    return SUCCESS;
610
5.64k
}
611
612
static int
613
5.64k
check_cfg(cfg_builder *g) {
614
30.1k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if jump or exit is not last instruction in the block. */
616
251k
        for (int i = 0; i < b->b_iused; i++) {
617
226k
            int opcode = b->b_instr[i].i_opcode;
618
226k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
226k
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
21.9k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
21.9k
            }
625
226k
        }
626
24.5k
    }
627
5.64k
    return SUCCESS;
628
5.64k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
21.2k
{
633
21.2k
    int lbl = -1;
634
119k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
97.7k
        if (b->b_label.id > lbl) {
636
32.2k
            lbl = b->b_label.id;
637
32.2k
        }
638
97.7k
    }
639
21.2k
    return lbl;
640
21.2k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
5.64k
{
646
5.64k
    int max_label = get_max_label(entryblock);
647
5.64k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
5.64k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
5.64k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
5.64k
    memset(label2block, 0, mapsize);
654
30.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
24.5k
        if (b->b_label.id >= 0) {
656
8.70k
            label2block[b->b_label.id] = b;
657
8.70k
        }
658
24.5k
    }
659
30.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
251k
        for (int i = 0; i < b->b_iused; i++) {
661
226k
            cfg_instr *instr = &b->b_instr[i];
662
226k
            assert(instr->i_target == NULL);
663
226k
            if (HAS_TARGET(instr->i_opcode)) {
664
11.0k
                int lbl = instr->i_oparg;
665
11.0k
                assert(lbl >= 0 && lbl <= max_label);
666
11.0k
                instr->i_target = label2block[lbl];
667
11.0k
                assert(instr->i_target != NULL);
668
11.0k
                assert(instr->i_target->b_label.id == lbl);
669
11.0k
            }
670
226k
        }
671
24.5k
    }
672
5.64k
    PyMem_Free(label2block);
673
5.64k
    return SUCCESS;
674
5.64k
}
675
676
static int
677
5.64k
mark_except_handlers(basicblock *entryblock) {
678
#ifndef NDEBUG
679
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
        assert(!b->b_except_handler);
681
    }
682
#endif
683
30.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
251k
        for (int i=0; i < b->b_iused; i++) {
685
226k
            cfg_instr *instr = &b->b_instr[i];
686
226k
            if (is_block_push(instr)) {
687
1.64k
                instr->i_target->b_except_handler = 1;
688
1.64k
            }
689
226k
        }
690
24.5k
    }
691
5.64k
    return SUCCESS;
692
5.64k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
1.64k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
1.64k
    assert(is_block_push(setup));
704
1.64k
    int opcode = setup->i_opcode;
705
1.64k
    basicblock * target = setup->i_target;
706
1.64k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
1.01k
        target->b_preserve_lasti = 1;
708
1.01k
    }
709
1.64k
    assert(stack->depth <= CO_MAXBLOCKS);
710
1.64k
    stack->handlers[++stack->depth] = target;
711
1.64k
    return target;
712
1.64k
}
713
714
static basicblock *
715
1.35k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
1.35k
    assert(stack->depth > 0);
717
1.35k
    return stack->handlers[--stack->depth];
718
1.35k
}
719
720
static basicblock *
721
20.4k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
20.4k
    return stack->handlers[stack->depth];
723
20.4k
}
724
725
static struct _PyCfgExceptStack *
726
5.64k
make_except_stack(void) {
727
5.64k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
5.64k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
5.64k
    new->depth = 0;
733
5.64k
    new->handlers[0] = NULL;
734
5.64k
    return new;
735
5.64k
}
736
737
static struct _PyCfgExceptStack *
738
6.81k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
6.81k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
6.81k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
6.81k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
6.81k
    return copy;
746
6.81k
}
747
748
static basicblock**
749
40.7k
make_cfg_traversal_stack(basicblock *entryblock) {
750
40.7k
    int nblocks = 0;
751
234k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
193k
        b->b_visited = 0;
753
193k
        nblocks++;
754
193k
    }
755
40.7k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
40.7k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
40.7k
    return stack;
760
40.7k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have different stack effect when jump to the target and
765
   when not jump. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jump
768
   * 1 -- when jump
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
214k
{
779
214k
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
214k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
214k
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
214k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
214k
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
214k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
1.64k
        effects->net = 0;
793
1.64k
        return 0;
794
1.64k
    }
795
212k
    effects->net = pushed - popped;
796
212k
    return 0;
797
214k
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
24.9k
{
802
24.9k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
24.9k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
20.6k
        assert(b->b_startdepth < 0);
808
20.6k
        b->b_startdepth = depth;
809
20.6k
        *(*sp)++ = b;
810
20.6k
    }
811
24.9k
    return SUCCESS;
812
24.9k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
5.64k
{
820
5.64k
    basicblock *entryblock = g->g_entryblock;
821
30.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
25.0k
        b->b_startdepth = INT_MIN;
823
25.0k
    }
824
5.64k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
5.64k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
5.64k
    int stackdepth = -1;
831
5.64k
    int maxdepth = 0;
832
5.64k
    basicblock **sp = stack;
833
5.64k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
26.3k
    while (sp != stack) {
837
20.6k
        basicblock *b = *--sp;
838
20.6k
        int depth = b->b_startdepth;
839
20.6k
        assert(depth >= 0);
840
20.6k
        basicblock *next = b->b_next;
841
213k
        for (int i = 0; i < b->b_iused; i++) {
842
204k
            cfg_instr *instr = &b->b_instr[i];
843
204k
            stack_effects effects;
844
204k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
204k
            int new_depth = depth + effects.net;
851
204k
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
854
0
                goto error;
855
0
            }
856
204k
            maxdepth = Py_MAX(maxdepth, depth);
857
204k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
9.81k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
9.81k
                int target_depth = depth + effects.net;
865
9.81k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
9.81k
                maxdepth = Py_MAX(maxdepth, depth);
867
9.81k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
9.81k
            }
871
204k
            depth = new_depth;
872
204k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
204k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
202k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
11.1k
            {
876
                /* remaining code is dead */
877
11.1k
                next = NULL;
878
11.1k
                break;
879
11.1k
            }
880
204k
        }
881
20.6k
        if (next != NULL) {
882
9.53k
            assert(BB_HAS_FALLTHROUGH(b));
883
9.53k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
9.53k
        }
887
20.6k
    }
888
5.64k
    stackdepth = maxdepth;
889
5.64k
error:
890
5.64k
    PyMem_Free(stack);
891
5.64k
    return stackdepth;
892
5.64k
}
893
894
static int
895
5.64k
label_exception_targets(basicblock *entryblock) {
896
5.64k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
5.64k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
5.64k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
5.64k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
5.64k
    except_stack->depth = 0;
907
5.64k
    todo_stack[0] = entryblock;
908
5.64k
    entryblock->b_visited = 1;
909
5.64k
    entryblock->b_exceptstack = except_stack;
910
5.64k
    basicblock **todo = &todo_stack[1];
911
5.64k
    basicblock *handler = NULL;
912
26.1k
    while (todo > todo_stack) {
913
20.4k
        todo--;
914
20.4k
        basicblock *b = todo[0];
915
20.4k
        assert(b->b_visited == 1);
916
20.4k
        except_stack = b->b_exceptstack;
917
20.4k
        assert(except_stack != NULL);
918
20.4k
        b->b_exceptstack = NULL;
919
20.4k
        handler = except_stack_top(except_stack);
920
20.4k
        int last_yield_except_depth = -1;
921
238k
        for (int i = 0; i < b->b_iused; i++) {
922
218k
            cfg_instr *instr = &b->b_instr[i];
923
218k
            if (is_block_push(instr)) {
924
1.64k
                if (!instr->i_target->b_visited) {
925
1.64k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
1.64k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
1.64k
                    instr->i_target->b_exceptstack = copy;
930
1.64k
                    todo[0] = instr->i_target;
931
1.64k
                    instr->i_target->b_visited = 1;
932
1.64k
                    todo++;
933
1.64k
                }
934
1.64k
                handler = push_except_block(except_stack, instr);
935
1.64k
            }
936
216k
            else if (instr->i_opcode == POP_BLOCK) {
937
1.35k
                handler = pop_except_block(except_stack);
938
1.35k
                INSTR_SET_OP0(instr, NOP);
939
1.35k
            }
940
215k
            else if (is_jump(instr)) {
941
8.97k
                instr->i_except = handler;
942
8.97k
                assert(i == b->b_iused -1);
943
8.97k
                if (!instr->i_target->b_visited) {
944
6.24k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
5.17k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
5.17k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
5.17k
                        instr->i_target->b_exceptstack = copy;
950
5.17k
                    }
951
1.07k
                    else {
952
1.07k
                        instr->i_target->b_exceptstack = except_stack;
953
1.07k
                        except_stack = NULL;
954
1.07k
                    }
955
6.24k
                    todo[0] = instr->i_target;
956
6.24k
                    instr->i_target->b_visited = 1;
957
6.24k
                    todo++;
958
6.24k
                }
959
8.97k
            }
960
206k
            else if (instr->i_opcode == YIELD_VALUE) {
961
306
                instr->i_except = handler;
962
306
                last_yield_except_depth = except_stack->depth;
963
306
            }
964
205k
            else if (instr->i_opcode == RESUME) {
965
6.00k
                instr->i_except = handler;
966
6.00k
                if (instr->i_oparg != RESUME_AT_FUNC_START && instr->i_oparg != RESUME_AT_GEN_EXPR_START) {
967
306
                    assert(last_yield_except_depth >= 0);
968
306
                    if (last_yield_except_depth == 1) {
969
135
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
135
                    }
971
306
                    last_yield_except_depth = -1;
972
306
                }
973
6.00k
            }
974
199k
            else if (instr->i_opcode == RETURN_GENERATOR) {
975
250
                instr->i_except = NULL;
976
250
            }
977
199k
            else {
978
199k
                instr->i_except = handler;
979
199k
            }
980
218k
        }
981
20.4k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
982
6.95k
            assert(except_stack != NULL);
983
6.95k
            b->b_next->b_exceptstack = except_stack;
984
6.95k
            todo[0] = b->b_next;
985
6.95k
            b->b_next->b_visited = 1;
986
6.95k
            todo++;
987
6.95k
        }
988
13.5k
        else if (except_stack != NULL) {
989
12.4k
           PyMem_Free(except_stack);
990
12.4k
        }
991
20.4k
    }
992
#ifdef Py_DEBUG
993
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
994
        assert(b->b_exceptstack == NULL);
995
    }
996
#endif
997
5.64k
    PyMem_Free(todo_stack);
998
5.64k
    return SUCCESS;
999
0
error:
1000
0
    PyMem_Free(todo_stack);
1001
0
    PyMem_Free(except_stack);
1002
0
    return ERROR;
1003
5.64k
}
1004
1005
/***** CFG optimizations *****/
1006
1007
static int
1008
11.2k
remove_unreachable(basicblock *entryblock) {
1009
60.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1010
49.4k
        b->b_predecessors = 0;
1011
49.4k
    }
1012
11.2k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1013
11.2k
    if (stack == NULL) {
1014
0
        return ERROR;
1015
0
    }
1016
11.2k
    basicblock **sp = stack;
1017
11.2k
    entryblock->b_predecessors = 1;
1018
11.2k
    *sp++ = entryblock;
1019
11.2k
    entryblock->b_visited = 1;
1020
51.9k
    while (sp > stack) {
1021
40.6k
        basicblock *b = *(--sp);
1022
40.6k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1023
18.3k
            if (!b->b_next->b_visited) {
1024
15.1k
                assert(b->b_next->b_predecessors == 0);
1025
15.1k
                *sp++ = b->b_next;
1026
15.1k
                b->b_next->b_visited = 1;
1027
15.1k
            }
1028
18.3k
            b->b_next->b_predecessors++;
1029
18.3k
        }
1030
465k
        for (int i = 0; i < b->b_iused; i++) {
1031
425k
            basicblock *target;
1032
425k
            cfg_instr *instr = &b->b_instr[i];
1033
425k
            if (is_jump(instr) || is_block_push(instr)) {
1034
19.8k
                target = instr->i_target;
1035
19.8k
                if (!target->b_visited) {
1036
14.1k
                    *sp++ = target;
1037
14.1k
                    target->b_visited = 1;
1038
14.1k
                }
1039
19.8k
                target->b_predecessors++;
1040
19.8k
            }
1041
425k
        }
1042
40.6k
    }
1043
11.2k
    PyMem_Free(stack);
1044
1045
    /* Delete unreachable instructions */
1046
60.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1047
49.4k
       if (b->b_predecessors == 0) {
1048
8.84k
            b->b_iused = 0;
1049
8.84k
            b->b_except_handler = 0;
1050
8.84k
       }
1051
49.4k
    }
1052
11.2k
    return SUCCESS;
1053
11.2k
}
1054
1055
static int
1056
127k
basicblock_remove_redundant_nops(basicblock *bb) {
1057
    /* Remove NOPs when legal to do so. */
1058
127k
    int dest = 0;
1059
127k
    int prev_lineno = -1;
1060
1.11M
    for (int src = 0; src < bb->b_iused; src++) {
1061
987k
        int lineno = bb->b_instr[src].i_loc.lineno;
1062
987k
        if (bb->b_instr[src].i_opcode == NOP) {
1063
            /* Eliminate no-op if it doesn't have a line number */
1064
20.3k
            if (lineno < 0) {
1065
3.45k
                continue;
1066
3.45k
            }
1067
            /* or, if the previous instruction had the same line number. */
1068
16.8k
            if (prev_lineno == lineno) {
1069
13.6k
                continue;
1070
13.6k
            }
1071
            /* or, if the next instruction has same line number or no line number */
1072
3.22k
            if (src < bb->b_iused - 1) {
1073
2.98k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1074
2.98k
                if (next_lineno == lineno) {
1075
1.97k
                    continue;
1076
1.97k
                }
1077
1.00k
                if (next_lineno < 0) {
1078
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1079
0
                    continue;
1080
0
                }
1081
1.00k
            }
1082
236
            else {
1083
236
                basicblock *next = next_nonempty_block(bb->b_next);
1084
                /* or if last instruction in BB and next BB has same line number */
1085
236
                if (next) {
1086
236
                    location next_loc = NO_LOCATION;
1087
236
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1088
236
                        cfg_instr *instr = &next->b_instr[next_i];
1089
236
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1090
                            /* Skip over NOPs without a location, they will be removed */
1091
0
                            continue;
1092
0
                        }
1093
236
                        next_loc = instr->i_loc;
1094
236
                        break;
1095
236
                    }
1096
236
                    if (lineno == next_loc.lineno) {
1097
1
                        continue;
1098
1
                    }
1099
236
                }
1100
236
            }
1101
1102
3.22k
        }
1103
968k
        if (dest != src) {
1104
74.1k
            bb->b_instr[dest] = bb->b_instr[src];
1105
74.1k
        }
1106
968k
        dest++;
1107
968k
        prev_lineno = lineno;
1108
968k
    }
1109
127k
    assert(dest <= bb->b_iused);
1110
127k
    int num_removed = bb->b_iused - dest;
1111
127k
    bb->b_iused = dest;
1112
127k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1113
127k
    return num_removed;
1114
127k
}
1115
1116
static int
1117
18.4k
remove_redundant_nops(cfg_builder *g) {
1118
18.4k
    int changes = 0;
1119
121k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1120
102k
        int change = basicblock_remove_redundant_nops(b);
1121
102k
        RETURN_IF_ERROR(change);
1122
102k
        changes += change;
1123
102k
    }
1124
18.4k
    return changes;
1125
18.4k
}
1126
1127
static int
1128
remove_redundant_nops_and_pairs(basicblock *entryblock)
1129
5.64k
{
1130
5.64k
    bool done = false;
1131
1132
11.2k
    while (! done) {
1133
5.64k
        done = true;
1134
5.64k
        cfg_instr *prev_instr = NULL;
1135
5.64k
        cfg_instr *instr = NULL;
1136
30.5k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1137
24.9k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1138
24.9k
            if (IS_LABEL(b->b_label)) {
1139
                /* this block is a jump target, forget instr */
1140
9.07k
                instr = NULL;
1141
9.07k
            }
1142
231k
            for (int i = 0; i < b->b_iused; i++) {
1143
206k
                prev_instr = instr;
1144
206k
                instr = &b->b_instr[i];
1145
206k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1146
206k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1147
206k
                int opcode = instr->i_opcode;
1148
206k
                bool is_redundant_pair = false;
1149
206k
                if (opcode == POP_TOP) {
1150
7.21k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1151
0
                       is_redundant_pair = true;
1152
0
                   }
1153
7.21k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1154
0
                       is_redundant_pair = true;
1155
0
                   }
1156
7.21k
                }
1157
206k
                if (is_redundant_pair) {
1158
0
                    INSTR_SET_OP0(prev_instr, NOP);
1159
0
                    INSTR_SET_OP0(instr, NOP);
1160
0
                    done = false;
1161
0
                }
1162
206k
            }
1163
24.9k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1164
17.9k
                instr = NULL;
1165
17.9k
            }
1166
24.9k
        }
1167
5.64k
    }
1168
5.64k
    return SUCCESS;
1169
5.64k
}
1170
1171
static int
1172
12.7k
remove_redundant_jumps(cfg_builder *g) {
1173
    /* If a non-empty block ends with a jump instruction, check if the next
1174
     * non-empty block reached through normal flow control is the target
1175
     * of that jump. If it is, then the jump instruction is redundant and
1176
     * can be deleted.
1177
     *
1178
     * Return the number of changes applied, or -1 on error.
1179
     */
1180
1181
12.7k
    int changes = 0;
1182
90.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1183
77.6k
        cfg_instr *last = basicblock_last_instr(b);
1184
77.6k
        if (last == NULL) {
1185
12.2k
            continue;
1186
12.2k
        }
1187
77.6k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1188
65.3k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1189
6.91k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1190
6.91k
            if (jump_target == NULL) {
1191
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1192
0
                return ERROR;
1193
0
            }
1194
6.91k
            basicblock *next = next_nonempty_block(b->b_next);
1195
6.91k
            if (jump_target == next) {
1196
242
                changes++;
1197
242
                INSTR_SET_OP0(last, NOP);
1198
242
            }
1199
6.91k
        }
1200
65.3k
    }
1201
1202
12.7k
    return changes;
1203
12.7k
}
1204
1205
static inline bool
1206
25.3k
basicblock_has_no_lineno(basicblock *b) {
1207
31.1k
    for (int i = 0; i < b->b_iused; i++) {
1208
28.5k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1209
22.7k
            return false;
1210
22.7k
        }
1211
28.5k
    }
1212
2.53k
    return true;
1213
25.3k
}
1214
1215
/* Maximum size of basic block that should be copied in optimizer */
1216
1.16k
#define MAX_COPY_SIZE 4
1217
1218
/* If this block ends with an unconditional jump to a small exit block or
1219
 * a block that has no line numbers (and no fallthrough), then
1220
 * remove the jump and extend this block with the target.
1221
 * Returns 1 if extended, 0 if no change, and -1 on error.
1222
 */
1223
static int
1224
33.7k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1225
33.7k
    cfg_instr *last = basicblock_last_instr(bb);
1226
33.7k
    if (last == NULL) {
1227
0
        return 0;
1228
0
    }
1229
33.7k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1230
29.5k
        return 0;
1231
29.5k
    }
1232
4.25k
    basicblock *target = last->i_target;
1233
4.25k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1234
1.16k
                             target->b_iused <= MAX_COPY_SIZE);
1235
4.25k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1236
994
                                     !BB_HAS_FALLTHROUGH(target));
1237
4.25k
    if (small_exit_block || no_lineno_no_fallthrough) {
1238
1.10k
        assert(is_jump(last));
1239
1.10k
        int removed_jump_opcode = last->i_opcode;
1240
1.10k
        INSTR_SET_OP0(last, NOP);
1241
1.10k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1242
1.10k
        if (no_lineno_no_fallthrough) {
1243
986
            last = basicblock_last_instr(bb);
1244
986
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1245
299
                removed_jump_opcode == JUMP)
1246
26
            {
1247
                /* Make sure we don't lose eval breaker checks */
1248
26
                last->i_opcode = JUMP;
1249
26
            }
1250
986
        }
1251
1.10k
        target->b_predecessors--;
1252
1.10k
        return 1;
1253
1.10k
    }
1254
3.15k
    return 0;
1255
4.25k
}
1256
1257
static int
1258
5.64k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1259
5.64k
    bool changes;
1260
6.16k
    do {
1261
6.16k
        changes = false;
1262
39.9k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1263
33.7k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1264
33.7k
            RETURN_IF_ERROR(res);
1265
33.7k
            if (res) {
1266
1.10k
                changes = true;
1267
1.10k
            }
1268
33.7k
        }
1269
6.16k
    } while(changes); /* every change removes a jump, ensuring convergence */
1270
5.64k
    return changes;
1271
5.64k
}
1272
1273
// Attempt to eliminate jumps to jumps by updating inst to jump to
1274
// target->i_target using the provided opcode. Return whether or not the
1275
// optimization was successful.
1276
static bool
1277
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1278
185
{
1279
185
    assert(is_jump(inst));
1280
185
    assert(is_jump(target));
1281
185
    assert(inst == basicblock_last_instr(bb));
1282
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1283
    // changes (and we fall into an infinite loop):
1284
185
    if (inst->i_target != target->i_target) {
1285
        /* Change inst to NOP and append a jump to target->i_target. The
1286
         * NOP will be removed later if it's not needed for the lineno.
1287
         */
1288
185
        INSTR_SET_OP0(inst, NOP);
1289
1290
185
        RETURN_IF_ERROR(
1291
185
            basicblock_add_jump(
1292
185
                bb, opcode, target->i_target, target->i_loc));
1293
1294
185
        return true;
1295
185
    }
1296
0
    return false;
1297
185
}
1298
1299
static int
1300
loads_const(int opcode)
1301
7.16k
{
1302
7.16k
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1303
7.16k
}
1304
1305
/* Returns new reference */
1306
static PyObject*
1307
get_const_value(int opcode, int oparg, PyObject *co_consts)
1308
46.2k
{
1309
46.2k
    PyObject *constant = NULL;
1310
46.2k
    assert(loads_const(opcode));
1311
46.2k
    if (opcode == LOAD_CONST) {
1312
46.0k
        constant = PyList_GET_ITEM(co_consts, oparg);
1313
46.0k
    }
1314
46.2k
    if (opcode == LOAD_SMALL_INT) {
1315
208
        return PyLong_FromLong(oparg);
1316
208
    }
1317
1318
46.0k
    if (constant == NULL) {
1319
0
        PyErr_SetString(PyExc_SystemError,
1320
0
                        "Internal error: failed to get value of a constant");
1321
0
        return NULL;
1322
0
    }
1323
46.0k
    return Py_NewRef(constant);
1324
46.0k
}
1325
1326
// Steals a reference to newconst.
1327
static int
1328
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1329
1.12k
{
1330
1.12k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1331
0
        Py_DECREF(newconst);
1332
0
        return -1;
1333
0
    }
1334
1335
1.12k
    Py_ssize_t index;
1336
25.6k
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1337
24.7k
        if (PyList_GET_ITEM(consts, index) == newconst) {
1338
271
            break;
1339
271
        }
1340
24.7k
    }
1341
1.12k
    if (index == PyList_GET_SIZE(consts)) {
1342
852
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1343
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1344
0
            Py_DECREF(newconst);
1345
0
            return -1;
1346
0
        }
1347
852
        if (PyList_Append(consts, newconst)) {
1348
0
            Py_DECREF(newconst);
1349
0
            return -1;
1350
0
        }
1351
852
    }
1352
1.12k
    Py_DECREF(newconst);
1353
1.12k
    return (int)index;
1354
1.12k
}
1355
1356
/*
1357
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1358
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1359
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1360
1361
  Return boolean indicating whether "size" such instructions were found.
1362
*/
1363
static bool
1364
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1365
4.44k
{
1366
4.44k
    assert(start < bb->b_iused);
1367
4.44k
    assert(size >= 0);
1368
4.44k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1369
1370
8.30k
    for (; start >= 0 && size > 0; start--) {
1371
7.20k
        cfg_instr *instr = &bb->b_instr[start];
1372
7.20k
        if (instr->i_opcode == NOP) {
1373
48
            continue;
1374
48
        }
1375
7.15k
        if (!loads_const(instr->i_opcode)) {
1376
3.33k
            return false;
1377
3.33k
        }
1378
3.81k
        instrs[--size] = instr;
1379
3.81k
    }
1380
1381
1.10k
    return size == 0;
1382
4.44k
}
1383
1384
/*
1385
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1386
  Caller must make sure "instrs" has at least "size" elements.
1387
*/
1388
static void
1389
nop_out(cfg_instr **instrs, int size)
1390
1.09k
{
1391
4.10k
    for (int i = 0; i < size; i++) {
1392
3.00k
        cfg_instr *instr = instrs[i];
1393
3.00k
        assert(instr->i_opcode != NOP);
1394
3.00k
        INSTR_SET_OP0(instr, NOP);
1395
3.00k
        INSTR_SET_LOC(instr, NO_LOCATION);
1396
3.00k
    }
1397
1.09k
}
1398
1399
/* Does not steal reference to "newconst".
1400
   Return 1 if changed instruction to LOAD_SMALL_INT.
1401
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1402
   Return -1 on error.
1403
*/
1404
static int
1405
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1406
                               PyObject *consts, PyObject *const_cache)
1407
43.3k
{
1408
43.3k
    if (PyLong_CheckExact(newconst)) {
1409
16.5k
        int overflow;
1410
16.5k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1411
16.5k
        if (val == -1 && PyErr_Occurred()) {
1412
0
            return -1;
1413
0
        }
1414
16.5k
        if (!overflow && _PY_IS_SMALL_INT(val) && 0 <= val && val <= 255) {
1415
13.3k
            assert(_Py_IsImmortal(newconst));
1416
13.3k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1417
13.3k
            return 1;
1418
13.3k
        }
1419
16.5k
    }
1420
30.0k
    return 0;
1421
43.3k
}
1422
1423
1424
/* Steals reference to "newconst" */
1425
static int
1426
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1427
                      PyObject *consts, PyObject *const_cache)
1428
873
{
1429
873
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1430
873
    if (res < 0) {
1431
0
        Py_DECREF(newconst);
1432
0
        return ERROR;
1433
0
    }
1434
873
    if (res > 0) {
1435
0
        return SUCCESS;
1436
0
    }
1437
873
    int oparg = add_const(newconst, consts, const_cache);
1438
873
    RETURN_IF_ERROR(oparg);
1439
873
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1440
873
    return SUCCESS;
1441
873
}
1442
1443
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1444
   with    LOAD_CONST (c1, c2, ... cn).
1445
   The consts table must still be in list form so that the
1446
   new constant (c1, c2, ... cn) can be appended.
1447
   Called with codestr pointing to the first LOAD_CONST.
1448
*/
1449
static int
1450
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1451
2.58k
{
1452
    /* Pre-conditions */
1453
2.58k
    assert(PyDict_CheckExact(const_cache));
1454
2.58k
    assert(PyList_CheckExact(consts));
1455
1456
2.58k
    cfg_instr *instr = &bb->b_instr[i];
1457
2.58k
    assert(instr->i_opcode == BUILD_TUPLE);
1458
1459
2.58k
    int seq_size = instr->i_oparg;
1460
2.58k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1461
0
        return SUCCESS;
1462
0
    }
1463
1464
2.58k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1465
2.58k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1466
        /* not a const sequence */
1467
1.83k
        return SUCCESS;
1468
1.83k
    }
1469
1470
750
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1471
750
    if (const_tuple == NULL) {
1472
0
        return ERROR;
1473
0
    }
1474
1475
1.82k
    for (int i = 0; i < seq_size; i++) {
1476
1.07k
        cfg_instr *inst = const_instrs[i];
1477
1.07k
        assert(loads_const(inst->i_opcode));
1478
1.07k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1479
1.07k
        if (element == NULL) {
1480
0
            Py_DECREF(const_tuple);
1481
0
            return ERROR;
1482
0
        }
1483
1.07k
        PyTuple_SET_ITEM(const_tuple, i, element);
1484
1.07k
    }
1485
1486
750
    nop_out(const_instrs, seq_size);
1487
750
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1488
750
}
1489
1490
/* Replace:
1491
    BUILD_LIST 0
1492
    LOAD_CONST c1
1493
    LIST_APPEND 1
1494
    LOAD_CONST c2
1495
    LIST_APPEND 1
1496
    ...
1497
    LOAD_CONST cN
1498
    LIST_APPEND 1
1499
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1500
   with:
1501
    LOAD_CONST (c1, c2, ... cN)
1502
*/
1503
static int
1504
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1505
                                      PyObject *consts, PyObject *const_cache)
1506
53
{
1507
53
    assert(PyDict_CheckExact(const_cache));
1508
53
    assert(PyList_CheckExact(consts));
1509
53
    assert(i >= 0);
1510
53
    assert(i < bb->b_iused);
1511
1512
53
    cfg_instr *intrinsic = &bb->b_instr[i];
1513
53
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1514
53
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1515
1516
53
    int consts_found = 0;
1517
53
    bool expect_append = true;
1518
1519
60
    for (int pos = i - 1; pos >= 0; pos--) {
1520
60
        cfg_instr *instr = &bb->b_instr[pos];
1521
60
        int opcode = instr->i_opcode;
1522
60
        int oparg = instr->i_oparg;
1523
1524
60
        if (opcode == NOP) {
1525
0
            continue;
1526
0
        }
1527
1528
60
        if (opcode == BUILD_LIST && oparg == 0) {
1529
0
            if (!expect_append) {
1530
                /* Not a sequence start. */
1531
0
                return SUCCESS;
1532
0
            }
1533
1534
            /* Sequence start, we are done. */
1535
0
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1536
0
            if (newconst == NULL) {
1537
0
                return ERROR;
1538
0
            }
1539
1540
0
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1541
0
                instr = &bb->b_instr[newpos];
1542
0
                if (instr->i_opcode == NOP) {
1543
0
                    continue;
1544
0
                }
1545
0
                if (loads_const(instr->i_opcode)) {
1546
0
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1547
0
                    if (constant == NULL) {
1548
0
                        Py_DECREF(newconst);
1549
0
                        return ERROR;
1550
0
                    }
1551
0
                    assert(consts_found > 0);
1552
0
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1553
0
                }
1554
0
                nop_out(&instr, 1);
1555
0
            }
1556
0
            assert(consts_found == 0);
1557
0
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1558
0
        }
1559
1560
60
        if (expect_append) {
1561
53
            if (opcode != LIST_APPEND || oparg != 1) {
1562
46
                return SUCCESS;
1563
46
            }
1564
53
        }
1565
7
        else {
1566
7
            if (!loads_const(opcode)) {
1567
7
                return SUCCESS;
1568
7
            }
1569
0
            consts_found++;
1570
0
        }
1571
1572
7
        expect_append = !expect_append;
1573
7
    }
1574
1575
    /* Did not find sequence start. */
1576
0
    return SUCCESS;
1577
53
}
1578
1579
1.16k
#define MIN_CONST_SEQUENCE_SIZE 3
1580
/*
1581
Optimize lists and sets for:
1582
    1. "for" loop, comprehension or "in"/"not in" tests:
1583
           Change literal list or set of constants into constant
1584
           tuple or frozenset respectively. Change list of
1585
           non-constants into tuple.
1586
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1587
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1588
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1589
           or BUILD_SET & SET_UPDATE respectively.
1590
*/
1591
static int
1592
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1593
                        PyObject *consts, PyObject *const_cache)
1594
582
{
1595
582
    assert(PyDict_CheckExact(const_cache));
1596
582
    assert(PyList_CheckExact(consts));
1597
1598
582
    cfg_instr *instr = &bb->b_instr[i];
1599
582
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1600
1601
582
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1602
582
    int seq_size = instr->i_oparg;
1603
582
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1604
582
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1605
333
    {
1606
333
        return SUCCESS;
1607
333
    }
1608
1609
249
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1610
249
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1611
24
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1612
            /* iterate over a tuple instead of list */
1613
13
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1614
13
        }
1615
24
        return SUCCESS;
1616
24
    }
1617
1618
225
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1619
225
    if (const_result == NULL) {
1620
0
        return ERROR;
1621
0
    }
1622
1623
2.00k
    for (int i = 0; i < seq_size; i++) {
1624
1.78k
        cfg_instr *inst = const_instrs[i];
1625
1.78k
        assert(loads_const(inst->i_opcode));
1626
1.78k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1627
1.78k
        if (element == NULL) {
1628
0
            Py_DECREF(const_result);
1629
0
            return ERROR;
1630
0
        }
1631
1.78k
        PyTuple_SET_ITEM(const_result, i, element);
1632
1.78k
    }
1633
1634
225
    if (instr->i_opcode == BUILD_SET) {
1635
217
        PyObject *frozenset = PyFrozenSet_New(const_result);
1636
217
        if (frozenset == NULL) {
1637
0
            Py_DECREF(const_result);
1638
0
            return ERROR;
1639
0
        }
1640
217
        Py_SETREF(const_result, frozenset);
1641
217
    }
1642
1643
225
    int index = add_const(const_result, consts, const_cache);
1644
225
    RETURN_IF_ERROR(index);
1645
225
    nop_out(const_instrs, seq_size);
1646
1647
225
    if (contains_or_iter) {
1648
212
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1649
212
    }
1650
13
    else {
1651
13
        assert(i >= 2);
1652
13
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1653
1654
13
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1655
1656
13
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1657
13
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1658
13
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1659
13
    }
1660
225
    return SUCCESS;
1661
225
}
1662
1663
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1664
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1665
 * Used to avoid creating constants which are slow to hash.
1666
 */
1667
static Py_ssize_t
1668
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1669
0
{
1670
0
    if (PyTuple_Check(obj)) {
1671
0
        Py_ssize_t i;
1672
0
        limit -= PyTuple_GET_SIZE(obj);
1673
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1674
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1675
0
            if (limit < 0) {
1676
0
                return limit;
1677
0
            }
1678
0
        }
1679
0
    }
1680
0
    return limit;
1681
0
}
1682
1683
67
#define MAX_INT_SIZE           128  /* bits */
1684
0
#define MAX_COLLECTION_SIZE    256  /* items */
1685
2
#define MAX_STR_SIZE          4096  /* characters */
1686
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1687
1688
static PyObject *
1689
const_folding_safe_multiply(PyObject *v, PyObject *w)
1690
12
{
1691
12
    if (PyLong_Check(v) && PyLong_Check(w) &&
1692
8
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1693
12
    ) {
1694
8
        int64_t vbits = _PyLong_NumBits(v);
1695
8
        int64_t wbits = _PyLong_NumBits(w);
1696
8
        assert(vbits >= 0);
1697
8
        assert(wbits >= 0);
1698
8
        if (vbits + wbits > MAX_INT_SIZE) {
1699
0
            return NULL;
1700
0
        }
1701
8
    }
1702
4
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1703
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1704
0
        if (size) {
1705
0
            long n = PyLong_AsLong(v);
1706
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1707
0
                return NULL;
1708
0
            }
1709
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1710
0
                return NULL;
1711
0
            }
1712
0
        }
1713
0
    }
1714
4
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1715
2
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1716
2
                                               PyBytes_GET_SIZE(w);
1717
2
        if (size) {
1718
2
            long n = PyLong_AsLong(v);
1719
2
            if (n < 0 || n > MAX_STR_SIZE / size) {
1720
0
                return NULL;
1721
0
            }
1722
2
        }
1723
2
    }
1724
2
    else if (PyLong_Check(w) &&
1725
2
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1726
2
    {
1727
2
        return const_folding_safe_multiply(w, v);
1728
2
    }
1729
1730
10
    return PyNumber_Multiply(v, w);
1731
12
}
1732
1733
static PyObject *
1734
const_folding_safe_power(PyObject *v, PyObject *w)
1735
2
{
1736
2
    if (PyLong_Check(v) && PyLong_Check(w) &&
1737
2
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1738
2
    ) {
1739
2
        int64_t vbits = _PyLong_NumBits(v);
1740
2
        size_t wbits = PyLong_AsSize_t(w);
1741
2
        assert(vbits >= 0);
1742
2
        if (wbits == (size_t)-1) {
1743
0
            return NULL;
1744
0
        }
1745
2
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1746
0
            return NULL;
1747
0
        }
1748
2
    }
1749
1750
2
    return PyNumber_Power(v, w, Py_None);
1751
2
}
1752
1753
static PyObject *
1754
const_folding_safe_lshift(PyObject *v, PyObject *w)
1755
19
{
1756
19
    if (PyLong_Check(v) && PyLong_Check(w) &&
1757
19
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1758
19
    ) {
1759
19
        int64_t vbits = _PyLong_NumBits(v);
1760
19
        size_t wbits = PyLong_AsSize_t(w);
1761
19
        assert(vbits >= 0);
1762
19
        if (wbits == (size_t)-1) {
1763
0
            return NULL;
1764
0
        }
1765
19
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1766
0
            return NULL;
1767
0
        }
1768
19
    }
1769
1770
19
    return PyNumber_Lshift(v, w);
1771
19
}
1772
1773
static PyObject *
1774
const_folding_safe_mod(PyObject *v, PyObject *w)
1775
0
{
1776
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1777
0
        return NULL;
1778
0
    }
1779
1780
0
    return PyNumber_Remainder(v, w);
1781
0
}
1782
1783
static PyObject *
1784
eval_const_binop(PyObject *left, int op, PyObject *right)
1785
32
{
1786
32
    assert(left != NULL && right != NULL);
1787
32
    assert(op >= 0 && op <= NB_OPARG_LAST);
1788
1789
32
    PyObject *result = NULL;
1790
32
    switch (op) {
1791
1
        case NB_ADD:
1792
1
            result = PyNumber_Add(left, right);
1793
1
            break;
1794
0
        case NB_SUBTRACT:
1795
0
            result = PyNumber_Subtract(left, right);
1796
0
            break;
1797
10
        case NB_MULTIPLY:
1798
10
            result = const_folding_safe_multiply(left, right);
1799
10
            break;
1800
0
        case NB_TRUE_DIVIDE:
1801
0
            result = PyNumber_TrueDivide(left, right);
1802
0
            break;
1803
0
        case NB_FLOOR_DIVIDE:
1804
0
            result = PyNumber_FloorDivide(left, right);
1805
0
            break;
1806
0
        case NB_REMAINDER:
1807
0
            result = const_folding_safe_mod(left, right);
1808
0
            break;
1809
2
        case NB_POWER:
1810
2
            result = const_folding_safe_power(left, right);
1811
2
            break;
1812
19
        case NB_LSHIFT:
1813
19
            result = const_folding_safe_lshift(left, right);
1814
19
            break;
1815
0
        case NB_RSHIFT:
1816
0
            result = PyNumber_Rshift(left, right);
1817
0
            break;
1818
0
        case NB_OR:
1819
0
            result = PyNumber_Or(left, right);
1820
0
            break;
1821
0
        case NB_XOR:
1822
0
            result = PyNumber_Xor(left, right);
1823
0
            break;
1824
0
        case NB_AND:
1825
0
            result = PyNumber_And(left, right);
1826
0
            break;
1827
0
        case NB_SUBSCR:
1828
0
            result = PyObject_GetItem(left, right);
1829
0
            break;
1830
0
        case NB_MATRIX_MULTIPLY:
1831
            // No builtin constants implement matrix multiplication
1832
0
            break;
1833
0
        default:
1834
0
            Py_UNREACHABLE();
1835
32
    }
1836
32
    return result;
1837
32
}
1838
1839
static int
1840
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1841
1.49k
{
1842
1.53k
    #define BINOP_OPERAND_COUNT 2
1843
1.49k
    assert(PyDict_CheckExact(const_cache));
1844
1.49k
    assert(PyList_CheckExact(consts));
1845
1846
1.49k
    cfg_instr *binop = &bb->b_instr[i];
1847
1.49k
    assert(binop->i_opcode == BINARY_OP);
1848
1849
1.49k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1850
1.49k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1851
        /* not a const sequence */
1852
1.46k
        return SUCCESS;
1853
1.46k
    }
1854
1855
32
    cfg_instr *lhs_instr = operands_instrs[0];
1856
32
    assert(loads_const(lhs_instr->i_opcode));
1857
32
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1858
32
    if (lhs == NULL) {
1859
0
        return ERROR;
1860
0
    }
1861
1862
32
    cfg_instr *rhs_instr = operands_instrs[1];
1863
32
    assert(loads_const(rhs_instr->i_opcode));
1864
32
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1865
32
    if (rhs == NULL) {
1866
0
        Py_DECREF(lhs);
1867
0
        return ERROR;
1868
0
    }
1869
1870
32
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1871
32
    Py_DECREF(lhs);
1872
32
    Py_DECREF(rhs);
1873
32
    if (newconst == NULL) {
1874
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1875
0
            return ERROR;
1876
0
        }
1877
0
        PyErr_Clear();
1878
0
        return SUCCESS;
1879
0
    }
1880
1881
32
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1882
32
    return instr_make_load_const(binop, newconst, consts, const_cache);
1883
32
}
1884
1885
static PyObject *
1886
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1887
91
{
1888
91
    assert(operand != NULL);
1889
91
    assert(
1890
91
        opcode == UNARY_NEGATIVE ||
1891
91
        opcode == UNARY_INVERT ||
1892
91
        opcode == UNARY_NOT ||
1893
91
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1894
91
    );
1895
91
    PyObject *result;
1896
91
    switch (opcode) {
1897
91
        case UNARY_NEGATIVE:
1898
91
            result = PyNumber_Negative(operand);
1899
91
            break;
1900
0
        case UNARY_INVERT:
1901
            // XXX: This should be removed once the ~bool depreciation expires.
1902
0
            if (PyBool_Check(operand)) {
1903
0
                return NULL;
1904
0
            }
1905
0
            result = PyNumber_Invert(operand);
1906
0
            break;
1907
0
        case UNARY_NOT: {
1908
0
            int r = PyObject_IsTrue(operand);
1909
0
            if (r < 0) {
1910
0
                return NULL;
1911
0
            }
1912
0
            result = PyBool_FromLong(!r);
1913
0
            break;
1914
0
        }
1915
0
        case CALL_INTRINSIC_1:
1916
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1917
0
                Py_UNREACHABLE();
1918
0
            }
1919
0
            result = PyNumber_Positive(operand);
1920
0
            break;
1921
0
        default:
1922
0
            Py_UNREACHABLE();
1923
91
    }
1924
91
    return result;
1925
91
}
1926
1927
static int
1928
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1929
113
{
1930
204
    #define UNARYOP_OPERAND_COUNT 1
1931
113
    assert(PyDict_CheckExact(const_cache));
1932
113
    assert(PyList_CheckExact(consts));
1933
113
    cfg_instr *unaryop = &bb->b_instr[i];
1934
1935
113
    cfg_instr *operand_instr;
1936
113
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1937
        /* not a const */
1938
22
        return SUCCESS;
1939
22
    }
1940
1941
113
    assert(loads_const(operand_instr->i_opcode));
1942
91
    PyObject *operand = get_const_value(
1943
91
        operand_instr->i_opcode,
1944
91
        operand_instr->i_oparg,
1945
91
        consts
1946
91
    );
1947
91
    if (operand == NULL) {
1948
0
        return ERROR;
1949
0
    }
1950
1951
91
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1952
91
    Py_DECREF(operand);
1953
91
    if (newconst == NULL) {
1954
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1955
0
            return ERROR;
1956
0
        }
1957
0
        PyErr_Clear();
1958
0
        return SUCCESS;
1959
0
    }
1960
1961
91
    if (unaryop->i_opcode == UNARY_NOT) {
1962
0
        assert(PyBool_Check(newconst));
1963
0
    }
1964
91
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1965
91
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1966
91
}
1967
1968
1.96k
#define VISITED (-1)
1969
1970
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1971
// same effect.
1972
static int
1973
swaptimize(basicblock *block, int *ix)
1974
586
{
1975
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1976
    // for this function. Make sure to blow away cached *.pyc files first!
1977
586
    assert(*ix < block->b_iused);
1978
586
    cfg_instr *instructions = &block->b_instr[*ix];
1979
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1980
    // maximum depth of the stack manipulations:
1981
586
    assert(instructions[0].i_opcode == SWAP);
1982
586
    int depth = instructions[0].i_oparg;
1983
586
    int len = 0;
1984
586
    int more = false;
1985
586
    int limit = block->b_iused - *ix;
1986
757
    while (++len < limit) {
1987
757
        int opcode = instructions[len].i_opcode;
1988
757
        if (opcode == SWAP) {
1989
151
            depth = Py_MAX(depth, instructions[len].i_oparg);
1990
151
            more = true;
1991
151
        }
1992
606
        else if (opcode != NOP) {
1993
586
            break;
1994
586
        }
1995
757
    }
1996
    // It's already optimal if there's only one SWAP:
1997
586
    if (!more) {
1998
435
        return SUCCESS;
1999
435
    }
2000
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
2001
151
    int *stack = PyMem_Malloc(depth * sizeof(int));
2002
151
    if (stack == NULL) {
2003
0
        PyErr_NoMemory();
2004
0
        return ERROR;
2005
0
    }
2006
604
    for (int i = 0; i < depth; i++) {
2007
453
        stack[i] = i;
2008
453
    }
2009
    // Simulate the combined effect of these instructions by "running" them on
2010
    // our "stack":
2011
453
    for (int i = 0; i < len; i++) {
2012
302
        if (instructions[i].i_opcode == SWAP) {
2013
302
            int oparg = instructions[i].i_oparg;
2014
302
            int top = stack[0];
2015
            // SWAPs are 1-indexed:
2016
302
            stack[0] = stack[oparg - 1];
2017
302
            stack[oparg - 1] = top;
2018
302
        }
2019
302
    }
2020
    // Now we can begin! Our approach here is based on a solution to a closely
2021
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2022
    // think of this algorithm as determining the steps needed to efficiently
2023
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2024
    // though, we can efficiently *shuffle* it! For this reason, we will be
2025
    // replacing instructions starting from the *end* of the run. Since the
2026
    // solution is optimal, we don't need to worry about running out of space:
2027
151
    int current = len - 1;
2028
604
    for (int i = 0; i < depth; i++) {
2029
        // Skip items that have already been visited, or just happen to be in
2030
        // the correct location:
2031
453
        if (stack[i] == VISITED || stack[i] == i) {
2032
302
            continue;
2033
302
        }
2034
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2035
        // with other items; traversing the cycle and swapping each item with
2036
        // the next will put them all in the correct place. The weird
2037
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2038
        // can only swap from that position:
2039
151
        int j = i;
2040
604
        while (true) {
2041
            // Skip the actual swap if our item is zero, since swapping the top
2042
            // item with itself is pointless:
2043
604
            if (j) {
2044
302
                assert(0 <= current);
2045
                // SWAPs are 1-indexed:
2046
302
                instructions[current].i_opcode = SWAP;
2047
302
                instructions[current--].i_oparg = j + 1;
2048
302
            }
2049
604
            if (stack[j] == VISITED) {
2050
                // Completed the cycle:
2051
151
                assert(j == i);
2052
151
                break;
2053
151
            }
2054
453
            int next_j = stack[j];
2055
453
            stack[j] = VISITED;
2056
453
            j = next_j;
2057
453
        }
2058
151
    }
2059
    // NOP out any unused instructions:
2060
151
    while (0 <= current) {
2061
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2062
0
    }
2063
151
    PyMem_Free(stack);
2064
151
    *ix += len - 1;
2065
151
    return SUCCESS;
2066
151
}
2067
2068
2069
// This list is pretty small, since it's only okay to reorder opcodes that:
2070
// - can't affect control flow (like jumping or raising exceptions)
2071
// - can't invoke arbitrary code (besides finalizers)
2072
// - only touch the TOS (and pop it when finished)
2073
#define SWAPPABLE(opcode) \
2074
895
    ((opcode) == STORE_FAST || \
2075
895
     (opcode) == STORE_FAST_MAYBE_NULL || \
2076
895
     (opcode) == POP_TOP)
2077
2078
#define STORES_TO(instr) \
2079
169
    (((instr).i_opcode == STORE_FAST || \
2080
169
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2081
169
     ? (instr).i_oparg : -1)
2082
2083
static int
2084
next_swappable_instruction(basicblock *block, int i, int lineno)
2085
859
{
2086
879
    while (++i < block->b_iused) {
2087
874
        cfg_instr *instruction = &block->b_instr[i];
2088
874
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2089
            // Optimizing across this instruction could cause user-visible
2090
            // changes in the names bound between line tracing events!
2091
4
            return -1;
2092
4
        }
2093
870
        if (instruction->i_opcode == NOP) {
2094
20
            continue;
2095
20
        }
2096
850
        if (SWAPPABLE(instruction->i_opcode)) {
2097
351
            return i;
2098
351
        }
2099
499
        return -1;
2100
850
    }
2101
5
    return -1;
2102
859
}
2103
2104
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2105
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2106
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2107
static void
2108
apply_static_swaps(basicblock *block, int i)
2109
586
{
2110
    // SWAPs are to our left, and potential swaperands are to our right:
2111
709
    for (; 0 <= i; i--) {
2112
676
        assert(i < block->b_iused);
2113
676
        cfg_instr *swap = &block->b_instr[i];
2114
676
        if (swap->i_opcode != SWAP) {
2115
90
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2116
                // Nope, but we know how to handle these. Keep looking:
2117
45
                continue;
2118
45
            }
2119
            // We can't reason about what this instruction does. Bail:
2120
45
            return;
2121
90
        }
2122
586
        int j = next_swappable_instruction(block, i, -1);
2123
586
        if (j < 0) {
2124
352
            return;
2125
352
        }
2126
234
        int k = j;
2127
234
        int lineno = block->b_instr[j].i_loc.lineno;
2128
351
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2129
273
            k = next_swappable_instruction(block, k, lineno);
2130
273
            if (k < 0) {
2131
156
                return;
2132
156
            }
2133
273
        }
2134
        // The reordering is not safe if the two instructions to be swapped
2135
        // store to the same location, or if any intervening instruction stores
2136
        // to the same location as either of them.
2137
78
        int store_j = STORES_TO(block->b_instr[j]);
2138
78
        int store_k = STORES_TO(block->b_instr[k]);
2139
78
        if (store_j >= 0 || store_k >= 0) {
2140
78
            if (store_j == store_k) {
2141
0
                return;
2142
0
            }
2143
91
            for (int idx = j + 1; idx < k; idx++) {
2144
13
                int store_idx = STORES_TO(block->b_instr[idx]);
2145
13
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2146
0
                    return;
2147
0
                }
2148
13
            }
2149
78
        }
2150
2151
        // Success!
2152
78
        INSTR_SET_OP0(swap, NOP);
2153
78
        cfg_instr temp = block->b_instr[j];
2154
78
        block->b_instr[j] = block->b_instr[k];
2155
78
        block->b_instr[k] = temp;
2156
78
    }
2157
586
}
2158
2159
static int
2160
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2161
24.9k
{
2162
24.9k
    assert(PyDict_CheckExact(const_cache));
2163
24.9k
    assert(PyList_CheckExact(consts));
2164
24.9k
    int opcode = 0;
2165
24.9k
    int oparg = 0;
2166
244k
    for (int i = 0; i < bb->b_iused; i++) {
2167
219k
        cfg_instr *inst = &bb->b_instr[i];
2168
219k
        if (inst->i_opcode == LOAD_CONST) {
2169
42.5k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2170
42.5k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2171
42.5k
            Py_DECREF(constant);
2172
42.5k
            if (res < 0) {
2173
0
                return ERROR;
2174
0
            }
2175
42.5k
        }
2176
219k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2177
28.9k
                                      inst->i_opcode == COPY &&
2178
12
                                      inst->i_oparg == 1);
2179
219k
        if (! is_copy_of_load_const) {
2180
219k
            opcode = inst->i_opcode;
2181
219k
            oparg = inst->i_oparg;
2182
219k
        }
2183
219k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2184
219k
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2185
177k
            continue;
2186
177k
        }
2187
42.5k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2188
42.5k
        switch(nextop) {
2189
25
            case POP_JUMP_IF_FALSE:
2190
25
            case POP_JUMP_IF_TRUE:
2191
25
            case JUMP_IF_FALSE:
2192
25
            case JUMP_IF_TRUE:
2193
25
            {
2194
                /* Remove LOAD_CONST const; conditional jump */
2195
25
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2196
25
                if (cnt == NULL) {
2197
0
                    return ERROR;
2198
0
                }
2199
25
                int is_true = PyObject_IsTrue(cnt);
2200
25
                Py_DECREF(cnt);
2201
25
                if (is_true == -1) {
2202
0
                    return ERROR;
2203
0
                }
2204
25
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2205
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2206
25
                    INSTR_SET_OP0(inst, NOP);
2207
25
                }
2208
25
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2209
25
                if (is_true == jump_if_true) {
2210
0
                    bb->b_instr[i+1].i_opcode = JUMP;
2211
0
                }
2212
25
                else {
2213
25
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2214
25
                }
2215
25
                break;
2216
25
            }
2217
688
            case IS_OP:
2218
688
            {
2219
                // Fold to POP_JUMP_IF_NONE:
2220
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2221
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2222
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2223
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2224
                // Fold to POP_JUMP_IF_NOT_NONE:
2225
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2226
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2227
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2228
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2229
688
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2230
688
                if (cnt == NULL) {
2231
0
                    return ERROR;
2232
0
                }
2233
688
                if (!Py_IsNone(cnt)) {
2234
15
                    Py_DECREF(cnt);
2235
15
                    break;
2236
15
                }
2237
673
                if (bb->b_iused <= i + 2) {
2238
1
                    break;
2239
1
                }
2240
672
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2241
672
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2242
                // Get rid of TO_BOOL regardless:
2243
672
                if (jump_instr->i_opcode == TO_BOOL) {
2244
657
                    INSTR_SET_OP0(jump_instr, NOP);
2245
657
                    if (bb->b_iused <= i + 3) {
2246
0
                        break;
2247
0
                    }
2248
657
                    jump_instr = &bb->b_instr[i + 3];
2249
657
                }
2250
672
                bool invert = is_instr->i_oparg;
2251
672
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2252
624
                    invert = !invert;
2253
624
                }
2254
48
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2255
15
                    break;
2256
15
                }
2257
657
                INSTR_SET_OP0(inst, NOP);
2258
657
                INSTR_SET_OP0(is_instr, NOP);
2259
657
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2260
657
                                              : POP_JUMP_IF_NONE;
2261
657
                break;
2262
672
            }
2263
25
            case TO_BOOL:
2264
25
            {
2265
25
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2266
25
                if (cnt == NULL) {
2267
0
                    return ERROR;
2268
0
                }
2269
25
                int is_true = PyObject_IsTrue(cnt);
2270
25
                Py_DECREF(cnt);
2271
25
                if (is_true == -1) {
2272
0
                    return ERROR;
2273
0
                }
2274
25
                cnt = PyBool_FromLong(is_true);
2275
25
                int index = add_const(cnt, consts, const_cache);
2276
25
                if (index < 0) {
2277
0
                    return ERROR;
2278
0
                }
2279
25
                INSTR_SET_OP0(inst, NOP);
2280
25
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2281
25
                break;
2282
25
            }
2283
42.5k
        }
2284
42.5k
    }
2285
24.9k
    return SUCCESS;
2286
24.9k
}
2287
2288
static int
2289
5.64k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2290
30.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2291
24.9k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2292
24.9k
    }
2293
5.64k
    return SUCCESS;
2294
5.64k
}
2295
2296
static int
2297
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2298
24.9k
{
2299
24.9k
    assert(PyDict_CheckExact(const_cache));
2300
24.9k
    assert(PyList_CheckExact(consts));
2301
24.9k
    cfg_instr nop;
2302
24.9k
    INSTR_SET_OP0(&nop, NOP);
2303
245k
    for (int i = 0; i < bb->b_iused; i++) {
2304
220k
        cfg_instr *inst = &bb->b_instr[i];
2305
220k
        cfg_instr *target;
2306
220k
        int opcode = inst->i_opcode;
2307
220k
        int oparg = inst->i_oparg;
2308
220k
        if (HAS_TARGET(opcode)) {
2309
10.2k
            assert(inst->i_target->b_iused > 0);
2310
10.2k
            target = &inst->i_target->b_instr[0];
2311
10.2k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2312
10.2k
        }
2313
210k
        else {
2314
210k
            target = &nop;
2315
210k
        }
2316
220k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2317
220k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2318
220k
        switch (opcode) {
2319
            /* Try to fold tuples of constants.
2320
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2321
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2322
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2323
2.63k
            case BUILD_TUPLE:
2324
2.63k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2325
54
                    switch(oparg) {
2326
0
                        case 1:
2327
0
                            INSTR_SET_OP0(inst, NOP);
2328
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2329
0
                            continue;
2330
45
                        case 2:
2331
53
                        case 3:
2332
53
                            INSTR_SET_OP0(inst, NOP);
2333
53
                            bb->b_instr[i+1].i_opcode = SWAP;
2334
53
                            continue;
2335
54
                    }
2336
54
                }
2337
2.58k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2338
2.58k
                break;
2339
335
            case BUILD_LIST:
2340
582
            case BUILD_SET:
2341
582
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2342
582
                break;
2343
249
            case POP_JUMP_IF_NOT_NONE:
2344
673
            case POP_JUMP_IF_NONE:
2345
673
                switch (target->i_opcode) {
2346
16
                    case JUMP:
2347
16
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2348
673
                }
2349
673
                break;
2350
3.38k
            case POP_JUMP_IF_FALSE:
2351
3.38k
                switch (target->i_opcode) {
2352
136
                    case JUMP:
2353
136
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2354
3.38k
                }
2355
3.38k
                break;
2356
3.38k
            case POP_JUMP_IF_TRUE:
2357
978
                switch (target->i_opcode) {
2358
33
                    case JUMP:
2359
33
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2360
978
                }
2361
978
                break;
2362
1.04k
            case JUMP_IF_FALSE:
2363
1.04k
                switch (target->i_opcode) {
2364
0
                    case JUMP:
2365
0
                    case JUMP_IF_FALSE:
2366
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2367
0
                        continue;
2368
2
                    case JUMP_IF_TRUE:
2369
                        // No need to check for loops here, a block's b_next
2370
                        // cannot point to itself.
2371
2
                        assert(inst->i_target != inst->i_target->b_next);
2372
2
                        inst->i_target = inst->i_target->b_next;
2373
2
                        i--;
2374
2
                        continue;
2375
1.04k
                }
2376
1.03k
                break;
2377
1.03k
            case JUMP_IF_TRUE:
2378
53
                switch (target->i_opcode) {
2379
0
                    case JUMP:
2380
0
                    case JUMP_IF_TRUE:
2381
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2382
0
                        continue;
2383
0
                    case JUMP_IF_FALSE:
2384
                        // No need to check for loops here, a block's b_next
2385
                        // cannot point to itself.
2386
0
                        assert(inst->i_target != inst->i_target->b_next);
2387
0
                        inst->i_target = inst->i_target->b_next;
2388
0
                        i--;
2389
0
                        continue;
2390
53
                }
2391
53
                break;
2392
853
            case JUMP:
2393
1.88k
            case JUMP_NO_INTERRUPT:
2394
1.88k
                switch (target->i_opcode) {
2395
0
                    case JUMP:
2396
0
                        i -= jump_thread(bb, inst, target, JUMP);
2397
0
                        continue;
2398
0
                    case JUMP_NO_INTERRUPT:
2399
0
                        i -= jump_thread(bb, inst, target, opcode);
2400
0
                        continue;
2401
1.88k
                }
2402
1.88k
                break;
2403
1.88k
            case FOR_ITER:
2404
446
                if (target->i_opcode == JUMP) {
2405
                    /* This will not work now because the jump (at target) could
2406
                     * be forward or backward and FOR_ITER only jumps forward. We
2407
                     * can re-enable this if ever we implement a backward version
2408
                     * of FOR_ITER.
2409
                     */
2410
                    /*
2411
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2412
                    */
2413
0
                }
2414
446
                break;
2415
4.92k
            case STORE_FAST:
2416
4.92k
                if (opcode == nextop &&
2417
378
                    oparg == bb->b_instr[i+1].i_oparg &&
2418
19
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2419
19
                    bb->b_instr[i].i_opcode = POP_TOP;
2420
19
                    bb->b_instr[i].i_oparg = 0;
2421
19
                }
2422
4.92k
                break;
2423
737
            case SWAP:
2424
737
                if (oparg == 1) {
2425
0
                    INSTR_SET_OP0(inst, NOP);
2426
0
                }
2427
737
                break;
2428
7.82k
            case LOAD_GLOBAL:
2429
7.82k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2430
3.05k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2431
3.05k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2432
3.05k
                }
2433
7.82k
                break;
2434
2.72k
            case COMPARE_OP:
2435
2.72k
                if (nextop == TO_BOOL) {
2436
720
                    INSTR_SET_OP0(inst, NOP);
2437
720
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2438
720
                    continue;
2439
720
                }
2440
2.00k
                break;
2441
2.00k
            case CONTAINS_OP:
2442
2.26k
            case IS_OP:
2443
2.26k
                if (nextop == TO_BOOL) {
2444
1.10k
                    INSTR_SET_OP0(inst, NOP);
2445
1.10k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2446
1.10k
                    continue;
2447
1.10k
                }
2448
1.16k
                if (nextop == UNARY_NOT) {
2449
0
                    INSTR_SET_OP0(inst, NOP);
2450
0
                    int inverted = oparg ^ 1;
2451
0
                    assert(inverted == 0 || inverted == 1);
2452
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2453
0
                    continue;
2454
0
                }
2455
1.16k
                break;
2456
1.90k
            case TO_BOOL:
2457
1.90k
                if (nextop == TO_BOOL) {
2458
0
                    INSTR_SET_OP0(inst, NOP);
2459
0
                    continue;
2460
0
                }
2461
1.90k
                break;
2462
1.90k
            case UNARY_NOT:
2463
17
                if (nextop == TO_BOOL) {
2464
0
                    INSTR_SET_OP0(inst, NOP);
2465
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2466
0
                    continue;
2467
0
                }
2468
17
                if (nextop == UNARY_NOT) {
2469
0
                    INSTR_SET_OP0(inst, NOP);
2470
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2471
0
                    continue;
2472
0
                }
2473
17
                _Py_FALLTHROUGH;
2474
19
            case UNARY_INVERT:
2475
113
            case UNARY_NEGATIVE:
2476
113
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2477
113
                break;
2478
324
            case CALL_INTRINSIC_1:
2479
324
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2480
53
                    if (nextop == GET_ITER) {
2481
0
                        INSTR_SET_OP0(inst, NOP);
2482
0
                    }
2483
53
                    else {
2484
53
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2485
53
                    }
2486
53
                }
2487
271
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2488
0
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2489
0
                }
2490
324
                break;
2491
1.49k
            case BINARY_OP:
2492
1.49k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2493
1.49k
                break;
2494
220k
        }
2495
220k
    }
2496
2497
244k
    for (int i = 0; i < bb->b_iused; i++) {
2498
219k
        cfg_instr *inst = &bb->b_instr[i];
2499
219k
        if (inst->i_opcode == SWAP) {
2500
586
            if (swaptimize(bb, &i) < 0) {
2501
0
                goto error;
2502
0
            }
2503
586
            apply_static_swaps(bb, i);
2504
586
        }
2505
219k
    }
2506
24.9k
    return SUCCESS;
2507
0
error:
2508
0
    return ERROR;
2509
24.9k
}
2510
2511
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2512
2513
static int
2514
remove_redundant_nops_and_jumps(cfg_builder *g)
2515
11.8k
{
2516
11.8k
    int removed_nops, removed_jumps;
2517
12.7k
    do {
2518
        /* Convergence is guaranteed because the number of
2519
         * redundant jumps and nops only decreases.
2520
         */
2521
12.7k
        removed_nops = remove_redundant_nops(g);
2522
12.7k
        RETURN_IF_ERROR(removed_nops);
2523
12.7k
        removed_jumps = remove_redundant_jumps(g);
2524
12.7k
        RETURN_IF_ERROR(removed_jumps);
2525
12.7k
    } while(removed_nops + removed_jumps > 0);
2526
11.8k
    return SUCCESS;
2527
11.8k
}
2528
2529
/* Perform optimizations on a control flow graph.
2530
   The consts object should still be in list form to allow new constants
2531
   to be appended.
2532
2533
   Code trasnformations that reduce code size initially fill the gaps with
2534
   NOPs.  Later those NOPs are removed.
2535
*/
2536
static int
2537
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2538
5.64k
{
2539
5.64k
    assert(PyDict_CheckExact(const_cache));
2540
5.64k
    RETURN_IF_ERROR(check_cfg(g));
2541
5.64k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2542
5.64k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2543
5.64k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2544
5.64k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2545
30.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2546
24.9k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2547
24.9k
    }
2548
5.64k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2549
5.64k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2550
5.64k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2551
5.64k
    assert(no_redundant_jumps(g));
2552
5.64k
    return SUCCESS;
2553
5.64k
}
2554
2555
static void
2556
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2557
6.00k
{
2558
6.00k
    int32_t line1 = inst1->i_loc.lineno;
2559
6.00k
    int32_t line2 = inst2->i_loc.lineno;
2560
    /* Skip if instructions are on different lines */
2561
6.00k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2562
1.91k
        return;
2563
1.91k
    }
2564
4.08k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2565
365
        return;
2566
365
    }
2567
3.72k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2568
3.72k
    INSTR_SET_OP0(inst2, NOP);
2569
3.72k
}
2570
2571
static int
2572
insert_superinstructions(cfg_builder *g)
2573
5.64k
{
2574
30.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2575
2576
230k
        for (int i = 0; i < b->b_iused; i++) {
2577
205k
            cfg_instr *inst = &b->b_instr[i];
2578
205k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2579
205k
            switch(inst->i_opcode) {
2580
26.6k
                case LOAD_FAST:
2581
26.6k
                    if (nextop == LOAD_FAST) {
2582
3.83k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2583
3.83k
                    }
2584
26.6k
                    break;
2585
4.66k
                case STORE_FAST:
2586
4.66k
                    switch (nextop) {
2587
1.87k
                        case LOAD_FAST:
2588
1.87k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2589
1.87k
                            break;
2590
294
                        case STORE_FAST:
2591
294
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2592
294
                            break;
2593
4.66k
                    }
2594
4.66k
                    break;
2595
205k
            }
2596
205k
        }
2597
24.9k
    }
2598
5.64k
    int res = remove_redundant_nops(g);
2599
5.64k
    assert(no_redundant_nops(g));
2600
5.64k
    return res;
2601
5.64k
}
2602
2603
#define NOT_LOCAL -1
2604
4.51k
#define DUMMY_INSTR -1
2605
2606
typedef struct {
2607
    // Index of instruction that produced the reference or DUMMY_INSTR.
2608
    int instr;
2609
2610
    // The local to which the reference refers or NOT_LOCAL.
2611
    int local;
2612
} ref;
2613
2614
typedef struct {
2615
    ref *refs;
2616
    Py_ssize_t size;
2617
    Py_ssize_t capacity;
2618
} ref_stack;
2619
2620
static int
2621
ref_stack_push(ref_stack *stack, ref r)
2622
166k
{
2623
166k
    if (stack->size == stack->capacity) {
2624
5.65k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2625
5.65k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2626
5.65k
        if (refs == NULL) {
2627
0
            PyErr_NoMemory();
2628
0
            return -1;
2629
0
        }
2630
5.65k
        stack->refs = refs;
2631
5.65k
        stack->capacity = new_cap;
2632
5.65k
    }
2633
166k
    stack->refs[stack->size] = r;
2634
166k
    stack->size++;
2635
166k
    return 0;
2636
166k
}
2637
2638
static ref
2639
ref_stack_pop(ref_stack *stack)
2640
149k
{
2641
149k
    assert(stack->size > 0);
2642
149k
    stack->size--;
2643
149k
    ref r = stack->refs[stack->size];
2644
149k
    return r;
2645
149k
}
2646
2647
static void
2648
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2649
518
{
2650
518
    Py_ssize_t idx = stack->size - off;
2651
518
    assert(idx >= 0 && idx < stack->size);
2652
518
    ref tmp = stack->refs[idx];
2653
518
    stack->refs[idx] = stack->refs[stack->size - 1];
2654
518
    stack->refs[stack->size - 1] = tmp;
2655
518
}
2656
2657
static ref
2658
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2659
21.9k
{
2660
21.9k
    assert(idx >= 0 && idx < stack->size);
2661
21.9k
    return stack->refs[idx];
2662
21.9k
}
2663
2664
static void
2665
ref_stack_clear(ref_stack *stack)
2666
17.6k
{
2667
17.6k
    stack->size = 0;
2668
17.6k
}
2669
2670
static void
2671
ref_stack_fini(ref_stack *stack)
2672
5.64k
{
2673
5.64k
    if (stack->refs != NULL) {
2674
5.64k
        PyMem_Free(stack->refs);
2675
5.64k
    }
2676
5.64k
    stack->refs = NULL;
2677
5.64k
    stack->capacity = 0;
2678
5.64k
    stack->size = 0;
2679
5.64k
}
2680
2681
typedef enum {
2682
    // The loaded reference is still on the stack when the local is killed
2683
    SUPPORT_KILLED  = 1,
2684
    // The loaded reference is stored into a local
2685
    STORED_AS_LOCAL = 2,
2686
    // The loaded reference is still on the stack at the end of the basic block
2687
    REF_UNCONSUMED  = 4,
2688
} LoadFastInstrFlag;
2689
2690
static void
2691
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2692
4.58k
{
2693
7.75k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2694
3.16k
        ref r = ref_stack_at(refs, i);
2695
3.16k
        if (r.local == local) {
2696
4
            assert(r.instr >= 0);
2697
4
            instr_flags[r.instr] |= SUPPORT_KILLED;
2698
4
        }
2699
3.16k
    }
2700
4.58k
}
2701
2702
static void
2703
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2704
4.51k
{
2705
4.51k
    kill_local(instr_flags, refs, local);
2706
4.51k
    if (r.instr != DUMMY_INSTR) {
2707
4.01k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2708
4.01k
    }
2709
4.51k
}
2710
2711
static void
2712
load_fast_push_block(basicblock ***sp, basicblock *target,
2713
                     Py_ssize_t start_depth)
2714
15.9k
{
2715
15.9k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2716
15.9k
    if (!target->b_visited) {
2717
12.0k
        target->b_visited = 1;
2718
12.0k
        *(*sp)++ = target;
2719
12.0k
    }
2720
15.9k
}
2721
2722
/*
2723
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2724
 * load borrowed references onto the operand stack.
2725
 *
2726
 * This is only safe when we can prove that the reference in the frame outlives
2727
 * the borrowed reference produced by the instruction. We make this tractable
2728
 * by enforcing the following lifetimes:
2729
 *
2730
 * 1. Borrowed references loaded onto the operand stack live until the end of
2731
 *    the instruction that consumes them from the stack. Any borrowed
2732
 *    references that would escape into the heap (e.g. into frame objects or
2733
 *    generators) are converted into new, strong references.
2734
 *
2735
 * 2. Locals live until they are either killed by an instruction
2736
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2737
 *    via `f_locals` is added to a tuple owned by the frame object.
2738
 *
2739
 * To simplify the problem of detecting which supporting references in the
2740
 * frame are killed by instructions that overwrite locals, we only allow
2741
 * borrowed references to be stored as a local in the frame if they were passed
2742
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2743
 * strong references.
2744
 *
2745
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2746
 * that meet the following criteria:
2747
 *
2748
 * 1. The produced reference must be consumed from the stack before the
2749
 *    supporting reference in the frame is killed.
2750
 *
2751
 * 2. The produced reference cannot be stored as a local.
2752
 *
2753
 * We use abstract interpretation to identify instructions that meet these
2754
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2755
 * stack of abstract references and note any instructions that violate the
2756
 * criteria above. Once we've processed all the instructions in a block, any
2757
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2758
 */
2759
static int
2760
optimize_load_fast(cfg_builder *g)
2761
5.64k
{
2762
5.64k
    int status;
2763
5.64k
    ref_stack refs = {0};
2764
5.64k
    int max_instrs = 0;
2765
5.64k
    basicblock *entryblock = g->g_entryblock;
2766
30.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2767
25.2k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2768
25.2k
    }
2769
5.64k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2770
5.64k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2771
5.64k
    if (instr_flags == NULL) {
2772
0
        PyErr_NoMemory();
2773
0
        return ERROR;
2774
0
    }
2775
5.64k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2776
5.64k
    if (blocks == NULL) {
2777
0
        status = ERROR;
2778
0
        goto done;
2779
0
    }
2780
5.64k
    basicblock **sp = blocks;
2781
5.64k
    *sp = entryblock;
2782
5.64k
    sp++;
2783
5.64k
    entryblock->b_startdepth = 0;
2784
5.64k
    entryblock->b_visited = 1;
2785
2786
5.64k
    #define PUSH_REF(instr, local)                \
2787
166k
        do {                                      \
2788
166k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2789
0
                status = ERROR;                   \
2790
0
                goto done;                        \
2791
0
            }                                     \
2792
166k
        } while(0)
2793
2794
23.3k
    while (sp != blocks) {
2795
17.6k
        basicblock *block = *--sp;
2796
17.6k
        assert(block->b_startdepth > -1);
2797
2798
        // Reset per-block state.
2799
17.6k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2800
2801
        // Reset the stack of refs. We don't track references on the stack
2802
        // across basic blocks, but the bytecode will expect their
2803
        // presence. Add dummy references as necessary.
2804
17.6k
        ref_stack_clear(&refs);
2805
28.8k
        for (int i = 0; i < block->b_startdepth; i++) {
2806
11.1k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2807
11.1k
        }
2808
2809
215k
        for (int i = 0; i < block->b_iused; i++) {
2810
198k
            cfg_instr *instr = &block->b_instr[i];
2811
198k
            int opcode = instr->i_opcode;
2812
198k
            int oparg = instr->i_oparg;
2813
198k
            assert(opcode != EXTENDED_ARG);
2814
198k
            switch (opcode) {
2815
                // Opcodes that load and store locals
2816
7
                case DELETE_FAST: {
2817
7
                    kill_local(instr_flags, &refs, oparg);
2818
7
                    break;
2819
0
                }
2820
2821
24.0k
                case LOAD_FAST: {
2822
24.0k
                    PUSH_REF(i, oparg);
2823
24.0k
                    break;
2824
24.0k
                }
2825
2826
24.0k
                case LOAD_FAST_AND_CLEAR: {
2827
65
                    kill_local(instr_flags, &refs, oparg);
2828
65
                    PUSH_REF(i, oparg);
2829
65
                    break;
2830
65
                }
2831
2832
3.33k
                case LOAD_FAST_LOAD_FAST: {
2833
3.33k
                    PUSH_REF(i, oparg >> 4);
2834
3.33k
                    PUSH_REF(i, oparg & 15);
2835
3.33k
                    break;
2836
3.33k
                }
2837
2838
3.99k
                case STORE_FAST: {
2839
3.99k
                    ref r = ref_stack_pop(&refs);
2840
3.99k
                    store_local(instr_flags, &refs, oparg, r);
2841
3.99k
                    break;
2842
3.33k
                }
2843
2844
39
                case STORE_FAST_LOAD_FAST: {
2845
                    // STORE_FAST
2846
39
                    ref r = ref_stack_pop(&refs);
2847
39
                    store_local(instr_flags, &refs, oparg >> 4, r);
2848
                    // LOAD_FAST
2849
39
                    PUSH_REF(i, oparg & 15);
2850
39
                    break;
2851
39
                }
2852
2853
240
                case STORE_FAST_STORE_FAST: {
2854
                    // STORE_FAST
2855
240
                    ref r = ref_stack_pop(&refs);
2856
240
                    store_local(instr_flags, &refs, oparg >> 4, r);
2857
                    // STORE_FAST
2858
240
                    r = ref_stack_pop(&refs);
2859
240
                    store_local(instr_flags, &refs, oparg & 15, r);
2860
240
                    break;
2861
39
                }
2862
2863
                // Opcodes that shuffle values on the stack
2864
1.40k
                case COPY: {
2865
1.40k
                    assert(oparg > 0);
2866
1.40k
                    Py_ssize_t idx = refs.size - oparg;
2867
1.40k
                    ref r = ref_stack_at(&refs, idx);
2868
1.40k
                    PUSH_REF(r.instr, r.local);
2869
1.40k
                    break;
2870
1.40k
                }
2871
2872
1.40k
                case SWAP: {
2873
518
                    assert(oparg >= 2);
2874
518
                    ref_stack_swap_top(&refs, oparg);
2875
518
                    break;
2876
1.40k
                }
2877
2878
                // We treat opcodes that do not consume all of their inputs on
2879
                // a case by case basis, as we have no generic way of knowing
2880
                // how many inputs should be left on the stack.
2881
2882
                // Opcodes that consume no inputs
2883
2.16k
                case FORMAT_SIMPLE:
2884
2.16k
                case GET_ANEXT:
2885
2.59k
                case GET_ITER:
2886
2.59k
                case GET_LEN:
2887
3.02k
                case IMPORT_FROM:
2888
3.02k
                case MATCH_KEYS:
2889
3.02k
                case MATCH_MAPPING:
2890
3.02k
                case MATCH_SEQUENCE:
2891
3.02k
                case WITH_EXCEPT_START: {
2892
3.02k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2893
3.02k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2894
3.02k
                    int net_pushed = num_pushed - num_popped;
2895
3.02k
                    assert(net_pushed >= 0);
2896
3.88k
                    for (int j = 0; j < net_pushed; j++) {
2897
860
                        PUSH_REF(i, NOT_LOCAL);
2898
860
                    }
2899
3.02k
                    break;
2900
3.02k
                }
2901
2902
                // Opcodes that consume some inputs and push no new values
2903
3.02k
                case DICT_MERGE:
2904
466
                case DICT_UPDATE:
2905
1.50k
                case LIST_APPEND:
2906
1.55k
                case LIST_EXTEND:
2907
7.91k
                case MAP_ADD:
2908
7.91k
                case RERAISE:
2909
8.29k
                case SET_ADD:
2910
8.30k
                case SET_UPDATE: {
2911
8.30k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2912
8.30k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2913
8.30k
                    int net_popped = num_popped - num_pushed;
2914
8.30k
                    assert(net_popped > 0);
2915
22.9k
                    for (int i = 0; i < net_popped; i++) {
2916
14.6k
                        ref_stack_pop(&refs);
2917
14.6k
                    }
2918
8.30k
                    break;
2919
8.29k
                }
2920
2921
145
                case END_SEND: {
2922
145
                    assert(_PyOpcode_num_popped(opcode, oparg) == 3);
2923
145
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2924
145
                    ref tos = ref_stack_pop(&refs);
2925
145
                    ref_stack_pop(&refs);
2926
145
                    ref_stack_pop(&refs);
2927
145
                    PUSH_REF(tos.instr, tos.local);
2928
145
                    break;
2929
145
                }
2930
2931
1.78k
                case SET_FUNCTION_ATTRIBUTE: {
2932
1.78k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2933
1.78k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2934
1.78k
                    ref tos = ref_stack_pop(&refs);
2935
1.78k
                    ref_stack_pop(&refs);
2936
1.78k
                    PUSH_REF(tos.instr, tos.local);
2937
1.78k
                    break;
2938
1.78k
                }
2939
2940
                // Opcodes that consume some inputs and push new values
2941
1.78k
                case CHECK_EXC_MATCH: {
2942
0
                    ref_stack_pop(&refs);
2943
0
                    PUSH_REF(i, NOT_LOCAL);
2944
0
                    break;
2945
0
                }
2946
2947
434
                case FOR_ITER: {
2948
434
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2949
434
                    PUSH_REF(i, NOT_LOCAL);
2950
434
                    break;
2951
434
                }
2952
2953
16.6k
                case LOAD_ATTR:
2954
16.9k
                case LOAD_SUPER_ATTR: {
2955
16.9k
                    ref self = ref_stack_pop(&refs);
2956
16.9k
                    if (opcode == LOAD_SUPER_ATTR) {
2957
280
                        ref_stack_pop(&refs);
2958
280
                        ref_stack_pop(&refs);
2959
280
                    }
2960
16.9k
                    PUSH_REF(i, NOT_LOCAL);
2961
16.9k
                    if (oparg & 1) {
2962
                        // A method call; conservatively assume that self is pushed
2963
                        // back onto the stack
2964
4.86k
                        PUSH_REF(self.instr, self.local);
2965
4.86k
                    }
2966
16.9k
                    break;
2967
16.9k
                }
2968
2969
16.9k
                case LOAD_SPECIAL:
2970
244
                case PUSH_EXC_INFO: {
2971
244
                    ref tos = ref_stack_pop(&refs);
2972
244
                    PUSH_REF(i, NOT_LOCAL);
2973
244
                    PUSH_REF(tos.instr, tos.local);
2974
244
                    break;
2975
244
                }
2976
2977
244
                case SEND: {
2978
145
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2979
145
                    ref_stack_pop(&refs);
2980
145
                    PUSH_REF(i, NOT_LOCAL);
2981
145
                    break;
2982
145
                }
2983
2984
                // Opcodes that consume all of their inputs
2985
133k
                default: {
2986
133k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2987
133k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2988
133k
                    if (HAS_TARGET(instr->i_opcode)) {
2989
6.74k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2990
6.74k
                    }
2991
133k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2992
                        // Block push opcodes only affect the stack when jumping
2993
                        // to the target.
2994
241k
                        for (int j = 0; j < num_popped; j++) {
2995
108k
                            ref_stack_pop(&refs);
2996
108k
                        }
2997
231k
                        for (int j = 0; j < num_pushed; j++) {
2998
97.6k
                            PUSH_REF(i, NOT_LOCAL);
2999
97.6k
                        }
3000
133k
                    }
3001
133k
                    break;
3002
133k
                }
3003
198k
            }
3004
198k
        }
3005
3006
        // Push fallthrough block
3007
17.6k
        if (BB_HAS_FALLTHROUGH(block)) {
3008
8.61k
            assert(block->b_next != NULL);
3009
8.61k
            load_fast_push_block(&sp, block->b_next, refs.size);
3010
8.61k
        }
3011
3012
        // Mark instructions that produce values that are on the stack at the
3013
        // end of the basic block
3014
35.1k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3015
17.4k
            ref r = ref_stack_at(&refs, i);
3016
17.4k
            if (r.instr != -1) {
3017
10.9k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3018
10.9k
            }
3019
17.4k
        }
3020
3021
        // Optimize instructions
3022
215k
        for (int i = 0; i < block->b_iused; i++) {
3023
198k
            if (!instr_flags[i]) {
3024
183k
                cfg_instr *instr = &block->b_instr[i];
3025
183k
                switch (instr->i_opcode) {
3026
23.5k
                    case LOAD_FAST:
3027
23.5k
                        instr->i_opcode = LOAD_FAST_BORROW;
3028
23.5k
                        break;
3029
3.32k
                    case LOAD_FAST_LOAD_FAST:
3030
3.32k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3031
3.32k
                        break;
3032
156k
                    default:
3033
156k
                        break;
3034
183k
                }
3035
183k
            }
3036
198k
        }
3037
17.6k
    }
3038
3039
5.64k
    #undef PUSH_REF
3040
3041
5.64k
    status = SUCCESS;
3042
3043
5.64k
done:
3044
5.64k
    ref_stack_fini(&refs);
3045
5.64k
    PyMem_Free(instr_flags);
3046
5.64k
    PyMem_Free(blocks);
3047
5.64k
    return status;
3048
5.64k
}
3049
3050
// helper functions for add_checks_for_loads_of_unknown_variables
3051
static inline void
3052
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3053
80.6k
{
3054
    // Push b if the unsafe mask is giving us any new information.
3055
    // To avoid overflowing the stack, only allow each block once.
3056
    // Use b->b_visited=1 to mean that b is currently on the stack.
3057
80.6k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3058
80.6k
    if (b->b_unsafe_locals_mask != both) {
3059
9.24k
        b->b_unsafe_locals_mask = both;
3060
        // More work left to do.
3061
9.24k
        if (!b->b_visited) {
3062
            // not on the stack, so push it.
3063
9.17k
            *(*sp)++ = b;
3064
9.17k
            b->b_visited = 1;
3065
9.17k
        }
3066
9.24k
    }
3067
80.6k
}
3068
3069
static void
3070
scan_block_for_locals(basicblock *b, basicblock ***sp)
3071
31.2k
{
3072
    // bit i is set if local i is potentially uninitialized
3073
31.2k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3074
245k
    for (int i = 0; i < b->b_iused; i++) {
3075
214k
        cfg_instr *instr = &b->b_instr[i];
3076
214k
        assert(instr->i_opcode != EXTENDED_ARG);
3077
214k
        if (instr->i_except != NULL) {
3078
49.0k
            maybe_push(instr->i_except, unsafe_mask, sp);
3079
49.0k
        }
3080
214k
        if (instr->i_oparg >= 64) {
3081
10.8k
            continue;
3082
10.8k
        }
3083
214k
        assert(instr->i_oparg >= 0);
3084
203k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3085
203k
        switch (instr->i_opcode) {
3086
399
            case DELETE_FAST:
3087
529
            case LOAD_FAST_AND_CLEAR:
3088
789
            case STORE_FAST_MAYBE_NULL:
3089
789
                unsafe_mask |= bit;
3090
789
                break;
3091
9.51k
            case STORE_FAST:
3092
9.51k
                unsafe_mask &= ~bit;
3093
9.51k
                break;
3094
43
            case LOAD_FAST_CHECK:
3095
                // If this doesn't raise, then the local is defined.
3096
43
                unsafe_mask &= ~bit;
3097
43
                break;
3098
42.9k
            case LOAD_FAST:
3099
42.9k
                if (unsafe_mask & bit) {
3100
43
                    instr->i_opcode = LOAD_FAST_CHECK;
3101
43
                }
3102
42.9k
                unsafe_mask &= ~bit;
3103
42.9k
                break;
3104
203k
        }
3105
203k
    }
3106
31.2k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3107
14.9k
        maybe_push(b->b_next, unsafe_mask, sp);
3108
14.9k
    }
3109
31.2k
    cfg_instr *last = basicblock_last_instr(b);
3110
31.2k
    if (last && is_jump(last)) {
3111
12.8k
        assert(last->i_target != NULL);
3112
12.8k
        maybe_push(last->i_target, unsafe_mask, sp);
3113
12.8k
    }
3114
31.2k
}
3115
3116
static int
3117
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3118
0
{
3119
0
    assert(nlocals > 64);
3120
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3121
0
    if (states == NULL) {
3122
0
        PyErr_NoMemory();
3123
0
        return ERROR;
3124
0
    }
3125
0
    Py_ssize_t blocknum = 0;
3126
    // state[i - 64] == blocknum if local i is guaranteed to
3127
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3128
    // STORE_FAST within that basicblock (not followed by
3129
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3130
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3131
0
        blocknum++;
3132
0
        for (int i = 0; i < b->b_iused; i++) {
3133
0
            cfg_instr *instr = &b->b_instr[i];
3134
0
            assert(instr->i_opcode != EXTENDED_ARG);
3135
0
            int arg = instr->i_oparg;
3136
0
            if (arg < 64) {
3137
0
                continue;
3138
0
            }
3139
0
            assert(arg >= 0);
3140
0
            switch (instr->i_opcode) {
3141
0
                case DELETE_FAST:
3142
0
                case LOAD_FAST_AND_CLEAR:
3143
0
                case STORE_FAST_MAYBE_NULL:
3144
0
                    states[arg - 64] = blocknum - 1;
3145
0
                    break;
3146
0
                case STORE_FAST:
3147
0
                    states[arg - 64] = blocknum;
3148
0
                    break;
3149
0
                case LOAD_FAST:
3150
0
                    if (states[arg - 64] != blocknum) {
3151
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3152
0
                    }
3153
0
                    states[arg - 64] = blocknum;
3154
0
                    break;
3155
0
                    Py_UNREACHABLE();
3156
0
            }
3157
0
        }
3158
0
    }
3159
0
    PyMem_Free(states);
3160
0
    return SUCCESS;
3161
0
}
3162
3163
static int
3164
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3165
5.64k
{
3166
5.64k
    assert(PyList_CheckExact(consts));
3167
5.64k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3168
5.64k
    if (nconsts == 0) {
3169
32
        return SUCCESS;  /* nothing to do */
3170
32
    }
3171
3172
5.61k
    Py_ssize_t *index_map = NULL;
3173
5.61k
    Py_ssize_t *reverse_index_map = NULL;
3174
5.61k
    int err = ERROR;
3175
3176
5.61k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3177
5.61k
    if (index_map == NULL) {
3178
0
        goto end;
3179
0
    }
3180
33.1k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3181
27.5k
        index_map[i] = -1;
3182
27.5k
    }
3183
    // The first constant may be docstring; keep it always.
3184
5.61k
    index_map[0] = 0;
3185
3186
    /* mark used consts */
3187
30.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3188
230k
        for (int i = 0; i < b->b_iused; i++) {
3189
205k
            int opcode = b->b_instr[i].i_opcode;
3190
205k
            if (OPCODE_HAS_CONST(opcode)) {
3191
26.7k
                int index = b->b_instr[i].i_oparg;
3192
26.7k
                index_map[index] = index;
3193
26.7k
            }
3194
205k
        }
3195
24.8k
    }
3196
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3197
    /* condense consts */
3198
5.61k
    Py_ssize_t n_used_consts = 0;
3199
38.7k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3200
33.1k
        if (index_map[i] != -1) {
3201
23.6k
            assert(index_map[i] == i);
3202
23.6k
            index_map[n_used_consts++] = index_map[i];
3203
23.6k
        }
3204
33.1k
    }
3205
5.61k
    if (n_used_consts == nconsts) {
3206
        /* nothing to do */
3207
2.77k
        err = SUCCESS;
3208
2.77k
        goto end;
3209
2.77k
    }
3210
3211
    /* move all used consts to the beginning of the consts list */
3212
5.61k
    assert(n_used_consts < nconsts);
3213
18.6k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3214
15.7k
        Py_ssize_t old_index = index_map[i];
3215
15.7k
        assert(i <= old_index && old_index < nconsts);
3216
15.7k
        if (i != old_index) {
3217
8.95k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3218
8.95k
            assert(value != NULL);
3219
8.95k
            PyList_SetItem(consts, i, Py_NewRef(value));
3220
8.95k
        }
3221
15.7k
    }
3222
3223
    /* truncate the consts list at its new size */
3224
2.83k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3225
0
        goto end;
3226
0
    }
3227
    /* adjust const indices in the bytecode */
3228
2.83k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3229
2.83k
    if (reverse_index_map == NULL) {
3230
0
        goto end;
3231
0
    }
3232
28.1k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3233
25.2k
        reverse_index_map[i] = -1;
3234
25.2k
    }
3235
18.6k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3236
15.7k
        assert(index_map[i] != -1);
3237
15.7k
        assert(reverse_index_map[index_map[i]] == -1);
3238
15.7k
        reverse_index_map[index_map[i]] = i;
3239
15.7k
    }
3240
3241
16.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3242
147k
        for (int i = 0; i < b->b_iused; i++) {
3243
132k
            int opcode = b->b_instr[i].i_opcode;
3244
132k
            if (OPCODE_HAS_CONST(opcode)) {
3245
18.4k
                int index = b->b_instr[i].i_oparg;
3246
18.4k
                assert(reverse_index_map[index] >= 0);
3247
18.4k
                assert(reverse_index_map[index] < n_used_consts);
3248
18.4k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3249
18.4k
            }
3250
132k
        }
3251
14.1k
    }
3252
3253
2.83k
    err = SUCCESS;
3254
5.61k
end:
3255
5.61k
    PyMem_Free(index_map);
3256
5.61k
    PyMem_Free(reverse_index_map);
3257
5.61k
    return err;
3258
2.83k
}
3259
3260
3261
3262
static int
3263
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3264
                                                int nlocals,
3265
                                                int nparams)
3266
5.64k
{
3267
5.64k
    if (nlocals == 0) {
3268
1.78k
        return SUCCESS;
3269
1.78k
    }
3270
3.85k
    if (nlocals > 64) {
3271
        // To avoid O(nlocals**2) compilation, locals beyond the first
3272
        // 64 are only analyzed one basicblock at a time: initialization
3273
        // info is not passed between basicblocks.
3274
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3275
0
            return ERROR;
3276
0
        }
3277
0
        nlocals = 64;
3278
0
    }
3279
3.85k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3280
3.85k
    if (stack == NULL) {
3281
0
        return ERROR;
3282
0
    }
3283
3.85k
    basicblock **sp = stack;
3284
3285
    // First origin of being uninitialized:
3286
    // The non-parameter locals in the entry block.
3287
3.85k
    uint64_t start_mask = 0;
3288
7.29k
    for (int i = nparams; i < nlocals; i++) {
3289
3.43k
        start_mask |= (uint64_t)1 << i;
3290
3.43k
    }
3291
3.85k
    maybe_push(entryblock, start_mask, &sp);
3292
3293
    // Second origin of being uninitialized:
3294
    // There could be DELETE_FAST somewhere, so
3295
    // be sure to scan each basicblock at least once.
3296
25.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3297
22.1k
        scan_block_for_locals(b, &sp);
3298
22.1k
    }
3299
    // Now propagate the uncertainty from the origins we found: Use
3300
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3301
13.0k
    while (sp > stack) {
3302
9.17k
        basicblock *b = *--sp;
3303
        // mark as no longer on stack
3304
9.17k
        b->b_visited = 0;
3305
9.17k
        scan_block_for_locals(b, &sp);
3306
9.17k
    }
3307
3.85k
    PyMem_Free(stack);
3308
3.85k
    return SUCCESS;
3309
3.85k
}
3310
3311
3312
static int
3313
4.34k
mark_warm(basicblock *entryblock) {
3314
4.34k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3315
4.34k
    if (stack == NULL) {
3316
0
        return ERROR;
3317
0
    }
3318
4.34k
    basicblock **sp = stack;
3319
3320
4.34k
    *sp++ = entryblock;
3321
4.34k
    entryblock->b_visited = 1;
3322
20.4k
    while (sp > stack) {
3323
16.0k
        basicblock *b = *(--sp);
3324
16.0k
        assert(!b->b_except_handler);
3325
16.0k
        b->b_warm = 1;
3326
16.0k
        basicblock *next = b->b_next;
3327
16.0k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3328
7.03k
            *sp++ = next;
3329
7.03k
            next->b_visited = 1;
3330
7.03k
        }
3331
157k
        for (int i=0; i < b->b_iused; i++) {
3332
140k
            cfg_instr *instr = &b->b_instr[i];
3333
140k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3334
4.69k
                *sp++ = instr->i_target;
3335
4.69k
                instr->i_target->b_visited = 1;
3336
4.69k
            }
3337
140k
        }
3338
16.0k
    }
3339
4.34k
    PyMem_Free(stack);
3340
4.34k
    return SUCCESS;
3341
4.34k
}
3342
3343
static int
3344
4.34k
mark_cold(basicblock *entryblock) {
3345
27.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3346
23.6k
        assert(!b->b_cold && !b->b_warm);
3347
23.6k
    }
3348
4.34k
    if (mark_warm(entryblock) < 0) {
3349
0
        return ERROR;
3350
0
    }
3351
3352
4.34k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3353
4.34k
    if (stack == NULL) {
3354
0
        return ERROR;
3355
0
    }
3356
3357
4.34k
    basicblock **sp = stack;
3358
27.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3359
23.6k
        if (b->b_except_handler) {
3360
1.64k
            assert(!b->b_warm);
3361
1.64k
            *sp++ = b;
3362
1.64k
            b->b_visited = 1;
3363
1.64k
        }
3364
23.6k
    }
3365
3366
7.38k
    while (sp > stack) {
3367
3.04k
        basicblock *b = *(--sp);
3368
3.04k
        b->b_cold = 1;
3369
3.04k
        basicblock *next = b->b_next;
3370
3.04k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3371
919
            if (!next->b_warm && !next->b_visited) {
3372
741
                *sp++ = next;
3373
741
                next->b_visited = 1;
3374
741
            }
3375
919
        }
3376
15.9k
        for (int i = 0; i < b->b_iused; i++) {
3377
12.8k
            cfg_instr *instr = &b->b_instr[i];
3378
12.8k
            if (is_jump(instr)) {
3379
899
                assert(i == b->b_iused - 1);
3380
899
                basicblock *target = b->b_instr[i].i_target;
3381
899
                if (!target->b_warm && !target->b_visited) {
3382
662
                    *sp++ = target;
3383
662
                    target->b_visited = 1;
3384
662
                }
3385
899
            }
3386
12.8k
        }
3387
3.04k
    }
3388
4.34k
    PyMem_Free(stack);
3389
4.34k
    return SUCCESS;
3390
4.34k
}
3391
3392
3393
static int
3394
5.64k
push_cold_blocks_to_end(cfg_builder *g) {
3395
5.64k
    basicblock *entryblock = g->g_entryblock;
3396
5.64k
    if (entryblock->b_next == NULL) {
3397
        /* single basicblock, no need to reorder */
3398
1.29k
        return SUCCESS;
3399
1.29k
    }
3400
4.34k
    RETURN_IF_ERROR(mark_cold(entryblock));
3401
3402
4.34k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3403
3404
    /* If we have a cold block with fallthrough to a warm block, add */
3405
    /* an explicit jump instead of fallthrough */
3406
28.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3407
23.7k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3408
145
            basicblock *explicit_jump = cfg_builder_new_block(g);
3409
145
            if (explicit_jump == NULL) {
3410
0
                return ERROR;
3411
0
            }
3412
145
            if (!IS_LABEL(b->b_next->b_label)) {
3413
0
                b->b_next->b_label.id = next_lbl++;
3414
0
            }
3415
145
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3416
145
                             NO_LOCATION);
3417
145
            explicit_jump->b_cold = 1;
3418
145
            explicit_jump->b_next = b->b_next;
3419
145
            explicit_jump->b_predecessors = 1;
3420
145
            b->b_next = explicit_jump;
3421
3422
            /* set target */
3423
145
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3424
145
            last->i_target = explicit_jump->b_next;
3425
145
        }
3426
23.7k
    }
3427
3428
4.34k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3429
4.34k
    basicblock *cold_blocks = NULL;
3430
4.34k
    basicblock *cold_blocks_tail = NULL;
3431
3432
4.34k
    basicblock *b = entryblock;
3433
5.57k
    while(b->b_next) {
3434
5.57k
        assert(!b->b_cold);
3435
21.8k
        while (b->b_next && !b->b_next->b_cold) {
3436
16.2k
            b = b->b_next;
3437
16.2k
        }
3438
5.57k
        if (b->b_next == NULL) {
3439
            /* no more cold blocks */
3440
4.34k
            break;
3441
4.34k
        }
3442
3443
        /* b->b_next is the beginning of a cold streak */
3444
5.57k
        assert(!b->b_cold && b->b_next->b_cold);
3445
3446
1.22k
        basicblock *b_end = b->b_next;
3447
3.18k
        while (b_end->b_next && b_end->b_next->b_cold) {
3448
1.96k
            b_end = b_end->b_next;
3449
1.96k
        }
3450
3451
        /* b_end is the end of the cold streak */
3452
1.22k
        assert(b_end && b_end->b_cold);
3453
1.22k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3454
3455
1.22k
        if (cold_blocks == NULL) {
3456
580
            cold_blocks = b->b_next;
3457
580
        }
3458
644
        else {
3459
644
            cold_blocks_tail->b_next = b->b_next;
3460
644
        }
3461
1.22k
        cold_blocks_tail = b_end;
3462
1.22k
        b->b_next = b_end->b_next;
3463
1.22k
        b_end->b_next = NULL;
3464
1.22k
    }
3465
4.34k
    assert(b != NULL && b->b_next == NULL);
3466
4.34k
    b->b_next = cold_blocks;
3467
3468
4.34k
    if (cold_blocks != NULL) {
3469
580
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3470
580
    }
3471
4.34k
    return SUCCESS;
3472
4.34k
}
3473
3474
static int
3475
convert_pseudo_conditional_jumps(cfg_builder *g)
3476
5.64k
{
3477
5.64k
    basicblock *entryblock = g->g_entryblock;
3478
30.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3479
227k
        for (int i = 0; i < b->b_iused; i++) {
3480
202k
            cfg_instr *instr = &b->b_instr[i];
3481
202k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3482
1.09k
                assert(i == b->b_iused - 1);
3483
1.09k
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3484
1.03k
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3485
1.09k
                location loc = instr->i_loc;
3486
1.09k
                basicblock *except = instr->i_except;
3487
1.09k
                cfg_instr copy = {
3488
1.09k
                            .i_opcode = COPY,
3489
1.09k
                            .i_oparg = 1,
3490
1.09k
                            .i_loc = loc,
3491
1.09k
                            .i_target = NULL,
3492
1.09k
                            .i_except = except,
3493
1.09k
                };
3494
1.09k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3495
1.09k
                cfg_instr to_bool = {
3496
1.09k
                            .i_opcode = TO_BOOL,
3497
1.09k
                            .i_oparg = 0,
3498
1.09k
                            .i_loc = loc,
3499
1.09k
                            .i_target = NULL,
3500
1.09k
                            .i_except = except,
3501
1.09k
                };
3502
1.09k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3503
1.09k
            }
3504
202k
        }
3505
25.0k
    }
3506
5.64k
    return SUCCESS;
3507
5.64k
}
3508
3509
static int
3510
convert_pseudo_ops(cfg_builder *g)
3511
5.64k
{
3512
5.64k
    basicblock *entryblock = g->g_entryblock;
3513
30.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3514
231k
        for (int i = 0; i < b->b_iused; i++) {
3515
206k
            cfg_instr *instr = &b->b_instr[i];
3516
206k
            if (is_block_push(instr)) {
3517
1.64k
                INSTR_SET_OP0(instr, NOP);
3518
1.64k
            }
3519
204k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3520
1.60k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3521
1.60k
                instr->i_opcode = LOAD_FAST;
3522
1.60k
            }
3523
203k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3524
130
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3525
130
                instr->i_opcode = STORE_FAST;
3526
130
            }
3527
206k
        }
3528
25.0k
    }
3529
5.64k
    return remove_redundant_nops_and_jumps(g);
3530
5.64k
}
3531
3532
static inline bool
3533
35.1k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3534
35.1k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3535
21.0k
        return basicblock_has_no_lineno(b);
3536
21.0k
    }
3537
14.1k
    else {
3538
14.1k
        return false;
3539
14.1k
    }
3540
35.1k
}
3541
3542
3543
/* PEP 626 mandates that the f_lineno of a frame is correct
3544
 * after a frame terminates. It would be prohibitively expensive
3545
 * to continuously update the f_lineno field at runtime,
3546
 * so we make sure that all exiting instruction (raises and returns)
3547
 * have a valid line number, allowing us to compute f_lineno lazily.
3548
 * We can do this by duplicating the exit blocks without line number
3549
 * so that none have more than one predecessor. We can then safely
3550
 * copy the line number from the sole predecessor block.
3551
 */
3552
static int
3553
duplicate_exits_without_lineno(cfg_builder *g)
3554
11.2k
{
3555
11.2k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3556
3557
    /* Copy all exit blocks without line number that are targets of a jump.
3558
     */
3559
11.2k
    basicblock *entryblock = g->g_entryblock;
3560
61.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3561
49.9k
        cfg_instr *last = basicblock_last_instr(b);
3562
49.9k
        if (last == NULL) {
3563
8.85k
            continue;
3564
8.85k
        }
3565
41.1k
        if (is_jump(last)) {
3566
16.6k
            basicblock *target = next_nonempty_block(last->i_target);
3567
16.6k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3568
376
                basicblock *new_target = copy_basicblock(g, target);
3569
376
                if (new_target == NULL) {
3570
0
                    return ERROR;
3571
0
                }
3572
376
                new_target->b_instr[0].i_loc = last->i_loc;
3573
376
                last->i_target = new_target;
3574
376
                target->b_predecessors--;
3575
376
                new_target->b_predecessors = 1;
3576
376
                new_target->b_next = target->b_next;
3577
376
                new_target->b_label.id = next_lbl++;
3578
376
                target->b_next = new_target;
3579
376
            }
3580
16.6k
        }
3581
41.1k
    }
3582
3583
    /* Any remaining reachable exit blocks without line number can only be reached by
3584
     * fall through, and thus can only have a single predecessor */
3585
61.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3586
49.9k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3587
18.5k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3588
515
                cfg_instr *last = basicblock_last_instr(b);
3589
515
                assert(last != NULL);
3590
515
                b->b_next->b_instr[0].i_loc = last->i_loc;
3591
515
            }
3592
18.5k
        }
3593
49.9k
    }
3594
11.2k
    return SUCCESS;
3595
11.2k
}
3596
3597
3598
/* If an instruction has no line number, but it's predecessor in the BB does,
3599
 * then copy the line number. If a successor block has no line number, and only
3600
 * one predecessor, then inherit the line number.
3601
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3602
 * Also reduces the size of the line number table,
3603
 * but has no impact on the generated line number events.
3604
 */
3605
3606
static inline void
3607
maybe_propagate_location(basicblock *b, int i, location loc)
3608
442k
{
3609
442k
    assert(b->b_iused > i);
3610
442k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3611
27.1k
         b->b_instr[i].i_loc = loc;
3612
27.1k
    }
3613
442k
}
3614
3615
static void
3616
propagate_line_numbers(basicblock *entryblock)
3617
11.2k
{
3618
61.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3619
49.9k
        cfg_instr *last = basicblock_last_instr(b);
3620
49.9k
        if (last == NULL) {
3621
8.85k
            continue;
3622
8.85k
        }
3623
3624
41.1k
        location prev_location = NO_LOCATION;
3625
463k
        for (int i = 0; i < b->b_iused; i++) {
3626
422k
            maybe_propagate_location(b, i, prev_location);
3627
422k
            prev_location = b->b_instr[i].i_loc;
3628
422k
        }
3629
41.1k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3630
13.6k
            if (b->b_next->b_iused > 0) {
3631
13.6k
                maybe_propagate_location(b->b_next, 0, prev_location);
3632
13.6k
            }
3633
13.6k
        }
3634
41.1k
        if (is_jump(last)) {
3635
16.6k
            basicblock *target = last->i_target;
3636
16.6k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3637
1
                target = target->b_next;
3638
1
            }
3639
16.6k
            if (target->b_predecessors == 1) {
3640
7.25k
                maybe_propagate_location(target, 0, prev_location);
3641
7.25k
            }
3642
16.6k
        }
3643
41.1k
    }
3644
11.2k
}
3645
3646
static int
3647
resolve_line_numbers(cfg_builder *g, int firstlineno)
3648
11.2k
{
3649
11.2k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3650
11.2k
    propagate_line_numbers(g->g_entryblock);
3651
11.2k
    return SUCCESS;
3652
11.2k
}
3653
3654
int
3655
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3656
                        int nlocals, int nparams, int firstlineno)
3657
5.64k
{
3658
5.64k
    assert(cfg_builder_check(g));
3659
    /** Preprocessing **/
3660
    /* Map labels to targets and mark exception handlers */
3661
5.64k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3662
5.64k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3663
5.64k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3664
3665
    /** Optimization **/
3666
5.64k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3667
5.64k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3668
5.64k
    RETURN_IF_ERROR(
3669
5.64k
        add_checks_for_loads_of_uninitialized_variables(
3670
5.64k
            g->g_entryblock, nlocals, nparams));
3671
5.64k
    RETURN_IF_ERROR(insert_superinstructions(g));
3672
3673
5.64k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3674
5.64k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3675
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3676
    // assert(all_exits_have_lineno(g->g_entryblock));
3677
5.64k
    return SUCCESS;
3678
5.64k
}
3679
3680
static int *
3681
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3682
5.64k
{
3683
5.64k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3684
5.64k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3685
5.64k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3686
3687
5.64k
    int noffsets = ncellvars + nfreevars;
3688
5.64k
    int *fixed = PyMem_New(int, noffsets);
3689
5.64k
    if (fixed == NULL) {
3690
0
        PyErr_NoMemory();
3691
0
        return NULL;
3692
0
    }
3693
8.03k
    for (int i = 0; i < noffsets; i++) {
3694
2.38k
        fixed[i] = nlocals + i;
3695
2.38k
    }
3696
3697
5.64k
    PyObject *varname, *cellindex;
3698
5.64k
    Py_ssize_t pos = 0;
3699
6.87k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3700
1.22k
        PyObject *varindex;
3701
1.22k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3702
0
            goto error;
3703
0
        }
3704
1.22k
        if (varindex == NULL) {
3705
529
            continue;
3706
529
        }
3707
3708
696
        int argoffset = PyLong_AsInt(varindex);
3709
696
        Py_DECREF(varindex);
3710
696
        if (argoffset == -1 && PyErr_Occurred()) {
3711
0
            goto error;
3712
0
        }
3713
3714
696
        int oldindex = PyLong_AsInt(cellindex);
3715
696
        if (oldindex == -1 && PyErr_Occurred()) {
3716
0
            goto error;
3717
0
        }
3718
696
        fixed[oldindex] = argoffset;
3719
696
    }
3720
5.64k
    return fixed;
3721
3722
0
error:
3723
0
    PyMem_Free(fixed);
3724
0
    return NULL;
3725
5.64k
}
3726
3727
#define IS_GENERATOR(CF) \
3728
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3729
3730
static int
3731
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3732
                           int *fixed, int nfreevars)
3733
5.64k
{
3734
5.64k
    assert(umd->u_firstlineno > 0);
3735
3736
    /* Set up cells for any variable that escapes, to be put in a closure. */
3737
5.64k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3738
5.64k
    if (ncellvars) {
3739
        // umd->u_cellvars has the cells out of order so we sort them
3740
        // before adding the MAKE_CELL instructions.  Note that we
3741
        // adjust for arg cells, which come first.
3742
789
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3743
789
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3744
789
        if (sorted == NULL) {
3745
0
            PyErr_NoMemory();
3746
0
            return ERROR;
3747
0
        }
3748
2.01k
        for (int i = 0; i < ncellvars; i++) {
3749
1.22k
            sorted[fixed[i]] = i + 1;
3750
1.22k
        }
3751
3.17k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3752
2.39k
            int oldindex = sorted[i] - 1;
3753
2.39k
            if (oldindex == -1) {
3754
1.16k
                continue;
3755
1.16k
            }
3756
1.22k
            cfg_instr make_cell = {
3757
1.22k
                .i_opcode = MAKE_CELL,
3758
                // This will get fixed in offset_derefs().
3759
1.22k
                .i_oparg = oldindex,
3760
1.22k
                .i_loc = NO_LOCATION,
3761
1.22k
                .i_target = NULL,
3762
1.22k
                .i_except = NULL,
3763
1.22k
            };
3764
1.22k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3765
0
                PyMem_RawFree(sorted);
3766
0
                return ERROR;
3767
0
            }
3768
1.22k
            ncellsused += 1;
3769
1.22k
        }
3770
789
        PyMem_RawFree(sorted);
3771
789
    }
3772
3773
5.64k
    if (nfreevars) {
3774
754
        cfg_instr copy_frees = {
3775
754
            .i_opcode = COPY_FREE_VARS,
3776
754
            .i_oparg = nfreevars,
3777
754
            .i_loc = NO_LOCATION,
3778
754
            .i_target = NULL,
3779
754
            .i_except = NULL,
3780
754
        };
3781
754
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3782
754
    }
3783
3784
5.64k
    return SUCCESS;
3785
5.64k
}
3786
3787
static int
3788
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3789
5.64k
{
3790
5.64k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3791
5.64k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3792
5.64k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3793
5.64k
    int noffsets = ncellvars + nfreevars;
3794
3795
    // First deal with duplicates (arg cells).
3796
5.64k
    int numdropped = 0;
3797
8.03k
    for (int i = 0; i < noffsets ; i++) {
3798
2.38k
        if (fixedmap[i] == i + nlocals) {
3799
1.68k
            fixedmap[i] -= numdropped;
3800
1.68k
        }
3801
696
        else {
3802
            // It was a duplicate (cell/arg).
3803
696
            numdropped += 1;
3804
696
        }
3805
2.38k
    }
3806
3807
    // Then update offsets, either relative to locals or by cell2arg.
3808
30.7k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3809
231k
        for (int i = 0; i < b->b_iused; i++) {
3810
206k
            cfg_instr *inst = &b->b_instr[i];
3811
            // This is called before extended args are generated.
3812
206k
            assert(inst->i_opcode != EXTENDED_ARG);
3813
206k
            int oldoffset = inst->i_oparg;
3814
206k
            switch(inst->i_opcode) {
3815
1.22k
                case MAKE_CELL:
3816
2.82k
                case LOAD_CLOSURE:
3817
5.77k
                case LOAD_DEREF:
3818
6.30k
                case STORE_DEREF:
3819
6.31k
                case DELETE_DEREF:
3820
6.31k
                case LOAD_FROM_DICT_OR_DEREF:
3821
6.31k
                    assert(oldoffset >= 0);
3822
6.31k
                    assert(oldoffset < noffsets);
3823
6.31k
                    assert(fixedmap[oldoffset] >= 0);
3824
6.31k
                    inst->i_oparg = fixedmap[oldoffset];
3825
206k
            }
3826
206k
        }
3827
25.0k
    }
3828
3829
5.64k
    return numdropped;
3830
5.64k
}
3831
3832
static int
3833
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
3834
5.64k
{
3835
5.64k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3836
5.64k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3837
5.64k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3838
5.64k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3839
5.64k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3840
5.64k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3841
5.64k
    assert(INT_MAX - nlocals - ncellvars > 0);
3842
5.64k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3843
5.64k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3844
5.64k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3845
5.64k
    if (cellfixedoffsets == NULL) {
3846
0
        return ERROR;
3847
0
    }
3848
3849
    // This must be called before fix_cell_offsets().
3850
5.64k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
3851
0
        PyMem_Free(cellfixedoffsets);
3852
0
        return ERROR;
3853
0
    }
3854
3855
5.64k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3856
5.64k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3857
5.64k
    cellfixedoffsets = NULL;
3858
5.64k
    if (numdropped < 0) {
3859
0
        return ERROR;
3860
0
    }
3861
3862
5.64k
    nlocalsplus -= numdropped;
3863
5.64k
    return nlocalsplus;
3864
5.64k
}
3865
3866
cfg_builder *
3867
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3868
5.64k
{
3869
5.64k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3870
0
        return NULL;
3871
0
    }
3872
5.64k
    cfg_builder *g = _PyCfgBuilder_New();
3873
5.64k
    if (g == NULL) {
3874
0
        return NULL;
3875
0
    }
3876
233k
    for (int i = 0; i < seq->s_used; i++) {
3877
227k
        seq->s_instrs[i].i_target = 0;
3878
227k
    }
3879
233k
    for (int i = 0; i < seq->s_used; i++) {
3880
227k
        _PyInstruction *instr = &seq->s_instrs[i];
3881
227k
        if (HAS_TARGET(instr->i_opcode)) {
3882
11.0k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3883
11.0k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3884
11.0k
        }
3885
227k
    }
3886
5.64k
    int offset = 0;
3887
233k
    for (int i = 0; i < seq->s_used; i++) {
3888
227k
        _PyInstruction *instr = &seq->s_instrs[i];
3889
227k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3890
856
            if (seq->s_annotations_code != NULL) {
3891
0
                assert(seq->s_annotations_code->s_labelmap_size == 0
3892
0
                    && seq->s_annotations_code->s_nested == NULL);
3893
0
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3894
0
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3895
0
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3896
0
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3897
0
                        goto error;
3898
0
                    }
3899
0
                }
3900
0
                offset += seq->s_annotations_code->s_used - 1;
3901
0
            }
3902
856
            else {
3903
856
                offset -= 1;
3904
856
            }
3905
856
            continue;
3906
856
        }
3907
226k
        if (instr->i_target) {
3908
8.70k
            jump_target_label lbl_ = {i + offset};
3909
8.70k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3910
0
                goto error;
3911
0
            }
3912
8.70k
        }
3913
226k
        int opcode = instr->i_opcode;
3914
226k
        int oparg = instr->i_oparg;
3915
226k
        if (HAS_TARGET(opcode)) {
3916
11.0k
            oparg += offset;
3917
11.0k
        }
3918
226k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3919
0
            goto error;
3920
0
        }
3921
226k
    }
3922
5.64k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3923
0
        goto error;
3924
0
    }
3925
5.64k
    return g;
3926
0
error:
3927
0
    _PyCfgBuilder_Free(g);
3928
0
    return NULL;
3929
5.64k
}
3930
3931
int
3932
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3933
5.64k
{
3934
5.64k
    int lbl = 0;
3935
30.9k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3936
25.2k
        b->b_label = (jump_target_label){lbl};
3937
25.2k
        lbl += 1;
3938
25.2k
    }
3939
30.9k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3940
25.2k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3941
236k
        for (int i = 0; i < b->b_iused; i++) {
3942
211k
            cfg_instr *instr = &b->b_instr[i];
3943
211k
            if (HAS_TARGET(instr->i_opcode)) {
3944
                /* Set oparg to the label id (it will later be mapped to an offset) */
3945
8.37k
                instr->i_oparg = instr->i_target->b_label.id;
3946
8.37k
            }
3947
211k
            RETURN_IF_ERROR(
3948
211k
                _PyInstructionSequence_Addop(
3949
211k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3950
3951
211k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3952
211k
            if (instr->i_except != NULL) {
3953
27.2k
                hi->h_label = instr->i_except->b_label.id;
3954
27.2k
                hi->h_startdepth = instr->i_except->b_startdepth;
3955
27.2k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3956
27.2k
            }
3957
184k
            else {
3958
184k
                hi->h_label = -1;
3959
184k
            }
3960
211k
        }
3961
25.2k
    }
3962
5.64k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3963
0
        return ERROR;
3964
0
    }
3965
5.64k
    return SUCCESS;
3966
5.64k
}
3967
3968
3969
int
3970
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3971
                                     _PyCompile_CodeUnitMetadata *umd,
3972
                                     int *stackdepth, int *nlocalsplus,
3973
                                     _PyInstructionSequence *seq)
3974
5.64k
{
3975
5.64k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3976
3977
5.64k
    *stackdepth = calculate_stackdepth(g);
3978
5.64k
    if (*stackdepth < 0) {
3979
0
        return ERROR;
3980
0
    }
3981
3982
5.64k
    *nlocalsplus = prepare_localsplus(umd, g);
3983
5.64k
    if (*nlocalsplus < 0) {
3984
0
        return ERROR;
3985
0
    }
3986
3987
5.64k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
3988
3989
    /* Order of basic blocks must have been determined by now */
3990
3991
5.64k
    RETURN_IF_ERROR(normalize_jumps(g));
3992
5.64k
    assert(no_redundant_jumps(g));
3993
3994
    /* Can't modify the bytecode after inserting instructions that produce
3995
     * borrowed references.
3996
     */
3997
5.64k
    RETURN_IF_ERROR(optimize_load_fast(g));
3998
3999
    /* Can't modify the bytecode after computing jump offsets. */
4000
5.64k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4001
0
        return ERROR;
4002
0
    }
4003
4004
5.64k
    return SUCCESS;
4005
5.64k
}
4006
4007
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4008
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4009
 */
4010
int
4011
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4012
0
{
4013
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4014
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4015
0
    return SUCCESS;
4016
0
}
4017
4018
/* Exported API functions */
4019
4020
int
4021
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4022
0
{
4023
0
    stack_effects effs;
4024
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4025
0
        return PY_INVALID_STACK_EFFECT;
4026
0
    }
4027
0
    return effs.net;
4028
0
}
4029
4030
int
4031
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4032
25
{
4033
25
    stack_effects effs;
4034
25
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4035
0
        return PY_INVALID_STACK_EFFECT;
4036
0
    }
4037
25
    return effs.net;
4038
25
}
4039
4040
/* Access to compiler optimizations for unit tests.
4041
4042
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4043
 * a CFG, optimizes it and converts back to an instruction list.
4044
 */
4045
4046
static PyObject *
4047
cfg_to_instruction_sequence(cfg_builder *g)
4048
0
{
4049
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4050
0
    if (seq == NULL) {
4051
0
        return NULL;
4052
0
    }
4053
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4054
0
        PyInstructionSequence_Fini(seq);
4055
0
        return NULL;
4056
0
    }
4057
0
    return (PyObject*)seq;
4058
0
}
4059
4060
PyObject *
4061
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4062
0
{
4063
0
    if (!_PyInstructionSequence_Check(seq)) {
4064
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4065
0
        return NULL;
4066
0
    }
4067
0
    PyObject *const_cache = PyDict_New();
4068
0
    if (const_cache == NULL) {
4069
0
        return NULL;
4070
0
    }
4071
4072
0
    PyObject *res = NULL;
4073
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4074
0
    if (g == NULL) {
4075
0
        goto error;
4076
0
    }
4077
0
    int nparams = 0, firstlineno = 1;
4078
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4079
0
                                nparams, firstlineno) < 0) {
4080
0
        goto error;
4081
0
    }
4082
4083
0
    if (calculate_stackdepth(g) == ERROR) {
4084
0
        goto error;
4085
0
    }
4086
4087
0
    if (optimize_load_fast(g) != SUCCESS) {
4088
0
        goto error;
4089
0
    }
4090
4091
0
    res = cfg_to_instruction_sequence(g);
4092
0
error:
4093
0
    Py_DECREF(const_cache);
4094
0
    _PyCfgBuilder_Free(g);
4095
0
    return res;
4096
0
}