Coverage Report

Created: 2026-01-17 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
1.50M
#define SUCCESS 0
19
8.29k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
2.37M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
505k
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the block are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
519k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
519k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
2.19M
{
101
2.19M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
2.19M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
2.19M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
2.04M
{
108
2.04M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
2.04M
}
110
111
/* One arg*/
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
50.1k
    do { \
114
50.1k
        assert(OPCODE_HAS_ARG(OP)); \
115
50.1k
        cfg_instr *_instr__ptr_ = (I); \
116
50.1k
        _instr__ptr_->i_opcode = (OP); \
117
50.1k
        _instr__ptr_->i_oparg = (ARG); \
118
50.1k
    } while (0);
119
120
/* No args*/
121
#define INSTR_SET_OP0(I, OP) \
122
109k
    do { \
123
109k
        assert(!OPCODE_HAS_ARG(OP)); \
124
109k
        cfg_instr *_instr__ptr_ = (I); \
125
109k
        _instr__ptr_->i_opcode = (OP); \
126
109k
        _instr__ptr_->i_oparg = 0; \
127
109k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
5.95k
    do { \
131
5.95k
        cfg_instr *_instr__ptr_ = (I); \
132
5.95k
        _instr__ptr_->i_loc = (LOC); \
133
5.95k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
505k
{
144
505k
    assert(b != NULL);
145
505k
    _Py_c_array_t array = {
146
505k
        .array = (void*)b->b_instr,
147
505k
        .allocated_entries = b->b_ialloc,
148
505k
        .item_size = sizeof(cfg_instr),
149
505k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
505k
    };
151
152
505k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
505k
    b->b_instr = array.array;
154
505k
    b->b_ialloc = array.allocated_entries;
155
505k
    return b->b_iused++;
156
505k
}
157
158
static cfg_instr *
159
1.93M
basicblock_last_instr(const basicblock *b) {
160
1.93M
    assert(b->b_iused >= 0);
161
1.93M
    if (b->b_iused > 0) {
162
1.78M
        assert(b->b_instr != NULL);
163
1.78M
        return &b->b_instr[b->b_iused - 1];
164
1.78M
    }
165
146k
    return NULL;
166
1.93M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
60.5k
{
175
60.5k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
60.5k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
60.5k
    b->b_list = g->g_block_list;
182
60.5k
    g->g_block_list = b;
183
60.5k
    b->b_label = NO_LABEL;
184
60.5k
    return b;
185
60.5k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
494k
{
190
494k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
494k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
494k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
494k
    assert(0 <= oparg && oparg < (1 << 30));
194
195
494k
    int off = basicblock_next_instr(b);
196
494k
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
494k
    cfg_instr *i = &b->b_instr[off];
200
494k
    i->i_opcode = opcode;
201
494k
    i->i_oparg = oparg;
202
494k
    i->i_loc = loc;
203
    // memory is already zero initialized
204
494k
    assert(i->i_target == NULL);
205
494k
    assert(i->i_except == NULL);
206
207
494k
    return SUCCESS;
208
494k
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
1.80k
{
213
1.80k
    cfg_instr *last = basicblock_last_instr(b);
214
1.80k
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
1.80k
    RETURN_IF_ERROR(
219
1.80k
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
1.80k
    last = basicblock_last_instr(b);
221
1.80k
    assert(last && last->i_opcode == opcode);
222
1.80k
    last->i_target = target;
223
1.80k
    return SUCCESS;
224
1.80k
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
4.46k
{
229
11.7k
    for (int i = 0; i < from->b_iused; i++) {
230
7.31k
        int n = basicblock_next_instr(to);
231
7.31k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
7.31k
        to->b_instr[n] = from->b_instr[i];
235
7.31k
    }
236
4.46k
    return SUCCESS;
237
4.46k
}
238
239
static inline int
240
607k
basicblock_nofallthrough(const basicblock *b) {
241
607k
    cfg_instr *last = basicblock_last_instr(b);
242
607k
    return (last &&
243
577k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
381k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
607k
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
979k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
806
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
806
    assert(BB_NO_FALLTHROUGH(block));
257
806
    basicblock *result = cfg_builder_new_block(g);
258
806
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
806
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
806
    return result;
265
806
}
266
267
static int
268
3.48k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
3.48k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
64.1k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
60.6k
        block->b_instr[i] = block->b_instr[i-1];
272
60.6k
    }
273
3.48k
    block->b_instr[pos] = *instr;
274
3.48k
    return SUCCESS;
275
3.48k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
50.5k
{
343
50.5k
    assert(block != NULL);
344
50.5k
    g->g_curblock->b_next = block;
345
50.5k
    g->g_curblock = block;
346
50.5k
    return block;
347
50.5k
}
348
349
static inline int
350
118k
basicblock_exits_scope(const basicblock *b) {
351
118k
    cfg_instr *last = basicblock_last_instr(b);
352
118k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
118k
}
354
355
static inline int
356
75.2k
basicblock_has_eval_break(const basicblock *b) {
357
375k
    for (int i = 0; i < b->b_iused; i++) {
358
330k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
30.5k
            return true;
360
30.5k
        }
361
330k
    }
362
44.6k
    return false;
363
75.2k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
502k
{
368
502k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
502k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
43.0k
        return true;
371
43.0k
    }
372
459k
    if (IS_LABEL(g->g_current_label)) {
373
7.41k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
7.41k
            return true;
375
7.41k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
7.41k
    }
382
452k
    return false;
383
459k
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
502k
{
388
502k
    if (cfg_builder_current_block_is_terminated(g)) {
389
50.5k
        basicblock *b = cfg_builder_new_block(g);
390
50.5k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
50.5k
        b->b_label = g->g_current_label;
394
50.5k
        g->g_current_label = NO_LABEL;
395
50.5k
        cfg_builder_use_next_block(g, b);
396
50.5k
    }
397
502k
    return SUCCESS;
398
502k
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
{
404
    assert(g->g_entryblock->b_iused > 0);
405
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
        assert(!_PyMem_IsPtrFreed(block));
407
        if (block->b_instr != NULL) {
408
            assert(block->b_ialloc > 0);
409
            assert(block->b_iused >= 0);
410
            assert(block->b_ialloc >= block->b_iused);
411
        }
412
        else {
413
            assert (block->b_iused == 0);
414
            assert (block->b_ialloc == 0);
415
        }
416
    }
417
    return true;
418
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
8.29k
{
424
8.29k
    g->g_block_list = NULL;
425
8.29k
    basicblock *block = cfg_builder_new_block(g);
426
8.29k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
8.29k
    g->g_curblock = g->g_entryblock = block;
430
8.29k
    g->g_current_label = NO_LABEL;
431
8.29k
    return SUCCESS;
432
8.29k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
8.29k
{
437
8.29k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
8.29k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
8.29k
    memset(g, 0, sizeof(cfg_builder));
443
8.29k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
8.29k
    return g;
448
8.29k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
8.29k
{
453
8.29k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
8.29k
    assert(cfg_builder_check(g));
457
8.29k
    basicblock *b = g->g_block_list;
458
68.8k
    while (b != NULL) {
459
60.5k
        if (b->b_instr) {
460
60.5k
            PyMem_Free((void *)b->b_instr);
461
60.5k
        }
462
60.5k
        basicblock *next = b->b_list;
463
60.5k
        PyMem_Free((void *)b);
464
60.5k
        b = next;
465
60.5k
    }
466
8.29k
    PyMem_Free(g);
467
8.29k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
8.29k
{
472
8.29k
    int nblocks = 0;
473
67.0k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
58.7k
        nblocks++;
475
58.7k
    }
476
8.29k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
8.29k
    return SUCCESS;
481
8.29k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
25.4k
{
486
25.4k
    g->g_current_label = lbl;
487
25.4k
    return cfg_builder_maybe_start_new_block(g);
488
25.4k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
477k
{
493
477k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
477k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
477k
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
102k
{
501
107k
    while (b && b->b_iused == 0) {
502
4.25k
        b = b->b_next;
503
4.25k
    }
504
102k
    return b;
505
102k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
no_redundant_nops(cfg_builder *g) {
514
    if (remove_redundant_nops(g) != 0) {
515
        return false;
516
    }
517
    return true;
518
}
519
520
static bool
521
no_redundant_jumps(cfg_builder *g) {
522
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
        cfg_instr *last = basicblock_last_instr(b);
524
        if (last != NULL) {
525
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
                basicblock *next = next_nonempty_block(b->b_next);
527
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
                if (jump_target == next) {
529
                    assert(next);
530
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
                        assert(0);
532
                        return false;
533
                    }
534
                }
535
            }
536
        }
537
    }
538
    return true;
539
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
60.5k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
60.5k
    cfg_instr *last = basicblock_last_instr(b);
547
60.5k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
44.7k
        return SUCCESS;
549
44.7k
    }
550
60.5k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
15.8k
    bool is_forward = last->i_target->b_visited == 0;
553
15.8k
    if (is_forward) {
554
14.9k
        RETURN_IF_ERROR(
555
14.9k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
14.9k
        return SUCCESS;
557
14.9k
    }
558
559
923
    int reversed_opcode = 0;
560
923
    switch(last->i_opcode) {
561
39
        case POP_JUMP_IF_NOT_NONE:
562
39
            reversed_opcode = POP_JUMP_IF_NONE;
563
39
            break;
564
41
        case POP_JUMP_IF_NONE:
565
41
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
41
            break;
567
760
        case POP_JUMP_IF_FALSE:
568
760
            reversed_opcode = POP_JUMP_IF_TRUE;
569
760
            break;
570
83
        case POP_JUMP_IF_TRUE:
571
83
            reversed_opcode = POP_JUMP_IF_FALSE;
572
83
            break;
573
923
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
923
    basicblock *target = last->i_target;
579
923
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
923
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
923
    RETURN_IF_ERROR(
584
923
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
923
    RETURN_IF_ERROR(
586
923
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
923
    backwards_jump->b_startdepth = target->b_startdepth;
588
923
    last->i_opcode = reversed_opcode;
589
923
    last->i_target = b->b_next;
590
591
923
    backwards_jump->b_cold = b->b_cold;
592
923
    backwards_jump->b_next = b->b_next;
593
923
    b->b_next = backwards_jump;
594
923
    return SUCCESS;
595
923
}
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
8.29k
{
601
8.29k
    basicblock *entryblock = g->g_entryblock;
602
67.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
59.6k
        b->b_visited = 0;
604
59.6k
    }
605
68.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
60.5k
        b->b_visited = 1;
607
60.5k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
60.5k
    }
609
8.29k
    return SUCCESS;
610
8.29k
}
611
612
static int
613
8.29k
check_cfg(cfg_builder *g) {
614
67.0k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if jump or exit is not last instruction in the block. */
616
536k
        for (int i = 0; i < b->b_iused; i++) {
617
477k
            int opcode = b->b_instr[i].i_opcode;
618
477k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
477k
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
51.3k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
51.3k
            }
625
477k
        }
626
58.7k
    }
627
8.29k
    return SUCCESS;
628
8.29k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
32.1k
{
633
32.1k
    int lbl = -1;
634
268k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
235k
        if (b->b_label.id > lbl) {
636
92.2k
            lbl = b->b_label.id;
637
92.2k
        }
638
235k
    }
639
32.1k
    return lbl;
640
32.1k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
8.29k
{
646
8.29k
    int max_label = get_max_label(entryblock);
647
8.29k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
8.29k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
8.29k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
8.29k
    memset(label2block, 0, mapsize);
654
67.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
58.7k
        if (b->b_label.id >= 0) {
656
25.4k
            label2block[b->b_label.id] = b;
657
25.4k
        }
658
58.7k
    }
659
67.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
536k
        for (int i = 0; i < b->b_iused; i++) {
661
477k
            cfg_instr *instr = &b->b_instr[i];
662
477k
            assert(instr->i_target == NULL);
663
477k
            if (HAS_TARGET(instr->i_opcode)) {
664
31.7k
                int lbl = instr->i_oparg;
665
31.7k
                assert(lbl >= 0 && lbl <= max_label);
666
31.7k
                instr->i_target = label2block[lbl];
667
31.7k
                assert(instr->i_target != NULL);
668
31.7k
                assert(instr->i_target->b_label.id == lbl);
669
31.7k
            }
670
477k
        }
671
58.7k
    }
672
8.29k
    PyMem_Free(label2block);
673
8.29k
    return SUCCESS;
674
8.29k
}
675
676
static int
677
8.29k
mark_except_handlers(basicblock *entryblock) {
678
#ifndef NDEBUG
679
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
        assert(!b->b_except_handler);
681
    }
682
#endif
683
67.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
536k
        for (int i=0; i < b->b_iused; i++) {
685
477k
            cfg_instr *instr = &b->b_instr[i];
686
477k
            if (is_block_push(instr)) {
687
3.57k
                instr->i_target->b_except_handler = 1;
688
3.57k
            }
689
477k
        }
690
58.7k
    }
691
8.29k
    return SUCCESS;
692
8.29k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
3.57k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
3.57k
    assert(is_block_push(setup));
704
3.57k
    int opcode = setup->i_opcode;
705
3.57k
    basicblock * target = setup->i_target;
706
3.57k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
2.09k
        target->b_preserve_lasti = 1;
708
2.09k
    }
709
3.57k
    assert(stack->depth <= CO_MAXBLOCKS);
710
3.57k
    stack->handlers[++stack->depth] = target;
711
3.57k
    return target;
712
3.57k
}
713
714
static basicblock *
715
2.96k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
2.96k
    assert(stack->depth > 0);
717
2.96k
    return stack->handlers[--stack->depth];
718
2.96k
}
719
720
static basicblock *
721
50.8k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
50.8k
    return stack->handlers[stack->depth];
723
50.8k
}
724
725
static struct _PyCfgExceptStack *
726
8.29k
make_except_stack(void) {
727
8.29k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
8.29k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
8.29k
    new->depth = 0;
733
8.29k
    new->handlers[0] = NULL;
734
8.29k
    return new;
735
8.29k
}
736
737
static struct _PyCfgExceptStack *
738
19.2k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
19.2k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
19.2k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
19.2k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
19.2k
    return copy;
746
19.2k
}
747
748
static basicblock**
749
62.4k
make_cfg_traversal_stack(basicblock *entryblock) {
750
62.4k
    int nblocks = 0;
751
532k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
470k
        b->b_visited = 0;
753
470k
        nblocks++;
754
470k
    }
755
62.4k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
62.4k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
62.4k
    return stack;
760
62.4k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have different stack effect when jump to the target and
765
   when not jump. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jump
768
   * 1 -- when jump
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
445k
{
779
445k
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
445k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
445k
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
445k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
445k
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
445k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
3.57k
        effects->net = 0;
793
3.57k
        return 0;
794
3.57k
    }
795
441k
    effects->net = pushed - popped;
796
441k
    return 0;
797
445k
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
62.6k
{
802
62.6k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
62.6k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
50.6k
        assert(b->b_startdepth < 0);
808
50.6k
        b->b_startdepth = depth;
809
50.6k
        *(*sp)++ = b;
810
50.6k
    }
811
62.6k
    return SUCCESS;
812
62.6k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
8.29k
{
820
8.29k
    basicblock *entryblock = g->g_entryblock;
821
67.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
59.6k
        b->b_startdepth = INT_MIN;
823
59.6k
    }
824
8.29k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
8.29k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
8.29k
    int stackdepth = -1;
831
8.29k
    int maxdepth = 0;
832
8.29k
    basicblock **sp = stack;
833
8.29k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
58.9k
    while (sp != stack) {
837
50.6k
        basicblock *b = *--sp;
838
50.6k
        int depth = b->b_startdepth;
839
50.6k
        assert(depth >= 0);
840
50.6k
        basicblock *next = b->b_next;
841
444k
        for (int i = 0; i < b->b_iused; i++) {
842
417k
            cfg_instr *instr = &b->b_instr[i];
843
417k
            stack_effects effects;
844
417k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
417k
            int new_depth = depth + effects.net;
851
417k
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
854
0
                goto error;
855
0
            }
856
417k
            maxdepth = Py_MAX(maxdepth, depth);
857
417k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
27.5k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
27.5k
                int target_depth = depth + effects.net;
865
27.5k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
27.5k
                maxdepth = Py_MAX(maxdepth, depth);
867
27.5k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
27.5k
            }
871
417k
            depth = new_depth;
872
417k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
417k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
411k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
23.9k
            {
876
                /* remaining code is dead */
877
23.9k
                next = NULL;
878
23.9k
                break;
879
23.9k
            }
880
417k
        }
881
50.6k
        if (next != NULL) {
882
26.7k
            assert(BB_HAS_FALLTHROUGH(b));
883
26.7k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
26.7k
        }
887
50.6k
    }
888
8.29k
    stackdepth = maxdepth;
889
8.29k
error:
890
8.29k
    PyMem_Free(stack);
891
8.29k
    return stackdepth;
892
8.29k
}
893
894
static int
label_exception_targets(basicblock *entryblock) {
    /* Annotate each instruction with its active exception handler (i_except)
     * by propagating a stack of handler blocks along every control-flow path
     * starting at entryblock. Also NOPs out POP_BLOCK pseudo-instructions and
     * tags RESUME opargs for yields that occur at exception-stack depth 1.
     * Returns SUCCESS or ERROR. */
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
    if (todo_stack == NULL) {
        return ERROR;
    }
    struct _PyCfgExceptStack *except_stack = make_except_stack();
    if (except_stack == NULL) {
        PyMem_Free(todo_stack);
        PyErr_NoMemory();
        return ERROR;
    }
    except_stack->depth = 0;
    todo_stack[0] = entryblock;
    entryblock->b_visited = 1;
    entryblock->b_exceptstack = except_stack;
    basicblock **todo = &todo_stack[1];
    basicblock *handler = NULL;
    while (todo > todo_stack) {
        todo--;
        basicblock *b = todo[0];
        assert(b->b_visited == 1);
        /* Take ownership of the except stack that was attached to this
         * block when it was scheduled; it is freed or handed on below. */
        except_stack = b->b_exceptstack;
        assert(except_stack != NULL);
        b->b_exceptstack = NULL;
        handler = except_stack_top(except_stack);
        int last_yield_except_depth = -1;
        for (int i = 0; i < b->b_iused; i++) {
            cfg_instr *instr = &b->b_instr[i];
            if (is_block_push(instr)) {
                /* SETUP_* pseudo-instruction: its target starts with a
                 * copy of the current exception stack. */
                if (!instr->i_target->b_visited) {
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
                    if (copy == NULL) {
                        goto error;
                    }
                    instr->i_target->b_exceptstack = copy;
                    todo[0] = instr->i_target;
                    instr->i_target->b_visited = 1;
                    todo++;
                }
                handler = push_except_block(except_stack, instr);
            }
            else if (instr->i_opcode == POP_BLOCK) {
                handler = pop_except_block(except_stack);
                INSTR_SET_OP0(instr, NOP);
            }
            else if (is_jump(instr)) {
                instr->i_except = handler;
                assert(i == b->b_iused -1);
                if (!instr->i_target->b_visited) {
                    if (BB_HAS_FALLTHROUGH(b)) {
                        /* The stack is still needed for the fallthrough
                         * successor, so the jump target gets a copy. */
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
                        if (copy == NULL) {
                            goto error;
                        }
                        instr->i_target->b_exceptstack = copy;
                    }
                    else {
                        /* No fallthrough: transfer ownership outright. */
                        instr->i_target->b_exceptstack = except_stack;
                        except_stack = NULL;
                    }
                    todo[0] = instr->i_target;
                    instr->i_target->b_visited = 1;
                    todo++;
                }
            }
            else if (instr->i_opcode == YIELD_VALUE) {
                instr->i_except = handler;
                /* Remember the depth so the matching RESUME (below) can
                 * flag depth-1 yields in its oparg. */
                last_yield_except_depth = except_stack->depth;
            }
            else if (instr->i_opcode == RESUME) {
                instr->i_except = handler;
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
                    assert(last_yield_except_depth >= 0);
                    if (last_yield_except_depth == 1) {
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
                    }
                    last_yield_except_depth = -1;
                }
            }
            else if (instr->i_opcode == RETURN_GENERATOR) {
                /* RETURN_GENERATOR runs before any handler is active. */
                instr->i_except = NULL;
            }
            else {
                instr->i_except = handler;
            }
        }
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
            /* Hand the (possibly mutated) stack on to the fallthrough block. */
            assert(except_stack != NULL);
            b->b_next->b_exceptstack = except_stack;
            todo[0] = b->b_next;
            b->b_next->b_visited = 1;
            todo++;
        }
        else if (except_stack != NULL) {
            /* No successor inherits the stack: release it. */
           PyMem_Free(except_stack);
        }
    }
#ifdef Py_DEBUG
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        assert(b->b_exceptstack == NULL);
    }
#endif
    PyMem_Free(todo_stack);
    return SUCCESS;
error:
    PyMem_Free(todo_stack);
    PyMem_Free(except_stack);
    return ERROR;
}
1004
1005
/***** CFG optimizations *****/
1006
1007
static int
1008
16.5k
remove_unreachable(basicblock *entryblock) {
1009
134k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1010
118k
        b->b_predecessors = 0;
1011
118k
    }
1012
16.5k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1013
16.5k
    if (stack == NULL) {
1014
0
        return ERROR;
1015
0
    }
1016
16.5k
    basicblock **sp = stack;
1017
16.5k
    entryblock->b_predecessors = 1;
1018
16.5k
    *sp++ = entryblock;
1019
16.5k
    entryblock->b_visited = 1;
1020
117k
    while (sp > stack) {
1021
100k
        basicblock *b = *(--sp);
1022
100k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1023
50.9k
            if (!b->b_next->b_visited) {
1024
43.6k
                assert(b->b_next->b_predecessors == 0);
1025
43.6k
                *sp++ = b->b_next;
1026
43.6k
                b->b_next->b_visited = 1;
1027
43.6k
            }
1028
50.9k
            b->b_next->b_predecessors++;
1029
50.9k
        }
1030
990k
        for (int i = 0; i < b->b_iused; i++) {
1031
889k
            basicblock *target;
1032
889k
            cfg_instr *instr = &b->b_instr[i];
1033
889k
            if (is_jump(instr) || is_block_push(instr)) {
1034
57.1k
                target = instr->i_target;
1035
57.1k
                if (!target->b_visited) {
1036
40.2k
                    *sp++ = target;
1037
40.2k
                    target->b_visited = 1;
1038
40.2k
                }
1039
57.1k
                target->b_predecessors++;
1040
57.1k
            }
1041
889k
        }
1042
100k
    }
1043
16.5k
    PyMem_Free(stack);
1044
1045
    /* Delete unreachable instructions */
1046
134k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1047
118k
       if (b->b_predecessors == 0) {
1048
17.9k
            b->b_iused = 0;
1049
17.9k
            b->b_except_handler = 0;
1050
17.9k
       }
1051
118k
    }
1052
16.5k
    return SUCCESS;
1053
16.5k
}
1054
1055
static int
basicblock_remove_redundant_nops(basicblock *bb) {
    /* Remove NOPs when legal to do so. */
    /* Compacts bb->b_instr in place and returns the number of NOPs removed.
     * A NOP survives only when it is the sole carrier of a line number. */
    int dest = 0;
    int prev_lineno = -1;
    for (int src = 0; src < bb->b_iused; src++) {
        int lineno = bb->b_instr[src].i_loc.lineno;
        if (bb->b_instr[src].i_opcode == NOP) {
            /* Eliminate no-op if it doesn't have a line number */
            if (lineno < 0) {
                continue;
            }
            /* or, if the previous instruction had the same line number. */
            if (prev_lineno == lineno) {
                continue;
            }
            /* or, if the next instruction has same line number or no line number */
            if (src < bb->b_iused - 1) {
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
                if (next_lineno == lineno) {
                    continue;
                }
                if (next_lineno < 0) {
                    /* Move this NOP's location onto the next instruction so
                     * the line number is preserved after removal. */
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
                    continue;
                }
            }
            else {
                basicblock *next = next_nonempty_block(bb->b_next);
                /* or if last instruction in BB and next BB has same line number */
                if (next) {
                    location next_loc = NO_LOCATION;
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
                        cfg_instr *instr = &next->b_instr[next_i];
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
                            /* Skip over NOPs without a location, they will be removed */
                            continue;
                        }
                        next_loc = instr->i_loc;
                        break;
                    }
                    if (lineno == next_loc.lineno) {
                        continue;
                    }
                }
            }

        }
        /* Keep this instruction: shift it down over removed slots. */
        if (dest != src) {
            bb->b_instr[dest] = bb->b_instr[src];
        }
        dest++;
        prev_lineno = lineno;
    }
    assert(dest <= bb->b_iused);
    int num_removed = bb->b_iused - dest;
    bb->b_iused = dest;
    /* Zero the freed tail so stale instruction data cannot be misread. */
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
    return num_removed;
}
1115
1116
static int
1117
28.8k
remove_redundant_nops(cfg_builder *g) {
1118
28.8k
    int changes = 0;
1119
300k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1120
271k
        int change = basicblock_remove_redundant_nops(b);
1121
271k
        RETURN_IF_ERROR(change);
1122
271k
        changes += change;
1123
271k
    }
1124
28.8k
    return changes;
1125
28.8k
}
1126
1127
static int
remove_redundant_nops_and_pairs(basicblock *entryblock)
{
    /* Repeatedly remove redundant NOPs and cancel out push/pop pairs
     * (LOAD_CONST/LOAD_SMALL_INT + POP_TOP, COPY 1 + POP_TOP) across the
     * whole CFG until a full pass makes no change. Returns SUCCESS/ERROR. */
    bool done = false;

    while (! done) {
        done = true;
        cfg_instr *prev_instr = NULL;
        cfg_instr *instr = NULL;
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
            if (IS_LABEL(b->b_label)) {
                /* this block is a jump target, forget instr */
                instr = NULL;
            }
            for (int i = 0; i < b->b_iused; i++) {
                /* "instr" may carry over from the previous block when control
                 * can only fall through, so pairs spanning blocks are caught. */
                prev_instr = instr;
                instr = &b->b_instr[i];
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
                int opcode = instr->i_opcode;
                bool is_redundant_pair = false;
                if (opcode == POP_TOP) {
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
                       is_redundant_pair = true;
                   }
                   else if (prev_opcode == COPY && prev_oparg == 1) {
                       is_redundant_pair = true;
                   }
                }
                if (is_redundant_pair) {
                    INSTR_SET_OP0(prev_instr, NOP);
                    INSTR_SET_OP0(instr, NOP);
                    /* New NOPs may enable further removals: run another pass. */
                    done = false;
                }
            }
            /* A jump or scope exit ends the straight-line sequence. */
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
                instr = NULL;
            }
        }
    }
    return SUCCESS;
}
1170
1171
static int
1172
20.5k
remove_redundant_jumps(cfg_builder *g) {
1173
    /* If a non-empty block ends with a jump instruction, check if the next
1174
     * non-empty block reached through normal flow control is the target
1175
     * of that jump. If it is, then the jump instruction is redundant and
1176
     * can be deleted.
1177
     *
1178
     * Return the number of changes applied, or -1 on error.
1179
     */
1180
1181
20.5k
    int changes = 0;
1182
232k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1183
212k
        cfg_instr *last = basicblock_last_instr(b);
1184
212k
        if (last == NULL) {
1185
28.6k
            continue;
1186
28.6k
        }
1187
212k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1188
183k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1189
25.8k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1190
25.8k
            if (jump_target == NULL) {
1191
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1192
0
                return ERROR;
1193
0
            }
1194
25.8k
            basicblock *next = next_nonempty_block(b->b_next);
1195
25.8k
            if (jump_target == next) {
1196
862
                changes++;
1197
862
                INSTR_SET_OP0(last, NOP);
1198
862
            }
1199
25.8k
        }
1200
183k
    }
1201
1202
20.5k
    return changes;
1203
20.5k
}
1204
1205
static inline bool
1206
74.1k
basicblock_has_no_lineno(basicblock *b) {
1207
88.7k
    for (int i = 0; i < b->b_iused; i++) {
1208
81.5k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1209
66.9k
            return false;
1210
66.9k
        }
1211
81.5k
    }
1212
7.20k
    return true;
1213
74.1k
}
1214
1215
/* Maximum size of basic block that should be copied in optimizer */
1216
3.06k
#define MAX_COPY_SIZE 4
1217
1218
/* If this block ends with an unconditional jump to a small exit block or
1219
 * a block that has no line numbers (and no fallthrough), then
1220
 * remove the jump and extend this block with the target.
1221
 * Returns 1 if extended, 0 if no change, and -1 on error.
1222
 */
1223
static int
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
    /* See the comment above: inline the target of a trailing unconditional
     * jump when the target is a small exit block, or has no line numbers
     * and no fallthrough. Returns 1 if extended, 0 if unchanged, -1 on error. */
    cfg_instr *last = basicblock_last_instr(bb);
    if (last == NULL) {
        return 0;
    }
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
        return 0;
    }
    basicblock *target = last->i_target;
    bool small_exit_block = (basicblock_exits_scope(target) &&
                             target->b_iused <= MAX_COPY_SIZE);
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
                                     !BB_HAS_FALLTHROUGH(target));
    if (small_exit_block || no_lineno_no_fallthrough) {
        assert(is_jump(last));
        int removed_jump_opcode = last->i_opcode;
        /* Drop the jump, then copy the target's instructions into bb. */
        INSTR_SET_OP0(last, NOP);
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
        if (no_lineno_no_fallthrough) {
            last = basicblock_last_instr(bb);
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
                removed_jump_opcode == JUMP)
            {
                /* Make sure we don't lose eval breaker checks */
                last->i_opcode = JUMP;
            }
        }
        /* bb no longer jumps to target. */
        target->b_predecessors--;
        return 1;
    }
    return 0;
}
1256
1257
static int
1258
8.29k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1259
8.29k
    bool changes;
1260
9.75k
    do {
1261
9.75k
        changes = false;
1262
104k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1263
94.8k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1264
94.8k
            RETURN_IF_ERROR(res);
1265
94.8k
            if (res) {
1266
3.65k
                changes = true;
1267
3.65k
            }
1268
94.8k
        }
1269
9.75k
    } while(changes); /* every change removes a jump, ensuring convergence */
1270
8.29k
    return changes;
1271
8.29k
}
1272
1273
// Attempt to eliminate jumps to jumps by updating inst to jump to
1274
// target->i_target using the provided opcode. Return whether or not the
1275
// optimization was successful.
1276
static bool
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
{
    /* Thread a jump-to-a-jump: replace inst (the last instruction of bb,
     * jumping to the block containing "target") with a direct jump to
     * target->i_target. Returns true if the rewrite happened. */
    assert(is_jump(inst));
    assert(is_jump(target));
    assert(inst == basicblock_last_instr(bb));
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
    // changes (and we fall into an infinite loop):
    if (inst->i_target != target->i_target) {
        /* Change inst to NOP and append a jump to target->i_target. The
         * NOP will be removed later if it's not needed for the lineno.
         */
        INSTR_SET_OP0(inst, NOP);

        /* NOTE(review): RETURN_IF_ERROR returns -1 (ERROR) from this
         * bool-returning function, which converts to true at the caller —
         * confirm callers handle the error path correctly. */
        RETURN_IF_ERROR(
            basicblock_add_jump(
                bb, opcode, target->i_target, target->i_loc));

        return true;
    }
    return false;
}
1298
1299
static int
1300
loads_const(int opcode)
1301
23.3k
{
1302
23.3k
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1303
23.3k
}
1304
1305
/* Returns new reference */
1306
static PyObject*
1307
get_const_value(int opcode, int oparg, PyObject *co_consts)
1308
87.3k
{
1309
87.3k
    PyObject *constant = NULL;
1310
87.3k
    assert(loads_const(opcode));
1311
87.3k
    if (opcode == LOAD_CONST) {
1312
86.2k
        constant = PyList_GET_ITEM(co_consts, oparg);
1313
86.2k
    }
1314
87.3k
    if (opcode == LOAD_SMALL_INT) {
1315
1.15k
        return PyLong_FromLong(oparg);
1316
1.15k
    }
1317
1318
86.2k
    if (constant == NULL) {
1319
0
        PyErr_SetString(PyExc_SystemError,
1320
0
                        "Internal error: failed to get value of a constant");
1321
0
        return NULL;
1322
0
    }
1323
86.2k
    return Py_NewRef(constant);
1324
86.2k
}
1325
1326
// Steals a reference to newconst.
1327
static int
1328
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1329
2.89k
{
1330
2.89k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1331
0
        Py_DECREF(newconst);
1332
0
        return -1;
1333
0
    }
1334
1335
2.89k
    Py_ssize_t index;
1336
128k
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1337
126k
        if (PyList_GET_ITEM(consts, index) == newconst) {
1338
875
            break;
1339
875
        }
1340
126k
    }
1341
2.89k
    if (index == PyList_GET_SIZE(consts)) {
1342
2.02k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1343
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1344
0
            Py_DECREF(newconst);
1345
0
            return -1;
1346
0
        }
1347
2.02k
        if (PyList_Append(consts, newconst)) {
1348
0
            Py_DECREF(newconst);
1349
0
            return -1;
1350
0
        }
1351
2.02k
    }
1352
2.89k
    Py_DECREF(newconst);
1353
2.89k
    return (int)index;
1354
2.89k
}
1355
1356
/*
1357
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1358
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1359
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1360
1361
  Return boolean indicating whether "size" such instructions were found.
1362
*/
1363
static bool
1364
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1365
15.6k
{
1366
15.6k
    assert(start < bb->b_iused);
1367
15.6k
    assert(size >= 0);
1368
15.6k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1369
1370
26.3k
    for (; start >= 0 && size > 0; start--) {
1371
23.5k
        cfg_instr *instr = &bb->b_instr[start];
1372
23.5k
        if (instr->i_opcode == NOP) {
1373
344
            continue;
1374
344
        }
1375
23.2k
        if (!loads_const(instr->i_opcode)) {
1376
12.8k
            return false;
1377
12.8k
        }
1378
10.3k
        instrs[--size] = instr;
1379
10.3k
    }
1380
1381
2.77k
    return size == 0;
1382
15.6k
}
1383
1384
/*
1385
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1386
  Caller must make sure "instrs" has at least "size" elements.
1387
*/
1388
static void
1389
nop_out(cfg_instr **instrs, int size)
1390
2.82k
{
1391
8.64k
    for (int i = 0; i < size; i++) {
1392
5.81k
        cfg_instr *instr = instrs[i];
1393
5.81k
        assert(instr->i_opcode != NOP);
1394
5.81k
        INSTR_SET_OP0(instr, NOP);
1395
5.81k
        INSTR_SET_LOC(instr, NO_LOCATION);
1396
5.81k
    }
1397
2.82k
}
1398
1399
/* Does not steal reference to "newconst".
1400
   Return 1 if changed instruction to LOAD_SMALL_INT.
1401
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1402
   Return -1 on error.
1403
*/
1404
static int
1405
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1406
                               PyObject *consts, PyObject *const_cache)
1407
81.8k
{
1408
81.8k
    if (PyLong_CheckExact(newconst)) {
1409
26.9k
        int overflow;
1410
26.9k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1411
26.9k
        if (val == -1 && PyErr_Occurred()) {
1412
0
            return -1;
1413
0
        }
1414
26.9k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1415
21.1k
            assert(_Py_IsImmortal(newconst));
1416
21.1k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1417
21.1k
            return 1;
1418
21.1k
        }
1419
26.9k
    }
1420
60.7k
    return 0;
1421
81.8k
}
1422
1423
1424
/* Steals reference to "newconst" */
1425
static int
1426
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1427
                      PyObject *consts, PyObject *const_cache)
1428
2.52k
{
1429
2.52k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1430
2.52k
    if (res < 0) {
1431
0
        Py_DECREF(newconst);
1432
0
        return ERROR;
1433
0
    }
1434
2.52k
    if (res > 0) {
1435
16
        return SUCCESS;
1436
16
    }
1437
2.51k
    int oparg = add_const(newconst, consts, const_cache);
1438
2.51k
    RETURN_IF_ERROR(oparg);
1439
2.51k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1440
2.51k
    return SUCCESS;
1441
2.51k
}
1442
1443
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1444
   with    LOAD_CONST (c1, c2, ... cn).
1445
   The consts table must still be in list form so that the
1446
   new constant (c1, c2, ... cn) can be appended.
1447
   Called with codestr pointing to the first LOAD_CONST.
1448
*/
1449
static int
1450
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1451
4.99k
{
1452
    /* Pre-conditions */
1453
4.99k
    assert(PyDict_CheckExact(const_cache));
1454
4.99k
    assert(PyList_CheckExact(consts));
1455
1456
4.99k
    cfg_instr *instr = &bb->b_instr[i];
1457
4.99k
    assert(instr->i_opcode == BUILD_TUPLE);
1458
1459
4.99k
    int seq_size = instr->i_oparg;
1460
4.99k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1461
0
        return SUCCESS;
1462
0
    }
1463
1464
4.99k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1465
4.99k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1466
        /* not a const sequence */
1467
3.24k
        return SUCCESS;
1468
3.24k
    }
1469
1470
1.75k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1471
1.75k
    if (const_tuple == NULL) {
1472
0
        return ERROR;
1473
0
    }
1474
1475
4.87k
    for (int i = 0; i < seq_size; i++) {
1476
3.12k
        cfg_instr *inst = const_instrs[i];
1477
3.12k
        assert(loads_const(inst->i_opcode));
1478
3.12k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1479
3.12k
        if (element == NULL) {
1480
0
            Py_DECREF(const_tuple);
1481
0
            return ERROR;
1482
0
        }
1483
3.12k
        PyTuple_SET_ITEM(const_tuple, i, element);
1484
3.12k
    }
1485
1486
1.75k
    nop_out(const_instrs, seq_size);
1487
1.75k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1488
1.75k
}
1489
1490
/* Replace:
    BUILD_LIST 0
    LOAD_CONST c1
    LIST_APPEND 1
    LOAD_CONST c2
    LIST_APPEND 1
    ...
    LOAD_CONST cN
    LIST_APPEND 1
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
   with:
    LOAD_CONST (c1, c2, ... cN)
*/
static int
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
                                      PyObject *consts, PyObject *const_cache)
{
    assert(PyDict_CheckExact(const_cache));
    assert(PyList_CheckExact(consts));
    assert(i >= 0);
    assert(i < bb->b_iused);

    cfg_instr *intrinsic = &bb->b_instr[i];
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);

    int consts_found = 0;
    bool expect_append = true;

    /* Scan backwards, alternating between expecting a LIST_APPEND 1 and a
     * constant load, until the BUILD_LIST 0 that started the sequence. */
    for (int pos = i - 1; pos >= 0; pos--) {
        cfg_instr *instr = &bb->b_instr[pos];
        int opcode = instr->i_opcode;
        int oparg = instr->i_oparg;

        if (opcode == NOP) {
            continue;
        }

        if (opcode == BUILD_LIST && oparg == 0) {
            if (!expect_append) {
                /* Not a sequence start. */
                return SUCCESS;
            }

            /* Sequence start, we are done. */
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
            if (newconst == NULL) {
                return ERROR;
            }

            /* Second pass over the matched span: collect the constant values
             * (filling the tuple back-to-front to restore execution order)
             * and NOP out every instruction of the sequence. */
            for (int newpos = i - 1; newpos >= pos; newpos--) {
                instr = &bb->b_instr[newpos];
                if (instr->i_opcode == NOP) {
                    continue;
                }
                if (loads_const(instr->i_opcode)) {
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
                    if (constant == NULL) {
                        Py_DECREF(newconst);
                        return ERROR;
                    }
                    assert(consts_found > 0);
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
                }
                nop_out(&instr, 1);
            }
            assert(consts_found == 0);
            /* Replace the intrinsic call with a single constant load. */
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
        }

        if (expect_append) {
            if (opcode != LIST_APPEND || oparg != 1) {
                return SUCCESS;
            }
        }
        else {
            if (!loads_const(opcode)) {
                return SUCCESS;
            }
            consts_found++;
        }

        expect_append = !expect_append;
    }

    /* Did not find sequence start. */
    return SUCCESS;
}
1578
1579
3.25k
#define MIN_CONST_SEQUENCE_SIZE 3
1580
/*
1581
Optimize lists and sets for:
1582
    1. "for" loop, comprehension or "in"/"not in" tests:
1583
           Change literal list or set of constants into constant
1584
           tuple or frozenset respectively. Change list of
1585
           non-constants into tuple.
1586
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1587
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1588
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1589
           or BUILD_SET & SET_UPDATE respectively.
1590
*/
1591
static int
1592
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1593
                        PyObject *consts, PyObject *const_cache)
1594
1.62k
{
1595
1.62k
    assert(PyDict_CheckExact(const_cache));
1596
1.62k
    assert(PyList_CheckExact(consts));
1597
1598
1.62k
    cfg_instr *instr = &bb->b_instr[i];
1599
1.62k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1600
1601
1.62k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1602
1.62k
    int seq_size = instr->i_oparg;
1603
1.62k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1604
1.62k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1605
1.35k
    {
1606
1.35k
        return SUCCESS;
1607
1.35k
    }
1608
1609
274
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1610
274
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1611
69
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1612
            /* iterate over a tuple instead of list */
1613
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1614
0
        }
1615
69
        return SUCCESS;
1616
69
    }
1617
1618
205
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1619
205
    if (const_result == NULL) {
1620
0
        return ERROR;
1621
0
    }
1622
1623
1.93k
    for (int i = 0; i < seq_size; i++) {
1624
1.72k
        cfg_instr *inst = const_instrs[i];
1625
1.72k
        assert(loads_const(inst->i_opcode));
1626
1.72k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1627
1.72k
        if (element == NULL) {
1628
0
            Py_DECREF(const_result);
1629
0
            return ERROR;
1630
0
        }
1631
1.72k
        PyTuple_SET_ITEM(const_result, i, element);
1632
1.72k
    }
1633
1634
205
    if (instr->i_opcode == BUILD_SET) {
1635
70
        PyObject *frozenset = PyFrozenSet_New(const_result);
1636
70
        if (frozenset == NULL) {
1637
0
            Py_DECREF(const_result);
1638
0
            return ERROR;
1639
0
        }
1640
70
        Py_SETREF(const_result, frozenset);
1641
70
    }
1642
1643
205
    int index = add_const(const_result, consts, const_cache);
1644
205
    RETURN_IF_ERROR(index);
1645
205
    nop_out(const_instrs, seq_size);
1646
1647
205
    if (contains_or_iter) {
1648
63
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1649
63
    }
1650
142
    else {
1651
142
        assert(i >= 2);
1652
142
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1653
1654
142
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1655
1656
142
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1657
142
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1658
142
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1659
142
    }
1660
205
    return SUCCESS;
1661
205
}
1662
1663
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1664
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1665
 * Used to avoid creating constants which are slow to hash.
1666
 */
1667
static Py_ssize_t
1668
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1669
0
{
1670
0
    if (PyTuple_Check(obj)) {
1671
0
        Py_ssize_t i;
1672
0
        limit -= PyTuple_GET_SIZE(obj);
1673
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1674
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1675
0
            if (limit < 0) {
1676
0
                return limit;
1677
0
            }
1678
0
        }
1679
0
    }
1680
0
    return limit;
1681
0
}
1682
1683
150
#define MAX_INT_SIZE           128  /* bits */
1684
0
#define MAX_COLLECTION_SIZE    256  /* items */
1685
16
#define MAX_STR_SIZE          4096  /* characters */
1686
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1687
1688
/* Multiply two constants, refusing to fold results that would be
 * excessively large: big ints, oversized tuple repetitions, and long
 * str/bytes repetitions. Returns NULL (with or without an exception set)
 * when folding is declined or fails.
 */
static PyObject *
const_folding_safe_multiply(PyObject *v, PyObject *w)
{
    if (PyLong_Check(v) && PyLong_Check(w)
        && !_PyLong_IsZero((PyLongObject *)v)
        && !_PyLong_IsZero((PyLongObject *)w))
    {
        /* int * int: bound the bit-length of the product. */
        int64_t lbits = _PyLong_NumBits(v);
        int64_t rbits = _PyLong_NumBits(w);
        assert(lbits >= 0);
        assert(rbits >= 0);
        if (lbits + rbits > MAX_INT_SIZE) {
            return NULL;
        }
    }
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
        /* int * tuple: bound both the top-level size and the total number
         * of (possibly nested) items. */
        Py_ssize_t tuple_len = PyTuple_GET_SIZE(w);
        if (tuple_len) {
            long reps = PyLong_AsLong(v);
            if (reps < 0 || reps > MAX_COLLECTION_SIZE / tuple_len) {
                return NULL;
            }
            if (reps && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / reps) < 0) {
                return NULL;
            }
        }
    }
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
        /* int * str/bytes: bound the length of the repeated result. */
        Py_ssize_t unit_len = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w)
                                                 : PyBytes_GET_SIZE(w);
        if (unit_len) {
            long reps = PyLong_AsLong(v);
            if (reps < 0 || reps > MAX_STR_SIZE / unit_len) {
                return NULL;
            }
        }
    }
    else if (PyLong_Check(w)
             && (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
    {
        /* sequence * int: normalize to int * sequence and retry. */
        return const_folding_safe_multiply(w, v);
    }

    return PyNumber_Multiply(v, w);
}
1732
1733
/* Raise v to the power w, refusing to fold int ** int results whose
 * bit-length could exceed MAX_INT_SIZE. Returns NULL when folding is
 * declined or the operation fails.
 */
static PyObject *
const_folding_safe_power(PyObject *v, PyObject *w)
{
    if (PyLong_Check(v) && PyLong_Check(w)
        && !_PyLong_IsZero((PyLongObject *)v)
        && _PyLong_IsPositive((PyLongObject *)w))
    {
        int64_t base_bits = _PyLong_NumBits(v);
        assert(base_bits >= 0);
        size_t exponent = PyLong_AsSize_t(w);
        if (exponent == (size_t)-1) {
            /* Exponent does not fit in size_t (or conversion failed). */
            return NULL;
        }
        if ((uint64_t)base_bits > MAX_INT_SIZE / exponent) {
            /* Result would need more than MAX_INT_SIZE bits. */
            return NULL;
        }
    }

    return PyNumber_Power(v, w, Py_None);
}
1752
1753
/* Left-shift v by w, refusing to fold int << int results whose bit-length
 * could exceed MAX_INT_SIZE. Returns NULL when folding is declined or the
 * operation fails.
 */
static PyObject *
const_folding_safe_lshift(PyObject *v, PyObject *w)
{
    if (PyLong_Check(v) && PyLong_Check(w)
        && !_PyLong_IsZero((PyLongObject *)v)
        && !_PyLong_IsZero((PyLongObject *)w))
    {
        int64_t value_bits = _PyLong_NumBits(v);
        assert(value_bits >= 0);
        size_t shift = PyLong_AsSize_t(w);
        if (shift == (size_t)-1) {
            /* Shift count does not fit in size_t (or conversion failed). */
            return NULL;
        }
        if (shift > MAX_INT_SIZE || (uint64_t)value_bits > MAX_INT_SIZE - shift) {
            /* Shifted value would need more than MAX_INT_SIZE bits. */
            return NULL;
        }
    }

    return PyNumber_Lshift(v, w);
}
1772
1773
/* Compute v % w, declining to fold when v is str/bytes (where "%" means
 * string formatting rather than arithmetic modulo). Returns NULL when
 * folding is declined or the operation fails.
 */
static PyObject *
const_folding_safe_mod(PyObject *v, PyObject *w)
{
    int is_stringlike = PyUnicode_Check(v) || PyBytes_Check(v);
    if (is_stringlike) {
        return NULL;
    }

    return PyNumber_Remainder(v, w);
}
1782
1783
/* Evaluate the binary operation `op` (an NB_* oparg of BINARY_OP) on two
 * constant operands. Potentially-expensive operations go through the
 * const_folding_safe_* wrappers, which decline to fold oversized results.
 * Returns a new reference, or NULL if the operation failed or was declined
 * (caller distinguishes via the exception state).
 */
static PyObject *
eval_const_binop(PyObject *left, int op, PyObject *right)
{
    assert(left != NULL && right != NULL);
    assert(op >= 0 && op <= NB_OPARG_LAST);

    PyObject *result = NULL;
    switch (op) {
        case NB_ADD:
            result = PyNumber_Add(left, right);
            break;
        case NB_SUBTRACT:
            result = PyNumber_Subtract(left, right);
            break;
        case NB_MULTIPLY:
            /* Guarded: may decline huge ints / long sequence repetitions. */
            result = const_folding_safe_multiply(left, right);
            break;
        case NB_TRUE_DIVIDE:
            result = PyNumber_TrueDivide(left, right);
            break;
        case NB_FLOOR_DIVIDE:
            result = PyNumber_FloorDivide(left, right);
            break;
        case NB_REMAINDER:
            /* Guarded: declines str/bytes "%" formatting. */
            result = const_folding_safe_mod(left, right);
            break;
        case NB_POWER:
            /* Guarded: may decline huge int results. */
            result = const_folding_safe_power(left, right);
            break;
        case NB_LSHIFT:
            /* Guarded: may decline huge int results. */
            result = const_folding_safe_lshift(left, right);
            break;
        case NB_RSHIFT:
            result = PyNumber_Rshift(left, right);
            break;
        case NB_OR:
            result = PyNumber_Or(left, right);
            break;
        case NB_XOR:
            result = PyNumber_Xor(left, right);
            break;
        case NB_AND:
            result = PyNumber_And(left, right);
            break;
        case NB_SUBSCR:
            result = PyObject_GetItem(left, right);
            break;
        case NB_MATRIX_MULTIPLY:
            // No builtin constants implement matrix multiplication
            break;
        default:
            Py_UNREACHABLE();
    }
    return result;
}
1838
1839
/* Try to fold a BINARY_OP at index i of bb whose two operands are constant
 * loads: evaluate the operation at compile time, NOP out the operand loads,
 * and rewrite the BINARY_OP into a LOAD_CONST of the result. Folding is
 * silently skipped when the operands are not constants or when evaluation
 * fails non-fatally. Returns SUCCESS or ERROR.
 */
static int
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
{
    #define BINOP_OPERAND_COUNT 2
    assert(PyDict_CheckExact(const_cache));
    assert(PyList_CheckExact(consts));

    cfg_instr *binop = &bb->b_instr[i];
    assert(binop->i_opcode == BINARY_OP);

    /* Locate the two instructions that load the operands. */
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
        /* not a const sequence */
        return SUCCESS;
    }

    cfg_instr *lhs_instr = operands_instrs[0];
    assert(loads_const(lhs_instr->i_opcode));
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
    if (lhs == NULL) {
        return ERROR;
    }

    cfg_instr *rhs_instr = operands_instrs[1];
    assert(loads_const(rhs_instr->i_opcode));
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
    if (rhs == NULL) {
        Py_DECREF(lhs);
        return ERROR;
    }

    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
    Py_DECREF(lhs);
    Py_DECREF(rhs);
    if (newconst == NULL) {
        /* Evaluation failed or was declined: keep the runtime operation.
         * KeyboardInterrupt is the only exception we propagate. */
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
            return ERROR;
        }
        PyErr_Clear();
        return SUCCESS;
    }

    /* Replace the operand loads with NOPs and the op with a const load. */
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
    return instr_make_load_const(binop, newconst, consts, const_cache);
}
1884
1885
/* Evaluate a unary operation (UNARY_NEGATIVE, UNARY_INVERT, UNARY_NOT, or
 * CALL_INTRINSIC_1 with INTRINSIC_UNARY_POSITIVE) on a constant operand.
 * Returns a new reference, or NULL on failure or when folding is declined
 * (e.g. ~bool, which is deprecated).
 */
static PyObject *
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
{
    assert(operand != NULL);
    assert(
        opcode == UNARY_NEGATIVE ||
        opcode == UNARY_INVERT ||
        opcode == UNARY_NOT ||
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
    );
    PyObject *result;
    switch (opcode) {
        case UNARY_NEGATIVE:
            result = PyNumber_Negative(operand);
            break;
        case UNARY_INVERT:
            // XXX: This should be removed once the ~bool deprecation expires.
            if (PyBool_Check(operand)) {
                /* Decline folding so the runtime deprecation behavior is kept. */
                return NULL;
            }
            result = PyNumber_Invert(operand);
            break;
        case UNARY_NOT: {
            int r = PyObject_IsTrue(operand);
            if (r < 0) {
                return NULL;
            }
            result = PyBool_FromLong(!r);
            break;
        }
        case CALL_INTRINSIC_1:
            /* Only the unary-positive intrinsic is accepted (see assert above). */
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
                Py_UNREACHABLE();
            }
            result = PyNumber_Positive(operand);
            break;
        default:
            Py_UNREACHABLE();
    }
    return result;
}
1926
1927
/* Try to fold the unary operation at index i of bb when its operand is a
 * constant load: evaluate at compile time, NOP out the operand load, and
 * rewrite the operation into a LOAD_CONST of the result. Skips silently
 * when the operand is not constant or evaluation fails non-fatally.
 * Returns SUCCESS or ERROR.
 */
static int
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
{
    #define UNARYOP_OPERAND_COUNT 1
    assert(PyDict_CheckExact(const_cache));
    assert(PyList_CheckExact(consts));
    cfg_instr *unaryop = &bb->b_instr[i];

    /* Locate the single instruction that loads the operand. */
    cfg_instr *operand_instr;
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
        /* not a const */
        return SUCCESS;
    }

    assert(loads_const(operand_instr->i_opcode));
    PyObject *operand = get_const_value(
        operand_instr->i_opcode,
        operand_instr->i_oparg,
        consts
    );
    if (operand == NULL) {
        return ERROR;
    }

    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
    Py_DECREF(operand);
    if (newconst == NULL) {
        /* Evaluation failed or was declined: keep the runtime operation.
         * KeyboardInterrupt is the only exception we propagate. */
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
            return ERROR;
        }
        PyErr_Clear();
        return SUCCESS;
    }

    if (unaryop->i_opcode == UNARY_NOT) {
        /* `not x` must always fold to an exact bool. */
        assert(PyBool_Check(newconst));
    }
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
}
1967
1968
2.79k
#define VISITED (-1)
1969
1970
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
// same effect.  On entry, block->b_instr[*ix] must be a SWAP; on success,
// *ix is advanced to the last instruction of the rewritten run so the caller's
// scan resumes after it.  Returns SUCCESS or ERROR (allocation failure only).
static int
swaptimize(basicblock *block, int *ix)
{
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
    // for this function. Make sure to blow away cached *.pyc files first!
    assert(*ix < block->b_iused);
    cfg_instr *instructions = &block->b_instr[*ix];
    // Find the length of the current sequence of SWAPs and NOPs, and record the
    // maximum depth of the stack manipulations:
    assert(instructions[0].i_opcode == SWAP);
    int depth = instructions[0].i_oparg;
    int len = 0;
    int more = false;
    int limit = block->b_iused - *ix;
    while (++len < limit) {
        int opcode = instructions[len].i_opcode;
        if (opcode == SWAP) {
            depth = Py_MAX(depth, instructions[len].i_oparg);
            more = true;
        }
        else if (opcode != NOP) {
            break;
        }
    }
    // It's already optimal if there's only one SWAP:
    if (!more) {
        return SUCCESS;
    }
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
    int *stack = PyMem_Malloc(depth * sizeof(int));
    if (stack == NULL) {
        PyErr_NoMemory();
        return ERROR;
    }
    for (int i = 0; i < depth; i++) {
        stack[i] = i;
    }
    // Simulate the combined effect of these instructions by "running" them on
    // our "stack":
    for (int i = 0; i < len; i++) {
        if (instructions[i].i_opcode == SWAP) {
            int oparg = instructions[i].i_oparg;
            int top = stack[0];
            // SWAPs are 1-indexed:
            stack[0] = stack[oparg - 1];
            stack[oparg - 1] = top;
        }
    }
    // Now we can begin! Our approach here is based on a solution to a closely
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
    // think of this algorithm as determining the steps needed to efficiently
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
    // though, we can efficiently *shuffle* it! For this reason, we will be
    // replacing instructions starting from the *end* of the run. Since the
    // solution is optimal, we don't need to worry about running out of space:
    int current = len - 1;
    for (int i = 0; i < depth; i++) {
        // Skip items that have already been visited, or just happen to be in
        // the correct location:
        if (stack[i] == VISITED || stack[i] == i) {
            continue;
        }
        // Okay, we've found an item that hasn't been visited. It forms a cycle
        // with other items; traversing the cycle and swapping each item with
        // the next will put them all in the correct place. The weird
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
        // can only swap from that position:
        int j = i;
        while (true) {
            // Skip the actual swap if our item is zero, since swapping the top
            // item with itself is pointless:
            if (j) {
                assert(0 <= current);
                // SWAPs are 1-indexed:
                instructions[current].i_opcode = SWAP;
                instructions[current--].i_oparg = j + 1;
            }
            if (stack[j] == VISITED) {
                // Completed the cycle:
                assert(j == i);
                break;
            }
            int next_j = stack[j];
            stack[j] = VISITED;
            j = next_j;
        }
    }
    // NOP out any unused instructions:
    while (0 <= current) {
        INSTR_SET_OP0(&instructions[current--], NOP);
    }
    PyMem_Free(stack);
    *ix += len - 1;
    return SUCCESS;
}
2067
2068
2069
// This list is pretty small, since it's only okay to reorder opcodes that:
2070
// - can't affect control flow (like jumping or raising exceptions)
2071
// - can't invoke arbitrary code (besides finalizers)
2072
// - only touch the TOS (and pop it when finished)
2073
#define SWAPPABLE(opcode) \
2074
3.47k
    ((opcode) == STORE_FAST || \
2075
3.47k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2076
3.47k
     (opcode) == POP_TOP)
2077
2078
#define STORES_TO(instr) \
2079
476
    (((instr).i_opcode == STORE_FAST || \
2080
476
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2081
476
     ? (instr).i_oparg : -1)
2082
2083
static int
2084
next_swappable_instruction(basicblock *block, int i, int lineno)
2085
3.41k
{
2086
3.52k
    while (++i < block->b_iused) {
2087
3.49k
        cfg_instr *instruction = &block->b_instr[i];
2088
3.49k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2089
            // Optimizing across this instruction could cause user-visible
2090
            // changes in the names bound between line tracing events!
2091
16
            return -1;
2092
16
        }
2093
3.48k
        if (instruction->i_opcode == NOP) {
2094
108
            continue;
2095
108
        }
2096
3.37k
        if (SWAPPABLE(instruction->i_opcode)) {
2097
1.53k
            return i;
2098
1.53k
        }
2099
1.83k
        return -1;
2100
3.37k
    }
2101
25
    return -1;
2102
3.41k
}
2103
2104
// Attempt to apply SWAPs statically by swapping *instructions* rather than
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
// `i` is the index of the last SWAP in a run; the scan walks leftward from it.
static void
apply_static_swaps(basicblock *block, int i)
{
    // SWAPs are to our left, and potential swaperands are to our right:
    for (; 0 <= i; i--) {
        assert(i < block->b_iused);
        cfg_instr *swap = &block->b_instr[i];
        if (swap->i_opcode != SWAP) {
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
                // Nope, but we know how to handle these. Keep looking:
                continue;
            }
            // We can't reason about what this instruction does. Bail:
            return;
        }
        // j is the first swappable instruction after this SWAP:
        int j = next_swappable_instruction(block, i, -1);
        if (j < 0) {
            return;
        }
        // Walk forward (i_oparg - 1) more swappables, all on j's line, to find
        // the instruction k that the SWAP would exchange with j:
        int k = j;
        int lineno = block->b_instr[j].i_loc.lineno;
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
            k = next_swappable_instruction(block, k, lineno);
            if (k < 0) {
                return;
            }
        }
        // The reordering is not safe if the two instructions to be swapped
        // store to the same location, or if any intervening instruction stores
        // to the same location as either of them.
        int store_j = STORES_TO(block->b_instr[j]);
        int store_k = STORES_TO(block->b_instr[k]);
        if (store_j >= 0 || store_k >= 0) {
            if (store_j == store_k) {
                return;
            }
            for (int idx = j + 1; idx < k; idx++) {
                int store_idx = STORES_TO(block->b_instr[idx]);
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
                    return;
                }
            }
        }

        // Success!  Drop the SWAP and exchange the two instructions instead:
        INSTR_SET_OP0(swap, NOP);
        cfg_instr temp = block->b_instr[j];
        block->b_instr[j] = block->b_instr[k];
        block->b_instr[k] = temp;
    }
}
2158
2159
/* Peephole-optimize constant loads within a single basic block:
 *   - canonicalize small-int LOAD_CONSTs (maybe_instr_make_load_smallint);
 *   - fold "LOAD_CONST x; conditional jump" into an unconditional JUMP or
 *     nothing, depending on x's truthiness;
 *   - fold "LOAD_CONST None; IS_OP [; TO_BOOL]; POP_JUMP_IF_*" into
 *     POP_JUMP_IF_(NOT_)NONE;
 *   - fold "LOAD_CONST x; TO_BOOL" into a LOAD_CONST of bool(x).
 * A COPY(1) right after a LOAD_CONST is treated as another load of the same
 * constant.  Returns SUCCESS or ERROR.
 */
static int
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
{
    assert(PyDict_CheckExact(const_cache));
    assert(PyList_CheckExact(consts));
    /* opcode/oparg track the most recent "effective" instruction; a COPY(1)
     * of a LOAD_CONST leaves them pointing at the LOAD_CONST. */
    int opcode = 0;
    int oparg = 0;
    for (int i = 0; i < bb->b_iused; i++) {
        cfg_instr *inst = &bb->b_instr[i];
        if (inst->i_opcode == LOAD_CONST) {
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
            Py_DECREF(constant);
            if (res < 0) {
                return ERROR;
            }
        }
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
                                      inst->i_opcode == COPY &&
                                      inst->i_oparg == 1);
        if (! is_copy_of_load_const) {
            opcode = inst->i_opcode;
            oparg = inst->i_oparg;
        }
        assert(!IS_ASSEMBLER_OPCODE(opcode));
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
            continue;
        }
        /* nextop is 0 (no opcode) at the end of the block. */
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
        switch(nextop) {
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE:
            case JUMP_IF_TRUE:
            {
                /* Remove LOAD_CONST const; conditional jump */
                PyObject* cnt = get_const_value(opcode, oparg, consts);
                if (cnt == NULL) {
                    return ERROR;
                }
                int is_true = PyObject_IsTrue(cnt);
                Py_DECREF(cnt);
                if (is_true == -1) {
                    return ERROR;
                }
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
                    INSTR_SET_OP0(inst, NOP);
                }
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
                if (is_true == jump_if_true) {
                    /* The branch is always taken. */
                    bb->b_instr[i+1].i_opcode = JUMP;
                }
                else {
                    /* The branch is never taken. */
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
                }
                break;
            }
            case IS_OP:
            {
                // Fold to POP_JUMP_IF_NONE:
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
                // Fold to POP_JUMP_IF_NOT_NONE:
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
                PyObject *cnt = get_const_value(opcode, oparg, consts);
                if (cnt == NULL) {
                    return ERROR;
                }
                if (!Py_IsNone(cnt)) {
                    Py_DECREF(cnt);
                    break;
                }
                // NOTE(review): cnt's reference is not released on the None
                // path below — benign only if None is immortal; verify.
                if (bb->b_iused <= i + 2) {
                    break;
                }
                cfg_instr *is_instr = &bb->b_instr[i + 1];
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
                // Get rid of TO_BOOL regardless:
                if (jump_instr->i_opcode == TO_BOOL) {
                    INSTR_SET_OP0(jump_instr, NOP);
                    if (bb->b_iused <= i + 3) {
                        break;
                    }
                    jump_instr = &bb->b_instr[i + 3];
                }
                /* invert: jump when the value is NOT None. */
                bool invert = is_instr->i_oparg;
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
                    invert = !invert;
                }
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
                    break;
                }
                INSTR_SET_OP0(inst, NOP);
                INSTR_SET_OP0(is_instr, NOP);
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
                                              : POP_JUMP_IF_NONE;
                break;
            }
            case TO_BOOL:
            {
                /* Replace "LOAD_CONST x; TO_BOOL" with a load of bool(x). */
                PyObject *cnt = get_const_value(opcode, oparg, consts);
                if (cnt == NULL) {
                    return ERROR;
                }
                int is_true = PyObject_IsTrue(cnt);
                Py_DECREF(cnt);
                if (is_true == -1) {
                    return ERROR;
                }
                cnt = PyBool_FromLong(is_true);
                int index = add_const(cnt, consts, const_cache);
                if (index < 0) {
                    return ERROR;
                }
                INSTR_SET_OP0(inst, NOP);
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
                break;
            }
        }
    }
    return SUCCESS;
}
2287
2288
static int
2289
8.29k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2290
67.9k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2291
59.6k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2292
59.6k
    }
2293
8.29k
    return SUCCESS;
2294
8.29k
}
2295
2296
static int
2297
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2298
59.6k
{
2299
59.6k
    assert(PyDict_CheckExact(const_cache));
2300
59.6k
    assert(PyList_CheckExact(consts));
2301
59.6k
    cfg_instr nop;
2302
59.6k
    INSTR_SET_OP0(&nop, NOP);
2303
526k
    for (int i = 0; i < bb->b_iused; i++) {
2304
466k
        cfg_instr *inst = &bb->b_instr[i];
2305
466k
        cfg_instr *target;
2306
466k
        int opcode = inst->i_opcode;
2307
466k
        int oparg = inst->i_oparg;
2308
466k
        if (HAS_TARGET(opcode)) {
2309
30.0k
            assert(inst->i_target->b_iused > 0);
2310
30.0k
            target = &inst->i_target->b_instr[0];
2311
30.0k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2312
30.0k
        }
2313
436k
        else {
2314
436k
            target = &nop;
2315
436k
        }
2316
466k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2317
466k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2318
466k
        switch (opcode) {
2319
            /* Try to fold tuples of constants.
2320
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2321
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2322
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2323
5.10k
            case BUILD_TUPLE:
2324
5.10k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2325
116
                    switch(oparg) {
2326
0
                        case 1:
2327
0
                            INSTR_SET_OP0(inst, NOP);
2328
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2329
0
                            continue;
2330
100
                        case 2:
2331
112
                        case 3:
2332
112
                            INSTR_SET_OP0(inst, NOP);
2333
112
                            bb->b_instr[i+1].i_opcode = SWAP;
2334
112
                            continue;
2335
116
                    }
2336
116
                }
2337
4.99k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2338
4.99k
                break;
2339
1.48k
            case BUILD_LIST:
2340
1.62k
            case BUILD_SET:
2341
1.62k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2342
1.62k
                break;
2343
957
            case POP_JUMP_IF_NOT_NONE:
2344
1.84k
            case POP_JUMP_IF_NONE:
2345
1.84k
                switch (target->i_opcode) {
2346
83
                    case JUMP:
2347
83
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2348
1.84k
                }
2349
1.84k
                break;
2350
11.8k
            case POP_JUMP_IF_FALSE:
2351
11.8k
                switch (target->i_opcode) {
2352
728
                    case JUMP:
2353
728
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2354
11.8k
                }
2355
11.8k
                break;
2356
11.8k
            case POP_JUMP_IF_TRUE:
2357
2.30k
                switch (target->i_opcode) {
2358
70
                    case JUMP:
2359
70
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2360
2.30k
                }
2361
2.30k
                break;
2362
2.30k
            case JUMP_IF_FALSE:
2363
382
                switch (target->i_opcode) {
2364
0
                    case JUMP:
2365
0
                    case JUMP_IF_FALSE:
2366
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2367
0
                        continue;
2368
3
                    case JUMP_IF_TRUE:
2369
                        // No need to check for loops here, a block's b_next
2370
                        // cannot point to itself.
2371
3
                        assert(inst->i_target != inst->i_target->b_next);
2372
3
                        inst->i_target = inst->i_target->b_next;
2373
3
                        i--;
2374
3
                        continue;
2375
382
                }
2376
379
                break;
2377
379
            case JUMP_IF_TRUE:
2378
338
                switch (target->i_opcode) {
2379
0
                    case JUMP:
2380
0
                    case JUMP_IF_TRUE:
2381
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2382
0
                        continue;
2383
8
                    case JUMP_IF_FALSE:
2384
                        // No need to check for loops here, a block's b_next
2385
                        // cannot point to itself.
2386
8
                        assert(inst->i_target != inst->i_target->b_next);
2387
8
                        inst->i_target = inst->i_target->b_next;
2388
8
                        i--;
2389
8
                        continue;
2390
338
                }
2391
330
                break;
2392
4.57k
            case JUMP:
2393
7.71k
            case JUMP_NO_INTERRUPT:
2394
7.71k
                switch (target->i_opcode) {
2395
0
                    case JUMP:
2396
0
                        i -= jump_thread(bb, inst, target, JUMP);
2397
0
                        continue;
2398
0
                    case JUMP_NO_INTERRUPT:
2399
0
                        i -= jump_thread(bb, inst, target, opcode);
2400
0
                        continue;
2401
7.71k
                }
2402
7.71k
                break;
2403
7.71k
            case FOR_ITER:
2404
1.94k
                if (target->i_opcode == JUMP) {
2405
                    /* This will not work now because the jump (at target) could
2406
                     * be forward or backward and FOR_ITER only jumps forward. We
2407
                     * can re-enable this if ever we implement a backward version
2408
                     * of FOR_ITER.
2409
                     */
2410
                    /*
2411
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2412
                    */
2413
0
                }
2414
1.94k
                break;
2415
17.0k
            case STORE_FAST:
2416
17.0k
                if (opcode == nextop &&
2417
2.03k
                    oparg == bb->b_instr[i+1].i_oparg &&
2418
31
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2419
31
                    bb->b_instr[i].i_opcode = POP_TOP;
2420
31
                    bb->b_instr[i].i_oparg = 0;
2421
31
                }
2422
17.0k
                break;
2423
2.31k
            case SWAP:
2424
2.31k
                if (oparg == 1) {
2425
0
                    INSTR_SET_OP0(inst, NOP);
2426
0
                }
2427
2.31k
                break;
2428
22.0k
            case LOAD_GLOBAL:
2429
22.0k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2430
10.6k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2431
10.6k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2432
10.6k
                }
2433
22.0k
                break;
2434
7.75k
            case COMPARE_OP:
2435
7.75k
                if (nextop == TO_BOOL) {
2436
3.54k
                    INSTR_SET_OP0(inst, NOP);
2437
3.54k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2438
3.54k
                    continue;
2439
3.54k
                }
2440
4.20k
                break;
2441
4.20k
            case CONTAINS_OP:
2442
5.22k
            case IS_OP:
2443
5.22k
                if (nextop == TO_BOOL) {
2444
2.45k
                    INSTR_SET_OP0(inst, NOP);
2445
2.45k
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2446
2.45k
                    continue;
2447
2.45k
                }
2448
2.77k
                if (nextop == UNARY_NOT) {
2449
0
                    INSTR_SET_OP0(inst, NOP);
2450
0
                    int inverted = oparg ^ 1;
2451
0
                    assert(inverted == 0 || inverted == 1);
2452
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2453
0
                    continue;
2454
0
                }
2455
2.77k
                break;
2456
6.13k
            case TO_BOOL:
2457
6.13k
                if (nextop == TO_BOOL) {
2458
0
                    INSTR_SET_OP0(inst, NOP);
2459
0
                    continue;
2460
0
                }
2461
6.13k
                break;
2462
6.13k
            case UNARY_NOT:
2463
71
                if (nextop == TO_BOOL) {
2464
0
                    INSTR_SET_OP0(inst, NOP);
2465
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2466
0
                    continue;
2467
0
                }
2468
71
                if (nextop == UNARY_NOT) {
2469
0
                    INSTR_SET_OP0(inst, NOP);
2470
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2471
0
                    continue;
2472
0
                }
2473
71
                _Py_FALLTHROUGH;
2474
123
            case UNARY_INVERT:
2475
858
            case UNARY_NEGATIVE:
2476
858
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2477
858
                break;
2478
609
            case CALL_INTRINSIC_1:
2479
609
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2480
144
                    if (nextop == GET_ITER) {
2481
1
                        INSTR_SET_OP0(inst, NOP);
2482
1
                    }
2483
143
                    else {
2484
143
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2485
143
                    }
2486
144
                }
2487
465
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2488
2
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2489
2
                }
2490
609
                break;
2491
9.53k
            case BINARY_OP:
2492
9.53k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2493
9.53k
                break;
2494
466k
        }
2495
466k
    }
2496
2497
525k
    for (int i = 0; i < bb->b_iused; i++) {
2498
465k
        cfg_instr *inst = &bb->b_instr[i];
2499
465k
        if (inst->i_opcode == SWAP) {
2500
2.09k
            if (swaptimize(bb, &i) < 0) {
2501
0
                goto error;
2502
0
            }
2503
2.09k
            apply_static_swaps(bb, i);
2504
2.09k
        }
2505
465k
    }
2506
59.6k
    return SUCCESS;
2507
0
error:
2508
0
    return ERROR;
2509
59.6k
}
2510
2511
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2512
2513
static int
2514
remove_redundant_nops_and_jumps(cfg_builder *g)
2515
17.9k
{
2516
17.9k
    int removed_nops, removed_jumps;
2517
20.5k
    do {
2518
        /* Convergence is guaranteed because the number of
2519
         * redundant jumps and nops only decreases.
2520
         */
2521
20.5k
        removed_nops = remove_redundant_nops(g);
2522
20.5k
        RETURN_IF_ERROR(removed_nops);
2523
20.5k
        removed_jumps = remove_redundant_jumps(g);
2524
20.5k
        RETURN_IF_ERROR(removed_jumps);
2525
20.5k
    } while(removed_nops + removed_jumps > 0);
2526
17.9k
    return SUCCESS;
2527
17.9k
}
2528
2529
/* Perform optimizations on a control flow graph.
2530
   The consts object should still be in list form to allow new constants
2531
   to be appended.
2532
2533
   Code transformations that reduce code size initially fill the gaps with
2534
   NOPs.  Later those NOPs are removed.
2535
*/
2536
/* Run the CFG optimization passes in order: validate the graph, inline
 * small blocks, drop unreachable blocks, resolve line numbers, run the
 * load-const optimization, peephole-optimize each basic block, and finally
 * clean up the redundant NOPs/jumps left behind by earlier passes.
 * `consts` must still be a list so new constants can be appended.
 * Returns SUCCESS or ERROR. */
static int
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
{
    assert(PyDict_CheckExact(const_cache));
    RETURN_IF_ERROR(check_cfg(g));
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
    }
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
    // Peephole passes may have made more blocks unreachable; prune again.
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
    assert(no_redundant_jumps(g));
    return SUCCESS;
}
2554
2555
static void
2556
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2557
16.1k
{
2558
16.1k
    int32_t line1 = inst1->i_loc.lineno;
2559
16.1k
    int32_t line2 = inst2->i_loc.lineno;
2560
    /* Skip if instructions are on different lines */
2561
16.1k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2562
5.88k
        return;
2563
5.88k
    }
2564
10.2k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2565
1.04k
        return;
2566
1.04k
    }
2567
9.23k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2568
9.23k
    INSTR_SET_OP0(inst2, NOP);
2569
9.23k
}
2570
2571
static int
2572
insert_superinstructions(cfg_builder *g)
2573
8.29k
{
2574
67.9k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2575
2576
485k
        for (int i = 0; i < b->b_iused; i++) {
2577
426k
            cfg_instr *inst = &b->b_instr[i];
2578
426k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2579
426k
            switch(inst->i_opcode) {
2580
57.5k
                case LOAD_FAST:
2581
57.5k
                    if (nextop == LOAD_FAST) {
2582
8.57k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2583
8.57k
                    }
2584
57.5k
                    break;
2585
15.6k
                case STORE_FAST:
2586
15.6k
                    switch (nextop) {
2587
5.92k
                        case LOAD_FAST:
2588
5.92k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2589
5.92k
                            break;
2590
1.66k
                        case STORE_FAST:
2591
1.66k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2592
1.66k
                            break;
2593
15.6k
                    }
2594
15.6k
                    break;
2595
426k
            }
2596
426k
        }
2597
59.6k
    }
2598
8.29k
    int res = remove_redundant_nops(g);
2599
8.29k
    assert(no_redundant_nops(g));
2600
8.29k
    return res;
2601
8.29k
}
2602
2603
#define NOT_LOCAL -1
2604
16.4k
#define DUMMY_INSTR -1
2605
2606
// An abstract reference on the simulated operand stack
// (see optimize_load_fast below).
typedef struct {
    // Index of instruction that produced the reference or DUMMY_INSTR.
    int instr;

    // The local to which the reference refers or NOT_LOCAL.
    int local;
} ref;

// A growable stack of abstract references, backed by a PyMem_* array.
typedef struct {
    ref *refs;            // backing array; NULL until first push
    Py_ssize_t size;      // number of live entries
    Py_ssize_t capacity;  // allocated slots in `refs`
} ref_stack;
2619
2620
static int
2621
ref_stack_push(ref_stack *stack, ref r)
2622
356k
{
2623
356k
    if (stack->size == stack->capacity) {
2624
8.29k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2625
8.29k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2626
8.29k
        if (refs == NULL) {
2627
0
            PyErr_NoMemory();
2628
0
            return -1;
2629
0
        }
2630
8.29k
        stack->refs = refs;
2631
8.29k
        stack->capacity = new_cap;
2632
8.29k
    }
2633
356k
    stack->refs[stack->size] = r;
2634
356k
    stack->size++;
2635
356k
    return 0;
2636
356k
}
2637
2638
static ref
2639
ref_stack_pop(ref_stack *stack)
2640
308k
{
2641
308k
    assert(stack->size > 0);
2642
308k
    stack->size--;
2643
308k
    ref r = stack->refs[stack->size];
2644
308k
    return r;
2645
308k
}
2646
2647
static void
2648
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2649
1.44k
{
2650
1.44k
    Py_ssize_t idx = stack->size - off;
2651
1.44k
    assert(idx >= 0 && idx < stack->size);
2652
1.44k
    ref tmp = stack->refs[idx];
2653
1.44k
    stack->refs[idx] = stack->refs[stack->size - 1];
2654
1.44k
    stack->refs[stack->size - 1] = tmp;
2655
1.44k
}
2656
2657
/* Return the entry at absolute index `idx` (0 is the bottom of the
 * stack).  Bounds are checked only in debug builds. */
static ref
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
{
    assert(idx >= 0 && idx < stack->size);
    return stack->refs[idx];
}
2663
2664
/* Drop all entries.  The allocated capacity is kept for reuse. */
static void
ref_stack_clear(ref_stack *stack)
{
    stack->size = 0;
}
2669
2670
static void
2671
ref_stack_fini(ref_stack *stack)
2672
8.29k
{
2673
8.29k
    if (stack->refs != NULL) {
2674
8.29k
        PyMem_Free(stack->refs);
2675
8.29k
    }
2676
8.29k
    stack->refs = NULL;
2677
8.29k
    stack->capacity = 0;
2678
8.29k
    stack->size = 0;
2679
8.29k
}
2680
2681
/* Per-instruction flags collected by optimize_load_fast.  Any set bit
 * disqualifies a LOAD_FAST{_LOAD_FAST} from being rewritten to its
 * borrowing variant. */
typedef enum {
    // The loaded reference is still on the stack when the local is killed
    SUPPORT_KILLED  = 1,
    // The loaded reference is stored into a local
    STORED_AS_LOCAL = 2,
    // The loaded reference is still on the stack at the end of the basic block
    REF_UNCONSUMED  = 4,
} LoadFastInstrFlag;
2689
2690
static void
2691
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2692
16.7k
{
2693
32.7k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2694
15.9k
        ref r = ref_stack_at(refs, i);
2695
15.9k
        if (r.local == local) {
2696
16
            assert(r.instr >= 0);
2697
16
            instr_flags[r.instr] |= SUPPORT_KILLED;
2698
16
        }
2699
15.9k
    }
2700
16.7k
}
2701
2702
static void
2703
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2704
16.4k
{
2705
16.4k
    kill_local(instr_flags, refs, local);
2706
16.4k
    if (r.instr != DUMMY_INSTR) {
2707
14.3k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2708
14.3k
    }
2709
16.4k
}
2710
2711
static void
2712
load_fast_push_block(basicblock ***sp, basicblock *target,
2713
                     Py_ssize_t start_depth)
2714
47.1k
{
2715
47.1k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2716
47.1k
    if (!target->b_visited) {
2717
36.0k
        target->b_visited = 1;
2718
36.0k
        *(*sp)++ = target;
2719
36.0k
    }
2720
47.1k
}
2721
2722
/*
2723
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2724
 * load borrowed references onto the operand stack.
2725
 *
2726
 * This is only safe when we can prove that the reference in the frame outlives
2727
 * the borrowed reference produced by the instruction. We make this tractable
2728
 * by enforcing the following lifetimes:
2729
 *
2730
 * 1. Borrowed references loaded onto the operand stack live until the end of
2731
 *    the instruction that consumes them from the stack. Any borrowed
2732
 *    references that would escape into the heap (e.g. into frame objects or
2733
 *    generators) are converted into new, strong references.
2734
 *
2735
 * 2. Locals live until they are either killed by an instruction
2736
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2737
 *    via `f_locals` is added to a tuple owned by the frame object.
2738
 *
2739
 * To simplify the problem of detecting which supporting references in the
2740
 * frame are killed by instructions that overwrite locals, we only allow
2741
 * borrowed references to be stored as a local in the frame if they were passed
2742
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2743
 * strong references.
2744
 *
2745
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2746
 * that meet the following criteria:
2747
 *
2748
 * 1. The produced reference must be consumed from the stack before the
2749
 *    supporting reference in the frame is killed.
2750
 *
2751
 * 2. The produced reference cannot be stored as a local.
2752
 *
2753
 * We use abstract interpretation to identify instructions that meet these
2754
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2755
 * stack of abstract references and note any instructions that violate the
2756
 * criteria above. Once we've processed all the instructions in a block, any
2757
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2758
 */
2759
/* See the block comment above for the full analysis.  For each basic
 * block this simulates the bytecode's effect on a stack of abstract
 * `ref`s, accumulating LoadFastInstrFlag bits per instruction; any
 * LOAD_FAST{_LOAD_FAST} with no flags set is rewritten to its borrowing
 * variant.  Returns SUCCESS or ERROR (allocation failure). */
static int
optimize_load_fast(cfg_builder *g)
{
    int status;
    ref_stack refs = {0};
    // Size the per-instruction flag buffer for the largest block.
    int max_instrs = 0;
    basicblock *entryblock = g->g_entryblock;
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        max_instrs = Py_MAX(max_instrs, b->b_iused);
    }
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
    if (instr_flags == NULL) {
        PyErr_NoMemory();
        return ERROR;
    }
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
    if (blocks == NULL) {
        status = ERROR;
        goto done;
    }
    basicblock **sp = blocks;
    *sp = entryblock;
    sp++;
    entryblock->b_startdepth = 0;
    entryblock->b_visited = 1;

    // Push an abstract reference, bailing out of the function on OOM.
    #define PUSH_REF(instr, local)                \
        do {                                      \
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
                status = ERROR;                   \
                goto done;                        \
            }                                     \
        } while(0)

    while (sp != blocks) {
        basicblock *block = *--sp;
        assert(block->b_startdepth > -1);

        // Reset per-block state.
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));

        // Reset the stack of refs. We don't track references on the stack
        // across basic blocks, but the bytecode will expect their
        // presence. Add dummy references as necessary.
        ref_stack_clear(&refs);
        for (int i = 0; i < block->b_startdepth; i++) {
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
        }

        for (int i = 0; i < block->b_iused; i++) {
            cfg_instr *instr = &block->b_instr[i];
            int opcode = instr->i_opcode;
            int oparg = instr->i_oparg;
            assert(opcode != EXTENDED_ARG);
            switch (opcode) {
                // Opcodes that load and store locals
                case DELETE_FAST: {
                    kill_local(instr_flags, &refs, oparg);
                    break;
                }

                case LOAD_FAST: {
                    PUSH_REF(i, oparg);
                    break;
                }

                case LOAD_FAST_AND_CLEAR: {
                    kill_local(instr_flags, &refs, oparg);
                    PUSH_REF(i, oparg);
                    break;
                }

                case LOAD_FAST_LOAD_FAST: {
                    // oparg packs two local indices, one per nibble.
                    PUSH_REF(i, oparg >> 4);
                    PUSH_REF(i, oparg & 15);
                    break;
                }

                case STORE_FAST: {
                    ref r = ref_stack_pop(&refs);
                    store_local(instr_flags, &refs, oparg, r);
                    break;
                }

                case STORE_FAST_LOAD_FAST: {
                    // STORE_FAST
                    ref r = ref_stack_pop(&refs);
                    store_local(instr_flags, &refs, oparg >> 4, r);
                    // LOAD_FAST
                    PUSH_REF(i, oparg & 15);
                    break;
                }

                case STORE_FAST_STORE_FAST: {
                    // STORE_FAST
                    ref r = ref_stack_pop(&refs);
                    store_local(instr_flags, &refs, oparg >> 4, r);
                    // STORE_FAST
                    r = ref_stack_pop(&refs);
                    store_local(instr_flags, &refs, oparg & 15, r);
                    break;
                }

                // Opcodes that shuffle values on the stack
                case COPY: {
                    // The copy aliases the original producer/local.
                    assert(oparg > 0);
                    Py_ssize_t idx = refs.size - oparg;
                    ref r = ref_stack_at(&refs, idx);
                    PUSH_REF(r.instr, r.local);
                    break;
                }

                case SWAP: {
                    assert(oparg >= 2);
                    ref_stack_swap_top(&refs, oparg);
                    break;
                }

                // We treat opcodes that do not consume all of their inputs on
                // a case by case basis, as we have no generic way of knowing
                // how many inputs should be left on the stack.

                // Opcodes that consume no inputs
                case FORMAT_SIMPLE:
                case GET_ANEXT:
                case GET_ITER:
                case GET_LEN:
                case GET_YIELD_FROM_ITER:
                case IMPORT_FROM:
                case MATCH_KEYS:
                case MATCH_MAPPING:
                case MATCH_SEQUENCE:
                case WITH_EXCEPT_START: {
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
                    int net_pushed = num_pushed - num_popped;
                    assert(net_pushed >= 0);
                    for (int j = 0; j < net_pushed; j++) {
                        PUSH_REF(i, NOT_LOCAL);
                    }
                    break;
                }

                // Opcodes that consume some inputs and push no new values
                case DICT_MERGE:
                case DICT_UPDATE:
                case LIST_APPEND:
                case LIST_EXTEND:
                case MAP_ADD:
                case RERAISE:
                case SET_ADD:
                case SET_UPDATE: {
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
                    int net_popped = num_popped - num_pushed;
                    assert(net_popped > 0);
                    // NOTE: this `i` shadows the instruction index; its
                    // bound is net_popped, not b_iused.
                    for (int i = 0; i < net_popped; i++) {
                        ref_stack_pop(&refs);
                    }
                    break;
                }

                case END_SEND:
                case SET_FUNCTION_ATTRIBUTE: {
                    // Pops two, pushes back the former top-of-stack.
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
                    ref tos = ref_stack_pop(&refs);
                    ref_stack_pop(&refs);
                    PUSH_REF(tos.instr, tos.local);
                    break;
                }

                // Opcodes that consume some inputs and push new values
                case CHECK_EXC_MATCH: {
                    ref_stack_pop(&refs);
                    PUSH_REF(i, NOT_LOCAL);
                    break;
                }

                case FOR_ITER: {
                    // The jump target sees one extra value on the stack.
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
                    PUSH_REF(i, NOT_LOCAL);
                    break;
                }

                case LOAD_ATTR:
                case LOAD_SUPER_ATTR: {
                    ref self = ref_stack_pop(&refs);
                    if (opcode == LOAD_SUPER_ATTR) {
                        ref_stack_pop(&refs);
                        ref_stack_pop(&refs);
                    }
                    PUSH_REF(i, NOT_LOCAL);
                    if (oparg & 1) {
                        // A method call; conservatively assume that self is pushed
                        // back onto the stack
                        PUSH_REF(self.instr, self.local);
                    }
                    break;
                }

                case LOAD_SPECIAL:
                case PUSH_EXC_INFO: {
                    ref tos = ref_stack_pop(&refs);
                    PUSH_REF(i, NOT_LOCAL);
                    PUSH_REF(tos.instr, tos.local);
                    break;
                }

                case SEND: {
                    load_fast_push_block(&sp, instr->i_target, refs.size);
                    ref_stack_pop(&refs);
                    PUSH_REF(i, NOT_LOCAL);
                    break;
                }

                // Opcodes that consume all of their inputs
                default: {
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
                    if (HAS_TARGET(instr->i_opcode)) {
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
                    }
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
                        // Block push opcodes only affect the stack when jumping
                        // to the target.
                        for (int j = 0; j < num_popped; j++) {
                            ref_stack_pop(&refs);
                        }
                        for (int j = 0; j < num_pushed; j++) {
                            PUSH_REF(i, NOT_LOCAL);
                        }
                    }
                    break;
                }
            }
        }

        // Push fallthrough block
        if (BB_HAS_FALLTHROUGH(block)) {
            assert(block->b_next != NULL);
            load_fast_push_block(&sp, block->b_next, refs.size);
        }

        // Mark instructions that produce values that are on the stack at the
        // end of the basic block
        for (Py_ssize_t i = 0; i < refs.size; i++) {
            ref r = ref_stack_at(&refs, i);
            if (r.instr != -1) {
                instr_flags[r.instr] |= REF_UNCONSUMED;
            }
        }

        // Optimize instructions
        for (int i = 0; i < block->b_iused; i++) {
            if (!instr_flags[i]) {
                cfg_instr *instr = &block->b_instr[i];
                switch (instr->i_opcode) {
                    case LOAD_FAST:
                        instr->i_opcode = LOAD_FAST_BORROW;
                        break;
                    case LOAD_FAST_LOAD_FAST:
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
                        break;
                    default:
                        break;
                }
            }
        }
    }

    #undef PUSH_REF

    status = SUCCESS;

done:
    ref_stack_fini(&refs);
    PyMem_Free(instr_flags);
    PyMem_Free(blocks);
    return status;
}
3041
3042
// helper functions for add_checks_for_loads_of_unknown_variables
3043
static inline void
3044
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3045
203k
{
3046
    // Push b if the unsafe mask is giving us any new information.
3047
    // To avoid overflowing the stack, only allow each block once.
3048
    // Use b->b_visited=1 to mean that b is currently on the stack.
3049
203k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3050
203k
    if (b->b_unsafe_locals_mask != both) {
3051
34.2k
        b->b_unsafe_locals_mask = both;
3052
        // More work left to do.
3053
34.2k
        if (!b->b_visited) {
3054
            // not on the stack, so push it.
3055
34.0k
            *(*sp)++ = b;
3056
34.0k
            b->b_visited = 1;
3057
34.0k
        }
3058
34.2k
    }
3059
203k
}
3060
3061
/* Propagate "possibly uninitialized" information for the first 64 locals
 * through one basic block (bit i of the mask set => local i may be
 * unbound).  Rewrites LOAD_FAST -> LOAD_FAST_CHECK where the local may be
 * unbound, and pushes successor blocks whose masks gained new bits. */
static void
scan_block_for_locals(basicblock *b, basicblock ***sp)
{
    // bit i is set if local i is potentially uninitialized
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
    for (int i = 0; i < b->b_iused; i++) {
        cfg_instr *instr = &b->b_instr[i];
        assert(instr->i_opcode != EXTENDED_ARG);
        if (instr->i_except != NULL) {
            // The exception edge sees the state before this instruction
            // takes effect.
            maybe_push(instr->i_except, unsafe_mask, sp);
        }
        if (instr->i_oparg >= 64) {
            // Locals >= 64 are handled by fast_scan_many_locals.
            continue;
        }
        assert(instr->i_oparg >= 0);
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
        switch (instr->i_opcode) {
            case DELETE_FAST:
            case LOAD_FAST_AND_CLEAR:
            case STORE_FAST_MAYBE_NULL:
                unsafe_mask |= bit;
                break;
            case STORE_FAST:
                unsafe_mask &= ~bit;
                break;
            case LOAD_FAST_CHECK:
                // If this doesn't raise, then the local is defined.
                unsafe_mask &= ~bit;
                break;
            case LOAD_FAST:
                if (unsafe_mask & bit) {
                    instr->i_opcode = LOAD_FAST_CHECK;
                }
                unsafe_mask &= ~bit;
                break;
        }
    }
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
        maybe_push(b->b_next, unsafe_mask, sp);
    }
    cfg_instr *last = basicblock_last_instr(b);
    if (last && is_jump(last)) {
        assert(last->i_target != NULL);
        maybe_push(last->i_target, unsafe_mask, sp);
    }
}
3107
3108
static int
3109
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3110
0
{
3111
0
    assert(nlocals > 64);
3112
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3113
0
    if (states == NULL) {
3114
0
        PyErr_NoMemory();
3115
0
        return ERROR;
3116
0
    }
3117
0
    Py_ssize_t blocknum = 0;
3118
    // state[i - 64] == blocknum if local i is guaranteed to
3119
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3120
    // STORE_FAST within that basicblock (not followed by
3121
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3122
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3123
0
        blocknum++;
3124
0
        for (int i = 0; i < b->b_iused; i++) {
3125
0
            cfg_instr *instr = &b->b_instr[i];
3126
0
            assert(instr->i_opcode != EXTENDED_ARG);
3127
0
            int arg = instr->i_oparg;
3128
0
            if (arg < 64) {
3129
0
                continue;
3130
0
            }
3131
0
            assert(arg >= 0);
3132
0
            switch (instr->i_opcode) {
3133
0
                case DELETE_FAST:
3134
0
                case LOAD_FAST_AND_CLEAR:
3135
0
                case STORE_FAST_MAYBE_NULL:
3136
0
                    states[arg - 64] = blocknum - 1;
3137
0
                    break;
3138
0
                case STORE_FAST:
3139
0
                    states[arg - 64] = blocknum;
3140
0
                    break;
3141
0
                case LOAD_FAST:
3142
0
                    if (states[arg - 64] != blocknum) {
3143
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3144
0
                    }
3145
0
                    states[arg - 64] = blocknum;
3146
0
                    break;
3147
0
                    Py_UNREACHABLE();
3148
0
            }
3149
0
        }
3150
0
    }
3151
0
    PyMem_Free(states);
3152
0
    return SUCCESS;
3153
0
}
3154
3155
/* Drop constants that no instruction references from `consts`, compacting the
 * list in place and rewriting every const-using oparg to the new index.
 * consts[0] is always kept (it may be the docstring).
 *
 * Returns SUCCESS or ERROR (allocation / list-resize failure).
 */
static int
remove_unused_consts(basicblock *entryblock, PyObject *consts)
{
    assert(PyList_CheckExact(consts));
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
    if (nconsts == 0) {
        return SUCCESS;  /* nothing to do */
    }

    Py_ssize_t *index_map = NULL;
    Py_ssize_t *reverse_index_map = NULL;
    int err = ERROR;

    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
    if (index_map == NULL) {
        goto end;
    }
    for (Py_ssize_t i = 1; i < nconsts; i++) {
        index_map[i] = -1;
    }
    // The first constant may be docstring; keep it always.
    index_map[0] = 0;

    /* mark used consts */
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        for (int i = 0; i < b->b_iused; i++) {
            int opcode = b->b_instr[i].i_opcode;
            if (OPCODE_HAS_CONST(opcode)) {
                int index = b->b_instr[i].i_oparg;
                index_map[index] = index;
            }
        }
    }
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
    /* condense consts */
    Py_ssize_t n_used_consts = 0;
    for (Py_ssize_t i = 0; i < nconsts; i++) {
        if (index_map[i] != -1) {
            assert(index_map[i] == i);
            // After this loop, index_map[j] (j < n_used_consts) is the old
            // index of the j-th surviving constant.
            index_map[n_used_consts++] = index_map[i];
        }
    }
    if (n_used_consts == nconsts) {
        /* nothing to do */
        err = SUCCESS;
        goto end;
    }

    /* move all used consts to the beginning of the consts list */
    assert(n_used_consts < nconsts);
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
        Py_ssize_t old_index = index_map[i];
        assert(i <= old_index && old_index < nconsts);
        if (i != old_index) {
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
            assert(value != NULL);
            // PyList_SetItem steals the reference created by Py_NewRef and
            // releases the item previously at slot i.
            PyList_SetItem(consts, i, Py_NewRef(value));
        }
    }

    /* truncate the consts list at its new size */
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
        goto end;
    }
    /* adjust const indices in the bytecode */
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
    if (reverse_index_map == NULL) {
        goto end;
    }
    for (Py_ssize_t i = 0; i < nconsts; i++) {
        reverse_index_map[i] = -1;
    }
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
        assert(index_map[i] != -1);
        assert(reverse_index_map[index_map[i]] == -1);
        // reverse_index_map[old index] = new index.
        reverse_index_map[index_map[i]] = i;
    }

    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        for (int i = 0; i < b->b_iused; i++) {
            int opcode = b->b_instr[i].i_opcode;
            if (OPCODE_HAS_CONST(opcode)) {
                int index = b->b_instr[i].i_oparg;
                assert(reverse_index_map[index] >= 0);
                assert(reverse_index_map[index] < n_used_consts);
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
            }
        }
    }

    err = SUCCESS;
end:
    /* free(NULL) is a no-op, so the early-exit paths are safe here */
    PyMem_Free(index_map);
    PyMem_Free(reverse_index_map);
    return err;
}
3251
3252
3253
3254
/* Replace LOAD_FAST with LOAD_FAST_CHECK wherever the local might be unbound
 * at runtime (PEP 709-era fast-locals analysis).  The first 64 locals are
 * tracked precisely with a 64-bit "maybe uninitialized" mask propagated
 * across the CFG via a worklist; locals beyond 64 fall back to the cheaper
 * per-block scan in fast_scan_many_locals().
 *
 * Returns SUCCESS or ERROR on allocation failure.
 */
static int
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
                                                int nlocals,
                                                int nparams)
{
    if (nlocals == 0) {
        return SUCCESS;
    }
    if (nlocals > 64) {
        // To avoid O(nlocals**2) compilation, locals beyond the first
        // 64 are only analyzed one basicblock at a time: initialization
        // info is not passed between basicblocks.
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
            return ERROR;
        }
        nlocals = 64;
    }
    basicblock **stack = make_cfg_traversal_stack(entryblock);
    if (stack == NULL) {
        return ERROR;
    }
    basicblock **sp = stack;

    // First origin of being uninitialized:
    // The non-parameter locals in the entry block.
    uint64_t start_mask = 0;
    for (int i = nparams; i < nlocals; i++) {
        start_mask |= (uint64_t)1 << i;
    }
    maybe_push(entryblock, start_mask, &sp);

    // Second origin of being uninitialized:
    // There could be DELETE_FAST somewhere, so
    // be sure to scan each basicblock at least once.
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        scan_block_for_locals(b, &sp);
    }
    // Now propagate the uncertainty from the origins we found: Use
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
    while (sp > stack) {
        basicblock *b = *--sp;
        // mark as no longer on stack
        b->b_visited = 0;
        // Re-scanning may push successors again; the loop terminates because
        // the masks only grow (monotone fixpoint).
        scan_block_for_locals(b, &sp);
    }
    PyMem_Free(stack);
    return SUCCESS;
}
3302
3303
3304
static int
3305
7.29k
mark_warm(basicblock *entryblock) {
3306
7.29k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3307
7.29k
    if (stack == NULL) {
3308
0
        return ERROR;
3309
0
    }
3310
7.29k
    basicblock **sp = stack;
3311
3312
7.29k
    *sp++ = entryblock;
3313
7.29k
    entryblock->b_visited = 1;
3314
49.3k
    while (sp > stack) {
3315
42.1k
        basicblock *b = *(--sp);
3316
42.1k
        assert(!b->b_except_handler);
3317
42.1k
        b->b_warm = 1;
3318
42.1k
        basicblock *next = b->b_next;
3319
42.1k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3320
20.3k
            *sp++ = next;
3321
20.3k
            next->b_visited = 1;
3322
20.3k
        }
3323
371k
        for (int i=0; i < b->b_iused; i++) {
3324
329k
            cfg_instr *instr = &b->b_instr[i];
3325
329k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3326
14.4k
                *sp++ = instr->i_target;
3327
14.4k
                instr->i_target->b_visited = 1;
3328
14.4k
            }
3329
329k
        }
3330
42.1k
    }
3331
7.29k
    PyMem_Free(stack);
3332
7.29k
    return SUCCESS;
3333
7.29k
}
3334
3335
/* Mark basicblocks reachable only through exception handlers as "cold".
 * First mark_warm() flags everything reachable by normal control flow; then
 * a second traversal seeded from exception-handler blocks marks anything not
 * warm as cold.  Assumes no block has b_cold/b_warm set yet.
 *
 * Returns SUCCESS or ERROR on allocation failure.
 */
static int
mark_cold(basicblock *entryblock) {
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        assert(!b->b_cold && !b->b_warm);
    }
    if (mark_warm(entryblock) < 0) {
        return ERROR;
    }

    basicblock **stack = make_cfg_traversal_stack(entryblock);
    if (stack == NULL) {
        return ERROR;
    }

    basicblock **sp = stack;
    // Seed the traversal with every exception-handler entry block.
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        if (b->b_except_handler) {
            assert(!b->b_warm);
            *sp++ = b;
            b->b_visited = 1;
        }
    }

    while (sp > stack) {
        basicblock *b = *(--sp);
        b->b_cold = 1;
        basicblock *next = b->b_next;
        // Propagate coldness along fallthrough, but never into warm blocks.
        if (next && BB_HAS_FALLTHROUGH(b)) {
            if (!next->b_warm && !next->b_visited) {
                *sp++ = next;
                next->b_visited = 1;
            }
        }
        for (int i = 0; i < b->b_iused; i++) {
            cfg_instr *instr = &b->b_instr[i];
            if (is_jump(instr)) {
                assert(i == b->b_iused - 1);
                basicblock *target = b->b_instr[i].i_target;
                if (!target->b_warm && !target->b_visited) {
                    *sp++ = target;
                    target->b_visited = 1;
                }
            }
        }
    }
    PyMem_Free(stack);
    return SUCCESS;
}
3383
3384
3385
/* Move all cold (exception-only) basicblocks to the end of the block list so
 * the hot path is contiguous in the emitted bytecode.  Where a cold block
 * would fall through into a warm one after the move, an explicit
 * JUMP_NO_INTERRUPT trampoline block is inserted first.
 *
 * Returns SUCCESS or ERROR.
 */
static int
push_cold_blocks_to_end(cfg_builder *g) {
    basicblock *entryblock = g->g_entryblock;
    if (entryblock->b_next == NULL) {
        /* single basicblock, no need to reorder */
        return SUCCESS;
    }
    RETURN_IF_ERROR(mark_cold(entryblock));

    int next_lbl = get_max_label(g->g_entryblock) + 1;

    /* If we have a cold block with fallthrough to a warm block, add */
    /* an explicit jump instead of fallthrough */
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
            basicblock *explicit_jump = cfg_builder_new_block(g);
            if (explicit_jump == NULL) {
                return ERROR;
            }
            // Make sure the jump target has a label to jump to.
            if (!IS_LABEL(b->b_next->b_label)) {
                b->b_next->b_label.id = next_lbl++;
            }
            // NOTE(review): basicblock_addop's return value is not checked
            // here, unlike other call sites — confirm it cannot fail for a
            // freshly created block.
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
                             NO_LOCATION);
            explicit_jump->b_cold = 1;
            explicit_jump->b_next = b->b_next;
            explicit_jump->b_predecessors = 1;
            b->b_next = explicit_jump;

            /* set target */
            cfg_instr *last = basicblock_last_instr(explicit_jump);
            last->i_target = explicit_jump->b_next;
        }
    }

    assert(!entryblock->b_cold);  /* First block can't be cold */
    basicblock *cold_blocks = NULL;
    basicblock *cold_blocks_tail = NULL;

    /* Unlink each maximal run ("streak") of consecutive cold blocks and
     * append it to the cold_blocks chain, preserving relative order. */
    basicblock *b = entryblock;
    while(b->b_next) {
        assert(!b->b_cold);
        while (b->b_next && !b->b_next->b_cold) {
            b = b->b_next;
        }
        if (b->b_next == NULL) {
            /* no more cold blocks */
            break;
        }

        /* b->b_next is the beginning of a cold streak */
        assert(!b->b_cold && b->b_next->b_cold);

        basicblock *b_end = b->b_next;
        while (b_end->b_next && b_end->b_next->b_cold) {
            b_end = b_end->b_next;
        }

        /* b_end is the end of the cold streak */
        assert(b_end && b_end->b_cold);
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);

        if (cold_blocks == NULL) {
            cold_blocks = b->b_next;
        }
        else {
            cold_blocks_tail->b_next = b->b_next;
        }
        cold_blocks_tail = b_end;
        /* Splice the streak out of the main chain. */
        b->b_next = b_end->b_next;
        b_end->b_next = NULL;
    }
    assert(b != NULL && b->b_next == NULL);
    /* Reattach all cold blocks after the last warm block. */
    b->b_next = cold_blocks;

    if (cold_blocks != NULL) {
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
    }
    return SUCCESS;
}
3465
3466
/* Lower the pseudo instructions JUMP_IF_FALSE / JUMP_IF_TRUE (which peek at
 * the top of stack without consuming it) into the real sequence
 * COPY 1; TO_BOOL; POP_JUMP_IF_{FALSE,TRUE}, preserving location and
 * exception-handler info.
 *
 * Returns SUCCESS or ERROR (instruction insertion failure).
 */
static int
convert_pseudo_conditional_jumps(cfg_builder *g)
{
    basicblock *entryblock = g->g_entryblock;
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        for (int i = 0; i < b->b_iused; i++) {
            cfg_instr *instr = &b->b_instr[i];
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
                assert(i == b->b_iused - 1);
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
                // Copy what we need before inserting: insertion may
                // reallocate b_instr, invalidating `instr`.
                location loc = instr->i_loc;
                basicblock *except = instr->i_except;
                cfg_instr copy = {
                            .i_opcode = COPY,
                            .i_oparg = 1,
                            .i_loc = loc,
                            .i_target = NULL,
                            .i_except = except,
                };
                // i++ keeps the loop index on the (shifted) jump instruction.
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
                cfg_instr to_bool = {
                            .i_opcode = TO_BOOL,
                            .i_oparg = 0,
                            .i_loc = loc,
                            .i_target = NULL,
                            .i_except = except,
                };
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
            }
        }
    }
    return SUCCESS;
}
3500
3501
static int
3502
convert_pseudo_ops(cfg_builder *g)
3503
8.29k
{
3504
8.29k
    basicblock *entryblock = g->g_entryblock;
3505
67.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3506
479k
        for (int i = 0; i < b->b_iused; i++) {
3507
419k
            cfg_instr *instr = &b->b_instr[i];
3508
419k
            if (is_block_push(instr)) {
3509
3.57k
                INSTR_SET_OP0(instr, NOP);
3510
3.57k
            }
3511
416k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3512
1.93k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3513
1.93k
                instr->i_opcode = LOAD_FAST;
3514
1.93k
            }
3515
414k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3516
696
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3517
696
                instr->i_opcode = STORE_FAST;
3518
696
            }
3519
419k
        }
3520
59.6k
    }
3521
8.29k
    return remove_redundant_nops_and_jumps(g);
3522
8.29k
}
3523
3524
static inline bool
3525
101k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3526
101k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3527
56.9k
        return basicblock_has_no_lineno(b);
3528
56.9k
    }
3529
44.6k
    else {
3530
44.6k
        return false;
3531
44.6k
    }
3532
101k
}
3533
3534
3535
/* PEP 626 mandates that the f_lineno of a frame is correct
3536
 * after a frame terminates. It would be prohibitively expensive
3537
 * to continuously update the f_lineno field at runtime,
3538
 * so we make sure that all exiting instruction (raises and returns)
3539
 * have a valid line number, allowing us to compute f_lineno lazily.
3540
 * We can do this by duplicating the exit blocks without line number
3541
 * so that none have more than one predecessor. We can then safely
3542
 * copy the line number from the sole predecessor block.
3543
 */
3544
/* PEP 626 mandates that the f_lineno of a frame is correct
 * after a frame terminates. It would be prohibitively expensive
 * to continuously update the f_lineno field at runtime,
 * so we make sure that all exiting instruction (raises and returns)
 * have a valid line number, allowing us to compute f_lineno lazily.
 * We can do this by duplicating the exit blocks without line number
 * so that none have more than one predecessor. We can then safely
 * copy the line number from the sole predecessor block.
 */
static int
duplicate_exits_without_lineno(cfg_builder *g)
{
    int next_lbl = get_max_label(g->g_entryblock) + 1;

    /* Copy all exit blocks without line number that are targets of a jump.
     */
    basicblock *entryblock = g->g_entryblock;
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        cfg_instr *last = basicblock_last_instr(b);
        if (last == NULL) {
            continue;
        }
        if (is_jump(last)) {
            basicblock *target = next_nonempty_block(last->i_target);
            // Only duplicate when the target is shared; a single-predecessor
            // target can simply inherit the line number later.
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
                basicblock *new_target = copy_basicblock(g, target);
                if (new_target == NULL) {
                    return ERROR;
                }
                // The copy serves this jump alone, so it can take the
                // jump's own location.
                new_target->b_instr[0].i_loc = last->i_loc;
                last->i_target = new_target;
                target->b_predecessors--;
                new_target->b_predecessors = 1;
                // Link the copy right after the original in the block list.
                new_target->b_next = target->b_next;
                new_target->b_label.id = next_lbl++;
                target->b_next = new_target;
            }
        }
    }

    /* Any remaining reachable exit blocks without line number can only be reached by
     * fall through, and thus can only have a single predecessor */
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
                cfg_instr *last = basicblock_last_instr(b);
                assert(last != NULL);
                b->b_next->b_instr[0].i_loc = last->i_loc;
            }
        }
    }
    return SUCCESS;
}
3588
3589
3590
/* If an instruction has no line number, but it's predecessor in the BB does,
3591
 * then copy the line number. If a successor block has no line number, and only
3592
 * one predecessor, then inherit the line number.
3593
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3594
 * Also reduces the size of the line number table,
3595
 * but has no impact on the generated line number events.
3596
 */
3597
3598
static inline void
3599
maybe_propagate_location(basicblock *b, int i, location loc)
3600
941k
{
3601
941k
    assert(b->b_iused > i);
3602
941k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3603
55.5k
         b->b_instr[i].i_loc = loc;
3604
55.5k
    }
3605
941k
}
3606
3607
/* Fill in missing line numbers: within a block each instruction inherits the
 * location of its predecessor, and a single-predecessor successor block
 * (reached by fallthrough or jump) inherits the location of the last
 * instruction of the block jumping into it.  See the comment above about
 * PEP 626's f_lineno requirements.
 */
static void
propagate_line_numbers(basicblock *entryblock)
{
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        cfg_instr *last = basicblock_last_instr(b);
        if (last == NULL) {
            continue;
        }

        // Forward-propagate within the block.
        location prev_location = NO_LOCATION;
        for (int i = 0; i < b->b_iused; i++) {
            maybe_propagate_location(b, i, prev_location);
            prev_location = b->b_instr[i].i_loc;
        }
        // NOTE(review): this relies on BB_HAS_FALLTHROUGH implying a non-NULL
        // b->b_next at this stage — confirm that invariant holds here.
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
            if (b->b_next->b_iused > 0) {
                maybe_propagate_location(b->b_next, 0, prev_location);
            }
        }
        if (is_jump(last)) {
            basicblock *target = last->i_target;
            // Skip empty single-predecessor blocks to reach the real target.
            while (target->b_iused == 0 && target->b_predecessors == 1) {
                target = target->b_next;
            }
            if (target->b_predecessors == 1) {
                maybe_propagate_location(target, 0, prev_location);
            }
        }
    }
}
3637
3638
/* Ensure every exiting instruction has a usable line number: first duplicate
 * shared exit blocks that lack one, then propagate locations through the CFG.
 * `firstlineno` is currently unused here — presumably kept for call-site /
 * historical symmetry; confirm before removing.
 */
static int
resolve_line_numbers(cfg_builder *g, int firstlineno)
{
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
    propagate_line_numbers(g->g_entryblock);
    return SUCCESS;
}
3645
3646
/* Run the full CFG optimization pipeline on one code unit, in order:
 * preprocessing (resolve jump labels, mark/label exception handlers),
 * then optimization (peephole/CFG optimizations, unused-const removal,
 * LOAD_FAST_CHECK insertion, superinstruction fusion, cold-block layout,
 * line-number resolution).  The pass order matters: const indices must be
 * final before superinstructions, and line numbers are resolved last.
 *
 * Returns SUCCESS or ERROR (with an exception set).
 */
int
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
                        int nlocals, int nparams, int firstlineno)
{
    assert(cfg_builder_check(g));
    /** Preprocessing **/
    /* Map labels to targets and mark exception handlers */
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));

    /** Optimization **/
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
    RETURN_IF_ERROR(
        add_checks_for_loads_of_uninitialized_variables(
            g->g_entryblock, nlocals, nparams));
    RETURN_IF_ERROR(insert_superinstructions(g));

    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
    // assert(all_exits_have_lineno(g->g_entryblock));
    return SUCCESS;
}
3671
3672
/* Build the cell/free-variable offset table: fixed[i] is the frame slot for
 * cell or free variable i.  By default cells and frees follow the plain
 * locals (nlocals + i); a cell variable that is also an argument instead
 * reuses the argument's slot.
 *
 * Returns a PyMem-allocated array of (ncellvars + nfreevars) ints that the
 * caller must PyMem_Free, or NULL with an exception set.
 */
static int *
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
{
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);

    int noffsets = ncellvars + nfreevars;
    int *fixed = PyMem_New(int, noffsets);
    if (fixed == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    // Default layout: cells/frees come right after the plain locals.
    for (int i = 0; i < noffsets; i++) {
        fixed[i] = nlocals + i;
    }

    // Overwrite entries for cell variables that shadow arguments: those
    // share the argument's slot instead of getting their own.
    PyObject *varname, *cellindex;
    Py_ssize_t pos = 0;
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
        PyObject *varindex;
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
            goto error;
        }
        if (varindex == NULL) {
            // Not an argument/local; keep the default offset.
            continue;
        }

        int argoffset = PyLong_AsInt(varindex);
        Py_DECREF(varindex);
        if (argoffset == -1 && PyErr_Occurred()) {
            goto error;
        }

        int oldindex = PyLong_AsInt(cellindex);
        if (oldindex == -1 && PyErr_Occurred()) {
            goto error;
        }
        fixed[oldindex] = argoffset;
    }
    return fixed;

error:
    PyMem_Free(fixed);
    return NULL;
}
3718
3719
#define IS_GENERATOR(CF) \
3720
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3721
3722
/* Insert the implicit prologue instructions at the top of the entry block:
 * one MAKE_CELL per cell variable (in frame-slot order, using the `fixed`
 * offsets from build_cellfixedoffsets()), then COPY_FREE_VARS if the code
 * object has free variables.
 *
 * Returns SUCCESS or ERROR (allocation / insertion failure).
 */
static int
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
                           int *fixed, int nfreevars)
{
    assert(umd->u_firstlineno > 0);

    /* Set up cells for any variable that escapes, to be put in a closure. */
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
    if (ncellvars) {
        // umd->u_cellvars has the cells out of order so we sort them
        // before adding the MAKE_CELL instructions.  Note that we
        // adjust for arg cells, which come first.
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
        if (sorted == NULL) {
            PyErr_NoMemory();
            return ERROR;
        }
        // sorted[slot] = old cell index + 1; 0 marks "no cell in this slot".
        for (int i = 0; i < ncellvars; i++) {
            sorted[fixed[i]] = i + 1;
        }
        // Walk slots in order, emitting MAKE_CELL for each occupied one,
        // until all ncellvars cells have been inserted.
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
            int oldindex = sorted[i] - 1;
            if (oldindex == -1) {
                continue;
            }
            cfg_instr make_cell = {
                .i_opcode = MAKE_CELL,
                // This will get fixed in offset_derefs().
                .i_oparg = oldindex,
                .i_loc = NO_LOCATION,
                .i_target = NULL,
                .i_except = NULL,
            };
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
                PyMem_RawFree(sorted);
                return ERROR;
            }
            ncellsused += 1;
        }
        PyMem_RawFree(sorted);
    }

    if (nfreevars) {
        // COPY_FREE_VARS must be the very first instruction, before the
        // MAKE_CELLs inserted above.
        cfg_instr copy_frees = {
            .i_opcode = COPY_FREE_VARS,
            .i_oparg = nfreevars,
            .i_loc = NO_LOCATION,
            .i_target = NULL,
            .i_except = NULL,
        };
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
    }

    return SUCCESS;
}
3778
3779
/* Rewrite the opargs of all cell/free-variable instructions from "index into
 * the cellvars+freevars tables" to final frame-slot offsets, using the map
 * from build_cellfixedoffsets().  Slots shared with arguments ("dropped"
 * duplicates) shift the remaining offsets down.
 *
 * Returns the number of dropped (arg-cell) slots; the caller treats a
 * negative value as an error, but no path here currently returns one.
 */
static int
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
{
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
    int noffsets = ncellvars + nfreevars;

    // First deal with duplicates (arg cells).
    int numdropped = 0;
    for (int i = 0; i < noffsets ; i++) {
        if (fixedmap[i] == i + nlocals) {
            // Still at its default slot: shift down past dropped entries.
            fixedmap[i] -= numdropped;
        }
        else {
            // It was a duplicate (cell/arg).
            numdropped += 1;
        }
    }

    // Then update offsets, either relative to locals or by cell2arg.
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        for (int i = 0; i < b->b_iused; i++) {
            cfg_instr *inst = &b->b_instr[i];
            // This is called before extended args are generated.
            assert(inst->i_opcode != EXTENDED_ARG);
            int oldoffset = inst->i_oparg;
            switch(inst->i_opcode) {
                case MAKE_CELL:
                case LOAD_CLOSURE:
                case LOAD_DEREF:
                case STORE_DEREF:
                case DELETE_DEREF:
                case LOAD_FROM_DICT_OR_DEREF:
                    assert(oldoffset >= 0);
                    assert(oldoffset < noffsets);
                    assert(fixedmap[oldoffset] >= 0);
                    inst->i_oparg = fixedmap[oldoffset];
            }
        }
    }

    return numdropped;
}
3823
3824
/* Compute the final size of the localsplus array (locals + cellvars +
 * freevars, minus cells that duplicate argument slots) and rewrite the
 * CFG so all fast-local/cell instructions use the final slot numbering.
 *
 * Ordering matters: prefix instructions (MAKE_CELL / COPY_FREE_VARS)
 * must be inserted before offsets are fixed up, per the comment below.
 *
 * Returns the resulting nlocalsplus, or ERROR (-1) on failure.
 */
static int
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
{
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
    // Guard against int overflow of the sum below.
    assert(INT_MAX - nlocals - ncellvars > 0);
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
    int nlocalsplus = nlocals + ncellvars + nfreevars;
    // Owned by this function; freed on every exit path below.
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
    if (cellfixedoffsets == NULL) {
        return ERROR;
    }

    // This must be called before fix_cell_offsets().
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
        PyMem_Free(cellfixedoffsets);
        return ERROR;
    }

    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
    cellfixedoffsets = NULL;
    if (numdropped < 0) {
        return ERROR;
    }

    // Cells that alias argument slots don't get their own entries.
    nlocalsplus -= numdropped;
    return nlocalsplus;
}
3857
3858
/* Build a control-flow graph from a linear instruction sequence.
 *
 * Three passes over seq:
 *   1. clear every i_target flag;
 *   2. mark each instruction that is the target of a jump/branch
 *      (oparg of a HAS_TARGET opcode is an instruction index here);
 *   3. emit instructions into the builder, starting a labelled block
 *      at every marked target.
 *
 * ANNOTATIONS_PLACEHOLDER is expanded inline (or removed when there is
 * no annotations code); `offset` tracks how far subsequent instruction
 * indices have shifted so label ids and jump opargs stay consistent.
 *
 * Returns a new cfg_builder, or NULL with an exception set.
 */
cfg_builder *
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
{
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
        return NULL;
    }
    cfg_builder *g = _PyCfgBuilder_New();
    if (g == NULL) {
        return NULL;
    }
    for (int i = 0; i < seq->s_used; i++) {
        seq->s_instrs[i].i_target = 0;
    }
    for (int i = 0; i < seq->s_used; i++) {
        _PyInstruction *instr = &seq->s_instrs[i];
        if (HAS_TARGET(instr->i_opcode)) {
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
            seq->s_instrs[instr->i_oparg].i_target = 1;
        }
    }
    int offset = 0;
    for (int i = 0; i < seq->s_used; i++) {
        _PyInstruction *instr = &seq->s_instrs[i];
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
            if (seq->s_annotations_code != NULL) {
                // Annotations code is straight-line: no labels, no nesting.
                assert(seq->s_annotations_code->s_labelmap_size == 0
                    && seq->s_annotations_code->s_nested == NULL);
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
                    assert(!HAS_TARGET(ann_instr->i_opcode));
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
                        goto error;
                    }
                }
                // One placeholder became s_used instructions: net shift.
                offset += seq->s_annotations_code->s_used - 1;
            }
            else {
                // Placeholder dropped entirely: later indices shift down.
                offset -= 1;
            }
            continue;
        }
        if (instr->i_target) {
            // Label id is the (shifted) instruction index of the target.
            jump_target_label lbl_ = {i + offset};
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
                goto error;
            }
        }
        int opcode = instr->i_opcode;
        int oparg = instr->i_oparg;
        if (HAS_TARGET(opcode)) {
            // Keep jump opargs in sync with the shifted label ids.
            oparg += offset;
        }
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
            goto error;
        }
    }
    if (_PyCfgBuilder_CheckSize(g) < 0) {
        goto error;
    }
    return g;
error:
    _PyCfgBuilder_Free(g);
    return NULL;
}
3922
3923
/* Flatten a CFG back into a linear instruction sequence.
 *
 * Blocks are numbered in b_next order and each becomes a label in seq.
 * Jump opargs are rewritten to target-label ids, and per-instruction
 * exception-handler info (label, start depth, preserve-lasti) is copied
 * onto the just-emitted sequence entry.
 *
 * Returns SUCCESS (0) or ERROR (-1) with an exception set.
 */
int
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
{
    // First pass: assign a sequential label id to every block.
    int lbl = 0;
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
        b->b_label = (jump_target_label){lbl};
        lbl += 1;
    }
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
        for (int i = 0; i < b->b_iused; i++) {
            cfg_instr *instr = &b->b_instr[i];
            if (HAS_TARGET(instr->i_opcode)) {
                /* Set oparg to the label id (it will later be mapped to an offset) */
                instr->i_oparg = instr->i_target->b_label.id;
            }
            RETURN_IF_ERROR(
                _PyInstructionSequence_Addop(
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));

            // Addop appended one entry; seq->s_used-1 is that entry.
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
            if (instr->i_except != NULL) {
                hi->h_label = instr->i_except->b_label.id;
                hi->h_startdepth = instr->i_except->b_startdepth;
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
            }
            else {
                // -1 marks "no exception handler" for this instruction.
                hi->h_label = -1;
            }
        }
    }
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
        return ERROR;
    }
    return SUCCESS;
}
3959
3960
3961
/* Run the final lowering passes on an optimized CFG and emit it as an
 * instruction sequence.
 *
 * Outputs, via out-parameters: *stackdepth (max evaluation-stack depth)
 * and *nlocalsplus (size of the localsplus array).
 *
 * The pass order below is load-bearing — see the inline comments about
 * what each step freezes for the steps after it.
 *
 * Returns SUCCESS (0) or ERROR (-1) with an exception set.
 */
int
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
                                     _PyCompile_CodeUnitMetadata *umd,
                                     int *stackdepth, int *nlocalsplus,
                                     _PyInstructionSequence *seq)
{
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));

    *stackdepth = calculate_stackdepth(g);
    if (*stackdepth < 0) {
        return ERROR;
    }

    *nlocalsplus = prepare_localsplus(umd, g);
    if (*nlocalsplus < 0) {
        return ERROR;
    }

    RETURN_IF_ERROR(convert_pseudo_ops(g));

    /* Order of basic blocks must have been determined by now */

    RETURN_IF_ERROR(normalize_jumps(g));
    assert(no_redundant_jumps(g));

    /* Can't modify the bytecode after inserting instructions that produce
     * borrowed references.
     */
    RETURN_IF_ERROR(optimize_load_fast(g));

    /* Can't modify the bytecode after computing jump offsets. */
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
        return ERROR;
    }

    return SUCCESS;
}
3998
3999
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4000
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4001
 */
4002
int
4003
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4004
0
{
4005
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4006
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4007
0
    return SUCCESS;
4008
0
}
4009
4010
/* Exported API functions */
4011
4012
int
4013
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4014
0
{
4015
0
    stack_effects effs;
4016
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4017
0
        return PY_INVALID_STACK_EFFECT;
4018
0
    }
4019
0
    return effs.net;
4020
0
}
4021
4022
int
4023
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4024
179
{
4025
179
    stack_effects effs;
4026
179
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4027
0
        return PY_INVALID_STACK_EFFECT;
4028
0
    }
4029
179
    return effs.net;
4030
179
}
4031
4032
/* Access to compiler optimizations for unit tests.
4033
4034
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4035
 * a CFG, optimizes it and converts back to an instruction list.
4036
 */
4037
4038
static PyObject *
4039
cfg_to_instruction_sequence(cfg_builder *g)
4040
0
{
4041
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4042
0
    if (seq == NULL) {
4043
0
        return NULL;
4044
0
    }
4045
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4046
0
        PyInstructionSequence_Fini(seq);
4047
0
        return NULL;
4048
0
    }
4049
0
    return (PyObject*)seq;
4050
0
}
4051
4052
/* Test-support entry point (exposed for unit tests of the compiler
 * optimizations): take an instruction sequence, build a CFG, run the
 * code-unit optimizations plus stack-depth calculation and
 * optimize_load_fast, and convert the result back to an instruction
 * sequence.
 *
 * Returns a new instruction-sequence object, or NULL with an
 * exception set.
 */
PyObject *
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
{
    if (!_PyInstructionSequence_Check(seq)) {
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
        return NULL;
    }
    PyObject *const_cache = PyDict_New();
    if (const_cache == NULL) {
        return NULL;
    }

    PyObject *res = NULL;
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
    if (g == NULL) {
        goto error;
    }
    // Tests run without real function metadata; use neutral values.
    int nparams = 0, firstlineno = 1;
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
                                nparams, firstlineno) < 0) {
        goto error;
    }

    if (calculate_stackdepth(g) == ERROR) {
        goto error;
    }

    if (optimize_load_fast(g) != SUCCESS) {
        goto error;
    }

    res = cfg_to_instruction_sequence(g);
    // Shared cleanup: res is NULL on every error path above.
error:
    Py_DECREF(const_cache);
    _PyCfgBuilder_Free(g);
    return res;
}