Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
#include "pycore_hashtable.h"     // _Py_hashtable_t
10
11
#include "pycore_opcode_utils.h"
12
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
13
#include "pycore_pystate.h"         // _PyInterpreterState_GET()
14
15
#include <stdbool.h>
16
17
18
#undef SUCCESS
19
#undef ERROR
20
695k
#define SUCCESS 0
21
5.34k
#define ERROR -1
22
23
#define RETURN_IF_ERROR(X)  \
24
1.05M
    if ((X) == -1) {        \
25
0
        return ERROR;       \
26
0
    }
27
28
222k
#define DEFAULT_BLOCK_SIZE 16
29
30
typedef _Py_SourceLocation location;
31
typedef _PyJumpTargetLabel jump_target_label;
32
33
typedef struct _PyCfgInstruction {
34
    int i_opcode;
35
    int i_oparg;
36
    _Py_SourceLocation i_loc;
37
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
38
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
39
} cfg_instr;
40
41
typedef struct _PyCfgBasicblock {
42
    /* Each basicblock in a compilation unit is linked via b_list in the
43
       reverse order that the block are allocated.  b_list points to the next
44
       block in this list, not to be confused with b_next, which is next by
45
       control flow. */
46
    struct _PyCfgBasicblock *b_list;
47
    /* The label of this block if it is a jump target, -1 otherwise */
48
    _PyJumpTargetLabel b_label;
49
    /* Exception stack at start of block, used by assembler to create the exception handling table */
50
    struct _PyCfgExceptStack *b_exceptstack;
51
    /* pointer to an array of instructions, initially NULL */
52
    cfg_instr *b_instr;
53
    /* If b_next is non-NULL, it is a pointer to the next
54
       block reached by normal control flow. */
55
    struct _PyCfgBasicblock *b_next;
56
    /* number of instructions used */
57
    int b_iused;
58
    /* length of instruction array (b_instr) */
59
    int b_ialloc;
60
    /* Used by add_checks_for_loads_of_unknown_variables */
61
    uint64_t b_unsafe_locals_mask;
62
    /* Number of predecessors that a block has. */
63
    int b_predecessors;
64
    /* depth of stack upon entry of block, computed by stackdepth() */
65
    int b_startdepth;
66
    /* Basic block is an exception handler that preserves lasti */
67
    unsigned b_preserve_lasti : 1;
68
    /* Used by compiler passes to mark whether they have visited a basic block. */
69
    unsigned b_visited : 1;
70
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
71
    unsigned b_except_handler : 1;
72
    /* b_cold is true if this block is not perf critical (like an exception handler) */
73
    unsigned b_cold : 1;
74
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
75
    unsigned b_warm : 1;
76
} basicblock;
77
78
79
struct _PyCfgBuilder {
80
    /* The entryblock, at which control flow begins. All blocks of the
81
       CFG are reachable through the b_next links */
82
    struct _PyCfgBasicblock *g_entryblock;
83
    /* Pointer to the most recently allocated block.  By following
84
       b_list links, you can reach all allocated blocks. */
85
    struct _PyCfgBasicblock *g_block_list;
86
    /* pointer to the block currently being constructed */
87
    struct _PyCfgBasicblock *g_curblock;
88
    /* label for the next instruction to be placed */
89
    _PyJumpTargetLabel g_current_label;
90
};
91
92
typedef struct _PyCfgBuilder cfg_builder;
93
94
228k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
95
228k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
96
97
#define LOCATION(LNO, END_LNO, COL, END_COL) \
98
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
99
100
static inline int
101
is_block_push(cfg_instr *i)
102
983k
{
103
983k
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
104
983k
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
105
983k
}
106
107
static inline int
108
is_jump(cfg_instr *i)
109
861k
{
110
861k
    return OPCODE_HAS_JUMP(i->i_opcode);
111
861k
}
112
113
/* One arg*/
114
#define INSTR_SET_OP1(I, OP, ARG) \
115
30.7k
    do { \
116
30.7k
        assert(OPCODE_HAS_ARG(OP)); \
117
30.7k
        cfg_instr *_instr__ptr_ = (I); \
118
30.7k
        _instr__ptr_->i_opcode = (OP); \
119
30.7k
        _instr__ptr_->i_oparg = (ARG); \
120
30.7k
    } while (0);
121
122
/* No args*/
123
#define INSTR_SET_OP0(I, OP) \
124
40.9k
    do { \
125
40.9k
        assert(!OPCODE_HAS_ARG(OP)); \
126
40.9k
        cfg_instr *_instr__ptr_ = (I); \
127
40.9k
        _instr__ptr_->i_opcode = (OP); \
128
40.9k
        _instr__ptr_->i_oparg = 0; \
129
40.9k
    } while (0);
130
131
#define INSTR_SET_LOC(I, LOC) \
132
2.20k
    do { \
133
2.20k
        cfg_instr *_instr__ptr_ = (I); \
134
2.20k
        _instr__ptr_->i_loc = (LOC); \
135
2.20k
    } while (0);
136
137
/***** Blocks *****/
138
139
/* Returns the offset of the next instruction in the current block's
140
   b_instr array.  Resizes the b_instr as necessary.
141
   Returns -1 on failure.
142
*/
143
static int
144
basicblock_next_instr(basicblock *b)
145
222k
{
146
222k
    assert(b != NULL);
147
222k
    _Py_c_array_t array = {
148
222k
        .array = (void*)b->b_instr,
149
222k
        .allocated_entries = b->b_ialloc,
150
222k
        .item_size = sizeof(cfg_instr),
151
222k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
152
222k
    };
153
154
222k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
155
222k
    b->b_instr = array.array;
156
222k
    b->b_ialloc = array.allocated_entries;
157
222k
    return b->b_iused++;
158
222k
}
159
160
static cfg_instr *
161
745k
basicblock_last_instr(const basicblock *b) {
162
745k
    assert(b->b_iused >= 0);
163
745k
    if (b->b_iused > 0) {
164
680k
        assert(b->b_instr != NULL);
165
680k
        return &b->b_instr[b->b_iused - 1];
166
680k
    }
167
65.7k
    return NULL;
168
745k
}
169
170
/* Allocate a new block and return a pointer to it.
171
   Returns NULL on error.
172
*/
173
174
static basicblock *
175
cfg_builder_new_block(cfg_builder *g)
176
24.1k
{
177
24.1k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
178
24.1k
    if (b == NULL) {
179
0
        PyErr_NoMemory();
180
0
        return NULL;
181
0
    }
182
    /* Extend the singly linked list of blocks with new block. */
183
24.1k
    b->b_list = g->g_block_list;
184
24.1k
    g->g_block_list = b;
185
24.1k
    b->b_label = NO_LABEL;
186
24.1k
    return b;
187
24.1k
}
188
189
static int
190
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
191
216k
{
192
216k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
193
216k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
194
216k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
195
216k
    assert(0 <= oparg && oparg < (1 << 30));
196
197
216k
    int off = basicblock_next_instr(b);
198
216k
    if (off < 0) {
199
0
        return ERROR;
200
0
    }
201
216k
    cfg_instr *i = &b->b_instr[off];
202
216k
    i->i_opcode = opcode;
203
216k
    i->i_oparg = oparg;
204
216k
    i->i_loc = loc;
205
    // memory is already zero initialized
206
216k
    assert(i->i_target == NULL);
207
216k
    assert(i->i_except == NULL);
208
209
216k
    return SUCCESS;
210
216k
}
211
212
static int
213
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
214
381
{
215
381
    cfg_instr *last = basicblock_last_instr(b);
216
381
    if (last && is_jump(last)) {
217
0
        return ERROR;
218
0
    }
219
220
381
    RETURN_IF_ERROR(
221
381
        basicblock_addop(b, opcode, target->b_label.id, loc));
222
381
    last = basicblock_last_instr(b);
223
381
    assert(last && last->i_opcode == opcode);
224
381
    last->i_target = target;
225
381
    return SUCCESS;
226
381
}
227
228
static inline int
229
basicblock_append_instructions(basicblock *to, basicblock *from)
230
1.52k
{
231
4.39k
    for (int i = 0; i < from->b_iused; i++) {
232
2.86k
        int n = basicblock_next_instr(to);
233
2.86k
        if (n < 0) {
234
0
            return ERROR;
235
0
        }
236
2.86k
        to->b_instr[n] = from->b_instr[i];
237
2.86k
    }
238
1.52k
    return SUCCESS;
239
1.52k
}
240
241
static inline int
242
228k
basicblock_nofallthrough(const basicblock *b) {
243
228k
    cfg_instr *last = basicblock_last_instr(b);
244
228k
    return (last &&
245
214k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
246
118k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
247
228k
}
248
249
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
250
372k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
251
252
static basicblock *
253
copy_basicblock(cfg_builder *g, basicblock *block)
254
382
{
255
    /* Cannot copy a block if it has a fallthrough, since
256
     * a block can only have one fallthrough predecessor.
257
     */
258
382
    assert(BB_NO_FALLTHROUGH(block));
259
382
    basicblock *result = cfg_builder_new_block(g);
260
382
    if (result == NULL) {
261
0
        return NULL;
262
0
    }
263
382
    if (basicblock_append_instructions(result, block) < 0) {
264
0
        return NULL;
265
0
    }
266
382
    return result;
267
382
}
268
269
static int
270
2.87k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
271
2.87k
    RETURN_IF_ERROR(basicblock_next_instr(block));
272
47.7k
    for (int i = block->b_iused - 1; i > pos; i--) {
273
44.9k
        block->b_instr[i] = block->b_instr[i-1];
274
44.9k
    }
275
2.87k
    block->b_instr[pos] = *instr;
276
2.87k
    return SUCCESS;
277
2.87k
}
278
279
/* For debugging purposes only */
280
#if 0
281
static void
282
dump_instr(cfg_instr *i)
283
{
284
    const char *jump = is_jump(i) ? "jump " : "";
285
286
    char arg[128];
287
288
    *arg = '\0';
289
    if (OPCODE_HAS_ARG(i->i_opcode)) {
290
        sprintf(arg, "arg: %d ", i->i_oparg);
291
    }
292
    if (HAS_TARGET(i->i_opcode)) {
293
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
294
    }
295
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
296
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
297
}
298
299
static inline int
300
basicblock_returns(const basicblock *b) {
301
    cfg_instr *last = basicblock_last_instr(b);
302
    return last && IS_RETURN_OPCODE(last->i_opcode);
303
}
304
305
static void
306
dump_basicblock(const basicblock *b, bool highlight)
307
{
308
    const char *b_return = basicblock_returns(b) ? "return " : "";
309
    if (highlight) {
310
        fprintf(stderr, ">>> ");
311
    }
312
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
313
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
314
        b->b_startdepth, b->b_predecessors, b_return);
315
    int depth = b->b_startdepth;
316
    if (b->b_instr) {
317
        int i;
318
        for (i = 0; i < b->b_iused; i++) {
319
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
320
            dump_instr(b->b_instr + i);
321
322
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
323
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
324
            depth += (pushed - popped);
325
        }
326
    }
327
}
328
329
void
330
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
331
{
332
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
333
        dump_basicblock(b, b == mark);
334
    }
335
}
336
337
#endif
338
339
340
/***** CFG construction and modification *****/
341
342
static basicblock *
343
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
344
18.0k
{
345
18.0k
    assert(block != NULL);
346
18.0k
    g->g_curblock->b_next = block;
347
18.0k
    g->g_curblock = block;
348
18.0k
    return block;
349
18.0k
}
350
351
static inline int
352
36.7k
basicblock_exits_scope(const basicblock *b) {
353
36.7k
    cfg_instr *last = basicblock_last_instr(b);
354
36.7k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
355
36.7k
}
356
357
static inline int
358
21.1k
basicblock_has_eval_break(const basicblock *b) {
359
109k
    for (int i = 0; i < b->b_iused; i++) {
360
96.2k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
361
8.21k
            return true;
362
8.21k
        }
363
96.2k
    }
364
12.9k
    return false;
365
21.1k
}
366
367
static bool
368
cfg_builder_current_block_is_terminated(cfg_builder *g)
369
219k
{
370
219k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
371
219k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
372
15.4k
        return true;
373
15.4k
    }
374
204k
    if (IS_LABEL(g->g_current_label)) {
375
2.57k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
376
2.57k
            return true;
377
2.57k
        }
378
0
        else {
379
            /* current block is empty, label it */
380
0
            g->g_curblock->b_label = g->g_current_label;
381
0
            g->g_current_label = NO_LABEL;
382
0
        }
383
2.57k
    }
384
201k
    return false;
385
204k
}
386
387
static int
388
cfg_builder_maybe_start_new_block(cfg_builder *g)
389
219k
{
390
219k
    if (cfg_builder_current_block_is_terminated(g)) {
391
18.0k
        basicblock *b = cfg_builder_new_block(g);
392
18.0k
        if (b == NULL) {
393
0
            return ERROR;
394
0
        }
395
18.0k
        b->b_label = g->g_current_label;
396
18.0k
        g->g_current_label = NO_LABEL;
397
18.0k
        cfg_builder_use_next_block(g, b);
398
18.0k
    }
399
219k
    return SUCCESS;
400
219k
}
401
402
#ifndef NDEBUG
403
static bool
404
cfg_builder_check(cfg_builder *g)
405
{
406
    assert(g->g_entryblock->b_iused > 0);
407
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
408
        assert(!_PyMem_IsPtrFreed(block));
409
        if (block->b_instr != NULL) {
410
            assert(block->b_ialloc > 0);
411
            assert(block->b_iused >= 0);
412
            assert(block->b_ialloc >= block->b_iused);
413
        }
414
        else {
415
            assert (block->b_iused == 0);
416
            assert (block->b_ialloc == 0);
417
        }
418
    }
419
    return true;
420
}
421
#endif
422
423
static int
424
init_cfg_builder(cfg_builder *g)
425
5.38k
{
426
5.38k
    g->g_block_list = NULL;
427
5.38k
    basicblock *block = cfg_builder_new_block(g);
428
5.38k
    if (block == NULL) {
429
0
        return ERROR;
430
0
    }
431
5.38k
    g->g_curblock = g->g_entryblock = block;
432
5.38k
    g->g_current_label = NO_LABEL;
433
5.38k
    return SUCCESS;
434
5.38k
}
435
436
cfg_builder *
437
_PyCfgBuilder_New(void)
438
5.38k
{
439
5.38k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
440
5.38k
    if (g == NULL) {
441
0
        PyErr_NoMemory();
442
0
        return NULL;
443
0
    }
444
5.38k
    memset(g, 0, sizeof(cfg_builder));
445
5.38k
    if (init_cfg_builder(g) < 0) {
446
0
        PyMem_Free(g);
447
0
        return NULL;
448
0
    }
449
5.38k
    return g;
450
5.38k
}
451
452
void
453
_PyCfgBuilder_Free(cfg_builder *g)
454
5.38k
{
455
5.38k
    if (g == NULL) {
456
0
        return;
457
0
    }
458
5.38k
    assert(cfg_builder_check(g));
459
5.38k
    basicblock *b = g->g_block_list;
460
29.5k
    while (b != NULL) {
461
24.1k
        if (b->b_instr) {
462
24.1k
            PyMem_Free((void *)b->b_instr);
463
24.1k
        }
464
24.1k
        basicblock *next = b->b_list;
465
24.1k
        PyMem_Free((void *)b);
466
24.1k
        b = next;
467
24.1k
    }
468
5.38k
    PyMem_Free(g);
469
5.38k
}
470
471
int
472
_PyCfgBuilder_CheckSize(cfg_builder *g)
473
5.38k
{
474
5.38k
    int nblocks = 0;
475
28.8k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
476
23.4k
        nblocks++;
477
23.4k
    }
478
5.38k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
479
0
        PyErr_NoMemory();
480
0
        return ERROR;
481
0
    }
482
5.38k
    return SUCCESS;
483
5.38k
}
484
485
int
486
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
487
8.53k
{
488
8.53k
    g->g_current_label = lbl;
489
8.53k
    return cfg_builder_maybe_start_new_block(g);
490
8.53k
}
491
492
int
493
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
494
211k
{
495
211k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
496
211k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
497
211k
}
498
499
500
static basicblock *
501
next_nonempty_block(basicblock *b)
502
29.4k
{
503
30.5k
    while (b && b->b_iused == 0) {
504
1.08k
        b = b->b_next;
505
1.08k
    }
506
29.4k
    return b;
507
29.4k
}
508
509
/***** debugging helpers *****/
510
511
#ifndef NDEBUG
512
static int remove_redundant_nops(cfg_builder *g);
513
514
static bool
515
no_redundant_nops(cfg_builder *g) {
516
    if (remove_redundant_nops(g) != 0) {
517
        return false;
518
    }
519
    return true;
520
}
521
522
static bool
523
no_redundant_jumps(cfg_builder *g) {
524
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
525
        cfg_instr *last = basicblock_last_instr(b);
526
        if (last != NULL) {
527
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
528
                basicblock *next = next_nonempty_block(b->b_next);
529
                basicblock *jump_target = next_nonempty_block(last->i_target);
530
                if (jump_target == next) {
531
                    assert(next);
532
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
533
                        assert(0);
534
                        return false;
535
                    }
536
                }
537
            }
538
        }
539
    }
540
    return true;
541
}
542
#endif
543
544
/***** CFG preprocessing (jump targets and exceptions) *****/
545
546
static int
547
24.1k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
548
24.1k
    cfg_instr *last = basicblock_last_instr(b);
549
24.1k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
550
18.9k
        return SUCCESS;
551
18.9k
    }
552
24.1k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
553
554
5.24k
    bool is_forward = last->i_target->b_visited == 0;
555
5.24k
    if (is_forward) {
556
5.05k
        RETURN_IF_ERROR(
557
5.05k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
558
5.05k
        return SUCCESS;
559
5.05k
    }
560
561
195
    int reversed_opcode = 0;
562
195
    switch(last->i_opcode) {
563
0
        case POP_JUMP_IF_NOT_NONE:
564
0
            reversed_opcode = POP_JUMP_IF_NONE;
565
0
            break;
566
16
        case POP_JUMP_IF_NONE:
567
16
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
568
16
            break;
569
137
        case POP_JUMP_IF_FALSE:
570
137
            reversed_opcode = POP_JUMP_IF_TRUE;
571
137
            break;
572
42
        case POP_JUMP_IF_TRUE:
573
42
            reversed_opcode = POP_JUMP_IF_FALSE;
574
42
            break;
575
195
    }
576
    /* transform 'conditional jump T' to
577
     * 'reversed_jump b_next' followed by 'jump_backwards T'
578
     */
579
580
195
    basicblock *target = last->i_target;
581
195
    basicblock *backwards_jump = cfg_builder_new_block(g);
582
195
    if (backwards_jump == NULL) {
583
0
        return ERROR;
584
0
    }
585
195
    RETURN_IF_ERROR(
586
195
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
587
195
    RETURN_IF_ERROR(
588
195
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
589
195
    backwards_jump->b_startdepth = target->b_startdepth;
590
195
    last->i_opcode = reversed_opcode;
591
195
    last->i_target = b->b_next;
592
593
195
    backwards_jump->b_cold = b->b_cold;
594
195
    backwards_jump->b_next = b->b_next;
595
195
    b->b_next = backwards_jump;
596
195
    return SUCCESS;
597
195
}
598
599
600
static int
601
normalize_jumps(cfg_builder *g)
602
5.38k
{
603
5.38k
    basicblock *entryblock = g->g_entryblock;
604
29.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
605
23.9k
        b->b_visited = 0;
606
23.9k
    }
607
29.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
608
24.1k
        b->b_visited = 1;
609
24.1k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
610
24.1k
    }
611
5.38k
    return SUCCESS;
612
5.38k
}
613
614
static int
615
5.38k
check_cfg(cfg_builder *g) {
616
28.8k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
617
        /* Raise SystemError if jump or exit is not last instruction in the block. */
618
234k
        for (int i = 0; i < b->b_iused; i++) {
619
211k
            int opcode = b->b_instr[i].i_opcode;
620
211k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
621
211k
            if (IS_TERMINATOR_OPCODE(opcode)) {
622
20.8k
                if (i != b->b_iused - 1) {
623
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
624
0
                    return ERROR;
625
0
                }
626
20.8k
            }
627
211k
        }
628
23.4k
    }
629
5.38k
    return SUCCESS;
630
5.38k
}
631
632
static int
633
get_max_label(basicblock *entryblock)
634
20.3k
{
635
20.3k
    int lbl = -1;
636
113k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
637
93.5k
        if (b->b_label.id > lbl) {
638
31.5k
            lbl = b->b_label.id;
639
31.5k
        }
640
93.5k
    }
641
20.3k
    return lbl;
642
20.3k
}
643
644
/* Calculate the actual jump target from the target_label */
645
static int
646
translate_jump_labels_to_targets(basicblock *entryblock)
647
5.38k
{
648
5.38k
    int max_label = get_max_label(entryblock);
649
5.38k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
650
5.38k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
651
5.38k
    if (!label2block) {
652
0
        PyErr_NoMemory();
653
0
        return ERROR;
654
0
    }
655
5.38k
    memset(label2block, 0, mapsize);
656
28.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
657
23.4k
        if (b->b_label.id >= 0) {
658
8.53k
            label2block[b->b_label.id] = b;
659
8.53k
        }
660
23.4k
    }
661
28.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
662
234k
        for (int i = 0; i < b->b_iused; i++) {
663
211k
            cfg_instr *instr = &b->b_instr[i];
664
211k
            assert(instr->i_target == NULL);
665
211k
            if (HAS_TARGET(instr->i_opcode)) {
666
10.4k
                int lbl = instr->i_oparg;
667
10.4k
                assert(lbl >= 0 && lbl <= max_label);
668
10.4k
                instr->i_target = label2block[lbl];
669
10.4k
                assert(instr->i_target != NULL);
670
10.4k
                assert(instr->i_target->b_label.id == lbl);
671
10.4k
            }
672
211k
        }
673
23.4k
    }
674
5.38k
    PyMem_Free(label2block);
675
5.38k
    return SUCCESS;
676
5.38k
}
677
678
static int
679
5.38k
mark_except_handlers(basicblock *entryblock) {
680
#ifndef NDEBUG
681
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
682
        assert(!b->b_except_handler);
683
    }
684
#endif
685
28.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
686
234k
        for (int i=0; i < b->b_iused; i++) {
687
211k
            cfg_instr *instr = &b->b_instr[i];
688
211k
            if (is_block_push(instr)) {
689
1.67k
                instr->i_target->b_except_handler = 1;
690
1.67k
            }
691
211k
        }
692
23.4k
    }
693
5.38k
    return SUCCESS;
694
5.38k
}
695
696
697
struct _PyCfgExceptStack {
698
    basicblock *handlers[CO_MAXBLOCKS+2];
699
    int depth;
700
};
701
702
703
static basicblock *
704
1.66k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
705
1.66k
    assert(is_block_push(setup));
706
1.66k
    int opcode = setup->i_opcode;
707
1.66k
    basicblock * target = setup->i_target;
708
1.66k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
709
1.02k
        target->b_preserve_lasti = 1;
710
1.02k
    }
711
1.66k
    assert(stack->depth <= CO_MAXBLOCKS);
712
1.66k
    stack->handlers[++stack->depth] = target;
713
1.66k
    return target;
714
1.66k
}
715
716
static basicblock *
717
1.37k
pop_except_block(struct _PyCfgExceptStack *stack) {
718
1.37k
    assert(stack->depth > 0);
719
1.37k
    return stack->handlers[--stack->depth];
720
1.37k
}
721
722
static basicblock *
723
19.4k
except_stack_top(struct _PyCfgExceptStack *stack) {
724
19.4k
    return stack->handlers[stack->depth];
725
19.4k
}
726
727
static struct _PyCfgExceptStack *
728
5.38k
make_except_stack(void) {
729
5.38k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
730
5.38k
    if (new == NULL) {
731
0
        PyErr_NoMemory();
732
0
        return NULL;
733
0
    }
734
5.38k
    new->depth = 0;
735
5.38k
    new->handlers[0] = NULL;
736
5.38k
    return new;
737
5.38k
}
738
739
static struct _PyCfgExceptStack *
740
6.63k
copy_except_stack(struct _PyCfgExceptStack *stack) {
741
6.63k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
742
6.63k
    if (copy == NULL) {
743
0
        PyErr_NoMemory();
744
0
        return NULL;
745
0
    }
746
6.63k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
747
6.63k
    return copy;
748
6.63k
}
749
750
static basicblock**
751
38.8k
make_cfg_traversal_stack(basicblock *entryblock) {
752
38.8k
    int nblocks = 0;
753
224k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
754
185k
        b->b_visited = 0;
755
185k
        nblocks++;
756
185k
    }
757
38.8k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
758
38.8k
    if (!stack) {
759
0
        PyErr_NoMemory();
760
0
    }
761
38.8k
    return stack;
762
38.8k
}
763
764
/* Compute the stack effects of opcode with argument oparg.
765
766
   Some opcodes have different stack effect when jump to the target and
767
   when not jump. The 'jump' parameter specifies the case:
768
769
   * 0 -- when not jump
770
   * 1 -- when jump
771
   * -1 -- maximal
772
 */
773
typedef struct {
774
    /* The stack effect of the instruction. */
775
    int net;
776
} stack_effects;
777
778
Py_LOCAL(int)
779
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
780
198k
{
781
198k
    if (opcode < 0) {
782
0
        return -1;
783
0
    }
784
198k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
785
        // Specialized instructions are not supported.
786
0
        return -1;
787
0
    }
788
198k
    int popped = _PyOpcode_num_popped(opcode, oparg);
789
198k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
790
198k
    if (popped < 0 || pushed < 0) {
791
0
        return -1;
792
0
    }
793
198k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
794
1.66k
        effects->net = 0;
795
1.66k
        return 0;
796
1.66k
    }
797
196k
    effects->net = pushed - popped;
798
196k
    return 0;
799
198k
}
800
801
Py_LOCAL_INLINE(int)
802
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
803
23.3k
{
804
23.3k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
805
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
806
0
        return ERROR;
807
0
    }
808
23.3k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
809
19.6k
        assert(b->b_startdepth < 0);
810
19.6k
        b->b_startdepth = depth;
811
19.6k
        *(*sp)++ = b;
812
19.6k
    }
813
23.3k
    return SUCCESS;
814
23.3k
}
815
816
/* Find the flow path that needs the largest stack.  We assume that
817
 * cycles in the flow graph have no net effect on the stack depth.
818
 */
819
static int
820
calculate_stackdepth(cfg_builder *g)
821
5.38k
{
822
5.38k
    basicblock *entryblock = g->g_entryblock;
823
29.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
824
23.9k
        b->b_startdepth = INT_MIN;
825
23.9k
    }
826
5.38k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
827
5.38k
    if (!stack) {
828
0
        return ERROR;
829
0
    }
830
831
832
5.38k
    int stackdepth = -1;
833
5.38k
    int maxdepth = 0;
834
5.38k
    basicblock **sp = stack;
835
5.38k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
836
0
        goto error;
837
0
    }
838
25.0k
    while (sp != stack) {
839
19.6k
        basicblock *b = *--sp;
840
19.6k
        int depth = b->b_startdepth;
841
19.6k
        assert(depth >= 0);
842
19.6k
        basicblock *next = b->b_next;
843
197k
        for (int i = 0; i < b->b_iused; i++) {
844
188k
            cfg_instr *instr = &b->b_instr[i];
845
188k
            stack_effects effects;
846
188k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
847
0
                PyErr_Format(PyExc_SystemError,
848
0
                             "Invalid stack effect for opcode=%d, arg=%i",
849
0
                             instr->i_opcode, instr->i_oparg);
850
0
                goto error;
851
0
            }
852
188k
            int new_depth = depth + effects.net;
853
188k
            if (new_depth < 0) {
854
0
                PyErr_Format(PyExc_ValueError,
855
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
856
0
                goto error;
857
0
            }
858
188k
            maxdepth = Py_MAX(maxdepth, depth);
859
188k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
860
9.16k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
861
0
                    PyErr_Format(PyExc_SystemError,
862
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
863
0
                                 instr->i_opcode, instr->i_oparg);
864
0
                    goto error;
865
0
                }
866
9.16k
                int target_depth = depth + effects.net;
867
9.16k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
868
9.16k
                maxdepth = Py_MAX(maxdepth, depth);
869
9.16k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
870
0
                    goto error;
871
0
                }
872
9.16k
            }
873
188k
            depth = new_depth;
874
188k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
875
188k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
876
187k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
877
10.8k
            {
878
                /* remaining code is dead */
879
10.8k
                next = NULL;
880
10.8k
                break;
881
10.8k
            }
882
188k
        }
883
19.6k
        if (next != NULL) {
884
8.83k
            assert(BB_HAS_FALLTHROUGH(b));
885
8.83k
            if (stackdepth_push(&sp, next, depth) < 0) {
886
0
                goto error;
887
0
            }
888
8.83k
        }
889
19.6k
    }
890
5.38k
    stackdepth = maxdepth;
891
5.38k
error:
892
5.38k
    PyMem_Free(stack);
893
5.38k
    return stackdepth;
894
5.38k
}
895
896
static int
897
5.38k
label_exception_targets(basicblock *entryblock) {
898
5.38k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
899
5.38k
    if (todo_stack == NULL) {
900
0
        return ERROR;
901
0
    }
902
5.38k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
903
5.38k
    if (except_stack == NULL) {
904
0
        PyMem_Free(todo_stack);
905
0
        PyErr_NoMemory();
906
0
        return ERROR;
907
0
    }
908
5.38k
    except_stack->depth = 0;
909
5.38k
    todo_stack[0] = entryblock;
910
5.38k
    entryblock->b_visited = 1;
911
5.38k
    entryblock->b_exceptstack = except_stack;
912
5.38k
    basicblock **todo = &todo_stack[1];
913
5.38k
    basicblock *handler = NULL;
914
24.8k
    while (todo > todo_stack) {
915
19.4k
        todo--;
916
19.4k
        basicblock *b = todo[0];
917
19.4k
        assert(b->b_visited == 1);
918
19.4k
        except_stack = b->b_exceptstack;
919
19.4k
        assert(except_stack != NULL);
920
19.4k
        b->b_exceptstack = NULL;
921
19.4k
        handler = except_stack_top(except_stack);
922
19.4k
        int last_yield_except_depth = -1;
923
221k
        for (int i = 0; i < b->b_iused; i++) {
924
202k
            cfg_instr *instr = &b->b_instr[i];
925
202k
            if (is_block_push(instr)) {
926
1.66k
                if (!instr->i_target->b_visited) {
927
1.66k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
928
1.66k
                    if (copy == NULL) {
929
0
                        goto error;
930
0
                    }
931
1.66k
                    instr->i_target->b_exceptstack = copy;
932
1.66k
                    todo[0] = instr->i_target;
933
1.66k
                    instr->i_target->b_visited = 1;
934
1.66k
                    todo++;
935
1.66k
                }
936
1.66k
                handler = push_except_block(except_stack, instr);
937
1.66k
            }
938
200k
            else if (instr->i_opcode == POP_BLOCK) {
939
1.37k
                handler = pop_except_block(except_stack);
940
1.37k
                INSTR_SET_OP0(instr, NOP);
941
1.37k
            }
942
199k
            else if (is_jump(instr)) {
943
8.32k
                instr->i_except = handler;
944
8.32k
                assert(i == b->b_iused -1);
945
8.32k
                if (!instr->i_target->b_visited) {
946
6.03k
                    if (BB_HAS_FALLTHROUGH(b)) {
947
4.97k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
948
4.97k
                        if (copy == NULL) {
949
0
                            goto error;
950
0
                        }
951
4.97k
                        instr->i_target->b_exceptstack = copy;
952
4.97k
                    }
953
1.06k
                    else {
954
1.06k
                        instr->i_target->b_exceptstack = except_stack;
955
1.06k
                        except_stack = NULL;
956
1.06k
                    }
957
6.03k
                    todo[0] = instr->i_target;
958
6.03k
                    instr->i_target->b_visited = 1;
959
6.03k
                    todo++;
960
6.03k
                }
961
8.32k
            }
962
191k
            else if (instr->i_opcode == YIELD_VALUE) {
963
308
                instr->i_except = handler;
964
308
                last_yield_except_depth = except_stack->depth;
965
308
            }
966
190k
            else if (instr->i_opcode == RESUME) {
967
5.74k
                instr->i_except = handler;
968
5.74k
                if (instr->i_oparg != RESUME_AT_FUNC_START && instr->i_oparg != RESUME_AT_GEN_EXPR_START) {
969
308
                    assert(last_yield_except_depth >= 0);
970
308
                    if (last_yield_except_depth == 1) {
971
137
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
972
137
                    }
973
308
                    last_yield_except_depth = -1;
974
308
                }
975
5.74k
            }
976
185k
            else if (instr->i_opcode == RETURN_GENERATOR) {
977
252
                instr->i_except = NULL;
978
252
            }
979
184k
            else {
980
184k
                instr->i_except = handler;
981
184k
            }
982
202k
        }
983
19.4k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
984
6.37k
            assert(except_stack != NULL);
985
6.37k
            b->b_next->b_exceptstack = except_stack;
986
6.37k
            todo[0] = b->b_next;
987
6.37k
            b->b_next->b_visited = 1;
988
6.37k
            todo++;
989
6.37k
        }
990
13.0k
        else if (except_stack != NULL) {
991
12.0k
           PyMem_Free(except_stack);
992
12.0k
        }
993
19.4k
    }
994
#ifdef Py_DEBUG
995
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
996
        assert(b->b_exceptstack == NULL);
997
    }
998
#endif
999
5.38k
    PyMem_Free(todo_stack);
1000
5.38k
    return SUCCESS;
1001
0
error:
1002
0
    PyMem_Free(todo_stack);
1003
0
    PyMem_Free(except_stack);
1004
0
    return ERROR;
1005
5.38k
}
1006
1007
/***** CFG optimizations *****/
1008
1009
static int
1010
10.7k
remove_unreachable(basicblock *entryblock) {
1011
58.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1012
47.2k
        b->b_predecessors = 0;
1013
47.2k
    }
1014
10.7k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1015
10.7k
    if (stack == NULL) {
1016
0
        return ERROR;
1017
0
    }
1018
10.7k
    basicblock **sp = stack;
1019
10.7k
    entryblock->b_predecessors = 1;
1020
10.7k
    *sp++ = entryblock;
1021
10.7k
    entryblock->b_visited = 1;
1022
49.3k
    while (sp > stack) {
1023
38.5k
        basicblock *b = *(--sp);
1024
38.5k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1025
16.9k
            if (!b->b_next->b_visited) {
1026
13.9k
                assert(b->b_next->b_predecessors == 0);
1027
13.9k
                *sp++ = b->b_next;
1028
13.9k
                b->b_next->b_visited = 1;
1029
13.9k
            }
1030
16.9k
            b->b_next->b_predecessors++;
1031
16.9k
        }
1032
433k
        for (int i = 0; i < b->b_iused; i++) {
1033
394k
            basicblock *target;
1034
394k
            cfg_instr *instr = &b->b_instr[i];
1035
394k
            if (is_jump(instr) || is_block_push(instr)) {
1036
18.5k
                target = instr->i_target;
1037
18.5k
                if (!target->b_visited) {
1038
13.7k
                    *sp++ = target;
1039
13.7k
                    target->b_visited = 1;
1040
13.7k
                }
1041
18.5k
                target->b_predecessors++;
1042
18.5k
            }
1043
394k
        }
1044
38.5k
    }
1045
10.7k
    PyMem_Free(stack);
1046
1047
    /* Delete unreachable instructions */
1048
58.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1049
47.2k
       if (b->b_predecessors == 0) {
1050
8.73k
            b->b_iused = 0;
1051
8.73k
            b->b_except_handler = 0;
1052
8.73k
       }
1053
47.2k
    }
1054
10.7k
    return SUCCESS;
1055
10.7k
}
1056
1057
static int
1058
123k
basicblock_remove_redundant_nops(basicblock *bb) {
1059
    /* Remove NOPs when legal to do so. */
1060
123k
    int dest = 0;
1061
123k
    int prev_lineno = -1;
1062
1.05M
    for (int src = 0; src < bb->b_iused; src++) {
1063
929k
        int lineno = bb->b_instr[src].i_loc.lineno;
1064
929k
        if (bb->b_instr[src].i_opcode == NOP) {
1065
            /* Eliminate no-op if it doesn't have a line number */
1066
19.0k
            if (lineno < 0) {
1067
2.64k
                continue;
1068
2.64k
            }
1069
            /* or, if the previous instruction had the same line number. */
1070
16.4k
            if (prev_lineno == lineno) {
1071
13.1k
                continue;
1072
13.1k
            }
1073
            /* or, if the next instruction has same line number or no line number */
1074
3.23k
            if (src < bb->b_iused - 1) {
1075
2.99k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1076
2.99k
                if (next_lineno == lineno) {
1077
1.98k
                    continue;
1078
1.98k
                }
1079
1.01k
                if (next_lineno < 0) {
1080
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1081
0
                    continue;
1082
0
                }
1083
1.01k
            }
1084
242
            else {
1085
242
                basicblock *next = next_nonempty_block(bb->b_next);
1086
                /* or if last instruction in BB and next BB has same line number */
1087
242
                if (next) {
1088
242
                    location next_loc = NO_LOCATION;
1089
242
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1090
242
                        cfg_instr *instr = &next->b_instr[next_i];
1091
242
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1092
                            /* Skip over NOPs without a location, they will be removed */
1093
0
                            continue;
1094
0
                        }
1095
242
                        next_loc = instr->i_loc;
1096
242
                        break;
1097
242
                    }
1098
242
                    if (lineno == next_loc.lineno) {
1099
1
                        continue;
1100
1
                    }
1101
242
                }
1102
242
            }
1103
1104
3.23k
        }
1105
912k
        if (dest != src) {
1106
71.8k
            bb->b_instr[dest] = bb->b_instr[src];
1107
71.8k
        }
1108
912k
        dest++;
1109
912k
        prev_lineno = lineno;
1110
912k
    }
1111
123k
    assert(dest <= bb->b_iused);
1112
123k
    int num_removed = bb->b_iused - dest;
1113
123k
    bb->b_iused = dest;
1114
123k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1115
123k
    return num_removed;
1116
123k
}
1117
1118
static int
1119
17.6k
remove_redundant_nops(cfg_builder *g) {
1120
17.6k
    int changes = 0;
1121
117k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1122
99.7k
        int change = basicblock_remove_redundant_nops(b);
1123
99.7k
        RETURN_IF_ERROR(change);
1124
99.7k
        changes += change;
1125
99.7k
    }
1126
17.6k
    return changes;
1127
17.6k
}
1128
1129
static int loads_const(int opcode);
1130
1131
static int
1132
remove_redundant_nops_and_pairs(basicblock *entryblock)
1133
5.38k
{
1134
5.38k
    bool done = false;
1135
1136
10.7k
    while (! done) {
1137
5.38k
        done = true;
1138
5.38k
        cfg_instr *prev_instr = NULL;
1139
5.38k
        cfg_instr *instr = NULL;
1140
29.2k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1141
23.8k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1142
23.8k
            if (IS_LABEL(b->b_label)) {
1143
                /* this block is a jump target, forget instr */
1144
8.91k
                instr = NULL;
1145
8.91k
            }
1146
215k
            for (int i = 0; i < b->b_iused; i++) {
1147
191k
                prev_instr = instr;
1148
191k
                instr = &b->b_instr[i];
1149
191k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1150
191k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1151
191k
                int opcode = instr->i_opcode;
1152
191k
                bool is_redundant_pair = false;
1153
191k
                if (opcode == POP_TOP) {
1154
6.05k
                   if (loads_const(prev_opcode)) {
1155
0
                       is_redundant_pair = true;
1156
0
                   }
1157
6.05k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1158
0
                       is_redundant_pair = true;
1159
0
                   }
1160
6.05k
                }
1161
191k
                if (is_redundant_pair) {
1162
0
                    INSTR_SET_OP0(prev_instr, NOP);
1163
0
                    INSTR_SET_OP0(instr, NOP);
1164
0
                    done = false;
1165
0
                }
1166
191k
            }
1167
23.8k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1168
16.9k
                instr = NULL;
1169
16.9k
            }
1170
23.8k
        }
1171
5.38k
    }
1172
5.38k
    return SUCCESS;
1173
5.38k
}
1174
1175
static int
1176
12.2k
remove_redundant_jumps(cfg_builder *g) {
1177
    /* If a non-empty block ends with a jump instruction, check if the next
1178
     * non-empty block reached through normal flow control is the target
1179
     * of that jump. If it is, then the jump instruction is redundant and
1180
     * can be deleted.
1181
     *
1182
     * Return the number of changes applied, or -1 on error.
1183
     */
1184
1185
12.2k
    int changes = 0;
1186
88.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1187
75.9k
        cfg_instr *last = basicblock_last_instr(b);
1188
75.9k
        if (last == NULL) {
1189
12.2k
            continue;
1190
12.2k
        }
1191
75.9k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1192
63.6k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1193
6.97k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1194
6.97k
            if (jump_target == NULL) {
1195
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1196
0
                return ERROR;
1197
0
            }
1198
6.97k
            basicblock *next = next_nonempty_block(b->b_next);
1199
6.97k
            if (jump_target == next) {
1200
245
                changes++;
1201
245
                INSTR_SET_OP0(last, NOP);
1202
245
            }
1203
6.97k
        }
1204
63.6k
    }
1205
1206
12.2k
    return changes;
1207
12.2k
}
1208
1209
static inline bool
1210
23.7k
basicblock_has_no_lineno(basicblock *b) {
1211
29.7k
    for (int i = 0; i < b->b_iused; i++) {
1212
27.1k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1213
21.1k
            return false;
1214
21.1k
        }
1215
27.1k
    }
1216
2.59k
    return true;
1217
23.7k
}
1218
1219
/* Maximum size of basic block that should be copied in optimizer */
1220
1.20k
#define MAX_COPY_SIZE 4
1221
1222
/* If this block ends with an unconditional jump to a small exit block or
1223
 * a block that has no line numbers (and no fallthrough), then
1224
 * remove the jump and extend this block with the target.
1225
 * Returns 1 if extended, 0 if no change, and -1 on error.
1226
 */
1227
static int
1228
32.8k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1229
32.8k
    cfg_instr *last = basicblock_last_instr(bb);
1230
32.8k
    if (last == NULL) {
1231
0
        return 0;
1232
0
    }
1233
32.8k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1234
28.5k
        return 0;
1235
28.5k
    }
1236
4.29k
    basicblock *target = last->i_target;
1237
4.29k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1238
1.20k
                             target->b_iused <= MAX_COPY_SIZE);
1239
4.29k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1240
1.03k
                                     !BB_HAS_FALLTHROUGH(target));
1241
4.29k
    if (small_exit_block || no_lineno_no_fallthrough) {
1242
1.14k
        assert(is_jump(last));
1243
1.14k
        int removed_jump_opcode = last->i_opcode;
1244
1.14k
        INSTR_SET_OP0(last, NOP);
1245
1.14k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1246
1.14k
        if (no_lineno_no_fallthrough) {
1247
1.02k
            last = basicblock_last_instr(bb);
1248
1.02k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1249
307
                removed_jump_opcode == JUMP)
1250
26
            {
1251
                /* Make sure we don't lose eval breaker checks */
1252
26
                last->i_opcode = JUMP;
1253
26
            }
1254
1.02k
        }
1255
1.14k
        target->b_predecessors--;
1256
1.14k
        return 1;
1257
1.14k
    }
1258
3.14k
    return 0;
1259
4.29k
}
1260
1261
static int
1262
5.38k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1263
5.38k
    bool changes;
1264
5.91k
    do {
1265
5.91k
        changes = false;
1266
38.7k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1267
32.8k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1268
32.8k
            RETURN_IF_ERROR(res);
1269
32.8k
            if (res) {
1270
1.14k
                changes = true;
1271
1.14k
            }
1272
32.8k
        }
1273
5.91k
    } while(changes); /* every change removes a jump, ensuring convergence */
1274
5.38k
    return changes;
1275
5.38k
}
1276
1277
// Attempt to eliminate jumps to jumps by updating inst to jump to
1278
// target->i_target using the provided opcode. Return whether or not the
1279
// optimization was successful.
1280
static bool
1281
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1282
186
{
1283
186
    assert(is_jump(inst));
1284
186
    assert(is_jump(target));
1285
186
    assert(inst == basicblock_last_instr(bb));
1286
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1287
    // changes (and we fall into an infinite loop):
1288
186
    if (inst->i_target != target->i_target) {
1289
        /* Change inst to NOP and append a jump to target->i_target. The
1290
         * NOP will be removed later if it's not needed for the lineno.
1291
         */
1292
186
        INSTR_SET_OP0(inst, NOP);
1293
1294
186
        RETURN_IF_ERROR(
1295
186
            basicblock_add_jump(
1296
186
                bb, opcode, target->i_target, target->i_loc));
1297
1298
186
        return true;
1299
186
    }
1300
0
    return false;
1301
186
}
1302
1303
static int
1304
loads_const(int opcode)
1305
216k
{
1306
216k
    return OPCODE_HAS_CONST(opcode)
1307
188k
        || opcode == LOAD_SMALL_INT
1308
173k
        || opcode == LOAD_COMMON_CONSTANT;
1309
216k
}
1310
1311
/* Returns new reference */
1312
static PyObject*
1313
get_const_value(int opcode, int oparg, PyObject *co_consts)
1314
69.6k
{
1315
69.6k
    PyObject *constant = NULL;
1316
69.6k
    assert(loads_const(opcode));
1317
69.6k
    if (opcode == LOAD_CONST) {
1318
68.8k
        assert(PyList_Check(co_consts));
1319
68.8k
        Py_ssize_t n = PyList_GET_SIZE(co_consts);
1320
68.8k
        if (oparg < 0 || oparg >= n) {
1321
0
            PyErr_Format(PyExc_ValueError,
1322
0
                         "LOAD_CONST index %d is out of range for consts (len=%zd)",
1323
0
                         oparg, n);
1324
0
            return NULL;
1325
0
        }
1326
68.8k
        constant = PyList_GET_ITEM(co_consts, oparg);
1327
68.8k
    }
1328
69.6k
    if (opcode == LOAD_SMALL_INT) {
1329
211
        return PyLong_FromLong(oparg);
1330
211
    }
1331
69.4k
    if (opcode == LOAD_COMMON_CONSTANT) {
1332
598
        assert(oparg < NUM_COMMON_CONSTANTS);
1333
598
        return Py_NewRef(_PyInterpreterState_GET()->common_consts[oparg]);
1334
598
    }
1335
1336
68.8k
    if (constant == NULL) {
1337
0
        PyErr_SetString(PyExc_SystemError,
1338
0
                        "Internal error: failed to get value of a constant");
1339
0
        return NULL;
1340
0
    }
1341
68.8k
    return Py_NewRef(constant);
1342
68.8k
}
1343
1344
// Steals a reference to newconst.
1345
static int
1346
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
1347
          _Py_hashtable_t *consts_index)
1348
976
{
1349
976
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1350
0
        Py_DECREF(newconst);
1351
0
        return -1;
1352
0
    }
1353
1354
976
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
1355
976
    if (entry != NULL) {
1356
251
        Py_DECREF(newconst);
1357
251
        return (int)(uintptr_t)entry->value;
1358
251
    }
1359
1360
725
    Py_ssize_t index = PyList_GET_SIZE(consts);
1361
725
    if ((size_t)index >= (size_t)INT_MAX - 1) {
1362
0
        PyErr_SetString(PyExc_OverflowError, "too many constants");
1363
0
        Py_DECREF(newconst);
1364
0
        return -1;
1365
0
    }
1366
725
    if (PyList_Append(consts, newconst)) {
1367
0
        Py_DECREF(newconst);
1368
0
        return -1;
1369
0
    }
1370
1371
725
    if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
1372
0
        PyList_SetSlice(consts, index, index + 1, NULL);
1373
0
        Py_DECREF(newconst);
1374
0
        PyErr_NoMemory();
1375
0
        return -1;
1376
0
    }
1377
1378
725
    Py_DECREF(newconst);
1379
725
    return (int)index;
1380
725
}
1381
1382
/*
1383
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1384
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1385
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1386
1387
  Return boolean indicating whether "size" such instructions were found.
1388
*/
1389
static bool
1390
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1391
4.26k
{
1392
4.26k
    assert(start < bb->b_iused);
1393
4.26k
    assert(size >= 0);
1394
4.26k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1395
1396
7.36k
    for (; start >= 0 && size > 0; start--) {
1397
6.31k
        cfg_instr *instr = &bb->b_instr[start];
1398
6.31k
        if (instr->i_opcode == NOP) {
1399
50
            continue;
1400
50
        }
1401
6.26k
        if (!loads_const(instr->i_opcode)) {
1402
3.22k
            return false;
1403
3.22k
        }
1404
3.04k
        instrs[--size] = instr;
1405
3.04k
    }
1406
1407
1.04k
    return size == 0;
1408
4.26k
}
1409
1410
/*
1411
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1412
  Caller must make sure "instrs" has at least "size" elements.
1413
*/
1414
static void
1415
nop_out(cfg_instr **instrs, int size)
1416
1.03k
{
1417
3.22k
    for (int i = 0; i < size; i++) {
1418
2.18k
        cfg_instr *instr = instrs[i];
1419
2.18k
        assert(instr->i_opcode != NOP);
1420
2.18k
        INSTR_SET_OP0(instr, NOP);
1421
2.18k
        INSTR_SET_LOC(instr, NO_LOCATION);
1422
2.18k
    }
1423
1.03k
}
1424
1425
/* Does not steal reference to "newconst".
1426
   Return 1 if changed instruction to LOAD_SMALL_INT.
1427
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1428
   Return -1 on error.
1429
*/
1430
static int
1431
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1432
                               PyObject *consts, PyObject *const_cache)
1433
41.6k
{
1434
41.6k
    if (PyLong_CheckExact(newconst)) {
1435
17.5k
        int overflow;
1436
17.5k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1437
17.5k
        if (val == -1 && PyErr_Occurred()) {
1438
0
            return -1;
1439
0
        }
1440
17.5k
        if (!overflow && _PY_IS_SMALL_INT(val) && 0 <= val && val <= 255) {
1441
14.1k
            assert(_Py_IsImmortal(newconst));
1442
14.1k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1443
14.1k
            return 1;
1444
14.1k
        }
1445
17.5k
    }
1446
27.5k
    return 0;
1447
41.6k
}
1448
1449
/* Does not steal reference to "newconst".
1450
   Return 1 if changed instruction to LOAD_COMMON_CONSTANT.
1451
   Return 0 if could not change instruction to LOAD_COMMON_CONSTANT.
1452
   Return -1 on error.
1453
*/
1454
static int
1455
maybe_instr_make_load_common_const(cfg_instr *instr, PyObject *newconst)
1456
26.8k
{
1457
26.8k
    int oparg;
1458
26.8k
    if (newconst == Py_None) {
1459
6.09k
        oparg = CONSTANT_NONE;
1460
6.09k
    }
1461
20.7k
    else if (newconst == Py_True) {
1462
487
        oparg = CONSTANT_TRUE;
1463
487
    }
1464
20.2k
    else if (newconst == Py_False) {
1465
638
        oparg = CONSTANT_FALSE;
1466
638
    }
1467
19.5k
    else if (PyUnicode_CheckExact(newconst)
1468
8.96k
             && PyUnicode_GET_LENGTH(newconst) == 0) {
1469
130
        oparg = CONSTANT_EMPTY_STR;
1470
130
    }
1471
19.4k
    else if (PyLong_CheckExact(newconst)) {
1472
3.32k
        int overflow;
1473
3.32k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1474
3.32k
        if (val == -1 && PyErr_Occurred()) {
1475
0
            return -1;
1476
0
        }
1477
3.32k
        if (overflow || val != -1) {
1478
3.23k
            return 0;
1479
3.23k
        }
1480
87
        oparg = CONSTANT_MINUS_ONE;
1481
87
    }
1482
16.1k
    else {
1483
16.1k
        return 0;
1484
16.1k
    }
1485
26.8k
    assert(_Py_IsImmortal(newconst));
1486
7.43k
    INSTR_SET_OP1(instr, LOAD_COMMON_CONSTANT, oparg);
1487
7.43k
    return 1;
1488
26.8k
}
1489
1490
/* Steals reference to "newconst" */
1491
static int
1492
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1493
                      PyObject *consts, PyObject *const_cache,
1494
                      _Py_hashtable_t *consts_index)
1495
892
{
1496
892
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1497
892
    if (res < 0) {
1498
0
        Py_DECREF(newconst);
1499
0
        return ERROR;
1500
0
    }
1501
892
    if (res > 0) {
1502
0
        return SUCCESS;
1503
0
    }
1504
892
    res = maybe_instr_make_load_common_const(instr, newconst);
1505
892
    if (res < 0) {
1506
0
        Py_DECREF(newconst);
1507
0
        return ERROR;
1508
0
    }
1509
892
    if (res > 0) {
1510
87
        return SUCCESS;
1511
87
    }
1512
805
    int oparg = add_const(newconst, consts, const_cache, consts_index);
1513
805
    RETURN_IF_ERROR(oparg);
1514
805
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1515
805
    return SUCCESS;
1516
805
}
1517
1518
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1519
   with    LOAD_CONST (c1, c2, ... cn).
1520
   The consts table must still be in list form so that the
1521
   new constant (c1, c2, ... cn) can be appended.
1522
   Called with codestr pointing to the first LOAD_CONST.
1523
*/
1524
static int
1525
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
1526
                        PyObject *const_cache, _Py_hashtable_t *consts_index)
1527
2.38k
{
1528
    /* Pre-conditions */
1529
2.38k
    assert(PyDict_CheckExact(const_cache));
1530
2.38k
    assert(PyList_CheckExact(consts));
1531
1532
2.38k
    cfg_instr *instr = &bb->b_instr[i];
1533
2.38k
    assert(instr->i_opcode == BUILD_TUPLE);
1534
1535
2.38k
    int seq_size = instr->i_oparg;
1536
2.38k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1537
0
        return SUCCESS;
1538
0
    }
1539
1540
2.38k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1541
2.38k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1542
        /* not a const sequence */
1543
1.62k
        return SUCCESS;
1544
1.62k
    }
1545
1546
765
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1547
765
    if (const_tuple == NULL) {
1548
0
        return ERROR;
1549
0
    }
1550
1551
1.84k
    for (int i = 0; i < seq_size; i++) {
1552
1.08k
        cfg_instr *inst = const_instrs[i];
1553
1.08k
        assert(loads_const(inst->i_opcode));
1554
1.08k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1555
1.08k
        if (element == NULL) {
1556
0
            Py_DECREF(const_tuple);
1557
0
            return ERROR;
1558
0
        }
1559
1.08k
        PyTuple_SET_ITEM(const_tuple, i, element);
1560
1.08k
    }
1561
1562
765
    nop_out(const_instrs, seq_size);
1563
765
    return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
1564
765
}
1565
1566
/* Replace:
1567
    BUILD_LIST 0
1568
    LOAD_CONST c1
1569
    LIST_APPEND 1
1570
    LOAD_CONST c2
1571
    LIST_APPEND 1
1572
    ...
1573
    LOAD_CONST cN
1574
    LIST_APPEND 1
1575
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1576
   with:
1577
    LOAD_CONST (c1, c2, ... cN)
1578
*/
1579
static int
1580
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1581
                                      PyObject *consts, PyObject *const_cache,
1582
                                      _Py_hashtable_t *consts_index)
1583
48
{
1584
48
    assert(PyDict_CheckExact(const_cache));
1585
48
    assert(PyList_CheckExact(consts));
1586
48
    assert(i >= 0);
1587
48
    assert(i < bb->b_iused);
1588
1589
48
    cfg_instr *intrinsic = &bb->b_instr[i];
1590
48
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1591
48
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1592
1593
48
    int consts_found = 0;
1594
48
    bool expect_append = true;
1595
1596
50
    for (int pos = i - 1; pos >= 0; pos--) {
1597
50
        cfg_instr *instr = &bb->b_instr[pos];
1598
50
        int opcode = instr->i_opcode;
1599
50
        int oparg = instr->i_oparg;
1600
1601
50
        if (opcode == NOP) {
1602
0
            continue;
1603
0
        }
1604
1605
50
        if (opcode == BUILD_LIST && oparg == 0) {
1606
0
            if (!expect_append) {
1607
                /* Not a sequence start. */
1608
0
                return SUCCESS;
1609
0
            }
1610
1611
            /* Sequence start, we are done. */
1612
0
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1613
0
            if (newconst == NULL) {
1614
0
                return ERROR;
1615
0
            }
1616
1617
0
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1618
0
                instr = &bb->b_instr[newpos];
1619
0
                if (instr->i_opcode == NOP) {
1620
0
                    continue;
1621
0
                }
1622
0
                if (loads_const(instr->i_opcode)) {
1623
0
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1624
0
                    if (constant == NULL) {
1625
0
                        Py_DECREF(newconst);
1626
0
                        return ERROR;
1627
0
                    }
1628
0
                    assert(consts_found > 0);
1629
0
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1630
0
                }
1631
0
                nop_out(&instr, 1);
1632
0
            }
1633
0
            assert(consts_found == 0);
1634
0
            return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
1635
0
        }
1636
1637
50
        if (expect_append) {
1638
48
            if (opcode != LIST_APPEND || oparg != 1) {
1639
46
                return SUCCESS;
1640
46
            }
1641
48
        }
1642
2
        else {
1643
2
            if (!loads_const(opcode)) {
1644
2
                return SUCCESS;
1645
2
            }
1646
0
            consts_found++;
1647
0
        }
1648
1649
2
        expect_append = !expect_append;
1650
2
    }
1651
1652
    /* Did not find sequence start. */
1653
0
    return SUCCESS;
1654
48
}
1655
1656
972
#define MIN_CONST_SEQUENCE_SIZE 3
1657
/*
1658
Optimize lists and sets for:
1659
    1. "for" loop, comprehension or "in"/"not in" tests:
1660
           Change literal list or set of constants into constant
1661
           tuple or frozenset respectively. Change list of
1662
           non-constants into tuple.
1663
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1664
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1665
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1666
           or BUILD_SET & SET_UPDATE respectively.
1667
*/
1668
static int
1669
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1670
                        PyObject *consts, PyObject *const_cache,
1671
                        _Py_hashtable_t *consts_index)
1672
486
{
1673
486
    assert(PyDict_CheckExact(const_cache));
1674
486
    assert(PyList_CheckExact(consts));
1675
1676
486
    cfg_instr *instr = &bb->b_instr[i];
1677
486
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1678
1679
486
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1680
486
    int seq_size = instr->i_oparg;
1681
486
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1682
486
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1683
317
    {
1684
317
        return SUCCESS;
1685
317
    }
1686
1687
169
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1688
169
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1689
24
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1690
            /* iterate over a tuple instead of list */
1691
13
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1692
13
        }
1693
24
        return SUCCESS;
1694
24
    }
1695
1696
145
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1697
145
    if (const_result == NULL) {
1698
0
        return ERROR;
1699
0
    }
1700
1701
1.08k
    for (int i = 0; i < seq_size; i++) {
1702
943
        cfg_instr *inst = const_instrs[i];
1703
943
        assert(loads_const(inst->i_opcode));
1704
943
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1705
943
        if (element == NULL) {
1706
0
            Py_DECREF(const_result);
1707
0
            return ERROR;
1708
0
        }
1709
943
        PyTuple_SET_ITEM(const_result, i, element);
1710
943
    }
1711
1712
145
    if (instr->i_opcode == BUILD_SET) {
1713
137
        PyObject *frozenset = PyFrozenSet_New(const_result);
1714
137
        if (frozenset == NULL) {
1715
0
            Py_DECREF(const_result);
1716
0
            return ERROR;
1717
0
        }
1718
137
        Py_SETREF(const_result, frozenset);
1719
137
    }
1720
1721
145
    int index = add_const(const_result, consts, const_cache, consts_index);
1722
145
    RETURN_IF_ERROR(index);
1723
145
    nop_out(const_instrs, seq_size);
1724
1725
145
    if (contains_or_iter) {
1726
132
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1727
132
    }
1728
13
    else {
1729
13
        assert(i >= 2);
1730
13
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1731
1732
13
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1733
1734
13
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1735
13
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1736
13
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1737
13
    }
1738
145
    return SUCCESS;
1739
145
}
1740
1741
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1742
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1743
 * Used to avoid creating constants which are slow to hash.
1744
 */
1745
static Py_ssize_t
1746
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1747
0
{
1748
0
    if (PyTuple_Check(obj)) {
1749
0
        Py_ssize_t i;
1750
0
        limit -= PyTuple_GET_SIZE(obj);
1751
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1752
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1753
0
            if (limit < 0) {
1754
0
                return limit;
1755
0
            }
1756
0
        }
1757
0
    }
1758
0
    return limit;
1759
0
}
1760
1761
70
#define MAX_INT_SIZE           128  /* bits */
1762
0
#define MAX_COLLECTION_SIZE    256  /* items */
1763
2
#define MAX_STR_SIZE          4096  /* characters */
1764
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1765
1766
static PyObject *
1767
const_folding_safe_multiply(PyObject *v, PyObject *w)
1768
12
{
1769
12
    if (PyLong_Check(v) && PyLong_Check(w) &&
1770
8
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1771
12
    ) {
1772
8
        int64_t vbits = _PyLong_NumBits(v);
1773
8
        int64_t wbits = _PyLong_NumBits(w);
1774
8
        assert(vbits >= 0);
1775
8
        assert(wbits >= 0);
1776
8
        if (vbits + wbits > MAX_INT_SIZE) {
1777
0
            return NULL;
1778
0
        }
1779
8
    }
1780
4
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1781
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1782
0
        if (size) {
1783
0
            long n = PyLong_AsLong(v);
1784
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1785
0
                return NULL;
1786
0
            }
1787
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1788
0
                return NULL;
1789
0
            }
1790
0
        }
1791
0
    }
1792
4
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1793
2
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1794
2
                                               PyBytes_GET_SIZE(w);
1795
2
        if (size) {
1796
2
            long n = PyLong_AsLong(v);
1797
2
            if (n < 0 || n > MAX_STR_SIZE / size) {
1798
0
                return NULL;
1799
0
            }
1800
2
        }
1801
2
    }
1802
2
    else if (PyLong_Check(w) &&
1803
2
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1804
2
    {
1805
2
        return const_folding_safe_multiply(w, v);
1806
2
    }
1807
1808
10
    return PyNumber_Multiply(v, w);
1809
12
}
1810
1811
static PyObject *
1812
const_folding_safe_power(PyObject *v, PyObject *w)
1813
2
{
1814
2
    if (PyLong_Check(v) && PyLong_Check(w) &&
1815
2
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1816
2
    ) {
1817
2
        int64_t vbits = _PyLong_NumBits(v);
1818
2
        size_t wbits = PyLong_AsSize_t(w);
1819
2
        assert(vbits >= 0);
1820
2
        if (wbits == (size_t)-1) {
1821
0
            return NULL;
1822
0
        }
1823
2
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1824
0
            return NULL;
1825
0
        }
1826
2
    }
1827
1828
2
    return PyNumber_Power(v, w, Py_None);
1829
2
}
1830
1831
static PyObject *
1832
const_folding_safe_lshift(PyObject *v, PyObject *w)
1833
20
{
1834
20
    if (PyLong_Check(v) && PyLong_Check(w) &&
1835
20
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1836
20
    ) {
1837
20
        int64_t vbits = _PyLong_NumBits(v);
1838
20
        size_t wbits = PyLong_AsSize_t(w);
1839
20
        assert(vbits >= 0);
1840
20
        if (wbits == (size_t)-1) {
1841
0
            return NULL;
1842
0
        }
1843
20
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1844
0
            return NULL;
1845
0
        }
1846
20
    }
1847
1848
20
    return PyNumber_Lshift(v, w);
1849
20
}
1850
1851
static PyObject *
1852
const_folding_safe_mod(PyObject *v, PyObject *w)
1853
0
{
1854
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1855
0
        return NULL;
1856
0
    }
1857
1858
0
    return PyNumber_Remainder(v, w);
1859
0
}
1860
1861
static PyObject *
1862
eval_const_binop(PyObject *left, int op, PyObject *right)
1863
34
{
1864
34
    assert(left != NULL && right != NULL);
1865
34
    assert(op >= 0 && op <= NB_OPARG_LAST);
1866
1867
34
    PyObject *result = NULL;
1868
34
    switch (op) {
1869
1
        case NB_ADD:
1870
1
            result = PyNumber_Add(left, right);
1871
1
            break;
1872
1
        case NB_SUBTRACT:
1873
1
            result = PyNumber_Subtract(left, right);
1874
1
            break;
1875
10
        case NB_MULTIPLY:
1876
10
            result = const_folding_safe_multiply(left, right);
1877
10
            break;
1878
0
        case NB_TRUE_DIVIDE:
1879
0
            result = PyNumber_TrueDivide(left, right);
1880
0
            break;
1881
0
        case NB_FLOOR_DIVIDE:
1882
0
            result = PyNumber_FloorDivide(left, right);
1883
0
            break;
1884
0
        case NB_REMAINDER:
1885
0
            result = const_folding_safe_mod(left, right);
1886
0
            break;
1887
2
        case NB_POWER:
1888
2
            result = const_folding_safe_power(left, right);
1889
2
            break;
1890
20
        case NB_LSHIFT:
1891
20
            result = const_folding_safe_lshift(left, right);
1892
20
            break;
1893
0
        case NB_RSHIFT:
1894
0
            result = PyNumber_Rshift(left, right);
1895
0
            break;
1896
0
        case NB_OR:
1897
0
            result = PyNumber_Or(left, right);
1898
0
            break;
1899
0
        case NB_XOR:
1900
0
            result = PyNumber_Xor(left, right);
1901
0
            break;
1902
0
        case NB_AND:
1903
0
            result = PyNumber_And(left, right);
1904
0
            break;
1905
0
        case NB_SUBSCR:
1906
0
            result = PyObject_GetItem(left, right);
1907
0
            break;
1908
0
        case NB_MATRIX_MULTIPLY:
1909
            // No builtin constants implement matrix multiplication
1910
0
            break;
1911
0
        default:
1912
0
            Py_UNREACHABLE();
1913
34
    }
1914
34
    return result;
1915
34
}
1916
1917
static int
1918
fold_const_binop(basicblock *bb, int i, PyObject *consts,
1919
                 PyObject *const_cache, _Py_hashtable_t *consts_index)
1920
1.59k
{
1921
1.62k
    #define BINOP_OPERAND_COUNT 2
1922
1.59k
    assert(PyDict_CheckExact(const_cache));
1923
1.59k
    assert(PyList_CheckExact(consts));
1924
1925
1.59k
    cfg_instr *binop = &bb->b_instr[i];
1926
1.59k
    assert(binop->i_opcode == BINARY_OP);
1927
1928
1.59k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1929
1.59k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1930
        /* not a const sequence */
1931
1.55k
        return SUCCESS;
1932
1.55k
    }
1933
1934
34
    cfg_instr *lhs_instr = operands_instrs[0];
1935
34
    assert(loads_const(lhs_instr->i_opcode));
1936
34
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1937
34
    if (lhs == NULL) {
1938
0
        return ERROR;
1939
0
    }
1940
1941
34
    cfg_instr *rhs_instr = operands_instrs[1];
1942
34
    assert(loads_const(rhs_instr->i_opcode));
1943
34
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1944
34
    if (rhs == NULL) {
1945
0
        Py_DECREF(lhs);
1946
0
        return ERROR;
1947
0
    }
1948
1949
34
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1950
34
    Py_DECREF(lhs);
1951
34
    Py_DECREF(rhs);
1952
34
    if (newconst == NULL) {
1953
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1954
0
            return ERROR;
1955
0
        }
1956
0
        PyErr_Clear();
1957
0
        return SUCCESS;
1958
0
    }
1959
1960
34
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1961
34
    return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
1962
34
}
1963
1964
static PyObject *
1965
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1966
93
{
1967
93
    assert(operand != NULL);
1968
93
    assert(
1969
93
        opcode == UNARY_NEGATIVE ||
1970
93
        opcode == UNARY_INVERT ||
1971
93
        opcode == UNARY_NOT ||
1972
93
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1973
93
    );
1974
93
    PyObject *result;
1975
93
    switch (opcode) {
1976
93
        case UNARY_NEGATIVE:
1977
93
            result = PyNumber_Negative(operand);
1978
93
            break;
1979
0
        case UNARY_INVERT:
1980
            // XXX: This should be removed once the ~bool depreciation expires.
1981
0
            if (PyBool_Check(operand)) {
1982
0
                return NULL;
1983
0
            }
1984
0
            result = PyNumber_Invert(operand);
1985
0
            break;
1986
0
        case UNARY_NOT: {
1987
0
            int r = PyObject_IsTrue(operand);
1988
0
            if (r < 0) {
1989
0
                return NULL;
1990
0
            }
1991
0
            result = PyBool_FromLong(!r);
1992
0
            break;
1993
0
        }
1994
0
        case CALL_INTRINSIC_1:
1995
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1996
0
                Py_UNREACHABLE();
1997
0
            }
1998
0
            result = PyNumber_Positive(operand);
1999
0
            break;
2000
0
        default:
2001
0
            Py_UNREACHABLE();
2002
93
    }
2003
93
    return result;
2004
93
}
2005
2006
static int
2007
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
2008
                   PyObject *const_cache, _Py_hashtable_t *consts_index)
2009
115
{
2010
208
    #define UNARYOP_OPERAND_COUNT 1
2011
115
    assert(PyDict_CheckExact(const_cache));
2012
115
    assert(PyList_CheckExact(consts));
2013
115
    cfg_instr *unaryop = &bb->b_instr[i];
2014
2015
115
    cfg_instr *operand_instr;
2016
115
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
2017
        /* not a const */
2018
22
        return SUCCESS;
2019
22
    }
2020
2021
115
    assert(loads_const(operand_instr->i_opcode));
2022
93
    PyObject *operand = get_const_value(
2023
93
        operand_instr->i_opcode,
2024
93
        operand_instr->i_oparg,
2025
93
        consts
2026
93
    );
2027
93
    if (operand == NULL) {
2028
0
        return ERROR;
2029
0
    }
2030
2031
93
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
2032
93
    Py_DECREF(operand);
2033
93
    if (newconst == NULL) {
2034
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
2035
0
            return ERROR;
2036
0
        }
2037
0
        PyErr_Clear();
2038
0
        return SUCCESS;
2039
0
    }
2040
2041
93
    if (unaryop->i_opcode == UNARY_NOT) {
2042
0
        assert(PyBool_Check(newconst));
2043
0
    }
2044
93
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
2045
93
    return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
2046
93
}
2047
2048
1.96k
#define VISITED (-1)
2049
2050
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
2051
// same effect.
2052
static int
2053
swaptimize(basicblock *block, int *ix)
2054
628
{
2055
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
2056
    // for this function. Make sure to blow away cached *.pyc files first!
2057
628
    assert(*ix < block->b_iused);
2058
628
    cfg_instr *instructions = &block->b_instr[*ix];
2059
    // Find the length of the current sequence of SWAPs and NOPs, and record the
2060
    // maximum depth of the stack manipulations:
2061
628
    assert(instructions[0].i_opcode == SWAP);
2062
628
    int depth = instructions[0].i_oparg;
2063
628
    int len = 0;
2064
628
    int more = false;
2065
628
    int limit = block->b_iused - *ix;
2066
801
    while (++len < limit) {
2067
801
        int opcode = instructions[len].i_opcode;
2068
801
        if (opcode == SWAP) {
2069
151
            depth = Py_MAX(depth, instructions[len].i_oparg);
2070
151
            more = true;
2071
151
        }
2072
650
        else if (opcode != NOP) {
2073
628
            break;
2074
628
        }
2075
801
    }
2076
    // It's already optimal if there's only one SWAP:
2077
628
    if (!more) {
2078
477
        return SUCCESS;
2079
477
    }
2080
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
2081
151
    int *stack = PyMem_Malloc(depth * sizeof(int));
2082
151
    if (stack == NULL) {
2083
0
        PyErr_NoMemory();
2084
0
        return ERROR;
2085
0
    }
2086
604
    for (int i = 0; i < depth; i++) {
2087
453
        stack[i] = i;
2088
453
    }
2089
    // Simulate the combined effect of these instructions by "running" them on
2090
    // our "stack":
2091
453
    for (int i = 0; i < len; i++) {
2092
302
        if (instructions[i].i_opcode == SWAP) {
2093
302
            int oparg = instructions[i].i_oparg;
2094
302
            int top = stack[0];
2095
            // SWAPs are 1-indexed:
2096
302
            stack[0] = stack[oparg - 1];
2097
302
            stack[oparg - 1] = top;
2098
302
        }
2099
302
    }
2100
    // Now we can begin! Our approach here is based on a solution to a closely
2101
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2102
    // think of this algorithm as determining the steps needed to efficiently
2103
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2104
    // though, we can efficiently *shuffle* it! For this reason, we will be
2105
    // replacing instructions starting from the *end* of the run. Since the
2106
    // solution is optimal, we don't need to worry about running out of space:
2107
151
    int current = len - 1;
2108
604
    for (int i = 0; i < depth; i++) {
2109
        // Skip items that have already been visited, or just happen to be in
2110
        // the correct location:
2111
453
        if (stack[i] == VISITED || stack[i] == i) {
2112
302
            continue;
2113
302
        }
2114
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2115
        // with other items; traversing the cycle and swapping each item with
2116
        // the next will put them all in the correct place. The weird
2117
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2118
        // can only swap from that position:
2119
151
        int j = i;
2120
604
        while (true) {
2121
            // Skip the actual swap if our item is zero, since swapping the top
2122
            // item with itself is pointless:
2123
604
            if (j) {
2124
302
                assert(0 <= current);
2125
                // SWAPs are 1-indexed:
2126
302
                instructions[current].i_opcode = SWAP;
2127
302
                instructions[current--].i_oparg = j + 1;
2128
302
            }
2129
604
            if (stack[j] == VISITED) {
2130
                // Completed the cycle:
2131
151
                assert(j == i);
2132
151
                break;
2133
151
            }
2134
453
            int next_j = stack[j];
2135
453
            stack[j] = VISITED;
2136
453
            j = next_j;
2137
453
        }
2138
151
    }
2139
    // NOP out any unused instructions:
2140
151
    while (0 <= current) {
2141
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2142
0
    }
2143
151
    PyMem_Free(stack);
2144
151
    *ix += len - 1;
2145
151
    return SUCCESS;
2146
151
}
2147
2148
2149
// This list is pretty small, since it's only okay to reorder opcodes that:
2150
// - can't affect control flow (like jumping or raising exceptions)
2151
// - can't invoke arbitrary code (besides finalizers)
2152
// - only touch the TOS (and pop it when finished)
2153
#define SWAPPABLE(opcode) \
2154
968
    ((opcode) == STORE_FAST || \
2155
968
     (opcode) == STORE_FAST_MAYBE_NULL || \
2156
968
     (opcode) == POP_TOP)
2157
2158
#define STORES_TO(instr) \
2159
182
    (((instr).i_opcode == STORE_FAST || \
2160
182
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2161
182
     ? (instr).i_oparg : -1)
2162
2163
static int
2164
next_swappable_instruction(basicblock *block, int i, int lineno)
2165
926
{
2166
948
    while (++i < block->b_iused) {
2167
943
        cfg_instr *instruction = &block->b_instr[i];
2168
943
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2169
            // Optimizing across this instruction could cause user-visible
2170
            // changes in the names bound between line tracing events!
2171
4
            return -1;
2172
4
        }
2173
939
        if (instruction->i_opcode == NOP) {
2174
22
            continue;
2175
22
        }
2176
917
        if (SWAPPABLE(instruction->i_opcode)) {
2177
382
            return i;
2178
382
        }
2179
535
        return -1;
2180
917
    }
2181
5
    return -1;
2182
926
}
2183
2184
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2185
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2186
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2187
static void
2188
apply_static_swaps(basicblock *block, int i)
2189
628
{
2190
    // SWAPs are to our left, and potential swaperands are to our right:
2191
763
    for (; 0 <= i; i--) {
2192
730
        assert(i < block->b_iused);
2193
730
        cfg_instr *swap = &block->b_instr[i];
2194
730
        if (swap->i_opcode != SWAP) {
2195
102
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2196
                // Nope, but we know how to handle these. Keep looking:
2197
51
                continue;
2198
51
            }
2199
            // We can't reason about what this instruction does. Bail:
2200
51
            return;
2201
102
        }
2202
628
        int j = next_swappable_instruction(block, i, -1);
2203
628
        if (j < 0) {
2204
370
            return;
2205
370
        }
2206
258
        int k = j;
2207
258
        int lineno = block->b_instr[j].i_loc.lineno;
2208
382
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2209
298
            k = next_swappable_instruction(block, k, lineno);
2210
298
            if (k < 0) {
2211
174
                return;
2212
174
            }
2213
298
        }
2214
        // The reordering is not safe if the two instructions to be swapped
2215
        // store to the same location, or if any intervening instruction stores
2216
        // to the same location as either of them.
2217
84
        int store_j = STORES_TO(block->b_instr[j]);
2218
84
        int store_k = STORES_TO(block->b_instr[k]);
2219
84
        if (store_j >= 0 || store_k >= 0) {
2220
84
            if (store_j == store_k) {
2221
0
                return;
2222
0
            }
2223
98
            for (int idx = j + 1; idx < k; idx++) {
2224
14
                int store_idx = STORES_TO(block->b_instr[idx]);
2225
14
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2226
0
                    return;
2227
0
                }
2228
14
            }
2229
84
        }
2230
2231
        // Success!
2232
84
        INSTR_SET_OP0(swap, NOP);
2233
84
        cfg_instr temp = block->b_instr[j];
2234
84
        block->b_instr[j] = block->b_instr[k];
2235
84
        block->b_instr[k] = temp;
2236
84
    }
2237
628
}
2238
2239
static int
2240
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
2241
                               PyObject *consts, _Py_hashtable_t *consts_index)
2242
23.8k
{
2243
23.8k
    assert(PyDict_CheckExact(const_cache));
2244
23.8k
    assert(PyList_CheckExact(consts));
2245
23.8k
    int opcode = 0;
2246
23.8k
    int oparg = 0;
2247
228k
    for (int i = 0; i < bb->b_iused; i++) {
2248
204k
        cfg_instr *inst = &bb->b_instr[i];
2249
204k
        if (inst->i_opcode == LOAD_CONST) {
2250
40.7k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2251
40.7k
            if (constant == NULL) {
2252
0
                return ERROR;
2253
0
            }
2254
40.7k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2255
40.7k
            Py_DECREF(constant);
2256
40.7k
            if (res < 0) {
2257
0
                return ERROR;
2258
0
            }
2259
40.7k
        }
2260
204k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2261
26.4k
                                      inst->i_opcode == COPY &&
2262
12
                                      inst->i_oparg == 1);
2263
204k
        if (! is_copy_of_load_const) {
2264
204k
            opcode = inst->i_opcode;
2265
204k
            oparg = inst->i_oparg;
2266
204k
        }
2267
204k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2268
204k
        if (!loads_const(opcode)) {
2269
163k
            continue;
2270
163k
        }
2271
40.9k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2272
40.9k
        switch(nextop) {
2273
26
            case POP_JUMP_IF_FALSE:
2274
26
            case POP_JUMP_IF_TRUE:
2275
26
            case JUMP_IF_FALSE:
2276
26
            case JUMP_IF_TRUE:
2277
26
            {
2278
                /* Remove LOAD_CONST const; conditional jump */
2279
26
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2280
26
                if (cnt == NULL) {
2281
0
                    return ERROR;
2282
0
                }
2283
26
                int is_true = PyObject_IsTrue(cnt);
2284
26
                Py_DECREF(cnt);
2285
26
                if (is_true == -1) {
2286
0
                    return ERROR;
2287
0
                }
2288
26
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2289
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2290
26
                    INSTR_SET_OP0(inst, NOP);
2291
26
                }
2292
26
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2293
26
                if (is_true == jump_if_true) {
2294
1
                    bb->b_instr[i+1].i_opcode = JUMP;
2295
1
                }
2296
25
                else {
2297
25
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2298
25
                }
2299
26
                break;
2300
26
            }
2301
708
            case IS_OP:
2302
708
            {
2303
                // Fold to POP_JUMP_IF_NONE:
2304
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2305
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2306
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2307
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2308
                // Fold to POP_JUMP_IF_NOT_NONE:
2309
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2310
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2311
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2312
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2313
708
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2314
708
                if (cnt == NULL) {
2315
0
                    return ERROR;
2316
0
                }
2317
708
                if (!Py_IsNone(cnt)) {
2318
32
                    Py_DECREF(cnt);
2319
32
                    break;
2320
32
                }
2321
676
                if (bb->b_iused <= i + 2) {
2322
1
                    break;
2323
1
                }
2324
675
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2325
675
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2326
                // Get rid of TO_BOOL regardless:
2327
675
                if (jump_instr->i_opcode == TO_BOOL) {
2328
660
                    INSTR_SET_OP0(jump_instr, NOP);
2329
660
                    if (bb->b_iused <= i + 3) {
2330
0
                        break;
2331
0
                    }
2332
660
                    jump_instr = &bb->b_instr[i + 3];
2333
660
                }
2334
675
                bool invert = is_instr->i_oparg;
2335
675
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2336
627
                    invert = !invert;
2337
627
                }
2338
48
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2339
15
                    break;
2340
15
                }
2341
660
                INSTR_SET_OP0(inst, NOP);
2342
660
                INSTR_SET_OP0(is_instr, NOP);
2343
660
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2344
660
                                              : POP_JUMP_IF_NONE;
2345
660
                break;
2346
675
            }
2347
26
            case TO_BOOL:
2348
26
            {
2349
26
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2350
26
                if (cnt == NULL) {
2351
0
                    return ERROR;
2352
0
                }
2353
26
                int is_true = PyObject_IsTrue(cnt);
2354
26
                Py_DECREF(cnt);
2355
26
                if (is_true == -1) {
2356
0
                    return ERROR;
2357
0
                }
2358
26
                cnt = PyBool_FromLong(is_true);
2359
26
                int index = add_const(cnt, consts, const_cache, consts_index);
2360
26
                if (index < 0) {
2361
0
                    return ERROR;
2362
0
                }
2363
26
                INSTR_SET_OP0(inst, NOP);
2364
26
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2365
26
                break;
2366
26
            }
2367
40.9k
        }
2368
40.9k
        if (inst->i_opcode == LOAD_CONST) {
2369
25.9k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2370
25.9k
            if (constant == NULL) {
2371
0
                return ERROR;
2372
0
            }
2373
25.9k
            int res = maybe_instr_make_load_common_const(inst, constant);
2374
25.9k
            Py_DECREF(constant);
2375
25.9k
            if (res < 0) {
2376
0
                return ERROR;
2377
0
            }
2378
25.9k
        }
2379
40.9k
    }
2380
23.8k
    return SUCCESS;
2381
23.8k
}
2382
2383
static int
2384
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
2385
5.38k
                    _Py_hashtable_t *consts_index) {
2386
29.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2387
23.8k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
2388
23.8k
    }
2389
5.38k
    return SUCCESS;
2390
5.38k
}
2391
2392
static int
2393
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
2394
                     _Py_hashtable_t *consts_index)
2395
23.8k
{
2396
23.8k
    assert(PyDict_CheckExact(const_cache));
2397
23.8k
    assert(PyList_CheckExact(consts));
2398
23.8k
    cfg_instr nop;
2399
23.8k
    INSTR_SET_OP0(&nop, NOP);
2400
228k
    for (int i = 0; i < bb->b_iused; i++) {
2401
204k
        cfg_instr *inst = &bb->b_instr[i];
2402
204k
        cfg_instr *target;
2403
204k
        int opcode = inst->i_opcode;
2404
204k
        int oparg = inst->i_oparg;
2405
204k
        if (HAS_TARGET(opcode)) {
2406
9.61k
            assert(inst->i_target->b_iused > 0);
2407
9.61k
            target = &inst->i_target->b_instr[0];
2408
9.61k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2409
9.61k
        }
2410
194k
        else {
2411
194k
            target = &nop;
2412
194k
        }
2413
204k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2414
204k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2415
204k
        switch (opcode) {
2416
            /* Try to fold tuples of constants.
2417
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2418
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2419
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2420
2.44k
            case BUILD_TUPLE:
2421
2.44k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2422
60
                    switch(oparg) {
2423
0
                        case 1:
2424
0
                            INSTR_SET_OP0(inst, NOP);
2425
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2426
0
                            continue;
2427
50
                        case 2:
2428
59
                        case 3:
2429
59
                            INSTR_SET_OP0(inst, NOP);
2430
59
                            bb->b_instr[i+1].i_opcode = SWAP;
2431
59
                            continue;
2432
60
                    }
2433
60
                }
2434
2.38k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
2435
2.38k
                break;
2436
327
            case BUILD_LIST:
2437
486
            case BUILD_SET:
2438
486
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
2439
486
                break;
2440
251
            case POP_JUMP_IF_NOT_NONE:
2441
676
            case POP_JUMP_IF_NONE:
2442
676
                switch (target->i_opcode) {
2443
16
                    case JUMP:
2444
16
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2445
676
                }
2446
676
                break;
2447
3.26k
            case POP_JUMP_IF_FALSE:
2448
3.26k
                switch (target->i_opcode) {
2449
133
                    case JUMP:
2450
133
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2451
3.26k
                }
2452
3.26k
                break;
2453
3.26k
            case POP_JUMP_IF_TRUE:
2454
918
                switch (target->i_opcode) {
2455
37
                    case JUMP:
2456
37
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2457
918
                }
2458
918
                break;
2459
918
            case JUMP_IF_FALSE:
2460
513
                switch (target->i_opcode) {
2461
0
                    case JUMP:
2462
0
                    case JUMP_IF_FALSE:
2463
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2464
0
                        continue;
2465
2
                    case JUMP_IF_TRUE:
2466
                        // No need to check for loops here, a block's b_next
2467
                        // cannot point to itself.
2468
2
                        assert(inst->i_target != inst->i_target->b_next);
2469
2
                        inst->i_target = inst->i_target->b_next;
2470
2
                        i--;
2471
2
                        continue;
2472
513
                }
2473
511
                break;
2474
511
            case JUMP_IF_TRUE:
2475
66
                switch (target->i_opcode) {
2476
0
                    case JUMP:
2477
0
                    case JUMP_IF_TRUE:
2478
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2479
0
                        continue;
2480
0
                    case JUMP_IF_FALSE:
2481
                        // No need to check for loops here, a block's b_next
2482
                        // cannot point to itself.
2483
0
                        assert(inst->i_target != inst->i_target->b_next);
2484
0
                        inst->i_target = inst->i_target->b_next;
2485
0
                        i--;
2486
0
                        continue;
2487
66
                }
2488
66
                break;
2489
877
            case JUMP:
2490
1.88k
            case JUMP_NO_INTERRUPT:
2491
1.88k
                switch (target->i_opcode) {
2492
0
                    case JUMP:
2493
0
                        i -= jump_thread(bb, inst, target, JUMP);
2494
0
                        continue;
2495
0
                    case JUMP_NO_INTERRUPT:
2496
0
                        i -= jump_thread(bb, inst, target, opcode);
2497
0
                        continue;
2498
1.88k
                }
2499
1.88k
                break;
2500
1.88k
            case FOR_ITER:
2501
464
                if (target->i_opcode == JUMP) {
2502
                    /* This will not work now because the jump (at target) could
2503
                     * be forward or backward and FOR_ITER only jumps forward. We
2504
                     * can re-enable this if ever we implement a backward version
2505
                     * of FOR_ITER.
2506
                     */
2507
                    /*
2508
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2509
                    */
2510
0
                }
2511
464
                break;
2512
4.79k
            case STORE_FAST:
2513
4.79k
                if (opcode == nextop &&
2514
398
                    oparg == bb->b_instr[i+1].i_oparg &&
2515
19
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2516
19
                    bb->b_instr[i].i_opcode = POP_TOP;
2517
19
                    bb->b_instr[i].i_oparg = 0;
2518
19
                }
2519
4.79k
                break;
2520
779
            case SWAP:
2521
779
                if (oparg == 1) {
2522
0
                    INSTR_SET_OP0(inst, NOP);
2523
0
                }
2524
779
                break;
2525
7.78k
            case LOAD_GLOBAL:
2526
7.78k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2527
3.00k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2528
3.00k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2529
3.00k
                }
2530
7.78k
                break;
2531
2.28k
            case COMPARE_OP:
2532
2.28k
                if (nextop == TO_BOOL) {
2533
767
                    INSTR_SET_OP0(inst, NOP);
2534
767
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2535
767
                    continue;
2536
767
                }
2537
1.51k
                break;
2538
1.51k
            case CONTAINS_OP:
2539
1.64k
            case IS_OP:
2540
1.64k
                if (nextop == TO_BOOL) {
2541
795
                    INSTR_SET_OP0(inst, NOP);
2542
795
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2543
795
                    continue;
2544
795
                }
2545
850
                if (nextop == UNARY_NOT) {
2546
0
                    INSTR_SET_OP0(inst, NOP);
2547
0
                    int inverted = oparg ^ 1;
2548
0
                    assert(inverted == 0 || inverted == 1);
2549
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2550
0
                    continue;
2551
0
                }
2552
850
                break;
2553
1.94k
            case TO_BOOL:
2554
1.94k
                if (nextop == TO_BOOL) {
2555
0
                    INSTR_SET_OP0(inst, NOP);
2556
0
                    continue;
2557
0
                }
2558
1.94k
                break;
2559
1.94k
            case UNARY_NOT:
2560
17
                if (nextop == TO_BOOL) {
2561
0
                    INSTR_SET_OP0(inst, NOP);
2562
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2563
0
                    continue;
2564
0
                }
2565
17
                if (nextop == UNARY_NOT) {
2566
0
                    INSTR_SET_OP0(inst, NOP);
2567
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2568
0
                    continue;
2569
0
                }
2570
17
                _Py_FALLTHROUGH;
2571
19
            case UNARY_INVERT:
2572
115
            case UNARY_NEGATIVE:
2573
115
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
2574
115
                break;
2575
321
            case CALL_INTRINSIC_1:
2576
321
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2577
48
                    if (nextop == GET_ITER) {
2578
0
                        INSTR_SET_OP0(inst, NOP);
2579
0
                    }
2580
48
                    else {
2581
48
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
2582
48
                    }
2583
48
                }
2584
273
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2585
0
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
2586
0
                }
2587
321
                break;
2588
1.59k
            case BINARY_OP:
2589
1.59k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
2590
1.59k
                break;
2591
204k
        }
2592
204k
    }
2593
2594
228k
    for (int i = 0; i < bb->b_iused; i++) {
2595
204k
        cfg_instr *inst = &bb->b_instr[i];
2596
204k
        if (inst->i_opcode == SWAP) {
2597
628
            if (swaptimize(bb, &i) < 0) {
2598
0
                goto error;
2599
0
            }
2600
628
            apply_static_swaps(bb, i);
2601
628
        }
2602
204k
    }
2603
23.8k
    return SUCCESS;
2604
0
error:
2605
0
    return ERROR;
2606
23.8k
}
2607
2608
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2609
2610
static int
2611
remove_redundant_nops_and_jumps(cfg_builder *g)
2612
11.3k
{
2613
11.3k
    int removed_nops, removed_jumps;
2614
12.2k
    do {
2615
        /* Convergence is guaranteed because the number of
2616
         * redundant jumps and nops only decreases.
2617
         */
2618
12.2k
        removed_nops = remove_redundant_nops(g);
2619
12.2k
        RETURN_IF_ERROR(removed_nops);
2620
12.2k
        removed_jumps = remove_redundant_jumps(g);
2621
12.2k
        RETURN_IF_ERROR(removed_jumps);
2622
12.2k
    } while(removed_nops + removed_jumps > 0);
2623
11.3k
    return SUCCESS;
2624
11.3k
}
2625
2626
/* Perform optimizations on a control flow graph.
2627
   The consts object should still be in list form to allow new constants
2628
   to be appended.
2629
2630
   Code trasnformations that reduce code size initially fill the gaps with
2631
   NOPs.  Later those NOPs are removed.
2632
*/
2633
static int
2634
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
2635
             _Py_hashtable_t *consts_index, int firstlineno)
2636
5.38k
{
2637
5.38k
    assert(PyDict_CheckExact(const_cache));
2638
5.38k
    RETURN_IF_ERROR(check_cfg(g));
2639
5.38k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2640
5.38k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2641
5.38k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2642
5.38k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
2643
29.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2644
23.8k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
2645
23.8k
    }
2646
5.38k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2647
5.38k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2648
5.38k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2649
5.38k
    assert(no_redundant_jumps(g));
2650
5.38k
    return SUCCESS;
2651
5.38k
}
2652
2653
static void
2654
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2655
5.69k
{
2656
5.69k
    int32_t line1 = inst1->i_loc.lineno;
2657
5.69k
    int32_t line2 = inst2->i_loc.lineno;
2658
    /* Skip if instructions are on different lines */
2659
5.69k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2660
1.89k
        return;
2661
1.89k
    }
2662
3.80k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2663
207
        return;
2664
207
    }
2665
3.59k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2666
3.59k
    INSTR_SET_OP0(inst2, NOP);
2667
3.59k
}
2668
2669
static int
2670
insert_superinstructions(cfg_builder *g)
2671
5.38k
{
2672
29.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2673
2674
215k
        for (int i = 0; i < b->b_iused; i++) {
2675
191k
            cfg_instr *inst = &b->b_instr[i];
2676
191k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2677
191k
            switch(inst->i_opcode) {
2678
22.0k
                case LOAD_FAST:
2679
22.0k
                    if (nextop == LOAD_FAST) {
2680
3.53k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2681
3.53k
                    }
2682
22.0k
                    break;
2683
4.52k
                case STORE_FAST:
2684
4.52k
                    switch (nextop) {
2685
1.84k
                        case LOAD_FAST:
2686
1.84k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2687
1.84k
                            break;
2688
312
                        case STORE_FAST:
2689
312
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2690
312
                            break;
2691
4.52k
                    }
2692
4.52k
                    break;
2693
191k
            }
2694
191k
        }
2695
23.8k
    }
2696
5.38k
    int res = remove_redundant_nops(g);
2697
5.38k
    assert(no_redundant_nops(g));
2698
5.38k
    return res;
2699
5.38k
}
2700
2701
#define NOT_LOCAL -1
2702
4.38k
#define DUMMY_INSTR -1
2703
2704
typedef struct {
2705
    // Index of instruction that produced the reference or DUMMY_INSTR.
2706
    int instr;
2707
2708
    // The local to which the reference refers or NOT_LOCAL.
2709
    int local;
2710
} ref;
2711
2712
typedef struct {
2713
    ref *refs;
2714
    Py_ssize_t size;
2715
    Py_ssize_t capacity;
2716
} ref_stack;
2717
2718
static int
2719
ref_stack_push(ref_stack *stack, ref r)
2720
152k
{
2721
152k
    if (stack->size == stack->capacity) {
2722
5.38k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2723
5.38k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2724
5.38k
        if (refs == NULL) {
2725
0
            PyErr_NoMemory();
2726
0
            return -1;
2727
0
        }
2728
5.38k
        stack->refs = refs;
2729
5.38k
        stack->capacity = new_cap;
2730
5.38k
    }
2731
152k
    stack->refs[stack->size] = r;
2732
152k
    stack->size++;
2733
152k
    return 0;
2734
152k
}
2735
2736
static ref
2737
ref_stack_pop(ref_stack *stack)
2738
136k
{
2739
136k
    assert(stack->size > 0);
2740
136k
    stack->size--;
2741
136k
    ref r = stack->refs[stack->size];
2742
136k
    return r;
2743
136k
}
2744
2745
static void
2746
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2747
540
{
2748
540
    Py_ssize_t idx = stack->size - off;
2749
540
    assert(idx >= 0 && idx < stack->size);
2750
540
    ref tmp = stack->refs[idx];
2751
540
    stack->refs[idx] = stack->refs[stack->size - 1];
2752
540
    stack->refs[stack->size - 1] = tmp;
2753
540
}
2754
2755
static ref
2756
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2757
20.8k
{
2758
20.8k
    assert(idx >= 0 && idx < stack->size);
2759
20.8k
    return stack->refs[idx];
2760
20.8k
}
2761
2762
static void
2763
ref_stack_clear(ref_stack *stack)
2764
16.6k
{
2765
16.6k
    stack->size = 0;
2766
16.6k
}
2767
2768
static void
2769
ref_stack_fini(ref_stack *stack)
2770
5.38k
{
2771
5.38k
    if (stack->refs != NULL) {
2772
5.38k
        PyMem_Free(stack->refs);
2773
5.38k
    }
2774
5.38k
    stack->refs = NULL;
2775
5.38k
    stack->capacity = 0;
2776
5.38k
    stack->size = 0;
2777
5.38k
}
2778
2779
typedef enum {
2780
    // The loaded reference is still on the stack when the local is killed
2781
    SUPPORT_KILLED  = 1,
2782
    // The loaded reference is stored into a local
2783
    STORED_AS_LOCAL = 2,
2784
    // The loaded reference is still on the stack at the end of the basic block
2785
    REF_UNCONSUMED  = 4,
2786
} LoadFastInstrFlag;
2787
2788
static void
2789
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2790
4.45k
{
2791
7.77k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2792
3.32k
        ref r = ref_stack_at(refs, i);
2793
3.32k
        if (r.local == local) {
2794
6
            assert(r.instr >= 0);
2795
6
            instr_flags[r.instr] |= SUPPORT_KILLED;
2796
6
        }
2797
3.32k
    }
2798
4.45k
}
2799
2800
static void
2801
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2802
4.38k
{
2803
4.38k
    kill_local(instr_flags, refs, local);
2804
4.38k
    if (r.instr != DUMMY_INSTR) {
2805
3.84k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2806
3.84k
    }
2807
4.38k
}
2808
2809
static void
2810
load_fast_push_block(basicblock ***sp, basicblock *target,
2811
                     Py_ssize_t start_depth)
2812
14.5k
{
2813
14.5k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2814
14.5k
    if (!target->b_visited) {
2815
11.2k
        target->b_visited = 1;
2816
11.2k
        *(*sp)++ = target;
2817
11.2k
    }
2818
14.5k
}
2819
2820
/*
2821
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2822
 * load borrowed references onto the operand stack.
2823
 *
2824
 * This is only safe when we can prove that the reference in the frame outlives
2825
 * the borrowed reference produced by the instruction. We make this tractable
2826
 * by enforcing the following lifetimes:
2827
 *
2828
 * 1. Borrowed references loaded onto the operand stack live until the end of
2829
 *    the instruction that consumes them from the stack. Any borrowed
2830
 *    references that would escape into the heap (e.g. into frame objects or
2831
 *    generators) are converted into new, strong references.
2832
 *
2833
 * 2. Locals live until they are either killed by an instruction
2834
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2835
 *    via `f_locals` is added to a tuple owned by the frame object.
2836
 *
2837
 * To simplify the problem of detecting which supporting references in the
2838
 * frame are killed by instructions that overwrite locals, we only allow
2839
 * borrowed references to be stored as a local in the frame if they were passed
2840
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2841
 * strong references.
2842
 *
2843
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2844
 * that meet the following criteria:
2845
 *
2846
 * 1. The produced reference must be consumed from the stack before the
2847
 *    supporting reference in the frame is killed.
2848
 *
2849
 * 2. The produced reference cannot be stored as a local.
2850
 *
2851
 * We use abstract interpretation to identify instructions that meet these
2852
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2853
 * stack of abstract references and note any instructions that violate the
2854
 * criteria above. Once we've processed all the instructions in a block, any
2855
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2856
 */
2857
static int
2858
optimize_load_fast(cfg_builder *g)
2859
5.38k
{
2860
5.38k
    int status;
2861
5.38k
    ref_stack refs = {0};
2862
5.38k
    int max_instrs = 0;
2863
5.38k
    basicblock *entryblock = g->g_entryblock;
2864
29.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2865
24.1k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2866
24.1k
    }
2867
5.38k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2868
5.38k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2869
5.38k
    if (instr_flags == NULL) {
2870
0
        PyErr_NoMemory();
2871
0
        return ERROR;
2872
0
    }
2873
5.38k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2874
5.38k
    if (blocks == NULL) {
2875
0
        status = ERROR;
2876
0
        goto done;
2877
0
    }
2878
5.38k
    basicblock **sp = blocks;
2879
5.38k
    *sp = entryblock;
2880
5.38k
    sp++;
2881
5.38k
    entryblock->b_startdepth = 0;
2882
5.38k
    entryblock->b_visited = 1;
2883
2884
5.38k
    #define PUSH_REF(instr, local)                \
2885
152k
        do {                                      \
2886
152k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2887
0
                status = ERROR;                   \
2888
0
                goto done;                        \
2889
0
            }                                     \
2890
152k
        } while(0)
2891
2892
22.0k
    while (sp != blocks) {
2893
16.6k
        basicblock *block = *--sp;
2894
16.6k
        assert(block->b_startdepth > -1);
2895
2896
        // Reset per-block state.
2897
16.6k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2898
2899
        // Reset the stack of refs. We don't track references on the stack
2900
        // across basic blocks, but the bytecode will expect their
2901
        // presence. Add dummy references as necessary.
2902
16.6k
        ref_stack_clear(&refs);
2903
27.2k
        for (int i = 0; i < block->b_startdepth; i++) {
2904
10.5k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2905
10.5k
        }
2906
2907
198k
        for (int i = 0; i < block->b_iused; i++) {
2908
181k
            cfg_instr *instr = &block->b_instr[i];
2909
181k
            int opcode = instr->i_opcode;
2910
181k
            int oparg = instr->i_oparg;
2911
181k
            assert(opcode != EXTENDED_ARG);
2912
181k
            switch (opcode) {
2913
                // Opcodes that load and store locals
2914
7
                case DELETE_FAST: {
2915
7
                    kill_local(instr_flags, &refs, oparg);
2916
7
                    break;
2917
0
                }
2918
2919
19.3k
                case LOAD_FAST: {
2920
19.3k
                    PUSH_REF(i, oparg);
2921
19.3k
                    break;
2922
19.3k
                }
2923
2924
19.3k
                case LOAD_FAST_AND_CLEAR: {
2925
71
                    kill_local(instr_flags, &refs, oparg);
2926
71
                    PUSH_REF(i, oparg);
2927
71
                    break;
2928
71
                }
2929
2930
3.19k
                case LOAD_FAST_LOAD_FAST: {
2931
3.19k
                    PUSH_REF(i, oparg >> 4);
2932
3.19k
                    PUSH_REF(i, oparg & 15);
2933
3.19k
                    break;
2934
3.19k
                }
2935
2936
3.84k
                case STORE_FAST: {
2937
3.84k
                    ref r = ref_stack_pop(&refs);
2938
3.84k
                    store_local(instr_flags, &refs, oparg, r);
2939
3.84k
                    break;
2940
3.19k
                }
2941
2942
45
                case STORE_FAST_LOAD_FAST: {
2943
                    // STORE_FAST
2944
45
                    ref r = ref_stack_pop(&refs);
2945
45
                    store_local(instr_flags, &refs, oparg >> 4, r);
2946
                    // LOAD_FAST
2947
45
                    PUSH_REF(i, oparg & 15);
2948
45
                    break;
2949
45
                }
2950
2951
247
                case STORE_FAST_STORE_FAST: {
2952
                    // STORE_FAST
2953
247
                    ref r = ref_stack_pop(&refs);
2954
247
                    store_local(instr_flags, &refs, oparg >> 4, r);
2955
                    // STORE_FAST
2956
247
                    r = ref_stack_pop(&refs);
2957
247
                    store_local(instr_flags, &refs, oparg & 15, r);
2958
247
                    break;
2959
45
                }
2960
2961
                // Opcodes that shuffle values on the stack
2962
1.06k
                case COPY: {
2963
1.06k
                    assert(oparg > 0);
2964
1.06k
                    Py_ssize_t idx = refs.size - oparg;
2965
1.06k
                    ref r = ref_stack_at(&refs, idx);
2966
1.06k
                    PUSH_REF(r.instr, r.local);
2967
1.06k
                    break;
2968
1.06k
                }
2969
2970
1.06k
                case SWAP: {
2971
540
                    assert(oparg >= 2);
2972
540
                    ref_stack_swap_top(&refs, oparg);
2973
540
                    break;
2974
1.06k
                }
2975
2976
                // We treat opcodes that do not consume all of their inputs on
2977
                // a case by case basis, as we have no generic way of knowing
2978
                // how many inputs should be left on the stack.
2979
2980
                // Opcodes that consume no inputs
2981
1.45k
                case FORMAT_SIMPLE:
2982
1.45k
                case GET_ANEXT:
2983
1.90k
                case GET_ITER:
2984
1.90k
                case GET_LEN:
2985
2.34k
                case IMPORT_FROM:
2986
2.34k
                case MATCH_KEYS:
2987
2.34k
                case MATCH_MAPPING:
2988
2.34k
                case MATCH_SEQUENCE:
2989
2.34k
                case WITH_EXCEPT_START: {
2990
2.34k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2991
2.34k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2992
2.34k
                    int net_pushed = num_pushed - num_popped;
2993
2.34k
                    assert(net_pushed >= 0);
2994
3.24k
                    for (int j = 0; j < net_pushed; j++) {
2995
896
                        PUSH_REF(i, NOT_LOCAL);
2996
896
                    }
2997
2.34k
                    break;
2998
2.34k
                }
2999
3000
                // Opcodes that consume some inputs and push no new values
3001
2.34k
                case DICT_MERGE:
3002
490
                case DICT_UPDATE:
3003
710
                case LIST_APPEND:
3004
761
                case LIST_EXTEND:
3005
7.52k
                case MAP_ADD:
3006
7.52k
                case RERAISE:
3007
7.59k
                case SET_ADD:
3008
7.59k
                case SET_UPDATE: {
3009
7.59k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
3010
7.59k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
3011
7.59k
                    int net_popped = num_popped - num_pushed;
3012
7.59k
                    assert(net_popped > 0);
3013
21.9k
                    for (int i = 0; i < net_popped; i++) {
3014
14.3k
                        ref_stack_pop(&refs);
3015
14.3k
                    }
3016
7.59k
                    break;
3017
7.59k
                }
3018
3019
145
                case END_SEND: {
3020
145
                    assert(_PyOpcode_num_popped(opcode, oparg) == 3);
3021
145
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
3022
145
                    ref tos = ref_stack_pop(&refs);
3023
145
                    ref_stack_pop(&refs);
3024
145
                    ref_stack_pop(&refs);
3025
145
                    PUSH_REF(tos.instr, tos.local);
3026
145
                    break;
3027
145
                }
3028
3029
1.66k
                case SET_FUNCTION_ATTRIBUTE: {
3030
1.66k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
3031
1.66k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
3032
1.66k
                    ref tos = ref_stack_pop(&refs);
3033
1.66k
                    ref_stack_pop(&refs);
3034
1.66k
                    PUSH_REF(tos.instr, tos.local);
3035
1.66k
                    break;
3036
1.66k
                }
3037
3038
                // Opcodes that consume some inputs and push new values
3039
1.66k
                case CHECK_EXC_MATCH: {
3040
0
                    ref_stack_pop(&refs);
3041
0
                    PUSH_REF(i, NOT_LOCAL);
3042
0
                    break;
3043
0
                }
3044
3045
452
                case FOR_ITER: {
3046
452
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
3047
452
                    PUSH_REF(i, NOT_LOCAL);
3048
452
                    break;
3049
452
                }
3050
3051
14.0k
                case LOAD_ATTR:
3052
14.2k
                case LOAD_SUPER_ATTR: {
3053
14.2k
                    ref self = ref_stack_pop(&refs);
3054
14.2k
                    if (opcode == LOAD_SUPER_ATTR) {
3055
191
                        ref_stack_pop(&refs);
3056
191
                        ref_stack_pop(&refs);
3057
191
                    }
3058
14.2k
                    PUSH_REF(i, NOT_LOCAL);
3059
14.2k
                    if (oparg & 1) {
3060
                        // A method call; conservatively assume that self is pushed
3061
                        // back onto the stack
3062
4.26k
                        PUSH_REF(self.instr, self.local);
3063
4.26k
                    }
3064
14.2k
                    break;
3065
14.2k
                }
3066
3067
14.2k
                case LOAD_SPECIAL:
3068
244
                case PUSH_EXC_INFO: {
3069
244
                    ref tos = ref_stack_pop(&refs);
3070
244
                    PUSH_REF(i, NOT_LOCAL);
3071
244
                    PUSH_REF(tos.instr, tos.local);
3072
244
                    break;
3073
244
                }
3074
3075
244
                case SEND: {
3076
145
                    load_fast_push_block(&sp, instr->i_target, refs.size);
3077
145
                    ref_stack_pop(&refs);
3078
145
                    PUSH_REF(i, NOT_LOCAL);
3079
145
                    break;
3080
145
                }
3081
3082
                // Opcodes that consume all of their inputs
3083
126k
                default: {
3084
126k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
3085
126k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
3086
126k
                    if (HAS_TARGET(instr->i_opcode)) {
3087
6.04k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
3088
6.04k
                    }
3089
126k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
3090
                        // Block push opcodes only affect the stack when jumping
3091
                        // to the target.
3092
225k
                        for (int j = 0; j < num_popped; j++) {
3093
98.7k
                            ref_stack_pop(&refs);
3094
98.7k
                        }
3095
219k
                        for (int j = 0; j < num_pushed; j++) {
3096
92.9k
                            PUSH_REF(i, NOT_LOCAL);
3097
92.9k
                        }
3098
126k
                    }
3099
126k
                    break;
3100
126k
                }
3101
181k
            }
3102
181k
        }
3103
3104
        // Push fallthrough block
3105
16.6k
        if (BB_HAS_FALLTHROUGH(block)) {
3106
7.90k
            assert(block->b_next != NULL);
3107
7.90k
            load_fast_push_block(&sp, block->b_next, refs.size);
3108
7.90k
        }
3109
3110
        // Mark instructions that produce values that are on the stack at the
3111
        // end of the basic block
3112
33.1k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3113
16.4k
            ref r = ref_stack_at(&refs, i);
3114
16.4k
            if (r.instr != -1) {
3115
9.95k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3116
9.95k
            }
3117
16.4k
        }
3118
3119
        // Optimize instructions
3120
198k
        for (int i = 0; i < block->b_iused; i++) {
3121
181k
            if (!instr_flags[i]) {
3122
168k
                cfg_instr *instr = &block->b_instr[i];
3123
168k
                switch (instr->i_opcode) {
3124
18.8k
                    case LOAD_FAST:
3125
18.8k
                        instr->i_opcode = LOAD_FAST_BORROW;
3126
18.8k
                        break;
3127
3.18k
                    case LOAD_FAST_LOAD_FAST:
3128
3.18k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3129
3.18k
                        break;
3130
146k
                    default:
3131
146k
                        break;
3132
168k
                }
3133
168k
            }
3134
181k
        }
3135
16.6k
    }
3136
3137
5.38k
    #undef PUSH_REF
3138
3139
5.38k
    status = SUCCESS;
3140
3141
5.38k
done:
3142
5.38k
    ref_stack_fini(&refs);
3143
5.38k
    PyMem_Free(instr_flags);
3144
5.38k
    PyMem_Free(blocks);
3145
5.38k
    return status;
3146
5.38k
}
3147
3148
// helper functions for add_checks_for_loads_of_unknown_variables
3149
static inline void
3150
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3151
79.7k
{
3152
    // Push b if the unsafe mask is giving us any new information.
3153
    // To avoid overflowing the stack, only allow each block once.
3154
    // Use b->b_visited=1 to mean that b is currently on the stack.
3155
79.7k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3156
79.7k
    if (b->b_unsafe_locals_mask != both) {
3157
9.41k
        b->b_unsafe_locals_mask = both;
3158
        // More work left to do.
3159
9.41k
        if (!b->b_visited) {
3160
            // not on the stack, so push it.
3161
9.34k
            *(*sp)++ = b;
3162
9.34k
            b->b_visited = 1;
3163
9.34k
        }
3164
9.41k
    }
3165
79.7k
}
3166
3167
static void
3168
scan_block_for_locals(basicblock *b, basicblock ***sp)
3169
30.3k
{
3170
    // bit i is set if local i is potentially uninitialized
3171
30.3k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3172
225k
    for (int i = 0; i < b->b_iused; i++) {
3173
194k
        cfg_instr *instr = &b->b_instr[i];
3174
194k
        assert(instr->i_opcode != EXTENDED_ARG);
3175
194k
        if (instr->i_except != NULL) {
3176
49.5k
            maybe_push(instr->i_except, unsafe_mask, sp);
3177
49.5k
        }
3178
194k
        if (instr->i_oparg >= 64) {
3179
9.94k
            continue;
3180
9.94k
        }
3181
194k
        assert(instr->i_oparg >= 0);
3182
184k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3183
184k
        switch (instr->i_opcode) {
3184
399
            case DELETE_FAST:
3185
541
            case LOAD_FAST_AND_CLEAR:
3186
825
            case STORE_FAST_MAYBE_NULL:
3187
825
                unsafe_mask |= bit;
3188
825
                break;
3189
9.23k
            case STORE_FAST:
3190
9.23k
                unsafe_mask &= ~bit;
3191
9.23k
                break;
3192
43
            case LOAD_FAST_CHECK:
3193
                // If this doesn't raise, then the local is defined.
3194
43
                unsafe_mask &= ~bit;
3195
43
                break;
3196
37.6k
            case LOAD_FAST:
3197
37.6k
                if (unsafe_mask & bit) {
3198
43
                    instr->i_opcode = LOAD_FAST_CHECK;
3199
43
                }
3200
37.6k
                unsafe_mask &= ~bit;
3201
37.6k
                break;
3202
184k
        }
3203
184k
    }
3204
30.3k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3205
14.3k
        maybe_push(b->b_next, unsafe_mask, sp);
3206
14.3k
    }
3207
30.3k
    cfg_instr *last = basicblock_last_instr(b);
3208
30.3k
    if (last && is_jump(last)) {
3209
12.2k
        assert(last->i_target != NULL);
3210
12.2k
        maybe_push(last->i_target, unsafe_mask, sp);
3211
12.2k
    }
3212
30.3k
}
3213
3214
static int
3215
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3216
0
{
3217
0
    assert(nlocals > 64);
3218
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3219
0
    if (states == NULL) {
3220
0
        PyErr_NoMemory();
3221
0
        return ERROR;
3222
0
    }
3223
0
    Py_ssize_t blocknum = 0;
3224
    // state[i - 64] == blocknum if local i is guaranteed to
3225
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3226
    // STORE_FAST within that basicblock (not followed by
3227
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3228
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3229
0
        blocknum++;
3230
0
        for (int i = 0; i < b->b_iused; i++) {
3231
0
            cfg_instr *instr = &b->b_instr[i];
3232
0
            assert(instr->i_opcode != EXTENDED_ARG);
3233
0
            int arg = instr->i_oparg;
3234
0
            if (arg < 64) {
3235
0
                continue;
3236
0
            }
3237
0
            assert(arg >= 0);
3238
0
            switch (instr->i_opcode) {
3239
0
                case DELETE_FAST:
3240
0
                case LOAD_FAST_AND_CLEAR:
3241
0
                case STORE_FAST_MAYBE_NULL:
3242
0
                    states[arg - 64] = blocknum - 1;
3243
0
                    break;
3244
0
                case STORE_FAST:
3245
0
                    states[arg - 64] = blocknum;
3246
0
                    break;
3247
0
                case LOAD_FAST:
3248
0
                    if (states[arg - 64] != blocknum) {
3249
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3250
0
                    }
3251
0
                    states[arg - 64] = blocknum;
3252
0
                    break;
3253
0
                    Py_UNREACHABLE();
3254
0
            }
3255
0
        }
3256
0
    }
3257
0
    PyMem_Free(states);
3258
0
    return SUCCESS;
3259
0
}
3260
3261
static int
3262
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3263
5.38k
{
3264
5.38k
    assert(PyList_CheckExact(consts));
3265
5.38k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3266
5.38k
    if (nconsts == 0) {
3267
32
        return SUCCESS;  /* nothing to do */
3268
32
    }
3269
3270
5.34k
    Py_ssize_t *index_map = NULL;
3271
5.34k
    Py_ssize_t *reverse_index_map = NULL;
3272
5.34k
    int err = ERROR;
3273
3274
5.34k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3275
5.34k
    if (index_map == NULL) {
3276
0
        goto end;
3277
0
    }
3278
30.5k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3279
25.1k
        index_map[i] = -1;
3280
25.1k
    }
3281
    // The first constant may be docstring; keep it always.
3282
5.34k
    index_map[0] = 0;
3283
3284
    /* mark used consts */
3285
29.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3286
215k
        for (int i = 0; i < b->b_iused; i++) {
3287
191k
            int opcode = b->b_instr[i].i_opcode;
3288
191k
            if (OPCODE_HAS_CONST(opcode)) {
3289
18.1k
                int index = b->b_instr[i].i_oparg;
3290
18.1k
                index_map[index] = index;
3291
18.1k
            }
3292
191k
        }
3293
23.8k
    }
3294
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3295
    /* condense consts */
3296
5.34k
    Py_ssize_t n_used_consts = 0;
3297
35.8k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3298
30.5k
        if (index_map[i] != -1) {
3299
18.7k
            assert(index_map[i] == i);
3300
18.7k
            index_map[n_used_consts++] = index_map[i];
3301
18.7k
        }
3302
30.5k
    }
3303
5.34k
    if (n_used_consts == nconsts) {
3304
        /* nothing to do */
3305
1.40k
        err = SUCCESS;
3306
1.40k
        goto end;
3307
1.40k
    }
3308
3309
    /* move all used consts to the beginning of the consts list */
3310
5.34k
    assert(n_used_consts < nconsts);
3311
21.0k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3312
17.1k
        Py_ssize_t old_index = index_map[i];
3313
17.1k
        assert(i <= old_index && old_index < nconsts);
3314
17.1k
        if (i != old_index) {
3315
9.33k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3316
9.33k
            assert(value != NULL);
3317
9.33k
            PyList_SetItem(consts, i, Py_NewRef(value));
3318
9.33k
        }
3319
17.1k
    }
3320
3321
    /* truncate the consts list at its new size */
3322
3.94k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3323
0
        goto end;
3324
0
    }
3325
    /* adjust const indices in the bytecode */
3326
3.94k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3327
3.94k
    if (reverse_index_map == NULL) {
3328
0
        goto end;
3329
0
    }
3330
32.8k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3331
28.9k
        reverse_index_map[i] = -1;
3332
28.9k
    }
3333
21.0k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3334
17.1k
        assert(index_map[i] != -1);
3335
17.1k
        assert(reverse_index_map[index_map[i]] == -1);
3336
17.1k
        reverse_index_map[index_map[i]] = i;
3337
17.1k
    }
3338
3339
23.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3340
189k
        for (int i = 0; i < b->b_iused; i++) {
3341
170k
            int opcode = b->b_instr[i].i_opcode;
3342
170k
            if (OPCODE_HAS_CONST(opcode)) {
3343
17.7k
                int index = b->b_instr[i].i_oparg;
3344
17.7k
                assert(reverse_index_map[index] >= 0);
3345
17.7k
                assert(reverse_index_map[index] < n_used_consts);
3346
17.7k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3347
17.7k
            }
3348
170k
        }
3349
19.5k
    }
3350
3351
3.94k
    err = SUCCESS;
3352
5.34k
end:
3353
5.34k
    PyMem_Free(index_map);
3354
5.34k
    PyMem_Free(reverse_index_map);
3355
5.34k
    return err;
3356
3.94k
}
3357
3358
3359
3360
static int
3361
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3362
                                                int nlocals,
3363
                                                int nparams)
3364
5.38k
{
3365
5.38k
    if (nlocals == 0) {
3366
1.76k
        return SUCCESS;
3367
1.76k
    }
3368
3.61k
    if (nlocals > 64) {
3369
        // To avoid O(nlocals**2) compilation, locals beyond the first
3370
        // 64 are only analyzed one basicblock at a time: initialization
3371
        // info is not passed between basicblocks.
3372
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3373
0
            return ERROR;
3374
0
        }
3375
0
        nlocals = 64;
3376
0
    }
3377
3.61k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3378
3.61k
    if (stack == NULL) {
3379
0
        return ERROR;
3380
0
    }
3381
3.61k
    basicblock **sp = stack;
3382
3383
    // First origin of being uninitialized:
3384
    // The non-parameter locals in the entry block.
3385
3.61k
    uint64_t start_mask = 0;
3386
6.87k
    for (int i = nparams; i < nlocals; i++) {
3387
3.25k
        start_mask |= (uint64_t)1 << i;
3388
3.25k
    }
3389
3.61k
    maybe_push(entryblock, start_mask, &sp);
3390
3391
    // Second origin of being uninitialized:
3392
    // There could be DELETE_FAST somewhere, so
3393
    // be sure to scan each basicblock at least once.
3394
24.6k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3395
20.9k
        scan_block_for_locals(b, &sp);
3396
20.9k
    }
3397
    // Now propagate the uncertainty from the origins we found: Use
3398
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3399
12.9k
    while (sp > stack) {
3400
9.34k
        basicblock *b = *--sp;
3401
        // mark as no longer on stack
3402
9.34k
        b->b_visited = 0;
3403
9.34k
        scan_block_for_locals(b, &sp);
3404
9.34k
    }
3405
3.61k
    PyMem_Free(stack);
3406
3.61k
    return SUCCESS;
3407
3.61k
}
3408
3409
3410
static int
3411
4.18k
mark_warm(basicblock *entryblock) {
3412
4.18k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3413
4.18k
    if (stack == NULL) {
3414
0
        return ERROR;
3415
0
    }
3416
4.18k
    basicblock **sp = stack;
3417
3418
4.18k
    *sp++ = entryblock;
3419
4.18k
    entryblock->b_visited = 1;
3420
19.2k
    while (sp > stack) {
3421
15.1k
        basicblock *b = *(--sp);
3422
15.1k
        assert(!b->b_except_handler);
3423
15.1k
        b->b_warm = 1;
3424
15.1k
        basicblock *next = b->b_next;
3425
15.1k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3426
6.43k
            *sp++ = next;
3427
6.43k
            next->b_visited = 1;
3428
6.43k
        }
3429
144k
        for (int i=0; i < b->b_iused; i++) {
3430
129k
            cfg_instr *instr = &b->b_instr[i];
3431
129k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3432
4.49k
                *sp++ = instr->i_target;
3433
4.49k
                instr->i_target->b_visited = 1;
3434
4.49k
            }
3435
129k
        }
3436
15.1k
    }
3437
4.18k
    PyMem_Free(stack);
3438
4.18k
    return SUCCESS;
3439
4.18k
}
3440
3441
static int
3442
4.18k
mark_cold(basicblock *entryblock) {
3443
26.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3444
22.6k
        assert(!b->b_cold && !b->b_warm);
3445
22.6k
    }
3446
4.18k
    if (mark_warm(entryblock) < 0) {
3447
0
        return ERROR;
3448
0
    }
3449
3450
4.18k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3451
4.18k
    if (stack == NULL) {
3452
0
        return ERROR;
3453
0
    }
3454
3455
4.18k
    basicblock **sp = stack;
3456
26.8k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3457
22.6k
        if (b->b_except_handler) {
3458
1.66k
            assert(!b->b_warm);
3459
1.66k
            *sp++ = b;
3460
1.66k
            b->b_visited = 1;
3461
1.66k
        }
3462
22.6k
    }
3463
3464
7.26k
    while (sp > stack) {
3465
3.08k
        basicblock *b = *(--sp);
3466
3.08k
        b->b_cold = 1;
3467
3.08k
        basicblock *next = b->b_next;
3468
3.08k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3469
928
            if (!next->b_warm && !next->b_visited) {
3470
749
                *sp++ = next;
3471
749
                next->b_visited = 1;
3472
749
            }
3473
928
        }
3474
16.1k
        for (int i = 0; i < b->b_iused; i++) {
3475
13.0k
            cfg_instr *instr = &b->b_instr[i];
3476
13.0k
            if (is_jump(instr)) {
3477
910
                assert(i == b->b_iused - 1);
3478
910
                basicblock *target = b->b_instr[i].i_target;
3479
910
                if (!target->b_warm && !target->b_visited) {
3480
670
                    *sp++ = target;
3481
670
                    target->b_visited = 1;
3482
670
                }
3483
910
            }
3484
13.0k
        }
3485
3.08k
    }
3486
4.18k
    PyMem_Free(stack);
3487
4.18k
    return SUCCESS;
3488
4.18k
}
3489
3490
3491
static int
3492
5.38k
push_cold_blocks_to_end(cfg_builder *g) {
3493
5.38k
    basicblock *entryblock = g->g_entryblock;
3494
5.38k
    if (entryblock->b_next == NULL) {
3495
        /* single basicblock, no need to reorder */
3496
1.20k
        return SUCCESS;
3497
1.20k
    }
3498
4.18k
    RETURN_IF_ERROR(mark_cold(entryblock));
3499
3500
4.18k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3501
3502
    /* If we have a cold block with fallthrough to a warm block, add */
3503
    /* an explicit jump instead of fallthrough */
3504
26.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3505
22.7k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3506
145
            basicblock *explicit_jump = cfg_builder_new_block(g);
3507
145
            if (explicit_jump == NULL) {
3508
0
                return ERROR;
3509
0
            }
3510
145
            if (!IS_LABEL(b->b_next->b_label)) {
3511
0
                b->b_next->b_label.id = next_lbl++;
3512
0
            }
3513
145
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3514
145
                             NO_LOCATION);
3515
145
            explicit_jump->b_cold = 1;
3516
145
            explicit_jump->b_next = b->b_next;
3517
145
            explicit_jump->b_predecessors = 1;
3518
145
            b->b_next = explicit_jump;
3519
3520
            /* set target */
3521
145
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3522
145
            last->i_target = explicit_jump->b_next;
3523
145
        }
3524
22.7k
    }
3525
3526
4.18k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3527
4.18k
    basicblock *cold_blocks = NULL;
3528
4.18k
    basicblock *cold_blocks_tail = NULL;
3529
3530
4.18k
    basicblock *b = entryblock;
3531
5.42k
    while(b->b_next) {
3532
5.42k
        assert(!b->b_cold);
3533
20.7k
        while (b->b_next && !b->b_next->b_cold) {
3534
15.3k
            b = b->b_next;
3535
15.3k
        }
3536
5.42k
        if (b->b_next == NULL) {
3537
            /* no more cold blocks */
3538
4.18k
            break;
3539
4.18k
        }
3540
3541
        /* b->b_next is the beginning of a cold streak */
3542
5.42k
        assert(!b->b_cold && b->b_next->b_cold);
3543
3544
1.24k
        basicblock *b_end = b->b_next;
3545
3.22k
        while (b_end->b_next && b_end->b_next->b_cold) {
3546
1.98k
            b_end = b_end->b_next;
3547
1.98k
        }
3548
3549
        /* b_end is the end of the cold streak */
3550
1.24k
        assert(b_end && b_end->b_cold);
3551
1.24k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3552
3553
1.24k
        if (cold_blocks == NULL) {
3554
593
            cold_blocks = b->b_next;
3555
593
        }
3556
649
        else {
3557
649
            cold_blocks_tail->b_next = b->b_next;
3558
649
        }
3559
1.24k
        cold_blocks_tail = b_end;
3560
1.24k
        b->b_next = b_end->b_next;
3561
1.24k
        b_end->b_next = NULL;
3562
1.24k
    }
3563
4.18k
    assert(b != NULL && b->b_next == NULL);
3564
4.18k
    b->b_next = cold_blocks;
3565
3566
4.18k
    if (cold_blocks != NULL) {
3567
593
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3568
593
    }
3569
4.18k
    return SUCCESS;
3570
4.18k
}
3571
3572
static int
3573
convert_pseudo_conditional_jumps(cfg_builder *g)
3574
5.38k
{
3575
5.38k
    basicblock *entryblock = g->g_entryblock;
3576
29.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3577
211k
        for (int i = 0; i < b->b_iused; i++) {
3578
187k
            cfg_instr *instr = &b->b_instr[i];
3579
187k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3580
577
                assert(i == b->b_iused - 1);
3581
577
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3582
511
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3583
577
                location loc = instr->i_loc;
3584
577
                basicblock *except = instr->i_except;
3585
577
                cfg_instr copy = {
3586
577
                            .i_opcode = COPY,
3587
577
                            .i_oparg = 1,
3588
577
                            .i_loc = loc,
3589
577
                            .i_target = NULL,
3590
577
                            .i_except = except,
3591
577
                };
3592
577
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3593
577
                cfg_instr to_bool = {
3594
577
                            .i_opcode = TO_BOOL,
3595
577
                            .i_oparg = 0,
3596
577
                            .i_loc = loc,
3597
577
                            .i_target = NULL,
3598
577
                            .i_except = except,
3599
577
                };
3600
577
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3601
577
            }
3602
187k
        }
3603
23.9k
    }
3604
5.38k
    return SUCCESS;
3605
5.38k
}
3606
3607
static int
3608
convert_pseudo_ops(cfg_builder *g)
3609
5.38k
{
3610
5.38k
    basicblock *entryblock = g->g_entryblock;
3611
29.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3612
214k
        for (int i = 0; i < b->b_iused; i++) {
3613
190k
            cfg_instr *instr = &b->b_instr[i];
3614
190k
            if (is_block_push(instr)) {
3615
1.66k
                INSTR_SET_OP0(instr, NOP);
3616
1.66k
            }
3617
188k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3618
1.38k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3619
1.38k
                instr->i_opcode = LOAD_FAST;
3620
1.38k
            }
3621
187k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3622
142
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3623
142
                instr->i_opcode = STORE_FAST;
3624
142
            }
3625
190k
        }
3626
23.9k
    }
3627
5.38k
    return remove_redundant_nops_and_jumps(g);
3628
5.38k
}
3629
3630
static inline bool
3631
32.4k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3632
32.4k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3633
19.5k
        return basicblock_has_no_lineno(b);
3634
19.5k
    }
3635
12.9k
    else {
3636
12.9k
        return false;
3637
12.9k
    }
3638
32.4k
}
3639
3640
3641
/* PEP 626 mandates that the f_lineno of a frame is correct
3642
 * after a frame terminates. It would be prohibitively expensive
3643
 * to continuously update the f_lineno field at runtime,
3644
 * so we make sure that all exiting instruction (raises and returns)
3645
 * have a valid line number, allowing us to compute f_lineno lazily.
3646
 * We can do this by duplicating the exit blocks without line number
3647
 * so that none have more than one predecessor. We can then safely
3648
 * copy the line number from the sole predecessor block.
3649
 */
3650
static int
3651
duplicate_exits_without_lineno(cfg_builder *g)
3652
10.7k
{
3653
10.7k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3654
3655
    /* Copy all exit blocks without line number that are targets of a jump.
3656
     */
3657
10.7k
    basicblock *entryblock = g->g_entryblock;
3658
58.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3659
47.8k
        cfg_instr *last = basicblock_last_instr(b);
3660
47.8k
        if (last == NULL) {
3661
8.74k
            continue;
3662
8.74k
        }
3663
39.0k
        if (is_jump(last)) {
3664
15.2k
            basicblock *target = next_nonempty_block(last->i_target);
3665
15.2k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3666
382
                basicblock *new_target = copy_basicblock(g, target);
3667
382
                if (new_target == NULL) {
3668
0
                    return ERROR;
3669
0
                }
3670
382
                new_target->b_instr[0].i_loc = last->i_loc;
3671
382
                last->i_target = new_target;
3672
382
                target->b_predecessors--;
3673
382
                new_target->b_predecessors = 1;
3674
382
                new_target->b_next = target->b_next;
3675
382
                new_target->b_label.id = next_lbl++;
3676
382
                target->b_next = new_target;
3677
382
            }
3678
15.2k
        }
3679
39.0k
    }
3680
3681
    /* Any remaining reachable exit blocks without line number can only be reached by
3682
     * fall through, and thus can only have a single predecessor */
3683
58.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3684
47.8k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3685
17.1k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3686
522
                cfg_instr *last = basicblock_last_instr(b);
3687
522
                assert(last != NULL);
3688
522
                b->b_next->b_instr[0].i_loc = last->i_loc;
3689
522
            }
3690
17.1k
        }
3691
47.8k
    }
3692
10.7k
    return SUCCESS;
3693
10.7k
}
3694
3695
3696
/* If an instruction has no line number, but it's predecessor in the BB does,
3697
 * then copy the line number. If a successor block has no line number, and only
3698
 * one predecessor, then inherit the line number.
3699
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3700
 * Also reduces the size of the line number table,
3701
 * but has no impact on the generated line number events.
3702
 */
3703
3704
static inline void
3705
maybe_propagate_location(basicblock *b, int i, location loc)
3706
411k
{
3707
411k
    assert(b->b_iused > i);
3708
411k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3709
26.5k
         b->b_instr[i].i_loc = loc;
3710
26.5k
    }
3711
411k
}
3712
3713
static void
3714
propagate_line_numbers(basicblock *entryblock)
3715
10.7k
{
3716
58.5k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3717
47.8k
        cfg_instr *last = basicblock_last_instr(b);
3718
47.8k
        if (last == NULL) {
3719
8.74k
            continue;
3720
8.74k
        }
3721
3722
39.0k
        location prev_location = NO_LOCATION;
3723
431k
        for (int i = 0; i < b->b_iused; i++) {
3724
391k
            maybe_propagate_location(b, i, prev_location);
3725
391k
            prev_location = b->b_instr[i].i_loc;
3726
391k
        }
3727
39.0k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3728
12.4k
            if (b->b_next->b_iused > 0) {
3729
12.4k
                maybe_propagate_location(b->b_next, 0, prev_location);
3730
12.4k
            }
3731
12.4k
        }
3732
39.0k
        if (is_jump(last)) {
3733
15.2k
            basicblock *target = last->i_target;
3734
15.2k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3735
1
                target = target->b_next;
3736
1
            }
3737
15.2k
            if (target->b_predecessors == 1) {
3738
7.07k
                maybe_propagate_location(target, 0, prev_location);
3739
7.07k
            }
3740
15.2k
        }
3741
39.0k
    }
3742
10.7k
}
3743
3744
static int
3745
resolve_line_numbers(cfg_builder *g, int firstlineno)
3746
10.7k
{
3747
10.7k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3748
10.7k
    propagate_line_numbers(g->g_entryblock);
3749
10.7k
    return SUCCESS;
3750
10.7k
}
3751
3752
int
3753
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3754
                        int nlocals, int nparams, int firstlineno)
3755
5.38k
{
3756
5.38k
    assert(cfg_builder_check(g));
3757
    /** Preprocessing **/
3758
    /* Map labels to targets and mark exception handlers */
3759
5.38k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3760
5.38k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3761
5.38k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3762
3763
    /** Optimization **/
3764
3765
5.38k
    _Py_hashtable_t *consts_index = _Py_hashtable_new(
3766
5.38k
        _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
3767
5.38k
    if (consts_index == NULL) {
3768
0
        PyErr_NoMemory();
3769
0
        return ERROR;
3770
0
    }
3771
3772
35.1k
    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
3773
29.7k
        PyObject *item = PyList_GET_ITEM(consts, i);
3774
29.7k
        if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
3775
0
            continue;
3776
0
        }
3777
29.7k
        if (_Py_hashtable_set(consts_index, (void *)item,
3778
29.7k
                              (void *)(uintptr_t)i) < 0) {
3779
0
            _Py_hashtable_destroy(consts_index);
3780
0
            PyErr_NoMemory();
3781
0
            return ERROR;
3782
0
        }
3783
29.7k
    }
3784
3785
5.38k
    int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
3786
3787
5.38k
    _Py_hashtable_destroy(consts_index);
3788
3789
5.38k
    RETURN_IF_ERROR(ret);
3790
3791
5.38k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3792
5.38k
    RETURN_IF_ERROR(
3793
5.38k
        add_checks_for_loads_of_uninitialized_variables(
3794
5.38k
            g->g_entryblock, nlocals, nparams));
3795
5.38k
    RETURN_IF_ERROR(insert_superinstructions(g));
3796
3797
5.38k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3798
5.38k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3799
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3800
    // assert(all_exits_have_lineno(g->g_entryblock));
3801
5.38k
    return SUCCESS;
3802
5.38k
}
3803
3804
static int *
3805
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3806
5.38k
{
3807
5.38k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3808
5.38k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3809
5.38k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3810
3811
5.38k
    int noffsets = ncellvars + nfreevars;
3812
5.38k
    int *fixed = PyMem_New(int, noffsets);
3813
5.38k
    if (fixed == NULL) {
3814
0
        PyErr_NoMemory();
3815
0
        return NULL;
3816
0
    }
3817
7.36k
    for (int i = 0; i < noffsets; i++) {
3818
1.98k
        fixed[i] = nlocals + i;
3819
1.98k
    }
3820
3821
5.38k
    PyObject *varname, *cellindex;
3822
5.38k
    Py_ssize_t pos = 0;
3823
6.45k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3824
1.07k
        PyObject *varindex;
3825
1.07k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3826
0
            goto error;
3827
0
        }
3828
1.07k
        if (varindex == NULL) {
3829
556
            continue;
3830
556
        }
3831
3832
516
        int argoffset = PyLong_AsInt(varindex);
3833
516
        Py_DECREF(varindex);
3834
516
        if (argoffset == -1 && PyErr_Occurred()) {
3835
0
            goto error;
3836
0
        }
3837
3838
516
        int oldindex = PyLong_AsInt(cellindex);
3839
516
        if (oldindex == -1 && PyErr_Occurred()) {
3840
0
            goto error;
3841
0
        }
3842
516
        fixed[oldindex] = argoffset;
3843
516
    }
3844
5.38k
    return fixed;
3845
3846
0
error:
3847
0
    PyMem_Free(fixed);
3848
0
    return NULL;
3849
5.38k
}
3850
3851
#define IS_GENERATOR(CF) \
3852
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3853
3854
static int
3855
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3856
                           int *fixed, int nfreevars)
3857
5.38k
{
3858
5.38k
    assert(umd->u_firstlineno > 0);
3859
3860
    /* Set up cells for any variable that escapes, to be put in a closure. */
3861
5.38k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3862
5.38k
    if (ncellvars) {
3863
        // umd->u_cellvars has the cells out of order so we sort them
3864
        // before adding the MAKE_CELL instructions.  Note that we
3865
        // adjust for arg cells, which come first.
3866
770
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3867
770
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3868
770
        if (sorted == NULL) {
3869
0
            PyErr_NoMemory();
3870
0
            return ERROR;
3871
0
        }
3872
1.84k
        for (int i = 0; i < ncellvars; i++) {
3873
1.07k
            sorted[fixed[i]] = i + 1;
3874
1.07k
        }
3875
2.38k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3876
1.61k
            int oldindex = sorted[i] - 1;
3877
1.61k
            if (oldindex == -1) {
3878
546
                continue;
3879
546
            }
3880
1.07k
            cfg_instr make_cell = {
3881
1.07k
                .i_opcode = MAKE_CELL,
3882
                // This will get fixed in offset_derefs().
3883
1.07k
                .i_oparg = oldindex,
3884
1.07k
                .i_loc = NO_LOCATION,
3885
1.07k
                .i_target = NULL,
3886
1.07k
                .i_except = NULL,
3887
1.07k
            };
3888
1.07k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3889
0
                PyMem_RawFree(sorted);
3890
0
                return ERROR;
3891
0
            }
3892
1.07k
            ncellsused += 1;
3893
1.07k
        }
3894
770
        PyMem_RawFree(sorted);
3895
770
    }
3896
3897
5.38k
    if (nfreevars) {
3898
644
        cfg_instr copy_frees = {
3899
644
            .i_opcode = COPY_FREE_VARS,
3900
644
            .i_oparg = nfreevars,
3901
644
            .i_loc = NO_LOCATION,
3902
644
            .i_target = NULL,
3903
644
            .i_except = NULL,
3904
644
        };
3905
644
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3906
644
    }
3907
3908
5.38k
    return SUCCESS;
3909
5.38k
}
3910
3911
static int
3912
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3913
5.38k
{
3914
5.38k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3915
5.38k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3916
5.38k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3917
5.38k
    int noffsets = ncellvars + nfreevars;
3918
3919
    // First deal with duplicates (arg cells).
3920
5.38k
    int numdropped = 0;
3921
7.36k
    for (int i = 0; i < noffsets ; i++) {
3922
1.98k
        if (fixedmap[i] == i + nlocals) {
3923
1.47k
            fixedmap[i] -= numdropped;
3924
1.47k
        }
3925
516
        else {
3926
            // It was a duplicate (cell/arg).
3927
516
            numdropped += 1;
3928
516
        }
3929
1.98k
    }
3930
3931
    // Then update offsets, either relative to locals or by cell2arg.
3932
29.3k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3933
214k
        for (int i = 0; i < b->b_iused; i++) {
3934
190k
            cfg_instr *inst = &b->b_instr[i];
3935
            // This is called before extended args are generated.
3936
190k
            assert(inst->i_opcode != EXTENDED_ARG);
3937
190k
            int oldoffset = inst->i_oparg;
3938
190k
            switch(inst->i_opcode) {
3939
1.07k
                case MAKE_CELL:
3940
2.45k
                case LOAD_CLOSURE:
3941
4.64k
                case LOAD_DEREF:
3942
5.19k
                case STORE_DEREF:
3943
5.19k
                case DELETE_DEREF:
3944
5.20k
                case LOAD_FROM_DICT_OR_DEREF:
3945
5.20k
                    assert(oldoffset >= 0);
3946
5.20k
                    assert(oldoffset < noffsets);
3947
5.20k
                    assert(fixedmap[oldoffset] >= 0);
3948
5.20k
                    inst->i_oparg = fixedmap[oldoffset];
3949
190k
            }
3950
190k
        }
3951
23.9k
    }
3952
3953
5.38k
    return numdropped;
3954
5.38k
}
3955
3956
static int
3957
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
3958
5.38k
{
3959
5.38k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3960
5.38k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3961
5.38k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3962
5.38k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3963
5.38k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3964
5.38k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3965
5.38k
    assert(INT_MAX - nlocals - ncellvars > 0);
3966
5.38k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3967
5.38k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3968
5.38k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3969
5.38k
    if (cellfixedoffsets == NULL) {
3970
0
        return ERROR;
3971
0
    }
3972
3973
    // This must be called before fix_cell_offsets().
3974
5.38k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
3975
0
        PyMem_Free(cellfixedoffsets);
3976
0
        return ERROR;
3977
0
    }
3978
3979
5.38k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3980
5.38k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3981
5.38k
    cellfixedoffsets = NULL;
3982
5.38k
    if (numdropped < 0) {
3983
0
        return ERROR;
3984
0
    }
3985
3986
5.38k
    nlocalsplus -= numdropped;
3987
5.38k
    return nlocalsplus;
3988
5.38k
}
3989
3990
cfg_builder *
3991
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3992
5.38k
{
3993
5.38k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3994
0
        return NULL;
3995
0
    }
3996
5.38k
    cfg_builder *g = _PyCfgBuilder_New();
3997
5.38k
    if (g == NULL) {
3998
0
        return NULL;
3999
0
    }
4000
217k
    for (int i = 0; i < seq->s_used; i++) {
4001
211k
        seq->s_instrs[i].i_target = 0;
4002
211k
    }
4003
217k
    for (int i = 0; i < seq->s_used; i++) {
4004
211k
        _PyInstruction *instr = &seq->s_instrs[i];
4005
211k
        if (HAS_TARGET(instr->i_opcode)) {
4006
10.4k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
4007
10.4k
            seq->s_instrs[instr->i_oparg].i_target = 1;
4008
10.4k
        }
4009
211k
    }
4010
5.38k
    int offset = 0;
4011
217k
    for (int i = 0; i < seq->s_used; i++) {
4012
211k
        _PyInstruction *instr = &seq->s_instrs[i];
4013
211k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
4014
798
            if (seq->s_annotations_code != NULL) {
4015
1
                assert(seq->s_annotations_code->s_labelmap_size == 0
4016
1
                    && seq->s_annotations_code->s_nested == NULL);
4017
4
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
4018
3
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
4019
3
                    assert(!HAS_TARGET(ann_instr->i_opcode));
4020
3
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
4021
0
                        goto error;
4022
0
                    }
4023
3
                }
4024
1
                offset += seq->s_annotations_code->s_used - 1;
4025
1
            }
4026
797
            else {
4027
797
                offset -= 1;
4028
797
            }
4029
798
            continue;
4030
798
        }
4031
211k
        if (instr->i_target) {
4032
8.53k
            jump_target_label lbl_ = {i + offset};
4033
8.53k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
4034
0
                goto error;
4035
0
            }
4036
8.53k
        }
4037
211k
        int opcode = instr->i_opcode;
4038
211k
        int oparg = instr->i_oparg;
4039
211k
        if (HAS_TARGET(opcode)) {
4040
10.4k
            oparg += offset;
4041
10.4k
        }
4042
211k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
4043
0
            goto error;
4044
0
        }
4045
211k
    }
4046
5.38k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
4047
0
        goto error;
4048
0
    }
4049
5.38k
    return g;
4050
0
error:
4051
0
    _PyCfgBuilder_Free(g);
4052
0
    return NULL;
4053
5.38k
}
4054
4055
int
4056
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
4057
5.38k
{
4058
5.38k
    int lbl = 0;
4059
29.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
4060
24.1k
        b->b_label = (jump_target_label){lbl};
4061
24.1k
        lbl += 1;
4062
24.1k
    }
4063
29.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
4064
24.1k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
4065
218k
        for (int i = 0; i < b->b_iused; i++) {
4066
194k
            cfg_instr *instr = &b->b_instr[i];
4067
194k
            if (HAS_TARGET(instr->i_opcode)) {
4068
                /* Set oparg to the label id (it will later be mapped to an offset) */
4069
7.69k
                instr->i_oparg = instr->i_target->b_label.id;
4070
7.69k
            }
4071
194k
            RETURN_IF_ERROR(
4072
194k
                _PyInstructionSequence_Addop(
4073
194k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
4074
4075
194k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
4076
194k
            if (instr->i_except != NULL) {
4077
27.4k
                hi->h_label = instr->i_except->b_label.id;
4078
27.4k
                hi->h_startdepth = instr->i_except->b_startdepth;
4079
27.4k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
4080
27.4k
            }
4081
167k
            else {
4082
167k
                hi->h_label = -1;
4083
167k
            }
4084
194k
        }
4085
24.1k
    }
4086
5.38k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
4087
0
        return ERROR;
4088
0
    }
4089
5.38k
    return SUCCESS;
4090
5.38k
}
4091
4092
4093
int
4094
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
4095
                                     _PyCompile_CodeUnitMetadata *umd,
4096
                                     int *stackdepth, int *nlocalsplus,
4097
                                     _PyInstructionSequence *seq)
4098
5.38k
{
4099
5.38k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
4100
4101
5.38k
    *stackdepth = calculate_stackdepth(g);
4102
5.38k
    if (*stackdepth < 0) {
4103
0
        return ERROR;
4104
0
    }
4105
4106
5.38k
    *nlocalsplus = prepare_localsplus(umd, g);
4107
5.38k
    if (*nlocalsplus < 0) {
4108
0
        return ERROR;
4109
0
    }
4110
4111
5.38k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4112
4113
    /* Order of basic blocks must have been determined by now */
4114
4115
5.38k
    RETURN_IF_ERROR(normalize_jumps(g));
4116
5.38k
    assert(no_redundant_jumps(g));
4117
4118
    /* Can't modify the bytecode after inserting instructions that produce
4119
     * borrowed references.
4120
     */
4121
5.38k
    RETURN_IF_ERROR(optimize_load_fast(g));
4122
4123
    /* Can't modify the bytecode after computing jump offsets. */
4124
5.38k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4125
0
        return ERROR;
4126
0
    }
4127
4128
5.38k
    return SUCCESS;
4129
5.38k
}
4130
4131
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4132
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4133
 */
4134
int
4135
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4136
0
{
4137
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4138
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4139
0
    return SUCCESS;
4140
0
}
4141
4142
/* Exported API functions */
4143
4144
int
4145
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4146
0
{
4147
0
    stack_effects effs;
4148
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4149
0
        return PY_INVALID_STACK_EFFECT;
4150
0
    }
4151
0
    return effs.net;
4152
0
}
4153
4154
int
4155
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4156
26
{
4157
26
    stack_effects effs;
4158
26
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4159
0
        return PY_INVALID_STACK_EFFECT;
4160
0
    }
4161
26
    return effs.net;
4162
26
}
4163
4164
/* Access to compiler optimizations for unit tests.
4165
4166
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4167
 * a CFG, optimizes it and converts back to an instruction list.
4168
 */
4169
4170
static PyObject *
4171
cfg_to_instruction_sequence(cfg_builder *g)
4172
0
{
4173
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4174
0
    if (seq == NULL) {
4175
0
        return NULL;
4176
0
    }
4177
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4178
0
        PyInstructionSequence_Fini(seq);
4179
0
        return NULL;
4180
0
    }
4181
0
    return (PyObject*)seq;
4182
0
}
4183
4184
PyObject *
4185
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4186
0
{
4187
0
    if (!_PyInstructionSequence_Check(seq)) {
4188
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4189
0
        return NULL;
4190
0
    }
4191
0
    if (!PyList_Check(consts)) {
4192
0
        PyErr_SetString(PyExc_TypeError, "consts must be a list");
4193
0
        return NULL;
4194
0
    }
4195
0
    PyObject *const_cache = PyDict_New();
4196
0
    if (const_cache == NULL) {
4197
0
        return NULL;
4198
0
    }
4199
4200
0
    PyObject *res = NULL;
4201
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4202
0
    if (g == NULL) {
4203
0
        goto error;
4204
0
    }
4205
0
    int nparams = 0, firstlineno = 1;
4206
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4207
0
                                nparams, firstlineno) < 0) {
4208
0
        goto error;
4209
0
    }
4210
4211
0
    if (calculate_stackdepth(g) == ERROR) {
4212
0
        goto error;
4213
0
    }
4214
4215
0
    if (optimize_load_fast(g) != SUCCESS) {
4216
0
        goto error;
4217
0
    }
4218
4219
0
    res = cfg_to_instruction_sequence(g);
4220
0
error:
4221
0
    Py_DECREF(const_cache);
4222
0
    _PyCfgBuilder_Free(g);
4223
0
    return res;
4224
0
}