Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
#include "pycore_hashtable.h"     // _Py_hashtable_t
10
11
#include "pycore_opcode_utils.h"
12
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
13
#include "pycore_pystate.h"         // _PyInterpreterState_GET()
14
#include "pycore_stackref.h"        // PyStackRef_AsPyObjectBorrow()
15
16
#include <stdbool.h>
17
18
19
#undef SUCCESS
20
#undef ERROR
21
530k
#define SUCCESS 0
22
4.19k
#define ERROR -1
23
24
#define RETURN_IF_ERROR(X)  \
25
786k
    if ((X) == -1) {        \
26
0
        return ERROR;       \
27
0
    }
28
29
171k
#define DEFAULT_BLOCK_SIZE 16
30
31
typedef _Py_SourceLocation location;
32
typedef _PyJumpTargetLabel jump_target_label;
33
34
typedef struct _PyCfgInstruction {
35
    int i_opcode;
36
    int i_oparg;
37
    _Py_SourceLocation i_loc;
38
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
39
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
40
} cfg_instr;
41
42
typedef struct _PyCfgBasicblock {
43
    /* Each basicblock in a compilation unit is linked via b_list in the
44
       reverse order that the block are allocated.  b_list points to the next
45
       block in this list, not to be confused with b_next, which is next by
46
       control flow. */
47
    struct _PyCfgBasicblock *b_list;
48
    /* The label of this block if it is a jump target, -1 otherwise */
49
    _PyJumpTargetLabel b_label;
50
    /* Exception stack at start of block, used by assembler to create the exception handling table */
51
    struct _PyCfgExceptStack *b_exceptstack;
52
    /* pointer to an array of instructions, initially NULL */
53
    cfg_instr *b_instr;
54
    /* If b_next is non-NULL, it is a pointer to the next
55
       block reached by normal control flow. */
56
    struct _PyCfgBasicblock *b_next;
57
    /* number of instructions used */
58
    int b_iused;
59
    /* length of instruction array (b_instr) */
60
    int b_ialloc;
61
    /* Used by add_checks_for_loads_of_unknown_variables */
62
    uint64_t b_unsafe_locals_mask;
63
    /* Number of predecessors that a block has. */
64
    int b_predecessors;
65
    /* depth of stack upon entry of block, computed by stackdepth() */
66
    int b_startdepth;
67
    /* Basic block is an exception handler that preserves lasti */
68
    unsigned b_preserve_lasti : 1;
69
    /* Used by compiler passes to mark whether they have visited a basic block. */
70
    unsigned b_visited : 1;
71
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
72
    unsigned b_except_handler : 1;
73
    /* b_cold is true if this block is not perf critical (like an exception handler) */
74
    unsigned b_cold : 1;
75
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
76
    unsigned b_warm : 1;
77
} basicblock;
78
79
80
struct _PyCfgBuilder {
81
    /* The entryblock, at which control flow begins. All blocks of the
82
       CFG are reachable through the b_next links */
83
    struct _PyCfgBasicblock *g_entryblock;
84
    /* Pointer to the most recently allocated block.  By following
85
       b_list links, you can reach all allocated blocks. */
86
    struct _PyCfgBasicblock *g_block_list;
87
    /* pointer to the block currently being constructed */
88
    struct _PyCfgBasicblock *g_curblock;
89
    /* label for the next instruction to be placed */
90
    _PyJumpTargetLabel g_current_label;
91
};
92
93
typedef struct _PyCfgBuilder cfg_builder;
94
95
174k
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
96
174k
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
97
98
#define LOCATION(LNO, END_LNO, COL, END_COL) \
99
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
100
101
static inline int
102
is_block_push(cfg_instr *i)
103
766k
{
104
766k
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
105
766k
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
106
766k
}
107
108
static inline int
109
is_jump(cfg_instr *i)
110
647k
{
111
647k
    return OPCODE_HAS_JUMP(i->i_opcode);
112
647k
}
113
114
/* One arg*/
115
#define INSTR_SET_OP1(I, OP, ARG) \
116
29.4k
    do { \
117
29.4k
        assert(OPCODE_HAS_ARG(OP)); \
118
29.4k
        cfg_instr *_instr__ptr_ = (I); \
119
29.4k
        _instr__ptr_->i_opcode = (OP); \
120
29.4k
        _instr__ptr_->i_oparg = (ARG); \
121
29.4k
    } while (0);
122
123
/* No args*/
124
#define INSTR_SET_OP0(I, OP) \
125
29.4k
    do { \
126
29.4k
        assert(!OPCODE_HAS_ARG(OP)); \
127
29.4k
        cfg_instr *_instr__ptr_ = (I); \
128
29.4k
        _instr__ptr_->i_opcode = (OP); \
129
29.4k
        _instr__ptr_->i_oparg = 0; \
130
29.4k
    } while (0);
131
132
#define INSTR_SET_LOC(I, LOC) \
133
3.48k
    do { \
134
3.48k
        cfg_instr *_instr__ptr_ = (I); \
135
3.48k
        _instr__ptr_->i_loc = (LOC); \
136
3.48k
    } while (0);
137
138
/***** Blocks *****/
139
140
/* Returns the offset of the next instruction in the current block's
141
   b_instr array.  Resizes the b_instr as necessary.
142
   Returns -1 on failure.
143
*/
144
static int
145
basicblock_next_instr(basicblock *b)
146
171k
{
147
171k
    assert(b != NULL);
148
171k
    _Py_c_array_t array = {
149
171k
        .array = (void*)b->b_instr,
150
171k
        .allocated_entries = b->b_ialloc,
151
171k
        .item_size = sizeof(cfg_instr),
152
171k
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
153
171k
    };
154
155
171k
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
156
171k
    b->b_instr = array.array;
157
171k
    b->b_ialloc = array.allocated_entries;
158
171k
    return b->b_iused++;
159
171k
}
160
161
static cfg_instr *
162
510k
basicblock_last_instr(const basicblock *b) {
163
510k
    assert(b->b_iused >= 0);
164
510k
    if (b->b_iused > 0) {
165
464k
        assert(b->b_instr != NULL);
166
464k
        return &b->b_instr[b->b_iused - 1];
167
464k
    }
168
45.9k
    return NULL;
169
510k
}
170
171
/* Allocate a new block and return a pointer to it.
172
   Returns NULL on error.
173
*/
174
175
static basicblock *
176
cfg_builder_new_block(cfg_builder *g)
177
16.1k
{
178
16.1k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
179
16.1k
    if (b == NULL) {
180
0
        PyErr_NoMemory();
181
0
        return NULL;
182
0
    }
183
    /* Extend the singly linked list of blocks with new block. */
184
16.1k
    b->b_list = g->g_block_list;
185
16.1k
    g->g_block_list = b;
186
16.1k
    b->b_label = NO_LABEL;
187
16.1k
    return b;
188
16.1k
}
189
190
static int
191
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
192
167k
{
193
167k
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
194
167k
    assert(!IS_ASSEMBLER_OPCODE(opcode));
195
167k
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
196
167k
    assert(0 <= oparg && oparg < (1 << 30));
197
198
167k
    int off = basicblock_next_instr(b);
199
167k
    if (off < 0) {
200
0
        return ERROR;
201
0
    }
202
167k
    cfg_instr *i = &b->b_instr[off];
203
167k
    i->i_opcode = opcode;
204
167k
    i->i_oparg = oparg;
205
167k
    i->i_loc = loc;
206
    // memory is already zero initialized
207
167k
    assert(i->i_target == NULL);
208
167k
    assert(i->i_except == NULL);
209
210
167k
    return SUCCESS;
211
167k
}
212
213
static int
214
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
215
287
{
216
287
    cfg_instr *last = basicblock_last_instr(b);
217
287
    if (last && is_jump(last)) {
218
0
        return ERROR;
219
0
    }
220
221
287
    RETURN_IF_ERROR(
222
287
        basicblock_addop(b, opcode, target->b_label.id, loc));
223
287
    last = basicblock_last_instr(b);
224
287
    assert(last && last->i_opcode == opcode);
225
287
    last->i_target = target;
226
287
    return SUCCESS;
227
287
}
228
229
static inline int
230
basicblock_append_instructions(basicblock *to, basicblock *from)
231
876
{
232
2.42k
    for (int i = 0; i < from->b_iused; i++) {
233
1.54k
        int n = basicblock_next_instr(to);
234
1.54k
        if (n < 0) {
235
0
            return ERROR;
236
0
        }
237
1.54k
        to->b_instr[n] = from->b_instr[i];
238
1.54k
    }
239
876
    return SUCCESS;
240
876
}
241
242
static inline int
243
149k
basicblock_nofallthrough(const basicblock *b) {
244
149k
    cfg_instr *last = basicblock_last_instr(b);
245
149k
    return (last &&
246
139k
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
247
73.9k
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
248
149k
}
249
250
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
251
245k
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
252
253
static basicblock *
254
copy_basicblock(cfg_builder *g, basicblock *block)
255
211
{
256
    /* Cannot copy a block if it has a fallthrough, since
257
     * a block can only have one fallthrough predecessor.
258
     */
259
211
    assert(BB_NO_FALLTHROUGH(block));
260
211
    basicblock *result = cfg_builder_new_block(g);
261
211
    if (result == NULL) {
262
0
        return NULL;
263
0
    }
264
211
    if (basicblock_append_instructions(result, block) < 0) {
265
0
        return NULL;
266
0
    }
267
211
    return result;
268
211
}
269
270
static int
271
2.49k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
272
2.49k
    RETURN_IF_ERROR(basicblock_next_instr(block));
273
37.8k
    for (int i = block->b_iused - 1; i > pos; i--) {
274
35.3k
        block->b_instr[i] = block->b_instr[i-1];
275
35.3k
    }
276
2.49k
    block->b_instr[pos] = *instr;
277
2.49k
    return SUCCESS;
278
2.49k
}
279
280
/* For debugging purposes only */
281
#if 0
282
static void
283
dump_instr(cfg_instr *i)
284
{
285
    const char *jump = is_jump(i) ? "jump " : "";
286
287
    char arg[128];
288
289
    *arg = '\0';
290
    if (OPCODE_HAS_ARG(i->i_opcode)) {
291
        sprintf(arg, "arg: %d ", i->i_oparg);
292
    }
293
    if (HAS_TARGET(i->i_opcode)) {
294
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
295
    }
296
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
297
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
298
}
299
300
static inline int
301
basicblock_returns(const basicblock *b) {
302
    cfg_instr *last = basicblock_last_instr(b);
303
    return last && IS_RETURN_OPCODE(last->i_opcode);
304
}
305
306
static void
307
dump_basicblock(const basicblock *b, bool highlight)
308
{
309
    const char *b_return = basicblock_returns(b) ? "return " : "";
310
    if (highlight) {
311
        fprintf(stderr, ">>> ");
312
    }
313
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
314
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
315
        b->b_startdepth, b->b_predecessors, b_return);
316
    int depth = b->b_startdepth;
317
    if (b->b_instr) {
318
        int i;
319
        for (i = 0; i < b->b_iused; i++) {
320
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
321
            dump_instr(b->b_instr + i);
322
323
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
324
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
325
            depth += (pushed - popped);
326
        }
327
    }
328
}
329
330
void
331
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
332
{
333
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
334
        dump_basicblock(b, b == mark);
335
    }
336
}
337
338
#endif
339
340
341
/***** CFG construction and modification *****/
342
343
static basicblock *
344
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
345
11.5k
{
346
11.5k
    assert(block != NULL);
347
11.5k
    g->g_curblock->b_next = block;
348
11.5k
    g->g_curblock = block;
349
11.5k
    return block;
350
11.5k
}
351
352
static inline int
353
22.9k
basicblock_exits_scope(const basicblock *b) {
354
22.9k
    cfg_instr *last = basicblock_last_instr(b);
355
22.9k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
356
22.9k
}
357
358
static inline int
359
13.2k
basicblock_has_eval_break(const basicblock *b) {
360
69.6k
    for (int i = 0; i < b->b_iused; i++) {
361
61.3k
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
362
4.87k
            return true;
363
4.87k
        }
364
61.3k
    }
365
8.35k
    return false;
366
13.2k
}
367
368
static bool
369
cfg_builder_current_block_is_terminated(cfg_builder *g)
370
168k
{
371
168k
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
372
168k
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
373
10.0k
        return true;
374
10.0k
    }
375
158k
    if (IS_LABEL(g->g_current_label)) {
376
1.49k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
377
1.49k
            return true;
378
1.49k
        }
379
0
        else {
380
            /* current block is empty, label it */
381
0
            g->g_curblock->b_label = g->g_current_label;
382
0
            g->g_current_label = NO_LABEL;
383
0
        }
384
1.49k
    }
385
157k
    return false;
386
158k
}
387
388
static int
389
cfg_builder_maybe_start_new_block(cfg_builder *g)
390
168k
{
391
168k
    if (cfg_builder_current_block_is_terminated(g)) {
392
11.5k
        basicblock *b = cfg_builder_new_block(g);
393
11.5k
        if (b == NULL) {
394
0
            return ERROR;
395
0
        }
396
11.5k
        b->b_label = g->g_current_label;
397
11.5k
        g->g_current_label = NO_LABEL;
398
11.5k
        cfg_builder_use_next_block(g, b);
399
11.5k
    }
400
168k
    return SUCCESS;
401
168k
}
402
403
#ifndef NDEBUG
404
static bool
405
cfg_builder_check(cfg_builder *g)
406
{
407
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
408
        assert(!_PyMem_IsPtrFreed(block));
409
        if (block->b_instr != NULL) {
410
            assert(block->b_ialloc > 0);
411
            assert(block->b_iused >= 0);
412
            assert(block->b_ialloc >= block->b_iused);
413
        }
414
        else {
415
            assert (block->b_iused == 0);
416
            assert (block->b_ialloc == 0);
417
        }
418
    }
419
    return true;
420
}
421
#endif
422
423
static int
424
init_cfg_builder(cfg_builder *g)
425
4.22k
{
426
4.22k
    g->g_block_list = NULL;
427
4.22k
    basicblock *block = cfg_builder_new_block(g);
428
4.22k
    if (block == NULL) {
429
0
        return ERROR;
430
0
    }
431
4.22k
    g->g_curblock = g->g_entryblock = block;
432
4.22k
    g->g_current_label = NO_LABEL;
433
4.22k
    return SUCCESS;
434
4.22k
}
435
436
cfg_builder *
437
_PyCfgBuilder_New(void)
438
4.22k
{
439
4.22k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
440
4.22k
    if (g == NULL) {
441
0
        PyErr_NoMemory();
442
0
        return NULL;
443
0
    }
444
4.22k
    memset(g, 0, sizeof(cfg_builder));
445
4.22k
    if (init_cfg_builder(g) < 0) {
446
0
        PyMem_Free(g);
447
0
        return NULL;
448
0
    }
449
4.22k
    return g;
450
4.22k
}
451
452
void
453
_PyCfgBuilder_Free(cfg_builder *g)
454
4.22k
{
455
4.22k
    if (g == NULL) {
456
0
        return;
457
0
    }
458
4.22k
    assert(cfg_builder_check(g));
459
4.22k
    basicblock *b = g->g_block_list;
460
20.4k
    while (b != NULL) {
461
16.1k
        if (b->b_instr) {
462
16.1k
            PyMem_Free((void *)b->b_instr);
463
16.1k
        }
464
16.1k
        basicblock *next = b->b_list;
465
16.1k
        PyMem_Free((void *)b);
466
16.1k
        b = next;
467
16.1k
    }
468
4.22k
    PyMem_Free(g);
469
4.22k
}
470
471
int
472
_PyCfgBuilder_CheckSize(cfg_builder *g)
473
4.22k
{
474
4.22k
    int nblocks = 0;
475
20.0k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
476
15.7k
        nblocks++;
477
15.7k
    }
478
4.22k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
479
0
        PyErr_NoMemory();
480
0
        return ERROR;
481
0
    }
482
4.22k
    return SUCCESS;
483
4.22k
}
484
485
int
486
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
487
5.06k
{
488
5.06k
    g->g_current_label = lbl;
489
5.06k
    return cfg_builder_maybe_start_new_block(g);
490
5.06k
}
491
492
int
493
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
494
163k
{
495
163k
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
496
163k
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
497
163k
}
498
499
500
static basicblock *
501
next_nonempty_block(basicblock *b)
502
18.0k
{
503
18.7k
    while (b && b->b_iused == 0) {
504
695
        b = b->b_next;
505
695
    }
506
18.0k
    return b;
507
18.0k
}
508
509
/***** debugging helpers *****/
510
511
#ifndef NDEBUG
512
static int remove_redundant_nops(cfg_builder *g);
513
514
static bool
515
no_redundant_nops(cfg_builder *g) {
516
    if (remove_redundant_nops(g) != 0) {
517
        return false;
518
    }
519
    return true;
520
}
521
522
static bool
523
no_redundant_jumps(cfg_builder *g) {
524
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
525
        cfg_instr *last = basicblock_last_instr(b);
526
        if (last != NULL) {
527
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
528
                basicblock *next = next_nonempty_block(b->b_next);
529
                basicblock *jump_target = next_nonempty_block(last->i_target);
530
                if (jump_target == next) {
531
                    assert(next);
532
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
533
                        assert(0);
534
                        return false;
535
                    }
536
                }
537
            }
538
        }
539
    }
540
    return true;
541
}
542
#endif
543
544
/***** CFG preprocessing (jump targets and exceptions) *****/
545
546
static int
547
16.1k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
548
16.1k
    cfg_instr *last = basicblock_last_instr(b);
549
16.1k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
550
12.7k
        return SUCCESS;
551
12.7k
    }
552
16.1k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
553
554
3.39k
    bool is_forward = last->i_target->b_visited == 0;
555
3.39k
    if (is_forward) {
556
3.25k
        RETURN_IF_ERROR(
557
3.25k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
558
3.25k
        return SUCCESS;
559
3.25k
    }
560
561
146
    int reversed_opcode = 0;
562
146
    switch(last->i_opcode) {
563
0
        case POP_JUMP_IF_NOT_NONE:
564
0
            reversed_opcode = POP_JUMP_IF_NONE;
565
0
            break;
566
7
        case POP_JUMP_IF_NONE:
567
7
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
568
7
            break;
569
122
        case POP_JUMP_IF_FALSE:
570
122
            reversed_opcode = POP_JUMP_IF_TRUE;
571
122
            break;
572
17
        case POP_JUMP_IF_TRUE:
573
17
            reversed_opcode = POP_JUMP_IF_FALSE;
574
17
            break;
575
146
    }
576
    /* transform 'conditional jump T' to
577
     * 'reversed_jump b_next' followed by 'jump_backwards T'
578
     */
579
580
146
    basicblock *target = last->i_target;
581
146
    basicblock *backwards_jump = cfg_builder_new_block(g);
582
146
    if (backwards_jump == NULL) {
583
0
        return ERROR;
584
0
    }
585
146
    RETURN_IF_ERROR(
586
146
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
587
146
    RETURN_IF_ERROR(
588
146
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
589
146
    backwards_jump->b_startdepth = target->b_startdepth;
590
146
    last->i_opcode = reversed_opcode;
591
146
    last->i_target = b->b_next;
592
593
146
    backwards_jump->b_cold = b->b_cold;
594
146
    backwards_jump->b_next = b->b_next;
595
146
    b->b_next = backwards_jump;
596
146
    return SUCCESS;
597
146
}
598
599
600
static int
601
normalize_jumps(cfg_builder *g)
602
4.22k
{
603
4.22k
    basicblock *entryblock = g->g_entryblock;
604
20.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
605
16.0k
        b->b_visited = 0;
606
16.0k
    }
607
20.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
608
16.1k
        b->b_visited = 1;
609
16.1k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
610
16.1k
    }
611
4.22k
    return SUCCESS;
612
4.22k
}
613
614
static int
615
4.22k
check_cfg(cfg_builder *g) {
616
20.0k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
617
        /* Raise SystemError if jump or exit is not last instruction in the block. */
618
179k
        for (int i = 0; i < b->b_iused; i++) {
619
163k
            int opcode = b->b_instr[i].i_opcode;
620
163k
            assert(!IS_ASSEMBLER_OPCODE(opcode));
621
163k
            if (IS_TERMINATOR_OPCODE(opcode)) {
622
14.2k
                if (i != b->b_iused - 1) {
623
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
624
0
                    return ERROR;
625
0
                }
626
14.2k
            }
627
163k
        }
628
15.7k
    }
629
4.22k
    return SUCCESS;
630
4.22k
}
631
632
static int
633
get_max_label(basicblock *entryblock)
634
15.8k
{
635
15.8k
    int lbl = -1;
636
78.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
637
62.6k
        if (b->b_label.id > lbl) {
638
18.7k
            lbl = b->b_label.id;
639
18.7k
        }
640
62.6k
    }
641
15.8k
    return lbl;
642
15.8k
}
643
644
/* Calculate the actual jump target from the target_label */
645
static int
646
translate_jump_labels_to_targets(basicblock *entryblock)
647
4.22k
{
648
4.22k
    int max_label = get_max_label(entryblock);
649
4.22k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
650
4.22k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
651
4.22k
    if (!label2block) {
652
0
        PyErr_NoMemory();
653
0
        return ERROR;
654
0
    }
655
4.22k
    memset(label2block, 0, mapsize);
656
20.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
657
15.7k
        if (b->b_label.id >= 0) {
658
5.06k
            label2block[b->b_label.id] = b;
659
5.06k
        }
660
15.7k
    }
661
20.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
662
179k
        for (int i = 0; i < b->b_iused; i++) {
663
163k
            cfg_instr *instr = &b->b_instr[i];
664
163k
            assert(instr->i_target == NULL);
665
163k
            if (HAS_TARGET(instr->i_opcode)) {
666
6.32k
                int lbl = instr->i_oparg;
667
6.32k
                assert(lbl >= 0 && lbl <= max_label);
668
6.32k
                instr->i_target = label2block[lbl];
669
6.32k
                assert(instr->i_target != NULL);
670
6.32k
                assert(instr->i_target->b_label.id == lbl);
671
6.32k
            }
672
163k
        }
673
15.7k
    }
674
4.22k
    PyMem_Free(label2block);
675
4.22k
    return SUCCESS;
676
4.22k
}
677
678
static int
679
4.22k
mark_except_handlers(basicblock *entryblock) {
680
#ifndef NDEBUG
681
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
682
        assert(!b->b_except_handler);
683
    }
684
#endif
685
20.0k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
686
179k
        for (int i=0; i < b->b_iused; i++) {
687
163k
            cfg_instr *instr = &b->b_instr[i];
688
163k
            if (is_block_push(instr)) {
689
800
                instr->i_target->b_except_handler = 1;
690
800
            }
691
163k
        }
692
15.7k
    }
693
4.22k
    return SUCCESS;
694
4.22k
}
695
696
697
struct _PyCfgExceptStack {
698
    basicblock *handlers[CO_MAXBLOCKS+2];
699
    int depth;
700
};
701
702
703
static basicblock *
704
800
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
705
800
    assert(is_block_push(setup));
706
800
    int opcode = setup->i_opcode;
707
800
    basicblock * target = setup->i_target;
708
800
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
709
515
        target->b_preserve_lasti = 1;
710
515
    }
711
800
    assert(stack->depth <= CO_MAXBLOCKS);
712
800
    stack->handlers[++stack->depth] = target;
713
800
    return target;
714
800
}
715
716
static basicblock *
717
720
pop_except_block(struct _PyCfgExceptStack *stack) {
718
720
    assert(stack->depth > 0);
719
720
    return stack->handlers[--stack->depth];
720
720
}
721
722
static basicblock *
723
12.8k
except_stack_top(struct _PyCfgExceptStack *stack) {
724
12.8k
    return stack->handlers[stack->depth];
725
12.8k
}
726
727
static struct _PyCfgExceptStack *
728
4.22k
make_except_stack(void) {
729
4.22k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
730
4.22k
    if (new == NULL) {
731
0
        PyErr_NoMemory();
732
0
        return NULL;
733
0
    }
734
4.22k
    new->depth = 0;
735
4.22k
    new->handlers[0] = NULL;
736
4.22k
    return new;
737
4.22k
}
738
739
static struct _PyCfgExceptStack *
740
3.91k
copy_except_stack(struct _PyCfgExceptStack *stack) {
741
3.91k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
742
3.91k
    if (copy == NULL) {
743
0
        PyErr_NoMemory();
744
0
        return NULL;
745
0
    }
746
3.91k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
747
3.91k
    return copy;
748
3.91k
}
749
750
static basicblock**
751
30.1k
make_cfg_traversal_stack(basicblock *entryblock) {
752
30.1k
    int nblocks = 0;
753
153k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
754
123k
        b->b_visited = 0;
755
123k
        nblocks++;
756
123k
    }
757
30.1k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
758
30.1k
    if (!stack) {
759
0
        PyErr_NoMemory();
760
0
    }
761
30.1k
    return stack;
762
30.1k
}
763
764
/* Compute the stack effects of opcode with argument oparg.
765
766
   Some opcodes have different stack effect when jump to the target and
767
   when not jump. The 'jump' parameter specifies the case:
768
769
   * 0 -- when not jump
770
   * 1 -- when jump
771
   * -1 -- maximal
772
 */
773
typedef struct {
774
    /* The stack effect of the instruction. */
775
    int net;
776
} stack_effects;
777
778
Py_LOCAL(int)
779
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
780
151k
{
781
151k
    if (opcode < 0) {
782
0
        return -1;
783
0
    }
784
151k
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
785
        // Specialized instructions are not supported.
786
0
        return -1;
787
0
    }
788
151k
    int popped = _PyOpcode_num_popped(opcode, oparg);
789
151k
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
790
151k
    if (popped < 0 || pushed < 0) {
791
0
        return -1;
792
0
    }
793
151k
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
794
800
        effects->net = 0;
795
800
        return 0;
796
800
    }
797
151k
    effects->net = pushed - popped;
798
151k
    return 0;
799
151k
}
800
801
Py_LOCAL_INLINE(int)
802
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
803
15.2k
{
804
15.2k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
805
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
806
0
        return ERROR;
807
0
    }
808
15.2k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
809
12.8k
        assert(b->b_startdepth < 0);
810
12.8k
        b->b_startdepth = depth;
811
12.8k
        *(*sp)++ = b;
812
12.8k
    }
813
15.2k
    return SUCCESS;
814
15.2k
}
815
816
/* Find the flow path that needs the largest stack.  We assume that
817
 * cycles in the flow graph have no net effect on the stack depth.
818
 */
819
static int
820
calculate_stackdepth(cfg_builder *g)
821
4.22k
{
822
4.22k
    basicblock *entryblock = g->g_entryblock;
823
20.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
824
16.0k
        b->b_startdepth = INT_MIN;
825
16.0k
    }
826
4.22k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
827
4.22k
    if (!stack) {
828
0
        return ERROR;
829
0
    }
830
831
832
4.22k
    int stackdepth = -1;
833
4.22k
    int maxdepth = 0;
834
4.22k
    basicblock **sp = stack;
835
4.22k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
836
0
        goto error;
837
0
    }
838
17.0k
    while (sp != stack) {
839
12.8k
        basicblock *b = *--sp;
840
12.8k
        int depth = b->b_startdepth;
841
12.8k
        assert(depth >= 0);
842
12.8k
        basicblock *next = b->b_next;
843
151k
        for (int i = 0; i < b->b_iused; i++) {
844
146k
            cfg_instr *instr = &b->b_instr[i];
845
146k
            stack_effects effects;
846
146k
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
847
0
                PyErr_Format(PyExc_SystemError,
848
0
                             "Invalid stack effect for opcode=%d, arg=%i",
849
0
                             instr->i_opcode, instr->i_oparg);
850
0
                goto error;
851
0
            }
852
146k
            int new_depth = depth + effects.net;
853
146k
            if (new_depth < 0) {
854
0
                PyErr_Format(PyExc_ValueError,
855
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
856
0
                goto error;
857
0
            }
858
146k
            maxdepth = Py_MAX(maxdepth, depth);
859
146k
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
860
5.51k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
861
0
                    PyErr_Format(PyExc_SystemError,
862
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
863
0
                                 instr->i_opcode, instr->i_oparg);
864
0
                    goto error;
865
0
                }
866
5.51k
                int target_depth = depth + effects.net;
867
5.51k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
868
5.51k
                maxdepth = Py_MAX(maxdepth, depth);
869
5.51k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
870
0
                    goto error;
871
0
                }
872
5.51k
            }
873
146k
            depth = new_depth;
874
146k
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
875
146k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
876
145k
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
877
7.33k
            {
878
                /* remaining code is dead */
879
7.33k
                next = NULL;
880
7.33k
                break;
881
7.33k
            }
882
146k
        }
883
12.8k
        if (next != NULL) {
884
5.51k
            assert(BB_HAS_FALLTHROUGH(b));
885
5.51k
            if (stackdepth_push(&sp, next, depth) < 0) {
886
0
                goto error;
887
0
            }
888
5.51k
        }
889
12.8k
    }
890
4.22k
    stackdepth = maxdepth;
891
4.22k
error:
892
4.22k
    PyMem_Free(stack);
893
4.22k
    return stackdepth;
894
4.22k
}
895
896
static int
897
4.22k
label_exception_targets(basicblock *entryblock) {
898
4.22k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
899
4.22k
    if (todo_stack == NULL) {
900
0
        return ERROR;
901
0
    }
902
4.22k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
903
4.22k
    if (except_stack == NULL) {
904
0
        PyMem_Free(todo_stack);
905
0
        PyErr_NoMemory();
906
0
        return ERROR;
907
0
    }
908
4.22k
    except_stack->depth = 0;
909
4.22k
    todo_stack[0] = entryblock;
910
4.22k
    entryblock->b_visited = 1;
911
4.22k
    entryblock->b_exceptstack = except_stack;
912
4.22k
    basicblock **todo = &todo_stack[1];
913
4.22k
    basicblock *handler = NULL;
914
17.0k
    while (todo > todo_stack) {
915
12.8k
        todo--;
916
12.8k
        basicblock *b = todo[0];
917
12.8k
        assert(b->b_visited == 1);
918
12.8k
        except_stack = b->b_exceptstack;
919
12.8k
        assert(except_stack != NULL);
920
12.8k
        b->b_exceptstack = NULL;
921
12.8k
        handler = except_stack_top(except_stack);
922
12.8k
        int last_yield_except_depth = -1;
923
170k
        for (int i = 0; i < b->b_iused; i++) {
924
157k
            cfg_instr *instr = &b->b_instr[i];
925
157k
            if (is_block_push(instr)) {
926
800
                if (!instr->i_target->b_visited) {
927
800
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
928
800
                    if (copy == NULL) {
929
0
                        goto error;
930
0
                    }
931
800
                    instr->i_target->b_exceptstack = copy;
932
800
                    todo[0] = instr->i_target;
933
800
                    instr->i_target->b_visited = 1;
934
800
                    todo++;
935
800
                }
936
800
                handler = push_except_block(except_stack, instr);
937
800
            }
938
156k
            else if (instr->i_opcode == POP_BLOCK) {
939
720
                handler = pop_except_block(except_stack);
940
720
                INSTR_SET_OP0(instr, NOP);
941
720
            }
942
156k
            else if (is_jump(instr)) {
943
5.29k
                instr->i_except = handler;
944
5.29k
                assert(i == b->b_iused -1);
945
5.29k
                if (!instr->i_target->b_visited) {
946
3.79k
                    if (BB_HAS_FALLTHROUGH(b)) {
947
3.11k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
948
3.11k
                        if (copy == NULL) {
949
0
                            goto error;
950
0
                        }
951
3.11k
                        instr->i_target->b_exceptstack = copy;
952
3.11k
                    }
953
676
                    else {
954
676
                        instr->i_target->b_exceptstack = except_stack;
955
676
                        except_stack = NULL;
956
676
                    }
957
3.79k
                    todo[0] = instr->i_target;
958
3.79k
                    instr->i_target->b_visited = 1;
959
3.79k
                    todo++;
960
3.79k
                }
961
5.29k
            }
962
150k
            else if (instr->i_opcode == YIELD_VALUE) {
963
139
                instr->i_except = handler;
964
139
                last_yield_except_depth = except_stack->depth;
965
139
            }
966
150k
            else if (instr->i_opcode == RESUME) {
967
4.40k
                instr->i_except = handler;
968
4.40k
                if (instr->i_oparg != RESUME_AT_FUNC_START && instr->i_oparg != RESUME_AT_GEN_EXPR_START) {
969
139
                    assert(last_yield_except_depth >= 0);
970
139
                    if (last_yield_except_depth == 1) {
971
110
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
972
110
                    }
973
139
                    last_yield_except_depth = -1;
974
139
                }
975
4.40k
            }
976
146k
            else if (instr->i_opcode == RETURN_GENERATOR) {
977
92
                instr->i_except = NULL;
978
92
            }
979
146k
            else {
980
146k
                instr->i_except = handler;
981
146k
            }
982
157k
        }
983
12.8k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
984
4.01k
            assert(except_stack != NULL);
985
4.01k
            b->b_next->b_exceptstack = except_stack;
986
4.01k
            todo[0] = b->b_next;
987
4.01k
            b->b_next->b_visited = 1;
988
4.01k
            todo++;
989
4.01k
        }
990
8.81k
        else if (except_stack != NULL) {
991
8.14k
           PyMem_Free(except_stack);
992
8.14k
        }
993
12.8k
    }
994
#ifdef Py_DEBUG
995
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
996
        assert(b->b_exceptstack == NULL);
997
    }
998
#endif
999
4.22k
    PyMem_Free(todo_stack);
1000
4.22k
    return SUCCESS;
1001
0
error:
1002
0
    PyMem_Free(todo_stack);
1003
0
    PyMem_Free(except_stack);
1004
0
    return ERROR;
1005
4.22k
}
1006
1007
/***** CFG optimizations *****/
1008
1009
static int
1010
8.45k
remove_unreachable(basicblock *entryblock) {
1011
40.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1012
31.8k
        b->b_predecessors = 0;
1013
31.8k
    }
1014
8.45k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1015
8.45k
    if (stack == NULL) {
1016
0
        return ERROR;
1017
0
    }
1018
8.45k
    basicblock **sp = stack;
1019
8.45k
    entryblock->b_predecessors = 1;
1020
8.45k
    *sp++ = entryblock;
1021
8.45k
    entryblock->b_visited = 1;
1022
33.8k
    while (sp > stack) {
1023
25.4k
        basicblock *b = *(--sp);
1024
25.4k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1025
10.5k
            if (!b->b_next->b_visited) {
1026
8.84k
                assert(b->b_next->b_predecessors == 0);
1027
8.84k
                *sp++ = b->b_next;
1028
8.84k
                b->b_next->b_visited = 1;
1029
8.84k
            }
1030
10.5k
            b->b_next->b_predecessors++;
1031
10.5k
        }
1032
332k
        for (int i = 0; i < b->b_iused; i++) {
1033
306k
            basicblock *target;
1034
306k
            cfg_instr *instr = &b->b_instr[i];
1035
306k
            if (is_jump(instr) || is_block_push(instr)) {
1036
11.3k
                target = instr->i_target;
1037
11.3k
                if (!target->b_visited) {
1038
8.12k
                    *sp++ = target;
1039
8.12k
                    target->b_visited = 1;
1040
8.12k
                }
1041
11.3k
                target->b_predecessors++;
1042
11.3k
            }
1043
306k
        }
1044
25.4k
    }
1045
8.45k
    PyMem_Free(stack);
1046
1047
    /* Delete unreachable instructions */
1048
40.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1049
31.8k
       if (b->b_predecessors == 0) {
1050
6.37k
            b->b_iused = 0;
1051
6.37k
            b->b_except_handler = 0;
1052
6.37k
       }
1053
31.8k
    }
1054
8.45k
    return SUCCESS;
1055
8.45k
}
1056
1057
static int
1058
79.2k
basicblock_remove_redundant_nops(basicblock *bb) {
1059
    /* Remove NOPs when legal to do so. */
1060
79.2k
    int dest = 0;
1061
79.2k
    int prev_lineno = -1;
1062
771k
    for (int src = 0; src < bb->b_iused; src++) {
1063
692k
        int lineno = bb->b_instr[src].i_loc.lineno;
1064
692k
        if (bb->b_instr[src].i_opcode == NOP) {
1065
            /* Eliminate no-op if it doesn't have a line number */
1066
14.5k
            if (lineno < 0) {
1067
3.68k
                continue;
1068
3.68k
            }
1069
            /* or, if the previous instruction had the same line number. */
1070
10.8k
            if (prev_lineno == lineno) {
1071
9.06k
                continue;
1072
9.06k
            }
1073
            /* or, if the next instruction has same line number or no line number */
1074
1.82k
            if (src < bb->b_iused - 1) {
1075
1.68k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1076
1.68k
                if (next_lineno == lineno) {
1077
1.20k
                    continue;
1078
1.20k
                }
1079
480
                if (next_lineno < 0) {
1080
0
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1081
0
                    continue;
1082
0
                }
1083
480
            }
1084
146
            else {
1085
146
                basicblock *next = next_nonempty_block(bb->b_next);
1086
                /* or if last instruction in BB and next BB has same line number */
1087
146
                if (next) {
1088
146
                    location next_loc = NO_LOCATION;
1089
146
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1090
146
                        cfg_instr *instr = &next->b_instr[next_i];
1091
146
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1092
                            /* Skip over NOPs without a location, they will be removed */
1093
0
                            continue;
1094
0
                        }
1095
146
                        next_loc = instr->i_loc;
1096
146
                        break;
1097
146
                    }
1098
146
                    if (lineno == next_loc.lineno) {
1099
0
                        continue;
1100
0
                    }
1101
146
                }
1102
146
            }
1103
1104
1.82k
        }
1105
678k
        if (dest != src) {
1106
54.4k
            bb->b_instr[dest] = bb->b_instr[src];
1107
54.4k
        }
1108
678k
        dest++;
1109
678k
        prev_lineno = lineno;
1110
678k
    }
1111
79.2k
    assert(dest <= bb->b_iused);
1112
79.2k
    int num_removed = bb->b_iused - dest;
1113
79.2k
    bb->b_iused = dest;
1114
79.2k
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1115
79.2k
    return num_removed;
1116
79.2k
}
1117
1118
static int
1119
13.5k
remove_redundant_nops(cfg_builder *g) {
1120
13.5k
    int changes = 0;
1121
76.7k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1122
63.2k
        int change = basicblock_remove_redundant_nops(b);
1123
63.2k
        RETURN_IF_ERROR(change);
1124
63.2k
        changes += change;
1125
63.2k
    }
1126
13.5k
    return changes;
1127
13.5k
}
1128
1129
static int loads_const(int opcode);
1130
1131
static int
1132
remove_redundant_nops_and_pairs(basicblock *entryblock)
1133
4.22k
{
1134
4.22k
    bool done = false;
1135
1136
8.45k
    while (! done) {
1137
4.22k
        done = true;
1138
4.22k
        cfg_instr *prev_instr = NULL;
1139
4.22k
        cfg_instr *instr = NULL;
1140
20.2k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1141
16.0k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1142
16.0k
            if (IS_LABEL(b->b_label)) {
1143
                /* this block is a jump target, forget instr */
1144
5.27k
                instr = NULL;
1145
5.27k
            }
1146
164k
            for (int i = 0; i < b->b_iused; i++) {
1147
148k
                prev_instr = instr;
1148
148k
                instr = &b->b_instr[i];
1149
148k
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1150
148k
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1151
148k
                int opcode = instr->i_opcode;
1152
148k
                bool is_redundant_pair = false;
1153
148k
                if (opcode == POP_TOP) {
1154
4.00k
                   if (loads_const(prev_opcode)) {
1155
0
                       is_redundant_pair = true;
1156
0
                   }
1157
4.00k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1158
0
                       is_redundant_pair = true;
1159
0
                   }
1160
4.00k
                }
1161
148k
                if (is_redundant_pair) {
1162
0
                    INSTR_SET_OP0(prev_instr, NOP);
1163
0
                    INSTR_SET_OP0(instr, NOP);
1164
0
                    done = false;
1165
0
                }
1166
148k
            }
1167
16.0k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1168
11.3k
                instr = NULL;
1169
11.3k
            }
1170
16.0k
        }
1171
4.22k
    }
1172
4.22k
    return SUCCESS;
1173
4.22k
}
1174
1175
static int
1176
9.27k
remove_redundant_jumps(cfg_builder *g) {
1177
    /* If a non-empty block ends with a jump instruction, check if the next
1178
     * non-empty block reached through normal flow control is the target
1179
     * of that jump. If it is, then the jump instruction is redundant and
1180
     * can be deleted.
1181
     *
1182
     * Return the number of changes applied, or -1 on error.
1183
     */
1184
1185
9.27k
    int changes = 0;
1186
56.5k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1187
47.2k
        cfg_instr *last = basicblock_last_instr(b);
1188
47.2k
        if (last == NULL) {
1189
8.13k
            continue;
1190
8.13k
        }
1191
47.2k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1192
39.1k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1193
4.08k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1194
4.08k
            if (jump_target == NULL) {
1195
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1196
0
                return ERROR;
1197
0
            }
1198
4.08k
            basicblock *next = next_nonempty_block(b->b_next);
1199
4.08k
            if (jump_target == next) {
1200
164
                changes++;
1201
164
                INSTR_SET_OP0(last, NOP);
1202
164
            }
1203
4.08k
        }
1204
39.1k
    }
1205
1206
9.27k
    return changes;
1207
9.27k
}
1208
1209
static inline bool
1210
14.6k
basicblock_has_no_lineno(basicblock *b) {
1211
18.1k
    for (int i = 0; i < b->b_iused; i++) {
1212
16.6k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1213
13.0k
            return false;
1214
13.0k
        }
1215
16.6k
    }
1216
1.54k
    return true;
1217
14.6k
}
1218
1219
/* Maximum size of basic block that should be copied in optimizer */
1220
719
#define MAX_COPY_SIZE 4
1221
1222
/* If this block ends with an unconditional jump to a small exit block or
1223
 * a block that has no line numbers (and no fallthrough), then
1224
 * remove the jump and extend this block with the target.
1225
 * Returns 1 if extended, 0 if no change, and -1 on error.
1226
 */
1227
static int
1228
20.8k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1229
20.8k
    cfg_instr *last = basicblock_last_instr(bb);
1230
20.8k
    if (last == NULL) {
1231
0
        return 0;
1232
0
    }
1233
20.8k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1234
18.3k
        return 0;
1235
18.3k
    }
1236
2.54k
    basicblock *target = last->i_target;
1237
2.54k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1238
719
                             target->b_iused <= MAX_COPY_SIZE);
1239
2.54k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1240
592
                                     !BB_HAS_FALLTHROUGH(target));
1241
2.54k
    if (small_exit_block || no_lineno_no_fallthrough) {
1242
665
        assert(is_jump(last));
1243
665
        int removed_jump_opcode = last->i_opcode;
1244
665
        INSTR_SET_OP0(last, NOP);
1245
665
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1246
665
        if (no_lineno_no_fallthrough) {
1247
592
            last = basicblock_last_instr(bb);
1248
592
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1249
176
                removed_jump_opcode == JUMP)
1250
19
            {
1251
                /* Make sure we don't lose eval breaker checks */
1252
19
                last->i_opcode = JUMP;
1253
19
            }
1254
592
        }
1255
665
        target->b_predecessors--;
1256
665
        return 1;
1257
665
    }
1258
1.88k
    return 0;
1259
2.54k
}
1260
1261
static int
1262
4.22k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1263
4.22k
    bool changes;
1264
4.54k
    do {
1265
4.54k
        changes = false;
1266
25.3k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1267
20.8k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1268
20.8k
            RETURN_IF_ERROR(res);
1269
20.8k
            if (res) {
1270
665
                changes = true;
1271
665
            }
1272
20.8k
        }
1273
4.54k
    } while(changes); /* every change removes a jump, ensuring convergence */
1274
4.22k
    return changes;
1275
4.22k
}
1276
1277
// Attempt to eliminate jumps to jumps by updating inst to jump to
1278
// target->i_target using the provided opcode. Return whether or not the
1279
// optimization was successful.
1280
static bool
1281
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1282
141
{
1283
141
    assert(is_jump(inst));
1284
141
    assert(is_jump(target));
1285
141
    assert(inst == basicblock_last_instr(bb));
1286
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1287
    // changes (and we fall into an infinite loop):
1288
141
    if (inst->i_target != target->i_target) {
1289
        /* Change inst to NOP and append a jump to target->i_target. The
1290
         * NOP will be removed later if it's not needed for the lineno.
1291
         */
1292
141
        INSTR_SET_OP0(inst, NOP);
1293
1294
141
        RETURN_IF_ERROR(
1295
141
            basicblock_add_jump(
1296
141
                bb, opcode, target->i_target, target->i_loc));
1297
1298
141
        return true;
1299
141
    }
1300
0
    return false;
1301
141
}
1302
1303
static int
1304
loads_const(int opcode)
1305
171k
{
1306
171k
    return OPCODE_HAS_CONST(opcode)
1307
147k
        || opcode == LOAD_SMALL_INT
1308
130k
        || opcode == LOAD_COMMON_CONSTANT;
1309
171k
}
1310
1311
/* Returns new reference */
1312
static PyObject*
1313
get_const_value(int opcode, int oparg, PyObject *co_consts)
1314
60.0k
{
1315
60.0k
    PyObject *constant = NULL;
1316
60.0k
    assert(loads_const(opcode));
1317
60.0k
    if (opcode == LOAD_CONST) {
1318
57.7k
        assert(PyList_Check(co_consts));
1319
57.7k
        Py_ssize_t n = PyList_GET_SIZE(co_consts);
1320
57.7k
        if (oparg < 0 || oparg >= n) {
1321
0
            PyErr_Format(PyExc_ValueError,
1322
0
                         "LOAD_CONST index %d is out of range for consts (len=%zd)",
1323
0
                         oparg, n);
1324
0
            return NULL;
1325
0
        }
1326
57.7k
        constant = PyList_GET_ITEM(co_consts, oparg);
1327
57.7k
    }
1328
60.0k
    if (opcode == LOAD_SMALL_INT) {
1329
1.81k
        return PyLong_FromLong(oparg);
1330
1.81k
    }
1331
58.2k
    if (opcode == LOAD_COMMON_CONSTANT) {
1332
450
        assert(oparg < NUM_COMMON_CONSTANTS);
1333
450
        return PyStackRef_AsPyObjectBorrow(
1334
450
            _PyInterpreterState_GET()->common_consts[oparg]);
1335
450
    }
1336
1337
57.7k
    if (constant == NULL) {
1338
0
        PyErr_SetString(PyExc_SystemError,
1339
0
                        "Internal error: failed to get value of a constant");
1340
0
        return NULL;
1341
0
    }
1342
57.7k
    return Py_NewRef(constant);
1343
57.7k
}
1344
1345
// Steals a reference to newconst.
1346
static int
1347
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
1348
          _Py_hashtable_t *consts_index)
1349
2.01k
{
1350
2.01k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1351
0
        Py_DECREF(newconst);
1352
0
        return -1;
1353
0
    }
1354
1355
2.01k
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
1356
2.01k
    if (entry != NULL) {
1357
1.42k
        Py_DECREF(newconst);
1358
1.42k
        return (int)(uintptr_t)entry->value;
1359
1.42k
    }
1360
1361
593
    Py_ssize_t index = PyList_GET_SIZE(consts);
1362
593
    if ((size_t)index >= (size_t)INT_MAX - 1) {
1363
0
        PyErr_SetString(PyExc_OverflowError, "too many constants");
1364
0
        Py_DECREF(newconst);
1365
0
        return -1;
1366
0
    }
1367
593
    if (PyList_Append(consts, newconst)) {
1368
0
        Py_DECREF(newconst);
1369
0
        return -1;
1370
0
    }
1371
1372
593
    if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
1373
0
        PyList_SetSlice(consts, index, index + 1, NULL);
1374
0
        Py_DECREF(newconst);
1375
0
        PyErr_NoMemory();
1376
0
        return -1;
1377
0
    }
1378
1379
593
    Py_DECREF(newconst);
1380
593
    return (int)index;
1381
593
}
1382
1383
/*
1384
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1385
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1386
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1387
1388
  Return boolean indicating whether "size" such instructions were found.
1389
*/
1390
static bool
1391
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1392
5.09k
{
1393
5.09k
    assert(start < bb->b_iused);
1394
5.09k
    assert(size >= 0);
1395
5.09k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1396
1397
9.32k
    for (; start >= 0 && size > 0; start--) {
1398
6.83k
        cfg_instr *instr = &bb->b_instr[start];
1399
6.83k
        if (instr->i_opcode == NOP) {
1400
36
            continue;
1401
36
        }
1402
6.79k
        if (!loads_const(instr->i_opcode)) {
1403
2.60k
            return false;
1404
2.60k
        }
1405
4.19k
        instrs[--size] = instr;
1406
4.19k
    }
1407
1408
2.48k
    return size == 0;
1409
5.09k
}
1410
1411
/*
1412
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1413
  Caller must make sure "instrs" has at least "size" elements.
1414
*/
1415
static void
1416
nop_out(cfg_instr **instrs, int size)
1417
2.48k
{
1418
5.95k
    for (int i = 0; i < size; i++) {
1419
3.46k
        cfg_instr *instr = instrs[i];
1420
3.46k
        assert(instr->i_opcode != NOP);
1421
3.46k
        INSTR_SET_OP0(instr, NOP);
1422
3.46k
        INSTR_SET_LOC(instr, NO_LOCATION);
1423
3.46k
    }
1424
2.48k
}
1425
1426
/* Does not steal reference to "newconst".
1427
   Return 1 if changed instruction to LOAD_SMALL_INT.
1428
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1429
   Return -1 on error.
1430
*/
1431
static int
1432
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1433
                               PyObject *consts, PyObject *const_cache)
1434
38.1k
{
1435
38.1k
    if (PyLong_CheckExact(newconst)) {
1436
20.2k
        int overflow;
1437
20.2k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1438
20.2k
        if (val == -1 && PyErr_Occurred()) {
1439
0
            return -1;
1440
0
        }
1441
20.2k
        if (!overflow && _PY_IS_SMALL_INT(val) && 0 <= val && val <= 255) {
1442
15.0k
            assert(_Py_IsImmortal(newconst));
1443
15.0k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1444
15.0k
            return 1;
1445
15.0k
        }
1446
20.2k
    }
1447
23.0k
    return 0;
1448
38.1k
}
1449
1450
/* Does not steal reference to "newconst".
1451
   Return 1 if changed instruction to LOAD_COMMON_CONSTANT.
1452
   Return 0 if could not change instruction to LOAD_COMMON_CONSTANT.
1453
   Return -1 on error.
1454
*/
1455
static int
1456
maybe_instr_make_load_common_const(cfg_instr *instr, PyObject *newconst)
1457
22.7k
{
1458
22.7k
    int oparg;
1459
22.7k
    if (newconst == Py_None) {
1460
3.85k
        oparg = CONSTANT_NONE;
1461
3.85k
    }
1462
18.9k
    else if (newconst == Py_True) {
1463
329
        oparg = CONSTANT_TRUE;
1464
329
    }
1465
18.6k
    else if (newconst == Py_False) {
1466
490
        oparg = CONSTANT_FALSE;
1467
490
    }
1468
18.1k
    else if (PyUnicode_CheckExact(newconst)
1469
7.28k
             && PyUnicode_GET_LENGTH(newconst) == 0) {
1470
116
        oparg = CONSTANT_EMPTY_STR;
1471
116
    }
1472
18.0k
    else if (PyTuple_CheckExact(newconst)
1473
2.04k
             && PyTuple_GET_SIZE(newconst) == 0) {
1474
628
        oparg = CONSTANT_EMPTY_TUPLE;
1475
628
    }
1476
17.3k
    else if (PyLong_CheckExact(newconst)) {
1477
5.21k
        int overflow;
1478
5.21k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1479
5.21k
        if (val == -1 && PyErr_Occurred()) {
1480
0
            return -1;
1481
0
        }
1482
5.21k
        if (overflow || val != -1) {
1483
4.76k
            return 0;
1484
4.76k
        }
1485
450
        oparg = CONSTANT_MINUS_ONE;
1486
450
    }
1487
12.1k
    else {
1488
12.1k
        return 0;
1489
12.1k
    }
1490
22.7k
    assert(_Py_IsImmortal(newconst));
1491
5.87k
    INSTR_SET_OP1(instr, LOAD_COMMON_CONSTANT, oparg);
1492
5.87k
    return 1;
1493
22.7k
}
1494
1495
/* Steals reference to "newconst" */
1496
static int
1497
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1498
                      PyObject *consts, PyObject *const_cache,
1499
                      _Py_hashtable_t *consts_index)
1500
2.35k
{
1501
2.35k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1502
2.35k
    if (res < 0) {
1503
0
        Py_DECREF(newconst);
1504
0
        return ERROR;
1505
0
    }
1506
2.35k
    if (res > 0) {
1507
0
        return SUCCESS;
1508
0
    }
1509
2.35k
    res = maybe_instr_make_load_common_const(instr, newconst);
1510
2.35k
    if (res < 0) {
1511
0
        Py_DECREF(newconst);
1512
0
        return ERROR;
1513
0
    }
1514
2.35k
    if (res > 0) {
1515
480
        return SUCCESS;
1516
480
    }
1517
1.87k
    int oparg = add_const(newconst, consts, const_cache, consts_index);
1518
1.87k
    RETURN_IF_ERROR(oparg);
1519
1.87k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1520
1.87k
    return SUCCESS;
1521
1.87k
}
1522
1523
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1524
   with    LOAD_CONST (c1, c2, ... cn).
1525
   The consts table must still be in list form so that the
1526
   new constant (c1, c2, ... cn) can be appended.
1527
   Called with codestr pointing to the first LOAD_CONST.
1528
*/
1529
static int
1530
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
1531
                        PyObject *const_cache, _Py_hashtable_t *consts_index)
1532
1.88k
{
1533
    /* Pre-conditions */
1534
1.88k
    assert(PyDict_CheckExact(const_cache));
1535
1.88k
    assert(PyList_CheckExact(consts));
1536
1537
1.88k
    cfg_instr *instr = &bb->b_instr[i];
1538
1.88k
    assert(instr->i_opcode == BUILD_TUPLE);
1539
1540
1.88k
    int seq_size = instr->i_oparg;
1541
1.88k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1542
0
        return SUCCESS;
1543
0
    }
1544
1545
1.88k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1546
1.88k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1547
        /* not a const sequence */
1548
1.28k
        return SUCCESS;
1549
1.28k
    }
1550
1551
599
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1552
599
    if (const_tuple == NULL) {
1553
0
        return ERROR;
1554
0
    }
1555
1556
1.38k
    for (int i = 0; i < seq_size; i++) {
1557
782
        cfg_instr *inst = const_instrs[i];
1558
782
        assert(loads_const(inst->i_opcode));
1559
782
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1560
782
        if (element == NULL) {
1561
0
            Py_DECREF(const_tuple);
1562
0
            return ERROR;
1563
0
        }
1564
782
        PyTuple_SET_ITEM(const_tuple, i, element);
1565
782
    }
1566
1567
599
    nop_out(const_instrs, seq_size);
1568
599
    return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
1569
599
}
1570
1571
/* Replace:
1572
    BUILD_LIST 0
1573
    LOAD_CONST c1
1574
    LIST_APPEND 1
1575
    LOAD_CONST c2
1576
    LIST_APPEND 1
1577
    ...
1578
    LOAD_CONST cN
1579
    LIST_APPEND 1
1580
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1581
   with:
1582
    LOAD_CONST (c1, c2, ... cN)
1583
*/
1584
static int
1585
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1586
                                      PyObject *consts, PyObject *const_cache,
1587
                                      _Py_hashtable_t *consts_index)
1588
43
{
1589
43
    assert(PyDict_CheckExact(const_cache));
1590
43
    assert(PyList_CheckExact(consts));
1591
43
    assert(i >= 0);
1592
43
    assert(i < bb->b_iused);
1593
1594
43
    cfg_instr *intrinsic = &bb->b_instr[i];
1595
43
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1596
43
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1597
1598
43
    int consts_found = 0;
1599
43
    bool expect_append = true;
1600
1601
5.55k
    for (int pos = i - 1; pos >= 0; pos--) {
1602
5.55k
        cfg_instr *instr = &bb->b_instr[pos];
1603
5.55k
        int opcode = instr->i_opcode;
1604
5.55k
        int oparg = instr->i_oparg;
1605
1606
5.55k
        if (opcode == NOP) {
1607
1.66k
            continue;
1608
1.66k
        }
1609
1610
3.89k
        if (opcode == BUILD_LIST && oparg == 0) {
1611
0
            if (!expect_append) {
1612
                /* Not a sequence start. */
1613
0
                return SUCCESS;
1614
0
            }
1615
1616
            /* Sequence start, we are done. */
1617
0
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1618
0
            if (newconst == NULL) {
1619
0
                return ERROR;
1620
0
            }
1621
1622
0
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1623
0
                instr = &bb->b_instr[newpos];
1624
0
                if (instr->i_opcode == NOP) {
1625
0
                    continue;
1626
0
                }
1627
0
                if (loads_const(instr->i_opcode)) {
1628
0
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1629
0
                    if (constant == NULL) {
1630
0
                        Py_DECREF(newconst);
1631
0
                        return ERROR;
1632
0
                    }
1633
0
                    assert(consts_found > 0);
1634
0
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1635
0
                }
1636
0
                nop_out(&instr, 1);
1637
0
            }
1638
0
            assert(consts_found == 0);
1639
0
            return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
1640
0
        }
1641
1642
3.89k
        if (expect_append) {
1643
1.96k
            if (opcode != LIST_APPEND || oparg != 1) {
1644
41
                return SUCCESS;
1645
41
            }
1646
1.96k
        }
1647
1.92k
        else {
1648
1.92k
            if (!loads_const(opcode)) {
1649
2
                return SUCCESS;
1650
2
            }
1651
1.92k
            consts_found++;
1652
1.92k
        }
1653
1654
3.85k
        expect_append = !expect_append;
1655
3.85k
    }
1656
1657
    /* Did not find sequence start. */
1658
0
    return SUCCESS;
1659
43
}
1660
1661
750
#define MIN_CONST_SEQUENCE_SIZE 3
1662
/*
1663
Optimize lists and sets for:
1664
    1. "for" loop, comprehension or "in"/"not in" tests:
1665
           Change literal list or set of constants into constant
1666
           tuple or frozenset respectively. Change list of
1667
           non-constants into tuple.
1668
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1669
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1670
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1671
           or BUILD_SET & SET_UPDATE respectively.
1672
*/
1673
static int
1674
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1675
                        PyObject *consts, PyObject *const_cache,
1676
                        _Py_hashtable_t *consts_index)
1677
375
{
1678
375
    assert(PyDict_CheckExact(const_cache));
1679
375
    assert(PyList_CheckExact(consts));
1680
1681
375
    cfg_instr *instr = &bb->b_instr[i];
1682
375
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1683
1684
375
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1685
375
    int seq_size = instr->i_oparg;
1686
375
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1687
375
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1688
242
    {
1689
242
        return SUCCESS;
1690
242
    }
1691
1692
133
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1693
133
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1694
5
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1695
            /* iterate over a tuple instead of list */
1696
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1697
0
        }
1698
5
        return SUCCESS;
1699
5
    }
1700
1701
128
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1702
128
    if (const_result == NULL) {
1703
0
        return ERROR;
1704
0
    }
1705
1706
1.03k
    for (int i = 0; i < seq_size; i++) {
1707
904
        cfg_instr *inst = const_instrs[i];
1708
904
        assert(loads_const(inst->i_opcode));
1709
904
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1710
904
        if (element == NULL) {
1711
0
            Py_DECREF(const_result);
1712
0
            return ERROR;
1713
0
        }
1714
904
        PyTuple_SET_ITEM(const_result, i, element);
1715
904
    }
1716
1717
128
    if (instr->i_opcode == BUILD_SET) {
1718
121
        PyObject *frozenset = PyFrozenSet_New(const_result);
1719
121
        if (frozenset == NULL) {
1720
0
            Py_DECREF(const_result);
1721
0
            return ERROR;
1722
0
        }
1723
121
        Py_SETREF(const_result, frozenset);
1724
121
    }
1725
1726
128
    int index = add_const(const_result, consts, const_cache, consts_index);
1727
128
    RETURN_IF_ERROR(index);
1728
128
    nop_out(const_instrs, seq_size);
1729
1730
128
    if (contains_or_iter) {
1731
115
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1732
115
    }
1733
13
    else {
1734
13
        assert(i >= 2);
1735
13
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1736
1737
13
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1738
1739
13
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1740
13
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1741
13
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1742
13
    }
1743
128
    return SUCCESS;
1744
128
}
1745
1746
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1747
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1748
 * Used to avoid creating constants which are slow to hash.
1749
 */
1750
static Py_ssize_t
1751
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1752
0
{
1753
0
    if (PyTuple_Check(obj)) {
1754
0
        Py_ssize_t i;
1755
0
        limit -= PyTuple_GET_SIZE(obj);
1756
0
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1757
0
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1758
0
            if (limit < 0) {
1759
0
                return limit;
1760
0
            }
1761
0
        }
1762
0
    }
1763
0
    return limit;
1764
0
}
1765
1766
63
#define MAX_INT_SIZE           128  /* bits */
1767
0
#define MAX_COLLECTION_SIZE    256  /* items */
1768
2
#define MAX_STR_SIZE          4096  /* characters */
1769
0
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1770
1771
static PyObject *
1772
const_folding_safe_multiply(PyObject *v, PyObject *w)
1773
6
{
1774
6
    if (PyLong_Check(v) && PyLong_Check(w) &&
1775
2
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1776
6
    ) {
1777
2
        int64_t vbits = _PyLong_NumBits(v);
1778
2
        int64_t wbits = _PyLong_NumBits(w);
1779
2
        assert(vbits >= 0);
1780
2
        assert(wbits >= 0);
1781
2
        if (vbits + wbits > MAX_INT_SIZE) {
1782
0
            return NULL;
1783
0
        }
1784
2
    }
1785
4
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1786
0
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1787
0
        if (size) {
1788
0
            long n = PyLong_AsLong(v);
1789
0
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1790
0
                return NULL;
1791
0
            }
1792
0
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1793
0
                return NULL;
1794
0
            }
1795
0
        }
1796
0
    }
1797
4
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1798
2
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1799
2
                                               PyBytes_GET_SIZE(w);
1800
2
        if (size) {
1801
2
            long n = PyLong_AsLong(v);
1802
2
            if (n < 0 || n > MAX_STR_SIZE / size) {
1803
0
                return NULL;
1804
0
            }
1805
2
        }
1806
2
    }
1807
2
    else if (PyLong_Check(w) &&
1808
2
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1809
2
    {
1810
2
        return const_folding_safe_multiply(w, v);
1811
2
    }
1812
1813
4
    return PyNumber_Multiply(v, w);
1814
6
}
1815
1816
static PyObject *
1817
const_folding_safe_power(PyObject *v, PyObject *w)
1818
1
{
1819
1
    if (PyLong_Check(v) && PyLong_Check(w) &&
1820
1
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1821
1
    ) {
1822
1
        int64_t vbits = _PyLong_NumBits(v);
1823
1
        size_t wbits = PyLong_AsSize_t(w);
1824
1
        assert(vbits >= 0);
1825
1
        if (wbits == (size_t)-1) {
1826
0
            return NULL;
1827
0
        }
1828
1
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1829
0
            return NULL;
1830
0
        }
1831
1
    }
1832
1833
1
    return PyNumber_Power(v, w, Py_None);
1834
1
}
1835
1836
static PyObject *
1837
const_folding_safe_lshift(PyObject *v, PyObject *w)
1838
20
{
1839
20
    if (PyLong_Check(v) && PyLong_Check(w) &&
1840
20
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1841
20
    ) {
1842
20
        int64_t vbits = _PyLong_NumBits(v);
1843
20
        size_t wbits = PyLong_AsSize_t(w);
1844
20
        assert(vbits >= 0);
1845
20
        if (wbits == (size_t)-1) {
1846
0
            return NULL;
1847
0
        }
1848
20
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1849
0
            return NULL;
1850
0
        }
1851
20
    }
1852
1853
20
    return PyNumber_Lshift(v, w);
1854
20
}
1855
1856
static PyObject *
1857
const_folding_safe_mod(PyObject *v, PyObject *w)
1858
0
{
1859
0
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1860
0
        return NULL;
1861
0
    }
1862
1863
0
    return PyNumber_Remainder(v, w);
1864
0
}
1865
1866
static PyObject *
1867
eval_const_binop(PyObject *left, int op, PyObject *right)
1868
27
{
1869
27
    assert(left != NULL && right != NULL);
1870
27
    assert(op >= 0 && op <= NB_OPARG_LAST);
1871
1872
27
    PyObject *result = NULL;
1873
27
    switch (op) {
1874
1
        case NB_ADD:
1875
1
            result = PyNumber_Add(left, right);
1876
1
            break;
1877
1
        case NB_SUBTRACT:
1878
1
            result = PyNumber_Subtract(left, right);
1879
1
            break;
1880
4
        case NB_MULTIPLY:
1881
4
            result = const_folding_safe_multiply(left, right);
1882
4
            break;
1883
0
        case NB_TRUE_DIVIDE:
1884
0
            result = PyNumber_TrueDivide(left, right);
1885
0
            break;
1886
0
        case NB_FLOOR_DIVIDE:
1887
0
            result = PyNumber_FloorDivide(left, right);
1888
0
            break;
1889
0
        case NB_REMAINDER:
1890
0
            result = const_folding_safe_mod(left, right);
1891
0
            break;
1892
1
        case NB_POWER:
1893
1
            result = const_folding_safe_power(left, right);
1894
1
            break;
1895
20
        case NB_LSHIFT:
1896
20
            result = const_folding_safe_lshift(left, right);
1897
20
            break;
1898
0
        case NB_RSHIFT:
1899
0
            result = PyNumber_Rshift(left, right);
1900
0
            break;
1901
0
        case NB_OR:
1902
0
            result = PyNumber_Or(left, right);
1903
0
            break;
1904
0
        case NB_XOR:
1905
0
            result = PyNumber_Xor(left, right);
1906
0
            break;
1907
0
        case NB_AND:
1908
0
            result = PyNumber_And(left, right);
1909
0
            break;
1910
0
        case NB_SUBSCR:
1911
0
            result = PyObject_GetItem(left, right);
1912
0
            break;
1913
0
        case NB_MATRIX_MULTIPLY:
1914
            // No builtin constants implement matrix multiplication
1915
0
            break;
1916
0
        default:
1917
0
            Py_UNREACHABLE();
1918
27
    }
1919
27
    return result;
1920
27
}
1921
1922
static int
1923
fold_const_binop(basicblock *bb, int i, PyObject *consts,
1924
                 PyObject *const_cache, _Py_hashtable_t *consts_index)
1925
1.34k
{
1926
1.36k
    #define BINOP_OPERAND_COUNT 2
1927
1.34k
    assert(PyDict_CheckExact(const_cache));
1928
1.34k
    assert(PyList_CheckExact(consts));
1929
1930
1.34k
    cfg_instr *binop = &bb->b_instr[i];
1931
1.34k
    assert(binop->i_opcode == BINARY_OP);
1932
1933
1.34k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1934
1.34k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1935
        /* not a const sequence */
1936
1.31k
        return SUCCESS;
1937
1.31k
    }
1938
1939
27
    cfg_instr *lhs_instr = operands_instrs[0];
1940
27
    assert(loads_const(lhs_instr->i_opcode));
1941
27
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1942
27
    if (lhs == NULL) {
1943
0
        return ERROR;
1944
0
    }
1945
1946
27
    cfg_instr *rhs_instr = operands_instrs[1];
1947
27
    assert(loads_const(rhs_instr->i_opcode));
1948
27
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1949
27
    if (rhs == NULL) {
1950
0
        Py_DECREF(lhs);
1951
0
        return ERROR;
1952
0
    }
1953
1954
27
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1955
27
    Py_DECREF(lhs);
1956
27
    Py_DECREF(rhs);
1957
27
    if (newconst == NULL) {
1958
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1959
0
            return ERROR;
1960
0
        }
1961
0
        PyErr_Clear();
1962
0
        return SUCCESS;
1963
0
    }
1964
1965
27
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1966
27
    return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
1967
27
}
1968
1969
static PyObject *
1970
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1971
1.72k
{
1972
1.72k
    assert(operand != NULL);
1973
1.72k
    assert(
1974
1.72k
        opcode == UNARY_NEGATIVE ||
1975
1.72k
        opcode == UNARY_INVERT ||
1976
1.72k
        opcode == UNARY_NOT ||
1977
1.72k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1978
1.72k
    );
1979
1.72k
    PyObject *result;
1980
1.72k
    switch (opcode) {
1981
1.72k
        case UNARY_NEGATIVE:
1982
1.72k
            result = PyNumber_Negative(operand);
1983
1.72k
            break;
1984
0
        case UNARY_INVERT:
1985
            // XXX: This should be removed once the ~bool depreciation expires.
1986
0
            if (PyBool_Check(operand)) {
1987
0
                return NULL;
1988
0
            }
1989
0
            result = PyNumber_Invert(operand);
1990
0
            break;
1991
0
        case UNARY_NOT: {
1992
0
            int r = PyObject_IsTrue(operand);
1993
0
            if (r < 0) {
1994
0
                return NULL;
1995
0
            }
1996
0
            result = PyBool_FromLong(!r);
1997
0
            break;
1998
0
        }
1999
0
        case CALL_INTRINSIC_1:
2000
0
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
2001
0
                Py_UNREACHABLE();
2002
0
            }
2003
0
            result = PyNumber_Positive(operand);
2004
0
            break;
2005
0
        default:
2006
0
            Py_UNREACHABLE();
2007
1.72k
    }
2008
1.72k
    return result;
2009
1.72k
}
2010
2011
static int
2012
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
2013
                   PyObject *const_cache, _Py_hashtable_t *consts_index)
2014
1.73k
{
2015
3.46k
    #define UNARYOP_OPERAND_COUNT 1
2016
1.73k
    assert(PyDict_CheckExact(const_cache));
2017
1.73k
    assert(PyList_CheckExact(consts));
2018
1.73k
    cfg_instr *unaryop = &bb->b_instr[i];
2019
2020
1.73k
    cfg_instr *operand_instr;
2021
1.73k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
2022
        /* not a const */
2023
9
        return SUCCESS;
2024
9
    }
2025
2026
1.73k
    assert(loads_const(operand_instr->i_opcode));
2027
1.72k
    PyObject *operand = get_const_value(
2028
1.72k
        operand_instr->i_opcode,
2029
1.72k
        operand_instr->i_oparg,
2030
1.72k
        consts
2031
1.72k
    );
2032
1.72k
    if (operand == NULL) {
2033
0
        return ERROR;
2034
0
    }
2035
2036
1.72k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
2037
1.72k
    Py_DECREF(operand);
2038
1.72k
    if (newconst == NULL) {
2039
0
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
2040
0
            return ERROR;
2041
0
        }
2042
0
        PyErr_Clear();
2043
0
        return SUCCESS;
2044
0
    }
2045
2046
1.72k
    if (unaryop->i_opcode == UNARY_NOT) {
2047
0
        assert(PyBool_Check(newconst));
2048
0
    }
2049
1.72k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
2050
1.72k
    return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
2051
1.72k
}
2052
2053
1.23k
#define VISITED (-1)
2054
2055
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
2056
// same effect.
2057
static int
2058
swaptimize(basicblock *block, int *ix)
2059
467
{
2060
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
2061
    // for this function. Make sure to blow away cached *.pyc files first!
2062
467
    assert(*ix < block->b_iused);
2063
467
    cfg_instr *instructions = &block->b_instr[*ix];
2064
    // Find the length of the current sequence of SWAPs and NOPs, and record the
2065
    // maximum depth of the stack manipulations:
2066
467
    assert(instructions[0].i_opcode == SWAP);
2067
467
    int depth = instructions[0].i_oparg;
2068
467
    int len = 0;
2069
467
    int more = false;
2070
467
    int limit = block->b_iused - *ix;
2071
581
    while (++len < limit) {
2072
581
        int opcode = instructions[len].i_opcode;
2073
581
        if (opcode == SWAP) {
2074
95
            depth = Py_MAX(depth, instructions[len].i_oparg);
2075
95
            more = true;
2076
95
        }
2077
486
        else if (opcode != NOP) {
2078
467
            break;
2079
467
        }
2080
581
    }
2081
    // It's already optimal if there's only one SWAP:
2082
467
    if (!more) {
2083
372
        return SUCCESS;
2084
372
    }
2085
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
2086
95
    int *stack = PyMem_Malloc(depth * sizeof(int));
2087
95
    if (stack == NULL) {
2088
0
        PyErr_NoMemory();
2089
0
        return ERROR;
2090
0
    }
2091
380
    for (int i = 0; i < depth; i++) {
2092
285
        stack[i] = i;
2093
285
    }
2094
    // Simulate the combined effect of these instructions by "running" them on
2095
    // our "stack":
2096
285
    for (int i = 0; i < len; i++) {
2097
190
        if (instructions[i].i_opcode == SWAP) {
2098
190
            int oparg = instructions[i].i_oparg;
2099
190
            int top = stack[0];
2100
            // SWAPs are 1-indexed:
2101
190
            stack[0] = stack[oparg - 1];
2102
190
            stack[oparg - 1] = top;
2103
190
        }
2104
190
    }
2105
    // Now we can begin! Our approach here is based on a solution to a closely
2106
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2107
    // think of this algorithm as determining the steps needed to efficiently
2108
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2109
    // though, we can efficiently *shuffle* it! For this reason, we will be
2110
    // replacing instructions starting from the *end* of the run. Since the
2111
    // solution is optimal, we don't need to worry about running out of space:
2112
95
    int current = len - 1;
2113
380
    for (int i = 0; i < depth; i++) {
2114
        // Skip items that have already been visited, or just happen to be in
2115
        // the correct location:
2116
285
        if (stack[i] == VISITED || stack[i] == i) {
2117
190
            continue;
2118
190
        }
2119
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2120
        // with other items; traversing the cycle and swapping each item with
2121
        // the next will put them all in the correct place. The weird
2122
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2123
        // can only swap from that position:
2124
95
        int j = i;
2125
380
        while (true) {
2126
            // Skip the actual swap if our item is zero, since swapping the top
2127
            // item with itself is pointless:
2128
380
            if (j) {
2129
190
                assert(0 <= current);
2130
                // SWAPs are 1-indexed:
2131
190
                instructions[current].i_opcode = SWAP;
2132
190
                instructions[current--].i_oparg = j + 1;
2133
190
            }
2134
380
            if (stack[j] == VISITED) {
2135
                // Completed the cycle:
2136
95
                assert(j == i);
2137
95
                break;
2138
95
            }
2139
285
            int next_j = stack[j];
2140
285
            stack[j] = VISITED;
2141
285
            j = next_j;
2142
285
        }
2143
95
    }
2144
    // NOP out any unused instructions:
2145
95
    while (0 <= current) {
2146
0
        INSTR_SET_OP0(&instructions[current--], NOP);
2147
0
    }
2148
95
    PyMem_Free(stack);
2149
95
    *ix += len - 1;
2150
95
    return SUCCESS;
2151
95
}
2152
2153
2154
// This list is pretty small, since it's only okay to reorder opcodes that:
2155
// - can't affect control flow (like jumping or raising exceptions)
2156
// - can't invoke arbitrary code (besides finalizers)
2157
// - only touch the TOS (and pop it when finished)
2158
#define SWAPPABLE(opcode) \
2159
761
    ((opcode) == STORE_FAST || \
2160
761
     (opcode) == STORE_FAST_MAYBE_NULL || \
2161
761
     (opcode) == POP_TOP)
2162
2163
#define STORES_TO(instr) \
2164
166
    (((instr).i_opcode == STORE_FAST || \
2165
166
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2166
166
     ? (instr).i_oparg : -1)
2167
2168
static int
2169
next_swappable_instruction(basicblock *block, int i, int lineno)
2170
719
{
2171
738
    while (++i < block->b_iused) {
2172
733
        cfg_instr *instruction = &block->b_instr[i];
2173
733
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2174
            // Optimizing across this instruction could cause user-visible
2175
            // changes in the names bound between line tracing events!
2176
3
            return -1;
2177
3
        }
2178
730
        if (instruction->i_opcode == NOP) {
2179
19
            continue;
2180
19
        }
2181
711
        if (SWAPPABLE(instruction->i_opcode)) {
2182
329
            return i;
2183
329
        }
2184
382
        return -1;
2185
711
    }
2186
5
    return -1;
2187
719
}
2188
2189
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2190
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2191
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2192
static void
2193
apply_static_swaps(basicblock *block, int i)
2194
467
{
2195
    // SWAPs are to our left, and potential swaperands are to our right:
2196
594
    for (; 0 <= i; i--) {
2197
567
        assert(i < block->b_iused);
2198
567
        cfg_instr *swap = &block->b_instr[i];
2199
567
        if (swap->i_opcode != SWAP) {
2200
100
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2201
                // Nope, but we know how to handle these. Keep looking:
2202
50
                continue;
2203
50
            }
2204
            // We can't reason about what this instruction does. Bail:
2205
50
            return;
2206
100
        }
2207
467
        int j = next_swappable_instruction(block, i, -1);
2208
467
        if (j < 0) {
2209
247
            return;
2210
247
        }
2211
220
        int k = j;
2212
220
        int lineno = block->b_instr[j].i_loc.lineno;
2213
329
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2214
252
            k = next_swappable_instruction(block, k, lineno);
2215
252
            if (k < 0) {
2216
143
                return;
2217
143
            }
2218
252
        }
2219
        // The reordering is not safe if the two instructions to be swapped
2220
        // store to the same location, or if any intervening instruction stores
2221
        // to the same location as either of them.
2222
77
        int store_j = STORES_TO(block->b_instr[j]);
2223
77
        int store_k = STORES_TO(block->b_instr[k]);
2224
77
        if (store_j >= 0 || store_k >= 0) {
2225
77
            if (store_j == store_k) {
2226
0
                return;
2227
0
            }
2228
89
            for (int idx = j + 1; idx < k; idx++) {
2229
12
                int store_idx = STORES_TO(block->b_instr[idx]);
2230
12
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2231
0
                    return;
2232
0
                }
2233
12
            }
2234
77
        }
2235
2236
        // Success!
2237
77
        INSTR_SET_OP0(swap, NOP);
2238
77
        cfg_instr temp = block->b_instr[j];
2239
77
        block->b_instr[j] = block->b_instr[k];
2240
77
        block->b_instr[k] = temp;
2241
77
    }
2242
467
}
2243
2244
static int
2245
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
2246
                               PyObject *consts, _Py_hashtable_t *consts_index)
2247
16.0k
{
2248
16.0k
    assert(PyDict_CheckExact(const_cache));
2249
16.0k
    assert(PyList_CheckExact(consts));
2250
16.0k
    int opcode = 0;
2251
16.0k
    int oparg = 0;
2252
174k
    for (int i = 0; i < bb->b_iused; i++) {
2253
158k
        cfg_instr *inst = &bb->b_instr[i];
2254
158k
        if (inst->i_opcode == LOAD_CONST) {
2255
35.7k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2256
35.7k
            if (constant == NULL) {
2257
0
                return ERROR;
2258
0
            }
2259
35.7k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2260
35.7k
            Py_DECREF(constant);
2261
35.7k
            if (res < 0) {
2262
0
                return ERROR;
2263
0
            }
2264
35.7k
        }
2265
158k
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2266
20.6k
                                      inst->i_opcode == COPY &&
2267
9
                                      inst->i_oparg == 1);
2268
158k
        if (! is_copy_of_load_const) {
2269
158k
            opcode = inst->i_opcode;
2270
158k
            oparg = inst->i_oparg;
2271
158k
        }
2272
158k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2273
158k
        if (!loads_const(opcode)) {
2274
122k
            continue;
2275
122k
        }
2276
35.9k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2277
35.9k
        switch(nextop) {
2278
17
            case POP_JUMP_IF_FALSE:
2279
17
            case POP_JUMP_IF_TRUE:
2280
17
            case JUMP_IF_FALSE:
2281
17
            case JUMP_IF_TRUE:
2282
17
            {
2283
                /* Remove LOAD_CONST const; conditional jump */
2284
17
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2285
17
                if (cnt == NULL) {
2286
0
                    return ERROR;
2287
0
                }
2288
17
                int is_true = PyObject_IsTrue(cnt);
2289
17
                Py_DECREF(cnt);
2290
17
                if (is_true == -1) {
2291
0
                    return ERROR;
2292
0
                }
2293
17
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2294
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2295
17
                    INSTR_SET_OP0(inst, NOP);
2296
17
                }
2297
17
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2298
17
                if (is_true == jump_if_true) {
2299
1
                    bb->b_instr[i+1].i_opcode = JUMP;
2300
1
                }
2301
16
                else {
2302
16
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2303
16
                }
2304
17
                break;
2305
17
            }
2306
304
            case IS_OP:
2307
304
            {
2308
                // Fold to POP_JUMP_IF_NONE:
2309
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2310
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2311
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2312
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2313
                // Fold to POP_JUMP_IF_NOT_NONE:
2314
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2315
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2316
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2317
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2318
304
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2319
304
                if (cnt == NULL) {
2320
0
                    return ERROR;
2321
0
                }
2322
304
                if (!Py_IsNone(cnt)) {
2323
26
                    Py_DECREF(cnt);
2324
26
                    break;
2325
26
                }
2326
278
                if (bb->b_iused <= i + 2) {
2327
0
                    break;
2328
0
                }
2329
278
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2330
278
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2331
                // Get rid of TO_BOOL regardless:
2332
278
                if (jump_instr->i_opcode == TO_BOOL) {
2333
271
                    INSTR_SET_OP0(jump_instr, NOP);
2334
271
                    if (bb->b_iused <= i + 3) {
2335
0
                        break;
2336
0
                    }
2337
271
                    jump_instr = &bb->b_instr[i + 3];
2338
271
                }
2339
278
                bool invert = is_instr->i_oparg;
2340
278
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2341
260
                    invert = !invert;
2342
260
                }
2343
18
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2344
7
                    break;
2345
7
                }
2346
271
                INSTR_SET_OP0(inst, NOP);
2347
271
                INSTR_SET_OP0(is_instr, NOP);
2348
271
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2349
271
                                              : POP_JUMP_IF_NONE;
2350
271
                break;
2351
278
            }
2352
17
            case TO_BOOL:
2353
17
            {
2354
17
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2355
17
                if (cnt == NULL) {
2356
0
                    return ERROR;
2357
0
                }
2358
17
                int is_true = PyObject_IsTrue(cnt);
2359
17
                Py_DECREF(cnt);
2360
17
                if (is_true == -1) {
2361
0
                    return ERROR;
2362
0
                }
2363
17
                cnt = PyBool_FromLong(is_true);
2364
17
                int index = add_const(cnt, consts, const_cache, consts_index);
2365
17
                if (index < 0) {
2366
0
                    return ERROR;
2367
0
                }
2368
17
                INSTR_SET_OP0(inst, NOP);
2369
17
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2370
17
                break;
2371
17
            }
2372
35.9k
        }
2373
35.9k
        if (inst->i_opcode == LOAD_CONST) {
2374
20.4k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2375
20.4k
            if (constant == NULL) {
2376
0
                return ERROR;
2377
0
            }
2378
20.4k
            int res = maybe_instr_make_load_common_const(inst, constant);
2379
20.4k
            Py_DECREF(constant);
2380
20.4k
            if (res < 0) {
2381
0
                return ERROR;
2382
0
            }
2383
20.4k
        }
2384
35.9k
    }
2385
16.0k
    return SUCCESS;
2386
16.0k
}
2387
2388
static int
2389
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
2390
4.22k
                    _Py_hashtable_t *consts_index) {
2391
20.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2392
16.0k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
2393
16.0k
    }
2394
4.22k
    return SUCCESS;
2395
4.22k
}
2396
2397
static int
2398
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
2399
                     _Py_hashtable_t *consts_index)
2400
16.0k
{
2401
16.0k
    assert(PyDict_CheckExact(const_cache));
2402
16.0k
    assert(PyList_CheckExact(consts));
2403
16.0k
    cfg_instr nop;
2404
16.0k
    INSTR_SET_OP0(&nop, NOP);
2405
175k
    for (int i = 0; i < bb->b_iused; i++) {
2406
158k
        cfg_instr *inst = &bb->b_instr[i];
2407
158k
        cfg_instr *target;
2408
158k
        int opcode = inst->i_opcode;
2409
158k
        int oparg = inst->i_oparg;
2410
158k
        if (HAS_TARGET(opcode)) {
2411
5.92k
            assert(inst->i_target->b_iused > 0);
2412
5.92k
            target = &inst->i_target->b_instr[0];
2413
5.92k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2414
5.92k
        }
2415
153k
        else {
2416
153k
            target = &nop;
2417
153k
        }
2418
158k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2419
158k
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2420
158k
        switch (opcode) {
2421
            /* Try to fold tuples of constants.
2422
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2423
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2424
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2425
1.93k
            case BUILD_TUPLE:
2426
1.93k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2427
54
                    switch(oparg) {
2428
0
                        case 1:
2429
0
                            INSTR_SET_OP0(inst, NOP);
2430
0
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2431
0
                            continue;
2432
44
                        case 2:
2433
53
                        case 3:
2434
53
                            INSTR_SET_OP0(inst, NOP);
2435
53
                            bb->b_instr[i+1].i_opcode = SWAP;
2436
53
                            continue;
2437
54
                    }
2438
54
                }
2439
1.88k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
2440
1.88k
                break;
2441
239
            case BUILD_LIST:
2442
375
            case BUILD_SET:
2443
375
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
2444
375
                break;
2445
123
            case POP_JUMP_IF_NOT_NONE:
2446
278
            case POP_JUMP_IF_NONE:
2447
278
                switch (target->i_opcode) {
2448
7
                    case JUMP:
2449
7
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2450
278
                }
2451
278
                break;
2452
2.15k
            case POP_JUMP_IF_FALSE:
2453
2.15k
                switch (target->i_opcode) {
2454
119
                    case JUMP:
2455
119
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2456
2.15k
                }
2457
2.15k
                break;
2458
2.15k
            case POP_JUMP_IF_TRUE:
2459
557
                switch (target->i_opcode) {
2460
15
                    case JUMP:
2461
15
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2462
557
                }
2463
557
                break;
2464
557
            case JUMP_IF_FALSE:
2465
493
                switch (target->i_opcode) {
2466
0
                    case JUMP:
2467
0
                    case JUMP_IF_FALSE:
2468
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2469
0
                        continue;
2470
2
                    case JUMP_IF_TRUE:
2471
                        // No need to check for loops here, a block's b_next
2472
                        // cannot point to itself.
2473
2
                        assert(inst->i_target != inst->i_target->b_next);
2474
2
                        inst->i_target = inst->i_target->b_next;
2475
2
                        i--;
2476
2
                        continue;
2477
493
                }
2478
491
                break;
2479
491
            case JUMP_IF_TRUE:
2480
57
                switch (target->i_opcode) {
2481
0
                    case JUMP:
2482
0
                    case JUMP_IF_TRUE:
2483
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2484
0
                        continue;
2485
0
                    case JUMP_IF_FALSE:
2486
                        // No need to check for loops here, a block's b_next
2487
                        // cannot point to itself.
2488
0
                        assert(inst->i_target != inst->i_target->b_next);
2489
0
                        inst->i_target = inst->i_target->b_next;
2490
0
                        i--;
2491
0
                        continue;
2492
57
                }
2493
57
                break;
2494
643
            case JUMP:
2495
1.21k
            case JUMP_NO_INTERRUPT:
2496
1.21k
                switch (target->i_opcode) {
2497
0
                    case JUMP:
2498
0
                        i -= jump_thread(bb, inst, target, JUMP);
2499
0
                        continue;
2500
0
                    case JUMP_NO_INTERRUPT:
2501
0
                        i -= jump_thread(bb, inst, target, opcode);
2502
0
                        continue;
2503
1.21k
                }
2504
1.21k
                break;
2505
1.21k
            case FOR_ITER:
2506
347
                if (target->i_opcode == JUMP) {
2507
                    /* This will not work now because the jump (at target) could
2508
                     * be forward or backward and FOR_ITER only jumps forward. We
2509
                     * can re-enable this if ever we implement a backward version
2510
                     * of FOR_ITER.
2511
                     */
2512
                    /*
2513
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2514
                    */
2515
0
                }
2516
347
                break;
2517
3.48k
            case STORE_FAST:
2518
3.48k
                if (opcode == nextop &&
2519
332
                    oparg == bb->b_instr[i+1].i_oparg &&
2520
14
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2521
14
                    bb->b_instr[i].i_opcode = POP_TOP;
2522
14
                    bb->b_instr[i].i_oparg = 0;
2523
14
                }
2524
3.48k
                break;
2525
562
            case SWAP:
2526
562
                if (oparg == 1) {
2527
0
                    INSTR_SET_OP0(inst, NOP);
2528
0
                }
2529
562
                break;
2530
5.68k
            case LOAD_GLOBAL:
2531
5.68k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2532
2.31k
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2533
2.31k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2534
2.31k
                }
2535
5.68k
                break;
2536
1.69k
            case COMPARE_OP:
2537
1.69k
                if (nextop == TO_BOOL) {
2538
504
                    INSTR_SET_OP0(inst, NOP);
2539
504
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2540
504
                    continue;
2541
504
                }
2542
1.19k
                break;
2543
1.19k
            case CONTAINS_OP:
2544
1.36k
            case IS_OP:
2545
1.36k
                if (nextop == TO_BOOL) {
2546
667
                    INSTR_SET_OP0(inst, NOP);
2547
667
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2548
667
                    continue;
2549
667
                }
2550
700
                if (nextop == UNARY_NOT) {
2551
0
                    INSTR_SET_OP0(inst, NOP);
2552
0
                    int inverted = oparg ^ 1;
2553
0
                    assert(inverted == 0 || inverted == 1);
2554
0
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2555
0
                    continue;
2556
0
                }
2557
700
                break;
2558
1.13k
            case TO_BOOL:
2559
1.13k
                if (nextop == TO_BOOL) {
2560
0
                    INSTR_SET_OP0(inst, NOP);
2561
0
                    continue;
2562
0
                }
2563
1.13k
                break;
2564
1.13k
            case UNARY_NOT:
2565
6
                if (nextop == TO_BOOL) {
2566
0
                    INSTR_SET_OP0(inst, NOP);
2567
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2568
0
                    continue;
2569
0
                }
2570
6
                if (nextop == UNARY_NOT) {
2571
0
                    INSTR_SET_OP0(inst, NOP);
2572
0
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2573
0
                    continue;
2574
0
                }
2575
6
                _Py_FALLTHROUGH;
2576
6
            case UNARY_INVERT:
2577
1.73k
            case UNARY_NEGATIVE:
2578
1.73k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
2579
1.73k
                break;
2580
139
            case CALL_INTRINSIC_1:
2581
139
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2582
43
                    if (nextop == GET_ITER) {
2583
0
                        INSTR_SET_OP0(inst, NOP);
2584
0
                    }
2585
43
                    else {
2586
43
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
2587
43
                    }
2588
43
                }
2589
96
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2590
0
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
2591
0
                }
2592
139
                break;
2593
1.34k
            case BINARY_OP:
2594
1.34k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
2595
1.34k
                break;
2596
158k
        }
2597
158k
    }
2598
2599
174k
    for (int i = 0; i < bb->b_iused; i++) {
2600
158k
        cfg_instr *inst = &bb->b_instr[i];
2601
158k
        if (inst->i_opcode == SWAP) {
2602
467
            if (swaptimize(bb, &i) < 0) {
2603
0
                goto error;
2604
0
            }
2605
467
            apply_static_swaps(bb, i);
2606
467
        }
2607
158k
    }
2608
16.0k
    return SUCCESS;
2609
0
error:
2610
0
    return ERROR;
2611
16.0k
}
2612
2613
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2614
2615
static int
2616
remove_redundant_nops_and_jumps(cfg_builder *g)
2617
8.76k
{
2618
8.76k
    int removed_nops, removed_jumps;
2619
9.27k
    do {
2620
        /* Convergence is guaranteed because the number of
2621
         * redundant jumps and nops only decreases.
2622
         */
2623
9.27k
        removed_nops = remove_redundant_nops(g);
2624
9.27k
        RETURN_IF_ERROR(removed_nops);
2625
9.27k
        removed_jumps = remove_redundant_jumps(g);
2626
9.27k
        RETURN_IF_ERROR(removed_jumps);
2627
9.27k
    } while(removed_nops + removed_jumps > 0);
2628
8.76k
    return SUCCESS;
2629
8.76k
}
2630
2631
/* Perform optimizations on a control flow graph.
2632
   The consts object should still be in list form to allow new constants
2633
   to be appended.
2634
2635
   Code trasnformations that reduce code size initially fill the gaps with
2636
   NOPs.  Later those NOPs are removed.
2637
*/
2638
static int
2639
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
2640
             _Py_hashtable_t *consts_index, int firstlineno)
2641
4.22k
{
2642
4.22k
    assert(PyDict_CheckExact(const_cache));
2643
4.22k
    RETURN_IF_ERROR(check_cfg(g));
2644
4.22k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2645
4.22k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2646
4.22k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2647
4.22k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
2648
20.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2649
16.0k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
2650
16.0k
    }
2651
4.22k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2652
4.22k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2653
4.22k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2654
4.22k
    assert(no_redundant_jumps(g));
2655
4.22k
    return SUCCESS;
2656
4.22k
}
2657
2658
static void
2659
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2660
4.51k
{
2661
4.51k
    int32_t line1 = inst1->i_loc.lineno;
2662
4.51k
    int32_t line2 = inst2->i_loc.lineno;
2663
    /* Skip if instructions are on different lines */
2664
4.51k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2665
1.39k
        return;
2666
1.39k
    }
2667
3.11k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2668
167
        return;
2669
167
    }
2670
2.95k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2671
2.95k
    INSTR_SET_OP0(inst2, NOP);
2672
2.95k
}
2673
2674
static int
2675
insert_superinstructions(cfg_builder *g)
2676
4.22k
{
2677
20.2k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2678
2679
164k
        for (int i = 0; i < b->b_iused; i++) {
2680
148k
            cfg_instr *inst = &b->b_instr[i];
2681
148k
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2682
148k
            switch(inst->i_opcode) {
2683
15.8k
                case LOAD_FAST:
2684
15.8k
                    if (nextop == LOAD_FAST) {
2685
2.88k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2686
2.88k
                    }
2687
15.8k
                    break;
2688
3.26k
                case STORE_FAST:
2689
3.26k
                    switch (nextop) {
2690
1.37k
                        case LOAD_FAST:
2691
1.37k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2692
1.37k
                            break;
2693
256
                        case STORE_FAST:
2694
256
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2695
256
                            break;
2696
3.26k
                    }
2697
3.26k
                    break;
2698
148k
            }
2699
148k
        }
2700
16.0k
    }
2701
4.22k
    int res = remove_redundant_nops(g);
2702
4.22k
    assert(no_redundant_nops(g));
2703
4.22k
    return res;
2704
4.22k
}
2705
2706
#define NOT_LOCAL -1
2707
3.36k
#define DUMMY_INSTR -1
2708
2709
typedef struct {
2710
    // Index of instruction that produced the reference or DUMMY_INSTR.
2711
    int instr;
2712
2713
    // The local to which the reference refers or NOT_LOCAL.
2714
    int local;
2715
} ref;
2716
2717
typedef struct {
2718
    ref *refs;
2719
    Py_ssize_t size;
2720
    Py_ssize_t capacity;
2721
} ref_stack;
2722
2723
static int
2724
ref_stack_push(ref_stack *stack, ref r)
2725
119k
{
2726
119k
    if (stack->size == stack->capacity) {
2727
4.22k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2728
4.22k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2729
4.22k
        if (refs == NULL) {
2730
0
            PyErr_NoMemory();
2731
0
            return -1;
2732
0
        }
2733
4.22k
        stack->refs = refs;
2734
4.22k
        stack->capacity = new_cap;
2735
4.22k
    }
2736
119k
    stack->refs[stack->size] = r;
2737
119k
    stack->size++;
2738
119k
    return 0;
2739
119k
}
2740
2741
static ref
2742
ref_stack_pop(ref_stack *stack)
2743
107k
{
2744
107k
    assert(stack->size > 0);
2745
107k
    stack->size--;
2746
107k
    ref r = stack->refs[stack->size];
2747
107k
    return r;
2748
107k
}
2749
2750
static void
2751
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2752
360
{
2753
360
    Py_ssize_t idx = stack->size - off;
2754
360
    assert(idx >= 0 && idx < stack->size);
2755
360
    ref tmp = stack->refs[idx];
2756
360
    stack->refs[idx] = stack->refs[stack->size - 1];
2757
360
    stack->refs[stack->size - 1] = tmp;
2758
360
}
2759
2760
static ref
2761
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2762
15.7k
{
2763
15.7k
    assert(idx >= 0 && idx < stack->size);
2764
15.7k
    return stack->refs[idx];
2765
15.7k
}
2766
2767
static void
2768
ref_stack_clear(ref_stack *stack)
2769
11.4k
{
2770
11.4k
    stack->size = 0;
2771
11.4k
}
2772
2773
static void
2774
ref_stack_fini(ref_stack *stack)
2775
4.22k
{
2776
4.22k
    if (stack->refs != NULL) {
2777
4.22k
        PyMem_Free(stack->refs);
2778
4.22k
    }
2779
4.22k
    stack->refs = NULL;
2780
4.22k
    stack->capacity = 0;
2781
4.22k
    stack->size = 0;
2782
4.22k
}
2783
2784
typedef enum {
2785
    // The loaded reference is still on the stack when the local is killed
2786
    SUPPORT_KILLED  = 1,
2787
    // The loaded reference is stored into a local
2788
    STORED_AS_LOCAL = 2,
2789
    // The loaded reference is still on the stack at the end of the basic block
2790
    REF_UNCONSUMED  = 4,
2791
} LoadFastInstrFlag;
2792
2793
static void
2794
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2795
3.42k
{
2796
6.15k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2797
2.73k
        ref r = ref_stack_at(refs, i);
2798
2.73k
        if (r.local == local) {
2799
4
            assert(r.instr >= 0);
2800
4
            instr_flags[r.instr] |= SUPPORT_KILLED;
2801
4
        }
2802
2.73k
    }
2803
3.42k
}
2804
2805
static void
2806
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2807
3.36k
{
2808
3.36k
    kill_local(instr_flags, refs, local);
2809
3.36k
    if (r.instr != DUMMY_INSTR) {
2810
2.98k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2811
2.98k
    }
2812
3.36k
}
2813
2814
static void
2815
load_fast_push_block(basicblock ***sp, basicblock *target,
2816
                     Py_ssize_t start_depth)
2817
9.50k
{
2818
9.50k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2819
9.50k
    if (!target->b_visited) {
2820
7.24k
        target->b_visited = 1;
2821
7.24k
        *(*sp)++ = target;
2822
7.24k
    }
2823
9.50k
}
2824
2825
/*
2826
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2827
 * load borrowed references onto the operand stack.
2828
 *
2829
 * This is only safe when we can prove that the reference in the frame outlives
2830
 * the borrowed reference produced by the instruction. We make this tractable
2831
 * by enforcing the following lifetimes:
2832
 *
2833
 * 1. Borrowed references loaded onto the operand stack live until the end of
2834
 *    the instruction that consumes them from the stack. Any borrowed
2835
 *    references that would escape into the heap (e.g. into frame objects or
2836
 *    generators) are converted into new, strong references.
2837
 *
2838
 * 2. Locals live until they are either killed by an instruction
2839
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2840
 *    via `f_locals` is added to a tuple owned by the frame object.
2841
 *
2842
 * To simplify the problem of detecting which supporting references in the
2843
 * frame are killed by instructions that overwrite locals, we only allow
2844
 * borrowed references to be stored as a local in the frame if they were passed
2845
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2846
 * strong references.
2847
 *
2848
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2849
 * that meet the following criteria:
2850
 *
2851
 * 1. The produced reference must be consumed from the stack before the
2852
 *    supporting reference in the frame is killed.
2853
 *
2854
 * 2. The produced reference cannot be stored as a local.
2855
 *
2856
 * We use abstract interpretation to identify instructions that meet these
2857
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2858
 * stack of abstract references and note any instructions that violate the
2859
 * criteria above. Once we've processed all the instructions in a block, any
2860
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2861
 */
2862
static int
2863
optimize_load_fast(cfg_builder *g)
2864
4.22k
{
2865
4.22k
    int status;
2866
4.22k
    ref_stack refs = {0};
2867
4.22k
    int max_instrs = 0;
2868
4.22k
    basicblock *entryblock = g->g_entryblock;
2869
20.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2870
16.1k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2871
16.1k
    }
2872
4.22k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2873
4.22k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2874
4.22k
    if (instr_flags == NULL) {
2875
0
        PyErr_NoMemory();
2876
0
        return ERROR;
2877
0
    }
2878
4.22k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2879
4.22k
    if (blocks == NULL) {
2880
0
        status = ERROR;
2881
0
        goto done;
2882
0
    }
2883
4.22k
    basicblock **sp = blocks;
2884
4.22k
    *sp = entryblock;
2885
4.22k
    sp++;
2886
4.22k
    entryblock->b_startdepth = 0;
2887
4.22k
    entryblock->b_visited = 1;
2888
2889
4.22k
    #define PUSH_REF(instr, local)                \
2890
119k
        do {                                      \
2891
119k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2892
0
                status = ERROR;                   \
2893
0
                goto done;                        \
2894
0
            }                                     \
2895
119k
        } while(0)
2896
2897
15.6k
    while (sp != blocks) {
2898
11.4k
        basicblock *block = *--sp;
2899
11.4k
        assert(block->b_startdepth > -1);
2900
2901
        // Reset per-block state.
2902
11.4k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2903
2904
        // Reset the stack of refs. We don't track references on the stack
2905
        // across basic blocks, but the bytecode will expect their
2906
        // presence. Add dummy references as necessary.
2907
11.4k
        ref_stack_clear(&refs);
2908
19.0k
        for (int i = 0; i < block->b_startdepth; i++) {
2909
7.52k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2910
7.52k
        }
2911
2912
155k
        for (int i = 0; i < block->b_iused; i++) {
2913
144k
            cfg_instr *instr = &block->b_instr[i];
2914
144k
            int opcode = instr->i_opcode;
2915
144k
            int oparg = instr->i_oparg;
2916
144k
            assert(opcode != EXTENDED_ARG);
2917
144k
            switch (opcode) {
2918
                // Opcodes that load and store locals
2919
0
                case DELETE_FAST: {
2920
0
                    kill_local(instr_flags, &refs, oparg);
2921
0
                    break;
2922
0
                }
2923
2924
13.9k
                case LOAD_FAST: {
2925
13.9k
                    PUSH_REF(i, oparg);
2926
13.9k
                    break;
2927
13.9k
                }
2928
2929
13.9k
                case LOAD_FAST_AND_CLEAR: {
2930
58
                    kill_local(instr_flags, &refs, oparg);
2931
58
                    PUSH_REF(i, oparg);
2932
58
                    break;
2933
58
                }
2934
2935
2.65k
                case LOAD_FAST_LOAD_FAST: {
2936
2.65k
                    PUSH_REF(i, oparg >> 4);
2937
2.65k
                    PUSH_REF(i, oparg & 15);
2938
2.65k
                    break;
2939
2.65k
                }
2940
2941
2.92k
                case STORE_FAST: {
2942
2.92k
                    ref r = ref_stack_pop(&refs);
2943
2.92k
                    store_local(instr_flags, &refs, oparg, r);
2944
2.92k
                    break;
2945
2.65k
                }
2946
2947
39
                case STORE_FAST_LOAD_FAST: {
2948
                    // STORE_FAST
2949
39
                    ref r = ref_stack_pop(&refs);
2950
39
                    store_local(instr_flags, &refs, oparg >> 4, r);
2951
                    // LOAD_FAST
2952
39
                    PUSH_REF(i, oparg & 15);
2953
39
                    break;
2954
39
                }
2955
2956
204
                case STORE_FAST_STORE_FAST: {
2957
                    // STORE_FAST
2958
204
                    ref r = ref_stack_pop(&refs);
2959
204
                    store_local(instr_flags, &refs, oparg >> 4, r);
2960
                    // STORE_FAST
2961
204
                    r = ref_stack_pop(&refs);
2962
204
                    store_local(instr_flags, &refs, oparg & 15, r);
2963
204
                    break;
2964
39
                }
2965
2966
                // Opcodes that shuffle values on the stack
2967
916
                case COPY: {
2968
916
                    assert(oparg > 0);
2969
916
                    Py_ssize_t idx = refs.size - oparg;
2970
916
                    ref r = ref_stack_at(&refs, idx);
2971
916
                    PUSH_REF(r.instr, r.local);
2972
916
                    break;
2973
916
                }
2974
2975
916
                case SWAP: {
2976
360
                    assert(oparg >= 2);
2977
360
                    ref_stack_swap_top(&refs, oparg);
2978
360
                    break;
2979
916
                }
2980
2981
                // We treat opcodes that do not consume all of their inputs on
2982
                // a case by case basis, as we have no generic way of knowing
2983
                // how many inputs should be left on the stack.
2984
2985
                // Opcodes that consume no inputs
2986
1.17k
                case FORMAT_SIMPLE:
2987
1.17k
                case GET_ANEXT:
2988
1.52k
                case GET_ITER:
2989
1.52k
                case GET_LEN:
2990
1.84k
                case IMPORT_FROM:
2991
1.84k
                case MATCH_KEYS:
2992
1.84k
                case MATCH_MAPPING:
2993
1.84k
                case MATCH_SEQUENCE:
2994
1.84k
                case WITH_EXCEPT_START: {
2995
1.84k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2996
1.84k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2997
1.84k
                    int net_pushed = num_pushed - num_popped;
2998
1.84k
                    assert(net_pushed >= 0);
2999
2.50k
                    for (int j = 0; j < net_pushed; j++) {
3000
661
                        PUSH_REF(i, NOT_LOCAL);
3001
661
                    }
3002
1.84k
                    break;
3003
1.84k
                }
3004
3005
                // Opcodes that consume some inputs and push no new values
3006
1.84k
                case DICT_MERGE:
3007
472
                case DICT_UPDATE:
3008
2.60k
                case LIST_APPEND:
3009
2.65k
                case LIST_EXTEND:
3010
9.41k
                case MAP_ADD:
3011
9.41k
                case RERAISE:
3012
9.48k
                case SET_ADD:
3013
9.49k
                case SET_UPDATE: {
3014
9.49k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
3015
9.49k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
3016
9.49k
                    int net_popped = num_popped - num_pushed;
3017
9.49k
                    assert(net_popped > 0);
3018
25.7k
                    for (int i = 0; i < net_popped; i++) {
3019
16.2k
                        ref_stack_pop(&refs);
3020
16.2k
                    }
3021
9.49k
                    break;
3022
9.48k
                }
3023
3024
21
                case END_SEND: {
3025
21
                    assert(_PyOpcode_num_popped(opcode, oparg) == 3);
3026
21
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
3027
21
                    ref tos = ref_stack_pop(&refs);
3028
21
                    ref_stack_pop(&refs);
3029
21
                    ref_stack_pop(&refs);
3030
21
                    PUSH_REF(tos.instr, tos.local);
3031
21
                    break;
3032
21
                }
3033
3034
1.33k
                case SET_FUNCTION_ATTRIBUTE: {
3035
1.33k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
3036
1.33k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
3037
1.33k
                    ref tos = ref_stack_pop(&refs);
3038
1.33k
                    ref_stack_pop(&refs);
3039
1.33k
                    PUSH_REF(tos.instr, tos.local);
3040
1.33k
                    break;
3041
1.33k
                }
3042
3043
                // Opcodes that consume some inputs and push new values
3044
1.33k
                case CHECK_EXC_MATCH: {
3045
0
                    ref_stack_pop(&refs);
3046
0
                    PUSH_REF(i, NOT_LOCAL);
3047
0
                    break;
3048
0
                }
3049
3050
341
                case FOR_ITER: {
3051
341
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
3052
341
                    PUSH_REF(i, NOT_LOCAL);
3053
341
                    break;
3054
341
                }
3055
3056
9.88k
                case LOAD_ATTR:
3057
10.0k
                case LOAD_SUPER_ATTR: {
3058
10.0k
                    ref self = ref_stack_pop(&refs);
3059
10.0k
                    if (opcode == LOAD_SUPER_ATTR) {
3060
127
                        ref_stack_pop(&refs);
3061
127
                        ref_stack_pop(&refs);
3062
127
                    }
3063
10.0k
                    PUSH_REF(i, NOT_LOCAL);
3064
10.0k
                    if (oparg & 1) {
3065
                        // A method call; conservatively assume that self is pushed
3066
                        // back onto the stack
3067
2.77k
                        PUSH_REF(self.instr, self.local);
3068
2.77k
                    }
3069
10.0k
                    break;
3070
10.0k
                }
3071
3072
10.0k
                case LOAD_SPECIAL:
3073
178
                case PUSH_EXC_INFO: {
3074
178
                    ref tos = ref_stack_pop(&refs);
3075
178
                    PUSH_REF(i, NOT_LOCAL);
3076
178
                    PUSH_REF(tos.instr, tos.local);
3077
178
                    break;
3078
178
                }
3079
3080
178
                case SEND: {
3081
21
                    load_fast_push_block(&sp, instr->i_target, refs.size);
3082
21
                    ref_stack_pop(&refs);
3083
21
                    PUSH_REF(i, NOT_LOCAL);
3084
21
                    break;
3085
21
                }
3086
3087
                // Opcodes that consume all of their inputs
3088
100k
                default: {
3089
100k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
3090
100k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
3091
100k
                    if (HAS_TARGET(instr->i_opcode)) {
3092
4.01k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
3093
4.01k
                    }
3094
100k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
3095
                        // Block push opcodes only affect the stack when jumping
3096
                        // to the target.
3097
174k
                        for (int j = 0; j < num_popped; j++) {
3098
74.5k
                            ref_stack_pop(&refs);
3099
74.5k
                        }
3100
176k
                        for (int j = 0; j < num_pushed; j++) {
3101
76.2k
                            PUSH_REF(i, NOT_LOCAL);
3102
76.2k
                        }
3103
100k
                    }
3104
100k
                    break;
3105
100k
                }
3106
144k
            }
3107
144k
        }
3108
3109
        // Push fallthrough block
3110
11.4k
        if (BB_HAS_FALLTHROUGH(block)) {
3111
5.13k
            assert(block->b_next != NULL);
3112
5.13k
            load_fast_push_block(&sp, block->b_next, refs.size);
3113
5.13k
        }
3114
3115
        // Mark instructions that produce values that are on the stack at the
3116
        // end of the basic block
3117
23.6k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3118
12.1k
            ref r = ref_stack_at(&refs, i);
3119
12.1k
            if (r.instr != -1) {
3120
7.34k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3121
7.34k
            }
3122
12.1k
        }
3123
3124
        // Optimize instructions
3125
155k
        for (int i = 0; i < block->b_iused; i++) {
3126
144k
            if (!instr_flags[i]) {
3127
134k
                cfg_instr *instr = &block->b_instr[i];
3128
134k
                switch (instr->i_opcode) {
3129
13.5k
                    case LOAD_FAST:
3130
13.5k
                        instr->i_opcode = LOAD_FAST_BORROW;
3131
13.5k
                        break;
3132
2.64k
                    case LOAD_FAST_LOAD_FAST:
3133
2.64k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3134
2.64k
                        break;
3135
118k
                    default:
3136
118k
                        break;
3137
134k
                }
3138
134k
            }
3139
144k
        }
3140
11.4k
    }
3141
3142
4.22k
    #undef PUSH_REF
3143
3144
4.22k
    status = SUCCESS;
3145
3146
4.22k
done:
3147
4.22k
    ref_stack_fini(&refs);
3148
4.22k
    PyMem_Free(instr_flags);
3149
4.22k
    PyMem_Free(blocks);
3150
4.22k
    return status;
3151
4.22k
}
3152
3153
// helper functions for add_checks_for_loads_of_unknown_variables
3154
static inline void
3155
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3156
42.5k
{
3157
    // Push b if the unsafe mask is giving us any new information.
3158
    // To avoid overflowing the stack, only allow each block once.
3159
    // Use b->b_visited=1 to mean that b is currently on the stack.
3160
42.5k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3161
42.5k
    if (b->b_unsafe_locals_mask != both) {
3162
5.80k
        b->b_unsafe_locals_mask = both;
3163
        // More work left to do.
3164
5.80k
        if (!b->b_visited) {
3165
            // not on the stack, so push it.
3166
5.75k
            *(*sp)++ = b;
3167
5.75k
            b->b_visited = 1;
3168
5.75k
        }
3169
5.80k
    }
3170
42.5k
}
3171
3172
static void
3173
scan_block_for_locals(basicblock *b, basicblock ***sp)
3174
19.2k
{
3175
    // bit i is set if local i is potentially uninitialized
3176
19.2k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3177
153k
    for (int i = 0; i < b->b_iused; i++) {
3178
134k
        cfg_instr *instr = &b->b_instr[i];
3179
134k
        assert(instr->i_opcode != EXTENDED_ARG);
3180
134k
        if (instr->i_except != NULL) {
3181
23.4k
            maybe_push(instr->i_except, unsafe_mask, sp);
3182
23.4k
        }
3183
134k
        if (instr->i_oparg >= 64) {
3184
6.56k
            continue;
3185
6.56k
        }
3186
134k
        assert(instr->i_oparg >= 0);
3187
128k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3188
128k
        switch (instr->i_opcode) {
3189
116
            case DELETE_FAST:
3190
232
            case LOAD_FAST_AND_CLEAR:
3191
464
            case STORE_FAST_MAYBE_NULL:
3192
464
                unsafe_mask |= bit;
3193
464
                break;
3194
6.74k
            case STORE_FAST:
3195
6.74k
                unsafe_mask &= ~bit;
3196
6.74k
                break;
3197
30
            case LOAD_FAST_CHECK:
3198
                // If this doesn't raise, then the local is defined.
3199
30
                unsafe_mask &= ~bit;
3200
30
                break;
3201
27.0k
            case LOAD_FAST:
3202
27.0k
                if (unsafe_mask & bit) {
3203
30
                    instr->i_opcode = LOAD_FAST_CHECK;
3204
30
                }
3205
27.0k
                unsafe_mask &= ~bit;
3206
27.0k
                break;
3207
128k
        }
3208
128k
    }
3209
19.2k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3210
8.74k
        maybe_push(b->b_next, unsafe_mask, sp);
3211
8.74k
    }
3212
19.2k
    cfg_instr *last = basicblock_last_instr(b);
3213
19.2k
    if (last && is_jump(last)) {
3214
7.75k
        assert(last->i_target != NULL);
3215
7.75k
        maybe_push(last->i_target, unsafe_mask, sp);
3216
7.75k
    }
3217
19.2k
}
3218
3219
static int
3220
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3221
0
{
3222
0
    assert(nlocals > 64);
3223
0
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3224
0
    if (states == NULL) {
3225
0
        PyErr_NoMemory();
3226
0
        return ERROR;
3227
0
    }
3228
0
    Py_ssize_t blocknum = 0;
3229
    // state[i - 64] == blocknum if local i is guaranteed to
3230
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3231
    // STORE_FAST within that basicblock (not followed by
3232
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3233
0
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3234
0
        blocknum++;
3235
0
        for (int i = 0; i < b->b_iused; i++) {
3236
0
            cfg_instr *instr = &b->b_instr[i];
3237
0
            assert(instr->i_opcode != EXTENDED_ARG);
3238
0
            int arg = instr->i_oparg;
3239
0
            if (arg < 64) {
3240
0
                continue;
3241
0
            }
3242
0
            assert(arg >= 0);
3243
0
            switch (instr->i_opcode) {
3244
0
                case DELETE_FAST:
3245
0
                case LOAD_FAST_AND_CLEAR:
3246
0
                case STORE_FAST_MAYBE_NULL:
3247
0
                    states[arg - 64] = blocknum - 1;
3248
0
                    break;
3249
0
                case STORE_FAST:
3250
0
                    states[arg - 64] = blocknum;
3251
0
                    break;
3252
0
                case LOAD_FAST:
3253
0
                    if (states[arg - 64] != blocknum) {
3254
0
                        instr->i_opcode = LOAD_FAST_CHECK;
3255
0
                    }
3256
0
                    states[arg - 64] = blocknum;
3257
0
                    break;
3258
0
                    Py_UNREACHABLE();
3259
0
            }
3260
0
        }
3261
0
    }
3262
0
    PyMem_Free(states);
3263
0
    return SUCCESS;
3264
0
}
3265
3266
static int
3267
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3268
4.22k
{
3269
4.22k
    assert(PyList_CheckExact(consts));
3270
4.22k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3271
4.22k
    if (nconsts == 0) {
3272
32
        return SUCCESS;  /* nothing to do */
3273
32
    }
3274
3275
4.19k
    Py_ssize_t *index_map = NULL;
3276
4.19k
    Py_ssize_t *reverse_index_map = NULL;
3277
4.19k
    int err = ERROR;
3278
3279
4.19k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3280
4.19k
    if (index_map == NULL) {
3281
0
        PyErr_NoMemory();
3282
0
        goto end;
3283
0
    }
3284
25.6k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3285
21.4k
        index_map[i] = -1;
3286
21.4k
    }
3287
    // The first constant may be docstring; keep it always.
3288
4.19k
    index_map[0] = 0;
3289
3290
    /* mark used consts */
3291
20.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3292
164k
        for (int i = 0; i < b->b_iused; i++) {
3293
148k
            int opcode = b->b_instr[i].i_opcode;
3294
148k
            if (OPCODE_HAS_CONST(opcode)) {
3295
15.8k
                int index = b->b_instr[i].i_oparg;
3296
15.8k
                index_map[index] = index;
3297
15.8k
            }
3298
148k
        }
3299
15.9k
    }
3300
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3301
    /* condense consts */
3302
4.19k
    Py_ssize_t n_used_consts = 0;
3303
29.8k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3304
25.6k
        if (index_map[i] != -1) {
3305
14.5k
            assert(index_map[i] == i);
3306
14.5k
            index_map[n_used_consts++] = index_map[i];
3307
14.5k
        }
3308
25.6k
    }
3309
4.19k
    if (n_used_consts == nconsts) {
3310
        /* nothing to do */
3311
1.04k
        err = SUCCESS;
3312
1.04k
        goto end;
3313
1.04k
    }
3314
3315
    /* move all used consts to the beginning of the consts list */
3316
4.19k
    assert(n_used_consts < nconsts);
3317
16.6k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3318
13.4k
        Py_ssize_t old_index = index_map[i];
3319
13.4k
        assert(i <= old_index && old_index < nconsts);
3320
13.4k
        if (i != old_index) {
3321
7.06k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3322
7.06k
            assert(value != NULL);
3323
7.06k
            PyList_SetItem(consts, i, Py_NewRef(value));
3324
7.06k
        }
3325
13.4k
    }
3326
3327
    /* truncate the consts list at its new size */
3328
3.15k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3329
0
        goto end;
3330
0
    }
3331
    /* adjust const indices in the bytecode */
3332
3.15k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3333
3.15k
    if (reverse_index_map == NULL) {
3334
0
        PyErr_NoMemory();
3335
0
        goto end;
3336
0
    }
3337
27.7k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3338
24.5k
        reverse_index_map[i] = -1;
3339
24.5k
    }
3340
16.6k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3341
13.4k
        assert(index_map[i] != -1);
3342
13.4k
        assert(reverse_index_map[index_map[i]] == -1);
3343
13.4k
        reverse_index_map[index_map[i]] = i;
3344
13.4k
    }
3345
3346
16.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3347
148k
        for (int i = 0; i < b->b_iused; i++) {
3348
134k
            int opcode = b->b_instr[i].i_opcode;
3349
134k
            if (OPCODE_HAS_CONST(opcode)) {
3350
15.5k
                int index = b->b_instr[i].i_oparg;
3351
15.5k
                assert(reverse_index_map[index] >= 0);
3352
15.5k
                assert(reverse_index_map[index] < n_used_consts);
3353
15.5k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3354
15.5k
            }
3355
134k
        }
3356
13.3k
    }
3357
3358
3.15k
    err = SUCCESS;
3359
4.19k
end:
3360
4.19k
    PyMem_Free(index_map);
3361
4.19k
    PyMem_Free(reverse_index_map);
3362
4.19k
    return err;
3363
3.15k
}
3364
3365
3366
3367
static int
3368
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3369
                                                int nlocals,
3370
                                                int nparams)
3371
4.22k
{
3372
4.22k
    if (nlocals == 0) {
3373
1.59k
        return SUCCESS;
3374
1.59k
    }
3375
2.63k
    if (nlocals > 64) {
3376
        // To avoid O(nlocals**2) compilation, locals beyond the first
3377
        // 64 are only analyzed one basicblock at a time: initialization
3378
        // info is not passed between basicblocks.
3379
0
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3380
0
            return ERROR;
3381
0
        }
3382
0
        nlocals = 64;
3383
0
    }
3384
2.63k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3385
2.63k
    if (stack == NULL) {
3386
0
        return ERROR;
3387
0
    }
3388
2.63k
    basicblock **sp = stack;
3389
3390
    // First origin of being uninitialized:
3391
    // The non-parameter locals in the entry block.
3392
2.63k
    uint64_t start_mask = 0;
3393
5.08k
    for (int i = nparams; i < nlocals; i++) {
3394
2.45k
        start_mask |= (uint64_t)1 << i;
3395
2.45k
    }
3396
2.63k
    maybe_push(entryblock, start_mask, &sp);
3397
3398
    // Second origin of being uninitialized:
3399
    // There could be DELETE_FAST somewhere, so
3400
    // be sure to scan each basicblock at least once.
3401
16.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3402
13.5k
        scan_block_for_locals(b, &sp);
3403
13.5k
    }
3404
    // Now propagate the uncertainty from the origins we found: Use
3405
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3406
8.38k
    while (sp > stack) {
3407
5.75k
        basicblock *b = *--sp;
3408
        // mark as no longer on stack
3409
5.75k
        b->b_visited = 0;
3410
5.75k
        scan_block_for_locals(b, &sp);
3411
5.75k
    }
3412
2.63k
    PyMem_Free(stack);
3413
2.63k
    return SUCCESS;
3414
2.63k
}
3415
3416
3417
static int
3418
3.20k
mark_warm(basicblock *entryblock) {
3419
3.20k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3420
3.20k
    if (stack == NULL) {
3421
0
        return ERROR;
3422
0
    }
3423
3.20k
    basicblock **sp = stack;
3424
3425
3.20k
    *sp++ = entryblock;
3426
3.20k
    entryblock->b_visited = 1;
3427
13.4k
    while (sp > stack) {
3428
10.2k
        basicblock *b = *(--sp);
3429
10.2k
        assert(!b->b_except_handler);
3430
10.2k
        b->b_warm = 1;
3431
10.2k
        basicblock *next = b->b_next;
3432
10.2k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3433
4.16k
            *sp++ = next;
3434
4.16k
            next->b_visited = 1;
3435
4.16k
        }
3436
107k
        for (int i=0; i < b->b_iused; i++) {
3437
97.0k
            cfg_instr *instr = &b->b_instr[i];
3438
97.0k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3439
2.85k
                *sp++ = instr->i_target;
3440
2.85k
                instr->i_target->b_visited = 1;
3441
2.85k
            }
3442
97.0k
        }
3443
10.2k
    }
3444
3.20k
    PyMem_Free(stack);
3445
3.20k
    return SUCCESS;
3446
3.20k
}
3447
3448
static int
3449
3.20k
mark_cold(basicblock *entryblock) {
3450
18.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3451
14.9k
        assert(!b->b_cold && !b->b_warm);
3452
14.9k
    }
3453
3.20k
    if (mark_warm(entryblock) < 0) {
3454
0
        return ERROR;
3455
0
    }
3456
3457
3.20k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3458
3.20k
    if (stack == NULL) {
3459
0
        return ERROR;
3460
0
    }
3461
3462
3.20k
    basicblock **sp = stack;
3463
18.1k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3464
14.9k
        if (b->b_except_handler) {
3465
800
            assert(!b->b_warm);
3466
800
            *sp++ = b;
3467
800
            b->b_visited = 1;
3468
800
        }
3469
14.9k
    }
3470
3471
4.71k
    while (sp > stack) {
3472
1.50k
        basicblock *b = *(--sp);
3473
1.50k
        b->b_cold = 1;
3474
1.50k
        basicblock *next = b->b_next;
3475
1.50k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3476
387
            if (!next->b_warm && !next->b_visited) {
3477
357
                *sp++ = next;
3478
357
                next->b_visited = 1;
3479
357
            }
3480
387
        }
3481
7.91k
        for (int i = 0; i < b->b_iused; i++) {
3482
6.41k
            cfg_instr *instr = &b->b_instr[i];
3483
6.41k
            if (is_jump(instr)) {
3484
470
                assert(i == b->b_iused - 1);
3485
470
                basicblock *target = b->b_instr[i].i_target;
3486
470
                if (!target->b_warm && !target->b_visited) {
3487
348
                    *sp++ = target;
3488
348
                    target->b_visited = 1;
3489
348
                }
3490
470
            }
3491
6.41k
        }
3492
1.50k
    }
3493
3.20k
    PyMem_Free(stack);
3494
3.20k
    return SUCCESS;
3495
3.20k
}
3496
3497
3498
static int
3499
4.22k
push_cold_blocks_to_end(cfg_builder *g) {
3500
4.22k
    basicblock *entryblock = g->g_entryblock;
3501
4.22k
    if (entryblock->b_next == NULL) {
3502
        /* single basicblock, no need to reorder */
3503
1.01k
        return SUCCESS;
3504
1.01k
    }
3505
3.20k
    RETURN_IF_ERROR(mark_cold(entryblock));
3506
3507
3.20k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3508
3509
    /* If we have a cold block with fallthrough to a warm block, add */
3510
    /* an explicit jump instead of fallthrough */
3511
18.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3512
15.0k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3513
21
            basicblock *explicit_jump = cfg_builder_new_block(g);
3514
21
            if (explicit_jump == NULL) {
3515
0
                return ERROR;
3516
0
            }
3517
21
            if (!IS_LABEL(b->b_next->b_label)) {
3518
0
                b->b_next->b_label.id = next_lbl++;
3519
0
            }
3520
21
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3521
21
                             NO_LOCATION);
3522
21
            explicit_jump->b_cold = 1;
3523
21
            explicit_jump->b_next = b->b_next;
3524
21
            explicit_jump->b_predecessors = 1;
3525
21
            b->b_next = explicit_jump;
3526
3527
            /* set target */
3528
21
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3529
21
            last->i_target = explicit_jump->b_next;
3530
21
        }
3531
15.0k
    }
3532
3533
3.20k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3534
3.20k
    basicblock *cold_blocks = NULL;
3535
3.20k
    basicblock *cold_blocks_tail = NULL;
3536
3537
3.20k
    basicblock *b = entryblock;
3538
3.80k
    while(b->b_next) {
3539
3.80k
        assert(!b->b_cold);
3540
14.0k
        while (b->b_next && !b->b_next->b_cold) {
3541
10.2k
            b = b->b_next;
3542
10.2k
        }
3543
3.80k
        if (b->b_next == NULL) {
3544
            /* no more cold blocks */
3545
3.20k
            break;
3546
3.20k
        }
3547
3548
        /* b->b_next is the beginning of a cold streak */
3549
3.80k
        assert(!b->b_cold && b->b_next->b_cold);
3550
3551
593
        basicblock *b_end = b->b_next;
3552
1.52k
        while (b_end->b_next && b_end->b_next->b_cold) {
3553
933
            b_end = b_end->b_next;
3554
933
        }
3555
3556
        /* b_end is the end of the cold streak */
3557
593
        assert(b_end && b_end->b_cold);
3558
593
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3559
3560
593
        if (cold_blocks == NULL) {
3561
311
            cold_blocks = b->b_next;
3562
311
        }
3563
282
        else {
3564
282
            cold_blocks_tail->b_next = b->b_next;
3565
282
        }
3566
593
        cold_blocks_tail = b_end;
3567
593
        b->b_next = b_end->b_next;
3568
593
        b_end->b_next = NULL;
3569
593
    }
3570
3.20k
    assert(b != NULL && b->b_next == NULL);
3571
3.20k
    b->b_next = cold_blocks;
3572
3573
3.20k
    if (cold_blocks != NULL) {
3574
311
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3575
311
    }
3576
3.20k
    return SUCCESS;
3577
3.20k
}
3578
3579
static int
3580
convert_pseudo_conditional_jumps(cfg_builder *g)
3581
4.22k
{
3582
4.22k
    basicblock *entryblock = g->g_entryblock;
3583
20.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3584
161k
        for (int i = 0; i < b->b_iused; i++) {
3585
145k
            cfg_instr *instr = &b->b_instr[i];
3586
145k
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3587
548
                assert(i == b->b_iused - 1);
3588
548
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3589
491
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3590
548
                location loc = instr->i_loc;
3591
548
                basicblock *except = instr->i_except;
3592
548
                cfg_instr copy = {
3593
548
                            .i_opcode = COPY,
3594
548
                            .i_oparg = 1,
3595
548
                            .i_loc = loc,
3596
548
                            .i_target = NULL,
3597
548
                            .i_except = except,
3598
548
                };
3599
548
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3600
548
                cfg_instr to_bool = {
3601
548
                            .i_opcode = TO_BOOL,
3602
548
                            .i_oparg = 0,
3603
548
                            .i_loc = loc,
3604
548
                            .i_target = NULL,
3605
548
                            .i_except = except,
3606
548
                };
3607
548
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3608
548
            }
3609
145k
        }
3610
16.0k
    }
3611
4.22k
    return SUCCESS;
3612
4.22k
}
3613
3614
static int
3615
convert_pseudo_ops(cfg_builder *g)
3616
4.22k
{
3617
4.22k
    basicblock *entryblock = g->g_entryblock;
3618
20.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3619
163k
        for (int i = 0; i < b->b_iused; i++) {
3620
147k
            cfg_instr *instr = &b->b_instr[i];
3621
147k
            if (is_block_push(instr)) {
3622
800
                INSTR_SET_OP0(instr, NOP);
3623
800
            }
3624
147k
            else if (instr->i_opcode == LOAD_CLOSURE) {
3625
1.09k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3626
1.09k
                instr->i_opcode = LOAD_FAST;
3627
1.09k
            }
3628
145k
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3629
116
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3630
116
                instr->i_opcode = STORE_FAST;
3631
116
            }
3632
147k
        }
3633
16.0k
    }
3634
4.22k
    return remove_redundant_nops_and_jumps(g);
3635
4.22k
}
3636
3637
static inline bool
3638
20.4k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3639
20.4k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3640
12.0k
        return basicblock_has_no_lineno(b);
3641
12.0k
    }
3642
8.35k
    else {
3643
8.35k
        return false;
3644
8.35k
    }
3645
20.4k
}
3646
3647
3648
/* PEP 626 mandates that the f_lineno of a frame is correct
3649
 * after a frame terminates. It would be prohibitively expensive
3650
 * to continuously update the f_lineno field at runtime,
3651
 * so we make sure that all exiting instruction (raises and returns)
3652
 * have a valid line number, allowing us to compute f_lineno lazily.
3653
 * We can do this by duplicating the exit blocks without line number
3654
 * so that none have more than one predecessor. We can then safely
3655
 * copy the line number from the sole predecessor block.
3656
 */
3657
static int
3658
duplicate_exits_without_lineno(cfg_builder *g)
3659
8.45k
{
3660
8.45k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3661
3662
    /* Copy all exit blocks without line number that are targets of a jump.
3663
     */
3664
8.45k
    basicblock *entryblock = g->g_entryblock;
3665
40.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3666
32.0k
        cfg_instr *last = basicblock_last_instr(b);
3667
32.0k
        if (last == NULL) {
3668
6.38k
            continue;
3669
6.38k
        }
3670
25.6k
        if (is_jump(last)) {
3671
9.71k
            basicblock *target = next_nonempty_block(last->i_target);
3672
9.71k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3673
211
                basicblock *new_target = copy_basicblock(g, target);
3674
211
                if (new_target == NULL) {
3675
0
                    return ERROR;
3676
0
                }
3677
211
                new_target->b_instr[0].i_loc = last->i_loc;
3678
211
                last->i_target = new_target;
3679
211
                target->b_predecessors--;
3680
211
                new_target->b_predecessors = 1;
3681
211
                new_target->b_next = target->b_next;
3682
211
                new_target->b_label.id = next_lbl++;
3683
211
                target->b_next = new_target;
3684
211
            }
3685
9.71k
        }
3686
25.6k
    }
3687
3688
    /* Any remaining reachable exit blocks without line number can only be reached by
3689
     * fall through, and thus can only have a single predecessor */
3690
40.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3691
32.0k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3692
10.7k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3693
320
                cfg_instr *last = basicblock_last_instr(b);
3694
320
                assert(last != NULL);
3695
320
                b->b_next->b_instr[0].i_loc = last->i_loc;
3696
320
            }
3697
10.7k
        }
3698
32.0k
    }
3699
8.45k
    return SUCCESS;
3700
8.45k
}
3701
3702
3703
/* If an instruction has no line number, but it's predecessor in the BB does,
3704
 * then copy the line number. If a successor block has no line number, and only
3705
 * one predecessor, then inherit the line number.
3706
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3707
 * Also reduces the size of the line number table,
3708
 * but has no impact on the generated line number events.
3709
 */
3710
3711
static inline void
3712
maybe_propagate_location(basicblock *b, int i, location loc)
3713
316k
{
3714
316k
    assert(b->b_iused > i);
3715
316k
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3716
16.8k
         b->b_instr[i].i_loc = loc;
3717
16.8k
    }
3718
316k
}
3719
3720
static void
3721
propagate_line_numbers(basicblock *entryblock)
3722
8.45k
{
3723
40.4k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3724
32.0k
        cfg_instr *last = basicblock_last_instr(b);
3725
32.0k
        if (last == NULL) {
3726
6.38k
            continue;
3727
6.38k
        }
3728
3729
25.6k
        location prev_location = NO_LOCATION;
3730
329k
        for (int i = 0; i < b->b_iused; i++) {
3731
304k
            maybe_propagate_location(b, i, prev_location);
3732
304k
            prev_location = b->b_instr[i].i_loc;
3733
304k
        }
3734
25.6k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3735
7.82k
            if (b->b_next->b_iused > 0) {
3736
7.82k
                maybe_propagate_location(b->b_next, 0, prev_location);
3737
7.82k
            }
3738
7.82k
        }
3739
25.6k
        if (is_jump(last)) {
3740
9.71k
            basicblock *target = last->i_target;
3741
9.71k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3742
0
                target = target->b_next;
3743
0
            }
3744
9.71k
            if (target->b_predecessors == 1) {
3745
4.52k
                maybe_propagate_location(target, 0, prev_location);
3746
4.52k
            }
3747
9.71k
        }
3748
25.6k
    }
3749
8.45k
}
3750
3751
static int
3752
resolve_line_numbers(cfg_builder *g, int firstlineno)
3753
8.45k
{
3754
8.45k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3755
8.45k
    propagate_line_numbers(g->g_entryblock);
3756
8.45k
    return SUCCESS;
3757
8.45k
}
3758
3759
int
3760
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3761
                        int nlocals, int nparams, int firstlineno)
3762
4.22k
{
3763
4.22k
    assert(cfg_builder_check(g));
3764
4.22k
    assert(g->g_entryblock->b_iused > 0);
3765
    /** Preprocessing **/
3766
    /* Map labels to targets and mark exception handlers */
3767
4.22k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3768
4.22k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3769
4.22k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3770
3771
    /** Optimization **/
3772
3773
4.22k
    _Py_hashtable_t *consts_index = _Py_hashtable_new(
3774
4.22k
        _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
3775
4.22k
    if (consts_index == NULL) {
3776
0
        PyErr_NoMemory();
3777
0
        return ERROR;
3778
0
    }
3779
3780
29.3k
    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
3781
25.0k
        PyObject *item = PyList_GET_ITEM(consts, i);
3782
25.0k
        if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
3783
0
            continue;
3784
0
        }
3785
25.0k
        if (_Py_hashtable_set(consts_index, (void *)item,
3786
25.0k
                              (void *)(uintptr_t)i) < 0) {
3787
0
            _Py_hashtable_destroy(consts_index);
3788
0
            PyErr_NoMemory();
3789
0
            return ERROR;
3790
0
        }
3791
25.0k
    }
3792
3793
4.22k
    int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
3794
3795
4.22k
    _Py_hashtable_destroy(consts_index);
3796
3797
4.22k
    RETURN_IF_ERROR(ret);
3798
3799
4.22k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3800
4.22k
    RETURN_IF_ERROR(
3801
4.22k
        add_checks_for_loads_of_uninitialized_variables(
3802
4.22k
            g->g_entryblock, nlocals, nparams));
3803
4.22k
    RETURN_IF_ERROR(insert_superinstructions(g));
3804
3805
4.22k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3806
4.22k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3807
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3808
    // assert(all_exits_have_lineno(g->g_entryblock));
3809
4.22k
    return SUCCESS;
3810
4.22k
}
3811
3812
static int *
3813
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3814
4.22k
{
3815
4.22k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3816
4.22k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3817
4.22k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3818
3819
4.22k
    int noffsets = ncellvars + nfreevars;
3820
4.22k
    int *fixed = PyMem_New(int, noffsets);
3821
4.22k
    if (fixed == NULL) {
3822
0
        PyErr_NoMemory();
3823
0
        return NULL;
3824
0
    }
3825
5.83k
    for (int i = 0; i < noffsets; i++) {
3826
1.61k
        fixed[i] = nlocals + i;
3827
1.61k
    }
3828
3829
4.22k
    PyObject *varname, *cellindex;
3830
4.22k
    Py_ssize_t pos = 0;
3831
5.10k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3832
876
        PyObject *varindex;
3833
876
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3834
0
            goto error;
3835
0
        }
3836
876
        if (varindex == NULL) {
3837
414
            continue;
3838
414
        }
3839
3840
462
        int argoffset = PyLong_AsInt(varindex);
3841
462
        Py_DECREF(varindex);
3842
462
        if (argoffset == -1 && PyErr_Occurred()) {
3843
0
            goto error;
3844
0
        }
3845
3846
462
        int oldindex = PyLong_AsInt(cellindex);
3847
462
        if (oldindex == -1 && PyErr_Occurred()) {
3848
0
            goto error;
3849
0
        }
3850
462
        fixed[oldindex] = argoffset;
3851
462
    }
3852
4.22k
    return fixed;
3853
3854
0
error:
3855
0
    PyMem_Free(fixed);
3856
0
    return NULL;
3857
4.22k
}
3858
3859
#define IS_GENERATOR(CF) \
3860
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3861
3862
static int
3863
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3864
                           int *fixed, int nfreevars)
3865
4.22k
{
3866
4.22k
    assert(umd->u_firstlineno > 0);
3867
3868
    /* Set up cells for any variable that escapes, to be put in a closure. */
3869
4.22k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3870
4.22k
    if (ncellvars) {
3871
        // umd->u_cellvars has the cells out of order so we sort them
3872
        // before adding the MAKE_CELL instructions.  Note that we
3873
        // adjust for arg cells, which come first.
3874
663
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3875
663
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3876
663
        if (sorted == NULL) {
3877
0
            PyErr_NoMemory();
3878
0
            return ERROR;
3879
0
        }
3880
1.53k
        for (int i = 0; i < ncellvars; i++) {
3881
876
            sorted[fixed[i]] = i + 1;
3882
876
        }
3883
1.95k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3884
1.28k
            int oldindex = sorted[i] - 1;
3885
1.28k
            if (oldindex == -1) {
3886
413
                continue;
3887
413
            }
3888
876
            cfg_instr make_cell = {
3889
876
                .i_opcode = MAKE_CELL,
3890
                // This will get fixed in offset_derefs().
3891
876
                .i_oparg = oldindex,
3892
876
                .i_loc = NO_LOCATION,
3893
876
                .i_target = NULL,
3894
876
                .i_except = NULL,
3895
876
            };
3896
876
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3897
0
                PyMem_RawFree(sorted);
3898
0
                return ERROR;
3899
0
            }
3900
876
            ncellsused += 1;
3901
876
        }
3902
663
        PyMem_RawFree(sorted);
3903
663
    }
3904
3905
4.22k
    if (nfreevars) {
3906
525
        cfg_instr copy_frees = {
3907
525
            .i_opcode = COPY_FREE_VARS,
3908
525
            .i_oparg = nfreevars,
3909
525
            .i_loc = NO_LOCATION,
3910
525
            .i_target = NULL,
3911
525
            .i_except = NULL,
3912
525
        };
3913
525
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3914
525
    }
3915
3916
4.22k
    return SUCCESS;
3917
4.22k
}
3918
3919
static int
3920
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3921
4.22k
{
3922
4.22k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3923
4.22k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3924
4.22k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3925
4.22k
    int noffsets = ncellvars + nfreevars;
3926
3927
    // First deal with duplicates (arg cells).
3928
4.22k
    int numdropped = 0;
3929
5.83k
    for (int i = 0; i < noffsets ; i++) {
3930
1.61k
        if (fixedmap[i] == i + nlocals) {
3931
1.14k
            fixedmap[i] -= numdropped;
3932
1.14k
        }
3933
462
        else {
3934
            // It was a duplicate (cell/arg).
3935
462
            numdropped += 1;
3936
462
        }
3937
1.61k
    }
3938
3939
    // Then update offsets, either relative to locals or by cell2arg.
3940
20.2k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3941
163k
        for (int i = 0; i < b->b_iused; i++) {
3942
147k
            cfg_instr *inst = &b->b_instr[i];
3943
            // This is called before extended args are generated.
3944
147k
            assert(inst->i_opcode != EXTENDED_ARG);
3945
147k
            int oldoffset = inst->i_oparg;
3946
147k
            switch(inst->i_opcode) {
3947
876
                case MAKE_CELL:
3948
1.97k
                case LOAD_CLOSURE:
3949
3.76k
                case LOAD_DEREF:
3950
4.18k
                case STORE_DEREF:
3951
4.18k
                case DELETE_DEREF:
3952
4.18k
                case LOAD_FROM_DICT_OR_DEREF:
3953
4.18k
                    assert(oldoffset >= 0);
3954
4.18k
                    assert(oldoffset < noffsets);
3955
4.18k
                    assert(fixedmap[oldoffset] >= 0);
3956
4.18k
                    inst->i_oparg = fixedmap[oldoffset];
3957
147k
            }
3958
147k
        }
3959
16.0k
    }
3960
3961
4.22k
    return numdropped;
3962
4.22k
}
3963
3964
static int
3965
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
3966
4.22k
{
3967
4.22k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3968
4.22k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3969
4.22k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3970
4.22k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3971
4.22k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3972
4.22k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3973
4.22k
    assert(INT_MAX - nlocals - ncellvars > 0);
3974
4.22k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3975
4.22k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3976
4.22k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3977
4.22k
    if (cellfixedoffsets == NULL) {
3978
0
        return ERROR;
3979
0
    }
3980
3981
    // This must be called before fix_cell_offsets().
3982
4.22k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
3983
0
        PyMem_Free(cellfixedoffsets);
3984
0
        return ERROR;
3985
0
    }
3986
3987
4.22k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3988
4.22k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3989
4.22k
    cellfixedoffsets = NULL;
3990
4.22k
    if (numdropped < 0) {
3991
0
        return ERROR;
3992
0
    }
3993
3994
4.22k
    nlocalsplus -= numdropped;
3995
4.22k
    return nlocalsplus;
3996
4.22k
}
3997
3998
cfg_builder *
3999
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
4000
4.22k
{
4001
4.22k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
4002
0
        return NULL;
4003
0
    }
4004
4.22k
    cfg_builder *g = _PyCfgBuilder_New();
4005
4.22k
    if (g == NULL) {
4006
0
        return NULL;
4007
0
    }
4008
168k
    for (int i = 0; i < seq->s_used; i++) {
4009
164k
        seq->s_instrs[i].i_target = 0;
4010
164k
    }
4011
168k
    for (int i = 0; i < seq->s_used; i++) {
4012
164k
        _PyInstruction *instr = &seq->s_instrs[i];
4013
164k
        if (HAS_TARGET(instr->i_opcode)) {
4014
6.32k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
4015
6.32k
            seq->s_instrs[instr->i_oparg].i_target = 1;
4016
6.32k
        }
4017
164k
    }
4018
4.22k
    int offset = 0;
4019
168k
    for (int i = 0; i < seq->s_used; i++) {
4020
164k
        _PyInstruction *instr = &seq->s_instrs[i];
4021
164k
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
4022
747
            if (seq->s_annotations_code != NULL) {
4023
1
                assert(seq->s_annotations_code->s_labelmap_size == 0
4024
1
                    && seq->s_annotations_code->s_nested == NULL);
4025
4
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
4026
3
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
4027
3
                    assert(!HAS_TARGET(ann_instr->i_opcode));
4028
3
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
4029
0
                        goto error;
4030
0
                    }
4031
3
                }
4032
1
                offset += seq->s_annotations_code->s_used - 1;
4033
1
            }
4034
746
            else {
4035
746
                offset -= 1;
4036
746
            }
4037
747
            continue;
4038
747
        }
4039
163k
        if (instr->i_target) {
4040
5.06k
            jump_target_label lbl_ = {i + offset};
4041
5.06k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
4042
0
                goto error;
4043
0
            }
4044
5.06k
        }
4045
163k
        int opcode = instr->i_opcode;
4046
163k
        int oparg = instr->i_oparg;
4047
163k
        if (HAS_TARGET(opcode)) {
4048
6.32k
            oparg += offset;
4049
6.32k
        }
4050
163k
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
4051
0
            goto error;
4052
0
        }
4053
163k
    }
4054
4.22k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
4055
0
        goto error;
4056
0
    }
4057
4.22k
    return g;
4058
0
error:
4059
0
    _PyCfgBuilder_Free(g);
4060
0
    return NULL;
4061
4.22k
}
4062
4063
int
4064
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
4065
4.22k
{
4066
4.22k
    int lbl = 0;
4067
20.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
4068
16.1k
        b->b_label = (jump_target_label){lbl};
4069
16.1k
        lbl += 1;
4070
16.1k
    }
4071
20.4k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
4072
16.1k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
4073
166k
        for (int i = 0; i < b->b_iused; i++) {
4074
150k
            cfg_instr *instr = &b->b_instr[i];
4075
150k
            if (HAS_TARGET(instr->i_opcode)) {
4076
                /* Set oparg to the label id (it will later be mapped to an offset) */
4077
4.86k
                instr->i_oparg = instr->i_target->b_label.id;
4078
4.86k
            }
4079
150k
            RETURN_IF_ERROR(
4080
150k
                _PyInstructionSequence_Addop(
4081
150k
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
4082
4083
150k
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
4084
150k
            if (instr->i_except != NULL) {
4085
12.8k
                hi->h_label = instr->i_except->b_label.id;
4086
12.8k
                hi->h_startdepth = instr->i_except->b_startdepth;
4087
12.8k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
4088
12.8k
            }
4089
137k
            else {
4090
137k
                hi->h_label = -1;
4091
137k
            }
4092
150k
        }
4093
16.1k
    }
4094
4.22k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
4095
0
        return ERROR;
4096
0
    }
4097
4.22k
    return SUCCESS;
4098
4.22k
}
4099
4100
4101
int
4102
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
4103
                                     _PyCompile_CodeUnitMetadata *umd,
4104
                                     int *stackdepth, int *nlocalsplus,
4105
                                     _PyInstructionSequence *seq)
4106
4.22k
{
4107
4.22k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
4108
4109
4.22k
    *stackdepth = calculate_stackdepth(g);
4110
4.22k
    if (*stackdepth < 0) {
4111
0
        return ERROR;
4112
0
    }
4113
4114
4.22k
    *nlocalsplus = prepare_localsplus(umd, g);
4115
4.22k
    if (*nlocalsplus < 0) {
4116
0
        return ERROR;
4117
0
    }
4118
4119
4.22k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
4120
4121
    /* Order of basic blocks must have been determined by now */
4122
4123
4.22k
    RETURN_IF_ERROR(normalize_jumps(g));
4124
4.22k
    assert(no_redundant_jumps(g));
4125
4126
    /* Can't modify the bytecode after inserting instructions that produce
4127
     * borrowed references.
4128
     */
4129
4.22k
    RETURN_IF_ERROR(optimize_load_fast(g));
4130
4131
    /* Can't modify the bytecode after computing jump offsets. */
4132
4.22k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4133
0
        return ERROR;
4134
0
    }
4135
4136
4.22k
    return SUCCESS;
4137
4.22k
}
4138
4139
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4140
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4141
 */
4142
int
4143
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4144
0
{
4145
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4146
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4147
0
    return SUCCESS;
4148
0
}
4149
4150
/* Exported API functions */
4151
4152
int
4153
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4154
0
{
4155
0
    stack_effects effs;
4156
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4157
0
        return PY_INVALID_STACK_EFFECT;
4158
0
    }
4159
0
    return effs.net;
4160
0
}
4161
4162
int
4163
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4164
17
{
4165
17
    stack_effects effs;
4166
17
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4167
0
        return PY_INVALID_STACK_EFFECT;
4168
0
    }
4169
17
    return effs.net;
4170
17
}
4171
4172
/* Access to compiler optimizations for unit tests.
4173
4174
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4175
 * a CFG, optimizes it and converts back to an instruction list.
4176
 */
4177
4178
static PyObject *
4179
cfg_to_instruction_sequence(cfg_builder *g)
4180
0
{
4181
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4182
0
    if (seq == NULL) {
4183
0
        return NULL;
4184
0
    }
4185
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4186
0
        PyInstructionSequence_Fini(seq);
4187
0
        return NULL;
4188
0
    }
4189
0
    return (PyObject*)seq;
4190
0
}
4191
4192
PyObject *
4193
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4194
0
{
4195
0
    if (!_PyInstructionSequence_Check(seq)) {
4196
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4197
0
        return NULL;
4198
0
    }
4199
0
    if (!PyList_Check(consts)) {
4200
0
        PyErr_SetString(PyExc_TypeError, "consts must be a list");
4201
0
        return NULL;
4202
0
    }
4203
0
    PyObject *const_cache = PyDict_New();
4204
0
    if (const_cache == NULL) {
4205
0
        return NULL;
4206
0
    }
4207
4208
0
    PyObject *res = NULL;
4209
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4210
0
    if (g == NULL) {
4211
0
        goto error;
4212
0
    }
4213
0
    int nparams = 0, firstlineno = 1;
4214
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4215
0
                                nparams, firstlineno) < 0) {
4216
0
        goto error;
4217
0
    }
4218
4219
0
    if (calculate_stackdepth(g) == ERROR) {
4220
0
        goto error;
4221
0
    }
4222
4223
0
    if (optimize_load_fast(g) != SUCCESS) {
4224
0
        goto error;
4225
0
    }
4226
4227
0
    res = cfg_to_instruction_sequence(g);
4228
0
error:
4229
0
    Py_DECREF(const_cache);
4230
0
    _PyCfgBuilder_Free(g);
4231
0
    return res;
4232
0
}