Coverage Report

Created: 2026-03-19 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
4.99M
#define SUCCESS 0
19
20.9k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
7.73M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
1.63M
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the block are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
1.76M
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
1.76M
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
6.72M
{
101
6.72M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
6.72M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
6.72M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
5.85M
{
108
5.85M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
5.85M
}
110
111
/* One arg*/
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
393k
    do { \
114
393k
        assert(OPCODE_HAS_ARG(OP)); \
115
393k
        cfg_instr *_instr__ptr_ = (I); \
116
393k
        _instr__ptr_->i_opcode = (OP); \
117
393k
        _instr__ptr_->i_oparg = (ARG); \
118
393k
    } while (0);
119
120
/* No args*/
121
#define INSTR_SET_OP0(I, OP) \
122
638k
    do { \
123
638k
        assert(!OPCODE_HAS_ARG(OP)); \
124
638k
        cfg_instr *_instr__ptr_ = (I); \
125
638k
        _instr__ptr_->i_opcode = (OP); \
126
638k
        _instr__ptr_->i_oparg = 0; \
127
638k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
389k
    do { \
131
389k
        cfg_instr *_instr__ptr_ = (I); \
132
389k
        _instr__ptr_->i_loc = (LOC); \
133
389k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
1.63M
{
144
1.63M
    assert(b != NULL);
145
1.63M
    _Py_c_array_t array = {
146
1.63M
        .array = (void*)b->b_instr,
147
1.63M
        .allocated_entries = b->b_ialloc,
148
1.63M
        .item_size = sizeof(cfg_instr),
149
1.63M
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
1.63M
    };
151
152
1.63M
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
1.63M
    b->b_instr = array.array;
154
1.63M
    b->b_ialloc = array.allocated_entries;
155
1.63M
    return b->b_iused++;
156
1.63M
}
157
158
static cfg_instr *
159
6.25M
basicblock_last_instr(const basicblock *b) {
160
6.25M
    assert(b->b_iused >= 0);
161
6.25M
    if (b->b_iused > 0) {
162
5.79M
        assert(b->b_instr != NULL);
163
5.79M
        return &b->b_instr[b->b_iused - 1];
164
5.79M
    }
165
455k
    return NULL;
166
6.25M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
178k
{
175
178k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
178k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
178k
    b->b_list = g->g_block_list;
182
178k
    g->g_block_list = b;
183
178k
    b->b_label = NO_LABEL;
184
178k
    return b;
185
178k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
1.61M
{
190
1.61M
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
1.61M
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
1.61M
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
1.61M
    assert(0 <= oparg && oparg < (1 << 30));
194
195
1.61M
    int off = basicblock_next_instr(b);
196
1.61M
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
1.61M
    cfg_instr *i = &b->b_instr[off];
200
1.61M
    i->i_opcode = opcode;
201
1.61M
    i->i_oparg = oparg;
202
1.61M
    i->i_loc = loc;
203
    // memory is already zero initialized
204
1.61M
    assert(i->i_target == NULL);
205
1.61M
    assert(i->i_except == NULL);
206
207
1.61M
    return SUCCESS;
208
1.61M
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
203
{
213
203
    cfg_instr *last = basicblock_last_instr(b);
214
203
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
203
    RETURN_IF_ERROR(
219
203
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
203
    last = basicblock_last_instr(b);
221
203
    assert(last && last->i_opcode == opcode);
222
203
    last->i_target = target;
223
203
    return SUCCESS;
224
203
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
3.75k
{
229
12.2k
    for (int i = 0; i < from->b_iused; i++) {
230
8.54k
        int n = basicblock_next_instr(to);
231
8.54k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
8.54k
        to->b_instr[n] = from->b_instr[i];
235
8.54k
    }
236
3.75k
    return SUCCESS;
237
3.75k
}
238
239
static inline int
240
1.78M
basicblock_nofallthrough(const basicblock *b) {
241
1.78M
    cfg_instr *last = basicblock_last_instr(b);
242
1.78M
    return (last &&
243
1.70M
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
1.30M
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
1.78M
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
2.75M
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
936
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
936
    assert(BB_NO_FALLTHROUGH(block));
257
936
    basicblock *result = cfg_builder_new_block(g);
258
936
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
936
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
936
    return result;
265
936
}
266
267
static int
268
11.2k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
11.2k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
164k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
153k
        block->b_instr[i] = block->b_instr[i-1];
272
153k
    }
273
11.2k
    block->b_instr[pos] = *instr;
274
11.2k
    return SUCCESS;
275
11.2k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
149k
{
343
149k
    assert(block != NULL);
344
149k
    g->g_curblock->b_next = block;
345
149k
    g->g_curblock = block;
346
149k
    return block;
347
149k
}
348
349
static inline int
350
384k
basicblock_exits_scope(const basicblock *b) {
351
384k
    cfg_instr *last = basicblock_last_instr(b);
352
384k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
384k
}
354
355
static inline int
356
280k
basicblock_has_eval_break(const basicblock *b) {
357
1.57M
    for (int i = 0; i < b->b_iused; i++) {
358
1.38M
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
86.5k
            return true;
360
86.5k
        }
361
1.38M
    }
362
193k
    return false;
363
280k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
1.67M
{
368
1.67M
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
1.67M
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
109k
        return true;
371
109k
    }
372
1.56M
    if (IS_LABEL(g->g_current_label)) {
373
39.2k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
39.2k
            return true;
375
39.2k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
39.2k
    }
382
1.52M
    return false;
383
1.56M
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
1.67M
{
388
1.67M
    if (cfg_builder_current_block_is_terminated(g)) {
389
149k
        basicblock *b = cfg_builder_new_block(g);
390
149k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
149k
        b->b_label = g->g_current_label;
394
149k
        g->g_current_label = NO_LABEL;
395
149k
        cfg_builder_use_next_block(g, b);
396
149k
    }
397
1.67M
    return SUCCESS;
398
1.67M
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
42.0k
{
404
42.0k
    assert(g->g_entryblock->b_iused > 0);
405
390k
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
348k
        assert(!_PyMem_IsPtrFreed(block));
407
348k
        if (block->b_instr != NULL) {
408
348k
            assert(block->b_ialloc > 0);
409
348k
            assert(block->b_iused >= 0);
410
348k
            assert(block->b_ialloc >= block->b_iused);
411
348k
        }
412
0
        else {
413
0
            assert (block->b_iused == 0);
414
0
            assert (block->b_ialloc == 0);
415
0
        }
416
348k
    }
417
42.0k
    return true;
418
42.0k
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
21.0k
{
424
21.0k
    g->g_block_list = NULL;
425
21.0k
    basicblock *block = cfg_builder_new_block(g);
426
21.0k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
21.0k
    g->g_curblock = g->g_entryblock = block;
430
21.0k
    g->g_current_label = NO_LABEL;
431
21.0k
    return SUCCESS;
432
21.0k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
21.0k
{
437
21.0k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
21.0k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
21.0k
    memset(g, 0, sizeof(cfg_builder));
443
21.0k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
21.0k
    return g;
448
21.0k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
21.0k
{
453
21.0k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
21.0k
    assert(cfg_builder_check(g));
457
21.0k
    basicblock *b = g->g_block_list;
458
199k
    while (b != NULL) {
459
178k
        if (b->b_instr) {
460
178k
            PyMem_Free((void *)b->b_instr);
461
178k
        }
462
178k
        basicblock *next = b->b_list;
463
178k
        PyMem_Free((void *)b);
464
178k
        b = next;
465
178k
    }
466
21.0k
    PyMem_Free(g);
467
21.0k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
21.0k
{
472
21.0k
    int nblocks = 0;
473
191k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
170k
        nblocks++;
475
170k
    }
476
21.0k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
21.0k
    return SUCCESS;
481
21.0k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
88.9k
{
486
88.9k
    g->g_current_label = lbl;
487
88.9k
    return cfg_builder_maybe_start_new_block(g);
488
88.9k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
1.58M
{
493
1.58M
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
1.58M
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
1.58M
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
607k
{
501
678k
    while (b && b->b_iused == 0) {
502
70.9k
        b = b->b_next;
503
70.9k
    }
504
607k
    return b;
505
607k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
21.0k
no_redundant_nops(cfg_builder *g) {
514
21.0k
    if (remove_redundant_nops(g) != 0) {
515
0
        return false;
516
0
    }
517
21.0k
    return true;
518
21.0k
}
519
520
static bool
521
42.0k
no_redundant_jumps(cfg_builder *g) {
522
391k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
349k
        cfg_instr *last = basicblock_last_instr(b);
524
349k
        if (last != NULL) {
525
302k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
61.3k
                basicblock *next = next_nonempty_block(b->b_next);
527
61.3k
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
61.3k
                if (jump_target == next) {
529
0
                    assert(next);
530
0
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
0
                        assert(0);
532
0
                        return false;
533
0
                    }
534
0
                }
535
61.3k
            }
536
302k
        }
537
349k
    }
538
42.0k
    return true;
539
42.0k
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
178k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
178k
    cfg_instr *last = basicblock_last_instr(b);
547
178k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
153k
        return SUCCESS;
549
153k
    }
550
178k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
24.6k
    bool is_forward = last->i_target->b_visited == 0;
553
24.6k
    if (is_forward) {
554
24.6k
        RETURN_IF_ERROR(
555
24.6k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
24.6k
        return SUCCESS;
557
24.6k
    }
558
559
15
    int reversed_opcode = 0;
560
15
    switch(last->i_opcode) {
561
0
        case POP_JUMP_IF_NOT_NONE:
562
0
            reversed_opcode = POP_JUMP_IF_NONE;
563
0
            break;
564
0
        case POP_JUMP_IF_NONE:
565
0
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
0
            break;
567
15
        case POP_JUMP_IF_FALSE:
568
15
            reversed_opcode = POP_JUMP_IF_TRUE;
569
15
            break;
570
0
        case POP_JUMP_IF_TRUE:
571
0
            reversed_opcode = POP_JUMP_IF_FALSE;
572
0
            break;
573
15
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
15
    basicblock *target = last->i_target;
579
15
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
15
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
15
    RETURN_IF_ERROR(
584
15
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
15
    RETURN_IF_ERROR(
586
15
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
15
    backwards_jump->b_startdepth = target->b_startdepth;
588
15
    last->i_opcode = reversed_opcode;
589
15
    last->i_target = b->b_next;
590
591
15
    backwards_jump->b_cold = b->b_cold;
592
15
    backwards_jump->b_next = b->b_next;
593
15
    b->b_next = backwards_jump;
594
15
    return SUCCESS;
595
15
}
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
21.0k
{
601
21.0k
    basicblock *entryblock = g->g_entryblock;
602
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
178k
        b->b_visited = 0;
604
178k
    }
605
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
178k
        b->b_visited = 1;
607
178k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
178k
    }
609
21.0k
    return SUCCESS;
610
21.0k
}
611
612
static int
613
21.0k
check_cfg(cfg_builder *g) {
614
191k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if jump or exit is not last instruction in the block. */
616
1.75M
        for (int i = 0; i < b->b_iused; i++) {
617
1.58M
            int opcode = b->b_instr[i].i_opcode;
618
1.58M
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
1.58M
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
130k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
130k
            }
625
1.58M
        }
626
170k
    }
627
21.0k
    return SUCCESS;
628
21.0k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
75.0k
{
633
75.0k
    int lbl = -1;
634
755k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
680k
        if (b->b_label.id > lbl) {
636
301k
            lbl = b->b_label.id;
637
301k
        }
638
680k
    }
639
75.0k
    return lbl;
640
75.0k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
21.0k
{
646
21.0k
    int max_label = get_max_label(entryblock);
647
21.0k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
21.0k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
21.0k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
21.0k
    memset(label2block, 0, mapsize);
654
191k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
170k
        if (b->b_label.id >= 0) {
656
88.9k
            label2block[b->b_label.id] = b;
657
88.9k
        }
658
170k
    }
659
191k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
1.75M
        for (int i = 0; i < b->b_iused; i++) {
661
1.58M
            cfg_instr *instr = &b->b_instr[i];
662
1.58M
            assert(instr->i_target == NULL);
663
1.58M
            if (HAS_TARGET(instr->i_opcode)) {
664
109k
                int lbl = instr->i_oparg;
665
109k
                assert(lbl >= 0 && lbl <= max_label);
666
109k
                instr->i_target = label2block[lbl];
667
109k
                assert(instr->i_target != NULL);
668
109k
                assert(instr->i_target->b_label.id == lbl);
669
109k
            }
670
1.58M
        }
671
170k
    }
672
21.0k
    PyMem_Free(label2block);
673
21.0k
    return SUCCESS;
674
21.0k
}
675
676
static int
677
21.0k
mark_except_handlers(basicblock *entryblock) {
678
21.0k
#ifndef NDEBUG
679
191k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
170k
        assert(!b->b_except_handler);
681
170k
    }
682
21.0k
#endif
683
191k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
1.75M
        for (int i=0; i < b->b_iused; i++) {
685
1.58M
            cfg_instr *instr = &b->b_instr[i];
686
1.58M
            if (is_block_push(instr)) {
687
27.7k
                instr->i_target->b_except_handler = 1;
688
27.7k
            }
689
1.58M
        }
690
170k
    }
691
21.0k
    return SUCCESS;
692
21.0k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
27.5k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
27.5k
    assert(is_block_push(setup));
704
27.5k
    int opcode = setup->i_opcode;
705
27.5k
    basicblock * target = setup->i_target;
706
27.5k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
10.8k
        target->b_preserve_lasti = 1;
708
10.8k
    }
709
27.5k
    assert(stack->depth <= CO_MAXBLOCKS);
710
27.5k
    stack->handlers[++stack->depth] = target;
711
27.5k
    return target;
712
27.5k
}
713
714
static basicblock *
715
28.2k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
28.2k
    assert(stack->depth > 0);
717
28.2k
    return stack->handlers[--stack->depth];
718
28.2k
}
719
720
static basicblock *
721
158k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
158k
    return stack->handlers[stack->depth];
723
158k
}
724
725
static struct _PyCfgExceptStack *
726
21.0k
make_except_stack(void) {
727
21.0k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
21.0k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
21.0k
    new->depth = 0;
733
21.0k
    new->handlers[0] = NULL;
734
21.0k
    return new;
735
21.0k
}
736
737
static struct _PyCfgExceptStack *
738
61.7k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
61.7k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
61.7k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
61.7k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
61.7k
    return copy;
746
61.7k
}
747
748
static basicblock**
749
137k
make_cfg_traversal_stack(basicblock *entryblock) {
750
137k
    int nblocks = 0;
751
1.44M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
1.30M
        b->b_visited = 0;
753
1.30M
        nblocks++;
754
1.30M
    }
755
137k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
137k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
137k
    return stack;
760
137k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have different stack effect when jump to the target and
765
   when not jump. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jump
768
   * 1 -- when jump
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
1.17M
{
779
1.17M
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
1.17M
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
1.17M
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
1.17M
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
1.17M
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
1.17M
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
27.5k
        effects->net = 0;
793
27.5k
        return 0;
794
27.5k
    }
795
1.14M
    effects->net = pushed - popped;
796
1.14M
    return 0;
797
1.17M
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
215k
{
802
215k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
215k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
165k
        assert(b->b_startdepth < 0);
808
165k
        b->b_startdepth = depth;
809
165k
        *(*sp)++ = b;
810
165k
    }
811
215k
    return SUCCESS;
812
215k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
21.0k
{
820
21.0k
    basicblock *entryblock = g->g_entryblock;
821
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
178k
        b->b_startdepth = INT_MIN;
823
178k
    }
824
21.0k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
21.0k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
21.0k
    int stackdepth = -1;
831
21.0k
    int maxdepth = 0;
832
21.0k
    basicblock **sp = stack;
833
21.0k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
186k
    while (sp != stack) {
837
165k
        basicblock *b = *--sp;
838
165k
        int depth = b->b_startdepth;
839
165k
        assert(depth >= 0);
840
165k
        basicblock *next = b->b_next;
841
1.16M
        for (int i = 0; i < b->b_iused; i++) {
842
1.07M
            cfg_instr *instr = &b->b_instr[i];
843
1.07M
            stack_effects effects;
844
1.07M
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
1.07M
            int new_depth = depth + effects.net;
851
1.07M
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
854
0
                goto error;
855
0
            }
856
1.07M
            maxdepth = Py_MAX(maxdepth, depth);
857
1.07M
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
100k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
100k
                int target_depth = depth + effects.net;
865
100k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
100k
                maxdepth = Py_MAX(maxdepth, depth);
867
100k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
100k
            }
871
1.07M
            depth = new_depth;
872
1.07M
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
1.07M
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
1.03M
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
72.0k
            {
876
                /* remaining code is dead */
877
72.0k
                next = NULL;
878
72.0k
                break;
879
72.0k
            }
880
1.07M
        }
881
165k
        if (next != NULL) {
882
93.8k
            assert(BB_HAS_FALLTHROUGH(b));
883
93.8k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
93.8k
        }
887
165k
    }
888
21.0k
    stackdepth = maxdepth;
889
21.0k
error:
890
21.0k
    PyMem_Free(stack);
891
21.0k
    return stackdepth;
892
21.0k
}
893
894
static int
895
21.0k
label_exception_targets(basicblock *entryblock) {
896
21.0k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
21.0k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
21.0k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
21.0k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
21.0k
    except_stack->depth = 0;
907
21.0k
    todo_stack[0] = entryblock;
908
21.0k
    entryblock->b_visited = 1;
909
21.0k
    entryblock->b_exceptstack = except_stack;
910
21.0k
    basicblock **todo = &todo_stack[1];
911
21.0k
    basicblock *handler = NULL;
912
179k
    while (todo > todo_stack) {
913
158k
        todo--;
914
158k
        basicblock *b = todo[0];
915
158k
        assert(b->b_visited == 1);
916
158k
        except_stack = b->b_exceptstack;
917
158k
        assert(except_stack != NULL);
918
158k
        b->b_exceptstack = NULL;
919
158k
        handler = except_stack_top(except_stack);
920
158k
        int last_yield_except_depth = -1;
921
1.71M
        for (int i = 0; i < b->b_iused; i++) {
922
1.55M
            cfg_instr *instr = &b->b_instr[i];
923
1.55M
            if (is_block_push(instr)) {
924
27.5k
                if (!instr->i_target->b_visited) {
925
27.5k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
27.5k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
27.5k
                    instr->i_target->b_exceptstack = copy;
930
27.5k
                    todo[0] = instr->i_target;
931
27.5k
                    instr->i_target->b_visited = 1;
932
27.5k
                    todo++;
933
27.5k
                }
934
27.5k
                handler = push_except_block(except_stack, instr);
935
27.5k
            }
936
1.52M
            else if (instr->i_opcode == POP_BLOCK) {
937
28.2k
                handler = pop_except_block(except_stack);
938
28.2k
                INSTR_SET_OP0(instr, NOP);
939
28.2k
            }
940
1.50M
            else if (is_jump(instr)) {
941
80.9k
                instr->i_except = handler;
942
80.9k
                assert(i == b->b_iused -1);
943
80.9k
                if (!instr->i_target->b_visited) {
944
44.0k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
34.1k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
34.1k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
34.1k
                        instr->i_target->b_exceptstack = copy;
950
34.1k
                    }
951
9.95k
                    else {
952
9.95k
                        instr->i_target->b_exceptstack = except_stack;
953
9.95k
                        except_stack = NULL;
954
9.95k
                    }
955
44.0k
                    todo[0] = instr->i_target;
956
44.0k
                    instr->i_target->b_visited = 1;
957
44.0k
                    todo++;
958
44.0k
                }
959
80.9k
            }
960
1.42M
            else if (instr->i_opcode == YIELD_VALUE) {
961
14.9k
                instr->i_except = handler;
962
14.9k
                last_yield_except_depth = except_stack->depth;
963
14.9k
            }
964
1.40M
            else if (instr->i_opcode == RESUME) {
965
35.9k
                instr->i_except = handler;
966
35.9k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
14.9k
                    assert(last_yield_except_depth >= 0);
968
14.9k
                    if (last_yield_except_depth == 1) {
969
40
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
40
                    }
971
14.9k
                    last_yield_except_depth = -1;
972
14.9k
                }
973
35.9k
            }
974
1.36M
            else if (instr->i_opcode == RETURN_GENERATOR) {
975
845
                instr->i_except = NULL;
976
845
            }
977
1.36M
            else {
978
1.36M
                instr->i_except = handler;
979
1.36M
            }
980
1.55M
        }
981
158k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
982
65.4k
            assert(except_stack != NULL);
983
65.4k
            b->b_next->b_exceptstack = except_stack;
984
65.4k
            todo[0] = b->b_next;
985
65.4k
            b->b_next->b_visited = 1;
986
65.4k
            todo++;
987
65.4k
        }
988
92.6k
        else if (except_stack != NULL) {
989
82.7k
           PyMem_Free(except_stack);
990
82.7k
        }
991
158k
    }
992
#ifdef Py_DEBUG
993
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
994
        assert(b->b_exceptstack == NULL);
995
    }
996
#endif
997
21.0k
    PyMem_Free(todo_stack);
998
21.0k
    return SUCCESS;
999
0
error:
1000
0
    PyMem_Free(todo_stack);
1001
0
    PyMem_Free(except_stack);
1002
0
    return ERROR;
1003
21.0k
}
1004
1005
/***** CFG optimizations *****/
1006
1007
static int
1008
42.0k
remove_unreachable(basicblock *entryblock) {
1009
383k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1010
341k
        b->b_predecessors = 0;
1011
341k
    }
1012
42.0k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1013
42.0k
    if (stack == NULL) {
1014
0
        return ERROR;
1015
0
    }
1016
42.0k
    basicblock **sp = stack;
1017
42.0k
    entryblock->b_predecessors = 1;
1018
42.0k
    *sp++ = entryblock;
1019
42.0k
    entryblock->b_visited = 1;
1020
347k
    while (sp > stack) {
1021
305k
        basicblock *b = *(--sp);
1022
305k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1023
166k
            if (!b->b_next->b_visited) {
1024
130k
                assert(b->b_next->b_predecessors == 0);
1025
130k
                *sp++ = b->b_next;
1026
130k
                b->b_next->b_visited = 1;
1027
130k
            }
1028
166k
            b->b_next->b_predecessors++;
1029
166k
        }
1030
2.93M
        for (int i = 0; i < b->b_iused; i++) {
1031
2.63M
            basicblock *target;
1032
2.63M
            cfg_instr *instr = &b->b_instr[i];
1033
2.63M
            if (is_jump(instr) || is_block_push(instr)) {
1034
204k
                target = instr->i_target;
1035
204k
                if (!target->b_visited) {
1036
133k
                    *sp++ = target;
1037
133k
                    target->b_visited = 1;
1038
133k
                }
1039
204k
                target->b_predecessors++;
1040
204k
            }
1041
2.63M
        }
1042
305k
    }
1043
42.0k
    PyMem_Free(stack);
1044
1045
    /* Delete unreachable instructions */
1046
383k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1047
341k
       if (b->b_predecessors == 0) {
1048
35.8k
            b->b_iused = 0;
1049
35.8k
            b->b_except_handler = 0;
1050
35.8k
       }
1051
341k
    }
1052
42.0k
    return SUCCESS;
1053
42.0k
}
1054
1055
static int
1056
1.29M
basicblock_remove_redundant_nops(basicblock *bb) {
1057
    /* Remove NOPs when legal to do so. */
1058
1.29M
    int dest = 0;
1059
1.29M
    int prev_lineno = -1;
1060
8.54M
    for (int src = 0; src < bb->b_iused; src++) {
1061
7.25M
        int lineno = bb->b_instr[src].i_loc.lineno;
1062
7.25M
        if (bb->b_instr[src].i_opcode == NOP) {
1063
            /* Eliminate no-op if it doesn't have a line number */
1064
475k
            if (lineno < 0) {
1065
391k
                continue;
1066
391k
            }
1067
            /* or, if the previous instruction had the same line number. */
1068
84.2k
            if (prev_lineno == lineno) {
1069
45.0k
                continue;
1070
45.0k
            }
1071
            /* or, if the next instruction has same line number or no line number */
1072
39.1k
            if (src < bb->b_iused - 1) {
1073
34.4k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1074
34.4k
                if (next_lineno == lineno) {
1075
29.2k
                    continue;
1076
29.2k
                }
1077
5.25k
                if (next_lineno < 0) {
1078
533
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1079
533
                    continue;
1080
533
                }
1081
5.25k
            }
1082
4.66k
            else {
1083
4.66k
                basicblock *next = next_nonempty_block(bb->b_next);
1084
                /* or if last instruction in BB and next BB has same line number */
1085
4.66k
                if (next) {
1086
4.66k
                    location next_loc = NO_LOCATION;
1087
4.94k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1088
4.93k
                        cfg_instr *instr = &next->b_instr[next_i];
1089
4.93k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1090
                            /* Skip over NOPs without a location, they will be removed */
1091
279
                            continue;
1092
279
                        }
1093
4.65k
                        next_loc = instr->i_loc;
1094
4.65k
                        break;
1095
4.93k
                    }
1096
4.66k
                    if (lineno == next_loc.lineno) {
1097
863
                        continue;
1098
863
                    }
1099
4.66k
                }
1100
4.66k
            }
1101
1102
39.1k
        }
1103
6.79M
        if (dest != src) {
1104
575k
            bb->b_instr[dest] = bb->b_instr[src];
1105
575k
        }
1106
6.79M
        dest++;
1107
6.79M
        prev_lineno = lineno;
1108
6.79M
    }
1109
1.29M
    assert(dest <= bb->b_iused);
1110
1.29M
    int num_removed = bb->b_iused - dest;
1111
1.29M
    bb->b_iused = dest;
1112
1.29M
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1113
1.29M
    return num_removed;
1114
1.29M
}
1115
1116
static int
1117
87.2k
remove_redundant_nops(cfg_builder *g) {
1118
87.2k
    int changes = 0;
1119
1.18M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1120
1.09M
        int change = basicblock_remove_redundant_nops(b);
1121
1.09M
        RETURN_IF_ERROR(change);
1122
1.09M
        changes += change;
1123
1.09M
    }
1124
87.2k
    return changes;
1125
87.2k
}
1126
1127
static int
1128
remove_redundant_nops_and_pairs(basicblock *entryblock)
1129
21.0k
{
1130
21.0k
    bool done = false;
1131
1132
42.4k
    while (! done) {
1133
21.4k
        done = true;
1134
21.4k
        cfg_instr *prev_instr = NULL;
1135
21.4k
        cfg_instr *instr = NULL;
1136
213k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1137
192k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1138
192k
            if (IS_LABEL(b->b_label)) {
1139
                /* this block is a jump target, forget instr */
1140
104k
                instr = NULL;
1141
104k
            }
1142
1.42M
            for (int i = 0; i < b->b_iused; i++) {
1143
1.23M
                prev_instr = instr;
1144
1.23M
                instr = &b->b_instr[i];
1145
1.23M
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1146
1.23M
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1147
1.23M
                int opcode = instr->i_opcode;
1148
1.23M
                bool is_redundant_pair = false;
1149
1.23M
                if (opcode == POP_TOP) {
1150
94.2k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1151
2.33k
                       is_redundant_pair = true;
1152
2.33k
                   }
1153
91.9k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1154
36
                       is_redundant_pair = true;
1155
36
                   }
1156
94.2k
                }
1157
1.23M
                if (is_redundant_pair) {
1158
2.36k
                    INSTR_SET_OP0(prev_instr, NOP);
1159
2.36k
                    INSTR_SET_OP0(instr, NOP);
1160
2.36k
                    done = false;
1161
2.36k
                }
1162
1.23M
            }
1163
192k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1164
131k
                instr = NULL;
1165
131k
            }
1166
192k
        }
1167
21.4k
    }
1168
21.0k
    return SUCCESS;
1169
21.0k
}
1170
1171
static int
1172
45.1k
remove_redundant_jumps(cfg_builder *g) {
1173
    /* If a non-empty block ends with a jump instruction, check if the next
1174
     * non-empty block reached through normal flow control is the target
1175
     * of that jump. If it is, then the jump instruction is redundant and
1176
     * can be deleted.
1177
     *
1178
     * Return the number of changes applied, or -1 on error.
1179
     */
1180
1181
45.1k
    int changes = 0;
1182
801k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1183
756k
        cfg_instr *last = basicblock_last_instr(b);
1184
756k
        if (last == NULL) {
1185
114k
            continue;
1186
114k
        }
1187
756k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1188
641k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1189
163k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1190
163k
            if (jump_target == NULL) {
1191
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1192
0
                return ERROR;
1193
0
            }
1194
163k
            basicblock *next = next_nonempty_block(b->b_next);
1195
163k
            if (jump_target == next) {
1196
4.53k
                changes++;
1197
4.53k
                INSTR_SET_OP0(last, NOP);
1198
4.53k
            }
1199
163k
        }
1200
641k
    }
1201
1202
45.1k
    return changes;
1203
45.1k
}
1204
1205
static inline bool
1206
190k
basicblock_has_no_lineno(basicblock *b) {
1207
219k
    for (int i = 0; i < b->b_iused; i++) {
1208
207k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1209
178k
            return false;
1210
178k
        }
1211
207k
    }
1212
12.4k
    return true;
1213
190k
}
1214
1215
/* Maximum size of basic block that should be copied in optimizer */
1216
2.30k
#define MAX_COPY_SIZE 4
1217
1218
/* If this block ends with an unconditional jump to a small exit block or
1219
 * a block that has no line numbers (and no fallthrough), then
1220
 * remove the jump and extend this block with the target.
1221
 * Returns 1 if extended, 0 if no change, and -1 on error.
1222
 */
1223
static int
1224
287k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1225
287k
    cfg_instr *last = basicblock_last_instr(bb);
1226
287k
    if (last == NULL) {
1227
0
        return 0;
1228
0
    }
1229
287k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1230
225k
        return 0;
1231
225k
    }
1232
62.4k
    basicblock *target = last->i_target;
1233
62.4k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1234
2.30k
                             target->b_iused <= MAX_COPY_SIZE);
1235
62.4k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1236
5.14k
                                     !BB_HAS_FALLTHROUGH(target));
1237
62.4k
    if (small_exit_block || no_lineno_no_fallthrough) {
1238
2.81k
        assert(is_jump(last));
1239
2.81k
        int removed_jump_opcode = last->i_opcode;
1240
2.81k
        INSTR_SET_OP0(last, NOP);
1241
2.81k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1242
2.81k
        if (no_lineno_no_fallthrough) {
1243
2.48k
            last = basicblock_last_instr(bb);
1244
2.48k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1245
1.21k
                removed_jump_opcode == JUMP)
1246
6
            {
1247
                /* Make sure we don't lose eval breaker checks */
1248
6
                last->i_opcode = JUMP;
1249
6
            }
1250
2.48k
        }
1251
2.81k
        target->b_predecessors--;
1252
2.81k
        return 1;
1253
2.81k
    }
1254
59.5k
    return 0;
1255
62.4k
}
1256
1257
static int
1258
21.0k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1259
21.0k
    bool changes;
1260
22.0k
    do {
1261
22.0k
        changes = false;
1262
309k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1263
287k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1264
287k
            RETURN_IF_ERROR(res);
1265
287k
            if (res) {
1266
2.81k
                changes = true;
1267
2.81k
            }
1268
287k
        }
1269
22.0k
    } while(changes); /* every change removes a jump, ensuring convergence */
1270
21.0k
    return changes;
1271
21.0k
}
1272
1273
// Attempt to eliminate jumps to jumps by updating inst to jump to
1274
// target->i_target using the provided opcode. Return whether or not the
1275
// optimization was successful.
1276
static bool
1277
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1278
188
{
1279
188
    assert(is_jump(inst));
1280
188
    assert(is_jump(target));
1281
188
    assert(inst == basicblock_last_instr(bb));
1282
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1283
    // changes (and we fall into an infinite loop):
1284
188
    if (inst->i_target != target->i_target) {
1285
        /* Change inst to NOP and append a jump to target->i_target. The
1286
         * NOP will be removed later if it's not needed for the lineno.
1287
         */
1288
188
        INSTR_SET_OP0(inst, NOP);
1289
1290
188
        RETURN_IF_ERROR(
1291
188
            basicblock_add_jump(
1292
188
                bb, opcode, target->i_target, target->i_loc));
1293
1294
188
        return true;
1295
188
    }
1296
0
    return false;
1297
188
}
1298
1299
static int
1300
loads_const(int opcode)
1301
1.78M
{
1302
1.78M
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1303
1.78M
}
1304
1305
/* Returns new reference */
1306
static PyObject*
1307
get_const_value(int opcode, int oparg, PyObject *co_consts)
1308
806k
{
1309
806k
    PyObject *constant = NULL;
1310
806k
    assert(loads_const(opcode));
1311
806k
    if (opcode == LOAD_CONST) {
1312
656k
        constant = PyList_GET_ITEM(co_consts, oparg);
1313
656k
    }
1314
806k
    if (opcode == LOAD_SMALL_INT) {
1315
149k
        return PyLong_FromLong(oparg);
1316
149k
    }
1317
1318
656k
    if (constant == NULL) {
1319
0
        PyErr_SetString(PyExc_SystemError,
1320
0
                        "Internal error: failed to get value of a constant");
1321
0
        return NULL;
1322
0
    }
1323
656k
    return Py_NewRef(constant);
1324
656k
}
1325
1326
// Steals a reference to newconst.
1327
static int
1328
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1329
194k
{
1330
194k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1331
0
        Py_DECREF(newconst);
1332
0
        return -1;
1333
0
    }
1334
1335
194k
    Py_ssize_t index;
1336
41.4M
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1337
82.7M
        if (PyList_GET_ITEM(consts, index) == newconst) {
1338
132k
            break;
1339
132k
        }
1340
41.3M
    }
1341
194k
    if (index == PyList_GET_SIZE(consts)) {
1342
62.1k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1343
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1344
0
            Py_DECREF(newconst);
1345
0
            return -1;
1346
0
        }
1347
62.1k
        if (PyList_Append(consts, newconst)) {
1348
0
            Py_DECREF(newconst);
1349
0
            return -1;
1350
0
        }
1351
62.1k
    }
1352
194k
    Py_DECREF(newconst);
1353
194k
    return (int)index;
1354
194k
}
1355
1356
/*
1357
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1358
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1359
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1360
1361
  Return boolean indicating whether "size" such instructions were found.
1362
*/
1363
static bool
1364
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1365
334k
{
1366
334k
    assert(start < bb->b_iused);
1367
334k
    assert(size >= 0);
1368
334k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1369
1370
1.10M
    for (; start >= 0 && size > 0; start--) {
1371
864k
        cfg_instr *instr = &bb->b_instr[start];
1372
864k
        if (instr->i_opcode == NOP) {
1373
312k
            continue;
1374
312k
        }
1375
552k
        if (!loads_const(instr->i_opcode)) {
1376
98.1k
            return false;
1377
98.1k
        }
1378
454k
        instrs[--size] = instr;
1379
454k
    }
1380
1381
236k
    return size == 0;
1382
334k
}
1383
1384
/*
1385
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1386
  Caller must make sure "instrs" has at least "size" elements.
1387
*/
1388
static void
1389
nop_out(cfg_instr **instrs, int size)
1390
224k
{
1391
614k
    for (int i = 0; i < size; i++) {
1392
389k
        cfg_instr *instr = instrs[i];
1393
389k
        assert(instr->i_opcode != NOP);
1394
389k
        INSTR_SET_OP0(instr, NOP);
1395
389k
        INSTR_SET_LOC(instr, NO_LOCATION);
1396
389k
    }
1397
224k
}
1398
1399
/* Does not steal reference to "newconst".
1400
   Return 1 if changed instruction to LOAD_SMALL_INT.
1401
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1402
   Return -1 on error.
1403
*/
1404
static int
1405
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1406
                               PyObject *consts, PyObject *const_cache)
1407
605k
{
1408
605k
    if (PyLong_CheckExact(newconst)) {
1409
330k
        int overflow;
1410
330k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1411
330k
        if (val == -1 && PyErr_Occurred()) {
1412
0
            return -1;
1413
0
        }
1414
330k
        if (!overflow && _PY_IS_SMALL_INT(val)) {
1415
193k
            assert(_Py_IsImmortal(newconst));
1416
193k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1417
193k
            return 1;
1418
193k
        }
1419
330k
    }
1420
412k
    return 0;
1421
605k
}
1422
1423
1424
/* Steals reference to "newconst" */
1425
static int
1426
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1427
                      PyObject *consts, PyObject *const_cache)
1428
220k
{
1429
220k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1430
220k
    if (res < 0) {
1431
0
        Py_DECREF(newconst);
1432
0
        return ERROR;
1433
0
    }
1434
220k
    if (res > 0) {
1435
27.3k
        return SUCCESS;
1436
27.3k
    }
1437
193k
    int oparg = add_const(newconst, consts, const_cache);
1438
193k
    RETURN_IF_ERROR(oparg);
1439
193k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1440
193k
    return SUCCESS;
1441
193k
}
1442
1443
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1444
   with    LOAD_CONST (c1, c2, ... cn).
1445
   The consts table must still be in list form so that the
1446
   new constant (c1, c2, ... cn) can be appended.
1447
   Called with codestr pointing to the first LOAD_CONST.
1448
*/
1449
static int
1450
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1451
13.2k
{
1452
    /* Pre-conditions */
1453
13.2k
    assert(PyDict_CheckExact(const_cache));
1454
13.2k
    assert(PyList_CheckExact(consts));
1455
1456
13.2k
    cfg_instr *instr = &bb->b_instr[i];
1457
13.2k
    assert(instr->i_opcode == BUILD_TUPLE);
1458
1459
13.2k
    int seq_size = instr->i_oparg;
1460
13.2k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1461
0
        return SUCCESS;
1462
0
    }
1463
1464
13.2k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1465
13.2k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1466
        /* not a const sequence */
1467
10.1k
        return SUCCESS;
1468
10.1k
    }
1469
1470
3.11k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1471
3.11k
    if (const_tuple == NULL) {
1472
0
        return ERROR;
1473
0
    }
1474
1475
7.14k
    for (int i = 0; i < seq_size; i++) {
1476
4.03k
        cfg_instr *inst = const_instrs[i];
1477
4.03k
        assert(loads_const(inst->i_opcode));
1478
4.03k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1479
4.03k
        if (element == NULL) {
1480
0
            Py_DECREF(const_tuple);
1481
0
            return ERROR;
1482
0
        }
1483
4.03k
        PyTuple_SET_ITEM(const_tuple, i, element);
1484
4.03k
    }
1485
1486
3.11k
    nop_out(const_instrs, seq_size);
1487
3.11k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1488
3.11k
}
1489
1490
/* Replace:
1491
    BUILD_LIST 0
1492
    LOAD_CONST c1
1493
    LIST_APPEND 1
1494
    LOAD_CONST c2
1495
    LIST_APPEND 1
1496
    ...
1497
    LOAD_CONST cN
1498
    LIST_APPEND 1
1499
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1500
   with:
1501
    LOAD_CONST (c1, c2, ... cN)
1502
*/
1503
static int
1504
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1505
                                      PyObject *consts, PyObject *const_cache)
1506
493
{
1507
493
    assert(PyDict_CheckExact(const_cache));
1508
493
    assert(PyList_CheckExact(consts));
1509
493
    assert(i >= 0);
1510
493
    assert(i < bb->b_iused);
1511
1512
493
    cfg_instr *intrinsic = &bb->b_instr[i];
1513
493
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1514
493
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1515
1516
493
    int consts_found = 0;
1517
493
    bool expect_append = true;
1518
1519
23.6k
    for (int pos = i - 1; pos >= 0; pos--) {
1520
23.5k
        cfg_instr *instr = &bb->b_instr[pos];
1521
23.5k
        int opcode = instr->i_opcode;
1522
23.5k
        int oparg = instr->i_oparg;
1523
1524
23.5k
        if (opcode == NOP) {
1525
14.8k
            continue;
1526
14.8k
        }
1527
1528
8.76k
        if (opcode == BUILD_LIST && oparg == 0) {
1529
17
            if (!expect_append) {
1530
                /* Not a sequence start. */
1531
0
                return SUCCESS;
1532
0
            }
1533
1534
            /* Sequence start, we are done. */
1535
17
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1536
17
            if (newconst == NULL) {
1537
0
                return ERROR;
1538
0
            }
1539
1540
10.3k
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1541
10.3k
                instr = &bb->b_instr[newpos];
1542
10.3k
                if (instr->i_opcode == NOP) {
1543
6.30k
                    continue;
1544
6.30k
                }
1545
4.06k
                if (loads_const(instr->i_opcode)) {
1546
2.02k
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1547
2.02k
                    if (constant == NULL) {
1548
0
                        Py_DECREF(newconst);
1549
0
                        return ERROR;
1550
0
                    }
1551
2.02k
                    assert(consts_found > 0);
1552
2.02k
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1553
2.02k
                }
1554
4.06k
                nop_out(&instr, 1);
1555
4.06k
            }
1556
17
            assert(consts_found == 0);
1557
17
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1558
17
        }
1559
1560
8.75k
        if (expect_append) {
1561
4.43k
            if (opcode != LIST_APPEND || oparg != 1) {
1562
102
                return SUCCESS;
1563
102
            }
1564
4.43k
        }
1565
4.31k
        else {
1566
4.31k
            if (!loads_const(opcode)) {
1567
357
                return SUCCESS;
1568
357
            }
1569
3.95k
            consts_found++;
1570
3.95k
        }
1571
1572
8.29k
        expect_append = !expect_append;
1573
8.29k
    }
1574
1575
    /* Did not find sequence start. */
1576
17
    return SUCCESS;
1577
493
}
1578
1579
6.33k
#define MIN_CONST_SEQUENCE_SIZE 3
1580
/*
1581
Optimize lists and sets for:
1582
    1. "for" loop, comprehension or "in"/"not in" tests:
1583
           Change literal list or set of constants into constant
1584
           tuple or frozenset respectively. Change list of
1585
           non-constants into tuple.
1586
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1587
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1588
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1589
           or BUILD_SET & SET_UPDATE respectively.
1590
*/
1591
static int
1592
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1593
                        PyObject *consts, PyObject *const_cache)
1594
3.16k
{
1595
3.16k
    assert(PyDict_CheckExact(const_cache));
1596
3.16k
    assert(PyList_CheckExact(consts));
1597
1598
3.16k
    cfg_instr *instr = &bb->b_instr[i];
1599
3.16k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1600
1601
3.16k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1602
3.16k
    int seq_size = instr->i_oparg;
1603
3.16k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1604
3.16k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1605
3.00k
    {
1606
3.00k
        return SUCCESS;
1607
3.00k
    }
1608
1609
159
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1610
159
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1611
98
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1612
            /* iterate over a tuple instead of list */
1613
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1614
0
        }
1615
98
        return SUCCESS;
1616
98
    }
1617
1618
61
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1619
61
    if (const_result == NULL) {
1620
0
        return ERROR;
1621
0
    }
1622
1623
366
    for (int i = 0; i < seq_size; i++) {
1624
305
        cfg_instr *inst = const_instrs[i];
1625
305
        assert(loads_const(inst->i_opcode));
1626
305
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1627
305
        if (element == NULL) {
1628
0
            Py_DECREF(const_result);
1629
0
            return ERROR;
1630
0
        }
1631
305
        PyTuple_SET_ITEM(const_result, i, element);
1632
305
    }
1633
1634
61
    if (instr->i_opcode == BUILD_SET) {
1635
50
        PyObject *frozenset = PyFrozenSet_New(const_result);
1636
50
        if (frozenset == NULL) {
1637
0
            Py_DECREF(const_result);
1638
0
            return ERROR;
1639
0
        }
1640
50
        Py_SETREF(const_result, frozenset);
1641
50
    }
1642
1643
61
    int index = add_const(const_result, consts, const_cache);
1644
61
    RETURN_IF_ERROR(index);
1645
61
    nop_out(const_instrs, seq_size);
1646
1647
61
    if (contains_or_iter) {
1648
8
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1649
8
    }
1650
53
    else {
1651
53
        assert(i >= 2);
1652
53
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1653
1654
53
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1655
1656
53
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1657
53
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1658
53
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1659
53
    }
1660
61
    return SUCCESS;
1661
61
}
1662
1663
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1664
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1665
 * Used to avoid creating constants which are slow to hash.
1666
 */
1667
static Py_ssize_t
1668
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1669
61.1k
{
1670
61.1k
    if (PyTuple_Check(obj)) {
1671
54.0k
        Py_ssize_t i;
1672
54.0k
        limit -= PyTuple_GET_SIZE(obj);
1673
112k
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1674
58.8k
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1675
58.8k
            if (limit < 0) {
1676
291
                return limit;
1677
291
            }
1678
58.8k
        }
1679
54.0k
    }
1680
60.8k
    return limit;
1681
61.1k
}
1682
1683
22.4k
#define MAX_INT_SIZE           128  /* bits */
1684
2.51k
#define MAX_COLLECTION_SIZE    256  /* items */
1685
1.28k
#define MAX_STR_SIZE          4096  /* characters */
1686
2.28k
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1687
1688
static PyObject *
1689
const_folding_safe_multiply(PyObject *v, PyObject *w)
1690
30.3k
{
1691
30.3k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1692
9.12k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1693
30.3k
    ) {
1694
6.10k
        int64_t vbits = _PyLong_NumBits(v);
1695
6.10k
        int64_t wbits = _PyLong_NumBits(w);
1696
6.10k
        assert(vbits >= 0);
1697
6.10k
        assert(wbits >= 0);
1698
6.10k
        if (vbits + wbits > MAX_INT_SIZE) {
1699
107
            return NULL;
1700
107
        }
1701
6.10k
    }
1702
24.2k
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1703
2.58k
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1704
2.58k
        if (size) {
1705
2.55k
            long n = PyLong_AsLong(v);
1706
2.55k
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1707
198
                return NULL;
1708
198
            }
1709
2.35k
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1710
61
                return NULL;
1711
61
            }
1712
2.35k
        }
1713
2.58k
    }
1714
21.6k
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1715
2.21k
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1716
2.21k
                                               PyBytes_GET_SIZE(w);
1717
2.21k
        if (size) {
1718
1.30k
            long n = PyLong_AsLong(v);
1719
1.30k
            if (n < 0 || n > MAX_STR_SIZE / size) {
1720
140
                return NULL;
1721
140
            }
1722
1.30k
        }
1723
2.21k
    }
1724
19.4k
    else if (PyLong_Check(w) &&
1725
11.7k
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1726
4.54k
    {
1727
4.54k
        return const_folding_safe_multiply(w, v);
1728
4.54k
    }
1729
1730
25.3k
    return PyNumber_Multiply(v, w);
1731
30.3k
}
1732
1733
static PyObject *
1734
const_folding_safe_power(PyObject *v, PyObject *w)
1735
38.5k
{
1736
38.5k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1737
17.5k
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1738
38.5k
    ) {
1739
15.3k
        int64_t vbits = _PyLong_NumBits(v);
1740
15.3k
        size_t wbits = PyLong_AsSize_t(w);
1741
15.3k
        assert(vbits >= 0);
1742
15.3k
        if (wbits == (size_t)-1) {
1743
428
            return NULL;
1744
428
        }
1745
14.9k
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1746
1.65k
            return NULL;
1747
1.65k
        }
1748
14.9k
    }
1749
1750
36.4k
    return PyNumber_Power(v, w, Py_None);
1751
38.5k
}
1752
1753
static PyObject *
1754
const_folding_safe_lshift(PyObject *v, PyObject *w)
1755
1.96k
{
1756
1.96k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1757
1.87k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1758
1.96k
    ) {
1759
800
        int64_t vbits = _PyLong_NumBits(v);
1760
800
        size_t wbits = PyLong_AsSize_t(w);
1761
800
        assert(vbits >= 0);
1762
800
        if (wbits == (size_t)-1) {
1763
233
            return NULL;
1764
233
        }
1765
567
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1766
312
            return NULL;
1767
312
        }
1768
567
    }
1769
1770
1.41k
    return PyNumber_Lshift(v, w);
1771
1.96k
}
1772
1773
static PyObject *
1774
const_folding_safe_mod(PyObject *v, PyObject *w)
1775
15.0k
{
1776
15.0k
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1777
26
        return NULL;
1778
26
    }
1779
1780
15.0k
    return PyNumber_Remainder(v, w);
1781
15.0k
}
1782
1783
static PyObject *
1784
eval_const_binop(PyObject *left, int op, PyObject *right)
1785
178k
{
1786
178k
    assert(left != NULL && right != NULL);
1787
178k
    assert(op >= 0 && op <= NB_OPARG_LAST);
1788
1789
178k
    PyObject *result = NULL;
1790
178k
    switch (op) {
1791
28.5k
        case NB_ADD:
1792
28.5k
            result = PyNumber_Add(left, right);
1793
28.5k
            break;
1794
29.0k
        case NB_SUBTRACT:
1795
29.0k
            result = PyNumber_Subtract(left, right);
1796
29.0k
            break;
1797
25.8k
        case NB_MULTIPLY:
1798
25.8k
            result = const_folding_safe_multiply(left, right);
1799
25.8k
            break;
1800
19.6k
        case NB_TRUE_DIVIDE:
1801
19.6k
            result = PyNumber_TrueDivide(left, right);
1802
19.6k
            break;
1803
6.94k
        case NB_FLOOR_DIVIDE:
1804
6.94k
            result = PyNumber_FloorDivide(left, right);
1805
6.94k
            break;
1806
15.0k
        case NB_REMAINDER:
1807
15.0k
            result = const_folding_safe_mod(left, right);
1808
15.0k
            break;
1809
38.5k
        case NB_POWER:
1810
38.5k
            result = const_folding_safe_power(left, right);
1811
38.5k
            break;
1812
1.96k
        case NB_LSHIFT:
1813
1.96k
            result = const_folding_safe_lshift(left, right);
1814
1.96k
            break;
1815
4.32k
        case NB_RSHIFT:
1816
4.32k
            result = PyNumber_Rshift(left, right);
1817
4.32k
            break;
1818
780
        case NB_OR:
1819
780
            result = PyNumber_Or(left, right);
1820
780
            break;
1821
2.73k
        case NB_XOR:
1822
2.73k
            result = PyNumber_Xor(left, right);
1823
2.73k
            break;
1824
3.24k
        case NB_AND:
1825
3.24k
            result = PyNumber_And(left, right);
1826
3.24k
            break;
1827
1.66k
        case NB_SUBSCR:
1828
1.66k
            result = PyObject_GetItem(left, right);
1829
1.66k
            break;
1830
128
        case NB_MATRIX_MULTIPLY:
1831
            // No builtin constants implement matrix multiplication
1832
128
            break;
1833
0
        default:
1834
0
            Py_UNREACHABLE();
1835
178k
    }
1836
178k
    return result;
1837
178k
}
1838
1839
static int
1840
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1841
239k
{
1842
402k
    #define BINOP_OPERAND_COUNT 2
1843
239k
    assert(PyDict_CheckExact(const_cache));
1844
239k
    assert(PyList_CheckExact(consts));
1845
1846
239k
    cfg_instr *binop = &bb->b_instr[i];
1847
239k
    assert(binop->i_opcode == BINARY_OP);
1848
1849
239k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1850
239k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1851
        /* not a const sequence */
1852
60.9k
        return SUCCESS;
1853
60.9k
    }
1854
1855
178k
    cfg_instr *lhs_instr = operands_instrs[0];
1856
178k
    assert(loads_const(lhs_instr->i_opcode));
1857
178k
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1858
178k
    if (lhs == NULL) {
1859
0
        return ERROR;
1860
0
    }
1861
1862
178k
    cfg_instr *rhs_instr = operands_instrs[1];
1863
178k
    assert(loads_const(rhs_instr->i_opcode));
1864
178k
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1865
178k
    if (rhs == NULL) {
1866
0
        Py_DECREF(lhs);
1867
0
        return ERROR;
1868
0
    }
1869
1870
178k
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1871
178k
    Py_DECREF(lhs);
1872
178k
    Py_DECREF(rhs);
1873
178k
    if (newconst == NULL) {
1874
14.9k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1875
0
            return ERROR;
1876
0
        }
1877
14.9k
        PyErr_Clear();
1878
14.9k
        return SUCCESS;
1879
14.9k
    }
1880
1881
163k
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1882
163k
    return instr_make_load_const(binop, newconst, consts, const_cache);
1883
178k
}
1884
1885
static PyObject *
1886
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1887
54.8k
{
1888
54.8k
    assert(operand != NULL);
1889
54.8k
    assert(
1890
54.8k
        opcode == UNARY_NEGATIVE ||
1891
54.8k
        opcode == UNARY_INVERT ||
1892
54.8k
        opcode == UNARY_NOT ||
1893
54.8k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1894
54.8k
    );
1895
54.8k
    PyObject *result;
1896
54.8k
    switch (opcode) {
1897
41.0k
        case UNARY_NEGATIVE:
1898
41.0k
            result = PyNumber_Negative(operand);
1899
41.0k
            break;
1900
4.86k
        case UNARY_INVERT:
1901
            // XXX: This should be removed once the ~bool depreciation expires.
1902
4.86k
            if (PyBool_Check(operand)) {
1903
38
                return NULL;
1904
38
            }
1905
4.83k
            result = PyNumber_Invert(operand);
1906
4.83k
            break;
1907
17
        case UNARY_NOT: {
1908
17
            int r = PyObject_IsTrue(operand);
1909
17
            if (r < 0) {
1910
0
                return NULL;
1911
0
            }
1912
17
            result = PyBool_FromLong(!r);
1913
17
            break;
1914
17
        }
1915
8.89k
        case CALL_INTRINSIC_1:
1916
8.89k
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1917
0
                Py_UNREACHABLE();
1918
0
            }
1919
8.89k
            result = PyNumber_Positive(operand);
1920
8.89k
            break;
1921
0
        default:
1922
0
            Py_UNREACHABLE();
1923
54.8k
    }
1924
54.7k
    return result;
1925
54.8k
}
1926
1927
static int
1928
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1929
81.9k
{
1930
136k
    #define UNARYOP_OPERAND_COUNT 1
1931
81.9k
    assert(PyDict_CheckExact(const_cache));
1932
81.9k
    assert(PyList_CheckExact(consts));
1933
81.9k
    cfg_instr *unaryop = &bb->b_instr[i];
1934
1935
81.9k
    cfg_instr *operand_instr;
1936
81.9k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1937
        /* not a const */
1938
27.1k
        return SUCCESS;
1939
27.1k
    }
1940
1941
81.9k
    assert(loads_const(operand_instr->i_opcode));
1942
54.8k
    PyObject *operand = get_const_value(
1943
54.8k
        operand_instr->i_opcode,
1944
54.8k
        operand_instr->i_oparg,
1945
54.8k
        consts
1946
54.8k
    );
1947
54.8k
    if (operand == NULL) {
1948
0
        return ERROR;
1949
0
    }
1950
1951
54.8k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1952
54.8k
    Py_DECREF(operand);
1953
54.8k
    if (newconst == NULL) {
1954
618
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1955
0
            return ERROR;
1956
0
        }
1957
618
        PyErr_Clear();
1958
618
        return SUCCESS;
1959
618
    }
1960
1961
54.2k
    if (unaryop->i_opcode == UNARY_NOT) {
1962
17
        assert(PyBool_Check(newconst));
1963
17
    }
1964
54.2k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1965
54.2k
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1966
54.2k
}
1967
1968
57.6k
#define VISITED (-1)
1969
1970
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1971
// same effect.
1972
static int
1973
swaptimize(basicblock *block, int *ix)
1974
28.8k
{
1975
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1976
    // for this function. Make sure to blow away cached *.pyc files first!
1977
28.8k
    assert(*ix < block->b_iused);
1978
28.8k
    cfg_instr *instructions = &block->b_instr[*ix];
1979
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1980
    // maximum depth of the stack manipulations:
1981
28.8k
    assert(instructions[0].i_opcode == SWAP);
1982
28.8k
    int depth = instructions[0].i_oparg;
1983
28.8k
    int len = 0;
1984
28.8k
    int more = false;
1985
28.8k
    int limit = block->b_iused - *ix;
1986
33.4k
    while (++len < limit) {
1987
33.4k
        int opcode = instructions[len].i_opcode;
1988
33.4k
        if (opcode == SWAP) {
1989
4.63k
            depth = Py_MAX(depth, instructions[len].i_oparg);
1990
4.63k
            more = true;
1991
4.63k
        }
1992
28.8k
        else if (opcode != NOP) {
1993
28.8k
            break;
1994
28.8k
        }
1995
33.4k
    }
1996
    // It's already optimal if there's only one SWAP:
1997
28.8k
    if (!more) {
1998
24.4k
        return SUCCESS;
1999
24.4k
    }
2000
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
2001
4.43k
    int *stack = PyMem_Malloc(depth * sizeof(int));
2002
4.43k
    if (stack == NULL) {
2003
0
        PyErr_NoMemory();
2004
0
        return ERROR;
2005
0
    }
2006
17.7k
    for (int i = 0; i < depth; i++) {
2007
13.3k
        stack[i] = i;
2008
13.3k
    }
2009
    // Simulate the combined effect of these instructions by "running" them on
2010
    // our "stack":
2011
13.5k
    for (int i = 0; i < len; i++) {
2012
9.07k
        if (instructions[i].i_opcode == SWAP) {
2013
9.07k
            int oparg = instructions[i].i_oparg;
2014
9.07k
            int top = stack[0];
2015
            // SWAPs are 1-indexed:
2016
9.07k
            stack[0] = stack[oparg - 1];
2017
9.07k
            stack[oparg - 1] = top;
2018
9.07k
        }
2019
9.07k
    }
2020
    // Now we can begin! Our approach here is based on a solution to a closely
2021
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2022
    // think of this algorithm as determining the steps needed to efficiently
2023
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2024
    // though, we can efficiently *shuffle* it! For this reason, we will be
2025
    // replacing instructions starting from the *end* of the run. Since the
2026
    // solution is optimal, we don't need to worry about running out of space:
2027
4.43k
    int current = len - 1;
2028
17.7k
    for (int i = 0; i < depth; i++) {
2029
        // Skip items that have already been visited, or just happen to be in
2030
        // the correct location:
2031
13.3k
        if (stack[i] == VISITED || stack[i] == i) {
2032
8.92k
            continue;
2033
8.92k
        }
2034
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2035
        // with other items; traversing the cycle and swapping each item with
2036
        // the next will put them all in the correct place. The weird
2037
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2038
        // can only swap from that position:
2039
4.41k
        int j = i;
2040
17.6k
        while (true) {
2041
            // Skip the actual swap if our item is zero, since swapping the top
2042
            // item with itself is pointless:
2043
17.6k
            if (j) {
2044
8.85k
                assert(0 <= current);
2045
                // SWAPs are 1-indexed:
2046
8.85k
                instructions[current].i_opcode = SWAP;
2047
8.85k
                instructions[current--].i_oparg = j + 1;
2048
8.85k
            }
2049
17.6k
            if (stack[j] == VISITED) {
2050
                // Completed the cycle:
2051
4.41k
                assert(j == i);
2052
4.41k
                break;
2053
4.41k
            }
2054
13.2k
            int next_j = stack[j];
2055
13.2k
            stack[j] = VISITED;
2056
13.2k
            j = next_j;
2057
13.2k
        }
2058
4.41k
    }
2059
    // NOP out any unused instructions:
2060
4.66k
    while (0 <= current) {
2061
224
        INSTR_SET_OP0(&instructions[current--], NOP);
2062
224
    }
2063
4.43k
    PyMem_Free(stack);
2064
4.43k
    *ix += len - 1;
2065
4.43k
    return SUCCESS;
2066
4.43k
}
2067
2068
2069
// This list is pretty small, since it's only okay to reorder opcodes that:
2070
// - can't affect control flow (like jumping or raising exceptions)
2071
// - can't invoke arbitrary code (besides finalizers)
2072
// - only touch the TOS (and pop it when finished)
2073
#define SWAPPABLE(opcode) \
2074
30.5k
    ((opcode) == STORE_FAST || \
2075
30.5k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2076
30.5k
     (opcode) == POP_TOP)
2077
2078
#define STORES_TO(instr) \
2079
6
    (((instr).i_opcode == STORE_FAST || \
2080
6
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2081
6
     ? (instr).i_oparg : -1)
2082
2083
static int
2084
next_swappable_instruction(basicblock *block, int i, int lineno)
2085
33.6k
{
2086
33.6k
    while (++i < block->b_iused) {
2087
30.4k
        cfg_instr *instruction = &block->b_instr[i];
2088
30.4k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2089
            // Optimizing across this instruction could cause user-visible
2090
            // changes in the names bound between line tracing events!
2091
0
            return -1;
2092
0
        }
2093
30.4k
        if (instruction->i_opcode == NOP) {
2094
0
            continue;
2095
0
        }
2096
30.4k
        if (SWAPPABLE(instruction->i_opcode)) {
2097
4.85k
            return i;
2098
4.85k
        }
2099
25.6k
        return -1;
2100
30.4k
    }
2101
3.18k
    return -1;
2102
33.6k
}
2103
2104
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2105
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2106
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2107
static void
2108
apply_static_swaps(basicblock *block, int i)
2109
28.8k
{
2110
    // SWAPs are to our left, and potential swaperands are to our right:
2111
29.0k
    for (; 0 <= i; i--) {
2112
29.0k
        assert(i < block->b_iused);
2113
29.0k
        cfg_instr *swap = &block->b_instr[i];
2114
29.0k
        if (swap->i_opcode != SWAP) {
2115
236
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2116
                // Nope, but we know how to handle these. Keep looking:
2117
204
                continue;
2118
204
            }
2119
            // We can't reason about what this instruction does. Bail:
2120
32
            return;
2121
236
        }
2122
28.8k
        int j = next_swappable_instruction(block, i, -1);
2123
28.8k
        if (j < 0) {
2124
23.9k
            return;
2125
23.9k
        }
2126
4.83k
        int k = j;
2127
4.83k
        int lineno = block->b_instr[j].i_loc.lineno;
2128
4.85k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2129
4.85k
            k = next_swappable_instruction(block, k, lineno);
2130
4.85k
            if (k < 0) {
2131
4.83k
                return;
2132
4.83k
            }
2133
4.85k
        }
2134
        // The reordering is not safe if the two instructions to be swapped
2135
        // store to the same location, or if any intervening instruction stores
2136
        // to the same location as either of them.
2137
2
        int store_j = STORES_TO(block->b_instr[j]);
2138
2
        int store_k = STORES_TO(block->b_instr[k]);
2139
2
        if (store_j >= 0 || store_k >= 0) {
2140
2
            if (store_j == store_k) {
2141
0
                return;
2142
0
            }
2143
4
            for (int idx = j + 1; idx < k; idx++) {
2144
2
                int store_idx = STORES_TO(block->b_instr[idx]);
2145
2
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2146
0
                    return;
2147
0
                }
2148
2
            }
2149
2
        }
2150
2151
        // Success!
2152
2
        INSTR_SET_OP0(swap, NOP);
2153
2
        cfg_instr temp = block->b_instr[j];
2154
2
        block->b_instr[j] = block->b_instr[k];
2155
2
        block->b_instr[k] = temp;
2156
2
    }
2157
28.8k
}
2158
2159
static int
2160
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2161
171k
{
2162
171k
    assert(PyDict_CheckExact(const_cache));
2163
171k
    assert(PyList_CheckExact(consts));
2164
171k
    int opcode = 0;
2165
171k
    int oparg = 0;
2166
1.73M
    for (int i = 0; i < bb->b_iused; i++) {
2167
1.56M
        cfg_instr *inst = &bb->b_instr[i];
2168
1.56M
        if (inst->i_opcode == LOAD_CONST) {
2169
384k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2170
384k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2171
384k
            Py_DECREF(constant);
2172
384k
            if (res < 0) {
2173
0
                return ERROR;
2174
0
            }
2175
384k
        }
2176
1.56M
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2177
202k
                                      inst->i_opcode == COPY &&
2178
220
                                      inst->i_oparg == 1);
2179
1.56M
        if (! is_copy_of_load_const) {
2180
1.56M
            opcode = inst->i_opcode;
2181
1.56M
            oparg = inst->i_oparg;
2182
1.56M
        }
2183
1.56M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2184
1.56M
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2185
1.17M
            continue;
2186
1.17M
        }
2187
384k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2188
384k
        switch(nextop) {
2189
639
            case POP_JUMP_IF_FALSE:
2190
1.24k
            case POP_JUMP_IF_TRUE:
2191
1.89k
            case JUMP_IF_FALSE:
2192
2.23k
            case JUMP_IF_TRUE:
2193
2.23k
            {
2194
                /* Remove LOAD_CONST const; conditional jump */
2195
2.23k
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2196
2.23k
                if (cnt == NULL) {
2197
0
                    return ERROR;
2198
0
                }
2199
2.23k
                int is_true = PyObject_IsTrue(cnt);
2200
2.23k
                Py_DECREF(cnt);
2201
2.23k
                if (is_true == -1) {
2202
0
                    return ERROR;
2203
0
                }
2204
2.23k
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2205
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2206
1.24k
                    INSTR_SET_OP0(inst, NOP);
2207
1.24k
                }
2208
2.23k
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2209
2.23k
                if (is_true == jump_if_true) {
2210
1.11k
                    bb->b_instr[i+1].i_opcode = JUMP;
2211
1.11k
                }
2212
1.12k
                else {
2213
1.12k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2214
1.12k
                }
2215
2.23k
                break;
2216
2.23k
            }
2217
2.23k
            case IS_OP:
2218
68
            {
2219
                // Fold to POP_JUMP_IF_NONE:
2220
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2221
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2222
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2223
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2224
                // Fold to POP_JUMP_IF_NOT_NONE:
2225
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2226
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2227
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2228
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2229
68
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2230
68
                if (cnt == NULL) {
2231
0
                    return ERROR;
2232
0
                }
2233
68
                if (!Py_IsNone(cnt)) {
2234
64
                    Py_DECREF(cnt);
2235
64
                    break;
2236
64
                }
2237
4
                if (bb->b_iused <= i + 2) {
2238
0
                    break;
2239
0
                }
2240
4
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2241
4
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2242
                // Get rid of TO_BOOL regardless:
2243
4
                if (jump_instr->i_opcode == TO_BOOL) {
2244
0
                    INSTR_SET_OP0(jump_instr, NOP);
2245
0
                    if (bb->b_iused <= i + 3) {
2246
0
                        break;
2247
0
                    }
2248
0
                    jump_instr = &bb->b_instr[i + 3];
2249
0
                }
2250
4
                bool invert = is_instr->i_oparg;
2251
4
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2252
4
                    invert = !invert;
2253
4
                }
2254
0
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2255
0
                    break;
2256
0
                }
2257
4
                INSTR_SET_OP0(inst, NOP);
2258
4
                INSTR_SET_OP0(is_instr, NOP);
2259
4
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2260
4
                                              : POP_JUMP_IF_NONE;
2261
4
                break;
2262
4
            }
2263
1.26k
            case TO_BOOL:
2264
1.26k
            {
2265
1.26k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2266
1.26k
                if (cnt == NULL) {
2267
0
                    return ERROR;
2268
0
                }
2269
1.26k
                int is_true = PyObject_IsTrue(cnt);
2270
1.26k
                Py_DECREF(cnt);
2271
1.26k
                if (is_true == -1) {
2272
0
                    return ERROR;
2273
0
                }
2274
1.26k
                cnt = PyBool_FromLong(is_true);
2275
1.26k
                int index = add_const(cnt, consts, const_cache);
2276
1.26k
                if (index < 0) {
2277
0
                    return ERROR;
2278
0
                }
2279
1.26k
                INSTR_SET_OP0(inst, NOP);
2280
1.26k
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2281
1.26k
                break;
2282
1.26k
            }
2283
384k
        }
2284
384k
    }
2285
171k
    return SUCCESS;
2286
171k
}
2287
2288
static int
2289
21.0k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2290
192k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2291
171k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2292
171k
    }
2293
21.0k
    return SUCCESS;
2294
21.0k
}
2295
2296
static int
2297
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2298
171k
{
2299
171k
    assert(PyDict_CheckExact(const_cache));
2300
171k
    assert(PyList_CheckExact(consts));
2301
171k
    cfg_instr nop;
2302
171k
    INSTR_SET_OP0(&nop, NOP);
2303
1.73M
    for (int i = 0; i < bb->b_iused; i++) {
2304
1.56M
        cfg_instr *inst = &bb->b_instr[i];
2305
1.56M
        cfg_instr *target;
2306
1.56M
        int opcode = inst->i_opcode;
2307
1.56M
        int oparg = inst->i_oparg;
2308
1.56M
        if (HAS_TARGET(opcode)) {
2309
105k
            assert(inst->i_target->b_iused > 0);
2310
105k
            target = &inst->i_target->b_instr[0];
2311
105k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2312
105k
        }
2313
1.45M
        else {
2314
1.45M
            target = &nop;
2315
1.45M
        }
2316
1.56M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2317
1.56M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2318
1.56M
        switch (opcode) {
2319
            /* Try to fold tuples of constants.
2320
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2321
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2322
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2323
13.2k
            case BUILD_TUPLE:
2324
13.2k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2325
23
                    switch(oparg) {
2326
11
                        case 1:
2327
11
                            INSTR_SET_OP0(inst, NOP);
2328
11
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2329
11
                            continue;
2330
8
                        case 2:
2331
11
                        case 3:
2332
11
                            INSTR_SET_OP0(inst, NOP);
2333
11
                            bb->b_instr[i+1].i_opcode = SWAP;
2334
11
                            continue;
2335
23
                    }
2336
23
                }
2337
13.2k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2338
13.2k
                break;
2339
831
            case BUILD_LIST:
2340
3.16k
            case BUILD_SET:
2341
3.16k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2342
3.16k
                break;
2343
238
            case POP_JUMP_IF_NOT_NONE:
2344
544
            case POP_JUMP_IF_NONE:
2345
544
                switch (target->i_opcode) {
2346
0
                    case JUMP:
2347
0
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2348
544
                }
2349
544
                break;
2350
24.2k
            case POP_JUMP_IF_FALSE:
2351
24.2k
                switch (target->i_opcode) {
2352
26
                    case JUMP:
2353
26
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2354
24.2k
                }
2355
24.2k
                break;
2356
24.2k
            case POP_JUMP_IF_TRUE:
2357
4.50k
                switch (target->i_opcode) {
2358
0
                    case JUMP:
2359
0
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2360
4.50k
                }
2361
4.50k
                break;
2362
4.50k
            case JUMP_IF_FALSE:
2363
1.09k
                switch (target->i_opcode) {
2364
0
                    case JUMP:
2365
0
                    case JUMP_IF_FALSE:
2366
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2367
0
                        continue;
2368
133
                    case JUMP_IF_TRUE:
2369
                        // No need to check for loops here, a block's b_next
2370
                        // cannot point to itself.
2371
133
                        assert(inst->i_target != inst->i_target->b_next);
2372
133
                        inst->i_target = inst->i_target->b_next;
2373
133
                        i--;
2374
133
                        continue;
2375
1.09k
                }
2376
963
                break;
2377
963
            case JUMP_IF_TRUE:
2378
584
                switch (target->i_opcode) {
2379
0
                    case JUMP:
2380
0
                    case JUMP_IF_TRUE:
2381
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2382
0
                        continue;
2383
0
                    case JUMP_IF_FALSE:
2384
                        // No need to check for loops here, a block's b_next
2385
                        // cannot point to itself.
2386
0
                        assert(inst->i_target != inst->i_target->b_next);
2387
0
                        inst->i_target = inst->i_target->b_next;
2388
0
                        i--;
2389
0
                        continue;
2390
584
                }
2391
584
                break;
2392
7.20k
            case JUMP:
2393
31.6k
            case JUMP_NO_INTERRUPT:
2394
31.6k
                switch (target->i_opcode) {
2395
21
                    case JUMP:
2396
21
                        i -= jump_thread(bb, inst, target, JUMP);
2397
21
                        continue;
2398
141
                    case JUMP_NO_INTERRUPT:
2399
141
                        i -= jump_thread(bb, inst, target, opcode);
2400
141
                        continue;
2401
31.6k
                }
2402
31.4k
                break;
2403
31.4k
            case FOR_ITER:
2404
486
                if (target->i_opcode == JUMP) {
2405
                    /* This will not work now because the jump (at target) could
2406
                     * be forward or backward and FOR_ITER only jumps forward. We
2407
                     * can re-enable this if ever we implement a backward version
2408
                     * of FOR_ITER.
2409
                     */
2410
                    /*
2411
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2412
                    */
2413
0
                }
2414
486
                break;
2415
23.2k
            case STORE_FAST:
2416
23.2k
                if (opcode == nextop &&
2417
8.56k
                    oparg == bb->b_instr[i+1].i_oparg &&
2418
91
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2419
91
                    bb->b_instr[i].i_opcode = POP_TOP;
2420
91
                    bb->b_instr[i].i_oparg = 0;
2421
91
                }
2422
23.2k
                break;
2423
33.4k
            case SWAP:
2424
33.4k
                if (oparg == 1) {
2425
0
                    INSTR_SET_OP0(inst, NOP);
2426
0
                }
2427
33.4k
                break;
2428
33.4k
            case LOAD_GLOBAL:
2429
23.5k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2430
419
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2431
419
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2432
419
                }
2433
23.5k
                break;
2434
26.6k
            case COMPARE_OP:
2435
26.6k
                if (nextop == TO_BOOL) {
2436
51
                    INSTR_SET_OP0(inst, NOP);
2437
51
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2438
51
                    continue;
2439
51
                }
2440
26.6k
                break;
2441
26.6k
            case CONTAINS_OP:
2442
3.28k
            case IS_OP:
2443
3.28k
                if (nextop == TO_BOOL) {
2444
265
                    INSTR_SET_OP0(inst, NOP);
2445
265
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2446
265
                    continue;
2447
265
                }
2448
3.02k
                if (nextop == UNARY_NOT) {
2449
256
                    INSTR_SET_OP0(inst, NOP);
2450
256
                    int inverted = oparg ^ 1;
2451
256
                    assert(inverted == 0 || inverted == 1);
2452
256
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2453
256
                    continue;
2454
256
                }
2455
2.76k
                break;
2456
22.1k
            case TO_BOOL:
2457
22.1k
                if (nextop == TO_BOOL) {
2458
0
                    INSTR_SET_OP0(inst, NOP);
2459
0
                    continue;
2460
0
                }
2461
22.1k
                break;
2462
22.1k
            case UNARY_NOT:
2463
73
                if (nextop == TO_BOOL) {
2464
21
                    INSTR_SET_OP0(inst, NOP);
2465
21
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2466
21
                    continue;
2467
21
                }
2468
52
                if (nextop == UNARY_NOT) {
2469
21
                    INSTR_SET_OP0(inst, NOP);
2470
21
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2471
21
                    continue;
2472
21
                }
2473
31
                _Py_FALLTHROUGH;
2474
5.13k
            case UNARY_INVERT:
2475
65.0k
            case UNARY_NEGATIVE:
2476
65.0k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2477
65.0k
                break;
2478
24.1k
            case CALL_INTRINSIC_1:
2479
24.1k
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2480
493
                    if (nextop == GET_ITER) {
2481
0
                        INSTR_SET_OP0(inst, NOP);
2482
0
                    }
2483
493
                    else {
2484
493
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2485
493
                    }
2486
493
                }
2487
23.6k
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2488
16.9k
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2489
16.9k
                }
2490
24.1k
                break;
2491
239k
            case BINARY_OP:
2492
239k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2493
239k
                break;
2494
1.56M
        }
2495
1.56M
    }
2496
2497
1.73M
    for (int i = 0; i < bb->b_iused; i++) {
2498
1.55M
        cfg_instr *inst = &bb->b_instr[i];
2499
1.55M
        if (inst->i_opcode == SWAP) {
2500
28.8k
            if (swaptimize(bb, &i) < 0) {
2501
0
                goto error;
2502
0
            }
2503
28.8k
            apply_static_swaps(bb, i);
2504
28.8k
        }
2505
1.55M
    }
2506
171k
    return SUCCESS;
2507
0
error:
2508
0
    return ERROR;
2509
171k
}
2510
2511
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2512
2513
static int
2514
remove_redundant_nops_and_jumps(cfg_builder *g)
2515
43.1k
{
2516
43.1k
    int removed_nops, removed_jumps;
2517
45.1k
    do {
2518
        /* Convergence is guaranteed because the number of
2519
         * redundant jumps and nops only decreases.
2520
         */
2521
45.1k
        removed_nops = remove_redundant_nops(g);
2522
45.1k
        RETURN_IF_ERROR(removed_nops);
2523
45.1k
        removed_jumps = remove_redundant_jumps(g);
2524
45.1k
        RETURN_IF_ERROR(removed_jumps);
2525
45.1k
    } while(removed_nops + removed_jumps > 0);
2526
43.1k
    return SUCCESS;
2527
43.1k
}
2528
2529
/* Perform optimizations on a control flow graph.
2530
   The consts object should still be in list form to allow new constants
2531
   to be appended.
2532
2533
   Code trasnformations that reduce code size initially fill the gaps with
2534
   NOPs.  Later those NOPs are removed.
2535
*/
2536
static int
2537
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2538
21.0k
{
2539
21.0k
    assert(PyDict_CheckExact(const_cache));
2540
21.0k
    RETURN_IF_ERROR(check_cfg(g));
2541
21.0k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2542
21.0k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2543
21.0k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2544
21.0k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2545
192k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2546
171k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2547
171k
    }
2548
21.0k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2549
21.0k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2550
21.0k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2551
21.0k
    assert(no_redundant_jumps(g));
2552
21.0k
    return SUCCESS;
2553
21.0k
}
2554
2555
static void
2556
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2557
7.62k
{
2558
7.62k
    int32_t line1 = inst1->i_loc.lineno;
2559
7.62k
    int32_t line2 = inst2->i_loc.lineno;
2560
    /* Skip if instructions are on different lines */
2561
7.62k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2562
38
        return;
2563
38
    }
2564
7.58k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2565
3.34k
        return;
2566
3.34k
    }
2567
4.24k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2568
4.24k
    INSTR_SET_OP0(inst2, NOP);
2569
4.24k
}
2570
2571
static int
2572
insert_superinstructions(cfg_builder *g)
2573
21.0k
{
2574
192k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2575
2576
1.24M
        for (int i = 0; i < b->b_iused; i++) {
2577
1.06M
            cfg_instr *inst = &b->b_instr[i];
2578
1.06M
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2579
1.06M
            switch(inst->i_opcode) {
2580
7.14k
                case LOAD_FAST:
2581
7.14k
                    if (nextop == LOAD_FAST) {
2582
1.20k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2583
1.20k
                    }
2584
7.14k
                    break;
2585
20.0k
                case STORE_FAST:
2586
20.0k
                    switch (nextop) {
2587
331
                        case LOAD_FAST:
2588
331
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2589
331
                            break;
2590
6.08k
                        case STORE_FAST:
2591
6.08k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2592
6.08k
                            break;
2593
20.0k
                    }
2594
20.0k
                    break;
2595
1.06M
            }
2596
1.06M
        }
2597
171k
    }
2598
21.0k
    int res = remove_redundant_nops(g);
2599
21.0k
    assert(no_redundant_nops(g));
2600
21.0k
    return res;
2601
21.0k
}
2602
2603
#define NOT_LOCAL -1
2604
23.1k
#define DUMMY_INSTR -1
2605
2606
typedef struct {
2607
    // Index of instruction that produced the reference or DUMMY_INSTR.
2608
    int instr;
2609
2610
    // The local to which the reference refers or NOT_LOCAL.
2611
    int local;
2612
} ref;
2613
2614
typedef struct {
2615
    ref *refs;
2616
    Py_ssize_t size;
2617
    Py_ssize_t capacity;
2618
} ref_stack;
2619
2620
static int
2621
ref_stack_push(ref_stack *stack, ref r)
2622
921k
{
2623
921k
    if (stack->size == stack->capacity) {
2624
21.0k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2625
21.0k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2626
21.0k
        if (refs == NULL) {
2627
0
            PyErr_NoMemory();
2628
0
            return -1;
2629
0
        }
2630
21.0k
        stack->refs = refs;
2631
21.0k
        stack->capacity = new_cap;
2632
21.0k
    }
2633
921k
    stack->refs[stack->size] = r;
2634
921k
    stack->size++;
2635
921k
    return 0;
2636
921k
}
2637
2638
static ref
2639
ref_stack_pop(ref_stack *stack)
2640
610k
{
2641
610k
    assert(stack->size > 0);
2642
610k
    stack->size--;
2643
610k
    ref r = stack->refs[stack->size];
2644
610k
    return r;
2645
610k
}
2646
2647
static void
2648
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2649
20.8k
{
2650
20.8k
    Py_ssize_t idx = stack->size - off;
2651
20.8k
    assert(idx >= 0 && idx < stack->size);
2652
20.8k
    ref tmp = stack->refs[idx];
2653
20.8k
    stack->refs[idx] = stack->refs[stack->size - 1];
2654
20.8k
    stack->refs[stack->size - 1] = tmp;
2655
20.8k
}
2656
2657
static ref
2658
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2659
427k
{
2660
427k
    assert(idx >= 0 && idx < stack->size);
2661
427k
    return stack->refs[idx];
2662
427k
}
2663
2664
static void
2665
ref_stack_clear(ref_stack *stack)
2666
87.4k
{
2667
87.4k
    stack->size = 0;
2668
87.4k
}
2669
2670
static void
2671
ref_stack_fini(ref_stack *stack)
2672
21.0k
{
2673
21.0k
    if (stack->refs != NULL) {
2674
21.0k
        PyMem_Free(stack->refs);
2675
21.0k
    }
2676
21.0k
    stack->refs = NULL;
2677
21.0k
    stack->capacity = 0;
2678
21.0k
    stack->size = 0;
2679
21.0k
}
2680
2681
typedef enum {
2682
    // The loaded reference is still on the stack when the local is killed
2683
    SUPPORT_KILLED  = 1,
2684
    // The loaded reference is stored into a local
2685
    STORED_AS_LOCAL = 2,
2686
    // The loaded reference is still on the stack at the end of the basic block
2687
    REF_UNCONSUMED  = 4,
2688
} LoadFastInstrFlag;
2689
2690
static void
2691
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2692
24.4k
{
2693
95.6k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2694
71.2k
        ref r = ref_stack_at(refs, i);
2695
71.2k
        if (r.local == local) {
2696
33
            assert(r.instr >= 0);
2697
33
            instr_flags[r.instr] |= SUPPORT_KILLED;
2698
33
        }
2699
71.2k
    }
2700
24.4k
}
2701
2702
static void
2703
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2704
23.1k
{
2705
23.1k
    kill_local(instr_flags, refs, local);
2706
23.1k
    if (r.instr != DUMMY_INSTR) {
2707
22.5k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2708
22.5k
    }
2709
23.1k
}
2710
2711
static void
2712
load_fast_push_block(basicblock ***sp, basicblock *target,
2713
                     Py_ssize_t start_depth)
2714
86.4k
{
2715
86.4k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2716
86.4k
    if (!target->b_visited) {
2717
66.4k
        target->b_visited = 1;
2718
66.4k
        *(*sp)++ = target;
2719
66.4k
    }
2720
86.4k
}
2721
2722
/*
2723
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2724
 * load borrowed references onto the operand stack.
2725
 *
2726
 * This is only safe when we can prove that the reference in the frame outlives
2727
 * the borrowed reference produced by the instruction. We make this tractable
2728
 * by enforcing the following lifetimes:
2729
 *
2730
 * 1. Borrowed references loaded onto the operand stack live until the end of
2731
 *    the instruction that consumes them from the stack. Any borrowed
2732
 *    references that would escape into the heap (e.g. into frame objects or
2733
 *    generators) are converted into new, strong references.
2734
 *
2735
 * 2. Locals live until they are either killed by an instruction
2736
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2737
 *    via `f_locals` is added to a tuple owned by the frame object.
2738
 *
2739
 * To simplify the problem of detecting which supporting references in the
2740
 * frame are killed by instructions that overwrite locals, we only allow
2741
 * borrowed references to be stored as a local in the frame if they were passed
2742
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2743
 * strong references.
2744
 *
2745
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2746
 * that meet the following criteria:
2747
 *
2748
 * 1. The produced reference must be consumed from the stack before the
2749
 *    supporting reference in the frame is killed.
2750
 *
2751
 * 2. The produced reference cannot be stored as a local.
2752
 *
2753
 * We use abstract interpretation to identify instructions that meet these
2754
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2755
 * stack of abstract references and note any instructions that violate the
2756
 * criteria above. Once we've processed all the instructions in a block, any
2757
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2758
 */
2759
static int
2760
optimize_load_fast(cfg_builder *g)
2761
21.0k
{
2762
21.0k
    int status;
2763
21.0k
    ref_stack refs = {0};
2764
21.0k
    int max_instrs = 0;
2765
21.0k
    basicblock *entryblock = g->g_entryblock;
2766
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2767
178k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2768
178k
    }
2769
21.0k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2770
21.0k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2771
21.0k
    if (instr_flags == NULL) {
2772
0
        PyErr_NoMemory();
2773
0
        return ERROR;
2774
0
    }
2775
21.0k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2776
21.0k
    if (blocks == NULL) {
2777
0
        status = ERROR;
2778
0
        goto done;
2779
0
    }
2780
21.0k
    basicblock **sp = blocks;
2781
21.0k
    *sp = entryblock;
2782
21.0k
    sp++;
2783
21.0k
    entryblock->b_startdepth = 0;
2784
21.0k
    entryblock->b_visited = 1;
2785
2786
21.0k
    #define PUSH_REF(instr, local)                \
2787
921k
        do {                                      \
2788
921k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2789
0
                status = ERROR;                   \
2790
0
                goto done;                        \
2791
0
            }                                     \
2792
921k
        } while(0)
2793
2794
108k
    while (sp != blocks) {
2795
87.4k
        basicblock *block = *--sp;
2796
87.4k
        assert(block->b_startdepth > -1);
2797
2798
        // Reset per-block state.
2799
87.4k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2800
2801
        // Reset the stack of refs. We don't track references on the stack
2802
        // across basic blocks, but the bytecode will expect their
2803
        // presence. Add dummy references as necessary.
2804
87.4k
        ref_stack_clear(&refs);
2805
379k
        for (int i = 0; i < block->b_startdepth; i++) {
2806
292k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2807
292k
        }
2808
2809
945k
        for (int i = 0; i < block->b_iused; i++) {
2810
857k
            cfg_instr *instr = &block->b_instr[i];
2811
857k
            int opcode = instr->i_opcode;
2812
857k
            int oparg = instr->i_oparg;
2813
857k
            assert(opcode != EXTENDED_ARG);
2814
857k
            switch (opcode) {
2815
                // Opcodes that load and store locals
2816
888
                case DELETE_FAST: {
2817
888
                    kill_local(instr_flags, &refs, oparg);
2818
888
                    break;
2819
0
                }
2820
2821
13.3k
                case LOAD_FAST: {
2822
13.3k
                    PUSH_REF(i, oparg);
2823
13.3k
                    break;
2824
13.3k
                }
2825
2826
13.3k
                case LOAD_FAST_AND_CLEAR: {
2827
357
                    kill_local(instr_flags, &refs, oparg);
2828
357
                    PUSH_REF(i, oparg);
2829
357
                    break;
2830
357
                }
2831
2832
1.00k
                case LOAD_FAST_LOAD_FAST: {
2833
1.00k
                    PUSH_REF(i, oparg >> 4);
2834
1.00k
                    PUSH_REF(i, oparg & 15);
2835
1.00k
                    break;
2836
1.00k
                }
2837
2838
16.8k
                case STORE_FAST: {
2839
16.8k
                    ref r = ref_stack_pop(&refs);
2840
16.8k
                    store_local(instr_flags, &refs, oparg, r);
2841
16.8k
                    break;
2842
1.00k
                }
2843
2844
148
                case STORE_FAST_LOAD_FAST: {
2845
                    // STORE_FAST
2846
148
                    ref r = ref_stack_pop(&refs);
2847
148
                    store_local(instr_flags, &refs, oparg >> 4, r);
2848
                    // LOAD_FAST
2849
148
                    PUSH_REF(i, oparg & 15);
2850
148
                    break;
2851
148
                }
2852
2853
3.09k
                case STORE_FAST_STORE_FAST: {
2854
                    // STORE_FAST
2855
3.09k
                    ref r = ref_stack_pop(&refs);
2856
3.09k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2857
                    // STORE_FAST
2858
3.09k
                    r = ref_stack_pop(&refs);
2859
3.09k
                    store_local(instr_flags, &refs, oparg & 15, r);
2860
3.09k
                    break;
2861
148
                }
2862
2863
                // Opcodes that shuffle values on the stack
2864
45.9k
                case COPY: {
2865
45.9k
                    assert(oparg > 0);
2866
45.9k
                    Py_ssize_t idx = refs.size - oparg;
2867
45.9k
                    ref r = ref_stack_at(&refs, idx);
2868
45.9k
                    PUSH_REF(r.instr, r.local);
2869
45.9k
                    break;
2870
45.9k
                }
2871
2872
45.9k
                case SWAP: {
2873
20.8k
                    assert(oparg >= 2);
2874
20.8k
                    ref_stack_swap_top(&refs, oparg);
2875
20.8k
                    break;
2876
20.8k
                }
2877
2878
                // We treat opcodes that do not consume all of their inputs on
2879
                // a case by case basis, as we have no generic way of knowing
2880
                // how many inputs should be left on the stack.
2881
2882
                // Opcodes that consume no inputs
2883
161
                case FORMAT_SIMPLE:
2884
281
                case GET_ANEXT:
2885
753
                case GET_ITER:
2886
800
                case GET_LEN:
2887
800
                case GET_YIELD_FROM_ITER:
2888
1.99k
                case IMPORT_FROM:
2889
1.99k
                case MATCH_KEYS:
2890
1.99k
                case MATCH_MAPPING:
2891
2.03k
                case MATCH_SEQUENCE:
2892
2.03k
                case WITH_EXCEPT_START: {
2893
2.03k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2894
2.03k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2895
2.03k
                    int net_pushed = num_pushed - num_popped;
2896
2.03k
                    assert(net_pushed >= 0);
2897
3.91k
                    for (int j = 0; j < net_pushed; j++) {
2898
1.87k
                        PUSH_REF(i, NOT_LOCAL);
2899
1.87k
                    }
2900
2.03k
                    break;
2901
2.03k
                }
2902
2903
                // Opcodes that consume some inputs and push no new values
2904
2.03k
                case DICT_MERGE:
2905
35
                case DICT_UPDATE:
2906
6.33k
                case LIST_APPEND:
2907
6.65k
                case LIST_EXTEND:
2908
6.66k
                case MAP_ADD:
2909
6.66k
                case RERAISE:
2910
8.76k
                case SET_ADD:
2911
8.82k
                case SET_UPDATE: {
2912
8.82k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2913
8.82k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2914
8.82k
                    int net_popped = num_popped - num_pushed;
2915
8.82k
                    assert(net_popped > 0);
2916
17.6k
                    for (int i = 0; i < net_popped; i++) {
2917
8.83k
                        ref_stack_pop(&refs);
2918
8.83k
                    }
2919
8.82k
                    break;
2920
8.82k
                }
2921
2922
7.17k
                case END_SEND:
2923
12.2k
                case SET_FUNCTION_ATTRIBUTE: {
2924
12.2k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2925
12.2k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2926
12.2k
                    ref tos = ref_stack_pop(&refs);
2927
12.2k
                    ref_stack_pop(&refs);
2928
12.2k
                    PUSH_REF(tos.instr, tos.local);
2929
12.2k
                    break;
2930
12.2k
                }
2931
2932
                // Opcodes that consume some inputs and push new values
2933
12.2k
                case CHECK_EXC_MATCH: {
2934
0
                    ref_stack_pop(&refs);
2935
0
                    PUSH_REF(i, NOT_LOCAL);
2936
0
                    break;
2937
0
                }
2938
2939
472
                case FOR_ITER: {
2940
472
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2941
472
                    PUSH_REF(i, NOT_LOCAL);
2942
472
                    break;
2943
472
                }
2944
2945
2.91k
                case LOAD_ATTR:
2946
3.09k
                case LOAD_SUPER_ATTR: {
2947
3.09k
                    ref self = ref_stack_pop(&refs);
2948
3.09k
                    if (opcode == LOAD_SUPER_ATTR) {
2949
183
                        ref_stack_pop(&refs);
2950
183
                        ref_stack_pop(&refs);
2951
183
                    }
2952
3.09k
                    PUSH_REF(i, NOT_LOCAL);
2953
3.09k
                    if (oparg & 1) {
2954
                        // A method call; conservatively assume that self is pushed
2955
                        // back onto the stack
2956
830
                        PUSH_REF(self.instr, self.local);
2957
830
                    }
2958
3.09k
                    break;
2959
3.09k
                }
2960
2961
7.07k
                case LOAD_SPECIAL:
2962
7.07k
                case PUSH_EXC_INFO: {
2963
7.07k
                    ref tos = ref_stack_pop(&refs);
2964
7.07k
                    PUSH_REF(i, NOT_LOCAL);
2965
7.07k
                    PUSH_REF(tos.instr, tos.local);
2966
7.07k
                    break;
2967
7.07k
                }
2968
2969
7.17k
                case SEND: {
2970
7.17k
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2971
7.17k
                    ref_stack_pop(&refs);
2972
7.17k
                    PUSH_REF(i, NOT_LOCAL);
2973
7.17k
                    break;
2974
7.17k
                }
2975
2976
                // Opcodes that consume all of their inputs
2977
714k
                default: {
2978
714k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2979
714k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2980
714k
                    if (HAS_TARGET(instr->i_opcode)) {
2981
28.1k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2982
28.1k
                    }
2983
714k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2984
                        // Block push opcodes only affect the stack when jumping
2985
                        // to the target.
2986
1.25M
                        for (int j = 0; j < num_popped; j++) {
2987
536k
                            ref_stack_pop(&refs);
2988
536k
                        }
2989
1.24M
                        for (int j = 0; j < num_pushed; j++) {
2990
526k
                            PUSH_REF(i, NOT_LOCAL);
2991
526k
                        }
2992
714k
                    }
2993
714k
                    break;
2994
714k
                }
2995
857k
            }
2996
857k
        }
2997
2998
        // Push fallthrough block
2999
87.4k
        if (BB_HAS_FALLTHROUGH(block)) {
3000
50.6k
            assert(block->b_next != NULL);
3001
50.6k
            load_fast_push_block(&sp, block->b_next, refs.size);
3002
50.6k
        }
3003
3004
        // Mark instructions that produce values that are on the stack at the
3005
        // end of the basic block
3006
398k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3007
310k
            ref r = ref_stack_at(&refs, i);
3008
310k
            if (r.instr != -1) {
3009
81.9k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3010
81.9k
            }
3011
310k
        }
3012
3013
        // Optimize instructions
3014
945k
        for (int i = 0; i < block->b_iused; i++) {
3015
857k
            if (!instr_flags[i]) {
3016
765k
                cfg_instr *instr = &block->b_instr[i];
3017
765k
                switch (instr->i_opcode) {
3018
13.3k
                    case LOAD_FAST:
3019
13.3k
                        instr->i_opcode = LOAD_FAST_BORROW;
3020
13.3k
                        break;
3021
1.00k
                    case LOAD_FAST_LOAD_FAST:
3022
1.00k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3023
1.00k
                        break;
3024
750k
                    default:
3025
750k
                        break;
3026
765k
                }
3027
765k
            }
3028
857k
        }
3029
87.4k
    }
3030
3031
21.0k
    #undef PUSH_REF
3032
3033
21.0k
    status = SUCCESS;
3034
3035
21.0k
done:
3036
21.0k
    ref_stack_fini(&refs);
3037
21.0k
    PyMem_Free(instr_flags);
3038
21.0k
    PyMem_Free(blocks);
3039
21.0k
    return status;
3040
21.0k
}
3041
3042
// helper functions for add_checks_for_loads_of_unknown_variables
3043
static inline void
3044
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3045
466k
{
3046
    // Push b if the unsafe mask is giving us any new information.
3047
    // To avoid overflowing the stack, only allow each block once.
3048
    // Use b->b_visited=1 to mean that b is currently on the stack.
3049
466k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3050
466k
    if (b->b_unsafe_locals_mask != both) {
3051
14.5k
        b->b_unsafe_locals_mask = both;
3052
        // More work left to do.
3053
14.5k
        if (!b->b_visited) {
3054
            // not on the stack, so push it.
3055
14.5k
            *(*sp)++ = b;
3056
14.5k
            b->b_visited = 1;
3057
14.5k
        }
3058
14.5k
    }
3059
466k
}
3060
3061
static void
3062
scan_block_for_locals(basicblock *b, basicblock ***sp)
3063
129k
{
3064
    // bit i is set if local i is potentially uninitialized
3065
129k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3066
769k
    for (int i = 0; i < b->b_iused; i++) {
3067
640k
        cfg_instr *instr = &b->b_instr[i];
3068
640k
        assert(instr->i_opcode != EXTENDED_ARG);
3069
640k
        if (instr->i_except != NULL) {
3070
333k
            maybe_push(instr->i_except, unsafe_mask, sp);
3071
333k
        }
3072
640k
        if (instr->i_oparg >= 64) {
3073
80.3k
            continue;
3074
80.3k
        }
3075
640k
        assert(instr->i_oparg >= 0);
3076
560k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3077
560k
        switch (instr->i_opcode) {
3078
1.71k
            case DELETE_FAST:
3079
2.70k
            case LOAD_FAST_AND_CLEAR:
3080
4.67k
            case STORE_FAST_MAYBE_NULL:
3081
4.67k
                unsafe_mask |= bit;
3082
4.67k
                break;
3083
46.1k
            case STORE_FAST:
3084
46.1k
                unsafe_mask &= ~bit;
3085
46.1k
                break;
3086
1.08k
            case LOAD_FAST_CHECK:
3087
                // If this doesn't raise, then the local is defined.
3088
1.08k
                unsafe_mask &= ~bit;
3089
1.08k
                break;
3090
12.9k
            case LOAD_FAST:
3091
12.9k
                if (unsafe_mask & bit) {
3092
1.08k
                    instr->i_opcode = LOAD_FAST_CHECK;
3093
1.08k
                }
3094
12.9k
                unsafe_mask &= ~bit;
3095
12.9k
                break;
3096
560k
        }
3097
560k
    }
3098
129k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3099
67.0k
        maybe_push(b->b_next, unsafe_mask, sp);
3100
67.0k
    }
3101
129k
    cfg_instr *last = basicblock_last_instr(b);
3102
129k
    if (last && is_jump(last)) {
3103
57.4k
        assert(last->i_target != NULL);
3104
57.4k
        maybe_push(last->i_target, unsafe_mask, sp);
3105
57.4k
    }
3106
129k
}
3107
3108
static int
3109
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3110
20
{
3111
20
    assert(nlocals > 64);
3112
20
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3113
20
    if (states == NULL) {
3114
0
        PyErr_NoMemory();
3115
0
        return ERROR;
3116
0
    }
3117
20
    Py_ssize_t blocknum = 0;
3118
    // state[i - 64] == blocknum if local i is guaranteed to
3119
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3120
    // STORE_FAST within that basicblock (not followed by
3121
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3122
109
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3123
89
        blocknum++;
3124
10.3k
        for (int i = 0; i < b->b_iused; i++) {
3125
10.2k
            cfg_instr *instr = &b->b_instr[i];
3126
10.2k
            assert(instr->i_opcode != EXTENDED_ARG);
3127
10.2k
            int arg = instr->i_oparg;
3128
10.2k
            if (arg < 64) {
3129
10.1k
                continue;
3130
10.1k
            }
3131
10.2k
            assert(arg >= 0);
3132
121
            switch (instr->i_opcode) {
3133
29
                case DELETE_FAST:
3134
29
                case LOAD_FAST_AND_CLEAR:
3135
29
                case STORE_FAST_MAYBE_NULL:
3136
29
                    states[arg - 64] = blocknum - 1;
3137
29
                    break;
3138
32
                case STORE_FAST:
3139
32
                    states[arg - 64] = blocknum;
3140
32
                    break;
3141
16
                case LOAD_FAST:
3142
16
                    if (states[arg - 64] != blocknum) {
3143
8
                        instr->i_opcode = LOAD_FAST_CHECK;
3144
8
                    }
3145
16
                    states[arg - 64] = blocknum;
3146
16
                    break;
3147
121
                    Py_UNREACHABLE();
3148
121
            }
3149
121
        }
3150
89
    }
3151
20
    PyMem_Free(states);
3152
20
    return SUCCESS;
3153
20
}
3154
3155
static int
3156
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3157
21.0k
{
3158
21.0k
    assert(PyList_CheckExact(consts));
3159
21.0k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3160
21.0k
    if (nconsts == 0) {
3161
27
        return SUCCESS;  /* nothing to do */
3162
27
    }
3163
3164
20.9k
    Py_ssize_t *index_map = NULL;
3165
20.9k
    Py_ssize_t *reverse_index_map = NULL;
3166
20.9k
    int err = ERROR;
3167
3168
20.9k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3169
20.9k
    if (index_map == NULL) {
3170
0
        goto end;
3171
0
    }
3172
167k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3173
146k
        index_map[i] = -1;
3174
146k
    }
3175
    // The first constant may be docstring; keep it always.
3176
20.9k
    index_map[0] = 0;
3177
3178
    /* mark used consts */
3179
192k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3180
1.23M
        for (int i = 0; i < b->b_iused; i++) {
3181
1.06M
            int opcode = b->b_instr[i].i_opcode;
3182
1.06M
            if (OPCODE_HAS_CONST(opcode)) {
3183
157k
                int index = b->b_instr[i].i_oparg;
3184
157k
                index_map[index] = index;
3185
157k
            }
3186
1.06M
        }
3187
171k
    }
3188
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3189
    /* condense consts */
3190
20.9k
    Py_ssize_t n_used_consts = 0;
3191
188k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3192
167k
        if (index_map[i] != -1) {
3193
80.7k
            assert(index_map[i] == i);
3194
80.7k
            index_map[n_used_consts++] = index_map[i];
3195
80.7k
        }
3196
167k
    }
3197
20.9k
    if (n_used_consts == nconsts) {
3198
        /* nothing to do */
3199
9.06k
        err = SUCCESS;
3200
9.06k
        goto end;
3201
9.06k
    }
3202
3203
    /* move all used consts to the beginning of the consts list */
3204
20.9k
    assert(n_used_consts < nconsts);
3205
65.1k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3206
53.2k
        Py_ssize_t old_index = index_map[i];
3207
53.2k
        assert(i <= old_index && old_index < nconsts);
3208
53.2k
        if (i != old_index) {
3209
33.5k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3210
33.5k
            assert(value != NULL);
3211
33.5k
            PyList_SetItem(consts, i, Py_NewRef(value));
3212
33.5k
        }
3213
53.2k
    }
3214
3215
    /* truncate the consts list at its new size */
3216
11.9k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3217
0
        goto end;
3218
0
    }
3219
    /* adjust const indices in the bytecode */
3220
11.9k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3221
11.9k
    if (reverse_index_map == NULL) {
3222
0
        goto end;
3223
0
    }
3224
152k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3225
140k
        reverse_index_map[i] = -1;
3226
140k
    }
3227
65.1k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3228
53.2k
        assert(index_map[i] != -1);
3229
53.2k
        assert(reverse_index_map[index_map[i]] == -1);
3230
53.2k
        reverse_index_map[index_map[i]] = i;
3231
53.2k
    }
3232
3233
99.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3234
745k
        for (int i = 0; i < b->b_iused; i++) {
3235
657k
            int opcode = b->b_instr[i].i_opcode;
3236
657k
            if (OPCODE_HAS_CONST(opcode)) {
3237
99.2k
                int index = b->b_instr[i].i_oparg;
3238
99.2k
                assert(reverse_index_map[index] >= 0);
3239
99.2k
                assert(reverse_index_map[index] < n_used_consts);
3240
99.2k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3241
99.2k
            }
3242
657k
        }
3243
88.0k
    }
3244
3245
11.9k
    err = SUCCESS;
3246
20.9k
end:
3247
20.9k
    PyMem_Free(index_map);
3248
20.9k
    PyMem_Free(reverse_index_map);
3249
20.9k
    return err;
3250
11.9k
}
3251
3252
3253
3254
static int
3255
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3256
                                                int nlocals,
3257
                                                int nparams)
3258
21.0k
{
3259
21.0k
    if (nlocals == 0) {
3260
12.3k
        return SUCCESS;
3261
12.3k
    }
3262
8.65k
    if (nlocals > 64) {
3263
        // To avoid O(nlocals**2) compilation, locals beyond the first
3264
        // 64 are only analyzed one basicblock at a time: initialization
3265
        // info is not passed between basicblocks.
3266
20
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3267
0
            return ERROR;
3268
0
        }
3269
20
        nlocals = 64;
3270
20
    }
3271
8.65k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3272
8.65k
    if (stack == NULL) {
3273
0
        return ERROR;
3274
0
    }
3275
8.65k
    basicblock **sp = stack;
3276
3277
    // First origin of being uninitialized:
3278
    // The non-parameter locals in the entry block.
3279
8.65k
    uint64_t start_mask = 0;
3280
24.0k
    for (int i = nparams; i < nlocals; i++) {
3281
15.3k
        start_mask |= (uint64_t)1 << i;
3282
15.3k
    }
3283
8.65k
    maybe_push(entryblock, start_mask, &sp);
3284
3285
    // Second origin of being uninitialized:
3286
    // There could be DELETE_FAST somewhere, so
3287
    // be sure to scan each basicblock at least once.
3288
123k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3289
114k
        scan_block_for_locals(b, &sp);
3290
114k
    }
3291
    // Now propagate the uncertainty from the origins we found: Use
3292
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3293
23.1k
    while (sp > stack) {
3294
14.5k
        basicblock *b = *--sp;
3295
        // mark as no longer on stack
3296
14.5k
        b->b_visited = 0;
3297
14.5k
        scan_block_for_locals(b, &sp);
3298
14.5k
    }
3299
8.65k
    PyMem_Free(stack);
3300
8.65k
    return SUCCESS;
3301
8.65k
}
3302
3303
3304
static int
3305
11.9k
mark_warm(basicblock *entryblock) {
3306
11.9k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3307
11.9k
    if (stack == NULL) {
3308
0
        return ERROR;
3309
0
    }
3310
11.9k
    basicblock **sp = stack;
3311
3312
11.9k
    *sp++ = entryblock;
3313
11.9k
    entryblock->b_visited = 1;
3314
85.8k
    while (sp > stack) {
3315
73.8k
        basicblock *b = *(--sp);
3316
73.8k
        assert(!b->b_except_handler);
3317
73.8k
        b->b_warm = 1;
3318
73.8k
        basicblock *next = b->b_next;
3319
73.8k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3320
40.4k
            *sp++ = next;
3321
40.4k
            next->b_visited = 1;
3322
40.4k
        }
3323
645k
        for (int i=0; i < b->b_iused; i++) {
3324
571k
            cfg_instr *instr = &b->b_instr[i];
3325
571k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3326
21.3k
                *sp++ = instr->i_target;
3327
21.3k
                instr->i_target->b_visited = 1;
3328
21.3k
            }
3329
571k
        }
3330
73.8k
    }
3331
11.9k
    PyMem_Free(stack);
3332
11.9k
    return SUCCESS;
3333
11.9k
}
3334
3335
static int
3336
11.9k
mark_cold(basicblock *entryblock) {
3337
174k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3338
162k
        assert(!b->b_cold && !b->b_warm);
3339
162k
    }
3340
11.9k
    if (mark_warm(entryblock) < 0) {
3341
0
        return ERROR;
3342
0
    }
3343
3344
11.9k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3345
11.9k
    if (stack == NULL) {
3346
0
        return ERROR;
3347
0
    }
3348
3349
11.9k
    basicblock **sp = stack;
3350
174k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3351
162k
        if (b->b_except_handler) {
3352
27.5k
            assert(!b->b_warm);
3353
27.5k
            *sp++ = b;
3354
27.5k
            b->b_visited = 1;
3355
27.5k
        }
3356
162k
    }
3357
3358
83.2k
    while (sp > stack) {
3359
71.3k
        basicblock *b = *(--sp);
3360
71.3k
        b->b_cold = 1;
3361
71.3k
        basicblock *next = b->b_next;
3362
71.3k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3363
43.2k
            if (!next->b_warm && !next->b_visited) {
3364
35.3k
                *sp++ = next;
3365
35.3k
                next->b_visited = 1;
3366
35.3k
            }
3367
43.2k
        }
3368
291k
        for (int i = 0; i < b->b_iused; i++) {
3369
220k
            cfg_instr *instr = &b->b_instr[i];
3370
220k
            if (is_jump(instr)) {
3371
30.0k
                assert(i == b->b_iused - 1);
3372
30.0k
                basicblock *target = b->b_instr[i].i_target;
3373
30.0k
                if (!target->b_warm && !target->b_visited) {
3374
8.37k
                    *sp++ = target;
3375
8.37k
                    target->b_visited = 1;
3376
8.37k
                }
3377
30.0k
            }
3378
220k
        }
3379
71.3k
    }
3380
11.9k
    PyMem_Free(stack);
3381
11.9k
    return SUCCESS;
3382
11.9k
}
3383
3384
3385
static int
3386
21.0k
push_cold_blocks_to_end(cfg_builder *g) {
3387
21.0k
    basicblock *entryblock = g->g_entryblock;
3388
21.0k
    if (entryblock->b_next == NULL) {
3389
        /* single basicblock, no need to reorder */
3390
9.02k
        return SUCCESS;
3391
9.02k
    }
3392
11.9k
    RETURN_IF_ERROR(mark_cold(entryblock));
3393
3394
11.9k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3395
3396
    /* If we have a cold block with fallthrough to a warm block, add */
3397
    /* an explicit jump instead of fallthrough */
3398
181k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3399
169k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3400
7.17k
            basicblock *explicit_jump = cfg_builder_new_block(g);
3401
7.17k
            if (explicit_jump == NULL) {
3402
0
                return ERROR;
3403
0
            }
3404
7.17k
            if (!IS_LABEL(b->b_next->b_label)) {
3405
0
                b->b_next->b_label.id = next_lbl++;
3406
0
            }
3407
7.17k
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3408
7.17k
                             NO_LOCATION);
3409
7.17k
            explicit_jump->b_cold = 1;
3410
7.17k
            explicit_jump->b_next = b->b_next;
3411
7.17k
            explicit_jump->b_predecessors = 1;
3412
7.17k
            b->b_next = explicit_jump;
3413
3414
            /* set target */
3415
7.17k
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3416
7.17k
            last->i_target = explicit_jump->b_next;
3417
7.17k
        }
3418
169k
    }
3419
3420
11.9k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3421
11.9k
    basicblock *cold_blocks = NULL;
3422
11.9k
    basicblock *cold_blocks_tail = NULL;
3423
3424
11.9k
    basicblock *b = entryblock;
3425
24.4k
    while(b->b_next) {
3426
24.4k
        assert(!b->b_cold);
3427
103k
        while (b->b_next && !b->b_next->b_cold) {
3428
78.7k
            b = b->b_next;
3429
78.7k
        }
3430
24.4k
        if (b->b_next == NULL) {
3431
            /* no more cold blocks */
3432
11.9k
            break;
3433
11.9k
        }
3434
3435
        /* b->b_next is the beginning of a cold streak */
3436
24.4k
        assert(!b->b_cold && b->b_next->b_cold);
3437
3438
12.4k
        basicblock *b_end = b->b_next;
3439
78.4k
        while (b_end->b_next && b_end->b_next->b_cold) {
3440
65.9k
            b_end = b_end->b_next;
3441
65.9k
        }
3442
3443
        /* b_end is the end of the cold streak */
3444
12.4k
        assert(b_end && b_end->b_cold);
3445
12.4k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3446
3447
12.4k
        if (cold_blocks == NULL) {
3448
1.14k
            cold_blocks = b->b_next;
3449
1.14k
        }
3450
11.3k
        else {
3451
11.3k
            cold_blocks_tail->b_next = b->b_next;
3452
11.3k
        }
3453
12.4k
        cold_blocks_tail = b_end;
3454
12.4k
        b->b_next = b_end->b_next;
3455
12.4k
        b_end->b_next = NULL;
3456
12.4k
    }
3457
11.9k
    assert(b != NULL && b->b_next == NULL);
3458
11.9k
    b->b_next = cold_blocks;
3459
3460
11.9k
    if (cold_blocks != NULL) {
3461
1.14k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3462
1.14k
    }
3463
11.9k
    return SUCCESS;
3464
11.9k
}
3465
3466
static int
3467
convert_pseudo_conditional_jumps(cfg_builder *g)
3468
21.0k
{
3469
21.0k
    basicblock *entryblock = g->g_entryblock;
3470
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3471
1.24M
        for (int i = 0; i < b->b_iused; i++) {
3472
1.06M
            cfg_instr *instr = &b->b_instr[i];
3473
1.06M
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3474
1.43k
                assert(i == b->b_iused - 1);
3475
1.43k
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3476
963
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3477
1.43k
                location loc = instr->i_loc;
3478
1.43k
                basicblock *except = instr->i_except;
3479
1.43k
                cfg_instr copy = {
3480
1.43k
                            .i_opcode = COPY,
3481
1.43k
                            .i_oparg = 1,
3482
1.43k
                            .i_loc = loc,
3483
1.43k
                            .i_target = NULL,
3484
1.43k
                            .i_except = except,
3485
1.43k
                };
3486
1.43k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3487
1.43k
                cfg_instr to_bool = {
3488
1.43k
                            .i_opcode = TO_BOOL,
3489
1.43k
                            .i_oparg = 0,
3490
1.43k
                            .i_loc = loc,
3491
1.43k
                            .i_target = NULL,
3492
1.43k
                            .i_except = except,
3493
1.43k
                };
3494
1.43k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3495
1.43k
            }
3496
1.06M
        }
3497
178k
    }
3498
21.0k
    return SUCCESS;
3499
21.0k
}
3500
3501
static int
3502
convert_pseudo_ops(cfg_builder *g)
3503
21.0k
{
3504
21.0k
    basicblock *entryblock = g->g_entryblock;
3505
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3506
1.25M
        for (int i = 0; i < b->b_iused; i++) {
3507
1.07M
            cfg_instr *instr = &b->b_instr[i];
3508
1.07M
            if (is_block_push(instr)) {
3509
27.5k
                INSTR_SET_OP0(instr, NOP);
3510
27.5k
            }
3511
1.05M
            else if (instr->i_opcode == LOAD_CLOSURE) {
3512
7.31k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3513
7.31k
                instr->i_opcode = LOAD_FAST;
3514
7.31k
            }
3515
1.04M
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3516
986
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3517
986
                instr->i_opcode = STORE_FAST;
3518
986
            }
3519
1.07M
        }
3520
178k
    }
3521
21.0k
    return remove_redundant_nops_and_jumps(g);
3522
21.0k
}
3523
3524
static inline bool
3525
322k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3526
322k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3527
128k
        return basicblock_has_no_lineno(b);
3528
128k
    }
3529
193k
    else {
3530
193k
        return false;
3531
193k
    }
3532
322k
}
3533
3534
3535
/* PEP 626 mandates that the f_lineno of a frame is correct
3536
 * after a frame terminates. It would be prohibitively expensive
3537
 * to continuously update the f_lineno field at runtime,
3538
 * so we make sure that all exiting instruction (raises and returns)
3539
 * have a valid line number, allowing us to compute f_lineno lazily.
3540
 * We can do this by duplicating the exit blocks without line number
3541
 * so that none have more than one predecessor. We can then safely
3542
 * copy the line number from the sole predecessor block.
3543
 */
3544
static int
3545
duplicate_exits_without_lineno(cfg_builder *g)
3546
42.0k
{
3547
42.0k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3548
3549
    /* Copy all exit blocks without line number that are targets of a jump.
3550
     */
3551
42.0k
    basicblock *entryblock = g->g_entryblock;
3552
391k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3553
349k
        cfg_instr *last = basicblock_last_instr(b);
3554
349k
        if (last == NULL) {
3555
36.2k
            continue;
3556
36.2k
        }
3557
313k
        if (is_jump(last)) {
3558
151k
            basicblock *target = next_nonempty_block(last->i_target);
3559
151k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3560
936
                basicblock *new_target = copy_basicblock(g, target);
3561
936
                if (new_target == NULL) {
3562
0
                    return ERROR;
3563
0
                }
3564
936
                new_target->b_instr[0].i_loc = last->i_loc;
3565
936
                last->i_target = new_target;
3566
936
                target->b_predecessors--;
3567
936
                new_target->b_predecessors = 1;
3568
936
                new_target->b_next = target->b_next;
3569
936
                new_target->b_label.id = next_lbl++;
3570
936
                target->b_next = new_target;
3571
936
            }
3572
151k
        }
3573
313k
    }
3574
3575
    /* Any remaining reachable exit blocks without line number can only be reached by
3576
     * fall through, and thus can only have a single predecessor */
3577
391k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3578
349k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3579
170k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3580
5.32k
                cfg_instr *last = basicblock_last_instr(b);
3581
5.32k
                assert(last != NULL);
3582
5.32k
                b->b_next->b_instr[0].i_loc = last->i_loc;
3583
5.32k
            }
3584
170k
        }
3585
349k
    }
3586
42.0k
    return SUCCESS;
3587
42.0k
}
3588
3589
3590
/* If an instruction has no line number, but it's predecessor in the BB does,
3591
 * then copy the line number. If a successor block has no line number, and only
3592
 * one predecessor, then inherit the line number.
3593
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3594
 * Also reduces the size of the line number table,
3595
 * but has no impact on the generated line number events.
3596
 */
3597
3598
static inline void
3599
maybe_propagate_location(basicblock *b, int i, location loc)
3600
2.76M
{
3601
2.76M
    assert(b->b_iused > i);
3602
2.76M
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3603
207k
         b->b_instr[i].i_loc = loc;
3604
207k
    }
3605
2.76M
}
3606
3607
static void
3608
propagate_line_numbers(basicblock *entryblock)
3609
42.0k
{
3610
391k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3611
349k
        cfg_instr *last = basicblock_last_instr(b);
3612
349k
        if (last == NULL) {
3613
36.2k
            continue;
3614
36.2k
        }
3615
3616
313k
        location prev_location = NO_LOCATION;
3617
2.94M
        for (int i = 0; i < b->b_iused; i++) {
3618
2.63M
            maybe_propagate_location(b, i, prev_location);
3619
2.63M
            prev_location = b->b_instr[i].i_loc;
3620
2.63M
        }
3621
313k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3622
100k
            if (b->b_next->b_iused > 0) {
3623
100k
                maybe_propagate_location(b->b_next, 0, prev_location);
3624
100k
            }
3625
100k
        }
3626
313k
        if (is_jump(last)) {
3627
151k
            basicblock *target = last->i_target;
3628
151k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3629
6
                target = target->b_next;
3630
6
            }
3631
151k
            if (target->b_predecessors == 1) {
3632
28.4k
                maybe_propagate_location(target, 0, prev_location);
3633
28.4k
            }
3634
151k
        }
3635
313k
    }
3636
42.0k
}
3637
3638
static int
3639
resolve_line_numbers(cfg_builder *g, int firstlineno)
3640
42.0k
{
3641
42.0k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3642
42.0k
    propagate_line_numbers(g->g_entryblock);
3643
42.0k
    return SUCCESS;
3644
42.0k
}
3645
3646
int
3647
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3648
                        int nlocals, int nparams, int firstlineno)
3649
21.0k
{
3650
21.0k
    assert(cfg_builder_check(g));
3651
    /** Preprocessing **/
3652
    /* Map labels to targets and mark exception handlers */
3653
21.0k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3654
21.0k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3655
21.0k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3656
3657
    /** Optimization **/
3658
21.0k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3659
21.0k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3660
21.0k
    RETURN_IF_ERROR(
3661
21.0k
        add_checks_for_loads_of_uninitialized_variables(
3662
21.0k
            g->g_entryblock, nlocals, nparams));
3663
21.0k
    RETURN_IF_ERROR(insert_superinstructions(g));
3664
3665
21.0k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3666
21.0k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3667
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3668
    // assert(all_exits_have_lineno(g->g_entryblock));
3669
21.0k
    return SUCCESS;
3670
21.0k
}
3671
3672
static int *
3673
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3674
21.0k
{
3675
21.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3676
21.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3677
21.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3678
3679
21.0k
    int noffsets = ncellvars + nfreevars;
3680
21.0k
    int *fixed = PyMem_New(int, noffsets);
3681
21.0k
    if (fixed == NULL) {
3682
0
        PyErr_NoMemory();
3683
0
        return NULL;
3684
0
    }
3685
30.2k
    for (int i = 0; i < noffsets; i++) {
3686
9.22k
        fixed[i] = nlocals + i;
3687
9.22k
    }
3688
3689
21.0k
    PyObject *varname, *cellindex;
3690
21.0k
    Py_ssize_t pos = 0;
3691
25.9k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3692
4.96k
        PyObject *varindex;
3693
4.96k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3694
0
            goto error;
3695
0
        }
3696
4.96k
        if (varindex == NULL) {
3697
4.96k
            continue;
3698
4.96k
        }
3699
3700
6
        int argoffset = PyLong_AsInt(varindex);
3701
6
        Py_DECREF(varindex);
3702
6
        if (argoffset == -1 && PyErr_Occurred()) {
3703
0
            goto error;
3704
0
        }
3705
3706
6
        int oldindex = PyLong_AsInt(cellindex);
3707
6
        if (oldindex == -1 && PyErr_Occurred()) {
3708
0
            goto error;
3709
0
        }
3710
6
        fixed[oldindex] = argoffset;
3711
6
    }
3712
21.0k
    return fixed;
3713
3714
0
error:
3715
0
    PyMem_Free(fixed);
3716
0
    return NULL;
3717
21.0k
}
3718
3719
#define IS_GENERATOR(CF) \
3720
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3721
3722
static int
3723
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3724
                           int *fixed, int nfreevars)
3725
21.0k
{
3726
21.0k
    assert(umd->u_firstlineno > 0);
3727
3728
    /* Set up cells for any variable that escapes, to be put in a closure. */
3729
21.0k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3730
21.0k
    if (ncellvars) {
3731
        // umd->u_cellvars has the cells out of order so we sort them
3732
        // before adding the MAKE_CELL instructions.  Note that we
3733
        // adjust for arg cells, which come first.
3734
4.10k
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3735
4.10k
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3736
4.10k
        if (sorted == NULL) {
3737
0
            PyErr_NoMemory();
3738
0
            return ERROR;
3739
0
        }
3740
9.07k
        for (int i = 0; i < ncellvars; i++) {
3741
4.96k
            sorted[fixed[i]] = i + 1;
3742
4.96k
        }
3743
9.54k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3744
5.44k
            int oldindex = sorted[i] - 1;
3745
5.44k
            if (oldindex == -1) {
3746
476
                continue;
3747
476
            }
3748
4.96k
            cfg_instr make_cell = {
3749
4.96k
                .i_opcode = MAKE_CELL,
3750
                // This will get fixed in offset_derefs().
3751
4.96k
                .i_oparg = oldindex,
3752
4.96k
                .i_loc = NO_LOCATION,
3753
4.96k
                .i_target = NULL,
3754
4.96k
                .i_except = NULL,
3755
4.96k
            };
3756
4.96k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3757
0
                PyMem_RawFree(sorted);
3758
0
                return ERROR;
3759
0
            }
3760
4.96k
            ncellsused += 1;
3761
4.96k
        }
3762
4.10k
        PyMem_RawFree(sorted);
3763
4.10k
    }
3764
3765
21.0k
    if (nfreevars) {
3766
3.45k
        cfg_instr copy_frees = {
3767
3.45k
            .i_opcode = COPY_FREE_VARS,
3768
3.45k
            .i_oparg = nfreevars,
3769
3.45k
            .i_loc = NO_LOCATION,
3770
3.45k
            .i_target = NULL,
3771
3.45k
            .i_except = NULL,
3772
3.45k
        };
3773
3.45k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3774
3.45k
    }
3775
3776
21.0k
    return SUCCESS;
3777
21.0k
}
3778
3779
static int
3780
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3781
21.0k
{
3782
21.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3783
21.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3784
21.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3785
21.0k
    int noffsets = ncellvars + nfreevars;
3786
3787
    // First deal with duplicates (arg cells).
3788
21.0k
    int numdropped = 0;
3789
30.2k
    for (int i = 0; i < noffsets ; i++) {
3790
9.22k
        if (fixedmap[i] == i + nlocals) {
3791
9.22k
            fixedmap[i] -= numdropped;
3792
9.22k
        }
3793
6
        else {
3794
            // It was a duplicate (cell/arg).
3795
6
            numdropped += 1;
3796
6
        }
3797
9.22k
    }
3798
3799
    // Then update offsets, either relative to locals or by cell2arg.
3800
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3801
1.25M
        for (int i = 0; i < b->b_iused; i++) {
3802
1.07M
            cfg_instr *inst = &b->b_instr[i];
3803
            // This is called before extended args are generated.
3804
1.07M
            assert(inst->i_opcode != EXTENDED_ARG);
3805
1.07M
            int oldoffset = inst->i_oparg;
3806
1.07M
            switch(inst->i_opcode) {
3807
4.96k
                case MAKE_CELL:
3808
12.2k
                case LOAD_CLOSURE:
3809
21.7k
                case LOAD_DEREF:
3810
26.0k
                case STORE_DEREF:
3811
26.0k
                case DELETE_DEREF:
3812
26.3k
                case LOAD_FROM_DICT_OR_DEREF:
3813
26.3k
                    assert(oldoffset >= 0);
3814
26.3k
                    assert(oldoffset < noffsets);
3815
26.3k
                    assert(fixedmap[oldoffset] >= 0);
3816
26.3k
                    inst->i_oparg = fixedmap[oldoffset];
3817
1.07M
            }
3818
1.07M
        }
3819
178k
    }
3820
3821
21.0k
    return numdropped;
3822
21.0k
}
3823
3824
static int
3825
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
3826
21.0k
{
3827
21.0k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3828
21.0k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3829
21.0k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3830
21.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3831
21.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3832
21.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3833
21.0k
    assert(INT_MAX - nlocals - ncellvars > 0);
3834
21.0k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3835
21.0k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3836
21.0k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3837
21.0k
    if (cellfixedoffsets == NULL) {
3838
0
        return ERROR;
3839
0
    }
3840
3841
    // This must be called before fix_cell_offsets().
3842
21.0k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
3843
0
        PyMem_Free(cellfixedoffsets);
3844
0
        return ERROR;
3845
0
    }
3846
3847
21.0k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3848
21.0k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3849
21.0k
    cellfixedoffsets = NULL;
3850
21.0k
    if (numdropped < 0) {
3851
0
        return ERROR;
3852
0
    }
3853
3854
21.0k
    nlocalsplus -= numdropped;
3855
21.0k
    return nlocalsplus;
3856
21.0k
}
3857
3858
cfg_builder *
3859
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3860
21.0k
{
3861
21.0k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3862
0
        return NULL;
3863
0
    }
3864
21.0k
    cfg_builder *g = _PyCfgBuilder_New();
3865
21.0k
    if (g == NULL) {
3866
0
        return NULL;
3867
0
    }
3868
1.60M
    for (int i = 0; i < seq->s_used; i++) {
3869
1.58M
        seq->s_instrs[i].i_target = 0;
3870
1.58M
    }
3871
1.60M
    for (int i = 0; i < seq->s_used; i++) {
3872
1.58M
        _PyInstruction *instr = &seq->s_instrs[i];
3873
1.58M
        if (HAS_TARGET(instr->i_opcode)) {
3874
109k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3875
109k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3876
109k
        }
3877
1.58M
    }
3878
21.0k
    int offset = 0;
3879
1.60M
    for (int i = 0; i < seq->s_used; i++) {
3880
1.58M
        _PyInstruction *instr = &seq->s_instrs[i];
3881
1.58M
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3882
4.49k
            if (seq->s_annotations_code != NULL) {
3883
235
                assert(seq->s_annotations_code->s_labelmap_size == 0
3884
235
                    && seq->s_annotations_code->s_nested == NULL);
3885
940
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3886
705
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3887
705
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3888
705
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3889
0
                        goto error;
3890
0
                    }
3891
705
                }
3892
235
                offset += seq->s_annotations_code->s_used - 1;
3893
235
            }
3894
4.25k
            else {
3895
4.25k
                offset -= 1;
3896
4.25k
            }
3897
4.49k
            continue;
3898
4.49k
        }
3899
1.58M
        if (instr->i_target) {
3900
88.9k
            jump_target_label lbl_ = {i + offset};
3901
88.9k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3902
0
                goto error;
3903
0
            }
3904
88.9k
        }
3905
1.58M
        int opcode = instr->i_opcode;
3906
1.58M
        int oparg = instr->i_oparg;
3907
1.58M
        if (HAS_TARGET(opcode)) {
3908
109k
            oparg += offset;
3909
109k
        }
3910
1.58M
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3911
0
            goto error;
3912
0
        }
3913
1.58M
    }
3914
21.0k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3915
0
        goto error;
3916
0
    }
3917
21.0k
    return g;
3918
0
error:
3919
0
    _PyCfgBuilder_Free(g);
3920
0
    return NULL;
3921
21.0k
}
3922
3923
int
3924
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3925
21.0k
{
3926
21.0k
    int lbl = 0;
3927
199k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3928
178k
        b->b_label = (jump_target_label){lbl};
3929
178k
        lbl += 1;
3930
178k
    }
3931
199k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3932
178k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3933
1.25M
        for (int i = 0; i < b->b_iused; i++) {
3934
1.07M
            cfg_instr *instr = &b->b_instr[i];
3935
1.07M
            if (HAS_TARGET(instr->i_opcode)) {
3936
                /* Set oparg to the label id (it will later be mapped to an offset) */
3937
72.9k
                instr->i_oparg = instr->i_target->b_label.id;
3938
72.9k
            }
3939
1.07M
            RETURN_IF_ERROR(
3940
1.07M
                _PyInstructionSequence_Addop(
3941
1.07M
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3942
3943
1.07M
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3944
1.07M
            if (instr->i_except != NULL) {
3945
326k
                hi->h_label = instr->i_except->b_label.id;
3946
326k
                hi->h_startdepth = instr->i_except->b_startdepth;
3947
326k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3948
326k
            }
3949
751k
            else {
3950
751k
                hi->h_label = -1;
3951
751k
            }
3952
1.07M
        }
3953
178k
    }
3954
21.0k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3955
0
        return ERROR;
3956
0
    }
3957
21.0k
    return SUCCESS;
3958
21.0k
}
3959
3960
3961
int
3962
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3963
                                     _PyCompile_CodeUnitMetadata *umd,
3964
                                     int *stackdepth, int *nlocalsplus,
3965
                                     _PyInstructionSequence *seq)
3966
21.0k
{
3967
21.0k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3968
3969
21.0k
    *stackdepth = calculate_stackdepth(g);
3970
21.0k
    if (*stackdepth < 0) {
3971
0
        return ERROR;
3972
0
    }
3973
3974
21.0k
    *nlocalsplus = prepare_localsplus(umd, g);
3975
21.0k
    if (*nlocalsplus < 0) {
3976
0
        return ERROR;
3977
0
    }
3978
3979
21.0k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
3980
3981
    /* Order of basic blocks must have been determined by now */
3982
3983
21.0k
    RETURN_IF_ERROR(normalize_jumps(g));
3984
21.0k
    assert(no_redundant_jumps(g));
3985
3986
    /* Can't modify the bytecode after inserting instructions that produce
3987
     * borrowed references.
3988
     */
3989
21.0k
    RETURN_IF_ERROR(optimize_load_fast(g));
3990
3991
    /* Can't modify the bytecode after computing jump offsets. */
3992
21.0k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
3993
0
        return ERROR;
3994
0
    }
3995
3996
21.0k
    return SUCCESS;
3997
21.0k
}
3998
3999
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4000
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4001
 */
4002
int
4003
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4004
0
{
4005
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4006
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4007
0
    return SUCCESS;
4008
0
}
4009
4010
/* Exported API functions */
4011
4012
int
4013
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4014
0
{
4015
0
    stack_effects effs;
4016
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4017
0
        return PY_INVALID_STACK_EFFECT;
4018
0
    }
4019
0
    return effs.net;
4020
0
}
4021
4022
int
4023
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4024
2.23k
{
4025
2.23k
    stack_effects effs;
4026
2.23k
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4027
0
        return PY_INVALID_STACK_EFFECT;
4028
0
    }
4029
2.23k
    return effs.net;
4030
2.23k
}
4031
4032
/* Access to compiler optimizations for unit tests.
4033
4034
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4035
 * a CFG, optimizes it and converts back to an instruction list.
4036
 */
4037
4038
static PyObject *
4039
cfg_to_instruction_sequence(cfg_builder *g)
4040
0
{
4041
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4042
0
    if (seq == NULL) {
4043
0
        return NULL;
4044
0
    }
4045
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4046
0
        PyInstructionSequence_Fini(seq);
4047
0
        return NULL;
4048
0
    }
4049
0
    return (PyObject*)seq;
4050
0
}
4051
4052
PyObject *
4053
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4054
0
{
4055
0
    if (!_PyInstructionSequence_Check(seq)) {
4056
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4057
0
        return NULL;
4058
0
    }
4059
0
    PyObject *const_cache = PyDict_New();
4060
0
    if (const_cache == NULL) {
4061
0
        return NULL;
4062
0
    }
4063
4064
0
    PyObject *res = NULL;
4065
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4066
0
    if (g == NULL) {
4067
0
        goto error;
4068
0
    }
4069
0
    int nparams = 0, firstlineno = 1;
4070
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4071
0
                                nparams, firstlineno) < 0) {
4072
0
        goto error;
4073
0
    }
4074
4075
0
    if (calculate_stackdepth(g) == ERROR) {
4076
0
        goto error;
4077
0
    }
4078
4079
0
    if (optimize_load_fast(g) != SUCCESS) {
4080
0
        goto error;
4081
0
    }
4082
4083
0
    res = cfg_to_instruction_sequence(g);
4084
0
error:
4085
0
    Py_DECREF(const_cache);
4086
0
    _PyCfgBuilder_Free(g);
4087
0
    return res;
4088
0
}