Coverage Report

Created: 2026-04-12 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Python/flowgraph.c
Line
Count
Source
1
#include "Python.h"
2
#include "opcode.h"
3
#include "pycore_c_array.h"       // _Py_CArray_EnsureCapacity
4
#include "pycore_flowgraph.h"
5
#include "pycore_compile.h"
6
#include "pycore_intrinsics.h"
7
#include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
8
#include "pycore_long.h"          // _PY_IS_SMALL_INT()
9
10
#include "pycore_opcode_utils.h"
11
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
12
13
#include <stdbool.h>
14
15
16
#undef SUCCESS
17
#undef ERROR
18
5.02M
#define SUCCESS 0
19
20.9k
#define ERROR -1
20
21
#define RETURN_IF_ERROR(X)  \
22
7.77M
    if ((X) == -1) {        \
23
0
        return ERROR;       \
24
0
    }
25
26
1.64M
#define DEFAULT_BLOCK_SIZE 16
27
28
typedef _Py_SourceLocation location;
29
typedef _PyJumpTargetLabel jump_target_label;
30
31
typedef struct _PyCfgInstruction {
32
    int i_opcode;
33
    int i_oparg;
34
    _Py_SourceLocation i_loc;
35
    struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
36
    struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
37
} cfg_instr;
38
39
typedef struct _PyCfgBasicblock {
40
    /* Each basicblock in a compilation unit is linked via b_list in the
41
       reverse order that the block are allocated.  b_list points to the next
42
       block in this list, not to be confused with b_next, which is next by
43
       control flow. */
44
    struct _PyCfgBasicblock *b_list;
45
    /* The label of this block if it is a jump target, -1 otherwise */
46
    _PyJumpTargetLabel b_label;
47
    /* Exception stack at start of block, used by assembler to create the exception handling table */
48
    struct _PyCfgExceptStack *b_exceptstack;
49
    /* pointer to an array of instructions, initially NULL */
50
    cfg_instr *b_instr;
51
    /* If b_next is non-NULL, it is a pointer to the next
52
       block reached by normal control flow. */
53
    struct _PyCfgBasicblock *b_next;
54
    /* number of instructions used */
55
    int b_iused;
56
    /* length of instruction array (b_instr) */
57
    int b_ialloc;
58
    /* Used by add_checks_for_loads_of_unknown_variables */
59
    uint64_t b_unsafe_locals_mask;
60
    /* Number of predecessors that a block has. */
61
    int b_predecessors;
62
    /* depth of stack upon entry of block, computed by stackdepth() */
63
    int b_startdepth;
64
    /* Basic block is an exception handler that preserves lasti */
65
    unsigned b_preserve_lasti : 1;
66
    /* Used by compiler passes to mark whether they have visited a basic block. */
67
    unsigned b_visited : 1;
68
    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
69
    unsigned b_except_handler : 1;
70
    /* b_cold is true if this block is not perf critical (like an exception handler) */
71
    unsigned b_cold : 1;
72
    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
73
    unsigned b_warm : 1;
74
} basicblock;
75
76
77
struct _PyCfgBuilder {
78
    /* The entryblock, at which control flow begins. All blocks of the
79
       CFG are reachable through the b_next links */
80
    struct _PyCfgBasicblock *g_entryblock;
81
    /* Pointer to the most recently allocated block.  By following
82
       b_list links, you can reach all allocated blocks. */
83
    struct _PyCfgBasicblock *g_block_list;
84
    /* pointer to the block currently being constructed */
85
    struct _PyCfgBasicblock *g_curblock;
86
    /* label for the next instruction to be placed */
87
    _PyJumpTargetLabel g_current_label;
88
};
89
90
typedef struct _PyCfgBuilder cfg_builder;
91
92
1.77M
#define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
93
1.77M
#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
94
95
#define LOCATION(LNO, END_LNO, COL, END_COL) \
96
    ((const _Py_SourceLocation){(LNO), (END_LNO), (COL), (END_COL)})
97
98
static inline int
99
is_block_push(cfg_instr *i)
100
6.80M
{
101
6.80M
    assert(OPCODE_HAS_ARG(i->i_opcode) || !IS_BLOCK_PUSH_OPCODE(i->i_opcode));
102
6.80M
    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
103
6.80M
}
104
105
static inline int
106
is_jump(cfg_instr *i)
107
5.91M
{
108
5.91M
    return OPCODE_HAS_JUMP(i->i_opcode);
109
5.91M
}
110
111
/* One arg*/
112
#define INSTR_SET_OP1(I, OP, ARG) \
113
393k
    do { \
114
393k
        assert(OPCODE_HAS_ARG(OP)); \
115
393k
        cfg_instr *_instr__ptr_ = (I); \
116
393k
        _instr__ptr_->i_opcode = (OP); \
117
393k
        _instr__ptr_->i_oparg = (ARG); \
118
393k
    } while (0);
119
120
/* No args*/
121
#define INSTR_SET_OP0(I, OP) \
122
638k
    do { \
123
638k
        assert(!OPCODE_HAS_ARG(OP)); \
124
638k
        cfg_instr *_instr__ptr_ = (I); \
125
638k
        _instr__ptr_->i_opcode = (OP); \
126
638k
        _instr__ptr_->i_oparg = 0; \
127
638k
    } while (0);
128
129
#define INSTR_SET_LOC(I, LOC) \
130
389k
    do { \
131
389k
        cfg_instr *_instr__ptr_ = (I); \
132
389k
        _instr__ptr_->i_loc = (LOC); \
133
389k
    } while (0);
134
135
/***** Blocks *****/
136
137
/* Returns the offset of the next instruction in the current block's
138
   b_instr array.  Resizes the b_instr as necessary.
139
   Returns -1 on failure.
140
*/
141
static int
142
basicblock_next_instr(basicblock *b)
143
1.64M
{
144
1.64M
    assert(b != NULL);
145
1.64M
    _Py_c_array_t array = {
146
1.64M
        .array = (void*)b->b_instr,
147
1.64M
        .allocated_entries = b->b_ialloc,
148
1.64M
        .item_size = sizeof(cfg_instr),
149
1.64M
        .initial_num_entries = DEFAULT_BLOCK_SIZE,
150
1.64M
    };
151
152
1.64M
    RETURN_IF_ERROR(_Py_CArray_EnsureCapacity(&array, b->b_iused + 1));
153
1.64M
    b->b_instr = array.array;
154
1.64M
    b->b_ialloc = array.allocated_entries;
155
1.64M
    return b->b_iused++;
156
1.64M
}
157
158
static cfg_instr *
159
6.26M
basicblock_last_instr(const basicblock *b) {
160
6.26M
    assert(b->b_iused >= 0);
161
6.26M
    if (b->b_iused > 0) {
162
5.81M
        assert(b->b_instr != NULL);
163
5.81M
        return &b->b_instr[b->b_iused - 1];
164
5.81M
    }
165
455k
    return NULL;
166
6.26M
}
167
168
/* Allocate a new block and return a pointer to it.
169
   Returns NULL on error.
170
*/
171
172
static basicblock *
173
cfg_builder_new_block(cfg_builder *g)
174
178k
{
175
178k
    basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock));
176
178k
    if (b == NULL) {
177
0
        PyErr_NoMemory();
178
0
        return NULL;
179
0
    }
180
    /* Extend the singly linked list of blocks with new block. */
181
178k
    b->b_list = g->g_block_list;
182
178k
    g->g_block_list = b;
183
178k
    b->b_label = NO_LABEL;
184
178k
    return b;
185
178k
}
186
187
static int
188
basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
189
1.62M
{
190
1.62M
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
191
1.62M
    assert(!IS_ASSEMBLER_OPCODE(opcode));
192
1.62M
    assert(OPCODE_HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
193
1.62M
    assert(0 <= oparg && oparg < (1 << 30));
194
195
1.62M
    int off = basicblock_next_instr(b);
196
1.62M
    if (off < 0) {
197
0
        return ERROR;
198
0
    }
199
1.62M
    cfg_instr *i = &b->b_instr[off];
200
1.62M
    i->i_opcode = opcode;
201
1.62M
    i->i_oparg = oparg;
202
1.62M
    i->i_loc = loc;
203
    // memory is already zero initialized
204
1.62M
    assert(i->i_target == NULL);
205
1.62M
    assert(i->i_except == NULL);
206
207
1.62M
    return SUCCESS;
208
1.62M
}
209
210
static int
211
basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc)
212
203
{
213
203
    cfg_instr *last = basicblock_last_instr(b);
214
203
    if (last && is_jump(last)) {
215
0
        return ERROR;
216
0
    }
217
218
203
    RETURN_IF_ERROR(
219
203
        basicblock_addop(b, opcode, target->b_label.id, loc));
220
203
    last = basicblock_last_instr(b);
221
203
    assert(last && last->i_opcode == opcode);
222
203
    last->i_target = target;
223
203
    return SUCCESS;
224
203
}
225
226
static inline int
227
basicblock_append_instructions(basicblock *to, basicblock *from)
228
3.75k
{
229
12.2k
    for (int i = 0; i < from->b_iused; i++) {
230
8.54k
        int n = basicblock_next_instr(to);
231
8.54k
        if (n < 0) {
232
0
            return ERROR;
233
0
        }
234
8.54k
        to->b_instr[n] = from->b_instr[i];
235
8.54k
    }
236
3.75k
    return SUCCESS;
237
3.75k
}
238
239
static inline int
240
1.78M
basicblock_nofallthrough(const basicblock *b) {
241
1.78M
    cfg_instr *last = basicblock_last_instr(b);
242
1.78M
    return (last &&
243
1.70M
            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
244
1.30M
             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
245
1.78M
}
246
247
#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
248
2.75M
#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
249
250
static basicblock *
251
copy_basicblock(cfg_builder *g, basicblock *block)
252
936
{
253
    /* Cannot copy a block if it has a fallthrough, since
254
     * a block can only have one fallthrough predecessor.
255
     */
256
936
    assert(BB_NO_FALLTHROUGH(block));
257
936
    basicblock *result = cfg_builder_new_block(g);
258
936
    if (result == NULL) {
259
0
        return NULL;
260
0
    }
261
936
    if (basicblock_append_instructions(result, block) < 0) {
262
0
        return NULL;
263
0
    }
264
936
    return result;
265
936
}
266
267
static int
268
11.2k
basicblock_insert_instruction(basicblock *block, int pos, cfg_instr *instr) {
269
11.2k
    RETURN_IF_ERROR(basicblock_next_instr(block));
270
164k
    for (int i = block->b_iused - 1; i > pos; i--) {
271
153k
        block->b_instr[i] = block->b_instr[i-1];
272
153k
    }
273
11.2k
    block->b_instr[pos] = *instr;
274
11.2k
    return SUCCESS;
275
11.2k
}
276
277
/* For debugging purposes only */
278
#if 0
279
static void
280
dump_instr(cfg_instr *i)
281
{
282
    const char *jump = is_jump(i) ? "jump " : "";
283
284
    char arg[128];
285
286
    *arg = '\0';
287
    if (OPCODE_HAS_ARG(i->i_opcode)) {
288
        sprintf(arg, "arg: %d ", i->i_oparg);
289
    }
290
    if (HAS_TARGET(i->i_opcode)) {
291
        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
292
    }
293
    fprintf(stderr, "line: %d, %s (%d)  %s%s\n",
294
                    i->i_loc.lineno, _PyOpcode_OpName[i->i_opcode], i->i_opcode, arg, jump);
295
}
296
297
static inline int
298
basicblock_returns(const basicblock *b) {
299
    cfg_instr *last = basicblock_last_instr(b);
300
    return last && IS_RETURN_OPCODE(last->i_opcode);
301
}
302
303
static void
304
dump_basicblock(const basicblock *b, bool highlight)
305
{
306
    const char *b_return = basicblock_returns(b) ? "return " : "";
307
    if (highlight) {
308
        fprintf(stderr, ">>> ");
309
    }
310
    fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, preds: %d %s\n",
311
        b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
312
        b->b_startdepth, b->b_predecessors, b_return);
313
    int depth = b->b_startdepth;
314
    if (b->b_instr) {
315
        int i;
316
        for (i = 0; i < b->b_iused; i++) {
317
            fprintf(stderr, "  [%02d] depth: %d ", i, depth);
318
            dump_instr(b->b_instr + i);
319
320
            int popped = _PyOpcode_num_popped(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
321
            int pushed = _PyOpcode_num_pushed(b->b_instr[i].i_opcode, b->b_instr[i].i_oparg);
322
            depth += (pushed - popped);
323
        }
324
    }
325
}
326
327
void
328
_PyCfgBuilder_DumpGraph(const basicblock *entryblock, const basicblock *mark)
329
{
330
    for (const basicblock *b = entryblock; b != NULL; b = b->b_next) {
331
        dump_basicblock(b, b == mark);
332
    }
333
}
334
335
#endif
336
337
338
/***** CFG construction and modification *****/
339
340
static basicblock *
341
cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
342
149k
{
343
149k
    assert(block != NULL);
344
149k
    g->g_curblock->b_next = block;
345
149k
    g->g_curblock = block;
346
149k
    return block;
347
149k
}
348
349
static inline int
350
384k
basicblock_exits_scope(const basicblock *b) {
351
384k
    cfg_instr *last = basicblock_last_instr(b);
352
384k
    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
353
384k
}
354
355
static inline int
356
280k
basicblock_has_eval_break(const basicblock *b) {
357
1.57M
    for (int i = 0; i < b->b_iused; i++) {
358
1.38M
        if (OPCODE_HAS_EVAL_BREAK(b->b_instr[i].i_opcode)) {
359
86.5k
            return true;
360
86.5k
        }
361
1.38M
    }
362
193k
    return false;
363
280k
}
364
365
static bool
366
cfg_builder_current_block_is_terminated(cfg_builder *g)
367
1.68M
{
368
1.68M
    cfg_instr *last = basicblock_last_instr(g->g_curblock);
369
1.68M
    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
370
109k
        return true;
371
109k
    }
372
1.57M
    if (IS_LABEL(g->g_current_label)) {
373
39.2k
        if (last || IS_LABEL(g->g_curblock->b_label)) {
374
39.2k
            return true;
375
39.2k
        }
376
0
        else {
377
            /* current block is empty, label it */
378
0
            g->g_curblock->b_label = g->g_current_label;
379
0
            g->g_current_label = NO_LABEL;
380
0
        }
381
39.2k
    }
382
1.53M
    return false;
383
1.57M
}
384
385
static int
386
cfg_builder_maybe_start_new_block(cfg_builder *g)
387
1.68M
{
388
1.68M
    if (cfg_builder_current_block_is_terminated(g)) {
389
149k
        basicblock *b = cfg_builder_new_block(g);
390
149k
        if (b == NULL) {
391
0
            return ERROR;
392
0
        }
393
149k
        b->b_label = g->g_current_label;
394
149k
        g->g_current_label = NO_LABEL;
395
149k
        cfg_builder_use_next_block(g, b);
396
149k
    }
397
1.68M
    return SUCCESS;
398
1.68M
}
399
400
#ifndef NDEBUG
401
static bool
402
cfg_builder_check(cfg_builder *g)
403
42.0k
{
404
42.0k
    assert(g->g_entryblock->b_iused > 0);
405
390k
    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
406
348k
        assert(!_PyMem_IsPtrFreed(block));
407
348k
        if (block->b_instr != NULL) {
408
348k
            assert(block->b_ialloc > 0);
409
348k
            assert(block->b_iused >= 0);
410
348k
            assert(block->b_ialloc >= block->b_iused);
411
348k
        }
412
0
        else {
413
0
            assert (block->b_iused == 0);
414
0
            assert (block->b_ialloc == 0);
415
0
        }
416
348k
    }
417
42.0k
    return true;
418
42.0k
}
419
#endif
420
421
static int
422
init_cfg_builder(cfg_builder *g)
423
21.0k
{
424
21.0k
    g->g_block_list = NULL;
425
21.0k
    basicblock *block = cfg_builder_new_block(g);
426
21.0k
    if (block == NULL) {
427
0
        return ERROR;
428
0
    }
429
21.0k
    g->g_curblock = g->g_entryblock = block;
430
21.0k
    g->g_current_label = NO_LABEL;
431
21.0k
    return SUCCESS;
432
21.0k
}
433
434
cfg_builder *
435
_PyCfgBuilder_New(void)
436
21.0k
{
437
21.0k
    cfg_builder *g = PyMem_Malloc(sizeof(cfg_builder));
438
21.0k
    if (g == NULL) {
439
0
        PyErr_NoMemory();
440
0
        return NULL;
441
0
    }
442
21.0k
    memset(g, 0, sizeof(cfg_builder));
443
21.0k
    if (init_cfg_builder(g) < 0) {
444
0
        PyMem_Free(g);
445
0
        return NULL;
446
0
    }
447
21.0k
    return g;
448
21.0k
}
449
450
void
451
_PyCfgBuilder_Free(cfg_builder *g)
452
21.0k
{
453
21.0k
    if (g == NULL) {
454
0
        return;
455
0
    }
456
21.0k
    assert(cfg_builder_check(g));
457
21.0k
    basicblock *b = g->g_block_list;
458
199k
    while (b != NULL) {
459
178k
        if (b->b_instr) {
460
178k
            PyMem_Free((void *)b->b_instr);
461
178k
        }
462
178k
        basicblock *next = b->b_list;
463
178k
        PyMem_Free((void *)b);
464
178k
        b = next;
465
178k
    }
466
21.0k
    PyMem_Free(g);
467
21.0k
}
468
469
int
470
_PyCfgBuilder_CheckSize(cfg_builder *g)
471
21.0k
{
472
21.0k
    int nblocks = 0;
473
191k
    for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
474
170k
        nblocks++;
475
170k
    }
476
21.0k
    if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
477
0
        PyErr_NoMemory();
478
0
        return ERROR;
479
0
    }
480
21.0k
    return SUCCESS;
481
21.0k
}
482
483
int
484
_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl)
485
88.9k
{
486
88.9k
    g->g_current_label = lbl;
487
88.9k
    return cfg_builder_maybe_start_new_block(g);
488
88.9k
}
489
490
int
491
_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc)
492
1.59M
{
493
1.59M
    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
494
1.59M
    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
495
1.59M
}
496
497
498
static basicblock *
499
next_nonempty_block(basicblock *b)
500
607k
{
501
678k
    while (b && b->b_iused == 0) {
502
70.9k
        b = b->b_next;
503
70.9k
    }
504
607k
    return b;
505
607k
}
506
507
/***** debugging helpers *****/
508
509
#ifndef NDEBUG
510
static int remove_redundant_nops(cfg_builder *g);
511
512
static bool
513
21.0k
no_redundant_nops(cfg_builder *g) {
514
21.0k
    if (remove_redundant_nops(g) != 0) {
515
0
        return false;
516
0
    }
517
21.0k
    return true;
518
21.0k
}
519
520
static bool
521
42.0k
no_redundant_jumps(cfg_builder *g) {
522
391k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
523
349k
        cfg_instr *last = basicblock_last_instr(b);
524
349k
        if (last != NULL) {
525
302k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
526
61.3k
                basicblock *next = next_nonempty_block(b->b_next);
527
61.3k
                basicblock *jump_target = next_nonempty_block(last->i_target);
528
61.3k
                if (jump_target == next) {
529
0
                    assert(next);
530
0
                    if (last->i_loc.lineno == next->b_instr[0].i_loc.lineno) {
531
0
                        assert(0);
532
0
                        return false;
533
0
                    }
534
0
                }
535
61.3k
            }
536
302k
        }
537
349k
    }
538
42.0k
    return true;
539
42.0k
}
540
#endif
541
542
/***** CFG preprocessing (jump targets and exceptions) *****/
543
544
static int
545
178k
normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
546
178k
    cfg_instr *last = basicblock_last_instr(b);
547
178k
    if (last == NULL || !IS_CONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
548
153k
        return SUCCESS;
549
153k
    }
550
178k
    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
551
552
24.6k
    bool is_forward = last->i_target->b_visited == 0;
553
24.6k
    if (is_forward) {
554
24.6k
        RETURN_IF_ERROR(
555
24.6k
            basicblock_addop(b, NOT_TAKEN, 0, last->i_loc));
556
24.6k
        return SUCCESS;
557
24.6k
    }
558
559
15
    int reversed_opcode = 0;
560
15
    switch(last->i_opcode) {
561
0
        case POP_JUMP_IF_NOT_NONE:
562
0
            reversed_opcode = POP_JUMP_IF_NONE;
563
0
            break;
564
0
        case POP_JUMP_IF_NONE:
565
0
            reversed_opcode = POP_JUMP_IF_NOT_NONE;
566
0
            break;
567
15
        case POP_JUMP_IF_FALSE:
568
15
            reversed_opcode = POP_JUMP_IF_TRUE;
569
15
            break;
570
0
        case POP_JUMP_IF_TRUE:
571
0
            reversed_opcode = POP_JUMP_IF_FALSE;
572
0
            break;
573
15
    }
574
    /* transform 'conditional jump T' to
575
     * 'reversed_jump b_next' followed by 'jump_backwards T'
576
     */
577
578
15
    basicblock *target = last->i_target;
579
15
    basicblock *backwards_jump = cfg_builder_new_block(g);
580
15
    if (backwards_jump == NULL) {
581
0
        return ERROR;
582
0
    }
583
15
    RETURN_IF_ERROR(
584
15
        basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc));
585
15
    RETURN_IF_ERROR(
586
15
        basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc));
587
15
    backwards_jump->b_startdepth = target->b_startdepth;
588
15
    last->i_opcode = reversed_opcode;
589
15
    last->i_target = b->b_next;
590
591
15
    backwards_jump->b_cold = b->b_cold;
592
15
    backwards_jump->b_next = b->b_next;
593
15
    b->b_next = backwards_jump;
594
15
    return SUCCESS;
595
15
}
596
597
598
static int
599
normalize_jumps(cfg_builder *g)
600
21.0k
{
601
21.0k
    basicblock *entryblock = g->g_entryblock;
602
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
603
178k
        b->b_visited = 0;
604
178k
    }
605
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
606
178k
        b->b_visited = 1;
607
178k
        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
608
178k
    }
609
21.0k
    return SUCCESS;
610
21.0k
}
611
612
static int
613
21.0k
check_cfg(cfg_builder *g) {
614
191k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
615
        /* Raise SystemError if jump or exit is not last instruction in the block. */
616
1.76M
        for (int i = 0; i < b->b_iused; i++) {
617
1.59M
            int opcode = b->b_instr[i].i_opcode;
618
1.59M
            assert(!IS_ASSEMBLER_OPCODE(opcode));
619
1.59M
            if (IS_TERMINATOR_OPCODE(opcode)) {
620
130k
                if (i != b->b_iused - 1) {
621
0
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
622
0
                    return ERROR;
623
0
                }
624
130k
            }
625
1.59M
        }
626
170k
    }
627
21.0k
    return SUCCESS;
628
21.0k
}
629
630
static int
631
get_max_label(basicblock *entryblock)
632
75.0k
{
633
75.0k
    int lbl = -1;
634
755k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
635
680k
        if (b->b_label.id > lbl) {
636
301k
            lbl = b->b_label.id;
637
301k
        }
638
680k
    }
639
75.0k
    return lbl;
640
75.0k
}
641
642
/* Calculate the actual jump target from the target_label */
643
static int
644
translate_jump_labels_to_targets(basicblock *entryblock)
645
21.0k
{
646
21.0k
    int max_label = get_max_label(entryblock);
647
21.0k
    size_t mapsize = sizeof(basicblock *) * (max_label + 1);
648
21.0k
    basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize);
649
21.0k
    if (!label2block) {
650
0
        PyErr_NoMemory();
651
0
        return ERROR;
652
0
    }
653
21.0k
    memset(label2block, 0, mapsize);
654
191k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
655
170k
        if (b->b_label.id >= 0) {
656
88.9k
            label2block[b->b_label.id] = b;
657
88.9k
        }
658
170k
    }
659
191k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
660
1.76M
        for (int i = 0; i < b->b_iused; i++) {
661
1.59M
            cfg_instr *instr = &b->b_instr[i];
662
1.59M
            assert(instr->i_target == NULL);
663
1.59M
            if (HAS_TARGET(instr->i_opcode)) {
664
109k
                int lbl = instr->i_oparg;
665
109k
                assert(lbl >= 0 && lbl <= max_label);
666
109k
                instr->i_target = label2block[lbl];
667
109k
                assert(instr->i_target != NULL);
668
109k
                assert(instr->i_target->b_label.id == lbl);
669
109k
            }
670
1.59M
        }
671
170k
    }
672
21.0k
    PyMem_Free(label2block);
673
21.0k
    return SUCCESS;
674
21.0k
}
675
676
static int
677
21.0k
mark_except_handlers(basicblock *entryblock) {
678
21.0k
#ifndef NDEBUG
679
191k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
680
170k
        assert(!b->b_except_handler);
681
170k
    }
682
21.0k
#endif
683
191k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
684
1.76M
        for (int i=0; i < b->b_iused; i++) {
685
1.59M
            cfg_instr *instr = &b->b_instr[i];
686
1.59M
            if (is_block_push(instr)) {
687
27.7k
                instr->i_target->b_except_handler = 1;
688
27.7k
            }
689
1.59M
        }
690
170k
    }
691
21.0k
    return SUCCESS;
692
21.0k
}
693
694
695
struct _PyCfgExceptStack {
696
    basicblock *handlers[CO_MAXBLOCKS+2];
697
    int depth;
698
};
699
700
701
static basicblock *
702
27.5k
push_except_block(struct _PyCfgExceptStack *stack, cfg_instr *setup) {
703
27.5k
    assert(is_block_push(setup));
704
27.5k
    int opcode = setup->i_opcode;
705
27.5k
    basicblock * target = setup->i_target;
706
27.5k
    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
707
10.8k
        target->b_preserve_lasti = 1;
708
10.8k
    }
709
27.5k
    assert(stack->depth <= CO_MAXBLOCKS);
710
27.5k
    stack->handlers[++stack->depth] = target;
711
27.5k
    return target;
712
27.5k
}
713
714
static basicblock *
715
28.2k
pop_except_block(struct _PyCfgExceptStack *stack) {
716
28.2k
    assert(stack->depth > 0);
717
28.2k
    return stack->handlers[--stack->depth];
718
28.2k
}
719
720
static basicblock *
721
158k
except_stack_top(struct _PyCfgExceptStack *stack) {
722
158k
    return stack->handlers[stack->depth];
723
158k
}
724
725
static struct _PyCfgExceptStack *
726
21.0k
make_except_stack(void) {
727
21.0k
    struct _PyCfgExceptStack *new = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
728
21.0k
    if (new == NULL) {
729
0
        PyErr_NoMemory();
730
0
        return NULL;
731
0
    }
732
21.0k
    new->depth = 0;
733
21.0k
    new->handlers[0] = NULL;
734
21.0k
    return new;
735
21.0k
}
736
737
static struct _PyCfgExceptStack *
738
61.7k
copy_except_stack(struct _PyCfgExceptStack *stack) {
739
61.7k
    struct _PyCfgExceptStack *copy = PyMem_Malloc(sizeof(struct _PyCfgExceptStack));
740
61.7k
    if (copy == NULL) {
741
0
        PyErr_NoMemory();
742
0
        return NULL;
743
0
    }
744
61.7k
    memcpy(copy, stack, sizeof(struct _PyCfgExceptStack));
745
61.7k
    return copy;
746
61.7k
}
747
748
static basicblock**
749
137k
make_cfg_traversal_stack(basicblock *entryblock) {
750
137k
    int nblocks = 0;
751
1.44M
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
752
1.30M
        b->b_visited = 0;
753
1.30M
        nblocks++;
754
1.30M
    }
755
137k
    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
756
137k
    if (!stack) {
757
0
        PyErr_NoMemory();
758
0
    }
759
137k
    return stack;
760
137k
}
761
762
/* Compute the stack effects of opcode with argument oparg.
763
764
   Some opcodes have different stack effect when jump to the target and
765
   when not jump. The 'jump' parameter specifies the case:
766
767
   * 0 -- when not jump
768
   * 1 -- when jump
769
   * -1 -- maximal
770
 */
771
typedef struct {
772
    /* The stack effect of the instruction. */
773
    int net;
774
} stack_effects;
775
776
Py_LOCAL(int)
777
get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects)
778
1.18M
{
779
1.18M
    if (opcode < 0) {
780
0
        return -1;
781
0
    }
782
1.18M
    if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) {
783
        // Specialized instructions are not supported.
784
0
        return -1;
785
0
    }
786
1.18M
    int popped = _PyOpcode_num_popped(opcode, oparg);
787
1.18M
    int pushed = _PyOpcode_num_pushed(opcode, oparg);
788
1.18M
    if (popped < 0 || pushed < 0) {
789
0
        return -1;
790
0
    }
791
1.18M
    if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) {
792
27.5k
        effects->net = 0;
793
27.5k
        return 0;
794
27.5k
    }
795
1.16M
    effects->net = pushed - popped;
796
1.16M
    return 0;
797
1.18M
}
798
799
Py_LOCAL_INLINE(int)
800
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
801
215k
{
802
215k
    if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
803
0
        PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
804
0
        return ERROR;
805
0
    }
806
215k
    if (b->b_startdepth < depth && b->b_startdepth < 100) {
807
165k
        assert(b->b_startdepth < 0);
808
165k
        b->b_startdepth = depth;
809
165k
        *(*sp)++ = b;
810
165k
    }
811
215k
    return SUCCESS;
812
215k
}
813
814
/* Find the flow path that needs the largest stack.  We assume that
815
 * cycles in the flow graph have no net effect on the stack depth.
816
 */
817
static int
818
calculate_stackdepth(cfg_builder *g)
819
21.0k
{
820
21.0k
    basicblock *entryblock = g->g_entryblock;
821
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
822
178k
        b->b_startdepth = INT_MIN;
823
178k
    }
824
21.0k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
825
21.0k
    if (!stack) {
826
0
        return ERROR;
827
0
    }
828
829
830
21.0k
    int stackdepth = -1;
831
21.0k
    int maxdepth = 0;
832
21.0k
    basicblock **sp = stack;
833
21.0k
    if (stackdepth_push(&sp, entryblock, 0) < 0) {
834
0
        goto error;
835
0
    }
836
186k
    while (sp != stack) {
837
165k
        basicblock *b = *--sp;
838
165k
        int depth = b->b_startdepth;
839
165k
        assert(depth >= 0);
840
165k
        basicblock *next = b->b_next;
841
1.18M
        for (int i = 0; i < b->b_iused; i++) {
842
1.08M
            cfg_instr *instr = &b->b_instr[i];
843
1.08M
            stack_effects effects;
844
1.08M
            if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) {
845
0
                PyErr_Format(PyExc_SystemError,
846
0
                             "Invalid stack effect for opcode=%d, arg=%i",
847
0
                             instr->i_opcode, instr->i_oparg);
848
0
                goto error;
849
0
            }
850
1.08M
            int new_depth = depth + effects.net;
851
1.08M
            if (new_depth < 0) {
852
0
                PyErr_Format(PyExc_ValueError,
853
0
                             "Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
854
0
                goto error;
855
0
            }
856
1.08M
            maxdepth = Py_MAX(maxdepth, depth);
857
1.08M
            if (HAS_TARGET(instr->i_opcode) && instr->i_opcode != END_ASYNC_FOR) {
858
100k
                if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) {
859
0
                    PyErr_Format(PyExc_SystemError,
860
0
                                 "Invalid stack effect for opcode=%d, arg=%i",
861
0
                                 instr->i_opcode, instr->i_oparg);
862
0
                    goto error;
863
0
                }
864
100k
                int target_depth = depth + effects.net;
865
100k
                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
866
100k
                maxdepth = Py_MAX(maxdepth, depth);
867
100k
                if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
868
0
                    goto error;
869
0
                }
870
100k
            }
871
1.08M
            depth = new_depth;
872
1.08M
            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
873
1.08M
            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
874
1.05M
                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
875
72.0k
            {
876
                /* remaining code is dead */
877
72.0k
                next = NULL;
878
72.0k
                break;
879
72.0k
            }
880
1.08M
        }
881
165k
        if (next != NULL) {
882
93.8k
            assert(BB_HAS_FALLTHROUGH(b));
883
93.8k
            if (stackdepth_push(&sp, next, depth) < 0) {
884
0
                goto error;
885
0
            }
886
93.8k
        }
887
165k
    }
888
21.0k
    stackdepth = maxdepth;
889
21.0k
error:
890
21.0k
    PyMem_Free(stack);
891
21.0k
    return stackdepth;
892
21.0k
}
893
894
static int
895
21.0k
label_exception_targets(basicblock *entryblock) {
896
21.0k
    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
897
21.0k
    if (todo_stack == NULL) {
898
0
        return ERROR;
899
0
    }
900
21.0k
    struct _PyCfgExceptStack *except_stack = make_except_stack();
901
21.0k
    if (except_stack == NULL) {
902
0
        PyMem_Free(todo_stack);
903
0
        PyErr_NoMemory();
904
0
        return ERROR;
905
0
    }
906
21.0k
    except_stack->depth = 0;
907
21.0k
    todo_stack[0] = entryblock;
908
21.0k
    entryblock->b_visited = 1;
909
21.0k
    entryblock->b_exceptstack = except_stack;
910
21.0k
    basicblock **todo = &todo_stack[1];
911
21.0k
    basicblock *handler = NULL;
912
179k
    while (todo > todo_stack) {
913
158k
        todo--;
914
158k
        basicblock *b = todo[0];
915
158k
        assert(b->b_visited == 1);
916
158k
        except_stack = b->b_exceptstack;
917
158k
        assert(except_stack != NULL);
918
158k
        b->b_exceptstack = NULL;
919
158k
        handler = except_stack_top(except_stack);
920
158k
        int last_yield_except_depth = -1;
921
1.73M
        for (int i = 0; i < b->b_iused; i++) {
922
1.57M
            cfg_instr *instr = &b->b_instr[i];
923
1.57M
            if (is_block_push(instr)) {
924
27.5k
                if (!instr->i_target->b_visited) {
925
27.5k
                    struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
926
27.5k
                    if (copy == NULL) {
927
0
                        goto error;
928
0
                    }
929
27.5k
                    instr->i_target->b_exceptstack = copy;
930
27.5k
                    todo[0] = instr->i_target;
931
27.5k
                    instr->i_target->b_visited = 1;
932
27.5k
                    todo++;
933
27.5k
                }
934
27.5k
                handler = push_except_block(except_stack, instr);
935
27.5k
            }
936
1.54M
            else if (instr->i_opcode == POP_BLOCK) {
937
28.2k
                handler = pop_except_block(except_stack);
938
28.2k
                INSTR_SET_OP0(instr, NOP);
939
28.2k
            }
940
1.51M
            else if (is_jump(instr)) {
941
80.9k
                instr->i_except = handler;
942
80.9k
                assert(i == b->b_iused -1);
943
80.9k
                if (!instr->i_target->b_visited) {
944
44.0k
                    if (BB_HAS_FALLTHROUGH(b)) {
945
34.1k
                        struct _PyCfgExceptStack *copy = copy_except_stack(except_stack);
946
34.1k
                        if (copy == NULL) {
947
0
                            goto error;
948
0
                        }
949
34.1k
                        instr->i_target->b_exceptstack = copy;
950
34.1k
                    }
951
9.95k
                    else {
952
9.95k
                        instr->i_target->b_exceptstack = except_stack;
953
9.95k
                        except_stack = NULL;
954
9.95k
                    }
955
44.0k
                    todo[0] = instr->i_target;
956
44.0k
                    instr->i_target->b_visited = 1;
957
44.0k
                    todo++;
958
44.0k
                }
959
80.9k
            }
960
1.43M
            else if (instr->i_opcode == YIELD_VALUE) {
961
14.9k
                instr->i_except = handler;
962
14.9k
                last_yield_except_depth = except_stack->depth;
963
14.9k
            }
964
1.42M
            else if (instr->i_opcode == RESUME) {
965
35.9k
                instr->i_except = handler;
966
35.9k
                if (instr->i_oparg != RESUME_AT_FUNC_START) {
967
14.9k
                    assert(last_yield_except_depth >= 0);
968
14.9k
                    if (last_yield_except_depth == 1) {
969
40
                        instr->i_oparg |= RESUME_OPARG_DEPTH1_MASK;
970
40
                    }
971
14.9k
                    last_yield_except_depth = -1;
972
14.9k
                }
973
35.9k
            }
974
1.38M
            else if (instr->i_opcode == RETURN_GENERATOR) {
975
845
                instr->i_except = NULL;
976
845
            }
977
1.38M
            else {
978
1.38M
                instr->i_except = handler;
979
1.38M
            }
980
1.57M
        }
981
158k
        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
982
65.4k
            assert(except_stack != NULL);
983
65.4k
            b->b_next->b_exceptstack = except_stack;
984
65.4k
            todo[0] = b->b_next;
985
65.4k
            b->b_next->b_visited = 1;
986
65.4k
            todo++;
987
65.4k
        }
988
92.6k
        else if (except_stack != NULL) {
989
82.7k
           PyMem_Free(except_stack);
990
82.7k
        }
991
158k
    }
992
#ifdef Py_DEBUG
993
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
994
        assert(b->b_exceptstack == NULL);
995
    }
996
#endif
997
21.0k
    PyMem_Free(todo_stack);
998
21.0k
    return SUCCESS;
999
0
error:
1000
0
    PyMem_Free(todo_stack);
1001
0
    PyMem_Free(except_stack);
1002
0
    return ERROR;
1003
21.0k
}
1004
1005
/***** CFG optimizations *****/
1006
1007
static int
1008
42.0k
remove_unreachable(basicblock *entryblock) {
1009
383k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1010
341k
        b->b_predecessors = 0;
1011
341k
    }
1012
42.0k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
1013
42.0k
    if (stack == NULL) {
1014
0
        return ERROR;
1015
0
    }
1016
42.0k
    basicblock **sp = stack;
1017
42.0k
    entryblock->b_predecessors = 1;
1018
42.0k
    *sp++ = entryblock;
1019
42.0k
    entryblock->b_visited = 1;
1020
347k
    while (sp > stack) {
1021
305k
        basicblock *b = *(--sp);
1022
305k
        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
1023
166k
            if (!b->b_next->b_visited) {
1024
130k
                assert(b->b_next->b_predecessors == 0);
1025
130k
                *sp++ = b->b_next;
1026
130k
                b->b_next->b_visited = 1;
1027
130k
            }
1028
166k
            b->b_next->b_predecessors++;
1029
166k
        }
1030
2.96M
        for (int i = 0; i < b->b_iused; i++) {
1031
2.66M
            basicblock *target;
1032
2.66M
            cfg_instr *instr = &b->b_instr[i];
1033
2.66M
            if (is_jump(instr) || is_block_push(instr)) {
1034
204k
                target = instr->i_target;
1035
204k
                if (!target->b_visited) {
1036
133k
                    *sp++ = target;
1037
133k
                    target->b_visited = 1;
1038
133k
                }
1039
204k
                target->b_predecessors++;
1040
204k
            }
1041
2.66M
        }
1042
305k
    }
1043
42.0k
    PyMem_Free(stack);
1044
1045
    /* Delete unreachable instructions */
1046
383k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1047
341k
       if (b->b_predecessors == 0) {
1048
35.8k
            b->b_iused = 0;
1049
35.8k
            b->b_except_handler = 0;
1050
35.8k
       }
1051
341k
    }
1052
42.0k
    return SUCCESS;
1053
42.0k
}
1054
1055
static int
1056
1.29M
basicblock_remove_redundant_nops(basicblock *bb) {
1057
    /* Remove NOPs when legal to do so. */
1058
1.29M
    int dest = 0;
1059
1.29M
    int prev_lineno = -1;
1060
8.67M
    for (int src = 0; src < bb->b_iused; src++) {
1061
7.38M
        int lineno = bb->b_instr[src].i_loc.lineno;
1062
7.38M
        if (bb->b_instr[src].i_opcode == NOP) {
1063
            /* Eliminate no-op if it doesn't have a line number */
1064
475k
            if (lineno < 0) {
1065
391k
                continue;
1066
391k
            }
1067
            /* or, if the previous instruction had the same line number. */
1068
84.2k
            if (prev_lineno == lineno) {
1069
45.0k
                continue;
1070
45.0k
            }
1071
            /* or, if the next instruction has same line number or no line number */
1072
39.1k
            if (src < bb->b_iused - 1) {
1073
34.4k
                int next_lineno = bb->b_instr[src+1].i_loc.lineno;
1074
34.4k
                if (next_lineno == lineno) {
1075
29.2k
                    continue;
1076
29.2k
                }
1077
5.25k
                if (next_lineno < 0) {
1078
533
                    bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc;
1079
533
                    continue;
1080
533
                }
1081
5.25k
            }
1082
4.66k
            else {
1083
4.66k
                basicblock *next = next_nonempty_block(bb->b_next);
1084
                /* or if last instruction in BB and next BB has same line number */
1085
4.66k
                if (next) {
1086
4.66k
                    location next_loc = NO_LOCATION;
1087
4.94k
                    for (int next_i=0; next_i < next->b_iused; next_i++) {
1088
4.93k
                        cfg_instr *instr = &next->b_instr[next_i];
1089
4.93k
                        if (instr->i_opcode == NOP && instr->i_loc.lineno < 0) {
1090
                            /* Skip over NOPs without a location, they will be removed */
1091
279
                            continue;
1092
279
                        }
1093
4.65k
                        next_loc = instr->i_loc;
1094
4.65k
                        break;
1095
4.93k
                    }
1096
4.66k
                    if (lineno == next_loc.lineno) {
1097
863
                        continue;
1098
863
                    }
1099
4.66k
                }
1100
4.66k
            }
1101
1102
39.1k
        }
1103
6.91M
        if (dest != src) {
1104
590k
            bb->b_instr[dest] = bb->b_instr[src];
1105
590k
        }
1106
6.91M
        dest++;
1107
6.91M
        prev_lineno = lineno;
1108
6.91M
    }
1109
1.29M
    assert(dest <= bb->b_iused);
1110
1.29M
    int num_removed = bb->b_iused - dest;
1111
1.29M
    bb->b_iused = dest;
1112
1.29M
    memset(&bb->b_instr[dest], 0, sizeof(cfg_instr) * num_removed);
1113
1.29M
    return num_removed;
1114
1.29M
}
1115
1116
static int
1117
87.2k
remove_redundant_nops(cfg_builder *g) {
1118
87.2k
    int changes = 0;
1119
1.18M
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1120
1.09M
        int change = basicblock_remove_redundant_nops(b);
1121
1.09M
        RETURN_IF_ERROR(change);
1122
1.09M
        changes += change;
1123
1.09M
    }
1124
87.2k
    return changes;
1125
87.2k
}
1126
1127
static int
1128
remove_redundant_nops_and_pairs(basicblock *entryblock)
1129
21.0k
{
1130
21.0k
    bool done = false;
1131
1132
42.4k
    while (! done) {
1133
21.4k
        done = true;
1134
21.4k
        cfg_instr *prev_instr = NULL;
1135
21.4k
        cfg_instr *instr = NULL;
1136
213k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1137
192k
            RETURN_IF_ERROR(basicblock_remove_redundant_nops(b));
1138
192k
            if (IS_LABEL(b->b_label)) {
1139
                /* this block is a jump target, forget instr */
1140
104k
                instr = NULL;
1141
104k
            }
1142
1.44M
            for (int i = 0; i < b->b_iused; i++) {
1143
1.25M
                prev_instr = instr;
1144
1.25M
                instr = &b->b_instr[i];
1145
1.25M
                int prev_opcode = prev_instr ? prev_instr->i_opcode : 0;
1146
1.25M
                int prev_oparg = prev_instr ? prev_instr->i_oparg : 0;
1147
1.25M
                int opcode = instr->i_opcode;
1148
1.25M
                bool is_redundant_pair = false;
1149
1.25M
                if (opcode == POP_TOP) {
1150
94.2k
                   if (prev_opcode == LOAD_CONST || prev_opcode == LOAD_SMALL_INT) {
1151
2.33k
                       is_redundant_pair = true;
1152
2.33k
                   }
1153
91.9k
                   else if (prev_opcode == COPY && prev_oparg == 1) {
1154
36
                       is_redundant_pair = true;
1155
36
                   }
1156
94.2k
                }
1157
1.25M
                if (is_redundant_pair) {
1158
2.36k
                    INSTR_SET_OP0(prev_instr, NOP);
1159
2.36k
                    INSTR_SET_OP0(instr, NOP);
1160
2.36k
                    done = false;
1161
2.36k
                }
1162
1.25M
            }
1163
192k
            if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) {
1164
131k
                instr = NULL;
1165
131k
            }
1166
192k
        }
1167
21.4k
    }
1168
21.0k
    return SUCCESS;
1169
21.0k
}
1170
1171
static int
1172
45.2k
remove_redundant_jumps(cfg_builder *g) {
1173
    /* If a non-empty block ends with a jump instruction, check if the next
1174
     * non-empty block reached through normal flow control is the target
1175
     * of that jump. If it is, then the jump instruction is redundant and
1176
     * can be deleted.
1177
     *
1178
     * Return the number of changes applied, or -1 on error.
1179
     */
1180
1181
45.2k
    int changes = 0;
1182
801k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
1183
756k
        cfg_instr *last = basicblock_last_instr(b);
1184
756k
        if (last == NULL) {
1185
114k
            continue;
1186
114k
        }
1187
756k
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
1188
641k
        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1189
163k
            basicblock* jump_target = next_nonempty_block(last->i_target);
1190
163k
            if (jump_target == NULL) {
1191
0
                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
1192
0
                return ERROR;
1193
0
            }
1194
163k
            basicblock *next = next_nonempty_block(b->b_next);
1195
163k
            if (jump_target == next) {
1196
4.53k
                changes++;
1197
4.53k
                INSTR_SET_OP0(last, NOP);
1198
4.53k
            }
1199
163k
        }
1200
641k
    }
1201
1202
45.2k
    return changes;
1203
45.2k
}
1204
1205
static inline bool
1206
190k
basicblock_has_no_lineno(basicblock *b) {
1207
219k
    for (int i = 0; i < b->b_iused; i++) {
1208
207k
        if (b->b_instr[i].i_loc.lineno >= 0) {
1209
178k
            return false;
1210
178k
        }
1211
207k
    }
1212
12.4k
    return true;
1213
190k
}
1214
1215
/* Maximum size of basic block that should be copied in optimizer */
1216
2.30k
#define MAX_COPY_SIZE 4
1217
1218
/* If this block ends with an unconditional jump to a small exit block or
1219
 * a block that has no line numbers (and no fallthrough), then
1220
 * remove the jump and extend this block with the target.
1221
 * Returns 1 if extended, 0 if no change, and -1 on error.
1222
 */
1223
static int
1224
287k
basicblock_inline_small_or_no_lineno_blocks(basicblock *bb) {
1225
287k
    cfg_instr *last = basicblock_last_instr(bb);
1226
287k
    if (last == NULL) {
1227
0
        return 0;
1228
0
    }
1229
287k
    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
1230
225k
        return 0;
1231
225k
    }
1232
62.4k
    basicblock *target = last->i_target;
1233
62.4k
    bool small_exit_block = (basicblock_exits_scope(target) &&
1234
2.30k
                             target->b_iused <= MAX_COPY_SIZE);
1235
62.4k
    bool no_lineno_no_fallthrough = (basicblock_has_no_lineno(target) &&
1236
5.14k
                                     !BB_HAS_FALLTHROUGH(target));
1237
62.4k
    if (small_exit_block || no_lineno_no_fallthrough) {
1238
2.81k
        assert(is_jump(last));
1239
2.81k
        int removed_jump_opcode = last->i_opcode;
1240
2.81k
        INSTR_SET_OP0(last, NOP);
1241
2.81k
        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
1242
2.81k
        if (no_lineno_no_fallthrough) {
1243
2.48k
            last = basicblock_last_instr(bb);
1244
2.48k
            if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode) &&
1245
1.21k
                removed_jump_opcode == JUMP)
1246
6
            {
1247
                /* Make sure we don't lose eval breaker checks */
1248
6
                last->i_opcode = JUMP;
1249
6
            }
1250
2.48k
        }
1251
2.81k
        target->b_predecessors--;
1252
2.81k
        return 1;
1253
2.81k
    }
1254
59.5k
    return 0;
1255
62.4k
}
1256
1257
static int
1258
21.0k
inline_small_or_no_lineno_blocks(basicblock *entryblock) {
1259
21.0k
    bool changes;
1260
22.0k
    do {
1261
22.0k
        changes = false;
1262
309k
        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
1263
287k
            int res = basicblock_inline_small_or_no_lineno_blocks(b);
1264
287k
            RETURN_IF_ERROR(res);
1265
287k
            if (res) {
1266
2.81k
                changes = true;
1267
2.81k
            }
1268
287k
        }
1269
22.0k
    } while(changes); /* every change removes a jump, ensuring convergence */
1270
21.0k
    return changes;
1271
21.0k
}
1272
1273
// Attempt to eliminate jumps to jumps by updating inst to jump to
1274
// target->i_target using the provided opcode. Return whether or not the
1275
// optimization was successful.
1276
static bool
1277
jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode)
1278
188
{
1279
188
    assert(is_jump(inst));
1280
188
    assert(is_jump(target));
1281
188
    assert(inst == basicblock_last_instr(bb));
1282
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
1283
    // changes (and we fall into an infinite loop):
1284
188
    if (inst->i_target != target->i_target) {
1285
        /* Change inst to NOP and append a jump to target->i_target. The
1286
         * NOP will be removed later if it's not needed for the lineno.
1287
         */
1288
188
        INSTR_SET_OP0(inst, NOP);
1289
1290
188
        RETURN_IF_ERROR(
1291
188
            basicblock_add_jump(
1292
188
                bb, opcode, target->i_target, target->i_loc));
1293
1294
188
        return true;
1295
188
    }
1296
0
    return false;
1297
188
}
1298
1299
static int
1300
loads_const(int opcode)
1301
1.78M
{
1302
1.78M
    return OPCODE_HAS_CONST(opcode) || opcode == LOAD_SMALL_INT;
1303
1.78M
}
1304
1305
/* Returns new reference */
1306
static PyObject*
1307
get_const_value(int opcode, int oparg, PyObject *co_consts)
1308
806k
{
1309
806k
    PyObject *constant = NULL;
1310
806k
    assert(loads_const(opcode));
1311
806k
    if (opcode == LOAD_CONST) {
1312
656k
        constant = PyList_GET_ITEM(co_consts, oparg);
1313
656k
    }
1314
806k
    if (opcode == LOAD_SMALL_INT) {
1315
149k
        return PyLong_FromLong(oparg);
1316
149k
    }
1317
1318
656k
    if (constant == NULL) {
1319
0
        PyErr_SetString(PyExc_SystemError,
1320
0
                        "Internal error: failed to get value of a constant");
1321
0
        return NULL;
1322
0
    }
1323
656k
    return Py_NewRef(constant);
1324
656k
}
1325
1326
// Steals a reference to newconst.
1327
static int
1328
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1329
194k
{
1330
194k
    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
1331
0
        Py_DECREF(newconst);
1332
0
        return -1;
1333
0
    }
1334
1335
194k
    Py_ssize_t index;
1336
41.4M
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1337
82.7M
        if (PyList_GET_ITEM(consts, index) == newconst) {
1338
132k
            break;
1339
132k
        }
1340
41.3M
    }
1341
194k
    if (index == PyList_GET_SIZE(consts)) {
1342
62.1k
        if ((size_t)index >= (size_t)INT_MAX - 1) {
1343
0
            PyErr_SetString(PyExc_OverflowError, "too many constants");
1344
0
            Py_DECREF(newconst);
1345
0
            return -1;
1346
0
        }
1347
62.1k
        if (PyList_Append(consts, newconst)) {
1348
0
            Py_DECREF(newconst);
1349
0
            return -1;
1350
0
        }
1351
62.1k
    }
1352
194k
    Py_DECREF(newconst);
1353
194k
    return (int)index;
1354
194k
}
1355
1356
/*
1357
  Traverse the instructions of the basic block backwards from index "start", skipping over NOPs.
1358
  Try to collect "size" number of consecutive instructions that load constants into the array "instrs".
1359
  Caller must make sure that length of "instrs" is sufficient to fit in at least "size" instructions.
1360
1361
  Return boolean indicating whether "size" such instructions were found.
1362
*/
1363
static bool
1364
get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int size)
1365
334k
{
1366
334k
    assert(start < bb->b_iused);
1367
334k
    assert(size >= 0);
1368
334k
    assert(size <= _PY_STACK_USE_GUIDELINE);
1369
1370
1.10M
    for (; start >= 0 && size > 0; start--) {
1371
864k
        cfg_instr *instr = &bb->b_instr[start];
1372
864k
        if (instr->i_opcode == NOP) {
1373
312k
            continue;
1374
312k
        }
1375
552k
        if (!loads_const(instr->i_opcode)) {
1376
98.1k
            return false;
1377
98.1k
        }
1378
454k
        instrs[--size] = instr;
1379
454k
    }
1380
1381
236k
    return size == 0;
1382
334k
}
1383
1384
/*
1385
  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
1386
  Caller must make sure "instrs" has at least "size" elements.
1387
*/
1388
static void
1389
nop_out(cfg_instr **instrs, int size)
1390
224k
{
1391
614k
    for (int i = 0; i < size; i++) {
1392
389k
        cfg_instr *instr = instrs[i];
1393
389k
        assert(instr->i_opcode != NOP);
1394
389k
        INSTR_SET_OP0(instr, NOP);
1395
389k
        INSTR_SET_LOC(instr, NO_LOCATION);
1396
389k
    }
1397
224k
}
1398
1399
/* Does not steal reference to "newconst".
1400
   Return 1 if changed instruction to LOAD_SMALL_INT.
1401
   Return 0 if could not change instruction to LOAD_SMALL_INT.
1402
   Return -1 on error.
1403
*/
1404
static int
1405
maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
1406
                               PyObject *consts, PyObject *const_cache)
1407
605k
{
1408
605k
    if (PyLong_CheckExact(newconst)) {
1409
330k
        int overflow;
1410
330k
        long val = PyLong_AsLongAndOverflow(newconst, &overflow);
1411
330k
        if (val == -1 && PyErr_Occurred()) {
1412
0
            return -1;
1413
0
        }
1414
330k
        if (!overflow && _PY_IS_SMALL_INT(val) && 0 <= val && val <= 255) {
1415
193k
            assert(_Py_IsImmortal(newconst));
1416
193k
            INSTR_SET_OP1(instr, LOAD_SMALL_INT, (int)val);
1417
193k
            return 1;
1418
193k
        }
1419
330k
    }
1420
412k
    return 0;
1421
605k
}
1422
1423
1424
/* Steals reference to "newconst" */
1425
static int
1426
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1427
                      PyObject *consts, PyObject *const_cache)
1428
220k
{
1429
220k
    int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
1430
220k
    if (res < 0) {
1431
0
        Py_DECREF(newconst);
1432
0
        return ERROR;
1433
0
    }
1434
220k
    if (res > 0) {
1435
27.3k
        return SUCCESS;
1436
27.3k
    }
1437
193k
    int oparg = add_const(newconst, consts, const_cache);
1438
193k
    RETURN_IF_ERROR(oparg);
1439
193k
    INSTR_SET_OP1(instr, LOAD_CONST, oparg);
1440
193k
    return SUCCESS;
1441
193k
}
1442
1443
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
1444
   with    LOAD_CONST (c1, c2, ... cn).
1445
   The consts table must still be in list form so that the
1446
   new constant (c1, c2, ... cn) can be appended.
1447
   Called with codestr pointing to the first LOAD_CONST.
1448
*/
1449
static int
1450
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1451
13.2k
{
1452
    /* Pre-conditions */
1453
13.2k
    assert(PyDict_CheckExact(const_cache));
1454
13.2k
    assert(PyList_CheckExact(consts));
1455
1456
13.2k
    cfg_instr *instr = &bb->b_instr[i];
1457
13.2k
    assert(instr->i_opcode == BUILD_TUPLE);
1458
1459
13.2k
    int seq_size = instr->i_oparg;
1460
13.2k
    if (seq_size > _PY_STACK_USE_GUIDELINE) {
1461
0
        return SUCCESS;
1462
0
    }
1463
1464
13.2k
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1465
13.2k
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
1466
        /* not a const sequence */
1467
10.1k
        return SUCCESS;
1468
10.1k
    }
1469
1470
3.11k
    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
1471
3.11k
    if (const_tuple == NULL) {
1472
0
        return ERROR;
1473
0
    }
1474
1475
7.14k
    for (int i = 0; i < seq_size; i++) {
1476
4.03k
        cfg_instr *inst = const_instrs[i];
1477
4.03k
        assert(loads_const(inst->i_opcode));
1478
4.03k
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1479
4.03k
        if (element == NULL) {
1480
0
            Py_DECREF(const_tuple);
1481
0
            return ERROR;
1482
0
        }
1483
4.03k
        PyTuple_SET_ITEM(const_tuple, i, element);
1484
4.03k
    }
1485
1486
3.11k
    nop_out(const_instrs, seq_size);
1487
3.11k
    return instr_make_load_const(instr, const_tuple, consts, const_cache);
1488
3.11k
}
1489
1490
/* Replace:
1491
    BUILD_LIST 0
1492
    LOAD_CONST c1
1493
    LIST_APPEND 1
1494
    LOAD_CONST c2
1495
    LIST_APPEND 1
1496
    ...
1497
    LOAD_CONST cN
1498
    LIST_APPEND 1
1499
    CALL_INTRINSIC_1 INTRINSIC_LIST_TO_TUPLE
1500
   with:
1501
    LOAD_CONST (c1, c2, ... cN)
1502
*/
1503
static int
1504
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1505
                                      PyObject *consts, PyObject *const_cache)
1506
493
{
1507
493
    assert(PyDict_CheckExact(const_cache));
1508
493
    assert(PyList_CheckExact(consts));
1509
493
    assert(i >= 0);
1510
493
    assert(i < bb->b_iused);
1511
1512
493
    cfg_instr *intrinsic = &bb->b_instr[i];
1513
493
    assert(intrinsic->i_opcode == CALL_INTRINSIC_1);
1514
493
    assert(intrinsic->i_oparg == INTRINSIC_LIST_TO_TUPLE);
1515
1516
493
    int consts_found = 0;
1517
493
    bool expect_append = true;
1518
1519
23.6k
    for (int pos = i - 1; pos >= 0; pos--) {
1520
23.5k
        cfg_instr *instr = &bb->b_instr[pos];
1521
23.5k
        int opcode = instr->i_opcode;
1522
23.5k
        int oparg = instr->i_oparg;
1523
1524
23.5k
        if (opcode == NOP) {
1525
14.8k
            continue;
1526
14.8k
        }
1527
1528
8.76k
        if (opcode == BUILD_LIST && oparg == 0) {
1529
17
            if (!expect_append) {
1530
                /* Not a sequence start. */
1531
0
                return SUCCESS;
1532
0
            }
1533
1534
            /* Sequence start, we are done. */
1535
17
            PyObject *newconst = PyTuple_New((Py_ssize_t)consts_found);
1536
17
            if (newconst == NULL) {
1537
0
                return ERROR;
1538
0
            }
1539
1540
10.3k
            for (int newpos = i - 1; newpos >= pos; newpos--) {
1541
10.3k
                instr = &bb->b_instr[newpos];
1542
10.3k
                if (instr->i_opcode == NOP) {
1543
6.30k
                    continue;
1544
6.30k
                }
1545
4.06k
                if (loads_const(instr->i_opcode)) {
1546
2.02k
                    PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, consts);
1547
2.02k
                    if (constant == NULL) {
1548
0
                        Py_DECREF(newconst);
1549
0
                        return ERROR;
1550
0
                    }
1551
2.02k
                    assert(consts_found > 0);
1552
2.02k
                    PyTuple_SET_ITEM(newconst, --consts_found, constant);
1553
2.02k
                }
1554
4.06k
                nop_out(&instr, 1);
1555
4.06k
            }
1556
17
            assert(consts_found == 0);
1557
17
            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1558
17
        }
1559
1560
8.75k
        if (expect_append) {
1561
4.43k
            if (opcode != LIST_APPEND || oparg != 1) {
1562
102
                return SUCCESS;
1563
102
            }
1564
4.43k
        }
1565
4.31k
        else {
1566
4.31k
            if (!loads_const(opcode)) {
1567
357
                return SUCCESS;
1568
357
            }
1569
3.95k
            consts_found++;
1570
3.95k
        }
1571
1572
8.29k
        expect_append = !expect_append;
1573
8.29k
    }
1574
1575
    /* Did not find sequence start. */
1576
17
    return SUCCESS;
1577
493
}
1578
1579
6.33k
#define MIN_CONST_SEQUENCE_SIZE 3
1580
/*
1581
Optimize lists and sets for:
1582
    1. "for" loop, comprehension or "in"/"not in" tests:
1583
           Change literal list or set of constants into constant
1584
           tuple or frozenset respectively. Change list of
1585
           non-constants into tuple.
1586
    2. Constant literal lists/set with length >= MIN_CONST_SEQUENCE_SIZE:
1587
           Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cN, BUILD_LIST N
1588
           with BUILD_LIST 0, LOAD_CONST (c1, c2, ... cN), LIST_EXTEND 1,
1589
           or BUILD_SET & SET_UPDATE respectively.
1590
*/
1591
static int
1592
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1593
                        PyObject *consts, PyObject *const_cache)
1594
3.16k
{
1595
3.16k
    assert(PyDict_CheckExact(const_cache));
1596
3.16k
    assert(PyList_CheckExact(consts));
1597
1598
3.16k
    cfg_instr *instr = &bb->b_instr[i];
1599
3.16k
    assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1600
1601
3.16k
    bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
1602
3.16k
    int seq_size = instr->i_oparg;
1603
3.16k
    if (seq_size > _PY_STACK_USE_GUIDELINE ||
1604
3.16k
        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
1605
3.00k
    {
1606
3.00k
        return SUCCESS;
1607
3.00k
    }
1608
1609
159
    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
1610
159
    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not a const sequence */
1611
98
        if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
1612
            /* iterate over a tuple instead of list */
1613
0
            INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
1614
0
        }
1615
98
        return SUCCESS;
1616
98
    }
1617
1618
61
    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
1619
61
    if (const_result == NULL) {
1620
0
        return ERROR;
1621
0
    }
1622
1623
366
    for (int i = 0; i < seq_size; i++) {
1624
305
        cfg_instr *inst = const_instrs[i];
1625
305
        assert(loads_const(inst->i_opcode));
1626
305
        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, consts);
1627
305
        if (element == NULL) {
1628
0
            Py_DECREF(const_result);
1629
0
            return ERROR;
1630
0
        }
1631
305
        PyTuple_SET_ITEM(const_result, i, element);
1632
305
    }
1633
1634
61
    if (instr->i_opcode == BUILD_SET) {
1635
50
        PyObject *frozenset = PyFrozenSet_New(const_result);
1636
50
        if (frozenset == NULL) {
1637
0
            Py_DECREF(const_result);
1638
0
            return ERROR;
1639
0
        }
1640
50
        Py_SETREF(const_result, frozenset);
1641
50
    }
1642
1643
61
    int index = add_const(const_result, consts, const_cache);
1644
61
    RETURN_IF_ERROR(index);
1645
61
    nop_out(const_instrs, seq_size);
1646
1647
61
    if (contains_or_iter) {
1648
8
        INSTR_SET_OP1(instr, LOAD_CONST, index);
1649
8
    }
1650
53
    else {
1651
53
        assert(i >= 2);
1652
53
        assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
1653
1654
53
        INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
1655
1656
53
        INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
1657
53
        INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
1658
53
        INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
1659
53
    }
1660
61
    return SUCCESS;
1661
61
}
1662
1663
/* Check whether the total number of items in the (possibly nested) collection obj exceeds
1664
 * limit. Return a negative number if it does, and a non-negative number otherwise.
1665
 * Used to avoid creating constants which are slow to hash.
1666
 */
1667
static Py_ssize_t
1668
const_folding_check_complexity(PyObject *obj, Py_ssize_t limit)
1669
61.1k
{
1670
61.1k
    if (PyTuple_Check(obj)) {
1671
54.0k
        Py_ssize_t i;
1672
54.0k
        limit -= PyTuple_GET_SIZE(obj);
1673
112k
        for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
1674
58.8k
            limit = const_folding_check_complexity(PyTuple_GET_ITEM(obj, i), limit);
1675
58.8k
            if (limit < 0) {
1676
291
                return limit;
1677
291
            }
1678
58.8k
        }
1679
54.0k
    }
1680
60.8k
    return limit;
1681
61.1k
}
1682
1683
22.4k
#define MAX_INT_SIZE           128  /* bits */
1684
2.51k
#define MAX_COLLECTION_SIZE    256  /* items */
1685
1.28k
#define MAX_STR_SIZE          4096  /* characters */
1686
2.28k
#define MAX_TOTAL_ITEMS       1024  /* including nested collections */
1687
1688
static PyObject *
1689
const_folding_safe_multiply(PyObject *v, PyObject *w)
1690
30.3k
{
1691
30.3k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1692
9.12k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1693
30.3k
    ) {
1694
6.10k
        int64_t vbits = _PyLong_NumBits(v);
1695
6.10k
        int64_t wbits = _PyLong_NumBits(w);
1696
6.10k
        assert(vbits >= 0);
1697
6.10k
        assert(wbits >= 0);
1698
6.10k
        if (vbits + wbits > MAX_INT_SIZE) {
1699
107
            return NULL;
1700
107
        }
1701
6.10k
    }
1702
24.2k
    else if (PyLong_Check(v) && PyTuple_Check(w)) {
1703
2.58k
        Py_ssize_t size = PyTuple_GET_SIZE(w);
1704
2.58k
        if (size) {
1705
2.55k
            long n = PyLong_AsLong(v);
1706
2.55k
            if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
1707
198
                return NULL;
1708
198
            }
1709
2.35k
            if (n && const_folding_check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
1710
61
                return NULL;
1711
61
            }
1712
2.35k
        }
1713
2.58k
    }
1714
21.6k
    else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
1715
2.21k
        Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
1716
2.21k
                                               PyBytes_GET_SIZE(w);
1717
2.21k
        if (size) {
1718
1.30k
            long n = PyLong_AsLong(v);
1719
1.30k
            if (n < 0 || n > MAX_STR_SIZE / size) {
1720
140
                return NULL;
1721
140
            }
1722
1.30k
        }
1723
2.21k
    }
1724
19.4k
    else if (PyLong_Check(w) &&
1725
11.7k
             (PyTuple_Check(v) || PyUnicode_Check(v) || PyBytes_Check(v)))
1726
4.54k
    {
1727
4.54k
        return const_folding_safe_multiply(w, v);
1728
4.54k
    }
1729
1730
25.3k
    return PyNumber_Multiply(v, w);
1731
30.3k
}
1732
1733
static PyObject *
1734
const_folding_safe_power(PyObject *v, PyObject *w)
1735
38.5k
{
1736
38.5k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1737
17.5k
        !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w)
1738
38.5k
    ) {
1739
15.3k
        int64_t vbits = _PyLong_NumBits(v);
1740
15.3k
        size_t wbits = PyLong_AsSize_t(w);
1741
15.3k
        assert(vbits >= 0);
1742
15.3k
        if (wbits == (size_t)-1) {
1743
428
            return NULL;
1744
428
        }
1745
14.9k
        if ((uint64_t)vbits > MAX_INT_SIZE / wbits) {
1746
1.65k
            return NULL;
1747
1.65k
        }
1748
14.9k
    }
1749
1750
36.4k
    return PyNumber_Power(v, w, Py_None);
1751
38.5k
}
1752
1753
static PyObject *
1754
const_folding_safe_lshift(PyObject *v, PyObject *w)
1755
1.96k
{
1756
1.96k
    if (PyLong_Check(v) && PyLong_Check(w) &&
1757
1.87k
        !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w)
1758
1.96k
    ) {
1759
800
        int64_t vbits = _PyLong_NumBits(v);
1760
800
        size_t wbits = PyLong_AsSize_t(w);
1761
800
        assert(vbits >= 0);
1762
800
        if (wbits == (size_t)-1) {
1763
233
            return NULL;
1764
233
        }
1765
567
        if (wbits > MAX_INT_SIZE || (uint64_t)vbits > MAX_INT_SIZE - wbits) {
1766
312
            return NULL;
1767
312
        }
1768
567
    }
1769
1770
1.41k
    return PyNumber_Lshift(v, w);
1771
1.96k
}
1772
1773
static PyObject *
1774
const_folding_safe_mod(PyObject *v, PyObject *w)
1775
15.0k
{
1776
15.0k
    if (PyUnicode_Check(v) || PyBytes_Check(v)) {
1777
26
        return NULL;
1778
26
    }
1779
1780
15.0k
    return PyNumber_Remainder(v, w);
1781
15.0k
}
1782
1783
static PyObject *
1784
eval_const_binop(PyObject *left, int op, PyObject *right)
1785
178k
{
1786
178k
    assert(left != NULL && right != NULL);
1787
178k
    assert(op >= 0 && op <= NB_OPARG_LAST);
1788
1789
178k
    PyObject *result = NULL;
1790
178k
    switch (op) {
1791
28.5k
        case NB_ADD:
1792
28.5k
            result = PyNumber_Add(left, right);
1793
28.5k
            break;
1794
29.0k
        case NB_SUBTRACT:
1795
29.0k
            result = PyNumber_Subtract(left, right);
1796
29.0k
            break;
1797
25.8k
        case NB_MULTIPLY:
1798
25.8k
            result = const_folding_safe_multiply(left, right);
1799
25.8k
            break;
1800
19.6k
        case NB_TRUE_DIVIDE:
1801
19.6k
            result = PyNumber_TrueDivide(left, right);
1802
19.6k
            break;
1803
6.94k
        case NB_FLOOR_DIVIDE:
1804
6.94k
            result = PyNumber_FloorDivide(left, right);
1805
6.94k
            break;
1806
15.0k
        case NB_REMAINDER:
1807
15.0k
            result = const_folding_safe_mod(left, right);
1808
15.0k
            break;
1809
38.5k
        case NB_POWER:
1810
38.5k
            result = const_folding_safe_power(left, right);
1811
38.5k
            break;
1812
1.96k
        case NB_LSHIFT:
1813
1.96k
            result = const_folding_safe_lshift(left, right);
1814
1.96k
            break;
1815
4.32k
        case NB_RSHIFT:
1816
4.32k
            result = PyNumber_Rshift(left, right);
1817
4.32k
            break;
1818
780
        case NB_OR:
1819
780
            result = PyNumber_Or(left, right);
1820
780
            break;
1821
2.73k
        case NB_XOR:
1822
2.73k
            result = PyNumber_Xor(left, right);
1823
2.73k
            break;
1824
3.24k
        case NB_AND:
1825
3.24k
            result = PyNumber_And(left, right);
1826
3.24k
            break;
1827
1.66k
        case NB_SUBSCR:
1828
1.66k
            result = PyObject_GetItem(left, right);
1829
1.66k
            break;
1830
128
        case NB_MATRIX_MULTIPLY:
1831
            // No builtin constants implement matrix multiplication
1832
128
            break;
1833
0
        default:
1834
0
            Py_UNREACHABLE();
1835
178k
    }
1836
178k
    return result;
1837
178k
}
1838
1839
static int
1840
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1841
239k
{
1842
402k
    #define BINOP_OPERAND_COUNT 2
1843
239k
    assert(PyDict_CheckExact(const_cache));
1844
239k
    assert(PyList_CheckExact(consts));
1845
1846
239k
    cfg_instr *binop = &bb->b_instr[i];
1847
239k
    assert(binop->i_opcode == BINARY_OP);
1848
1849
239k
    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
1850
239k
    if (!get_const_loading_instrs(bb, i-1, operands_instrs, BINOP_OPERAND_COUNT)) {
1851
        /* not a const sequence */
1852
60.9k
        return SUCCESS;
1853
60.9k
    }
1854
1855
178k
    cfg_instr *lhs_instr = operands_instrs[0];
1856
178k
    assert(loads_const(lhs_instr->i_opcode));
1857
178k
    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, consts);
1858
178k
    if (lhs == NULL) {
1859
0
        return ERROR;
1860
0
    }
1861
1862
178k
    cfg_instr *rhs_instr = operands_instrs[1];
1863
178k
    assert(loads_const(rhs_instr->i_opcode));
1864
178k
    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, consts);
1865
178k
    if (rhs == NULL) {
1866
0
        Py_DECREF(lhs);
1867
0
        return ERROR;
1868
0
    }
1869
1870
178k
    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
1871
178k
    Py_DECREF(lhs);
1872
178k
    Py_DECREF(rhs);
1873
178k
    if (newconst == NULL) {
1874
14.9k
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1875
0
            return ERROR;
1876
0
        }
1877
14.9k
        PyErr_Clear();
1878
14.9k
        return SUCCESS;
1879
14.9k
    }
1880
1881
163k
    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1882
163k
    return instr_make_load_const(binop, newconst, consts, const_cache);
1883
178k
}
1884
1885
static PyObject *
1886
eval_const_unaryop(PyObject *operand, int opcode, int oparg)
1887
54.8k
{
1888
54.8k
    assert(operand != NULL);
1889
54.8k
    assert(
1890
54.8k
        opcode == UNARY_NEGATIVE ||
1891
54.8k
        opcode == UNARY_INVERT ||
1892
54.8k
        opcode == UNARY_NOT ||
1893
54.8k
        (opcode == CALL_INTRINSIC_1 && oparg == INTRINSIC_UNARY_POSITIVE)
1894
54.8k
    );
1895
54.8k
    PyObject *result;
1896
54.8k
    switch (opcode) {
1897
41.0k
        case UNARY_NEGATIVE:
1898
41.0k
            result = PyNumber_Negative(operand);
1899
41.0k
            break;
1900
4.86k
        case UNARY_INVERT:
1901
            // XXX: This should be removed once the ~bool depreciation expires.
1902
4.86k
            if (PyBool_Check(operand)) {
1903
38
                return NULL;
1904
38
            }
1905
4.83k
            result = PyNumber_Invert(operand);
1906
4.83k
            break;
1907
17
        case UNARY_NOT: {
1908
17
            int r = PyObject_IsTrue(operand);
1909
17
            if (r < 0) {
1910
0
                return NULL;
1911
0
            }
1912
17
            result = PyBool_FromLong(!r);
1913
17
            break;
1914
17
        }
1915
8.89k
        case CALL_INTRINSIC_1:
1916
8.89k
            if (oparg != INTRINSIC_UNARY_POSITIVE) {
1917
0
                Py_UNREACHABLE();
1918
0
            }
1919
8.89k
            result = PyNumber_Positive(operand);
1920
8.89k
            break;
1921
0
        default:
1922
0
            Py_UNREACHABLE();
1923
54.8k
    }
1924
54.7k
    return result;
1925
54.8k
}
1926
1927
static int
1928
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1929
81.9k
{
1930
136k
    #define UNARYOP_OPERAND_COUNT 1
1931
81.9k
    assert(PyDict_CheckExact(const_cache));
1932
81.9k
    assert(PyList_CheckExact(consts));
1933
81.9k
    cfg_instr *unaryop = &bb->b_instr[i];
1934
1935
81.9k
    cfg_instr *operand_instr;
1936
81.9k
    if (!get_const_loading_instrs(bb, i-1, &operand_instr, UNARYOP_OPERAND_COUNT)) {
1937
        /* not a const */
1938
27.1k
        return SUCCESS;
1939
27.1k
    }
1940
1941
81.9k
    assert(loads_const(operand_instr->i_opcode));
1942
54.8k
    PyObject *operand = get_const_value(
1943
54.8k
        operand_instr->i_opcode,
1944
54.8k
        operand_instr->i_oparg,
1945
54.8k
        consts
1946
54.8k
    );
1947
54.8k
    if (operand == NULL) {
1948
0
        return ERROR;
1949
0
    }
1950
1951
54.8k
    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, unaryop->i_oparg);
1952
54.8k
    Py_DECREF(operand);
1953
54.8k
    if (newconst == NULL) {
1954
618
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
1955
0
            return ERROR;
1956
0
        }
1957
618
        PyErr_Clear();
1958
618
        return SUCCESS;
1959
618
    }
1960
1961
54.2k
    if (unaryop->i_opcode == UNARY_NOT) {
1962
17
        assert(PyBool_Check(newconst));
1963
17
    }
1964
54.2k
    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1965
54.2k
    return instr_make_load_const(unaryop, newconst, consts, const_cache);
1966
54.2k
}
1967
1968
57.6k
#define VISITED (-1)
1969
1970
// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
1971
// same effect.
1972
static int
1973
swaptimize(basicblock *block, int *ix)
1974
28.8k
{
1975
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
1976
    // for this function. Make sure to blow away cached *.pyc files first!
1977
28.8k
    assert(*ix < block->b_iused);
1978
28.8k
    cfg_instr *instructions = &block->b_instr[*ix];
1979
    // Find the length of the current sequence of SWAPs and NOPs, and record the
1980
    // maximum depth of the stack manipulations:
1981
28.8k
    assert(instructions[0].i_opcode == SWAP);
1982
28.8k
    int depth = instructions[0].i_oparg;
1983
28.8k
    int len = 0;
1984
28.8k
    int more = false;
1985
28.8k
    int limit = block->b_iused - *ix;
1986
33.4k
    while (++len < limit) {
1987
33.4k
        int opcode = instructions[len].i_opcode;
1988
33.4k
        if (opcode == SWAP) {
1989
4.63k
            depth = Py_MAX(depth, instructions[len].i_oparg);
1990
4.63k
            more = true;
1991
4.63k
        }
1992
28.8k
        else if (opcode != NOP) {
1993
28.8k
            break;
1994
28.8k
        }
1995
33.4k
    }
1996
    // It's already optimal if there's only one SWAP:
1997
28.8k
    if (!more) {
1998
24.4k
        return SUCCESS;
1999
24.4k
    }
2000
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
2001
4.43k
    int *stack = PyMem_Malloc(depth * sizeof(int));
2002
4.43k
    if (stack == NULL) {
2003
0
        PyErr_NoMemory();
2004
0
        return ERROR;
2005
0
    }
2006
17.7k
    for (int i = 0; i < depth; i++) {
2007
13.3k
        stack[i] = i;
2008
13.3k
    }
2009
    // Simulate the combined effect of these instructions by "running" them on
2010
    // our "stack":
2011
13.5k
    for (int i = 0; i < len; i++) {
2012
9.07k
        if (instructions[i].i_opcode == SWAP) {
2013
9.07k
            int oparg = instructions[i].i_oparg;
2014
9.07k
            int top = stack[0];
2015
            // SWAPs are 1-indexed:
2016
9.07k
            stack[0] = stack[oparg - 1];
2017
9.07k
            stack[oparg - 1] = top;
2018
9.07k
        }
2019
9.07k
    }
2020
    // Now we can begin! Our approach here is based on a solution to a closely
2021
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
2022
    // think of this algorithm as determining the steps needed to efficiently
2023
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
2024
    // though, we can efficiently *shuffle* it! For this reason, we will be
2025
    // replacing instructions starting from the *end* of the run. Since the
2026
    // solution is optimal, we don't need to worry about running out of space:
2027
4.43k
    int current = len - 1;
2028
17.7k
    for (int i = 0; i < depth; i++) {
2029
        // Skip items that have already been visited, or just happen to be in
2030
        // the correct location:
2031
13.3k
        if (stack[i] == VISITED || stack[i] == i) {
2032
8.92k
            continue;
2033
8.92k
        }
2034
        // Okay, we've found an item that hasn't been visited. It forms a cycle
2035
        // with other items; traversing the cycle and swapping each item with
2036
        // the next will put them all in the correct place. The weird
2037
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
2038
        // can only swap from that position:
2039
4.41k
        int j = i;
2040
17.6k
        while (true) {
2041
            // Skip the actual swap if our item is zero, since swapping the top
2042
            // item with itself is pointless:
2043
17.6k
            if (j) {
2044
8.85k
                assert(0 <= current);
2045
                // SWAPs are 1-indexed:
2046
8.85k
                instructions[current].i_opcode = SWAP;
2047
8.85k
                instructions[current--].i_oparg = j + 1;
2048
8.85k
            }
2049
17.6k
            if (stack[j] == VISITED) {
2050
                // Completed the cycle:
2051
4.41k
                assert(j == i);
2052
4.41k
                break;
2053
4.41k
            }
2054
13.2k
            int next_j = stack[j];
2055
13.2k
            stack[j] = VISITED;
2056
13.2k
            j = next_j;
2057
13.2k
        }
2058
4.41k
    }
2059
    // NOP out any unused instructions:
2060
4.66k
    while (0 <= current) {
2061
224
        INSTR_SET_OP0(&instructions[current--], NOP);
2062
224
    }
2063
4.43k
    PyMem_Free(stack);
2064
4.43k
    *ix += len - 1;
2065
4.43k
    return SUCCESS;
2066
4.43k
}
2067
2068
2069
// This list is pretty small, since it's only okay to reorder opcodes that:
2070
// - can't affect control flow (like jumping or raising exceptions)
2071
// - can't invoke arbitrary code (besides finalizers)
2072
// - only touch the TOS (and pop it when finished)
2073
#define SWAPPABLE(opcode) \
2074
30.5k
    ((opcode) == STORE_FAST || \
2075
30.5k
     (opcode) == STORE_FAST_MAYBE_NULL || \
2076
30.5k
     (opcode) == POP_TOP)
2077
2078
#define STORES_TO(instr) \
2079
6
    (((instr).i_opcode == STORE_FAST || \
2080
6
      (instr).i_opcode == STORE_FAST_MAYBE_NULL) \
2081
6
     ? (instr).i_oparg : -1)
2082
2083
static int
2084
next_swappable_instruction(basicblock *block, int i, int lineno)
2085
33.6k
{
2086
33.6k
    while (++i < block->b_iused) {
2087
30.4k
        cfg_instr *instruction = &block->b_instr[i];
2088
30.4k
        if (0 <= lineno && instruction->i_loc.lineno != lineno) {
2089
            // Optimizing across this instruction could cause user-visible
2090
            // changes in the names bound between line tracing events!
2091
0
            return -1;
2092
0
        }
2093
30.4k
        if (instruction->i_opcode == NOP) {
2094
0
            continue;
2095
0
        }
2096
30.4k
        if (SWAPPABLE(instruction->i_opcode)) {
2097
4.85k
            return i;
2098
4.85k
        }
2099
25.6k
        return -1;
2100
30.4k
    }
2101
3.18k
    return -1;
2102
33.6k
}
2103
2104
// Attempt to apply SWAPs statically by swapping *instructions* rather than
2105
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
2106
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
2107
static void
2108
apply_static_swaps(basicblock *block, int i)
2109
28.8k
{
2110
    // SWAPs are to our left, and potential swaperands are to our right:
2111
29.0k
    for (; 0 <= i; i--) {
2112
29.0k
        assert(i < block->b_iused);
2113
29.0k
        cfg_instr *swap = &block->b_instr[i];
2114
29.0k
        if (swap->i_opcode != SWAP) {
2115
236
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
2116
                // Nope, but we know how to handle these. Keep looking:
2117
204
                continue;
2118
204
            }
2119
            // We can't reason about what this instruction does. Bail:
2120
32
            return;
2121
236
        }
2122
28.8k
        int j = next_swappable_instruction(block, i, -1);
2123
28.8k
        if (j < 0) {
2124
23.9k
            return;
2125
23.9k
        }
2126
4.83k
        int k = j;
2127
4.83k
        int lineno = block->b_instr[j].i_loc.lineno;
2128
4.85k
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
2129
4.85k
            k = next_swappable_instruction(block, k, lineno);
2130
4.85k
            if (k < 0) {
2131
4.83k
                return;
2132
4.83k
            }
2133
4.85k
        }
2134
        // The reordering is not safe if the two instructions to be swapped
2135
        // store to the same location, or if any intervening instruction stores
2136
        // to the same location as either of them.
2137
2
        int store_j = STORES_TO(block->b_instr[j]);
2138
2
        int store_k = STORES_TO(block->b_instr[k]);
2139
2
        if (store_j >= 0 || store_k >= 0) {
2140
2
            if (store_j == store_k) {
2141
0
                return;
2142
0
            }
2143
4
            for (int idx = j + 1; idx < k; idx++) {
2144
2
                int store_idx = STORES_TO(block->b_instr[idx]);
2145
2
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
2146
0
                    return;
2147
0
                }
2148
2
            }
2149
2
        }
2150
2151
        // Success!
2152
2
        INSTR_SET_OP0(swap, NOP);
2153
2
        cfg_instr temp = block->b_instr[j];
2154
2
        block->b_instr[j] = block->b_instr[k];
2155
2
        block->b_instr[k] = temp;
2156
2
    }
2157
28.8k
}
2158
2159
static int
2160
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2161
171k
{
2162
171k
    assert(PyDict_CheckExact(const_cache));
2163
171k
    assert(PyList_CheckExact(consts));
2164
171k
    int opcode = 0;
2165
171k
    int oparg = 0;
2166
1.74M
    for (int i = 0; i < bb->b_iused; i++) {
2167
1.57M
        cfg_instr *inst = &bb->b_instr[i];
2168
1.57M
        if (inst->i_opcode == LOAD_CONST) {
2169
384k
            PyObject *constant = get_const_value(inst->i_opcode, inst->i_oparg, consts);
2170
384k
            int res = maybe_instr_make_load_smallint(inst, constant, consts, const_cache);
2171
384k
            Py_DECREF(constant);
2172
384k
            if (res < 0) {
2173
0
                return ERROR;
2174
0
            }
2175
384k
        }
2176
1.57M
        bool is_copy_of_load_const = (opcode == LOAD_CONST &&
2177
202k
                                      inst->i_opcode == COPY &&
2178
220
                                      inst->i_oparg == 1);
2179
1.57M
        if (! is_copy_of_load_const) {
2180
1.57M
            opcode = inst->i_opcode;
2181
1.57M
            oparg = inst->i_oparg;
2182
1.57M
        }
2183
1.57M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2184
1.57M
        if (opcode != LOAD_CONST && opcode != LOAD_SMALL_INT) {
2185
1.19M
            continue;
2186
1.19M
        }
2187
384k
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2188
384k
        switch(nextop) {
2189
639
            case POP_JUMP_IF_FALSE:
2190
1.24k
            case POP_JUMP_IF_TRUE:
2191
1.89k
            case JUMP_IF_FALSE:
2192
2.23k
            case JUMP_IF_TRUE:
2193
2.23k
            {
2194
                /* Remove LOAD_CONST const; conditional jump */
2195
2.23k
                PyObject* cnt = get_const_value(opcode, oparg, consts);
2196
2.23k
                if (cnt == NULL) {
2197
0
                    return ERROR;
2198
0
                }
2199
2.23k
                int is_true = PyObject_IsTrue(cnt);
2200
2.23k
                Py_DECREF(cnt);
2201
2.23k
                if (is_true == -1) {
2202
0
                    return ERROR;
2203
0
                }
2204
2.23k
                if (PyCompile_OpcodeStackEffect(nextop, 0) == -1) {
2205
                    /* POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE */
2206
1.24k
                    INSTR_SET_OP0(inst, NOP);
2207
1.24k
                }
2208
2.23k
                int jump_if_true = (nextop == POP_JUMP_IF_TRUE || nextop == JUMP_IF_TRUE);
2209
2.23k
                if (is_true == jump_if_true) {
2210
1.11k
                    bb->b_instr[i+1].i_opcode = JUMP;
2211
1.11k
                }
2212
1.12k
                else {
2213
1.12k
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2214
1.12k
                }
2215
2.23k
                break;
2216
2.23k
            }
2217
2.23k
            case IS_OP:
2218
68
            {
2219
                // Fold to POP_JUMP_IF_NONE:
2220
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE
2221
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE
2222
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE
2223
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE
2224
                // Fold to POP_JUMP_IF_NOT_NONE:
2225
                // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE
2226
                // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE
2227
                // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE
2228
                // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE
2229
68
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2230
68
                if (cnt == NULL) {
2231
0
                    return ERROR;
2232
0
                }
2233
68
                if (!Py_IsNone(cnt)) {
2234
64
                    Py_DECREF(cnt);
2235
64
                    break;
2236
64
                }
2237
4
                if (bb->b_iused <= i + 2) {
2238
0
                    break;
2239
0
                }
2240
4
                cfg_instr *is_instr = &bb->b_instr[i + 1];
2241
4
                cfg_instr *jump_instr = &bb->b_instr[i + 2];
2242
                // Get rid of TO_BOOL regardless:
2243
4
                if (jump_instr->i_opcode == TO_BOOL) {
2244
0
                    INSTR_SET_OP0(jump_instr, NOP);
2245
0
                    if (bb->b_iused <= i + 3) {
2246
0
                        break;
2247
0
                    }
2248
0
                    jump_instr = &bb->b_instr[i + 3];
2249
0
                }
2250
4
                bool invert = is_instr->i_oparg;
2251
4
                if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) {
2252
4
                    invert = !invert;
2253
4
                }
2254
0
                else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) {
2255
0
                    break;
2256
0
                }
2257
4
                INSTR_SET_OP0(inst, NOP);
2258
4
                INSTR_SET_OP0(is_instr, NOP);
2259
4
                jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE
2260
4
                                              : POP_JUMP_IF_NONE;
2261
4
                break;
2262
4
            }
2263
1.26k
            case TO_BOOL:
2264
1.26k
            {
2265
1.26k
                PyObject *cnt = get_const_value(opcode, oparg, consts);
2266
1.26k
                if (cnt == NULL) {
2267
0
                    return ERROR;
2268
0
                }
2269
1.26k
                int is_true = PyObject_IsTrue(cnt);
2270
1.26k
                Py_DECREF(cnt);
2271
1.26k
                if (is_true == -1) {
2272
0
                    return ERROR;
2273
0
                }
2274
1.26k
                cnt = PyBool_FromLong(is_true);
2275
1.26k
                int index = add_const(cnt, consts, const_cache);
2276
1.26k
                if (index < 0) {
2277
0
                    return ERROR;
2278
0
                }
2279
1.26k
                INSTR_SET_OP0(inst, NOP);
2280
1.26k
                INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index);
2281
1.26k
                break;
2282
1.26k
            }
2283
384k
        }
2284
384k
    }
2285
171k
    return SUCCESS;
2286
171k
}
2287
2288
static int
2289
21.0k
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2290
192k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2291
171k
        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2292
171k
    }
2293
21.0k
    return SUCCESS;
2294
21.0k
}
2295
2296
static int
2297
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2298
171k
{
2299
171k
    assert(PyDict_CheckExact(const_cache));
2300
171k
    assert(PyList_CheckExact(consts));
2301
171k
    cfg_instr nop;
2302
171k
    INSTR_SET_OP0(&nop, NOP);
2303
1.75M
    for (int i = 0; i < bb->b_iused; i++) {
2304
1.57M
        cfg_instr *inst = &bb->b_instr[i];
2305
1.57M
        cfg_instr *target;
2306
1.57M
        int opcode = inst->i_opcode;
2307
1.57M
        int oparg = inst->i_oparg;
2308
1.57M
        if (HAS_TARGET(opcode)) {
2309
105k
            assert(inst->i_target->b_iused > 0);
2310
105k
            target = &inst->i_target->b_instr[0];
2311
105k
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
2312
105k
        }
2313
1.47M
        else {
2314
1.47M
            target = &nop;
2315
1.47M
        }
2316
1.57M
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
2317
1.57M
        assert(!IS_ASSEMBLER_OPCODE(opcode));
2318
1.57M
        switch (opcode) {
2319
            /* Try to fold tuples of constants.
2320
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
2321
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
2322
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
2323
13.2k
            case BUILD_TUPLE:
2324
13.2k
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
2325
23
                    switch(oparg) {
2326
11
                        case 1:
2327
11
                            INSTR_SET_OP0(inst, NOP);
2328
11
                            INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2329
11
                            continue;
2330
8
                        case 2:
2331
11
                        case 3:
2332
11
                            INSTR_SET_OP0(inst, NOP);
2333
11
                            bb->b_instr[i+1].i_opcode = SWAP;
2334
11
                            continue;
2335
23
                    }
2336
23
                }
2337
13.2k
                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2338
13.2k
                break;
2339
831
            case BUILD_LIST:
2340
3.16k
            case BUILD_SET:
2341
3.16k
                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2342
3.16k
                break;
2343
238
            case POP_JUMP_IF_NOT_NONE:
2344
544
            case POP_JUMP_IF_NONE:
2345
544
                switch (target->i_opcode) {
2346
0
                    case JUMP:
2347
0
                        i -= jump_thread(bb, inst, target, inst->i_opcode);
2348
544
                }
2349
544
                break;
2350
24.2k
            case POP_JUMP_IF_FALSE:
2351
24.2k
                switch (target->i_opcode) {
2352
26
                    case JUMP:
2353
26
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE);
2354
24.2k
                }
2355
24.2k
                break;
2356
24.2k
            case POP_JUMP_IF_TRUE:
2357
4.50k
                switch (target->i_opcode) {
2358
0
                    case JUMP:
2359
0
                        i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE);
2360
4.50k
                }
2361
4.50k
                break;
2362
4.50k
            case JUMP_IF_FALSE:
2363
1.09k
                switch (target->i_opcode) {
2364
0
                    case JUMP:
2365
0
                    case JUMP_IF_FALSE:
2366
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_FALSE);
2367
0
                        continue;
2368
133
                    case JUMP_IF_TRUE:
2369
                        // No need to check for loops here, a block's b_next
2370
                        // cannot point to itself.
2371
133
                        assert(inst->i_target != inst->i_target->b_next);
2372
133
                        inst->i_target = inst->i_target->b_next;
2373
133
                        i--;
2374
133
                        continue;
2375
1.09k
                }
2376
963
                break;
2377
963
            case JUMP_IF_TRUE:
2378
584
                switch (target->i_opcode) {
2379
0
                    case JUMP:
2380
0
                    case JUMP_IF_TRUE:
2381
0
                        i -= jump_thread(bb, inst, target, JUMP_IF_TRUE);
2382
0
                        continue;
2383
0
                    case JUMP_IF_FALSE:
2384
                        // No need to check for loops here, a block's b_next
2385
                        // cannot point to itself.
2386
0
                        assert(inst->i_target != inst->i_target->b_next);
2387
0
                        inst->i_target = inst->i_target->b_next;
2388
0
                        i--;
2389
0
                        continue;
2390
584
                }
2391
584
                break;
2392
7.20k
            case JUMP:
2393
31.6k
            case JUMP_NO_INTERRUPT:
2394
31.6k
                switch (target->i_opcode) {
2395
21
                    case JUMP:
2396
21
                        i -= jump_thread(bb, inst, target, JUMP);
2397
21
                        continue;
2398
141
                    case JUMP_NO_INTERRUPT:
2399
141
                        i -= jump_thread(bb, inst, target, opcode);
2400
141
                        continue;
2401
31.6k
                }
2402
31.4k
                break;
2403
31.4k
            case FOR_ITER:
2404
486
                if (target->i_opcode == JUMP) {
2405
                    /* This will not work now because the jump (at target) could
2406
                     * be forward or backward and FOR_ITER only jumps forward. We
2407
                     * can re-enable this if ever we implement a backward version
2408
                     * of FOR_ITER.
2409
                     */
2410
                    /*
2411
                    i -= jump_thread(bb, inst, target, FOR_ITER);
2412
                    */
2413
0
                }
2414
486
                break;
2415
23.2k
            case STORE_FAST:
2416
23.2k
                if (opcode == nextop &&
2417
8.56k
                    oparg == bb->b_instr[i+1].i_oparg &&
2418
91
                    bb->b_instr[i].i_loc.lineno == bb->b_instr[i+1].i_loc.lineno) {
2419
91
                    bb->b_instr[i].i_opcode = POP_TOP;
2420
91
                    bb->b_instr[i].i_oparg = 0;
2421
91
                }
2422
23.2k
                break;
2423
33.4k
            case SWAP:
2424
33.4k
                if (oparg == 1) {
2425
0
                    INSTR_SET_OP0(inst, NOP);
2426
0
                }
2427
33.4k
                break;
2428
33.4k
            case LOAD_GLOBAL:
2429
23.5k
                if (nextop == PUSH_NULL && (oparg & 1) == 0) {
2430
421
                    INSTR_SET_OP1(inst, LOAD_GLOBAL, oparg | 1);
2431
421
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2432
421
                }
2433
23.5k
                break;
2434
26.6k
            case COMPARE_OP:
2435
26.6k
                if (nextop == TO_BOOL) {
2436
51
                    INSTR_SET_OP0(inst, NOP);
2437
51
                    INSTR_SET_OP1(&bb->b_instr[i + 1], COMPARE_OP, oparg | 16);
2438
51
                    continue;
2439
51
                }
2440
26.6k
                break;
2441
26.6k
            case CONTAINS_OP:
2442
3.28k
            case IS_OP:
2443
3.28k
                if (nextop == TO_BOOL) {
2444
265
                    INSTR_SET_OP0(inst, NOP);
2445
265
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, oparg);
2446
265
                    continue;
2447
265
                }
2448
3.02k
                if (nextop == UNARY_NOT) {
2449
256
                    INSTR_SET_OP0(inst, NOP);
2450
256
                    int inverted = oparg ^ 1;
2451
256
                    assert(inverted == 0 || inverted == 1);
2452
256
                    INSTR_SET_OP1(&bb->b_instr[i + 1], opcode, inverted);
2453
256
                    continue;
2454
256
                }
2455
2.76k
                break;
2456
22.1k
            case TO_BOOL:
2457
22.1k
                if (nextop == TO_BOOL) {
2458
0
                    INSTR_SET_OP0(inst, NOP);
2459
0
                    continue;
2460
0
                }
2461
22.1k
                break;
2462
22.1k
            case UNARY_NOT:
2463
73
                if (nextop == TO_BOOL) {
2464
21
                    INSTR_SET_OP0(inst, NOP);
2465
21
                    INSTR_SET_OP0(&bb->b_instr[i + 1], UNARY_NOT);
2466
21
                    continue;
2467
21
                }
2468
52
                if (nextop == UNARY_NOT) {
2469
21
                    INSTR_SET_OP0(inst, NOP);
2470
21
                    INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
2471
21
                    continue;
2472
21
                }
2473
31
                _Py_FALLTHROUGH;
2474
5.13k
            case UNARY_INVERT:
2475
65.0k
            case UNARY_NEGATIVE:
2476
65.0k
                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2477
65.0k
                break;
2478
24.1k
            case CALL_INTRINSIC_1:
2479
24.1k
                if (oparg == INTRINSIC_LIST_TO_TUPLE) {
2480
493
                    if (nextop == GET_ITER) {
2481
0
                        INSTR_SET_OP0(inst, NOP);
2482
0
                    }
2483
493
                    else {
2484
493
                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2485
493
                    }
2486
493
                }
2487
23.6k
                else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2488
16.9k
                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2489
16.9k
                }
2490
24.1k
                break;
2491
239k
            case BINARY_OP:
2492
239k
                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2493
239k
                break;
2494
1.57M
        }
2495
1.57M
    }
2496
2497
1.74M
    for (int i = 0; i < bb->b_iused; i++) {
2498
1.57M
        cfg_instr *inst = &bb->b_instr[i];
2499
1.57M
        if (inst->i_opcode == SWAP) {
2500
28.8k
            if (swaptimize(bb, &i) < 0) {
2501
0
                goto error;
2502
0
            }
2503
28.8k
            apply_static_swaps(bb, i);
2504
28.8k
        }
2505
1.57M
    }
2506
171k
    return SUCCESS;
2507
0
error:
2508
0
    return ERROR;
2509
171k
}
2510
2511
static int resolve_line_numbers(cfg_builder *g, int firstlineno);
2512
2513
static int
2514
remove_redundant_nops_and_jumps(cfg_builder *g)
2515
43.1k
{
2516
43.1k
    int removed_nops, removed_jumps;
2517
45.2k
    do {
2518
        /* Convergence is guaranteed because the number of
2519
         * redundant jumps and nops only decreases.
2520
         */
2521
45.2k
        removed_nops = remove_redundant_nops(g);
2522
45.2k
        RETURN_IF_ERROR(removed_nops);
2523
45.2k
        removed_jumps = remove_redundant_jumps(g);
2524
45.2k
        RETURN_IF_ERROR(removed_jumps);
2525
45.2k
    } while(removed_nops + removed_jumps > 0);
2526
43.1k
    return SUCCESS;
2527
43.1k
}
2528
2529
/* Perform optimizations on a control flow graph.
2530
   The consts object should still be in list form to allow new constants
2531
   to be appended.
2532
2533
   Code trasnformations that reduce code size initially fill the gaps with
2534
   NOPs.  Later those NOPs are removed.
2535
*/
2536
static int
2537
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2538
21.0k
{
2539
21.0k
    assert(PyDict_CheckExact(const_cache));
2540
21.0k
    RETURN_IF_ERROR(check_cfg(g));
2541
21.0k
    RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
2542
21.0k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2543
21.0k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2544
21.0k
    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2545
192k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2546
171k
        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2547
171k
    }
2548
21.0k
    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
2549
21.0k
    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
2550
21.0k
    RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
2551
21.0k
    assert(no_redundant_jumps(g));
2552
21.0k
    return SUCCESS;
2553
21.0k
}
2554
2555
static void
2556
make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op)
2557
7.62k
{
2558
7.62k
    int32_t line1 = inst1->i_loc.lineno;
2559
7.62k
    int32_t line2 = inst2->i_loc.lineno;
2560
    /* Skip if instructions are on different lines */
2561
7.62k
    if (line1 >= 0 && line2 >= 0 && line1 != line2) {
2562
38
        return;
2563
38
    }
2564
7.58k
    if (inst1->i_oparg >= 16 || inst2->i_oparg >= 16) {
2565
3.34k
        return;
2566
3.34k
    }
2567
4.24k
    INSTR_SET_OP1(inst1, super_op, (inst1->i_oparg << 4) | inst2->i_oparg);
2568
4.24k
    INSTR_SET_OP0(inst2, NOP);
2569
4.24k
}
2570
2571
static int
2572
insert_superinstructions(cfg_builder *g)
2573
21.0k
{
2574
192k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2575
2576
1.25M
        for (int i = 0; i < b->b_iused; i++) {
2577
1.08M
            cfg_instr *inst = &b->b_instr[i];
2578
1.08M
            int nextop = i+1 < b->b_iused ? b->b_instr[i+1].i_opcode : 0;
2579
1.08M
            switch(inst->i_opcode) {
2580
7.15k
                case LOAD_FAST:
2581
7.15k
                    if (nextop == LOAD_FAST) {
2582
1.21k
                        make_super_instruction(inst, &b->b_instr[i + 1], LOAD_FAST_LOAD_FAST);
2583
1.21k
                    }
2584
7.15k
                    break;
2585
20.0k
                case STORE_FAST:
2586
20.0k
                    switch (nextop) {
2587
331
                        case LOAD_FAST:
2588
331
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_LOAD_FAST);
2589
331
                            break;
2590
6.08k
                        case STORE_FAST:
2591
6.08k
                            make_super_instruction(inst, &b->b_instr[i + 1], STORE_FAST_STORE_FAST);
2592
6.08k
                            break;
2593
20.0k
                    }
2594
20.0k
                    break;
2595
1.08M
            }
2596
1.08M
        }
2597
171k
    }
2598
21.0k
    int res = remove_redundant_nops(g);
2599
21.0k
    assert(no_redundant_nops(g));
2600
21.0k
    return res;
2601
21.0k
}
2602
2603
#define NOT_LOCAL -1
2604
23.1k
#define DUMMY_INSTR -1
2605
2606
typedef struct {
2607
    // Index of instruction that produced the reference or DUMMY_INSTR.
2608
    int instr;
2609
2610
    // The local to which the reference refers or NOT_LOCAL.
2611
    int local;
2612
} ref;
2613
2614
typedef struct {
2615
    ref *refs;
2616
    Py_ssize_t size;
2617
    Py_ssize_t capacity;
2618
} ref_stack;
2619
2620
static int
2621
ref_stack_push(ref_stack *stack, ref r)
2622
949k
{
2623
949k
    if (stack->size == stack->capacity) {
2624
21.0k
        Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2);
2625
21.0k
        ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap);
2626
21.0k
        if (refs == NULL) {
2627
0
            PyErr_NoMemory();
2628
0
            return -1;
2629
0
        }
2630
21.0k
        stack->refs = refs;
2631
21.0k
        stack->capacity = new_cap;
2632
21.0k
    }
2633
949k
    stack->refs[stack->size] = r;
2634
949k
    stack->size++;
2635
949k
    return 0;
2636
949k
}
2637
2638
static ref
2639
ref_stack_pop(ref_stack *stack)
2640
617k
{
2641
617k
    assert(stack->size > 0);
2642
617k
    stack->size--;
2643
617k
    ref r = stack->refs[stack->size];
2644
617k
    return r;
2645
617k
}
2646
2647
static void
2648
ref_stack_swap_top(ref_stack *stack, Py_ssize_t off)
2649
20.8k
{
2650
20.8k
    Py_ssize_t idx = stack->size - off;
2651
20.8k
    assert(idx >= 0 && idx < stack->size);
2652
20.8k
    ref tmp = stack->refs[idx];
2653
20.8k
    stack->refs[idx] = stack->refs[stack->size - 1];
2654
20.8k
    stack->refs[stack->size - 1] = tmp;
2655
20.8k
}
2656
2657
static ref
2658
ref_stack_at(ref_stack *stack, Py_ssize_t idx)
2659
449k
{
2660
449k
    assert(idx >= 0 && idx < stack->size);
2661
449k
    return stack->refs[idx];
2662
449k
}
2663
2664
static void
2665
ref_stack_clear(ref_stack *stack)
2666
87.4k
{
2667
87.4k
    stack->size = 0;
2668
87.4k
}
2669
2670
static void
2671
ref_stack_fini(ref_stack *stack)
2672
21.0k
{
2673
21.0k
    if (stack->refs != NULL) {
2674
21.0k
        PyMem_Free(stack->refs);
2675
21.0k
    }
2676
21.0k
    stack->refs = NULL;
2677
21.0k
    stack->capacity = 0;
2678
21.0k
    stack->size = 0;
2679
21.0k
}
2680
2681
typedef enum {
2682
    // The loaded reference is still on the stack when the local is killed
2683
    SUPPORT_KILLED  = 1,
2684
    // The loaded reference is stored into a local
2685
    STORED_AS_LOCAL = 2,
2686
    // The loaded reference is still on the stack at the end of the basic block
2687
    REF_UNCONSUMED  = 4,
2688
} LoadFastInstrFlag;
2689
2690
static void
2691
kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
2692
24.4k
{
2693
95.6k
    for (Py_ssize_t i = 0; i < refs->size; i++) {
2694
71.2k
        ref r = ref_stack_at(refs, i);
2695
71.2k
        if (r.local == local) {
2696
33
            assert(r.instr >= 0);
2697
33
            instr_flags[r.instr] |= SUPPORT_KILLED;
2698
33
        }
2699
71.2k
    }
2700
24.4k
}
2701
2702
static void
2703
store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
2704
23.1k
{
2705
23.1k
    kill_local(instr_flags, refs, local);
2706
23.1k
    if (r.instr != DUMMY_INSTR) {
2707
22.5k
        instr_flags[r.instr] |= STORED_AS_LOCAL;
2708
22.5k
    }
2709
23.1k
}
2710
2711
static void
2712
load_fast_push_block(basicblock ***sp, basicblock *target,
2713
                     Py_ssize_t start_depth)
2714
86.4k
{
2715
86.4k
    assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth);
2716
86.4k
    if (!target->b_visited) {
2717
66.4k
        target->b_visited = 1;
2718
66.4k
        *(*sp)++ = target;
2719
66.4k
    }
2720
86.4k
}
2721
2722
/*
2723
 * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that
2724
 * load borrowed references onto the operand stack.
2725
 *
2726
 * This is only safe when we can prove that the reference in the frame outlives
2727
 * the borrowed reference produced by the instruction. We make this tractable
2728
 * by enforcing the following lifetimes:
2729
 *
2730
 * 1. Borrowed references loaded onto the operand stack live until the end of
2731
 *    the instruction that consumes them from the stack. Any borrowed
2732
 *    references that would escape into the heap (e.g. into frame objects or
2733
 *    generators) are converted into new, strong references.
2734
 *
2735
 * 2. Locals live until they are either killed by an instruction
2736
 *    (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten
2737
 *    via `f_locals` is added to a tuple owned by the frame object.
2738
 *
2739
 * To simplify the problem of detecting which supporting references in the
2740
 * frame are killed by instructions that overwrite locals, we only allow
2741
 * borrowed references to be stored as a local in the frame if they were passed
2742
 * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new,
2743
 * strong references.
2744
 *
2745
 * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions
2746
 * that meet the following criteria:
2747
 *
2748
 * 1. The produced reference must be consumed from the stack before the
2749
 *    supporting reference in the frame is killed.
2750
 *
2751
 * 2. The produced reference cannot be stored as a local.
2752
 *
2753
 * We use abstract interpretation to identify instructions that meet these
2754
 * criteria. For each basic block, we simulate the effect the bytecode has on a
2755
 * stack of abstract references and note any instructions that violate the
2756
 * criteria above. Once we've processed all the instructions in a block, any
2757
 * non-violating LOAD_FAST{_LOAD_FAST} can be optimized.
2758
 */
2759
static int
2760
optimize_load_fast(cfg_builder *g)
2761
21.0k
{
2762
21.0k
    int status;
2763
21.0k
    ref_stack refs = {0};
2764
21.0k
    int max_instrs = 0;
2765
21.0k
    basicblock *entryblock = g->g_entryblock;
2766
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
2767
178k
        max_instrs = Py_MAX(max_instrs, b->b_iused);
2768
178k
    }
2769
21.0k
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
2770
21.0k
    uint8_t *instr_flags = PyMem_Malloc(instr_flags_size);
2771
21.0k
    if (instr_flags == NULL) {
2772
0
        PyErr_NoMemory();
2773
0
        return ERROR;
2774
0
    }
2775
21.0k
    basicblock **blocks = make_cfg_traversal_stack(entryblock);
2776
21.0k
    if (blocks == NULL) {
2777
0
        status = ERROR;
2778
0
        goto done;
2779
0
    }
2780
21.0k
    basicblock **sp = blocks;
2781
21.0k
    *sp = entryblock;
2782
21.0k
    sp++;
2783
21.0k
    entryblock->b_startdepth = 0;
2784
21.0k
    entryblock->b_visited = 1;
2785
2786
21.0k
    #define PUSH_REF(instr, local)                \
2787
949k
        do {                                      \
2788
949k
            if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \
2789
0
                status = ERROR;                   \
2790
0
                goto done;                        \
2791
0
            }                                     \
2792
949k
        } while(0)
2793
2794
108k
    while (sp != blocks) {
2795
87.4k
        basicblock *block = *--sp;
2796
87.4k
        assert(block->b_startdepth > -1);
2797
2798
        // Reset per-block state.
2799
87.4k
        memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags));
2800
2801
        // Reset the stack of refs. We don't track references on the stack
2802
        // across basic blocks, but the bytecode will expect their
2803
        // presence. Add dummy references as necessary.
2804
87.4k
        ref_stack_clear(&refs);
2805
401k
        for (int i = 0; i < block->b_startdepth; i++) {
2806
313k
            PUSH_REF(DUMMY_INSTR, NOT_LOCAL);
2807
313k
        }
2808
2809
952k
        for (int i = 0; i < block->b_iused; i++) {
2810
864k
            cfg_instr *instr = &block->b_instr[i];
2811
864k
            int opcode = instr->i_opcode;
2812
864k
            int oparg = instr->i_oparg;
2813
864k
            assert(opcode != EXTENDED_ARG);
2814
864k
            switch (opcode) {
2815
                // Opcodes that load and store locals
2816
888
                case DELETE_FAST: {
2817
888
                    kill_local(instr_flags, &refs, oparg);
2818
888
                    break;
2819
0
                }
2820
2821
13.3k
                case LOAD_FAST: {
2822
13.3k
                    PUSH_REF(i, oparg);
2823
13.3k
                    break;
2824
13.3k
                }
2825
2826
13.3k
                case LOAD_FAST_AND_CLEAR: {
2827
357
                    kill_local(instr_flags, &refs, oparg);
2828
357
                    PUSH_REF(i, oparg);
2829
357
                    break;
2830
357
                }
2831
2832
1.00k
                case LOAD_FAST_LOAD_FAST: {
2833
1.00k
                    PUSH_REF(i, oparg >> 4);
2834
1.00k
                    PUSH_REF(i, oparg & 15);
2835
1.00k
                    break;
2836
1.00k
                }
2837
2838
16.8k
                case STORE_FAST: {
2839
16.8k
                    ref r = ref_stack_pop(&refs);
2840
16.8k
                    store_local(instr_flags, &refs, oparg, r);
2841
16.8k
                    break;
2842
1.00k
                }
2843
2844
148
                case STORE_FAST_LOAD_FAST: {
2845
                    // STORE_FAST
2846
148
                    ref r = ref_stack_pop(&refs);
2847
148
                    store_local(instr_flags, &refs, oparg >> 4, r);
2848
                    // LOAD_FAST
2849
148
                    PUSH_REF(i, oparg & 15);
2850
148
                    break;
2851
148
                }
2852
2853
3.09k
                case STORE_FAST_STORE_FAST: {
2854
                    // STORE_FAST
2855
3.09k
                    ref r = ref_stack_pop(&refs);
2856
3.09k
                    store_local(instr_flags, &refs, oparg >> 4, r);
2857
                    // STORE_FAST
2858
3.09k
                    r = ref_stack_pop(&refs);
2859
3.09k
                    store_local(instr_flags, &refs, oparg & 15, r);
2860
3.09k
                    break;
2861
148
                }
2862
2863
                // Opcodes that shuffle values on the stack
2864
45.9k
                case COPY: {
2865
45.9k
                    assert(oparg > 0);
2866
45.9k
                    Py_ssize_t idx = refs.size - oparg;
2867
45.9k
                    ref r = ref_stack_at(&refs, idx);
2868
45.9k
                    PUSH_REF(r.instr, r.local);
2869
45.9k
                    break;
2870
45.9k
                }
2871
2872
45.9k
                case SWAP: {
2873
20.8k
                    assert(oparg >= 2);
2874
20.8k
                    ref_stack_swap_top(&refs, oparg);
2875
20.8k
                    break;
2876
20.8k
                }
2877
2878
                // We treat opcodes that do not consume all of their inputs on
2879
                // a case by case basis, as we have no generic way of knowing
2880
                // how many inputs should be left on the stack.
2881
2882
                // Opcodes that consume no inputs
2883
161
                case FORMAT_SIMPLE:
2884
281
                case GET_ANEXT:
2885
753
                case GET_ITER:
2886
800
                case GET_LEN:
2887
1.99k
                case IMPORT_FROM:
2888
1.99k
                case MATCH_KEYS:
2889
1.99k
                case MATCH_MAPPING:
2890
2.03k
                case MATCH_SEQUENCE:
2891
2.03k
                case WITH_EXCEPT_START: {
2892
2.03k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2893
2.03k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2894
2.03k
                    int net_pushed = num_pushed - num_popped;
2895
2.03k
                    assert(net_pushed >= 0);
2896
3.91k
                    for (int j = 0; j < net_pushed; j++) {
2897
1.87k
                        PUSH_REF(i, NOT_LOCAL);
2898
1.87k
                    }
2899
2.03k
                    break;
2900
2.03k
                }
2901
2902
                // Opcodes that consume some inputs and push no new values
2903
2.03k
                case DICT_MERGE:
2904
35
                case DICT_UPDATE:
2905
6.33k
                case LIST_APPEND:
2906
6.65k
                case LIST_EXTEND:
2907
6.66k
                case MAP_ADD:
2908
6.66k
                case RERAISE:
2909
8.76k
                case SET_ADD:
2910
8.82k
                case SET_UPDATE: {
2911
8.82k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2912
8.82k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2913
8.82k
                    int net_popped = num_popped - num_pushed;
2914
8.82k
                    assert(net_popped > 0);
2915
17.6k
                    for (int i = 0; i < net_popped; i++) {
2916
8.83k
                        ref_stack_pop(&refs);
2917
8.83k
                    }
2918
8.82k
                    break;
2919
8.82k
                }
2920
2921
7.17k
                case END_SEND: {
2922
7.17k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 3);
2923
7.17k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2924
7.17k
                    ref tos = ref_stack_pop(&refs);
2925
7.17k
                    ref_stack_pop(&refs);
2926
7.17k
                    ref_stack_pop(&refs);
2927
7.17k
                    PUSH_REF(tos.instr, tos.local);
2928
7.17k
                    break;
2929
7.17k
                }
2930
2931
7.17k
                case SET_FUNCTION_ATTRIBUTE: {
2932
5.07k
                    assert(_PyOpcode_num_popped(opcode, oparg) == 2);
2933
5.07k
                    assert(_PyOpcode_num_pushed(opcode, oparg) == 1);
2934
5.07k
                    ref tos = ref_stack_pop(&refs);
2935
5.07k
                    ref_stack_pop(&refs);
2936
5.07k
                    PUSH_REF(tos.instr, tos.local);
2937
5.07k
                    break;
2938
5.07k
                }
2939
2940
                // Opcodes that consume some inputs and push new values
2941
5.07k
                case CHECK_EXC_MATCH: {
2942
0
                    ref_stack_pop(&refs);
2943
0
                    PUSH_REF(i, NOT_LOCAL);
2944
0
                    break;
2945
0
                }
2946
2947
472
                case FOR_ITER: {
2948
472
                    load_fast_push_block(&sp, instr->i_target, refs.size + 1);
2949
472
                    PUSH_REF(i, NOT_LOCAL);
2950
472
                    break;
2951
472
                }
2952
2953
2.91k
                case LOAD_ATTR:
2954
3.09k
                case LOAD_SUPER_ATTR: {
2955
3.09k
                    ref self = ref_stack_pop(&refs);
2956
3.09k
                    if (opcode == LOAD_SUPER_ATTR) {
2957
183
                        ref_stack_pop(&refs);
2958
183
                        ref_stack_pop(&refs);
2959
183
                    }
2960
3.09k
                    PUSH_REF(i, NOT_LOCAL);
2961
3.09k
                    if (oparg & 1) {
2962
                        // A method call; conservatively assume that self is pushed
2963
                        // back onto the stack
2964
830
                        PUSH_REF(self.instr, self.local);
2965
830
                    }
2966
3.09k
                    break;
2967
3.09k
                }
2968
2969
7.07k
                case LOAD_SPECIAL:
2970
7.07k
                case PUSH_EXC_INFO: {
2971
7.07k
                    ref tos = ref_stack_pop(&refs);
2972
7.07k
                    PUSH_REF(i, NOT_LOCAL);
2973
7.07k
                    PUSH_REF(tos.instr, tos.local);
2974
7.07k
                    break;
2975
7.07k
                }
2976
2977
7.17k
                case SEND: {
2978
7.17k
                    load_fast_push_block(&sp, instr->i_target, refs.size);
2979
7.17k
                    ref_stack_pop(&refs);
2980
7.17k
                    PUSH_REF(i, NOT_LOCAL);
2981
7.17k
                    break;
2982
7.17k
                }
2983
2984
                // Opcodes that consume all of their inputs
2985
721k
                default: {
2986
721k
                    int num_popped = _PyOpcode_num_popped(opcode, oparg);
2987
721k
                    int num_pushed = _PyOpcode_num_pushed(opcode, oparg);
2988
721k
                    if (HAS_TARGET(instr->i_opcode)) {
2989
28.1k
                        load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed);
2990
28.1k
                    }
2991
721k
                    if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) {
2992
                        // Block push opcodes only affect the stack when jumping
2993
                        // to the target.
2994
1.25M
                        for (int j = 0; j < num_popped; j++) {
2995
536k
                            ref_stack_pop(&refs);
2996
536k
                        }
2997
1.25M
                        for (int j = 0; j < num_pushed; j++) {
2998
534k
                            PUSH_REF(i, NOT_LOCAL);
2999
534k
                        }
3000
721k
                    }
3001
721k
                    break;
3002
721k
                }
3003
864k
            }
3004
864k
        }
3005
3006
        // Push fallthrough block
3007
87.4k
        if (BB_HAS_FALLTHROUGH(block)) {
3008
50.6k
            assert(block->b_next != NULL);
3009
50.6k
            load_fast_push_block(&sp, block->b_next, refs.size);
3010
50.6k
        }
3011
3012
        // Mark instructions that produce values that are on the stack at the
3013
        // end of the basic block
3014
419k
        for (Py_ssize_t i = 0; i < refs.size; i++) {
3015
332k
            ref r = ref_stack_at(&refs, i);
3016
332k
            if (r.instr != -1) {
3017
89.1k
                instr_flags[r.instr] |= REF_UNCONSUMED;
3018
89.1k
            }
3019
332k
        }
3020
3021
        // Optimize instructions
3022
952k
        for (int i = 0; i < block->b_iused; i++) {
3023
864k
            if (!instr_flags[i]) {
3024
765k
                cfg_instr *instr = &block->b_instr[i];
3025
765k
                switch (instr->i_opcode) {
3026
13.3k
                    case LOAD_FAST:
3027
13.3k
                        instr->i_opcode = LOAD_FAST_BORROW;
3028
13.3k
                        break;
3029
1.00k
                    case LOAD_FAST_LOAD_FAST:
3030
1.00k
                        instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW;
3031
1.00k
                        break;
3032
750k
                    default:
3033
750k
                        break;
3034
765k
                }
3035
765k
            }
3036
864k
        }
3037
87.4k
    }
3038
3039
21.0k
    #undef PUSH_REF
3040
3041
21.0k
    status = SUCCESS;
3042
3043
21.0k
done:
3044
21.0k
    ref_stack_fini(&refs);
3045
21.0k
    PyMem_Free(instr_flags);
3046
21.0k
    PyMem_Free(blocks);
3047
21.0k
    return status;
3048
21.0k
}
3049
3050
// helper functions for add_checks_for_loads_of_unknown_variables
3051
static inline void
3052
maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
3053
482k
{
3054
    // Push b if the unsafe mask is giving us any new information.
3055
    // To avoid overflowing the stack, only allow each block once.
3056
    // Use b->b_visited=1 to mean that b is currently on the stack.
3057
482k
    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
3058
482k
    if (b->b_unsafe_locals_mask != both) {
3059
14.5k
        b->b_unsafe_locals_mask = both;
3060
        // More work left to do.
3061
14.5k
        if (!b->b_visited) {
3062
            // not on the stack, so push it.
3063
14.5k
            *(*sp)++ = b;
3064
14.5k
            b->b_visited = 1;
3065
14.5k
        }
3066
14.5k
    }
3067
482k
}
3068
3069
static void
3070
scan_block_for_locals(basicblock *b, basicblock ***sp)
3071
129k
{
3072
    // bit i is set if local i is potentially uninitialized
3073
129k
    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
3074
786k
    for (int i = 0; i < b->b_iused; i++) {
3075
656k
        cfg_instr *instr = &b->b_instr[i];
3076
656k
        assert(instr->i_opcode != EXTENDED_ARG);
3077
656k
        if (instr->i_except != NULL) {
3078
349k
            maybe_push(instr->i_except, unsafe_mask, sp);
3079
349k
        }
3080
656k
        if (instr->i_oparg >= 64) {
3081
80.6k
            continue;
3082
80.6k
        }
3083
656k
        assert(instr->i_oparg >= 0);
3084
576k
        uint64_t bit = (uint64_t)1 << instr->i_oparg;
3085
576k
        switch (instr->i_opcode) {
3086
1.71k
            case DELETE_FAST:
3087
2.70k
            case LOAD_FAST_AND_CLEAR:
3088
4.67k
            case STORE_FAST_MAYBE_NULL:
3089
4.67k
                unsafe_mask |= bit;
3090
4.67k
                break;
3091
46.1k
            case STORE_FAST:
3092
46.1k
                unsafe_mask &= ~bit;
3093
46.1k
                break;
3094
1.08k
            case LOAD_FAST_CHECK:
3095
                // If this doesn't raise, then the local is defined.
3096
1.08k
                unsafe_mask &= ~bit;
3097
1.08k
                break;
3098
12.9k
            case LOAD_FAST:
3099
12.9k
                if (unsafe_mask & bit) {
3100
1.08k
                    instr->i_opcode = LOAD_FAST_CHECK;
3101
1.08k
                }
3102
12.9k
                unsafe_mask &= ~bit;
3103
12.9k
                break;
3104
576k
        }
3105
576k
    }
3106
129k
    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
3107
67.0k
        maybe_push(b->b_next, unsafe_mask, sp);
3108
67.0k
    }
3109
129k
    cfg_instr *last = basicblock_last_instr(b);
3110
129k
    if (last && is_jump(last)) {
3111
57.4k
        assert(last->i_target != NULL);
3112
57.4k
        maybe_push(last->i_target, unsafe_mask, sp);
3113
57.4k
    }
3114
129k
}
3115
3116
static int
3117
fast_scan_many_locals(basicblock *entryblock, int nlocals)
3118
20
{
3119
20
    assert(nlocals > 64);
3120
20
    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
3121
20
    if (states == NULL) {
3122
0
        PyErr_NoMemory();
3123
0
        return ERROR;
3124
0
    }
3125
20
    Py_ssize_t blocknum = 0;
3126
    // state[i - 64] == blocknum if local i is guaranteed to
3127
    // be initialized, i.e., if it has had a previous LOAD_FAST or
3128
    // STORE_FAST within that basicblock (not followed by
3129
    // DELETE_FAST/LOAD_FAST_AND_CLEAR/STORE_FAST_MAYBE_NULL).
3130
109
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3131
89
        blocknum++;
3132
10.3k
        for (int i = 0; i < b->b_iused; i++) {
3133
10.2k
            cfg_instr *instr = &b->b_instr[i];
3134
10.2k
            assert(instr->i_opcode != EXTENDED_ARG);
3135
10.2k
            int arg = instr->i_oparg;
3136
10.2k
            if (arg < 64) {
3137
10.1k
                continue;
3138
10.1k
            }
3139
10.2k
            assert(arg >= 0);
3140
121
            switch (instr->i_opcode) {
3141
29
                case DELETE_FAST:
3142
29
                case LOAD_FAST_AND_CLEAR:
3143
29
                case STORE_FAST_MAYBE_NULL:
3144
29
                    states[arg - 64] = blocknum - 1;
3145
29
                    break;
3146
32
                case STORE_FAST:
3147
32
                    states[arg - 64] = blocknum;
3148
32
                    break;
3149
16
                case LOAD_FAST:
3150
16
                    if (states[arg - 64] != blocknum) {
3151
8
                        instr->i_opcode = LOAD_FAST_CHECK;
3152
8
                    }
3153
16
                    states[arg - 64] = blocknum;
3154
16
                    break;
3155
121
                    Py_UNREACHABLE();
3156
121
            }
3157
121
        }
3158
89
    }
3159
20
    PyMem_Free(states);
3160
20
    return SUCCESS;
3161
20
}
3162
3163
static int
3164
remove_unused_consts(basicblock *entryblock, PyObject *consts)
3165
21.0k
{
3166
21.0k
    assert(PyList_CheckExact(consts));
3167
21.0k
    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
3168
21.0k
    if (nconsts == 0) {
3169
27
        return SUCCESS;  /* nothing to do */
3170
27
    }
3171
3172
20.9k
    Py_ssize_t *index_map = NULL;
3173
20.9k
    Py_ssize_t *reverse_index_map = NULL;
3174
20.9k
    int err = ERROR;
3175
3176
20.9k
    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3177
20.9k
    if (index_map == NULL) {
3178
0
        goto end;
3179
0
    }
3180
167k
    for (Py_ssize_t i = 1; i < nconsts; i++) {
3181
146k
        index_map[i] = -1;
3182
146k
    }
3183
    // The first constant may be docstring; keep it always.
3184
20.9k
    index_map[0] = 0;
3185
3186
    /* mark used consts */
3187
192k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3188
1.25M
        for (int i = 0; i < b->b_iused; i++) {
3189
1.08M
            int opcode = b->b_instr[i].i_opcode;
3190
1.08M
            if (OPCODE_HAS_CONST(opcode)) {
3191
157k
                int index = b->b_instr[i].i_oparg;
3192
157k
                index_map[index] = index;
3193
157k
            }
3194
1.08M
        }
3195
171k
    }
3196
    /* now index_map[i] == i if consts[i] is used, -1 otherwise */
3197
    /* condense consts */
3198
20.9k
    Py_ssize_t n_used_consts = 0;
3199
188k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3200
167k
        if (index_map[i] != -1) {
3201
80.7k
            assert(index_map[i] == i);
3202
80.7k
            index_map[n_used_consts++] = index_map[i];
3203
80.7k
        }
3204
167k
    }
3205
20.9k
    if (n_used_consts == nconsts) {
3206
        /* nothing to do */
3207
9.07k
        err = SUCCESS;
3208
9.07k
        goto end;
3209
9.07k
    }
3210
3211
    /* move all used consts to the beginning of the consts list */
3212
20.9k
    assert(n_used_consts < nconsts);
3213
65.1k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3214
53.2k
        Py_ssize_t old_index = index_map[i];
3215
53.2k
        assert(i <= old_index && old_index < nconsts);
3216
53.2k
        if (i != old_index) {
3217
33.5k
            PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
3218
33.5k
            assert(value != NULL);
3219
33.5k
            PyList_SetItem(consts, i, Py_NewRef(value));
3220
33.5k
        }
3221
53.2k
    }
3222
3223
    /* truncate the consts list at its new size */
3224
11.9k
    if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
3225
0
        goto end;
3226
0
    }
3227
    /* adjust const indices in the bytecode */
3228
11.9k
    reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
3229
11.9k
    if (reverse_index_map == NULL) {
3230
0
        goto end;
3231
0
    }
3232
152k
    for (Py_ssize_t i = 0; i < nconsts; i++) {
3233
140k
        reverse_index_map[i] = -1;
3234
140k
    }
3235
65.1k
    for (Py_ssize_t i = 0; i < n_used_consts; i++) {
3236
53.2k
        assert(index_map[i] != -1);
3237
53.2k
        assert(reverse_index_map[index_map[i]] == -1);
3238
53.2k
        reverse_index_map[index_map[i]] = i;
3239
53.2k
    }
3240
3241
99.9k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3242
748k
        for (int i = 0; i < b->b_iused; i++) {
3243
660k
            int opcode = b->b_instr[i].i_opcode;
3244
660k
            if (OPCODE_HAS_CONST(opcode)) {
3245
99.2k
                int index = b->b_instr[i].i_oparg;
3246
99.2k
                assert(reverse_index_map[index] >= 0);
3247
99.2k
                assert(reverse_index_map[index] < n_used_consts);
3248
99.2k
                b->b_instr[i].i_oparg = (int)reverse_index_map[index];
3249
99.2k
            }
3250
660k
        }
3251
88.0k
    }
3252
3253
11.9k
    err = SUCCESS;
3254
20.9k
end:
3255
20.9k
    PyMem_Free(index_map);
3256
20.9k
    PyMem_Free(reverse_index_map);
3257
20.9k
    return err;
3258
11.9k
}
3259
3260
3261
3262
static int
3263
add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
3264
                                                int nlocals,
3265
                                                int nparams)
3266
21.0k
{
3267
21.0k
    if (nlocals == 0) {
3268
12.3k
        return SUCCESS;
3269
12.3k
    }
3270
8.65k
    if (nlocals > 64) {
3271
        // To avoid O(nlocals**2) compilation, locals beyond the first
3272
        // 64 are only analyzed one basicblock at a time: initialization
3273
        // info is not passed between basicblocks.
3274
20
        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
3275
0
            return ERROR;
3276
0
        }
3277
20
        nlocals = 64;
3278
20
    }
3279
8.65k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3280
8.65k
    if (stack == NULL) {
3281
0
        return ERROR;
3282
0
    }
3283
8.65k
    basicblock **sp = stack;
3284
3285
    // First origin of being uninitialized:
3286
    // The non-parameter locals in the entry block.
3287
8.65k
    uint64_t start_mask = 0;
3288
24.0k
    for (int i = nparams; i < nlocals; i++) {
3289
15.3k
        start_mask |= (uint64_t)1 << i;
3290
15.3k
    }
3291
8.65k
    maybe_push(entryblock, start_mask, &sp);
3292
3293
    // Second origin of being uninitialized:
3294
    // There could be DELETE_FAST somewhere, so
3295
    // be sure to scan each basicblock at least once.
3296
123k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3297
114k
        scan_block_for_locals(b, &sp);
3298
114k
    }
3299
    // Now propagate the uncertainty from the origins we found: Use
3300
    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
3301
23.1k
    while (sp > stack) {
3302
14.5k
        basicblock *b = *--sp;
3303
        // mark as no longer on stack
3304
14.5k
        b->b_visited = 0;
3305
14.5k
        scan_block_for_locals(b, &sp);
3306
14.5k
    }
3307
8.65k
    PyMem_Free(stack);
3308
8.65k
    return SUCCESS;
3309
8.65k
}
3310
3311
3312
static int
3313
11.9k
mark_warm(basicblock *entryblock) {
3314
11.9k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3315
11.9k
    if (stack == NULL) {
3316
0
        return ERROR;
3317
0
    }
3318
11.9k
    basicblock **sp = stack;
3319
3320
11.9k
    *sp++ = entryblock;
3321
11.9k
    entryblock->b_visited = 1;
3322
85.8k
    while (sp > stack) {
3323
73.8k
        basicblock *b = *(--sp);
3324
73.8k
        assert(!b->b_except_handler);
3325
73.8k
        b->b_warm = 1;
3326
73.8k
        basicblock *next = b->b_next;
3327
73.8k
        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
3328
40.4k
            *sp++ = next;
3329
40.4k
            next->b_visited = 1;
3330
40.4k
        }
3331
652k
        for (int i=0; i < b->b_iused; i++) {
3332
578k
            cfg_instr *instr = &b->b_instr[i];
3333
578k
            if (is_jump(instr) && !instr->i_target->b_visited) {
3334
21.3k
                *sp++ = instr->i_target;
3335
21.3k
                instr->i_target->b_visited = 1;
3336
21.3k
            }
3337
578k
        }
3338
73.8k
    }
3339
11.9k
    PyMem_Free(stack);
3340
11.9k
    return SUCCESS;
3341
11.9k
}
3342
3343
static int
3344
11.9k
mark_cold(basicblock *entryblock) {
3345
174k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3346
162k
        assert(!b->b_cold && !b->b_warm);
3347
162k
    }
3348
11.9k
    if (mark_warm(entryblock) < 0) {
3349
0
        return ERROR;
3350
0
    }
3351
3352
11.9k
    basicblock **stack = make_cfg_traversal_stack(entryblock);
3353
11.9k
    if (stack == NULL) {
3354
0
        return ERROR;
3355
0
    }
3356
3357
11.9k
    basicblock **sp = stack;
3358
174k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3359
162k
        if (b->b_except_handler) {
3360
27.5k
            assert(!b->b_warm);
3361
27.5k
            *sp++ = b;
3362
27.5k
            b->b_visited = 1;
3363
27.5k
        }
3364
162k
    }
3365
3366
83.2k
    while (sp > stack) {
3367
71.3k
        basicblock *b = *(--sp);
3368
71.3k
        b->b_cold = 1;
3369
71.3k
        basicblock *next = b->b_next;
3370
71.3k
        if (next && BB_HAS_FALLTHROUGH(b)) {
3371
43.2k
            if (!next->b_warm && !next->b_visited) {
3372
35.3k
                *sp++ = next;
3373
35.3k
                next->b_visited = 1;
3374
35.3k
            }
3375
43.2k
        }
3376
299k
        for (int i = 0; i < b->b_iused; i++) {
3377
227k
            cfg_instr *instr = &b->b_instr[i];
3378
227k
            if (is_jump(instr)) {
3379
30.0k
                assert(i == b->b_iused - 1);
3380
30.0k
                basicblock *target = b->b_instr[i].i_target;
3381
30.0k
                if (!target->b_warm && !target->b_visited) {
3382
8.37k
                    *sp++ = target;
3383
8.37k
                    target->b_visited = 1;
3384
8.37k
                }
3385
30.0k
            }
3386
227k
        }
3387
71.3k
    }
3388
11.9k
    PyMem_Free(stack);
3389
11.9k
    return SUCCESS;
3390
11.9k
}
3391
3392
3393
static int
3394
21.0k
push_cold_blocks_to_end(cfg_builder *g) {
3395
21.0k
    basicblock *entryblock = g->g_entryblock;
3396
21.0k
    if (entryblock->b_next == NULL) {
3397
        /* single basicblock, no need to reorder */
3398
9.03k
        return SUCCESS;
3399
9.03k
    }
3400
11.9k
    RETURN_IF_ERROR(mark_cold(entryblock));
3401
3402
11.9k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3403
3404
    /* If we have a cold block with fallthrough to a warm block, add */
3405
    /* an explicit jump instead of fallthrough */
3406
181k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3407
169k
        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
3408
7.17k
            basicblock *explicit_jump = cfg_builder_new_block(g);
3409
7.17k
            if (explicit_jump == NULL) {
3410
0
                return ERROR;
3411
0
            }
3412
7.17k
            if (!IS_LABEL(b->b_next->b_label)) {
3413
0
                b->b_next->b_label.id = next_lbl++;
3414
0
            }
3415
7.17k
            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
3416
7.17k
                             NO_LOCATION);
3417
7.17k
            explicit_jump->b_cold = 1;
3418
7.17k
            explicit_jump->b_next = b->b_next;
3419
7.17k
            explicit_jump->b_predecessors = 1;
3420
7.17k
            b->b_next = explicit_jump;
3421
3422
            /* set target */
3423
7.17k
            cfg_instr *last = basicblock_last_instr(explicit_jump);
3424
7.17k
            last->i_target = explicit_jump->b_next;
3425
7.17k
        }
3426
169k
    }
3427
3428
11.9k
    assert(!entryblock->b_cold);  /* First block can't be cold */
3429
11.9k
    basicblock *cold_blocks = NULL;
3430
11.9k
    basicblock *cold_blocks_tail = NULL;
3431
3432
11.9k
    basicblock *b = entryblock;
3433
24.4k
    while(b->b_next) {
3434
24.4k
        assert(!b->b_cold);
3435
103k
        while (b->b_next && !b->b_next->b_cold) {
3436
78.7k
            b = b->b_next;
3437
78.7k
        }
3438
24.4k
        if (b->b_next == NULL) {
3439
            /* no more cold blocks */
3440
11.9k
            break;
3441
11.9k
        }
3442
3443
        /* b->b_next is the beginning of a cold streak */
3444
24.4k
        assert(!b->b_cold && b->b_next->b_cold);
3445
3446
12.4k
        basicblock *b_end = b->b_next;
3447
78.4k
        while (b_end->b_next && b_end->b_next->b_cold) {
3448
65.9k
            b_end = b_end->b_next;
3449
65.9k
        }
3450
3451
        /* b_end is the end of the cold streak */
3452
12.4k
        assert(b_end && b_end->b_cold);
3453
12.4k
        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
3454
3455
12.4k
        if (cold_blocks == NULL) {
3456
1.14k
            cold_blocks = b->b_next;
3457
1.14k
        }
3458
11.3k
        else {
3459
11.3k
            cold_blocks_tail->b_next = b->b_next;
3460
11.3k
        }
3461
12.4k
        cold_blocks_tail = b_end;
3462
12.4k
        b->b_next = b_end->b_next;
3463
12.4k
        b_end->b_next = NULL;
3464
12.4k
    }
3465
11.9k
    assert(b != NULL && b->b_next == NULL);
3466
11.9k
    b->b_next = cold_blocks;
3467
3468
11.9k
    if (cold_blocks != NULL) {
3469
1.14k
        RETURN_IF_ERROR(remove_redundant_nops_and_jumps(g));
3470
1.14k
    }
3471
11.9k
    return SUCCESS;
3472
11.9k
}
3473
3474
static int
3475
convert_pseudo_conditional_jumps(cfg_builder *g)
3476
21.0k
{
3477
21.0k
    basicblock *entryblock = g->g_entryblock;
3478
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3479
1.26M
        for (int i = 0; i < b->b_iused; i++) {
3480
1.08M
            cfg_instr *instr = &b->b_instr[i];
3481
1.08M
            if (instr->i_opcode == JUMP_IF_FALSE || instr->i_opcode == JUMP_IF_TRUE) {
3482
1.43k
                assert(i == b->b_iused - 1);
3483
1.43k
                instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ?
3484
963
                                          POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE;
3485
1.43k
                location loc = instr->i_loc;
3486
1.43k
                basicblock *except = instr->i_except;
3487
1.43k
                cfg_instr copy = {
3488
1.43k
                            .i_opcode = COPY,
3489
1.43k
                            .i_oparg = 1,
3490
1.43k
                            .i_loc = loc,
3491
1.43k
                            .i_target = NULL,
3492
1.43k
                            .i_except = except,
3493
1.43k
                };
3494
1.43k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy));
3495
1.43k
                cfg_instr to_bool = {
3496
1.43k
                            .i_opcode = TO_BOOL,
3497
1.43k
                            .i_oparg = 0,
3498
1.43k
                            .i_loc = loc,
3499
1.43k
                            .i_target = NULL,
3500
1.43k
                            .i_except = except,
3501
1.43k
                };
3502
1.43k
                RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool));
3503
1.43k
            }
3504
1.08M
        }
3505
178k
    }
3506
21.0k
    return SUCCESS;
3507
21.0k
}
3508
3509
static int
3510
convert_pseudo_ops(cfg_builder *g)
3511
21.0k
{
3512
21.0k
    basicblock *entryblock = g->g_entryblock;
3513
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3514
1.27M
        for (int i = 0; i < b->b_iused; i++) {
3515
1.09M
            cfg_instr *instr = &b->b_instr[i];
3516
1.09M
            if (is_block_push(instr)) {
3517
27.5k
                INSTR_SET_OP0(instr, NOP);
3518
27.5k
            }
3519
1.06M
            else if (instr->i_opcode == LOAD_CLOSURE) {
3520
7.31k
                assert(is_pseudo_target(LOAD_CLOSURE, LOAD_FAST));
3521
7.31k
                instr->i_opcode = LOAD_FAST;
3522
7.31k
            }
3523
1.05M
            else if (instr->i_opcode == STORE_FAST_MAYBE_NULL) {
3524
986
                assert(is_pseudo_target(STORE_FAST_MAYBE_NULL, STORE_FAST));
3525
986
                instr->i_opcode = STORE_FAST;
3526
986
            }
3527
1.09M
        }
3528
178k
    }
3529
21.0k
    return remove_redundant_nops_and_jumps(g);
3530
21.0k
}
3531
3532
static inline bool
3533
322k
is_exit_or_eval_check_without_lineno(basicblock *b) {
3534
322k
    if (basicblock_exits_scope(b) || basicblock_has_eval_break(b)) {
3535
128k
        return basicblock_has_no_lineno(b);
3536
128k
    }
3537
193k
    else {
3538
193k
        return false;
3539
193k
    }
3540
322k
}
3541
3542
3543
/* PEP 626 mandates that the f_lineno of a frame is correct
3544
 * after a frame terminates. It would be prohibitively expensive
3545
 * to continuously update the f_lineno field at runtime,
3546
 * so we make sure that all exiting instruction (raises and returns)
3547
 * have a valid line number, allowing us to compute f_lineno lazily.
3548
 * We can do this by duplicating the exit blocks without line number
3549
 * so that none have more than one predecessor. We can then safely
3550
 * copy the line number from the sole predecessor block.
3551
 */
3552
static int
3553
duplicate_exits_without_lineno(cfg_builder *g)
3554
42.0k
{
3555
42.0k
    int next_lbl = get_max_label(g->g_entryblock) + 1;
3556
3557
    /* Copy all exit blocks without line number that are targets of a jump.
3558
     */
3559
42.0k
    basicblock *entryblock = g->g_entryblock;
3560
391k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3561
349k
        cfg_instr *last = basicblock_last_instr(b);
3562
349k
        if (last == NULL) {
3563
36.2k
            continue;
3564
36.2k
        }
3565
313k
        if (is_jump(last)) {
3566
151k
            basicblock *target = next_nonempty_block(last->i_target);
3567
151k
            if (is_exit_or_eval_check_without_lineno(target) && target->b_predecessors > 1) {
3568
936
                basicblock *new_target = copy_basicblock(g, target);
3569
936
                if (new_target == NULL) {
3570
0
                    return ERROR;
3571
0
                }
3572
936
                new_target->b_instr[0].i_loc = last->i_loc;
3573
936
                last->i_target = new_target;
3574
936
                target->b_predecessors--;
3575
936
                new_target->b_predecessors = 1;
3576
936
                new_target->b_next = target->b_next;
3577
936
                new_target->b_label.id = next_lbl++;
3578
936
                target->b_next = new_target;
3579
936
            }
3580
151k
        }
3581
313k
    }
3582
3583
    /* Any remaining reachable exit blocks without line number can only be reached by
3584
     * fall through, and thus can only have a single predecessor */
3585
391k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3586
349k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
3587
170k
            if (is_exit_or_eval_check_without_lineno(b->b_next)) {
3588
5.32k
                cfg_instr *last = basicblock_last_instr(b);
3589
5.32k
                assert(last != NULL);
3590
5.32k
                b->b_next->b_instr[0].i_loc = last->i_loc;
3591
5.32k
            }
3592
170k
        }
3593
349k
    }
3594
42.0k
    return SUCCESS;
3595
42.0k
}
3596
3597
3598
/* If an instruction has no line number, but it's predecessor in the BB does,
3599
 * then copy the line number. If a successor block has no line number, and only
3600
 * one predecessor, then inherit the line number.
3601
 * This ensures that all exit blocks (with one predecessor) receive a line number.
3602
 * Also reduces the size of the line number table,
3603
 * but has no impact on the generated line number events.
3604
 */
3605
3606
static inline void
3607
maybe_propagate_location(basicblock *b, int i, location loc)
3608
2.79M
{
3609
2.79M
    assert(b->b_iused > i);
3610
2.79M
    if (b->b_instr[i].i_loc.lineno == NO_LOCATION.lineno) {
3611
207k
         b->b_instr[i].i_loc = loc;
3612
207k
    }
3613
2.79M
}
3614
3615
static void
3616
propagate_line_numbers(basicblock *entryblock)
3617
42.0k
{
3618
391k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3619
349k
        cfg_instr *last = basicblock_last_instr(b);
3620
349k
        if (last == NULL) {
3621
36.2k
            continue;
3622
36.2k
        }
3623
3624
313k
        location prev_location = NO_LOCATION;
3625
2.97M
        for (int i = 0; i < b->b_iused; i++) {
3626
2.66M
            maybe_propagate_location(b, i, prev_location);
3627
2.66M
            prev_location = b->b_instr[i].i_loc;
3628
2.66M
        }
3629
313k
        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
3630
100k
            if (b->b_next->b_iused > 0) {
3631
100k
                maybe_propagate_location(b->b_next, 0, prev_location);
3632
100k
            }
3633
100k
        }
3634
313k
        if (is_jump(last)) {
3635
151k
            basicblock *target = last->i_target;
3636
151k
            while (target->b_iused == 0 && target->b_predecessors == 1) {
3637
6
                target = target->b_next;
3638
6
            }
3639
151k
            if (target->b_predecessors == 1) {
3640
28.4k
                maybe_propagate_location(target, 0, prev_location);
3641
28.4k
            }
3642
151k
        }
3643
313k
    }
3644
42.0k
}
3645
3646
static int
3647
resolve_line_numbers(cfg_builder *g, int firstlineno)
3648
42.0k
{
3649
42.0k
    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
3650
42.0k
    propagate_line_numbers(g->g_entryblock);
3651
42.0k
    return SUCCESS;
3652
42.0k
}
3653
3654
int
3655
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
3656
                        int nlocals, int nparams, int firstlineno)
3657
21.0k
{
3658
21.0k
    assert(cfg_builder_check(g));
3659
    /** Preprocessing **/
3660
    /* Map labels to targets and mark exception handlers */
3661
21.0k
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
3662
21.0k
    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
3663
21.0k
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
3664
3665
    /** Optimization **/
3666
21.0k
    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3667
21.0k
    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
3668
21.0k
    RETURN_IF_ERROR(
3669
21.0k
        add_checks_for_loads_of_uninitialized_variables(
3670
21.0k
            g->g_entryblock, nlocals, nparams));
3671
21.0k
    RETURN_IF_ERROR(insert_superinstructions(g));
3672
3673
21.0k
    RETURN_IF_ERROR(push_cold_blocks_to_end(g));
3674
21.0k
    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
3675
    // temporarily remove assert. See https://github.com/python/cpython/issues/125845
3676
    // assert(all_exits_have_lineno(g->g_entryblock));
3677
21.0k
    return SUCCESS;
3678
21.0k
}
3679
3680
static int *
3681
build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
3682
21.0k
{
3683
21.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3684
21.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3685
21.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3686
3687
21.0k
    int noffsets = ncellvars + nfreevars;
3688
21.0k
    int *fixed = PyMem_New(int, noffsets);
3689
21.0k
    if (fixed == NULL) {
3690
0
        PyErr_NoMemory();
3691
0
        return NULL;
3692
0
    }
3693
30.2k
    for (int i = 0; i < noffsets; i++) {
3694
9.22k
        fixed[i] = nlocals + i;
3695
9.22k
    }
3696
3697
21.0k
    PyObject *varname, *cellindex;
3698
21.0k
    Py_ssize_t pos = 0;
3699
25.9k
    while (PyDict_Next(umd->u_cellvars, &pos, &varname, &cellindex)) {
3700
4.96k
        PyObject *varindex;
3701
4.96k
        if (PyDict_GetItemRef(umd->u_varnames, varname, &varindex) < 0) {
3702
0
            goto error;
3703
0
        }
3704
4.96k
        if (varindex == NULL) {
3705
4.96k
            continue;
3706
4.96k
        }
3707
3708
6
        int argoffset = PyLong_AsInt(varindex);
3709
6
        Py_DECREF(varindex);
3710
6
        if (argoffset == -1 && PyErr_Occurred()) {
3711
0
            goto error;
3712
0
        }
3713
3714
6
        int oldindex = PyLong_AsInt(cellindex);
3715
6
        if (oldindex == -1 && PyErr_Occurred()) {
3716
0
            goto error;
3717
0
        }
3718
6
        fixed[oldindex] = argoffset;
3719
6
    }
3720
21.0k
    return fixed;
3721
3722
0
error:
3723
0
    PyMem_Free(fixed);
3724
0
    return NULL;
3725
21.0k
}
3726
3727
#define IS_GENERATOR(CF) \
3728
    ((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
3729
3730
static int
3731
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
3732
                           int *fixed, int nfreevars)
3733
21.0k
{
3734
21.0k
    assert(umd->u_firstlineno > 0);
3735
3736
    /* Set up cells for any variable that escapes, to be put in a closure. */
3737
21.0k
    const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3738
21.0k
    if (ncellvars) {
3739
        // umd->u_cellvars has the cells out of order so we sort them
3740
        // before adding the MAKE_CELL instructions.  Note that we
3741
        // adjust for arg cells, which come first.
3742
4.10k
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(umd->u_varnames);
3743
4.10k
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
3744
4.10k
        if (sorted == NULL) {
3745
0
            PyErr_NoMemory();
3746
0
            return ERROR;
3747
0
        }
3748
9.07k
        for (int i = 0; i < ncellvars; i++) {
3749
4.96k
            sorted[fixed[i]] = i + 1;
3750
4.96k
        }
3751
9.54k
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
3752
5.44k
            int oldindex = sorted[i] - 1;
3753
5.44k
            if (oldindex == -1) {
3754
476
                continue;
3755
476
            }
3756
4.96k
            cfg_instr make_cell = {
3757
4.96k
                .i_opcode = MAKE_CELL,
3758
                // This will get fixed in offset_derefs().
3759
4.96k
                .i_oparg = oldindex,
3760
4.96k
                .i_loc = NO_LOCATION,
3761
4.96k
                .i_target = NULL,
3762
4.96k
                .i_except = NULL,
3763
4.96k
            };
3764
4.96k
            if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
3765
0
                PyMem_RawFree(sorted);
3766
0
                return ERROR;
3767
0
            }
3768
4.96k
            ncellsused += 1;
3769
4.96k
        }
3770
4.10k
        PyMem_RawFree(sorted);
3771
4.10k
    }
3772
3773
21.0k
    if (nfreevars) {
3774
3.45k
        cfg_instr copy_frees = {
3775
3.45k
            .i_opcode = COPY_FREE_VARS,
3776
3.45k
            .i_oparg = nfreevars,
3777
3.45k
            .i_loc = NO_LOCATION,
3778
3.45k
            .i_target = NULL,
3779
3.45k
            .i_except = NULL,
3780
3.45k
        };
3781
3.45k
        RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees));
3782
3.45k
    }
3783
3784
21.0k
    return SUCCESS;
3785
21.0k
}
3786
3787
static int
3788
fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *fixedmap)
3789
21.0k
{
3790
21.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3791
21.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3792
21.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3793
21.0k
    int noffsets = ncellvars + nfreevars;
3794
3795
    // First deal with duplicates (arg cells).
3796
21.0k
    int numdropped = 0;
3797
30.2k
    for (int i = 0; i < noffsets ; i++) {
3798
9.22k
        if (fixedmap[i] == i + nlocals) {
3799
9.22k
            fixedmap[i] -= numdropped;
3800
9.22k
        }
3801
6
        else {
3802
            // It was a duplicate (cell/arg).
3803
6
            numdropped += 1;
3804
6
        }
3805
9.22k
    }
3806
3807
    // Then update offsets, either relative to locals or by cell2arg.
3808
199k
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
3809
1.27M
        for (int i = 0; i < b->b_iused; i++) {
3810
1.09M
            cfg_instr *inst = &b->b_instr[i];
3811
            // This is called before extended args are generated.
3812
1.09M
            assert(inst->i_opcode != EXTENDED_ARG);
3813
1.09M
            int oldoffset = inst->i_oparg;
3814
1.09M
            switch(inst->i_opcode) {
3815
4.96k
                case MAKE_CELL:
3816
12.2k
                case LOAD_CLOSURE:
3817
21.7k
                case LOAD_DEREF:
3818
26.0k
                case STORE_DEREF:
3819
26.0k
                case DELETE_DEREF:
3820
26.3k
                case LOAD_FROM_DICT_OR_DEREF:
3821
26.3k
                    assert(oldoffset >= 0);
3822
26.3k
                    assert(oldoffset < noffsets);
3823
26.3k
                    assert(fixedmap[oldoffset] >= 0);
3824
26.3k
                    inst->i_oparg = fixedmap[oldoffset];
3825
1.09M
            }
3826
1.09M
        }
3827
178k
    }
3828
3829
21.0k
    return numdropped;
3830
21.0k
}
3831
3832
static int
3833
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
3834
21.0k
{
3835
21.0k
    assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
3836
21.0k
    assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
3837
21.0k
    assert(PyDict_GET_SIZE(umd->u_freevars) < INT_MAX);
3838
21.0k
    int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames);
3839
21.0k
    int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
3840
21.0k
    int nfreevars = (int)PyDict_GET_SIZE(umd->u_freevars);
3841
21.0k
    assert(INT_MAX - nlocals - ncellvars > 0);
3842
21.0k
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
3843
21.0k
    int nlocalsplus = nlocals + ncellvars + nfreevars;
3844
21.0k
    int* cellfixedoffsets = build_cellfixedoffsets(umd);
3845
21.0k
    if (cellfixedoffsets == NULL) {
3846
0
        return ERROR;
3847
0
    }
3848
3849
    // This must be called before fix_cell_offsets().
3850
21.0k
    if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
3851
0
        PyMem_Free(cellfixedoffsets);
3852
0
        return ERROR;
3853
0
    }
3854
3855
21.0k
    int numdropped = fix_cell_offsets(umd, g->g_entryblock, cellfixedoffsets);
3856
21.0k
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
3857
21.0k
    cellfixedoffsets = NULL;
3858
21.0k
    if (numdropped < 0) {
3859
0
        return ERROR;
3860
0
    }
3861
3862
21.0k
    nlocalsplus -= numdropped;
3863
21.0k
    return nlocalsplus;
3864
21.0k
}
3865
3866
cfg_builder *
3867
_PyCfg_FromInstructionSequence(_PyInstructionSequence *seq)
3868
21.0k
{
3869
21.0k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3870
0
        return NULL;
3871
0
    }
3872
21.0k
    cfg_builder *g = _PyCfgBuilder_New();
3873
21.0k
    if (g == NULL) {
3874
0
        return NULL;
3875
0
    }
3876
1.62M
    for (int i = 0; i < seq->s_used; i++) {
3877
1.60M
        seq->s_instrs[i].i_target = 0;
3878
1.60M
    }
3879
1.62M
    for (int i = 0; i < seq->s_used; i++) {
3880
1.60M
        _PyInstruction *instr = &seq->s_instrs[i];
3881
1.60M
        if (HAS_TARGET(instr->i_opcode)) {
3882
109k
            assert(instr->i_oparg >= 0 && instr->i_oparg < seq->s_used);
3883
109k
            seq->s_instrs[instr->i_oparg].i_target = 1;
3884
109k
        }
3885
1.60M
    }
3886
21.0k
    int offset = 0;
3887
1.62M
    for (int i = 0; i < seq->s_used; i++) {
3888
1.60M
        _PyInstruction *instr = &seq->s_instrs[i];
3889
1.60M
        if (instr->i_opcode == ANNOTATIONS_PLACEHOLDER) {
3890
4.49k
            if (seq->s_annotations_code != NULL) {
3891
235
                assert(seq->s_annotations_code->s_labelmap_size == 0
3892
235
                    && seq->s_annotations_code->s_nested == NULL);
3893
940
                for (int j = 0; j < seq->s_annotations_code->s_used; j++) {
3894
705
                    _PyInstruction *ann_instr = &seq->s_annotations_code->s_instrs[j];
3895
705
                    assert(!HAS_TARGET(ann_instr->i_opcode));
3896
705
                    if (_PyCfgBuilder_Addop(g, ann_instr->i_opcode, ann_instr->i_oparg, ann_instr->i_loc) < 0) {
3897
0
                        goto error;
3898
0
                    }
3899
705
                }
3900
235
                offset += seq->s_annotations_code->s_used - 1;
3901
235
            }
3902
4.25k
            else {
3903
4.25k
                offset -= 1;
3904
4.25k
            }
3905
4.49k
            continue;
3906
4.49k
        }
3907
1.59M
        if (instr->i_target) {
3908
88.9k
            jump_target_label lbl_ = {i + offset};
3909
88.9k
            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
3910
0
                goto error;
3911
0
            }
3912
88.9k
        }
3913
1.59M
        int opcode = instr->i_opcode;
3914
1.59M
        int oparg = instr->i_oparg;
3915
1.59M
        if (HAS_TARGET(opcode)) {
3916
109k
            oparg += offset;
3917
109k
        }
3918
1.59M
        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
3919
0
            goto error;
3920
0
        }
3921
1.59M
    }
3922
21.0k
    if (_PyCfgBuilder_CheckSize(g) < 0) {
3923
0
        goto error;
3924
0
    }
3925
21.0k
    return g;
3926
0
error:
3927
0
    _PyCfgBuilder_Free(g);
3928
0
    return NULL;
3929
21.0k
}
3930
3931
int
3932
_PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
3933
21.0k
{
3934
21.0k
    int lbl = 0;
3935
199k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3936
178k
        b->b_label = (jump_target_label){lbl};
3937
178k
        lbl += 1;
3938
178k
    }
3939
199k
    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
3940
178k
        RETURN_IF_ERROR(_PyInstructionSequence_UseLabel(seq, b->b_label.id));
3941
1.27M
        for (int i = 0; i < b->b_iused; i++) {
3942
1.09M
            cfg_instr *instr = &b->b_instr[i];
3943
1.09M
            if (HAS_TARGET(instr->i_opcode)) {
3944
                /* Set oparg to the label id (it will later be mapped to an offset) */
3945
72.9k
                instr->i_oparg = instr->i_target->b_label.id;
3946
72.9k
            }
3947
1.09M
            RETURN_IF_ERROR(
3948
1.09M
                _PyInstructionSequence_Addop(
3949
1.09M
                    seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
3950
3951
1.09M
            _PyExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
3952
1.09M
            if (instr->i_except != NULL) {
3953
341k
                hi->h_label = instr->i_except->b_label.id;
3954
341k
                hi->h_startdepth = instr->i_except->b_startdepth;
3955
341k
                hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
3956
341k
            }
3957
751k
            else {
3958
751k
                hi->h_label = -1;
3959
751k
            }
3960
1.09M
        }
3961
178k
    }
3962
21.0k
    if (_PyInstructionSequence_ApplyLabelMap(seq) < 0) {
3963
0
        return ERROR;
3964
0
    }
3965
21.0k
    return SUCCESS;
3966
21.0k
}
3967
3968
3969
int
3970
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
3971
                                     _PyCompile_CodeUnitMetadata *umd,
3972
                                     int *stackdepth, int *nlocalsplus,
3973
                                     _PyInstructionSequence *seq)
3974
21.0k
{
3975
21.0k
    RETURN_IF_ERROR(convert_pseudo_conditional_jumps(g));
3976
3977
21.0k
    *stackdepth = calculate_stackdepth(g);
3978
21.0k
    if (*stackdepth < 0) {
3979
0
        return ERROR;
3980
0
    }
3981
3982
21.0k
    *nlocalsplus = prepare_localsplus(umd, g);
3983
21.0k
    if (*nlocalsplus < 0) {
3984
0
        return ERROR;
3985
0
    }
3986
3987
21.0k
    RETURN_IF_ERROR(convert_pseudo_ops(g));
3988
3989
    /* Order of basic blocks must have been determined by now */
3990
3991
21.0k
    RETURN_IF_ERROR(normalize_jumps(g));
3992
21.0k
    assert(no_redundant_jumps(g));
3993
3994
    /* Can't modify the bytecode after inserting instructions that produce
3995
     * borrowed references.
3996
     */
3997
21.0k
    RETURN_IF_ERROR(optimize_load_fast(g));
3998
3999
    /* Can't modify the bytecode after computing jump offsets. */
4000
21.0k
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4001
0
        return ERROR;
4002
0
    }
4003
4004
21.0k
    return SUCCESS;
4005
21.0k
}
4006
4007
/* This is used by _PyCompile_Assemble to fill in the jump and exception
4008
 * targets in a synthetic CFG (which is not the output of the builtin compiler).
4009
 */
4010
int
4011
_PyCfg_JumpLabelsToTargets(cfg_builder *g)
4012
0
{
4013
0
    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
4014
0
    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
4015
0
    return SUCCESS;
4016
0
}
4017
4018
/* Exported API functions */
4019
4020
int
4021
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
4022
0
{
4023
0
    stack_effects effs;
4024
0
    if (get_stack_effects(opcode, oparg, jump, &effs) < 0) {
4025
0
        return PY_INVALID_STACK_EFFECT;
4026
0
    }
4027
0
    return effs.net;
4028
0
}
4029
4030
int
4031
PyCompile_OpcodeStackEffect(int opcode, int oparg)
4032
2.23k
{
4033
2.23k
    stack_effects effs;
4034
2.23k
    if (get_stack_effects(opcode, oparg, -1, &effs) < 0) {
4035
0
        return PY_INVALID_STACK_EFFECT;
4036
0
    }
4037
2.23k
    return effs.net;
4038
2.23k
}
4039
4040
/* Access to compiler optimizations for unit tests.
4041
4042
 * _PyCompile_OptimizeCfg takes an instruction list, constructs
4043
 * a CFG, optimizes it and converts back to an instruction list.
4044
 */
4045
4046
static PyObject *
4047
cfg_to_instruction_sequence(cfg_builder *g)
4048
0
{
4049
0
    _PyInstructionSequence *seq = (_PyInstructionSequence *)_PyInstructionSequence_New();
4050
0
    if (seq == NULL) {
4051
0
        return NULL;
4052
0
    }
4053
0
    if (_PyCfg_ToInstructionSequence(g, seq) < 0) {
4054
0
        PyInstructionSequence_Fini(seq);
4055
0
        return NULL;
4056
0
    }
4057
0
    return (PyObject*)seq;
4058
0
}
4059
4060
PyObject *
4061
_PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals)
4062
0
{
4063
0
    if (!_PyInstructionSequence_Check(seq)) {
4064
0
        PyErr_SetString(PyExc_ValueError, "expected an instruction sequence");
4065
0
        return NULL;
4066
0
    }
4067
0
    PyObject *const_cache = PyDict_New();
4068
0
    if (const_cache == NULL) {
4069
0
        return NULL;
4070
0
    }
4071
4072
0
    PyObject *res = NULL;
4073
0
    cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq);
4074
0
    if (g == NULL) {
4075
0
        goto error;
4076
0
    }
4077
0
    int nparams = 0, firstlineno = 1;
4078
0
    if (_PyCfg_OptimizeCodeUnit(g, consts, const_cache, nlocals,
4079
0
                                nparams, firstlineno) < 0) {
4080
0
        goto error;
4081
0
    }
4082
4083
0
    if (calculate_stackdepth(g) == ERROR) {
4084
0
        goto error;
4085
0
    }
4086
4087
0
    if (optimize_load_fast(g) != SUCCESS) {
4088
0
        goto error;
4089
0
    }
4090
4091
0
    res = cfg_to_instruction_sequence(g);
4092
0
error:
4093
0
    Py_DECREF(const_cache);
4094
0
    _PyCfgBuilder_Free(g);
4095
0
    return res;
4096
0
}