Coverage Report

Created: 2025-11-16 06:26

/src/cpython3/Python/ceval_macros.h
Line    Count    Source
1
// Macros and other things needed by ceval.c, and bytecodes.c
2
3
/* Computed GOTOs, or
4
       the-optimization-commonly-but-improperly-known-as-"threaded code"
5
   using gcc's labels-as-values extension
6
   (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
7
8
   The traditional bytecode evaluation loop uses a "switch" statement, which
9
   decent compilers will optimize as a single indirect branch instruction
10
   combined with a lookup table of jump addresses. However, since the
11
   indirect jump instruction is shared by all opcodes, the CPU will have a
12
   hard time making the right prediction for where to jump next (actually,
13
   it will always be wrong except in the uncommon case of a sequence of
14
   several identical opcodes).
15
16
   "Threaded code" in contrast, uses an explicit jump table and an explicit
17
   indirect jump instruction at the end of each opcode. Since the jump
18
   instruction is at a different address for each opcode, the CPU will make a
19
   separate prediction for each of these instructions, which is equivalent to
20
   predicting the second opcode of each opcode pair. These predictions have
21
   a much better chance to turn out valid, especially in small bytecode loops.
22
23
   A mispredicted branch on a modern CPU flushes the whole pipeline and
24
   can cost several CPU cycles (depending on the pipeline depth),
25
   and potentially many more instructions (depending on the pipeline width).
26
   A correctly predicted branch, however, is nearly free.
27
28
   At the time of this writing, the "threaded code" version is up to 15-20%
29
   faster than the normal "switch" version, depending on the compiler and the
30
   CPU architecture.
31
32
   NOTE: care must be taken that the compiler doesn't try to "optimize" the
33
   indirect jumps by sharing them between all opcodes. Such optimizations
34
   can be disabled on gcc by using the -fno-gcse flag (or possibly
35
   -fno-crossjumping).
36
*/
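
To make the comparison above concrete, here is a minimal, self-contained
sketch of threaded-code dispatch using the labels-as-values extension. It is
independent of CPython: the opcode set, the run() function, and the targets[]
table are invented for illustration, and only the dispatch shape mirrors what
the comment describes.

    #include <stdio.h>

    enum { OP_INCR, OP_PRINT, OP_HALT };

    static int run(const unsigned char *ip)
    {
        /* One label per opcode; the table maps opcode -> label address. */
        static void *targets[] = { &&op_incr, &&op_print, &&op_halt };
        int acc = 0;

        /* Each opcode body ends with its own indirect jump, so the branch
           predictor tracks each opcode's successors separately. */
    #define DISPATCH() goto *targets[*ip++]

        DISPATCH();
    op_incr:  acc++;               DISPATCH();
    op_print: printf("%d\n", acc); DISPATCH();
    op_halt:  return acc;
    #undef DISPATCH
    }

    int main(void)
    {
        const unsigned char code[] = { OP_INCR, OP_INCR, OP_PRINT, OP_HALT };
        return run(code) == 2 ? 0 : 1;   /* prints "2" */
    }
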
37
38
/* Use macros rather than inline functions, to make it as clear as possible
39
 * to the C compiler that the tracing check is a simple test then branch.
40
 * We want to be sure that the compiler knows this before it generates
41
 * the CFG.
42
 */
43
44
#ifdef WITH_DTRACE
45
#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
46
#else
47
#define OR_DTRACE_LINE
48
#endif
49
50
#ifdef HAVE_COMPUTED_GOTOS
51
    #ifndef USE_COMPUTED_GOTOS
52
    #define USE_COMPUTED_GOTOS 1
53
    #endif
54
#else
55
    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
56
    #error "Computed gotos are not supported on this compiler."
57
    #endif
58
    #undef USE_COMPUTED_GOTOS
59
    #define USE_COMPUTED_GOTOS 0
60
#endif
61
62
#ifdef Py_STATS
63
#define INSTRUCTION_STATS(op) \
64
    do { \
65
        PyStats *s = _PyStats_GET(); \
66
        OPCODE_EXE_INC(op); \
67
        if (s) s->opcode_stats[lastopcode].pair_count[op]++; \
68
        lastopcode = op; \
69
    } while (0)
70
#else
71
3.55G
#define INSTRUCTION_STATS(op) ((void)0)
72
#endif
73
74
#ifdef Py_STATS
75
#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, const void *instruction_funcptr_table, int oparg, int lastopcode
76
#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg, lastopcode
77
#else
78
#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, const void *instruction_funcptr_table, int oparg
79
#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg
80
#endif
81
82
#if _Py_TAIL_CALL_INTERP
83
#   if defined(__clang__) || defined(__GNUC__)
84
#       if !_Py__has_attribute(preserve_none) || !_Py__has_attribute(musttail)
85
#           error "This compiler does not have support for efficient tail calling."
86
#       endif
87
#   elif defined(_MSC_VER) && (_MSC_VER < 1950)
88
#       error "You need at least VS 2026 / PlatformToolset v145 for tail calling."
89
#   endif
90
91
    // Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
92
#   define Py_MUSTTAIL [[clang::musttail]]
93
#   define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
94
    Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
95
96
#   define DISPATCH_TABLE_VAR instruction_funcptr_table
97
#   define DISPATCH_TABLE instruction_funcptr_handler_table
98
#   define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
99
#   define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
100
101
#   define DISPATCH_GOTO() \
102
        do { \
103
            Py_MUSTTAIL return (((py_tail_call_funcptr *)instruction_funcptr_table)[opcode])(TAIL_CALL_ARGS); \
104
        } while (0)
105
#   define DISPATCH_GOTO_NON_TRACING() \
106
        do { \
107
            Py_MUSTTAIL return (((py_tail_call_funcptr *)DISPATCH_TABLE)[opcode])(TAIL_CALL_ARGS); \
108
        } while (0)
109
#   define JUMP_TO_LABEL(name) \
110
        do { \
111
            Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
112
        } while (0)
113
#   ifdef Py_STATS
114
#       define JUMP_TO_PREDICTED(name) \
115
            do { \
116
                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg, lastopcode); \
117
            } while (0)
118
#   else
119
#       define JUMP_TO_PREDICTED(name) \
120
            do { \
121
                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg); \
122
            } while (0)
123
#   endif
124
#    define LABEL(name) TARGET(name)
125
#elif USE_COMPUTED_GOTOS
126
#  define DISPATCH_TABLE_VAR opcode_targets
127
373k
#  define DISPATCH_TABLE opcode_targets_table
128
#  define TRACING_DISPATCH_TABLE opcode_tracing_targets_table
129
3.55G
#  define TARGET(op) TARGET_##op:
130
3.55G
#  define DISPATCH_GOTO() goto *opcode_targets[opcode]
131
373k
#  define DISPATCH_GOTO_NON_TRACING() goto *DISPATCH_TABLE[opcode];
132
5.82M
#  define JUMP_TO_LABEL(name) goto name;
133
13.9M
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
134
25.5M
#  define LABEL(name) name:
135
#else
136
#  define TARGET(op) case op: TARGET_##op:
137
#  define DISPATCH_GOTO() goto dispatch_opcode
138
#  define DISPATCH_GOTO_NON_TRACING() goto dispatch_opcode
139
#  define JUMP_TO_LABEL(name) goto name;
140
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
141
#  define LABEL(name) name:
142
#endif
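
The _Py_TAIL_CALL_INTERP branch above turns every opcode handler into its own
function and makes DISPATCH_GOTO() a guaranteed tail call through a function
pointer table, so each handler ends in a plain indirect jump with no stack
growth. The following is a minimal sketch of that shape, not CPython's actual
code: the VM struct, handler names, and table are invented, it assumes a
compiler that accepts [[clang::musttail]] (a recent Clang with C23 attributes,
or GCC 15 per the note above), and it omits the preserve_none calling
convention the real interpreter uses to reduce register shuffling.

    #include <stdio.h>

    enum { OP_INCR, OP_PRINT, OP_HALT };

    typedef struct { int acc; const unsigned char *ip; } VM;
    typedef int (*handler_t)(VM *vm);

    static handler_t table[3];   /* filled in after the handlers */

    /* Every handler ends by tail-calling the next opcode's handler through
       the table; musttail makes the compiler guarantee the tail call. */
    #define DISPATCH(vm) [[clang::musttail]] return table[*(vm)->ip++](vm)

    static int op_incr(VM *vm)  { vm->acc++;               DISPATCH(vm); }
    static int op_print(VM *vm) { printf("%d\n", vm->acc); DISPATCH(vm); }
    static int op_halt(VM *vm)  { return vm->acc; }

    static handler_t table[3] = { op_incr, op_print, op_halt };

    int main(void)
    {
        const unsigned char code[] = { OP_INCR, OP_INCR, OP_PRINT, OP_HALT };
        VM vm = { 0, code };
        return table[*vm.ip++](&vm) == 2 ? 0 : 1;   /* prints "2" */
    }
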
143
144
#if (_Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS) && _Py_TIER2
145
#  define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE)
146
#  define ENTER_TRACING() \
147
    DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE;
148
#  define LEAVE_TRACING() \
149
    DISPATCH_TABLE_VAR = DISPATCH_TABLE;
150
#else
151
#  define IS_JIT_TRACING() (0)
152
#  define ENTER_TRACING()
153
#  define LEAVE_TRACING()
154
#endif
155
156
/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
157
#ifdef Py_DEBUG
158
#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
159
    lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
160
#else
161
3.55G
#define PRE_DISPATCH_GOTO() ((void)0)
162
#endif
163
164
#ifdef Py_DEBUG
165
#define LLTRACE_RESUME_FRAME() \
166
do { \
167
    _PyFrame_SetStackPointer(frame, stack_pointer); \
168
    int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
169
    stack_pointer = _PyFrame_GetStackPointer(frame); \
170
    frame->lltrace = lltrace; \
171
} while (0)
172
#else
173
221M
#define LLTRACE_RESUME_FRAME() ((void)0)
174
#endif
175
176
#ifdef Py_GIL_DISABLED
177
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
178
#else
179
#define QSBR_QUIESCENT_STATE(tstate)
180
#endif
181
182
183
/* Do interpreter dispatch accounting for tracing and instrumentation */
184
#define DISPATCH() \
185
3.49G
    { \
186
3.49G
        assert(frame->stackpointer == NULL); \
187
3.49G
        NEXTOPARG(); \
188
3.49G
        PRE_DISPATCH_GOTO(); \
189
3.49G
        DISPATCH_GOTO(); \
190
3.49G
    }
191
192
#define DISPATCH_NON_TRACING() \
193
    { \
194
        assert(frame->stackpointer == NULL); \
195
        NEXTOPARG(); \
196
        PRE_DISPATCH_GOTO(); \
197
        DISPATCH_GOTO_NON_TRACING(); \
198
    }
199
200
#define DISPATCH_SAME_OPARG() \
201
373k
    { \
202
373k
        opcode = next_instr->op.code; \
203
373k
        PRE_DISPATCH_GOTO(); \
204
373k
        DISPATCH_GOTO_NON_TRACING(); \
205
373k
    }
206
207
#define DISPATCH_INLINED(NEW_FRAME)                     \
208
491k
    do {                                                \
209
491k
        assert(tstate->interp->eval_frame == NULL);     \
210
491k
        _PyFrame_SetStackPointer(frame, stack_pointer); \
211
491k
        assert((NEW_FRAME)->previous == frame);         \
212
491k
        frame = tstate->current_frame = (NEW_FRAME);     \
213
491k
        CALL_STAT_INC(inlined_py_calls);                \
214
491k
        JUMP_TO_LABEL(start_frame);                      \
215
0
    } while (0)
216
217
/* Tuple access macros */
218
219
#ifndef Py_DEBUG
220
171M
#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
221
#else
222
static inline PyObject *
223
GETITEM(PyObject *v, Py_ssize_t i) {
224
    assert(PyTuple_Check(v));
225
    assert(i >= 0);
226
    assert(i < PyTuple_GET_SIZE(v));
227
    return PyTuple_GET_ITEM(v, i);
228
}
229
#endif
230
231
/* Code access macros */
232
233
/* The integer overflow is checked by an assertion below. */
234
2.49M
#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
235
3.49G
#define NEXTOPARG()  do { \
236
3.49G
        _Py_CODEUNIT word  = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
237
3.49G
        opcode = word.op.code; \
238
3.49G
        oparg = word.op.arg; \
239
3.49G
    } while (0)
240
241
/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
242
 * for advancing to the next instruction, taking into account cache entries
243
 * and skipped instructions.
244
 */
245
584M
#define JUMPBY(x)       (next_instr += (x))
246
14.3M
#define SKIP_OVER(x)    (next_instr += (x))
247
248
#define STACK_LEVEL()     ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
249
#define STACK_SIZE()      (_PyFrame_GetCode(frame)->co_stacksize)
250
251
#define WITHIN_STACK_BOUNDS() \
252
   (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
253
254
/* Data access macros */
255
#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
256
#define FRAME_CO_NAMES  (_PyFrame_GetCode(frame)->co_names)
257
258
/* Local variable macros */
259
260
262k
#define LOCALS_ARRAY    (frame->localsplus)
261
1.53G
#define GETLOCAL(i)     (frame->localsplus[i])
262
263
264
#ifdef Py_STATS
265
#define UPDATE_MISS_STATS(INSTNAME)                              \
266
    do {                                                         \
267
        STAT_INC(opcode, miss);                                  \
268
        STAT_INC((INSTNAME), miss);                              \
269
        /* The counter is always the first cache entry: */       \
270
        if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) {       \
271
            STAT_INC((INSTNAME), deopt);                         \
272
        }                                                        \
273
    } while (0)
274
#else
275
13.9M
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
276
#endif
277
278
279
// Try to lock an object in the free threading build, if it's not already
280
// locked. Use with a DEOPT_IF() to deopt if the object is already locked.
281
// These are no-ops in the default GIL build. The general pattern is:
282
//
283
// DEOPT_IF(!LOCK_OBJECT(op));
284
// if (/* condition fails */) {
285
//     UNLOCK_OBJECT(op);
286
//     DEOPT_IF(true);
287
// }
288
// ...
289
// UNLOCK_OBJECT(op);
290
//
291
// NOTE: The object must be unlocked on every exit code path and you should
292
// avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
293
// object is locked.
294
#ifdef Py_GIL_DISABLED
295
#  define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
296
#  define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
297
#else
298
69.2M
#  define LOCK_OBJECT(op) (1)
299
69.2M
#  define UNLOCK_OBJECT(op) ((void)0)
300
#endif
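
As a runnable toy model of the pattern described above (independent of
CPython: the toy_object type, the pthread-based trylock, and
specialized_path() are invented stand-ins for LOCK_OBJECT()/UNLOCK_OBJECT()
and DEOPT_IF()):

    #include <pthread.h>
    #include <stdbool.h>

    typedef struct { pthread_mutex_t mutex; int size; } toy_object;

    /* Like LOCK_OBJECT(): succeed only if the object is not already locked. */
    static bool toy_lock(toy_object *op)   { return pthread_mutex_trylock(&op->mutex) == 0; }
    static void toy_unlock(toy_object *op) { pthread_mutex_unlock(&op->mutex); }

    static int specialized_path(toy_object *op, int expected_size)
    {
        if (!toy_lock(op)) {
            goto deopt;               /* already locked elsewhere: deoptimize */
        }
        if (op->size != expected_size) {
            toy_unlock(op);           /* unlock on every exit path */
            goto deopt;
        }
        int result = op->size * 2;    /* the fast, specialized work */
        toy_unlock(op);
        return result;
    deopt:
        return -1;                    /* caller falls back to the generic path */
    }

    int main(void)
    {
        toy_object op;
        pthread_mutex_init(&op.mutex, NULL);
        op.size = 3;
        return specialized_path(&op, 3) == 6 ? 0 : 1;
    }
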
301
302
232M
#define GLOBALS() frame->f_globals
303
94.0M
#define BUILTINS() frame->f_builtins
304
54.8k
#define LOCALS() frame->f_locals
305
#define CONSTS() _PyFrame_GetCode(frame)->co_consts
306
#define NAMES() _PyFrame_GetCode(frame)->co_names
307
308
#define DTRACE_FUNCTION_ENTRY()  \
309
    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
310
        dtrace_function_entry(frame); \
311
    }
312
313
/* This takes a uint16_t instead of a _Py_BackoffCounter,
314
 * because it is used directly on the cache entry in generated code,
315
 * which is always an integral type. */
316
// Force re-specialization when tracing a side exit to get good side exits.
317
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
318
104M
    backoff_counter_triggers(forge_backoff_counter((COUNTER)))
319
320
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
321
104M
    do { \
322
104M
        (COUNTER) = advance_backoff_counter((COUNTER)); \
323
104M
    } while (0);
324
325
#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
326
0
    do { \
327
0
        (COUNTER) = pause_backoff_counter((COUNTER)); \
328
0
    } while (0);
329
330
#ifdef ENABLE_SPECIALIZATION_FT
331
/* Multiple threads may execute these concurrently if thread-local bytecode is
332
 * disabled and they all execute the main copy of the bytecode. Specialization
333
 * is disabled in that case so the value is unused, but the RMW cycle should be
334
 * free of data races.
335
 */
336
#define RECORD_BRANCH_TAKEN(bitset, flag) \
337
292M
    FT_ATOMIC_STORE_UINT16_RELAXED(       \
338
292M
        bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
339
#else
340
#define RECORD_BRANCH_TAKEN(bitset, flag)
341
#endif
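
RECORD_BRANCH_TAKEN() above keeps a 16-bit shift register of recent branch
outcomes, with the newest outcome in the low bit. The following standalone
sketch (invented names, no relaxed atomics) only illustrates how that history
evolves:

    #include <stdint.h>
    #include <stdio.h>

    /* Same shape as RECORD_BRANCH_TAKEN(), minus the atomics: shift the
       history left and record the newest outcome in bit 0. */
    static void record_branch(uint16_t *bitset, int taken)
    {
        *bitset = (uint16_t)((*bitset << 1) | (taken != 0));
    }

    int main(void)
    {
        uint16_t history = 0;
        int outcomes[] = { 1, 1, 0, 1 };   /* taken, taken, not taken, taken */
        for (int i = 0; i < 4; i++) {
            record_branch(&history, outcomes[i]);
        }
        printf("0x%04x\n", (unsigned)history);   /* prints 0x000d (binary 1101) */
        return 0;
    }
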
342
343
#define UNBOUNDLOCAL_ERROR_MSG \
344
0
    "cannot access local variable '%s' where it is not associated with a value"
345
#define UNBOUNDFREE_ERROR_MSG \
346
0
    "cannot access free variable '%s' where it is not associated with a value" \
347
0
    " in enclosing scope"
348
4
#define NAME_ERROR_MSG "name '%.200s' is not defined"
349
350
// If a trace function sets a new f_lineno and
351
// *then* raises, we use the destination when searching
352
// for an exception handler, displaying the traceback, and so on
353
0
#define INSTRUMENTED_JUMP(src, dest, event) \
354
0
do { \
355
0
    if (tstate->tracing) {\
356
0
        next_instr = dest; \
357
0
    } else { \
358
0
        _PyFrame_SetStackPointer(frame, stack_pointer); \
359
0
        next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
360
0
        stack_pointer = _PyFrame_GetStackPointer(frame); \
361
0
        if (next_instr == NULL) { \
362
0
            next_instr = (dest)+1; \
363
0
            JUMP_TO_LABEL(error); \
364
0
        } \
365
0
    } \
366
0
} while (0);
367
368
369
18.1M
static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
370
18.1M
    return (tstate->py_recursion_remaining-- <= 0) &&
371
27.5k
        _Py_CheckRecursiveCallPy(tstate);
372
18.1M
}
373
374
115M
static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate)  {
375
115M
    tstate->py_recursion_remaining++;
376
115M
}
377
378
/* Implementation of "macros" that modify the instruction pointer,
379
 * stack pointer, or frame pointer.
380
 * These need to be treated differently by tier 1 and 2.
381
 * The Tier 1 version is here; Tier 2 is inlined in ceval.c. */
382
383
206M
#define LOAD_IP(OFFSET) do { \
384
206M
        next_instr = frame->instr_ptr + (OFFSET); \
385
206M
    } while (0)
386
387
/* There's no STORE_IP(), it's inlined by the code generator. */
388
389
93.0M
#define LOAD_SP() \
390
93.0M
stack_pointer = _PyFrame_GetStackPointer(frame)
391
392
#define SAVE_SP() \
393
_PyFrame_SetStackPointer(frame, stack_pointer)
394
395
/* Tier-switching macros. */
396
397
#define TIER1_TO_TIER2(EXECUTOR)                        \
398
do {                                                   \
399
    OPT_STAT_INC(traces_executed);                     \
400
    next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
401
    frame = tstate->current_frame;                     \
402
    stack_pointer = _PyFrame_GetStackPointer(frame);   \
403
    int keep_tracing_bit = (uintptr_t)next_instr & 1;   \
404
    next_instr = (_Py_CODEUNIT *)(((uintptr_t)next_instr) & (~1)); \
405
    if (next_instr == NULL) {                          \
406
        /* gh-140104: The exception handler expects frame->instr_ptr
407
            to point after this_instr, not at this_instr! */ \
408
        next_instr = frame->instr_ptr + 1;                 \
409
        JUMP_TO_LABEL(error);                          \
410
    }                                                  \
411
    if (keep_tracing_bit) { \
412
        assert(((_PyThreadStateImpl *)tstate)->jit_tracer_state.prev_state.code_curr_size == 2); \
413
        ENTER_TRACING(); \
414
        DISPATCH_NON_TRACING(); \
415
    } \
416
    DISPATCH();                                        \
417
} while (0)
418
419
#define TIER2_TO_TIER2(EXECUTOR) \
420
do {                                                   \
421
    OPT_STAT_INC(traces_executed);                     \
422
    current_executor = (EXECUTOR);                     \
423
    goto tier2_start;                                  \
424
} while (0)
425
426
#define GOTO_TIER_ONE_SETUP \
427
    tstate->current_executor = NULL;                              \
428
    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
429
    _PyFrame_SetStackPointer(frame, stack_pointer);
430
431
#define GOTO_TIER_ONE(TARGET) \
432
    do \
433
    { \
434
        GOTO_TIER_ONE_SETUP \
435
        return (_Py_CODEUNIT *)(TARGET); \
436
    } while (0)
437
438
#define GOTO_TIER_ONE_CONTINUE_TRACING(TARGET) \
439
    do \
440
    { \
441
        GOTO_TIER_ONE_SETUP \
442
        return (_Py_CODEUNIT *)(((uintptr_t)(TARGET))| 1); \
443
    } while (0)
444
445
#define CURRENT_OPARG()    (next_uop[-1].oparg)
446
#define CURRENT_OPERAND0() (next_uop[-1].operand0)
447
#define CURRENT_OPERAND1() (next_uop[-1].operand1)
448
#define CURRENT_TARGET()   (next_uop[-1].target)
449
450
#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
451
#define JUMP_TO_ERROR() goto jump_to_error_target
452
453
/* Stackref macros */
454
455
/* How much scratch space to give the stackref-to-PyObject* conversion. */
456
20.7M
#define MAX_STACKREF_SCRATCH 10
457
458
#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
459
    /* +1 because vectorcall might use -1 to write self */ \
460
20.7M
    PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
461
20.7M
    PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1);
462
463
#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
464
    /* -1 to undo the +1 added above */ \
465
20.7M
    _PyObjectArray_Free(NAME - 1, NAME##_temp);
466
467
20.7M
#define CONVERSION_FAILED(NAME) ((NAME) == NULL)
468
469
static inline int
470
126M
check_periodics(PyThreadState *tstate) {
471
126M
    _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
472
126M
    QSBR_QUIESCENT_STATE(tstate);
473
126M
    if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
474
16.3k
        return _Py_HandlePending(tstate);
475
16.3k
    }
476
126M
    return 0;
477
126M
}
478