Coverage Report

Created: 2025-08-26 06:26

/src/cpython/Python/ceval_macros.h
Line
Count
Source
1
// Macros and other things needed by ceval.c, and bytecodes.c
2
3
/* Computed GOTOs, or
4
       the-optimization-commonly-but-improperly-known-as-"threaded code"
5
   using gcc's labels-as-values extension
6
   (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
7
8
   The traditional bytecode evaluation loop uses a "switch" statement, which
9
   decent compilers will optimize as a single indirect branch instruction
10
   combined with a lookup table of jump addresses. However, since the
11
   indirect jump instruction is shared by all opcodes, the CPU will have a
12
   hard time making the right prediction for where to jump next (actually,
13
   it will be always wrong except in the uncommon case of a sequence of
14
   several identical opcodes).
15
16
   "Threaded code" in contrast, uses an explicit jump table and an explicit
17
   indirect jump instruction at the end of each opcode. Since the jump
18
   instruction is at a different address for each opcode, the CPU will make a
19
   separate prediction for each of these instructions, which is equivalent to
20
   predicting the second opcode of each opcode pair. These predictions have
21
   a much better chance to turn out valid, especially in small bytecode loops.
22
23
   A mispredicted branch on a modern CPU flushes the whole pipeline and
24
   can cost several CPU cycles (depending on the pipeline depth),
25
   and potentially many more instructions (depending on the pipeline width).
26
   A correctly predicted branch, however, is nearly free.
27
28
   At the time of this writing, the "threaded code" version is up to 15-20%
29
   faster than the normal "switch" version, depending on the compiler and the
30
   CPU architecture.
31
32
   NOTE: care must be taken that the compiler doesn't try to "optimize" the
33
   indirect jumps by sharing them between all opcodes. Such optimizations
34
   can be disabled on gcc by using the -fno-gcse flag (or possibly
35
   -fno-crossjumping).
36
*/
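
The comment above contrasts "switch" dispatch with computed-goto ("threaded code") dispatch. Below is a minimal standalone sketch, not part of this header: the same toy accumulator VM dispatched both ways. The opcode names and run_switch/run_threaded are invented for illustration, and the computed-goto version requires GCC/Clang's labels-as-values extension.

/* Toy VM: one shared indirect branch (switch) vs. one indirect branch
 * per opcode body (computed gotos). Illustration only. */
#include <stdio.h>

enum { OP_INC, OP_DOUBLE, OP_HALT };

/* Switch dispatch: every opcode funnels back through a single indirect
 * jump, so the predictor sees one hard-to-predict branch. */
static int run_switch(const unsigned char *code) {
    int acc = 0;
    for (;;) {
        switch (*code++) {
        case OP_INC:    acc += 1; break;
        case OP_DOUBLE: acc *= 2; break;
        case OP_HALT:   return acc;
        }
    }
}

/* Threaded dispatch: each opcode ends with its own goto *targets[...],
 * so the predictor gets separate history per opcode. */
static int run_threaded(const unsigned char *code) {
    static void *targets[] = { &&op_inc, &&op_double, &&op_halt };
    int acc = 0;
#define DISPATCH() goto *targets[*code++]
    DISPATCH();
op_inc:    acc += 1; DISPATCH();
op_double: acc *= 2; DISPATCH();
op_halt:   return acc;
#undef DISPATCH
}

int main(void) {
    const unsigned char prog[] = { OP_INC, OP_INC, OP_DOUBLE, OP_HALT };
    printf("%d %d\n", run_switch(prog), run_threaded(prog)); /* 4 4 */
    return 0;
}

The only structural difference is where the indirect jump lives, which is exactly the property the NOTE above warns the compiler not to "optimize" away by merging the per-opcode jumps back into one.
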
37
38
/* Use macros rather than inline functions, to make it as clear as possible
39
 * to the C compiler that the tracing check is a simple test then branch.
40
 * We want to be sure that the compiler knows this before it generates
41
 * the CFG.
42
 */
43
44
#ifdef WITH_DTRACE
45
#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
46
#else
47
#define OR_DTRACE_LINE
48
#endif
49
50
#ifdef HAVE_COMPUTED_GOTOS
51
    #ifndef USE_COMPUTED_GOTOS
52
    #define USE_COMPUTED_GOTOS 1
53
    #endif
54
#else
55
    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
56
    #error "Computed gotos are not supported on this compiler."
57
    #endif
58
    #undef USE_COMPUTED_GOTOS
59
    #define USE_COMPUTED_GOTOS 0
60
#endif
61
62
#ifdef Py_STATS
63
#define INSTRUCTION_STATS(op) \
64
    do { \
65
        OPCODE_EXE_INC(op); \
66
        if (_Py_stats) _Py_stats->opcode_stats[lastopcode].pair_count[op]++; \
67
        lastopcode = op; \
68
    } while (0)
69
#else
70
38.0G
#define INSTRUCTION_STATS(op) ((void)0)
71
#endif
72
73
#ifdef Py_STATS
74
#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg, int lastopcode
75
#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg, lastopcode
76
#else
77
#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg
78
#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg
79
#endif
80
81
#if Py_TAIL_CALL_INTERP
82
    // Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
83
#   define Py_MUSTTAIL [[clang::musttail]]
84
#   define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
85
    Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
86
87
#   define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
88
#   define DISPATCH_GOTO() \
89
        do { \
90
            Py_MUSTTAIL return (INSTRUCTION_TABLE[opcode])(TAIL_CALL_ARGS); \
91
        } while (0)
92
#   define JUMP_TO_LABEL(name) \
93
        do { \
94
            Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
95
        } while (0)
96
#   ifdef Py_STATS
97
#       define JUMP_TO_PREDICTED(name) \
98
            do { \
99
                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg, lastopcode); \
100
            } while (0)
101
#   else
102
#       define JUMP_TO_PREDICTED(name) \
103
            do { \
104
                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg); \
105
            } while (0)
106
#   endif
107
#    define LABEL(name) TARGET(name)
108
#elif USE_COMPUTED_GOTOS
109
38.0G
#  define TARGET(op) TARGET_##op:
110
38.3G
#  define DISPATCH_GOTO() goto *opcode_targets[opcode]
111
43.5M
#  define JUMP_TO_LABEL(name) goto name;
112
209M
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
113
308M
#  define LABEL(name) name:
114
#else
115
#  define TARGET(op) case op: TARGET_##op:
116
#  define DISPATCH_GOTO() goto dispatch_opcode
117
#  define JUMP_TO_LABEL(name) goto name;
118
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
119
#  define LABEL(name) name:
120
#endif
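
For the Py_TAIL_CALL_INTERP branch above, TARGET(op) turns each opcode into a separate function and DISPATCH_GOTO() becomes a forced tail call through INSTRUCTION_TABLE. The sketch below is illustrative only: vm_t, op_inc, op_halt, and table are invented stand-ins for the real frame/stack_pointer/tstate/next_instr/oparg parameters, and it omits the [[clang::musttail]] / __attribute__((preserve_none)) annotations the real build uses to keep C stack usage bounded.

#include <stdio.h>

enum { OP_INC, OP_HALT };

typedef struct { const unsigned char *ip; int acc; } vm_t;
typedef int (*handler_t)(vm_t *vm);

static int op_inc(vm_t *vm);
static int op_halt(vm_t *vm);

/* Function-pointer table indexed by opcode, playing the role of
 * INSTRUCTION_TABLE. */
static const handler_t table[] = { op_inc, op_halt };

/* Each handler ends by calling the next one through the table,
 * mirroring "Py_MUSTTAIL return (INSTRUCTION_TABLE[opcode])(...)". */
#define DISPATCH(vm) return table[*(vm)->ip++](vm)

static int op_inc(vm_t *vm)  { vm->acc += 1; DISPATCH(vm); }
static int op_halt(vm_t *vm) { return vm->acc; }

int main(void) {
    const unsigned char prog[] = { OP_INC, OP_INC, OP_HALT };
    vm_t vm = { prog, 0 };
    printf("%d\n", table[*vm.ip++](&vm)); /* prints 2 */
    return 0;
}

Optimizing compilers usually turn the "return table[...](vm)" calls into tail calls on their own; the musttail attribute makes that a hard requirement rather than a hope, which is what keeps long bytecode sequences from overflowing the C stack.
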
121
122
/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
123
#ifdef Py_DEBUG
124
#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
125
    lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
126
#else
127
38.3G
#define PRE_DISPATCH_GOTO() ((void)0)
128
#endif
129
130
#ifdef Py_DEBUG
131
#define LLTRACE_RESUME_FRAME() \
132
do { \
133
    _PyFrame_SetStackPointer(frame, stack_pointer); \
134
    int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
135
    stack_pointer = _PyFrame_GetStackPointer(frame); \
136
    frame->lltrace = lltrace; \
137
} while (0)
138
#else
139
1.26G
#define LLTRACE_RESUME_FRAME() ((void)0)
140
#endif
141
142
#ifdef Py_GIL_DISABLED
143
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
144
#else
145
#define QSBR_QUIESCENT_STATE(tstate)
146
#endif
147
148
149
/* Do interpreter dispatch accounting for tracing and instrumentation */
150
#define DISPATCH() \
151
38.3G
    { \
152
38.3G
        assert(frame->stackpointer == NULL); \
153
38.3G
        NEXTOPARG(); \
154
38.3G
        PRE_DISPATCH_GOTO(); \
155
38.3G
        DISPATCH_GOTO(); \
156
38.3G
    }
157
158
#define DISPATCH_SAME_OPARG() \
159
4.37M
    { \
160
4.37M
        opcode = next_instr->op.code; \
161
4.37M
        PRE_DISPATCH_GOTO(); \
162
4.37M
        DISPATCH_GOTO(); \
163
4.37M
    }
164
165
#define DISPATCH_INLINED(NEW_FRAME)                     \
166
848k
    do {                                                \
167
848k
        assert(tstate->interp->eval_frame == NULL);     \
168
848k
        _PyFrame_SetStackPointer(frame, stack_pointer); \
169
848k
        assert((NEW_FRAME)->previous == frame);         \
170
848k
        frame = tstate->current_frame = (NEW_FRAME);     \
171
848k
        CALL_STAT_INC(inlined_py_calls);                \
172
848k
        JUMP_TO_LABEL(start_frame);                      \
173
0
    } while (0)
174
175
/* Tuple access macros */
176
177
#ifndef Py_DEBUG
178
1.66G
#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
179
#else
180
static inline PyObject *
181
GETITEM(PyObject *v, Py_ssize_t i) {
182
    assert(PyTuple_Check(v));
183
    assert(i >= 0);
184
    assert(i < PyTuple_GET_SIZE(v));
185
    return PyTuple_GET_ITEM(v, i);
186
}
187
#endif
188
189
/* Code access macros */
190
191
/* The integer overflow is checked by an assertion below. */
192
31.7M
#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
193
38.3G
#define NEXTOPARG()  do { \
194
38.3G
        _Py_CODEUNIT word  = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
195
38.3G
        opcode = word.op.code; \
196
38.3G
        oparg = word.op.arg; \
197
38.3G
    } while (0)
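
NEXTOPARG() above loads one 16-bit code unit and splits it into an 8-bit opcode and an 8-bit oparg. The union below is a simplified stand-in for the _Py_CODEUNIT view (the real type lives in CPython's internal headers), and the FT_ATOMIC_LOAD_UINT16_RELAXED wrapper used for the free-threaded build is omitted.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in: the same 16 bits viewed either as a raw cache
 * value or as an (opcode, oparg) byte pair. */
typedef union {
    uint16_t cache;
    struct { uint8_t code; uint8_t arg; } op;
} codeunit_t;

int main(void) {
    codeunit_t word;
    word.op.code = 100;   /* pretend opcode */
    word.op.arg  = 3;     /* pretend oparg  */

    /* What NEXTOPARG() extracts from each instruction word: */
    int opcode = word.op.code;
    int oparg  = word.op.arg;
    printf("opcode=%d oparg=%d raw=0x%04x\n",
           opcode, oparg, (unsigned)word.cache);
    return 0;
}
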
198
199
/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
200
 * for advancing to the next instruction, taking into account cache entries
201
 * and skipped instructions.
202
 */
203
5.37G
#define JUMPBY(x)       (next_instr += (x))
204
363M
#define SKIP_OVER(x)    (next_instr += (x))
205
206
#define STACK_LEVEL()     ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
207
#define STACK_SIZE()      (_PyFrame_GetCode(frame)->co_stacksize)
208
209
#define WITHIN_STACK_BOUNDS() \
210
   (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
211
212
/* Data access macros */
213
#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
214
#define FRAME_CO_NAMES  (_PyFrame_GetCode(frame)->co_names)
215
216
/* Local variable macros */
217
218
1.15M
#define LOCALS_ARRAY    (frame->localsplus)
219
18.6G
#define GETLOCAL(i)     (frame->localsplus[i])
220
221
222
#ifdef Py_STATS
223
#define UPDATE_MISS_STATS(INSTNAME)                              \
224
    do {                                                         \
225
        STAT_INC(opcode, miss);                                  \
226
        STAT_INC((INSTNAME), miss);                              \
227
        /* The counter is always the first cache entry: */       \
228
        if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) {       \
229
            STAT_INC((INSTNAME), deopt);                         \
230
        }                                                        \
231
    } while (0)
232
#else
233
209M
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
234
#endif
235
236
237
// Try to lock an object in the free threading build, if it's not already
238
// locked. Use with a DEOPT_IF() to deopt if the object is already locked.
239
// These are no-ops in the default GIL build. The general pattern is:
240
//
241
// DEOPT_IF(!LOCK_OBJECT(op));
242
// if (/* condition fails */) {
243
//     UNLOCK_OBJECT(op);
244
//     DEOPT_IF(true);
245
//  }
246
//  ...
247
//  UNLOCK_OBJECT(op);
248
//
249
// NOTE: The object must be unlocked on every exit code path and you should
250
// avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
251
// object is locked.
252
#ifdef Py_GIL_DISABLED
253
#  define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
254
#  define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
255
#else
256
495M
#  define LOCK_OBJECT(op) (1)
257
495M
#  define UNLOCK_OBJECT(op) ((void)0)
258
#endif
259
260
695M
#define GLOBALS() frame->f_globals
261
376M
#define BUILTINS() frame->f_builtins
262
77.5k
#define LOCALS() frame->f_locals
263
#define CONSTS() _PyFrame_GetCode(frame)->co_consts
264
#define NAMES() _PyFrame_GetCode(frame)->co_names
265
266
#define DTRACE_FUNCTION_ENTRY()  \
267
    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
268
        dtrace_function_entry(frame); \
269
    }
270
271
/* This takes a uint16_t instead of a _Py_BackoffCounter,
272
 * because it is used directly on the cache entry in generated code,
273
 * which is always an integral type. */
274
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
275
1.23G
    backoff_counter_triggers(forge_backoff_counter((COUNTER)))
276
277
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
278
1.23G
    do { \
279
1.23G
        (COUNTER) = advance_backoff_counter((COUNTER)); \
280
1.23G
    } while (0);
281
282
#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
283
0
    do { \
284
0
        (COUNTER) = pause_backoff_counter((COUNTER)); \
285
0
    } while (0);
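
The three adaptive-counter macros above wrap CPython's backoff counters, which are stored directly in the 16-bit inline cache entry. The sketch below is a hedged illustration of the idea only; the struct layout, field widths, and the cap are invented rather than the real _Py_BackoffCounter. It shows how a countdown that restarts with a doubled delay makes failed specialization attempts exponentially rarer.

#include <stdint.h>
#include <stdio.h>

typedef struct { uint16_t value; uint16_t backoff; } backoff_counter_t;

static int counter_triggers(backoff_counter_t c) { return c.value == 0; }

static backoff_counter_t advance_counter(backoff_counter_t c) {
    if (c.value > 0) c.value--;        /* count down toward the trigger */
    return c;
}

static backoff_counter_t restart_counter(backoff_counter_t c) {
    if (c.backoff < 12) c.backoff++;   /* invented cap on the exponent */
    c.value = (uint16_t)((1u << c.backoff) - 1);  /* wait ~2^backoff ticks */
    return c;
}

int main(void) {
    backoff_counter_t c = { .value = 1, .backoff = 1 };
    for (int tick = 0; tick < 40; tick++) {
        if (counter_triggers(c)) {
            printf("tick %2d: specialization attempt\n", tick);
            c = restart_counter(c);    /* pretend the attempt failed */
        } else {
            c = advance_counter(c);
        }
    }
    return 0;   /* attempts land at ticks 1, 5, 13, 29: exponentially spaced */
}
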
286
287
#ifdef ENABLE_SPECIALIZATION_FT
288
/* Multiple threads may execute these concurrently if thread-local bytecode is
289
 * disabled and they all execute the main copy of the bytecode. Specialization
290
 * is disabled in that case so the value is unused, but the RMW cycle should be
291
 * free of data races.
292
 */
293
#define RECORD_BRANCH_TAKEN(bitset, flag) \
294
2.68G
    FT_ATOMIC_STORE_UINT16_RELAXED(       \
295
2.68G
        bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
296
#else
297
#define RECORD_BRANCH_TAKEN(bitset, flag)
298
#endif
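
RECORD_BRANCH_TAKEN above keeps a 16-bit history of branch outcomes for the specializer by shifting the set left and OR-ing in the newest flag, so bit 0 always holds the most recent branch and older outcomes age out of the top. A tiny sketch of that bookkeeping (atomics dropped, helper name invented):

#include <stdint.h>
#include <stdio.h>

/* Same shift-and-or update as RECORD_BRANCH_TAKEN, minus the relaxed
 * atomic load/store used in the free-threaded build. */
static void record_branch_taken(uint16_t *bitset, int flag) {
    *bitset = (uint16_t)((*bitset << 1) | (flag & 1));
}

int main(void) {
    uint16_t history = 0;
    int outcomes[] = { 1, 1, 0, 1 };   /* taken, taken, not taken, taken */
    for (int i = 0; i < 4; i++) {
        record_branch_taken(&history, outcomes[i]);
    }
    printf("history=0x%04x\n", history);   /* 0x000d == 0b1101 */
    return 0;
}
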
299
300
#define UNBOUNDLOCAL_ERROR_MSG \
301
0
    "cannot access local variable '%s' where it is not associated with a value"
302
#define UNBOUNDFREE_ERROR_MSG \
303
0
    "cannot access free variable '%s' where it is not associated with a value" \
304
0
    " in enclosing scope"
305
1
#define NAME_ERROR_MSG "name '%.200s' is not defined"
306
307
// If a trace function sets a new f_lineno and
308
// *then* raises, we use the destination when searching
309
// for an exception handler, displaying the traceback, and so on
310
0
#define INSTRUMENTED_JUMP(src, dest, event) \
311
0
do { \
312
0
    if (tstate->tracing) {\
313
0
        next_instr = dest; \
314
0
    } else { \
315
0
        _PyFrame_SetStackPointer(frame, stack_pointer); \
316
0
        next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
317
0
        stack_pointer = _PyFrame_GetStackPointer(frame); \
318
0
        if (next_instr == NULL) { \
319
0
            next_instr = (dest)+1; \
320
0
            JUMP_TO_LABEL(error); \
321
0
        } \
322
0
    } \
323
0
} while (0);
324
325
326
234M
static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
327
234M
    return (tstate->py_recursion_remaining-- <= 0) &&
328
234M
        _Py_CheckRecursiveCallPy(tstate);
329
234M
}
330
331
643M
static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate)  {
332
643M
    tstate->py_recursion_remaining++;
333
643M
}
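
_Py_EnterRecursivePy()/_Py_LeaveRecursiveCallPy() above maintain a per-thread countdown so that only an exhausted budget pays for the full recursion check. A self-contained sketch of that pairing (tstate_t, check_recursive_call, and the failure handling are invented stand-ins; the real _Py_CheckRecursiveCallPy raises RecursionError):

#include <stdio.h>

typedef struct { int py_recursion_remaining; } tstate_t;

static int check_recursive_call(tstate_t *ts) {
    (void)ts;
    fprintf(stderr, "recursion limit hit\n");   /* stand-in for RecursionError */
    return -1;
}

/* Cheap path: decrement and compare; the expensive check only runs when
 * the budget is gone. */
static int enter_py(tstate_t *ts) {
    return (ts->py_recursion_remaining-- <= 0) && check_recursive_call(ts);
}

static void leave_py(tstate_t *ts) {
    ts->py_recursion_remaining++;
}

static int countdown(tstate_t *ts, int n) {
    if (enter_py(ts)) {       /* too deep: undo the entry and bail out */
        leave_py(ts);
        return -1;
    }
    int r = (n == 0) ? 0 : countdown(ts, n - 1);
    leave_py(ts);
    return r;
}

int main(void) {
    tstate_t ts = { .py_recursion_remaining = 50 };
    printf("%d\n", countdown(&ts, 10));    /* fits the budget: 0 */
    printf("%d\n", countdown(&ts, 100));   /* exceeds it: -1 */
    return 0;
}
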
334
335
/* Implementation of "macros" that modify the instruction pointer,
336
 * stack pointer, or frame pointer.
337
 * These need to be treated differently by tier 1 and 2.
338
 * The Tier 1 version is here; Tier 2 is inlined in ceval.c. */
339
340
1.04G
#define LOAD_IP(OFFSET) do { \
341
1.04G
        next_instr = frame->instr_ptr + (OFFSET); \
342
1.04G
    } while (0)
343
344
/* There's no STORE_IP(); it's inlined by the code generator. */
345
346
409M
#define LOAD_SP() \
347
409M
stack_pointer = _PyFrame_GetStackPointer(frame)
348
349
#define SAVE_SP() \
350
_PyFrame_SetStackPointer(frame, stack_pointer)
351
352
/* Tier-switching macros. */
353
354
#define TIER1_TO_TIER2(EXECUTOR)                        \
355
do {                                                   \
356
    OPT_STAT_INC(traces_executed);                     \
357
    next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
358
    frame = tstate->current_frame;                     \
359
    stack_pointer = _PyFrame_GetStackPointer(frame);   \
360
    if (next_instr == NULL) {                          \
361
        next_instr = frame->instr_ptr;                 \
362
        JUMP_TO_LABEL(error);                          \
363
    }                                                  \
364
    DISPATCH();                                        \
365
} while (0)
366
367
#define TIER2_TO_TIER2(EXECUTOR) \
368
do {                                                   \
369
    OPT_STAT_INC(traces_executed);                     \
370
    current_executor = (EXECUTOR);                     \
371
    goto tier2_start;                                  \
372
} while (0)
373
374
#define GOTO_TIER_ONE(TARGET)                                         \
375
    do                                                                \
376
    {                                                                 \
377
        tstate->current_executor = NULL;                              \
378
        OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
379
        _PyFrame_SetStackPointer(frame, stack_pointer);               \
380
        return TARGET;                                                \
381
    } while (0)
382
383
#define CURRENT_OPARG()    (next_uop[-1].oparg)
384
#define CURRENT_OPERAND0() (next_uop[-1].operand0)
385
#define CURRENT_OPERAND1() (next_uop[-1].operand1)
386
#define CURRENT_TARGET()   (next_uop[-1].target)
387
388
#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
389
#define JUMP_TO_ERROR() goto jump_to_error_target
390
391
/* Stackref macros */
392
393
/* How much scratch space to give the stackref-to-PyObject* conversion. */
394
1.83G
#define MAX_STACKREF_SCRATCH 10
395
396
#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
397
    /* +1 because vectorcall might use -1 to write self */ \
398
1.83G
    PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
399
1.83G
    PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1);
400
401
#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
402
    /* NAME - 1 undoes the +1 applied when the array was handed out */ \
403
1.83G
    _PyObjectArray_Free(NAME - 1, NAME##_temp);
404
405
1.83G
#define CONVERSION_FAILED(NAME) ((NAME) == NULL)
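
The "+1" in STACKREFS_TO_PYOBJECTS above reserves one extra slot in front of the converted argument array so that a vectorcall-style callee may temporarily write at index -1 (for example to prepend a bound method's self) without stepping on caller memory, and STACKREFS_TO_PYOBJECTS_CLEANUP passes NAME - 1 to give that same slot back. A self-contained illustration of why the slot exists (every name below is invented):

#include <stdio.h>

#define MAX_SCRATCH 10

typedef const char *obj_t;   /* stand-in for PyObject* */

/* A callee that, like a bound-method call, prepends "self" in place by
 * stepping the argument pointer back one slot it knows is reserved. */
static void callee_prepends_self(obj_t *args, int nargs) {
    obj_t *with_self = args - 1;   /* legal only because of the +1 */
    with_self[0] = "self";
    for (int i = 0; i < nargs + 1; i++) {
        printf("arg[%d] = %s\n", i, with_self[i]);
    }
}

int main(void) {
    /* Caller mirrors the macro: size the scratch buffer MAX_SCRATCH + 1
     * and hand out a pointer that starts one slot in. */
    obj_t scratch[MAX_SCRATCH + 1];
    obj_t *args = scratch + 1;
    args[0] = "x";
    args[1] = "y";
    callee_prepends_self(args, 2);
    return 0;
}
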
406
407
static inline int
408
3.23G
check_periodics(PyThreadState *tstate) {
409
3.23G
    _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
410
3.23G
    QSBR_QUIESCENT_STATE(tstate);
411
3.23G
    if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
412
58.3k
        return _Py_HandlePending(tstate);
413
58.3k
    }
414
3.23G
    return 0;
415
3.23G
}
416