Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Python/ceval_macros.h
Line     Count  Source
   1            // Macros and other things needed by ceval.c, and bytecodes.c
   2
   3            /* Computed GOTOs, or
   4                   the-optimization-commonly-but-improperly-known-as-"threaded code"
   5               using gcc's labels-as-values extension
   6               (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
   7
   8               The traditional bytecode evaluation loop uses a "switch" statement, which
   9               decent compilers will optimize as a single indirect branch instruction
  10               combined with a lookup table of jump addresses. However, since the
  11               indirect jump instruction is shared by all opcodes, the CPU will have a
  12               hard time making the right prediction for where to jump next (actually,
  13               it will be always wrong except in the uncommon case of a sequence of
  14               several identical opcodes).
  15
  16               "Threaded code" in contrast, uses an explicit jump table and an explicit
  17               indirect jump instruction at the end of each opcode. Since the jump
  18               instruction is at a different address for each opcode, the CPU will make a
  19               separate prediction for each of these instructions, which is equivalent to
  20               predicting the second opcode of each opcode pair. These predictions have
  21               a much better chance to turn out valid, especially in small bytecode loops.
  22
  23               A mispredicted branch on a modern CPU flushes the whole pipeline and
  24               can cost several CPU cycles (depending on the pipeline depth),
  25               and potentially many more instructions (depending on the pipeline width).
  26               A correctly predicted branch, however, is nearly free.
  27
  28               At the time of this writing, the "threaded code" version is up to 15-20%
  29               faster than the normal "switch" version, depending on the compiler and the
  30               CPU architecture.
  31
  32               NOTE: care must be taken that the compiler doesn't try to "optimize" the
  33               indirect jumps by sharing them between all opcodes. Such optimizations
  34               can be disabled on gcc by using the -fno-gcse flag (or possibly
  35               -fno-crossjumping).
  36            */
  37
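To make the contrast described in the comment above concrete, here is a minimal standalone sketch of the two dispatch styles. The toy opcodes, the run() helper, and the USE_GOTO_DISPATCH switch are illustrative only (this is not CPython's loop), and the computed-goto variant needs GCC or Clang for the labels-as-values extension:

    /* Toy interpreter: switch dispatch vs. computed-goto ("threaded") dispatch. */
    #include <stdio.h>

    enum { OP_INC, OP_DEC, OP_HALT };

    static int run(const unsigned char *code)
    {
        int acc = 0;
    #ifndef USE_GOTO_DISPATCH
        /* Switch dispatch: one shared indirect branch serves every opcode. */
        for (;;) {
            switch (*code++) {
            case OP_INC:  acc++; break;
            case OP_DEC:  acc--; break;
            case OP_HALT: return acc;
            }
        }
    #else
        /* Computed-goto dispatch: each handler ends with its own indirect jump,
         * so the branch predictor gets per-opcode history. */
        static void *targets[] = { &&do_inc, &&do_dec, &&do_halt };
        #define TOY_DISPATCH() goto *targets[*code++]
        TOY_DISPATCH();
    do_inc:  acc++; TOY_DISPATCH();
    do_dec:  acc--; TOY_DISPATCH();
    do_halt: return acc;
        #undef TOY_DISPATCH
    #endif
    }

    int main(void)
    {
        const unsigned char code[] = { OP_INC, OP_INC, OP_DEC, OP_HALT };
        printf("%d\n", run(code));   /* prints 1 */
        return 0;
    }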
  38            /* Use macros rather than inline functions, to make it as clear as possible
  39             * to the C compiler that the tracing check is a simple test then branch.
  40             * We want to be sure that the compiler knows this before it generates
  41             * the CFG.
  42             */
  43
  44            #ifdef WITH_DTRACE
  45            #define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
  46            #else
  47            #define OR_DTRACE_LINE
  48            #endif
  49
  50            #ifdef HAVE_COMPUTED_GOTOS
  51                #ifndef USE_COMPUTED_GOTOS
  52                #define USE_COMPUTED_GOTOS 1
  53                #endif
  54            #else
  55                #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
  56                #error "Computed gotos are not supported on this compiler."
  57                #endif
  58                #undef USE_COMPUTED_GOTOS
  59                #define USE_COMPUTED_GOTOS 0
  60            #endif
  61
  62            #ifdef Py_STATS
  63            #define INSTRUCTION_STATS(op) \
  64                do { \
  65                    OPCODE_EXE_INC(op); \
  66                    if (_Py_stats) _Py_stats->opcode_stats[lastopcode].pair_count[op]++; \
  67                    lastopcode = op; \
  68                } while (0)
  69            #else
  70     32.4G  #define INSTRUCTION_STATS(op) ((void)0)
  71            #endif
  72
  73            #ifdef Py_STATS
  74            #   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg, int lastopcode
  75            #   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg, lastopcode
  76            #else
  77            #   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg
  78            #   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg
  79            #endif
  80
  81            #if Py_TAIL_CALL_INTERP
  82                // Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
  83            #   define Py_MUSTTAIL [[clang::musttail]]
  84            #   define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
  85                Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
  86
  87            #   define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
  88            #   define DISPATCH_GOTO() \
  89                    do { \
  90                        Py_MUSTTAIL return (INSTRUCTION_TABLE[opcode])(TAIL_CALL_ARGS); \
  91                    } while (0)
  92            #   define JUMP_TO_LABEL(name) \
  93                    do { \
  94                        Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
  95                    } while (0)
  96            #   ifdef Py_STATS
  97            #       define JUMP_TO_PREDICTED(name) \
  98                        do { \
  99                            Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg, lastopcode); \
 100                        } while (0)
 101            #   else
 102            #       define JUMP_TO_PREDICTED(name) \
 103                        do { \
 104                            Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg); \
 105                        } while (0)
 106            #   endif
 107            #    define LABEL(name) TARGET(name)
 108            #elif USE_COMPUTED_GOTOS
 109     32.4G  #  define TARGET(op) TARGET_##op:
 110     32.8G  #  define DISPATCH_GOTO() goto *opcode_targets[opcode]
 111     47.5M  #  define JUMP_TO_LABEL(name) goto name;
 112      195M  #  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
 113      303M  #  define LABEL(name) name:
 114            #else
 115            #  define TARGET(op) case op: TARGET_##op:
 116            #  define DISPATCH_GOTO() goto dispatch_opcode
 117            #  define JUMP_TO_LABEL(name) goto name;
 118            #  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
 119            #  define LABEL(name) name:
 120            #endif
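The three configurations above give TARGET(), DISPATCH_GOTO() and LABEL() very different shapes. As an illustration derived purely from the macro definitions listed here (NOP standing in for any opcode), the first two expand roughly as follows:

    /*
     * Tail-calling build (Py_TAIL_CALL_INTERP):
     *   TARGET(NOP)     => Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_NOP(TAIL_CALL_PARAMS)
     *   DISPATCH_GOTO() => do { Py_MUSTTAIL return (INSTRUCTION_TABLE[opcode])(TAIL_CALL_ARGS); } while (0)
     *   Every handler is its own preserve_none function; dispatch is a guaranteed tail
     *   call through the INSTRUCTION_TABLE of function pointers.
     *
     * Computed-goto build (USE_COMPUTED_GOTOS):
     *   TARGET(NOP)     => TARGET_NOP:
     *   DISPATCH_GOTO() => goto *opcode_targets[opcode]
     *   Handlers are labels inside one large function; dispatch is a per-handler indirect goto.
     *
     * Plain switch build:
     *   TARGET(NOP)     => case NOP: TARGET_NOP:
     *   DISPATCH_GOTO() => goto dispatch_opcode
     *   Handlers are switch cases; dispatch jumps back to the single switch at dispatch_opcode.
     */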
 121
 122            /* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
 123            #ifdef Py_DEBUG
 124            #define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
 125                lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
 126            #else
 127     32.8G  #define PRE_DISPATCH_GOTO() ((void)0)
 128            #endif
 129
 130            #ifdef Py_DEBUG
 131            #define LLTRACE_RESUME_FRAME() \
 132            do { \
 133                _PyFrame_SetStackPointer(frame, stack_pointer); \
 134                int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
 135                stack_pointer = _PyFrame_GetStackPointer(frame); \
 136                if (lltrace < 0) { \
 137                    JUMP_TO_LABEL(exit_unwind); \
 138                } \
 139                frame->lltrace = lltrace; \
 140            } while (0)
 141            #else
 142     1.31G  #define LLTRACE_RESUME_FRAME() ((void)0)
 143            #endif
 144
 145            #ifdef Py_GIL_DISABLED
 146            #define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
 147            #else
 148            #define QSBR_QUIESCENT_STATE(tstate)
 149            #endif
 150
 151
 152            /* Do interpreter dispatch accounting for tracing and instrumentation */
 153            #define DISPATCH() \
 154     32.7G      { \
 155     32.7G          assert(frame->stackpointer == NULL); \
 156     32.7G          NEXTOPARG(); \
 157     32.7G          PRE_DISPATCH_GOTO(); \
 158     32.7G          DISPATCH_GOTO(); \
 159     32.7G      }
 160
 161            #define DISPATCH_SAME_OPARG() \
 162     4.08M      { \
 163     4.08M          opcode = next_instr->op.code; \
 164     4.08M          PRE_DISPATCH_GOTO(); \
 165     4.08M          DISPATCH_GOTO(); \
 166     4.08M      }
 167
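DISPATCH() is what ends an instruction handler: decode the next code unit with NEXTOPARG(), optionally trace it, then jump to its handler. A schematic handler written in terms of these macros might look as follows; EXAMPLE_OP and the body are placeholders, not the actual generated code (the real handlers are generated from bytecodes.c):

    TARGET(EXAMPLE_OP) {               /* label, case, or function, depending on the build */
        INSTRUCTION_STATS(EXAMPLE_OP); /* no-op unless Py_STATS is enabled */
        /* ... the instruction's real work: use oparg, stack_pointer, frame ... */
        DISPATCH();                    /* decode the next instruction and jump to it */
    }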
 168            #define DISPATCH_INLINED(NEW_FRAME)                     \
 169      809k      do {                                                \
 170      809k          assert(tstate->interp->eval_frame == NULL);     \
 171      809k          _PyFrame_SetStackPointer(frame, stack_pointer); \
 172      809k          assert((NEW_FRAME)->previous == frame);         \
 173      809k          frame = tstate->current_frame = (NEW_FRAME);     \
 174      809k          CALL_STAT_INC(inlined_py_calls);                \
 175      809k          JUMP_TO_LABEL(start_frame);                      \
 176         0      } while (0)
 177
 178            /* Tuple access macros */
 179
 180            #ifndef Py_DEBUG
 181     1.68G  #define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
 182            #else
 183            static inline PyObject *
 184            GETITEM(PyObject *v, Py_ssize_t i) {
 185                assert(PyTuple_Check(v));
 186                assert(i >= 0);
 187                assert(i < PyTuple_GET_SIZE(v));
 188                return PyTuple_GET_ITEM(v, i);
 189            }
 190            #endif
 191
 192            /* Code access macros */
 193
 194            /* The integer overflow is checked by an assertion below. */
 195     33.7M  #define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
 196     32.7G  #define NEXTOPARG()  do { \
 197     32.7G          _Py_CODEUNIT word  = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
 198     32.7G          opcode = word.op.code; \
 199     32.7G          oparg = word.op.arg; \
 200     32.7G      } while (0)
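NEXTOPARG() reads one 16-bit code unit and splits it into opcode and oparg. The self-contained sketch below mirrors that split with a toy codeunit union (an assumption standing in for _Py_CODEUNIT; the relaxed atomic load is replaced by a plain read):

    #include <stdint.h>
    #include <stdio.h>

    typedef union {
        uint16_t cache;          /* the whole 16-bit unit, as NEXTOPARG() loads it */
        struct {
            uint8_t code;        /* opcode */
            uint8_t arg;         /* oparg */
        } op;
    } codeunit;                  /* toy stand-in for _Py_CODEUNIT */

    int main(void)
    {
        codeunit stream[] = { { .op = { .code = 100, .arg = 3 } } };
        codeunit *next_instr = stream;

        /* The moral equivalent of NEXTOPARG(): load one unit, split it. */
        codeunit word = { .cache = *(uint16_t *)next_instr };
        int opcode = word.op.code;
        int oparg  = word.op.arg;

        printf("opcode=%d oparg=%d\n", opcode, oparg);   /* opcode=100 oparg=3 */
        return 0;
    }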
 201
 202            /* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
 203             * for advancing to the next instruction, taking into account cache entries
 204             * and skipped instructions.
 205             */
 206     4.26G  #define JUMPBY(x)       (next_instr += (x))
 207      367M  #define SKIP_OVER(x)    (next_instr += (x))
 208
 209            #define STACK_LEVEL()     ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
 210            #define STACK_SIZE()      (_PyFrame_GetCode(frame)->co_stacksize)
 211
 212            #define WITHIN_STACK_BOUNDS() \
 213               (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
 214
 215            /* Data access macros */
 216            #define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
 217            #define FRAME_CO_NAMES  (_PyFrame_GetCode(frame)->co_names)
 218
 219            /* Local variable macros */
 220
 221     1.28M  #define LOCALS_ARRAY    (frame->localsplus)
 222     15.7G  #define GETLOCAL(i)     (frame->localsplus[i])
 223
 224
 225            #ifdef Py_STATS
 226            #define UPDATE_MISS_STATS(INSTNAME)                              \
 227                do {                                                         \
 228                    STAT_INC(opcode, miss);                                  \
 229                    STAT_INC((INSTNAME), miss);                              \
 230                    /* The counter is always the first cache entry: */       \
 231                    if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) {       \
 232                        STAT_INC((INSTNAME), deopt);                         \
 233                    }                                                        \
 234                } while (0)
 235            #else
 236      195M  #define UPDATE_MISS_STATS(INSTNAME) ((void)0)
 237            #endif
 238
 239
 240            // Try to lock an object in the free threading build, if it's not already
 241            // locked. Use with a DEOPT_IF() to deopt if the object is already locked.
 242            // These are no-ops in the default GIL build. The general pattern is:
 243            //
 244            // DEOPT_IF(!LOCK_OBJECT(op));
 245            // if (/* condition fails */) {
 246            //     UNLOCK_OBJECT(op);
 247            //     DEOPT_IF(true);
 248            //  }
 249            //  ...
 250            //  UNLOCK_OBJECT(op);
 251            //
 252            // NOTE: The object must be unlocked on every exit code path and you should
 253            // avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
 254            // object is locked.
 255            #ifdef Py_GIL_DISABLED
 256            #  define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
 257            #  define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
 258            #else
 259      511M  #  define LOCK_OBJECT(op) (1)
 260      511M  #  define UNLOCK_OBJECT(op) ((void)0)
 261            #endif
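Concretely, the locking pattern described in the comment above might appear inside a specialized instruction body roughly like this; it is only a sketch, and `list`, the length guard, and DEOPT_IF() belong to the surrounding instruction-definition machinery rather than to this header:

    DEOPT_IF(!LOCK_OBJECT(list));             /* someone else holds the per-object lock */
    if (oparg >= PyList_GET_SIZE(list)) {     /* guard failed: unlock before deopting  */
        UNLOCK_OBJECT(list);
        DEOPT_IF(true);
    }
    PyObject *item = PyList_GET_ITEM(list, oparg);  /* no escaping calls while locked */
    UNLOCK_OBJECT(list);                      /* unlocked on every remaining exit path */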
 262
 263      712M  #define GLOBALS() frame->f_globals
 264      392M  #define BUILTINS() frame->f_builtins
 265     71.6k  #define LOCALS() frame->f_locals
 266            #define CONSTS() _PyFrame_GetCode(frame)->co_consts
 267            #define NAMES() _PyFrame_GetCode(frame)->co_names
 268
 269            #define DTRACE_FUNCTION_ENTRY()  \
 270                if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
 271                    dtrace_function_entry(frame); \
 272                }
 273
 274            /* This takes a uint16_t instead of a _Py_BackoffCounter,
 275             * because it is used directly on the cache entry in generated code,
 276             * which is always an integral type. */
 277            #define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
 278     1.15G      backoff_counter_triggers(forge_backoff_counter((COUNTER)))
 279
 280            #define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
 281     1.15G      do { \
 282     1.15G          (COUNTER) = advance_backoff_counter((COUNTER)); \
 283     1.15G      } while (0);
 284
 285            #define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
 286         0      do { \
 287         0          (COUNTER) = pause_backoff_counter((COUNTER)); \
 288         0      } while (0);
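Taken together, these counter macros support the usual specialization dance. In the sketch below, COUNTER stands for the instruction's first inline cache entry (compare UPDATE_MISS_STATS() above, which reads it as next_instr->cache), and _Py_Specialize_Example() is a hypothetical helper:

    if (ADAPTIVE_COUNTER_TRIGGERS(COUNTER)) {
        /* Backoff expired: try to rewrite this instruction into a specialized
         * form, then re-dispatch with the same oparg to run whatever is there now. */
        _Py_Specialize_Example(next_instr, oparg);
        DISPATCH_SAME_OPARG();
    }
    else {
        /* Not yet: back off a little further and take the generic path. */
        ADVANCE_ADAPTIVE_COUNTER(COUNTER);
    }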
 289
 290            #ifdef ENABLE_SPECIALIZATION_FT
 291            /* Multiple threads may execute these concurrently if thread-local bytecode is
 292             * disabled and they all execute the main copy of the bytecode. Specialization
 293             * is disabled in that case so the value is unused, but the RMW cycle should be
 294             * free of data races.
 295             */
 296            #define RECORD_BRANCH_TAKEN(bitset, flag) \
 297     2.13G      FT_ATOMIC_STORE_UINT16_RELAXED(       \
 298     2.13G          bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
 299            #else
 300            #define RECORD_BRANCH_TAKEN(bitset, flag)
 301            #endif
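RECORD_BRANCH_TAKEN() keeps a 16-deep branch history: each update shifts the history left by one and records the newest outcome in the low bit. The standalone sketch below shows the same computation with the relaxed atomics replaced by plain loads and stores:

    #include <stdint.h>
    #include <stdio.h>

    static void record_branch_taken(uint16_t *bitset, int flag)
    {
        /* Same shift-and-or as RECORD_BRANCH_TAKEN(), minus the atomics. */
        *bitset = (uint16_t)((*bitset << 1) | (flag != 0));
    }

    int main(void)
    {
        uint16_t history = 0;
        int outcomes[] = { 1, 1, 0, 1 };            /* taken, taken, not taken, taken */

        for (int i = 0; i < 4; i++) {
            record_branch_taken(&history, outcomes[i]);
        }
        printf("history = 0x%04x\n", history);      /* 0b1101 -> 0x000d */
        return 0;
    }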
 302
 303            #define UNBOUNDLOCAL_ERROR_MSG \
 304         0      "cannot access local variable '%s' where it is not associated with a value"
 305            #define UNBOUNDFREE_ERROR_MSG \
 306         0      "cannot access free variable '%s' where it is not associated with a value" \
 307         0      " in enclosing scope"
 308         1  #define NAME_ERROR_MSG "name '%.200s' is not defined"
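These format strings are consumed by the error paths in ceval.c; one plausible shape of such a call is sketched below, where `name` is the identifier that failed to resolve and the real code goes through its own helpers:

    const char *name_utf8 = PyUnicode_AsUTF8(name);
    if (name_utf8 != NULL) {
        PyErr_Format(PyExc_NameError, NAME_ERROR_MSG, name_utf8);
    }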
 309
 310            // If a trace function sets a new f_lineno and
 311            // *then* raises, we use the destination when searching
 312            // for an exception handler, displaying the traceback, and so on
 313         0  #define INSTRUMENTED_JUMP(src, dest, event) \
 314         0  do { \
 315         0      if (tstate->tracing) {\
 316         0          next_instr = dest; \
 317         0      } else { \
 318         0          _PyFrame_SetStackPointer(frame, stack_pointer); \
 319         0          next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
 320         0          stack_pointer = _PyFrame_GetStackPointer(frame); \
 321         0          if (next_instr == NULL) { \
 322         0              next_instr = (dest)+1; \
 323         0              JUMP_TO_LABEL(error); \
 324         0          } \
 325         0      } \
 326         0  } while (0);
 327
 328
 329      222M  static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
 330      222M      return (tstate->py_recursion_remaining-- <= 0) &&
 331      222M          _Py_CheckRecursiveCallPy(tstate);
 332      222M  }
 333
 334      668M  static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate)  {
 335      668M      tstate->py_recursion_remaining++;
 336      668M  }
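These two helpers are paired around pushing and popping a Python frame. Schematically (the exit_unwind label is the one this header itself jumps to in LLTRACE_RESUME_FRAME(); the actual call sites live in ceval.c):

    if (_Py_EnterRecursivePy(tstate)) {
        /* Budget exhausted and _Py_CheckRecursiveCallPy() reported a RecursionError. */
        JUMP_TO_LABEL(exit_unwind);
    }
    /* ... execute the callee's frame ... */
    _Py_LeaveRecursiveCallPy(tstate);   /* hand the recursion budget back on the way out */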
 337
 338            /* Implementation of "macros" that modify the instruction pointer,
 339             * stack pointer, or frame pointer.
 340             * These need to be treated differently by tier 1 and 2.
 341             * The Tier 1 version is here; Tier 2 is inlined in ceval.c. */
 342
 343     1.10G  #define LOAD_IP(OFFSET) do { \
 344     1.10G          next_instr = frame->instr_ptr + (OFFSET); \
 345     1.10G      } while (0)
 346
 347            /* There's no STORE_IP(), it's inlined by the code generator. */
 348
 349      445M  #define LOAD_SP() \
 350      445M  stack_pointer = _PyFrame_GetStackPointer(frame)
 351
 352            #define SAVE_SP() \
 353            _PyFrame_SetStackPointer(frame, stack_pointer)
 354
 355            /* Tier-switching macros. */
 356
 357            #ifdef _Py_JIT
 358            #define GOTO_TIER_TWO(EXECUTOR)                        \
 359            do {                                                   \
 360                OPT_STAT_INC(traces_executed);                     \
 361                _PyExecutorObject *_executor = (EXECUTOR);         \
 362                tstate->current_executor = (PyObject *)_executor;  \
 363                jit_func jitted = _executor->jit_code;             \
 364                /* Keep the shim frame alive via the executor: */  \
 365                Py_INCREF(_executor);                              \
 366                next_instr = jitted(frame, stack_pointer, tstate); \
 367                Py_DECREF(_executor);                              \
 368                frame = tstate->current_frame;                     \
 369                stack_pointer = _PyFrame_GetStackPointer(frame);   \
 370                if (next_instr == NULL) {                          \
 371                    next_instr = frame->instr_ptr;                 \
 372                    JUMP_TO_LABEL(error);                          \
 373                }                                                  \
 374                DISPATCH();                                        \
 375            } while (0)
 376            #else
 377            #define GOTO_TIER_TWO(EXECUTOR) \
 378            do { \
 379                OPT_STAT_INC(traces_executed); \
 380                _PyExecutorObject *_executor = (EXECUTOR); \
 381                tstate->current_executor = (PyObject *)_executor; \
 382                next_uop = _executor->trace; \
 383                assert(next_uop->opcode == _START_EXECUTOR); \
 384                goto enter_tier_two; \
 385            } while (0)
 386            #endif
 387
 388            #define GOTO_TIER_ONE(TARGET)                                         \
 389                do                                                                \
 390                {                                                                 \
 391                    tstate->current_executor = NULL;                              \
 392                    next_instr = (TARGET);                                        \
 393                    assert(tstate->current_executor == NULL);                     \
 394                    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
 395                    _PyFrame_SetStackPointer(frame, stack_pointer);               \
 396                    stack_pointer = _PyFrame_GetStackPointer(frame);              \
 397                    if (next_instr == NULL)                                       \
 398                    {                                                             \
 399                        next_instr = frame->instr_ptr;                            \
 400                        goto error;                                               \
 401                    }                                                             \
 402                    DISPATCH();                                                   \
 403                } while (0)
 404
 405            #define CURRENT_OPARG()    (next_uop[-1].oparg)
 406            #define CURRENT_OPERAND0() (next_uop[-1].operand0)
 407            #define CURRENT_OPERAND1() (next_uop[-1].operand1)
 408            #define CURRENT_TARGET()   (next_uop[-1].target)
 409
 410            #define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
 411            #define JUMP_TO_ERROR() goto jump_to_error_target
 412
 413            /* Stackref macros */
 414
 415            /* How much scratch space to give stackref to PyObject* conversion. */
 416     1.61G  #define MAX_STACKREF_SCRATCH 10
 417
 418            #define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
 419                /* +1 because vectorcall might use -1 to write self */ \
 420     1.62G      PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
 421     1.62G      PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1);
 422
 423            #define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
 424                /* +1 because we +1 previously */ \
 425     1.62G      _PyObjectArray_Free(NAME - 1, NAME##_temp);
 426
 427     1.62G  #define CONVERSION_FAILED(NAME) ((NAME) == NULL)
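A sketch of how the stackref conversion macros are meant to compose, for example around a vectorcall in a call instruction; `args`, `total_args`, and `callable_o` are illustrative names rather than the actual generated code:

    STACKREFS_TO_PYOBJECTS(args, total_args, args_o);    /* scratch array or heap allocation */
    if (CONVERSION_FAILED(args_o)) {
        /* allocation failed: unwind without touching args_o */
        JUMP_TO_LABEL(error);
    }
    PyObject *res_o = PyObject_Vectorcall(callable_o, args_o, total_args, NULL);
    STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);              /* release the conversion array */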