Coverage Report

Created: 2025-07-11 06:24

/src/cpython/Python/ceval_macros.h
 Count  Source
        // Macros and other things needed by ceval.c, and bytecodes.c

        /* Computed GOTOs, or
               the-optimization-commonly-but-improperly-known-as-"threaded code"
           using gcc's labels-as-values extension
           (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).

           The traditional bytecode evaluation loop uses a "switch" statement, which
           decent compilers will optimize as a single indirect branch instruction
           combined with a lookup table of jump addresses. However, since the
           indirect jump instruction is shared by all opcodes, the CPU will have a
           hard time making the right prediction for where to jump next (actually,
           it will be always wrong except in the uncommon case of a sequence of
           several identical opcodes).

           "Threaded code" in contrast, uses an explicit jump table and an explicit
           indirect jump instruction at the end of each opcode. Since the jump
           instruction is at a different address for each opcode, the CPU will make a
           separate prediction for each of these instructions, which is equivalent to
           predicting the second opcode of each opcode pair. These predictions have
           a much better chance to turn out valid, especially in small bytecode loops.

           A mispredicted branch on a modern CPU flushes the whole pipeline and
           can cost several CPU cycles (depending on the pipeline depth),
           and potentially many more instructions (depending on the pipeline width).
           A correctly predicted branch, however, is nearly free.

           At the time of this writing, the "threaded code" version is up to 15-20%
           faster than the normal "switch" version, depending on the compiler and the
           CPU architecture.

           NOTE: care must be taken that the compiler doesn't try to "optimize" the
           indirect jumps by sharing them between all opcodes. Such optimizations
           can be disabled on gcc by using the -fno-gcse flag (or possibly
           -fno-crossjumping).
        */

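To make the switch-vs-threaded-code comparison above concrete, the following minimal, stand-alone sketch implements a toy three-opcode interpreter both ways. It is not part of ceval_macros.h; the opcode names are invented, and the labels-as-values version (&&label / goto *) builds only with compilers such as GCC or Clang that support that extension.

    /* Toy illustration of switch dispatch vs. computed-goto dispatch.
       Compile with e.g.: gcc -O2 toy_dispatch.c */
    #include <stdio.h>

    enum { OP_INCR, OP_DECR, OP_HALT };

    static int
    run_switch(const unsigned char *code)
    {
        int acc = 0;
        for (;;) {
            switch (*code++) {              /* one shared indirect jump */
            case OP_INCR: acc++; break;
            case OP_DECR: acc--; break;
            case OP_HALT: return acc;
            }
        }
    }

    static int
    run_threaded(const unsigned char *code)
    {
        static const void *targets[] = { &&incr, &&decr, &&halt };
        int acc = 0;
        goto *targets[*code++];             /* separate indirect jump per opcode below */
    incr: acc++; goto *targets[*code++];
    decr: acc--; goto *targets[*code++];
    halt: return acc;
    }

    int
    main(void)
    {
        const unsigned char prog[] = { OP_INCR, OP_INCR, OP_DECR, OP_HALT };
        printf("%d %d\n", run_switch(prog), run_threaded(prog));   /* prints "1 1" */
        return 0;
    }

Both functions compute the same result; the difference the comment describes is only in how many distinct indirect jumps the branch predictor gets to train on.
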
        /* Use macros rather than inline functions, to make it as clear as possible
         * to the C compiler that the tracing check is a simple test then branch.
         * We want to be sure that the compiler knows this before it generates
         * the CFG.
         */

        #ifdef WITH_DTRACE
        #define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
        #else
        #define OR_DTRACE_LINE
        #endif

        #ifdef HAVE_COMPUTED_GOTOS
            #ifndef USE_COMPUTED_GOTOS
            #define USE_COMPUTED_GOTOS 1
            #endif
        #else
            #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
            #error "Computed gotos are not supported on this compiler."
            #endif
            #undef USE_COMPUTED_GOTOS
            #define USE_COMPUTED_GOTOS 0
        #endif

        #ifdef Py_STATS
        #define INSTRUCTION_STATS(op) \
            do { \
                OPCODE_EXE_INC(op); \
                if (_Py_stats) _Py_stats->opcode_stats[lastopcode].pair_count[op]++; \
                lastopcode = op; \
            } while (0)
        #else
 27.8G  #define INSTRUCTION_STATS(op) ((void)0)
        #endif

        #ifdef Py_STATS
        #   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg, int lastopcode
        #   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg, lastopcode
        #else
        #   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg
        #   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg
        #endif

        #if Py_TAIL_CALL_INTERP
            // Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
        #   define Py_MUSTTAIL [[clang::musttail]]
        #   define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
            Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);

        #   define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
        #   define DISPATCH_GOTO() \
                do { \
                    Py_MUSTTAIL return (INSTRUCTION_TABLE[opcode])(TAIL_CALL_ARGS); \
                } while (0)
        #   define JUMP_TO_LABEL(name) \
                do { \
                    Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
                } while (0)
        #   ifdef Py_STATS
        #       define JUMP_TO_PREDICTED(name) \
                    do { \
                        Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg, lastopcode); \
                    } while (0)
        #   else
        #       define JUMP_TO_PREDICTED(name) \
                    do { \
                        Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg); \
                    } while (0)
        #   endif
        #    define LABEL(name) TARGET(name)
        #elif USE_COMPUTED_GOTOS
 27.8G  #  define TARGET(op) TARGET_##op:
 28.1G  #  define DISPATCH_GOTO() goto *opcode_targets[opcode]
 39.5M  #  define JUMP_TO_LABEL(name) goto name;
  170M  #  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
  266M  #  define LABEL(name) name:
        #else
        #  define TARGET(op) case op: TARGET_##op:
        #  define DISPATCH_GOTO() goto dispatch_opcode
        #  define JUMP_TO_LABEL(name) goto name;
        #  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
        #  define LABEL(name) name:
        #endif

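When Py_TAIL_CALL_INTERP is enabled, each TARGET() above becomes its own function and DISPATCH_GOTO() becomes a guaranteed tail call through INSTRUCTION_TABLE, so dispatching never grows the C stack. The stand-alone sketch below shows that shape on a toy interpreter; it is illustrative only, omits __attribute__((preserve_none)), and assumes a Clang recent enough to honor [[clang::musttail]] (build as C23 or rely on the compiler accepting the attribute as an extension).

    /* Toy tail-call dispatch: every handler ends in a musttail call, mirroring
       the TARGET()/DISPATCH_GOTO() pair defined above. */
    #include <stdio.h>

    enum { OP_INCR, OP_DECR, OP_HALT };

    typedef int (*handler)(const unsigned char *ip, int acc);

    static int op_incr(const unsigned char *ip, int acc);
    static int op_decr(const unsigned char *ip, int acc);
    static int op_halt(const unsigned char *ip, int acc);

    /* Stand-in for INSTRUCTION_TABLE. */
    static const handler table[] = { op_incr, op_decr, op_halt };

    #define DISPATCH() [[clang::musttail]] return table[*ip](ip, acc)

    static int op_incr(const unsigned char *ip, int acc) { acc++; ip++; DISPATCH(); }
    static int op_decr(const unsigned char *ip, int acc) { acc--; ip++; DISPATCH(); }
    static int op_halt(const unsigned char *ip, int acc) { (void)ip; return acc; }

    int
    main(void)
    {
        const unsigned char prog[] = { OP_INCR, OP_INCR, OP_DECR, OP_HALT };
        printf("%d\n", table[prog[0]](prog, 0));   /* prints "1" */
        return 0;
    }

Because every handler hands the whole interpreter state to the next one as call arguments, the real macros thread frame, stack_pointer, tstate, next_instr and oparg through TAIL_CALL_PARAMS explicitly.
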
        /* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
        #ifdef Py_DEBUG
        #define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
            lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
        #else
 28.1G  #define PRE_DISPATCH_GOTO() ((void)0)
        #endif

        #ifdef Py_DEBUG
        #define LLTRACE_RESUME_FRAME() \
        do { \
            _PyFrame_SetStackPointer(frame, stack_pointer); \
            int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
            stack_pointer = _PyFrame_GetStackPointer(frame); \
            if (lltrace < 0) { \
                JUMP_TO_LABEL(exit_unwind); \
            } \
            frame->lltrace = lltrace; \
        } while (0)
        #else
 1.11G  #define LLTRACE_RESUME_FRAME() ((void)0)
        #endif

        #ifdef Py_GIL_DISABLED
        #define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
        #else
        #define QSBR_QUIESCENT_STATE(tstate)
        #endif


        /* Do interpreter dispatch accounting for tracing and instrumentation */
        #define DISPATCH() \
 28.0G      { \
 28.0G          assert(frame->stackpointer == NULL); \
 28.0G          NEXTOPARG(); \
 28.0G          PRE_DISPATCH_GOTO(); \
 28.0G          DISPATCH_GOTO(); \
 28.0G      }

        #define DISPATCH_SAME_OPARG() \
 3.57M      { \
 3.57M          opcode = next_instr->op.code; \
 3.57M          PRE_DISPATCH_GOTO(); \
 3.57M          DISPATCH_GOTO(); \
 3.57M      }

        #define DISPATCH_INLINED(NEW_FRAME)                     \
  659k      do {                                                \
  659k          assert(tstate->interp->eval_frame == NULL);     \
  659k          _PyFrame_SetStackPointer(frame, stack_pointer); \
  659k          assert((NEW_FRAME)->previous == frame);         \
  659k          frame = tstate->current_frame = (NEW_FRAME);     \
  659k          CALL_STAT_INC(inlined_py_calls);                \
  659k          JUMP_TO_LABEL(start_frame);                      \
     0      } while (0)

        /* Tuple access macros */

        #ifndef Py_DEBUG
 1.43G  #define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
        #else
        static inline PyObject *
        GETITEM(PyObject *v, Py_ssize_t i) {
            assert(PyTuple_Check(v));
            assert(i >= 0);
            assert(i < PyTuple_GET_SIZE(v));
            return PyTuple_GET_ITEM(v, i);
        }
        #endif

        /* Code access macros */

        /* The integer overflow is checked by an assertion below. */
 28.1M  #define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
 28.0G  #define NEXTOPARG()  do { \
 28.0G          _Py_CODEUNIT word  = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
 28.0G          opcode = word.op.code; \
 28.0G          oparg = word.op.arg; \
 28.0G      } while (0)

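NEXTOPARG() above fetches one 16-bit code unit and splits it into an 8-bit opcode and an 8-bit oparg; the FT_ATOMIC_LOAD_UINT16_RELAXED wrapper only matters for the free-threaded build. A minimal sketch of that decode step, with a local union standing in for _Py_CODEUNIT and made-up opcode numbers:

    /* Sketch of the NEXTOPARG() decode: one code unit = one opcode byte plus
       one oparg byte. The real layout is _Py_CODEUNIT in CPython's internals. */
    #include <stdint.h>
    #include <stdio.h>

    typedef union {
        uint16_t cache;
        struct { uint8_t code; uint8_t arg; } op;
    } codeunit;

    int
    main(void)
    {
        const codeunit code[] = {
            { .op = { .code = 100, .arg = 7 } },   /* made-up opcode 100, oparg 7 */
            { .op = { .code = 83,  .arg = 0 } },   /* made-up opcode 83,  oparg 0 */
        };
        for (int i = 0; i < 2; i++) {
            int opcode = code[i].op.code;          /* what NEXTOPARG() assigns */
            int oparg  = code[i].op.arg;
            printf("opcode=%d oparg=%d\n", opcode, oparg);
        }
        return 0;
    }
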
        /* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
         * for advancing to the next instruction, taking into account cache entries
         * and skipped instructions.
         */
 3.65G  #define JUMPBY(x)       (next_instr += (x))
  318M  #define SKIP_OVER(x)    (next_instr += (x))

        #define STACK_LEVEL()     ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
        #define STACK_SIZE()      (_PyFrame_GetCode(frame)->co_stacksize)

        #define WITHIN_STACK_BOUNDS() \
           (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))

        /* Data access macros */
        #define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
        #define FRAME_CO_NAMES  (_PyFrame_GetCode(frame)->co_names)

        /* Local variable macros */

  923k  #define LOCALS_ARRAY    (frame->localsplus)
 13.4G  #define GETLOCAL(i)     (frame->localsplus[i])


        #ifdef Py_STATS
        #define UPDATE_MISS_STATS(INSTNAME)                              \
            do {                                                         \
                STAT_INC(opcode, miss);                                  \
                STAT_INC((INSTNAME), miss);                              \
                /* The counter is always the first cache entry: */       \
                if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) {       \
                    STAT_INC((INSTNAME), deopt);                         \
                }                                                        \
            } while (0)
        #else
  170M  #define UPDATE_MISS_STATS(INSTNAME) ((void)0)
        #endif


        // Try to lock an object in the free threading build, if it's not already
        // locked. Use with a DEOPT_IF() to deopt if the object is already locked.
        // These are no-ops in the default GIL build. The general pattern is:
        //
        // DEOPT_IF(!LOCK_OBJECT(op));
        // if (/* condition fails */) {
        //     UNLOCK_OBJECT(op);
        //     DEOPT_IF(true);
        //  }
        //  ...
        //  UNLOCK_OBJECT(op);
        //
        // NOTE: The object must be unlocked on every exit code path and you should
        // avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
        // object is locked.
        #ifdef Py_GIL_DISABLED
        #  define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
        #  define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
        #else
  439M  #  define LOCK_OBJECT(op) (1)
  439M  #  define UNLOCK_OBJECT(op) ((void)0)
        #endif

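To make the locking discipline described in the comment above concrete, here is a self-contained sketch in which a plain flag stands in for the per-object mutex, a failed try-lock plays the role of DEOPT_IF(!LOCK_OBJECT(op)), and the guard and fast-path work are invented for illustration:

    /* Toy version of the LOCK_OBJECT()/UNLOCK_OBJECT() pattern. */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { bool locked; int value; } toyobject;

    static bool toy_lock(toyobject *op)     /* stand-in for LOCK_OBJECT() */
    {
        if (op->locked) {
            return false;                   /* already locked: caller must deopt */
        }
        op->locked = true;
        return true;
    }

    static void toy_unlock(toyobject *op)   /* stand-in for UNLOCK_OBJECT() */
    {
        op->locked = false;
    }

    /* Returns 1 if the specialized fast path ran, 0 if the caller should
       deoptimize to the generic instruction. */
    static int fast_path(toyobject *op)
    {
        if (!toy_lock(op)) {
            return 0;                       /* DEOPT_IF(!LOCK_OBJECT(op)); */
        }
        if (op->value < 0) {                /* some guard that can fail */
            toy_unlock(op);                 /* unlock on every exit path */
            return 0;                       /* DEOPT_IF(true); */
        }
        op->value++;                        /* the actual specialized work */
        toy_unlock(op);
        return 1;
    }

    int
    main(void)
    {
        toyobject o = { false, 1 };
        int ran = fast_path(&o);
        printf("ran=%d value=%d\n", ran, o.value);   /* ran=1 value=2 */
        return 0;
    }
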
  603M  #define GLOBALS() frame->f_globals
  326M  #define BUILTINS() frame->f_builtins
 71.6k  #define LOCALS() frame->f_locals
        #define CONSTS() _PyFrame_GetCode(frame)->co_consts
        #define NAMES() _PyFrame_GetCode(frame)->co_names

        #define DTRACE_FUNCTION_ENTRY()  \
            if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
                dtrace_function_entry(frame); \
            }

        /* This takes a uint16_t instead of a _Py_BackoffCounter,
         * because it is used directly on the cache entry in generated code,
         * which is always an integral type. */
        #define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
 1.02G      backoff_counter_triggers(forge_backoff_counter((COUNTER)))

        #define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
 1.02G      do { \
 1.02G          (COUNTER) = advance_backoff_counter((COUNTER)); \
 1.02G      } while (0);

        #define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
     0      do { \
     0          (COUNTER) = pause_backoff_counter((COUNTER)); \
     0      } while (0);

        #ifdef ENABLE_SPECIALIZATION_FT
        /* Multiple threads may execute these concurrently if thread-local bytecode is
         * disabled and they all execute the main copy of the bytecode. Specialization
         * is disabled in that case so the value is unused, but the RMW cycle should be
         * free of data races.
         */
        #define RECORD_BRANCH_TAKEN(bitset, flag) \
 1.82G      FT_ATOMIC_STORE_UINT16_RELAXED(       \
 1.82G          bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
        #else
        #define RECORD_BRANCH_TAKEN(bitset, flag)
        #endif

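RECORD_BRANCH_TAKEN() above maintains a 16-bit branch history for a conditional branch: each outcome is shifted in at the low bit, and the oldest outcome drops off after 16 branches. A minimal sketch of that bookkeeping (the relaxed atomic load/store used by the free-threaded build is omitted):

    /* Toy version of the RECORD_BRANCH_TAKEN() bitset update. */
    #include <stdint.h>
    #include <stdio.h>

    static void record_branch_taken(uint16_t *bitset, int flag)
    {
        *bitset = (uint16_t)((*bitset << 1) | (flag != 0));
    }

    int
    main(void)
    {
        uint16_t history = 0;
        const int outcomes[] = { 1, 1, 0, 1 };    /* taken, taken, not taken, taken */
        for (int i = 0; i < 4; i++) {
            record_branch_taken(&history, outcomes[i]);
        }
        printf("history = 0x%04x\n", history);    /* 0b1101 -> 0x000d */
        return 0;
    }
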
        #define UNBOUNDLOCAL_ERROR_MSG \
     0      "cannot access local variable '%s' where it is not associated with a value"
        #define UNBOUNDFREE_ERROR_MSG \
     0      "cannot access free variable '%s' where it is not associated with a value" \
     0      " in enclosing scope"
     1  #define NAME_ERROR_MSG "name '%.200s' is not defined"

        // If a trace function sets a new f_lineno and
        // *then* raises, we use the destination when searching
        // for an exception handler, displaying the traceback, and so on
     0  #define INSTRUMENTED_JUMP(src, dest, event) \
     0  do { \
     0      if (tstate->tracing) {\
     0          next_instr = dest; \
     0      } else { \
     0          _PyFrame_SetStackPointer(frame, stack_pointer); \
     0          next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
     0          stack_pointer = _PyFrame_GetStackPointer(frame); \
     0          if (next_instr == NULL) { \
     0              next_instr = (dest)+1; \
     0              JUMP_TO_LABEL(error); \
     0          } \
     0      } \
     0  } while (0);


  199M  static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
  199M      return (tstate->py_recursion_remaining-- <= 0) &&
  199M          _Py_CheckRecursiveCallPy(tstate);
  199M  }

  567M  static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate)  {
  567M      tstate->py_recursion_remaining++;
  567M  }

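_Py_EnterRecursivePy() and _Py_LeaveRecursiveCallPy() above are a counted recursion guard: the fast path is one decrement and test, and the slow path (_Py_CheckRecursiveCallPy) only runs once the per-thread budget is exhausted. A toy version of the same shape, with a stand-in slow path that merely reports the overflow:

    /* Toy recursion guard mirroring _Py_EnterRecursivePy()/_Py_LeaveRecursiveCallPy(). */
    #include <stdio.h>

    typedef struct { int py_recursion_remaining; } toy_tstate;

    static int check_recursive_call(toy_tstate *ts)   /* stand-in slow path */
    {
        (void)ts;
        fprintf(stderr, "maximum recursion depth exceeded\n");
        return 1;                                     /* nonzero: report an error */
    }

    static inline int enter_recursive_py(toy_tstate *ts)
    {
        return (ts->py_recursion_remaining-- <= 0) && check_recursive_call(ts);
    }

    static inline void leave_recursive_call_py(toy_tstate *ts)
    {
        ts->py_recursion_remaining++;
    }

    int
    main(void)
    {
        toy_tstate ts = { .py_recursion_remaining = 2 };
        for (int depth = 0; depth < 4; depth++) {
            printf("enter %d -> overflow=%d\n", depth, enter_recursive_py(&ts));
        }
        for (int depth = 0; depth < 4; depth++) {
            leave_recursive_call_py(&ts);             /* every enter gets a matching leave */
        }
        return 0;
    }
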
        /* Implementation of "macros" that modify the instruction pointer,
         * stack pointer, or frame pointer.
         * These need to treated differently by tier 1 and 2.
         * The Tier 1 version is here; Tier 2 is inlined in ceval.c. */

  924M  #define LOAD_IP(OFFSET) do { \
  924M          next_instr = frame->instr_ptr + (OFFSET); \
  924M      } while (0)

        /* There's no STORE_IP(), it's inlined by the code generator. */

  367M  #define LOAD_SP() \
  367M  stack_pointer = _PyFrame_GetStackPointer(frame)

        #define SAVE_SP() \
        _PyFrame_SetStackPointer(frame, stack_pointer)

        /* Tier-switching macros. */

        #ifdef _Py_JIT
        #define GOTO_TIER_TWO(EXECUTOR)                        \
        do {                                                   \
            OPT_STAT_INC(traces_executed);                     \
            _PyExecutorObject *_executor = (EXECUTOR);         \
            tstate->current_executor = (PyObject *)_executor;  \
            jit_func jitted = _executor->jit_code;             \
            /* Keep the shim frame alive via the executor: */  \
            Py_INCREF(_executor);                              \
            next_instr = jitted(frame, stack_pointer, tstate); \
            Py_DECREF(_executor);                              \
            frame = tstate->current_frame;                     \
            stack_pointer = _PyFrame_GetStackPointer(frame);   \
            if (next_instr == NULL) {                          \
                next_instr = frame->instr_ptr;                 \
                JUMP_TO_LABEL(error);                          \
            }                                                  \
            DISPATCH();                                        \
        } while (0)
        #else
        #define GOTO_TIER_TWO(EXECUTOR) \
        do { \
            OPT_STAT_INC(traces_executed); \
            _PyExecutorObject *_executor = (EXECUTOR); \
            tstate->current_executor = (PyObject *)_executor; \
            next_uop = _executor->trace; \
            assert(next_uop->opcode == _START_EXECUTOR); \
            goto enter_tier_two; \
        } while (0)
        #endif

        #define GOTO_TIER_ONE(TARGET)                                         \
            do                                                                \
            {                                                                 \
                tstate->current_executor = NULL;                              \
                next_instr = (TARGET);                                        \
                assert(tstate->current_executor == NULL);                     \
                OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
                _PyFrame_SetStackPointer(frame, stack_pointer);               \
                stack_pointer = _PyFrame_GetStackPointer(frame);              \
                if (next_instr == NULL)                                       \
                {                                                             \
                    next_instr = frame->instr_ptr;                            \
                    goto error;                                               \
                }                                                             \
                DISPATCH();                                                   \
            } while (0)

        #define CURRENT_OPARG()    (next_uop[-1].oparg)
        #define CURRENT_OPERAND0() (next_uop[-1].operand0)
        #define CURRENT_OPERAND1() (next_uop[-1].operand1)
        #define CURRENT_TARGET()   (next_uop[-1].target)

        #define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
        #define JUMP_TO_ERROR() goto jump_to_error_target

        /* Stackref macros */

        /* How much scratch space to give stackref to PyObject* conversion. */
 1.38G  #define MAX_STACKREF_SCRATCH 10

        #define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
            /* +1 because vectorcall might use -1 to write self */ \
 1.38G      PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
 1.38G      PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1);

        #define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
            /* +1 because we +1 previously */ \
 1.38G      _PyObjectArray_Free(NAME - 1, NAME##_temp);

 1.38G  #define CONVERSION_FAILED(NAME) ((NAME) == NULL)
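STACKREFS_TO_PYOBJECTS() above converts an argument array using a small on-stack scratch buffer (with one extra slot so vectorcall callers can write at index -1), and the cleanup macro frees the result only when it did not fit in the scratch buffer. The self-contained sketch below shows the same scratch-or-heap pattern with plain C types; the helper names are invented and the vectorcall offset is left out:

    /* Scratch-or-heap conversion pattern, illustrative only. */
    #include <stdio.h>
    #include <stdlib.h>

    #define SCRATCH_SIZE 10

    /* Convert n ints to doubles, using `scratch` when it is large enough. */
    static double *
    to_doubles(const int *args, size_t n, double *scratch)
    {
        double *out = (n <= SCRATCH_SIZE) ? scratch : malloc(n * sizeof(double));
        if (out == NULL) {
            return NULL;                    /* cf. CONVERSION_FAILED() */
        }
        for (size_t i = 0; i < n; i++) {
            out[i] = (double)args[i];
        }
        return out;
    }

    static void
    to_doubles_cleanup(double *out, double *scratch)
    {
        if (out != scratch) {
            free(out);                      /* only the heap fallback is freed */
        }
    }

    int
    main(void)
    {
        const int args[3] = { 1, 2, 3 };
        double scratch[SCRATCH_SIZE];
        double *converted = to_doubles(args, 3, scratch);
        if (converted == NULL) {
            return 1;
        }
        printf("%g %g %g\n", converted[0], converted[1], converted[2]);
        to_doubles_cleanup(converted, scratch);
        return 0;
    }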