Coverage Report

Created: 2025-11-24 06:11

/src/cpython/Python/ceval_macros.h
Line|  Count|Source
   1|       |// Macros and other things needed by ceval.c, and bytecodes.c
   2|       |
   3|       |/* Computed GOTOs, or
   4|       |       the-optimization-commonly-but-improperly-known-as-"threaded code"
   5|       |   using gcc's labels-as-values extension
   6|       |   (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
   7|       |
   8|       |   The traditional bytecode evaluation loop uses a "switch" statement, which
   9|       |   decent compilers will optimize as a single indirect branch instruction
  10|       |   combined with a lookup table of jump addresses. However, since the
  11|       |   indirect jump instruction is shared by all opcodes, the CPU will have a
  12|       |   hard time making the right prediction for where to jump next (actually,
  13|       |   it will always be wrong except in the uncommon case of a sequence of
  14|       |   several identical opcodes).
  15|       |
  16|       |   "Threaded code", in contrast, uses an explicit jump table and an explicit
  17|       |   indirect jump instruction at the end of each opcode. Since the jump
  18|       |   instruction is at a different address for each opcode, the CPU will make a
  19|       |   separate prediction for each of these instructions, which is equivalent to
  20|       |   predicting the second opcode of each opcode pair. These predictions have
  21|       |   a much better chance to turn out valid, especially in small bytecode loops.
  22|       |
  23|       |   A mispredicted branch on a modern CPU flushes the whole pipeline and
  24|       |   can cost several CPU cycles (depending on the pipeline depth),
  25|       |   and potentially many more instructions (depending on the pipeline width).
  26|       |   A correctly predicted branch, however, is nearly free.
  27|       |
  28|       |   At the time of this writing, the "threaded code" version is up to 15-20%
  29|       |   faster than the normal "switch" version, depending on the compiler and the
  30|       |   CPU architecture.
  31|       |
  32|       |   NOTE: care must be taken that the compiler doesn't try to "optimize" the
  33|       |   indirect jumps by sharing them between all opcodes. Such optimizations
  34|       |   can be disabled on gcc by using the -fno-gcse flag (or possibly
  35|       |   -fno-crossjumping).
  36|       |*/
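
The block comment above is the rationale for the USE_COMPUTED_GOTOS machinery that follows. As a rough standalone illustration only, not CPython's dispatch loop (the real TARGET()/DISPATCH_GOTO() definitions appear further down in this file), the two dispatch styles compare roughly as follows; the opcodes and toy program are invented for the sketch, and the labels-as-values version needs GCC or Clang:

/* Minimal sketch of the two dispatch styles described in the comment
 * above.  This is NOT CPython's interpreter: the opcodes, handlers and
 * "bytecode" program are made up for illustration.  The threaded version
 * relies on the GCC/Clang labels-as-values extension. */
#include <stdio.h>

enum { OP_INCR, OP_DECR, OP_HALT };

static int run_switch(const unsigned char *code) {
    int acc = 0;
    for (;;) {
        switch (*code++) {             /* one shared indirect jump */
        case OP_INCR: acc++; break;
        case OP_DECR: acc--; break;
        case OP_HALT: return acc;
        }
    }
}

#if defined(__GNUC__) || defined(__clang__)
static int run_threaded(const unsigned char *code) {
    static const void *targets[] = { &&incr, &&decr, &&halt };
    int acc = 0;
    goto *targets[*code++];            /* every handler ends with its own jump */
incr:
    acc++;
    goto *targets[*code++];
decr:
    acc--;
    goto *targets[*code++];
halt:
    return acc;
}
#endif

int main(void) {
    const unsigned char prog[] = { OP_INCR, OP_INCR, OP_DECR, OP_HALT };
    printf("switch:   %d\n", run_switch(prog));
#if defined(__GNUC__) || defined(__clang__)
    printf("threaded: %d\n", run_threaded(prog));
#endif
    return 0;
}

Because each handler in the threaded version ends with its own indirect jump, the CPU keeps a separate prediction per opcode, which is exactly the effect the comment describes; it is also why -fno-gcse matters, since GCC must not merge those jumps back into a single shared one.
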
  37|       |
  38|       |/* Use macros rather than inline functions, to make it as clear as possible
  39|       | * to the C compiler that the tracing check is a simple test then branch.
  40|       | * We want to be sure that the compiler knows this before it generates
  41|       | * the CFG.
  42|       | */
  43|       |
  44|       |#ifdef WITH_DTRACE
  45|       |#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
  46|       |#else
  47|       |#define OR_DTRACE_LINE
  48|       |#endif
  49|       |
  50|       |#ifdef HAVE_COMPUTED_GOTOS
  51|       |    #ifndef USE_COMPUTED_GOTOS
  52|       |    #define USE_COMPUTED_GOTOS 1
  53|       |    #endif
  54|       |#else
  55|       |    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
  56|       |    #error "Computed gotos are not supported on this compiler."
  57|       |    #endif
  58|       |    #undef USE_COMPUTED_GOTOS
  59|       |    #define USE_COMPUTED_GOTOS 0
  60|       |#endif
  61|       |
  62|       |#ifdef Py_STATS
  63|       |#define INSTRUCTION_STATS(op) \
  64|       |    do { \
  65|       |        PyStats *s = _PyStats_GET(); \
  66|       |        OPCODE_EXE_INC(op); \
  67|       |        if (s) s->opcode_stats[lastopcode].pair_count[op]++; \
  68|       |        lastopcode = op; \
  69|       |    } while (0)
  70|       |#else
  71|  57.8G|#define INSTRUCTION_STATS(op) ((void)0)
  72|       |#endif
  73|       |
  74|       |#ifdef Py_STATS
  75|       |#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, const void *instruction_funcptr_table, int oparg, int lastopcode
  76|       |#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg, lastopcode
  77|       |#else
  78|       |#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, const void *instruction_funcptr_table, int oparg
  79|       |#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg
  80|       |#endif
  81|       |
  82|       |#if _Py_TAIL_CALL_INTERP
  83|       |#   if defined(__clang__) || defined(__GNUC__)
  84|       |#       if !_Py__has_attribute(preserve_none) || !_Py__has_attribute(musttail)
  85|       |#           error "This compiler does not have support for efficient tail calling."
  86|       |#       endif
  87|       |#   elif defined(_MSC_VER) && (_MSC_VER < 1950)
  88|       |#       error "You need at least VS 2026 / PlatformToolset v145 for tail calling."
  89|       |#   endif
  90|       |
  91|       |    // Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
  92|       |#   define Py_MUSTTAIL [[clang::musttail]]
  93|       |#   define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
  94|       |    Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
  95|       |
  96|       |#   define DISPATCH_TABLE_VAR instruction_funcptr_table
  97|       |#   define DISPATCH_TABLE instruction_funcptr_handler_table
  98|       |#   define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
  99|       |#   define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
 100|       |
 101|       |#   define DISPATCH_GOTO() \
 102|       |        do { \
 103|       |            Py_MUSTTAIL return (((py_tail_call_funcptr *)instruction_funcptr_table)[opcode])(TAIL_CALL_ARGS); \
 104|       |        } while (0)
 105|       |#   define DISPATCH_GOTO_NON_TRACING() \
 106|       |        do { \
 107|       |            Py_MUSTTAIL return (((py_tail_call_funcptr *)DISPATCH_TABLE)[opcode])(TAIL_CALL_ARGS); \
 108|       |        } while (0)
 109|       |#   define JUMP_TO_LABEL(name) \
 110|       |        do { \
 111|       |            Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
 112|       |        } while (0)
 113|       |#   ifdef Py_STATS
 114|       |#       define JUMP_TO_PREDICTED(name) \
 115|       |            do { \
 116|       |                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg, lastopcode); \
 117|       |            } while (0)
 118|       |#   else
 119|       |#       define JUMP_TO_PREDICTED(name) \
 120|       |            do { \
 121|       |                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg); \
 122|       |            } while (0)
 123|       |#   endif
 124|       |#    define LABEL(name) TARGET(name)
 125|       |#elif USE_COMPUTED_GOTOS
 126|       |#  define DISPATCH_TABLE_VAR opcode_targets
 127|  6.76M|#  define DISPATCH_TABLE opcode_targets_table
 128|       |#  define TRACING_DISPATCH_TABLE opcode_tracing_targets_table
 129|  57.8G|#  define TARGET(op) TARGET_##op:
 130|  58.1G|#  define DISPATCH_GOTO() goto *opcode_targets[opcode]
 131|  6.76M|#  define DISPATCH_GOTO_NON_TRACING() goto *DISPATCH_TABLE[opcode];
 132|  96.0M|#  define JUMP_TO_LABEL(name) goto name;
 133|   294M|#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
 134|   422M|#  define LABEL(name) name:
 135|       |#else
 136|       |#  define TARGET(op) case op: TARGET_##op:
 137|       |#  define DISPATCH_GOTO() dispatch_code = opcode | tracing_mode ; goto dispatch_opcode
 138|       |#  define DISPATCH_GOTO_NON_TRACING() dispatch_code = opcode; goto dispatch_opcode
 139|       |#  define JUMP_TO_LABEL(name) goto name;
 140|       |#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
 141|       |#  define LABEL(name) name:
 142|       |#endif
 143|       |
 144|       |#if (_Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS) && _Py_TIER2
 145|       |#  define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE)
 146|       |#  define ENTER_TRACING() \
 147|       |    DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE;
 148|       |#  define LEAVE_TRACING() \
 149|       |    DISPATCH_TABLE_VAR = DISPATCH_TABLE;
 150|       |#else
 151|       |#  define IS_JIT_TRACING() (tracing_mode != 0)
 152|       |#  define ENTER_TRACING() tracing_mode = 255
 153|       |#  define LEAVE_TRACING() tracing_mode = 0
 154|       |#endif
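
ENTER_TRACING() and LEAVE_TRACING() above switch the interpreter between its normal and tracing behaviour by repointing DISPATCH_TABLE_VAR at a different handler table, so the hot dispatch path never pays for an extra per-instruction check. A hypothetical standalone model of that idea (plain function pointers and invented handlers, not the real opcode tables):

/* Toy model of the table swap behind ENTER_TRACING()/LEAVE_TRACING():
 * dispatch always indexes whatever table the pointer currently refers to,
 * so changing behaviour is one pointer assignment instead of a branch on
 * every instruction.  Opcodes and handlers are invented for the sketch. */
#include <stdio.h>

typedef void (*handler_fn)(int oparg);

static void do_add(int oparg)        { printf("ADD %d\n", oparg); }
static void do_add_traced(int oparg) { printf("trace: "); do_add(oparg); }

static handler_fn normal_table[]  = { do_add };
static handler_fn tracing_table[] = { do_add_traced };

static handler_fn *dispatch_table = normal_table;   /* plays DISPATCH_TABLE_VAR */

static void dispatch(int opcode, int oparg) {
    dispatch_table[opcode](oparg);                   /* plays DISPATCH_GOTO() */
}

int main(void) {
    dispatch(0, 1);                    /* normal handler */
    dispatch_table = tracing_table;    /* like ENTER_TRACING() */
    dispatch(0, 2);                    /* traced handler, same call site */
    dispatch_table = normal_table;     /* like LEAVE_TRACING() */
    dispatch(0, 3);
    return 0;
}
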
 155|       |
 156|       |/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
 157|       |#ifdef Py_DEBUG
 158|       |#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
 159|       |    lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
 160|       |#else
 161|  58.1G|#define PRE_DISPATCH_GOTO() ((void)0)
 162|       |#endif
 163|       |
 164|       |#ifdef Py_DEBUG
 165|       |#define LLTRACE_RESUME_FRAME() \
 166|       |do { \
 167|       |    _PyFrame_SetStackPointer(frame, stack_pointer); \
 168|       |    int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
 169|       |    stack_pointer = _PyFrame_GetStackPointer(frame); \
 170|       |    frame->lltrace = lltrace; \
 171|       |} while (0)
 172|       |#else
 173|  2.29G|#define LLTRACE_RESUME_FRAME() ((void)0)
 174|       |#endif
 175|       |
 176|       |#ifdef Py_GIL_DISABLED
 177|       |#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
 178|       |#else
 179|       |#define QSBR_QUIESCENT_STATE(tstate)
 180|       |#endif
 181|       |
 182|       |
 183|       |/* Do interpreter dispatch accounting for tracing and instrumentation */
 184|       |#define DISPATCH() \
 185|  57.8G|    { \
 186|  57.8G|        assert(frame->stackpointer == NULL); \
 187|  57.8G|        NEXTOPARG(); \
 188|  57.8G|        PRE_DISPATCH_GOTO(); \
 189|  57.8G|        DISPATCH_GOTO(); \
 190|  57.8G|    }
 191|       |
 192|       |#define DISPATCH_NON_TRACING() \
 193|       |    { \
 194|       |        assert(frame->stackpointer == NULL); \
 195|       |        NEXTOPARG(); \
 196|       |        PRE_DISPATCH_GOTO(); \
 197|       |        DISPATCH_GOTO_NON_TRACING(); \
 198|       |    }
 199|       |
 200|       |#define DISPATCH_SAME_OPARG() \
 201|  6.76M|    { \
 202|  6.76M|        opcode = next_instr->op.code; \
 203|  6.76M|        PRE_DISPATCH_GOTO(); \
 204|  6.76M|        DISPATCH_GOTO_NON_TRACING(); \
 205|  6.76M|    }
 206|       |
 207|       |#define DISPATCH_INLINED(NEW_FRAME)                     \
 208|  1.14M|    do {                                                \
 209|  1.14M|        assert(tstate->interp->eval_frame == NULL);     \
 210|  1.14M|        _PyFrame_SetStackPointer(frame, stack_pointer); \
 211|  1.14M|        assert((NEW_FRAME)->previous == frame);         \
 212|  1.14M|        frame = tstate->current_frame = (NEW_FRAME);     \
 213|  1.14M|        CALL_STAT_INC(inlined_py_calls);                \
 214|  1.14M|        JUMP_TO_LABEL(start_frame);                      \
 215|      0|    } while (0)
 216|       |
 217|       |/* Tuple access macros */
 218|       |
 219|       |#ifndef Py_DEBUG
 220|  2.96G|#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
 221|       |#else
 222|       |static inline PyObject *
 223|       |GETITEM(PyObject *v, Py_ssize_t i) {
 224|       |    assert(PyTuple_Check(v));
 225|       |    assert(i >= 0);
 226|       |    assert(i < PyTuple_GET_SIZE(v));
 227|       |    return PyTuple_GET_ITEM(v, i);
 228|       |}
 229|       |#endif
 230|       |
 231|       |/* Code access macros */
 232|       |
 233|       |/* The integer overflow is checked by an assertion below. */
 234|  60.5M|#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
 235|  57.8G|#define NEXTOPARG()  do { \
 236|  57.8G|        _Py_CODEUNIT word  = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
 237|  57.8G|        opcode = word.op.code; \
 238|  57.8G|        oparg = word.op.arg; \
 239|  57.8G|    } while (0)
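
NEXTOPARG() above loads one 16-bit code unit and splits it into opcode and oparg. The union below is a standalone approximation written for illustration; the real _Py_CODEUNIT is defined elsewhere in CPython and also carries the inline-cache views that the FT_ATOMIC_* load above operates on:

/* Standalone sketch of the NEXTOPARG() decode step.  demo_codeunit only
 * approximates _Py_CODEUNIT: one byte of opcode and one byte of oparg
 * sharing storage with the 16-bit "cache" view that is loaded in one go. */
#include <stdint.h>
#include <stdio.h>

typedef union {
    uint16_t cache;                           /* the unit as a single 16-bit load */
    struct { uint8_t code; uint8_t arg; } op; /* assumed layout, for the demo */
} demo_codeunit;

int main(void) {
    demo_codeunit written;
    written.op.code = 100;                    /* pretend opcode */
    written.op.arg  = 3;                      /* pretend oparg */

    /* NEXTOPARG() goes the other way: load .cache as one unit, then read
     * the opcode and oparg fields back out of the same storage. */
    demo_codeunit word = { .cache = written.cache };
    printf("opcode=%u oparg=%u\n", word.op.code, word.op.arg);
    return 0;
}

Loading the whole unit once and then picking fields out of it is what lets the free-threaded build use a single relaxed 16-bit atomic load, so opcode and oparg are always read consistently.
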
 240|       |
 241|       |/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
 242|       | * for advancing to the next instruction, taking into account cache entries
 243|       | * and skipped instructions.
 244|       | */
 245|  8.78G|#define JUMPBY(x)       (next_instr += (x))
 246|   379M|#define SKIP_OVER(x)    (next_instr += (x))
 247|       |
 248|       |#define STACK_LEVEL()     ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
 249|       |#define STACK_SIZE()      (_PyFrame_GetCode(frame)->co_stacksize)
 250|       |
 251|       |#define WITHIN_STACK_BOUNDS() \
 252|       |   (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
 253|       |
 254|       |/* Data access macros */
 255|       |#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
 256|       |#define FRAME_CO_NAMES  (_PyFrame_GetCode(frame)->co_names)
 257|       |
 258|       |/* Local variable macros */
 259|       |
 260|  1.35M|#define LOCALS_ARRAY    (frame->localsplus)
 261|  27.7G|#define GETLOCAL(i)     (frame->localsplus[i])
 262|       |
 263|       |
 264|       |#ifdef Py_STATS
 265|       |#define UPDATE_MISS_STATS(INSTNAME)                              \
 266|       |    do {                                                         \
 267|       |        STAT_INC(opcode, miss);                                  \
 268|       |        STAT_INC((INSTNAME), miss);                              \
 269|       |        /* The counter is always the first cache entry: */       \
 270|       |        if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) {       \
 271|       |            STAT_INC((INSTNAME), deopt);                         \
 272|       |        }                                                        \
 273|       |    } while (0)
 274|       |#else
 275|   294M|#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
 276|       |#endif
 277|       |
 278|       |
 279|       |// Try to lock an object in the free threading build, if it's not already
 280|       |// locked. Use with a DEOPT_IF() to deopt if the object is already locked.
 281|       |// These are no-ops in the default GIL build. The general pattern is:
 282|       |//
 283|       |// DEOPT_IF(!LOCK_OBJECT(op));
 284|       |// if (/* condition fails */) {
 285|       |//     UNLOCK_OBJECT(op);
 286|       |//     DEOPT_IF(true);
 287|       |//  }
 288|       |//  ...
 289|       |//  UNLOCK_OBJECT(op);
 290|       |//
 291|       |// NOTE: The object must be unlocked on every exit code path and you should
 292|       |// avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
 293|       |// object is locked.
 294|       |#ifdef Py_GIL_DISABLED
 295|       |#  define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
 296|       |#  define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
 297|       |#else
 298|   789M|#  define LOCK_OBJECT(op) (1)
 299|   789M|#  define UNLOCK_OBJECT(op) ((void)0)
 300|       |#endif
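
The locking pattern described in the comment above, spelled out as a runnable toy. This is not an instruction body from bytecodes.c: toy_lock(), toy_unlock() and the deopt handling are stand-ins invented for the sketch, mirroring how LOCK_OBJECT()/UNLOCK_OBJECT() behave in the free-threaded build (in the default GIL build both are no-ops, as defined above, and DEOPT_IF() really leaves the specialized handler rather than returning):

/* The unlock-on-every-exit-path pattern from the comment above, with
 * stand-in helpers.  A toy model only: deopt here just reports and
 * returns, whereas the real DEOPT_IF() jumps back to the unspecialized
 * instruction. */
#include <stdbool.h>
#include <stdio.h>

typedef struct { bool locked; int value; } toy_object;

static bool toy_lock(toy_object *op)   { if (op->locked) return false; op->locked = true; return true; }
static void toy_unlock(toy_object *op) { op->locked = false; }

static int specialized_op(toy_object *op, int expected) {
    if (!toy_lock(op)) {                  /* DEOPT_IF(!LOCK_OBJECT(op)); */
        printf("deopt: already locked\n");
        return -1;
    }
    if (op->value != expected) {          /* the "condition fails" guard */
        toy_unlock(op);                   /* UNLOCK_OBJECT(op); */
        printf("deopt: guard failed\n");  /* DEOPT_IF(true); */
        return -1;
    }
    op->value += 1;                       /* ... the fast path ... */
    toy_unlock(op);                       /* UNLOCK_OBJECT(op); */
    return 0;
}

int main(void) {
    toy_object o = { .locked = false, .value = 7 };
    specialized_op(&o, 7);   /* fast path: lock, work, unlock */
    specialized_op(&o, 7);   /* guard fails (value is now 8): unlock, then deopt */
    return 0;
}

The property the NOTE insists on is visible in both exits of specialized_op(): every path out of the guarded region unlocks before leaving.
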
 301|       |
 302|  1.52G|#define GLOBALS() frame->f_globals
 303|   553M|#define BUILTINS() frame->f_builtins
 304|   147k|#define LOCALS() frame->f_locals
 305|       |#define CONSTS() _PyFrame_GetCode(frame)->co_consts
 306|       |#define NAMES() _PyFrame_GetCode(frame)->co_names
 307|       |
 308|       |#define DTRACE_FUNCTION_ENTRY()  \
 309|       |    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
 310|       |        dtrace_function_entry(frame); \
 311|       |    }
 312|       |
 313|       |/* This takes a uint16_t instead of a _Py_BackoffCounter,
 314|       | * because it is used directly on the cache entry in generated code,
 315|       | * which is always an integral type. */
 316|       |// Force re-specialization when tracing a side exit to get good side exits.
 317|       |#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
 318|  1.80G|    backoff_counter_triggers(forge_backoff_counter((COUNTER)))
 319|       |
 320|       |#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
 321|  1.80G|    do { \
 322|  1.80G|        (COUNTER) = advance_backoff_counter((COUNTER)); \
 323|  1.80G|    } while (0);
 324|       |
 325|       |#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
 326|      0|    do { \
 327|      0|        (COUNTER) = pause_backoff_counter((COUNTER)); \
 328|      0|    } while (0);
 329|       |
 330|       |#ifdef ENABLE_SPECIALIZATION_FT
 331|       |/* Multiple threads may execute these concurrently if thread-local bytecode is
 332|       | * disabled and they all execute the main copy of the bytecode. Specialization
 333|       | * is disabled in that case so the value is unused, but the RMW cycle should be
 334|       | * free of data races.
 335|       | */
 336|       |#define RECORD_BRANCH_TAKEN(bitset, flag) \
 337|  4.39G|    FT_ATOMIC_STORE_UINT16_RELAXED(       \
 338|  4.39G|        bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
 339|       |#else
 340|       |#define RECORD_BRANCH_TAKEN(bitset, flag)
 341|       |#endif
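
RECORD_BRANCH_TAKEN() above keeps a rolling 16-bit record of recent branch outcomes: the existing bits shift left and the newest flag lands in bit 0, so anything older than 16 branches falls off the top. A plain, non-atomic standalone model of that update, for illustration only:

/* Toy model of the branch-history update done by RECORD_BRANCH_TAKEN():
 * newest outcome in bit 0, oldest outcomes shifted out after 16 branches.
 * Plain uint16_t here; the real macro uses relaxed 16-bit atomics so the
 * read-modify-write stays free of data races, as the comment explains. */
#include <stdint.h>
#include <stdio.h>

static void record_branch(uint16_t *bitset, int taken) {
    *bitset = (uint16_t)((*bitset << 1) | (taken ? 1 : 0));
}

int main(void) {
    uint16_t history = 0;
    int outcomes[] = { 1, 1, 0, 1 };          /* taken, taken, not taken, taken */
    for (int i = 0; i < 4; i++) {
        record_branch(&history, outcomes[i]);
    }
    printf("history = 0x%04x\n", history);    /* 0b1101 -> 0x000d */
    return 0;
}
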
 342|       |
 343|       |#define UNBOUNDLOCAL_ERROR_MSG \
 344|      0|    "cannot access local variable '%s' where it is not associated with a value"
 345|       |#define UNBOUNDFREE_ERROR_MSG \
 346|      0|    "cannot access free variable '%s' where it is not associated with a value" \
 347|      0|    " in enclosing scope"
 348|     17|#define NAME_ERROR_MSG "name '%.200s' is not defined"
 349|       |
 350|       |// If a trace function sets a new f_lineno and
 351|       |// *then* raises, we use the destination when searching
 352|       |// for an exception handler, displaying the traceback, and so on.
 353|      0|#define INSTRUMENTED_JUMP(src, dest, event) \
 354|      0|do { \
 355|      0|    if (tstate->tracing) {\
 356|      0|        next_instr = dest; \
 357|      0|    } else { \
 358|      0|        _PyFrame_SetStackPointer(frame, stack_pointer); \
 359|      0|        next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
 360|      0|        stack_pointer = _PyFrame_GetStackPointer(frame); \
 361|      0|        if (next_instr == NULL) { \
 362|      0|            next_instr = (dest)+1; \
 363|      0|            JUMP_TO_LABEL(error); \
 364|      0|        } \
 365|      0|    } \
 366|      0|} while (0);
 367|       |
 368|       |
 369|   274M|static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
 370|   274M|    return (tstate->py_recursion_remaining-- <= 0) &&
 371|    206|        _Py_CheckRecursiveCallPy(tstate);
 372|   274M|}
 373|       |
 374|  1.17G|static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate)  {
 375|  1.17G|    tstate->py_recursion_remaining++;
 376|  1.17G|}
 377|       |
 378|       |/* Implementation of "macros" that modify the instruction pointer,
 379|       | * stack pointer, or frame pointer.
 380|       | * These need to be treated differently by tier 1 and 2.
 381|       | * The Tier 1 version is here; Tier 2 is inlined in ceval.c. */
 382|       |
 383|  2.03G|#define LOAD_IP(OFFSET) do { \
 384|  2.03G|        next_instr = frame->instr_ptr + (OFFSET); \
 385|  2.03G|    } while (0)
 386|       |
 387|       |/* There's no STORE_IP(), it's inlined by the code generator. */
 388|       |
 389|   896M|#define LOAD_SP() \
 390|   896M|stack_pointer = _PyFrame_GetStackPointer(frame)
 391|       |
 392|       |#define SAVE_SP() \
 393|       |_PyFrame_SetStackPointer(frame, stack_pointer)
 394|       |
 395|       |/* Tier-switching macros. */
 396|       |
 397|       |#define TIER1_TO_TIER2(EXECUTOR)                        \
 398|       |do {                                                   \
 399|       |    OPT_STAT_INC(traces_executed);                     \
 400|       |    next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
 401|       |    frame = tstate->current_frame;                     \
 402|       |    stack_pointer = _PyFrame_GetStackPointer(frame);   \
 403|       |    int keep_tracing_bit = (uintptr_t)next_instr & 1;   \
 404|       |    next_instr = (_Py_CODEUNIT *)(((uintptr_t)next_instr) & (~1)); \
 405|       |    if (next_instr == NULL) {                          \
 406|       |        /* gh-140104: The exception handler expects frame->instr_ptr
 407|       |            to point after this_instr, not at this_instr! */ \
 408|       |        next_instr = frame->instr_ptr + 1;                 \
 409|       |        JUMP_TO_LABEL(error);                          \
 410|       |    }                                                  \
 411|       |    if (keep_tracing_bit) { \
 412|       |        assert(((_PyThreadStateImpl *)tstate)->jit_tracer_state.prev_state.code_curr_size == 2); \
 413|       |        ENTER_TRACING(); \
 414|       |        DISPATCH_NON_TRACING(); \
 415|       |    } \
 416|       |    DISPATCH();                                        \
 417|       |} while (0)
 418|       |
 419|       |#define TIER2_TO_TIER2(EXECUTOR) \
 420|       |do {                                                   \
 421|       |    OPT_STAT_INC(traces_executed);                     \
 422|       |    current_executor = (EXECUTOR);                     \
 423|       |    goto tier2_start;                                  \
 424|       |} while (0)
 425|       |
 426|       |#define GOTO_TIER_ONE_SETUP \
 427|       |    tstate->current_executor = NULL;                              \
 428|       |    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
 429|       |    _PyFrame_SetStackPointer(frame, stack_pointer);
 430|       |
 431|       |#define GOTO_TIER_ONE(TARGET) \
 432|       |    do \
 433|       |    { \
 434|       |        GOTO_TIER_ONE_SETUP \
 435|       |        return (_Py_CODEUNIT *)(TARGET); \
 436|       |    } while (0)
 437|       |
 438|       |#define GOTO_TIER_ONE_CONTINUE_TRACING(TARGET) \
 439|       |    do \
 440|       |    { \
 441|       |        GOTO_TIER_ONE_SETUP \
 442|       |        return (_Py_CODEUNIT *)(((uintptr_t)(TARGET))| 1); \
 443|       |    } while (0)
 444|       |
 445|       |#define CURRENT_OPARG()    (next_uop[-1].oparg)
 446|       |#define CURRENT_OPERAND0() (next_uop[-1].operand0)
 447|       |#define CURRENT_OPERAND1() (next_uop[-1].operand1)
 448|       |#define CURRENT_TARGET()   (next_uop[-1].target)
 449|       |
 450|       |#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
 451|       |#define JUMP_TO_ERROR() goto jump_to_error_target
 452|       |
 453|       |/* Stackref macros */
 454|       |
 455|       |/* How much scratch space to give stackref to PyObject* conversion. */
 456|  1.97G|#define MAX_STACKREF_SCRATCH 10
 457|       |
 458|       |#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
 459|       |    /* +1 because vectorcall might use -1 to write self */ \
 460|  1.97G|    PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
 461|  1.97G|    PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1);
 462|       |
 463|       |#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
 464|       |    /* +1 because we +1 previously */ \
 465|  1.97G|    _PyObjectArray_Free(NAME - 1, NAME##_temp);
 466|       |
 467|  1.97G|#define CONVERSION_FAILED(NAME) ((NAME) == NULL)
 468|       |
 469|       |static inline int
 470|  4.13G|check_periodics(PyThreadState *tstate) {
 471|  4.13G|    _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
 472|  4.13G|    QSBR_QUIESCENT_STATE(tstate);
 473|  4.13G|    if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
 474|  79.3k|        return _Py_HandlePending(tstate);
 475|  79.3k|    }
 476|  4.13G|    return 0;
 477|  4.13G|}
 478|       |