/src/cpython/Python/ceval_macros.h

Source
// Macros and other things needed by ceval.c, and bytecodes.c

/* Computed GOTOs, or
       the-optimization-commonly-but-improperly-known-as-"threaded code"
   using gcc's labels-as-values extension
   (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).

   The traditional bytecode evaluation loop uses a "switch" statement, which
   decent compilers will optimize as a single indirect branch instruction
   combined with a lookup table of jump addresses. However, since the
   indirect jump instruction is shared by all opcodes, the CPU will have a
   hard time making the right prediction for where to jump next (actually,
   it will be always wrong except in the uncommon case of a sequence of
   several identical opcodes).

   "Threaded code" in contrast, uses an explicit jump table and an explicit
   indirect jump instruction at the end of each opcode. Since the jump
   instruction is at a different address for each opcode, the CPU will make a
   separate prediction for each of these instructions, which is equivalent to
   predicting the second opcode of each opcode pair. These predictions have
   a much better chance to turn out valid, especially in small bytecode loops.

   A mispredicted branch on a modern CPU flushes the whole pipeline and
   can cost several CPU cycles (depending on the pipeline depth),
   and potentially many more instructions (depending on the pipeline width).
   A correctly predicted branch, however, is nearly free.

   At the time of this writing, the "threaded code" version is up to 15-20%
   faster than the normal "switch" version, depending on the compiler and the
   CPU architecture.

   NOTE: care must be taken that the compiler doesn't try to "optimize" the
   indirect jumps by sharing them between all opcodes. Such optimizations
   can be disabled on gcc by using the -fno-gcse flag (or possibly
   -fno-crossjumping).
*/

/* Use macros rather than inline functions, to make it as clear as possible
 * to the C compiler that the tracing check is a simple test then branch.
 * We want to be sure that the compiler knows this before it generates
 * the CFG.
 */

#ifdef WITH_DTRACE
#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
#else
#define OR_DTRACE_LINE
#endif

#ifdef HAVE_COMPUTED_GOTOS
    #ifndef USE_COMPUTED_GOTOS
    #define USE_COMPUTED_GOTOS 1
    #endif
#else
    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
    #error "Computed gotos are not supported on this compiler."
    #endif
    #undef USE_COMPUTED_GOTOS
    #define USE_COMPUTED_GOTOS 0
#endif

#ifdef Py_STATS
#define INSTRUCTION_STATS(op) \
    do { \
        PyStats *s = _PyStats_GET(); \
        OPCODE_EXE_INC(op); \
        if (s) s->opcode_stats[lastopcode].pair_count[op]++; \
        lastopcode = op; \
    } while (0)
#else
#define INSTRUCTION_STATS(op) ((void)0)
#endif

#ifdef Py_STATS
#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, const void *instruction_funcptr_table, int oparg, int lastopcode
#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg, lastopcode
#else
#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, const void *instruction_funcptr_table, int oparg
#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg
#endif

#if _Py_TAIL_CALL_INTERP
#   if defined(__clang__) || defined(__GNUC__)
#       if !_Py__has_attribute(preserve_none) || !_Py__has_attribute(musttail)
#           error "This compiler does not have support for efficient tail calling."
#       endif
#   elif defined(_MSC_VER) && (_MSC_VER < 1950)
#       error "You need at least VS 2026 / PlatformToolset v145 for tail calling."
#   endif
#   if defined(_MSC_VER) && !defined(__clang__)
#      define Py_MUSTTAIL [[msvc::musttail]]
#      define Py_PRESERVE_NONE_CC __preserve_none
#   else
#       define Py_MUSTTAIL __attribute__((musttail))
#       define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
#   endif
    typedef PyObject *(Py_PRESERVE_NONE_CC *py_tail_call_funcptr)(TAIL_CALL_PARAMS);

#   define DISPATCH_TABLE_VAR instruction_funcptr_table
#   define DISPATCH_TABLE instruction_funcptr_handler_table
#   define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
#   define TARGET(op) Py_NO_INLINE PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_##op(TAIL_CALL_PARAMS)

#   define DISPATCH_GOTO() \
        do { \
            Py_MUSTTAIL return (((py_tail_call_funcptr *)instruction_funcptr_table)[opcode])(TAIL_CALL_ARGS); \
        } while (0)
#   define DISPATCH_GOTO_NON_TRACING() \
        do { \
            Py_MUSTTAIL return (((py_tail_call_funcptr *)DISPATCH_TABLE)[opcode])(TAIL_CALL_ARGS); \
        } while (0)
#   define JUMP_TO_LABEL(name) \
        do { \
            Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
        } while (0)
#   ifdef Py_STATS
#       define JUMP_TO_PREDICTED(name) \
            do { \
                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg, lastopcode); \
            } while (0)
#   else
#       define JUMP_TO_PREDICTED(name) \
            do { \
                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg); \
            } while (0)
#   endif
#    define LABEL(name) TARGET(name)
#elif USE_COMPUTED_GOTOS
#  define DISPATCH_TABLE_VAR opcode_targets
#  define DISPATCH_TABLE opcode_targets_table
#  define TRACING_DISPATCH_TABLE opcode_tracing_targets_table
#  define TARGET(op) TARGET_##op:
#  define DISPATCH_GOTO() goto *opcode_targets[opcode]
#  define DISPATCH_GOTO_NON_TRACING() goto *DISPATCH_TABLE[opcode];
#  define JUMP_TO_LABEL(name) goto name;
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
#  define LABEL(name) name:
#else
#  define TARGET(op) case op: TARGET_##op:
#  define DISPATCH_GOTO() dispatch_code = opcode | tracing_mode ; goto dispatch_opcode
#  define DISPATCH_GOTO_NON_TRACING() dispatch_code = opcode; goto dispatch_opcode
#  define JUMP_TO_LABEL(name) goto name;
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
#  define LABEL(name) name:
#endif

#if (_Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS) && _Py_TIER2
#  define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE)
#  define ENTER_TRACING() \
    DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE;
#  define LEAVE_TRACING() \
    DISPATCH_TABLE_VAR = DISPATCH_TABLE;
#else
#  define IS_JIT_TRACING() (tracing_mode != 0)
#  define ENTER_TRACING() tracing_mode = 255
#  define LEAVE_TRACING() tracing_mode = 0
#endif

#if _Py_TIER2
#define STOP_TRACING() \
    do { \
        if (IS_JIT_TRACING()) { \
            LEAVE_TRACING(); \
            _PyJit_FinalizeTracing(tstate, 0); \
        } \
    } while (0);
#else
#define STOP_TRACING() ((void)(0));
#endif


/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
#ifdef Py_DEBUG
#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
    lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
#else
#define PRE_DISPATCH_GOTO() ((void)0)
#endif

#ifdef Py_DEBUG
#define LLTRACE_RESUME_FRAME() \
do { \
    _PyFrame_SetStackPointer(frame, stack_pointer); \
    int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
    stack_pointer = _PyFrame_GetStackPointer(frame); \
    frame->lltrace = lltrace; \
} while (0)
#else
#define LLTRACE_RESUME_FRAME() ((void)0)
#endif

#ifdef Py_GIL_DISABLED
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
#else
#define QSBR_QUIESCENT_STATE(tstate)
#endif


/* Do interpreter dispatch accounting for tracing and instrumentation */
#define DISPATCH() \
    { \
        assert(frame->stackpointer == NULL); \
        NEXTOPARG(); \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO(); \
    }

#define DISPATCH_NON_TRACING() \
    { \
        assert(frame->stackpointer == NULL); \
        NEXTOPARG(); \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO_NON_TRACING(); \
    }

#define DISPATCH_SAME_OPARG() \
    { \
        opcode = next_instr->op.code; \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO_NON_TRACING(); \
    }

#define DISPATCH_INLINED(NEW_FRAME)                     \
    do {                                                \
        assert(tstate->interp->eval_frame == NULL);     \
        _PyFrame_SetStackPointer(frame, stack_pointer); \
        assert((NEW_FRAME)->previous == frame);         \
        frame = tstate->current_frame = (NEW_FRAME);     \
        CALL_STAT_INC(inlined_py_calls);                \
        JUMP_TO_LABEL(start_frame);                      \
    } while (0)

/* Tuple access macros */

#ifndef Py_DEBUG
#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
#else
static inline PyObject *
GETITEM(PyObject *v, Py_ssize_t i) {
    assert(PyTuple_Check(v));
    assert(i >= 0);
    assert(i < PyTuple_GET_SIZE(v));
    return PyTuple_GET_ITEM(v, i);
}
#endif

/* Code access macros */

/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
#define NEXTOPARG()  do { \
        _Py_CODEUNIT word  = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
        opcode = word.op.code; \
        oparg = word.op.arg; \
    } while (0)

/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
 * for advancing to the next instruction, taking into account cache entries
 * and skipped instructions.
 */
#define JUMPBY(x)       (next_instr += (x))
#define SKIP_OVER(x)    (next_instr += (x))

#define STACK_LEVEL()     ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
#define STACK_SIZE()      (_PyFrame_GetCode(frame)->co_stacksize)

#define WITHIN_STACK_BOUNDS() \
   (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))

#if defined(Py_DEBUG) && !defined(_Py_JIT)
// This allows temporary stack "overflows", provided it's all in the cache at any point of time.
#define WITHIN_STACK_BOUNDS_IGNORING_CACHE() \
   (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && (STACK_LEVEL()) <= STACK_SIZE()))
#else
#define WITHIN_STACK_BOUNDS_IGNORING_CACHE WITHIN_STACK_BOUNDS
#endif

/* Data access macros */
#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
#define FRAME_CO_NAMES  (_PyFrame_GetCode(frame)->co_names)

/* Local variable macros */

#define LOCALS_ARRAY    (frame->localsplus)
#define GETLOCAL(i)     (frame->localsplus[i])


#ifdef Py_STATS
#define UPDATE_MISS_STATS(INSTNAME)                              \
    do {                                                         \
        STAT_INC(opcode, miss);                                  \
        STAT_INC((INSTNAME), miss);                              \
        /* The counter is always the first cache entry: */       \
        if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) {       \
            STAT_INC((INSTNAME), deopt);                         \
        }                                                        \
    } while (0)
#else
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
#endif


// Try to lock an object in the free threading build, if it's not already
// locked. Use with a DEOPT_IF() to deopt if the object is already locked.
// These are no-ops in the default GIL build. The general pattern is:
//
// DEOPT_IF(!LOCK_OBJECT(op));
// if (/* condition fails */) {
//     UNLOCK_OBJECT(op);
//     DEOPT_IF(true);
//  }
//  ...
//  UNLOCK_OBJECT(op);
//
// NOTE: The object must be unlocked on every exit code path and you should
// avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
// object is locked.
#ifdef Py_GIL_DISABLED
#  define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
#  define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
#else
#  define LOCK_OBJECT(op) (1)
#  define UNLOCK_OBJECT(op) ((void)0)
#endif

#define GLOBALS() frame->f_globals
#define BUILTINS() frame->f_builtins
#define LOCALS() frame->f_locals
#define CONSTS() _PyFrame_GetCode(frame)->co_consts
#define NAMES() _PyFrame_GetCode(frame)->co_names

#define DTRACE_FUNCTION_ENTRY()  \
    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
        dtrace_function_entry(frame); \
    }

/* This takes a uint16_t instead of a _Py_BackoffCounter,
 * because it is used directly on the cache entry in generated code,
 * which is always an integral type. */
// Force re-specialization when tracing a side exit to get good side exits.
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
    backoff_counter_triggers(forge_backoff_counter((COUNTER)))

#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
    do { \
        (COUNTER) = advance_backoff_counter((COUNTER)); \
    } while (0);

#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
    do { \
        (COUNTER) = pause_backoff_counter((COUNTER)); \
    } while (0);

#ifdef ENABLE_SPECIALIZATION
/* Multiple threads may execute these concurrently if thread-local bytecode is
 * disabled and they all execute the main copy of the bytecode. Specialization
 * is disabled in that case so the value is unused, but the RMW cycle should be
 * free of data races.
 */
#define RECORD_BRANCH_TAKEN(bitset, flag) \
    FT_ATOMIC_STORE_UINT16_RELAXED(       \
        bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
#else
#define RECORD_BRANCH_TAKEN(bitset, flag)
#endif

#define UNBOUNDLOCAL_ERROR_MSG \
    "cannot access local variable '%s' where it is not associated with a value"
#define UNBOUNDFREE_ERROR_MSG \
    "cannot access free variable '%s' where it is not associated with a value" \
    " in enclosing scope"
#define NAME_ERROR_MSG "name '%.200s' is not defined"

// If a trace function sets a new f_lineno and
// *then* raises, we use the destination when searching
// for an exception handler, displaying the traceback, and so on
#define INSTRUMENTED_JUMP(src, dest, event) \
do { \
    if (tstate->tracing) {\
        next_instr = dest; \
    } else { \
        _PyFrame_SetStackPointer(frame, stack_pointer); \
        next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
        stack_pointer = _PyFrame_GetStackPointer(frame); \
        if (next_instr == NULL) { \
            next_instr = (dest)+1; \
            JUMP_TO_LABEL(error); \
        } \
    } \
} while (0);


static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
    return (tstate->py_recursion_remaining-- <= 0) &&
        _Py_CheckRecursiveCallPy(tstate);
}

static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate)  {
    tstate->py_recursion_remaining++;
}

/* Implementation of "macros" that modify the instruction pointer,
 * stack pointer, or frame pointer.
 * These need to treated differently by tier 1 and 2.
 * The Tier 1 version is here; Tier 2 is inlined in ceval.c. */

#define LOAD_IP(OFFSET) do { \
        next_instr = frame->instr_ptr + (OFFSET); \
    } while (0)

/* There's no STORE_IP(), it's inlined by the code generator. */

#define LOAD_SP() \
stack_pointer = _PyFrame_GetStackPointer(frame)

#define SAVE_SP() \
_PyFrame_SetStackPointer(frame, stack_pointer)

/* Tier-switching macros. */

#define TIER1_TO_TIER2(EXECUTOR)                        \
do {                                                   \
    OPT_STAT_INC(traces_executed);                     \
    next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
    frame = tstate->current_frame;                     \
    stack_pointer = _PyFrame_GetStackPointer(frame);   \
    int keep_tracing_bit = (uintptr_t)next_instr & 1;   \
    next_instr = (_Py_CODEUNIT *)(((uintptr_t)next_instr) & (~1)); \
    if (next_instr == NULL) {                          \
        /* gh-140104: The exception handler expects frame->instr_ptr
            to after this_instr, not this_instr! */ \
        next_instr = frame->instr_ptr + 1;                 \
        JUMP_TO_LABEL(error);                          \
    }                                                  \
    if (keep_tracing_bit) { \
        assert(uop_buffer_length(&((_PyThreadStateImpl *)tstate)->jit_tracer_state->code_buffer)); \
        ENTER_TRACING(); \
        DISPATCH_NON_TRACING(); \
    } \
    DISPATCH();                                        \
} while (0)

#define TIER2_TO_TIER2(EXECUTOR) \
do {                                                   \
    OPT_STAT_INC(traces_executed);                     \
    current_executor = (EXECUTOR);                     \
    goto tier2_start;                                  \
} while (0)

#define GOTO_TIER_ONE_SETUP \
    tstate->current_executor = NULL;                              \
    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
    _PyFrame_SetStackPointer(frame, stack_pointer);

#define GOTO_TIER_ONE(TARGET) \
    do \
    { \
        GOTO_TIER_ONE_SETUP \
        return (_Py_CODEUNIT *)(TARGET); \
    } while (0)

#define GOTO_TIER_ONE_CONTINUE_TRACING(TARGET) \
    do \
    { \
        GOTO_TIER_ONE_SETUP \
        return (_Py_CODEUNIT *)(((uintptr_t)(TARGET))| 1); \
    } while (0)

#define CURRENT_OPARG()    (next_uop[-1].oparg)
#define CURRENT_OPERAND0_64() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_64() (next_uop[-1].operand1)
#define CURRENT_OPERAND0_32() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_32() (next_uop[-1].operand1)
#define CURRENT_OPERAND0_16() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_16() (next_uop[-1].operand1)
#define CURRENT_TARGET()   (next_uop[-1].target)

#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
#define JUMP_TO_ERROR() goto jump_to_error_target

/* Stackref macros */

/* How much scratch space to give stackref to PyObject* conversion. */
#define MAX_STACKREF_SCRATCH 10

#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
    /* +1 because vectorcall might use -1 to write self */ \
    PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
    PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp);

#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
    /* +1 because we +1 previously */ \
    _PyObjectArray_Free(NAME - 1, NAME##_temp);

#define CONVERSION_FAILED(NAME) ((NAME) == NULL)

#if defined(Py_DEBUG) && !defined(_Py_JIT)
#define SET_CURRENT_CACHED_VALUES(N) current_cached_values = (N)
#define CHECK_CURRENT_CACHED_VALUES(N) assert(current_cached_values == (N))
#else
#define SET_CURRENT_CACHED_VALUES(N) ((void)0)
#define CHECK_CURRENT_CACHED_VALUES(N) ((void)0)
#endif

#define IS_PEP523_HOOKED(tstate) (tstate->interp->eval_frame != NULL)

static inline int
check_periodics(PyThreadState *tstate) {
    _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
    QSBR_QUIESCENT_STATE(tstate);
    if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
        return _Py_HandlePending(tstate);
    }
    return 0;
}

// Mark the generator as executing. Returns true if the state was changed,
// false if it was already executing or finished.
static inline bool
gen_try_set_executing(PyGenObject *gen)
{
#ifdef Py_GIL_DISABLED
    if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) {
        int8_t frame_state = _Py_atomic_load_int8_relaxed(&gen->gi_frame_state);
        while (frame_state < FRAME_SUSPENDED_YIELD_FROM_LOCKED) {
            if (_Py_atomic_compare_exchange_int8(&gen->gi_frame_state,
                                                 &frame_state,
                                                 FRAME_EXECUTING)) {
                return true;
            }
        }
        // NB: We return false for FRAME_SUSPENDED_YIELD_FROM_LOCKED as well.
        // That case is rare enough that we can just handle it in the deopt.
        return false;
    }
#endif
    // Use faster non-atomic modifications in the GIL-enabled build and when
    // the object is uniquely referenced in the free-threaded build.
    if (gen->gi_frame_state < FRAME_EXECUTING) {
        assert(gen->gi_frame_state != FRAME_SUSPENDED_YIELD_FROM_LOCKED);
        gen->gi_frame_state = FRAME_EXECUTING;
        return true;
    }
    return false;
}

Coverage Report

Created: 2026-02-26 06:53

Line	Count	Source
1		// Macros and other things needed by ceval.c, and bytecodes.c
2
3		/* Computed GOTOs, or
4		the-optimization-commonly-but-improperly-known-as-"threaded code"
5		using gcc's labels-as-values extension
6		(http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
7
8		The traditional bytecode evaluation loop uses a "switch" statement, which
9		decent compilers will optimize as a single indirect branch instruction
10		combined with a lookup table of jump addresses. However, since the
11		indirect jump instruction is shared by all opcodes, the CPU will have a
12		hard time making the right prediction for where to jump next (actually,
13		it will be always wrong except in the uncommon case of a sequence of
14		several identical opcodes).
15
16		"Threaded code" in contrast, uses an explicit jump table and an explicit
17		indirect jump instruction at the end of each opcode. Since the jump
18		instruction is at a different address for each opcode, the CPU will make a
19		separate prediction for each of these instructions, which is equivalent to
20		predicting the second opcode of each opcode pair. These predictions have
21		a much better chance to turn out valid, especially in small bytecode loops.
22
23		A mispredicted branch on a modern CPU flushes the whole pipeline and
24		can cost several CPU cycles (depending on the pipeline depth),
25		and potentially many more instructions (depending on the pipeline width).
26		A correctly predicted branch, however, is nearly free.
27
28		At the time of this writing, the "threaded code" version is up to 15-20%
29		faster than the normal "switch" version, depending on the compiler and the
30		CPU architecture.
31
32		NOTE: care must be taken that the compiler doesn't try to "optimize" the
33		indirect jumps by sharing them between all opcodes. Such optimizations
34		can be disabled on gcc by using the -fno-gcse flag (or possibly
35		-fno-crossjumping).
36		*/
37
38		/* Use macros rather than inline functions, to make it as clear as possible
39		* to the C compiler that the tracing check is a simple test then branch.
40		* We want to be sure that the compiler knows this before it generates
41		* the CFG.
42		*/
43
44		#ifdef WITH_DTRACE
45		#define OR_DTRACE_LINE \| (PyDTrace_LINE_ENABLED() ? 255 : 0)
46		#else
47		#define OR_DTRACE_LINE
48		#endif
49
50		#ifdef HAVE_COMPUTED_GOTOS
51		#ifndef USE_COMPUTED_GOTOS
52		#define USE_COMPUTED_GOTOS 1
53		#endif
54		#else
55		#if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
56		#error "Computed gotos are not supported on this compiler."
57		#endif
58		#undef USE_COMPUTED_GOTOS
59		#define USE_COMPUTED_GOTOS 0
60		#endif
61
62		#ifdef Py_STATS
63		#define INSTRUCTION_STATS(op) \
64		do { \
65		PyStats *s = _PyStats_GET(); \
66		OPCODE_EXE_INC(op); \
67		if (s) s->opcode_stats[lastopcode].pair_count[op]++; \
68		lastopcode = op; \
69		} while (0)
70		#else
71	60.6G	#define INSTRUCTION_STATS(op) ((void)0)
72		#endif
73
74		#ifdef Py_STATS
75		# define TAIL_CALL_PARAMS _PyInterpreterFrame frame, _PyStackRef stack_pointer, PyThreadState tstate, _Py_CODEUNIT next_instr, const void *instruction_funcptr_table, int oparg, int lastopcode
76		# define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg, lastopcode
77		#else
78		# define TAIL_CALL_PARAMS _PyInterpreterFrame frame, _PyStackRef stack_pointer, PyThreadState tstate, _Py_CODEUNIT next_instr, const void *instruction_funcptr_table, int oparg
79		# define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg
80		#endif
81
82		#if _Py_TAIL_CALL_INTERP
83		# if defined(__clang__) \|\| defined(__GNUC__)
84		# if !_Py__has_attribute(preserve_none) \|\| !_Py__has_attribute(musttail)
85		# error "This compiler does not have support for efficient tail calling."
86		# endif
87		# elif defined(_MSC_VER) && (_MSC_VER < 1950)
88		# error "You need at least VS 2026 / PlatformToolset v145 for tail calling."
89		# endif
90		# if defined(_MSC_VER) && !defined(__clang__)
91		# define Py_MUSTTAIL [[msvc::musttail]]
92		# define Py_PRESERVE_NONE_CC __preserve_none
93		# else
94		# define Py_MUSTTAIL __attribute__((musttail))
95		# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
96		# endif
97		typedef PyObject (Py_PRESERVE_NONE_CC py_tail_call_funcptr)(TAIL_CALL_PARAMS);
98
99		# define DISPATCH_TABLE_VAR instruction_funcptr_table
100		# define DISPATCH_TABLE instruction_funcptr_handler_table
101		# define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
102		# define TARGET(op) Py_NO_INLINE PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_##op(TAIL_CALL_PARAMS)
103
104		# define DISPATCH_GOTO() \
105		do { \
106		Py_MUSTTAIL return (((py_tail_call_funcptr *)instruction_funcptr_table)[opcode])(TAIL_CALL_ARGS); \
107		} while (0)
108		# define DISPATCH_GOTO_NON_TRACING() \
109		do { \
110		Py_MUSTTAIL return (((py_tail_call_funcptr *)DISPATCH_TABLE)[opcode])(TAIL_CALL_ARGS); \
111		} while (0)
112		# define JUMP_TO_LABEL(name) \
113		do { \
114		Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
115		} while (0)
116		# ifdef Py_STATS
117		# define JUMP_TO_PREDICTED(name) \
118		do { \
119		Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg, lastopcode); \
120		} while (0)
121		# else
122		# define JUMP_TO_PREDICTED(name) \
123		do { \
124		Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg); \
125		} while (0)
126		# endif
127		# define LABEL(name) TARGET(name)
128		#elif USE_COMPUTED_GOTOS
129		# define DISPATCH_TABLE_VAR opcode_targets
130	6.47M	# define DISPATCH_TABLE opcode_targets_table
131		# define TRACING_DISPATCH_TABLE opcode_tracing_targets_table
132	60.6G	# define TARGET(op) TARGET_##op:
133	61.0G	# define DISPATCH_GOTO() goto *opcode_targets[opcode]
134	6.47M	# define DISPATCH_GOTO_NON_TRACING() goto *DISPATCH_TABLE[opcode];
135	101M	# define JUMP_TO_LABEL(name) goto name;
136	254M	# define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
137	436M	# define LABEL(name) name:
138		#else
139		# define TARGET(op) case op: TARGET_##op:
140		# define DISPATCH_GOTO() dispatch_code = opcode \| tracing_mode ; goto dispatch_opcode
141		# define DISPATCH_GOTO_NON_TRACING() dispatch_code = opcode; goto dispatch_opcode
142		# define JUMP_TO_LABEL(name) goto name;
143		# define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
144		# define LABEL(name) name:
145		#endif
146
147		#if (_Py_TAIL_CALL_INTERP \|\| USE_COMPUTED_GOTOS) && _Py_TIER2
148		# define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE)
149		# define ENTER_TRACING() \
150		DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE;
151		# define LEAVE_TRACING() \
152		DISPATCH_TABLE_VAR = DISPATCH_TABLE;
153		#else
154		# define IS_JIT_TRACING() (tracing_mode != 0)
155		# define ENTER_TRACING() tracing_mode = 255
156		# define LEAVE_TRACING() tracing_mode = 0
157		#endif
158
159		#if _Py_TIER2
160		#define STOP_TRACING() \
161		do { \
162		if (IS_JIT_TRACING()) { \
163		LEAVE_TRACING(); \
164		_PyJit_FinalizeTracing(tstate, 0); \
165		} \
166		} while (0);
167		#else
168	122M	#define STOP_TRACING() ((void)(0));
169		#endif
170
171
172		/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
173		#ifdef Py_DEBUG
174		#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
175		lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
176		#else
177	61.0G	#define PRE_DISPATCH_GOTO() ((void)0)
178		#endif
179
180		#ifdef Py_DEBUG
181		#define LLTRACE_RESUME_FRAME() \
182		do { \
183		_PyFrame_SetStackPointer(frame, stack_pointer); \
184		int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
185		stack_pointer = _PyFrame_GetStackPointer(frame); \
186		frame->lltrace = lltrace; \
187		} while (0)
188		#else
189	2.40G	#define LLTRACE_RESUME_FRAME() ((void)0)
190		#endif
191
192		#ifdef Py_GIL_DISABLED
193		#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
194		#else
195		#define QSBR_QUIESCENT_STATE(tstate)
196		#endif
197
198
199		/* Do interpreter dispatch accounting for tracing and instrumentation */
200		#define DISPATCH() \
201	60.7G	{ \
202	60.7G	assert(frame->stackpointer == NULL); \
203	60.7G	NEXTOPARG(); \
204	60.7G	PRE_DISPATCH_GOTO(); \
205	60.7G	DISPATCH_GOTO(); \
206	60.7G	}
207
208		#define DISPATCH_NON_TRACING() \
209		{ \
210		assert(frame->stackpointer == NULL); \
211		NEXTOPARG(); \
212		PRE_DISPATCH_GOTO(); \
213		DISPATCH_GOTO_NON_TRACING(); \
214		}
215
216		#define DISPATCH_SAME_OPARG() \
217	6.47M	{ \
218	6.47M	opcode = next_instr->op.code; \
219	6.47M	PRE_DISPATCH_GOTO(); \
220	6.47M	DISPATCH_GOTO_NON_TRACING(); \
221	6.47M	}
222
223		#define DISPATCH_INLINED(NEW_FRAME) \
224	1.01M	do { \
225	1.01M	assert(tstate->interp->eval_frame == NULL); \
226	1.01M	_PyFrame_SetStackPointer(frame, stack_pointer); \
227	1.01M	assert((NEW_FRAME)->previous == frame); \
228	1.01M	frame = tstate->current_frame = (NEW_FRAME); \
229	1.01M	CALL_STAT_INC(inlined_py_calls); \
230	1.01M	JUMP_TO_LABEL(start_frame); \
231	0	} while (0)
232
233		/* Tuple access macros */
234
235		#ifndef Py_DEBUG
236	3.29G	#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
237		#else
238		static inline PyObject *
239		GETITEM(PyObject *v, Py_ssize_t i) {
240		assert(PyTuple_Check(v));
241		assert(i >= 0);
242		assert(i < PyTuple_GET_SIZE(v));
243		return PyTuple_GET_ITEM(v, i);
244		}
245		#endif
246
247		/* Code access macros */
248
249		/* The integer overflow is checked by an assertion below. */
250	63.6M	#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
251	60.7G	#define NEXTOPARG() do { \
252	60.7G	_Py_CODEUNIT word = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED((uint16_t)next_instr)}; \
253	60.7G	opcode = word.op.code; \
254	60.7G	oparg = word.op.arg; \
255	60.7G	} while (0)
256
257		/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
258		* for advancing to the next instruction, taking into account cache entries
259		* and skipped instructions.
260		*/
261	9.19G	#define JUMPBY(x) (next_instr += (x))
262		#define SKIP_OVER(x) (next_instr += (x))
263
264		#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
265		#define STACK_SIZE() (_PyFrame_GetCode(frame)->co_stacksize)
266
267		#define WITHIN_STACK_BOUNDS() \
268		(frame->owner == FRAME_OWNED_BY_INTERPRETER \|\| (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
269
270		#if defined(Py_DEBUG) && !defined(_Py_JIT)
271		// This allows temporary stack "overflows", provided it's all in the cache at any point of time.
272		#define WITHIN_STACK_BOUNDS_IGNORING_CACHE() \
273		(frame->owner == FRAME_OWNED_BY_INTERPRETER \|\| (STACK_LEVEL() >= 0 && (STACK_LEVEL()) <= STACK_SIZE()))
274		#else
275		#define WITHIN_STACK_BOUNDS_IGNORING_CACHE WITHIN_STACK_BOUNDS
276		#endif
277
278		/* Data access macros */
279		#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
280		#define FRAME_CO_NAMES (_PyFrame_GetCode(frame)->co_names)
281
282		/* Local variable macros */
283
284	1.39M	#define LOCALS_ARRAY (frame->localsplus)
285	29.2G	#define GETLOCAL(i) (frame->localsplus[i])
286
287
288		#ifdef Py_STATS
289		#define UPDATE_MISS_STATS(INSTNAME) \
290		do { \
291		STAT_INC(opcode, miss); \
292		STAT_INC((INSTNAME), miss); \
293		/* The counter is always the first cache entry: */ \
294		if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) { \
295		STAT_INC((INSTNAME), deopt); \
296		} \
297		} while (0)
298		#else
299	254M	#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
300		#endif
301
302
303		// Try to lock an object in the free threading build, if it's not already
304		// locked. Use with a DEOPT_IF() to deopt if the object is already locked.
305		// These are no-ops in the default GIL build. The general pattern is:
306		//
307		// DEOPT_IF(!LOCK_OBJECT(op));
308		// if (/* condition fails */) {
309		// UNLOCK_OBJECT(op);
310		// DEOPT_IF(true);
311		// }
312		// ...
313		// UNLOCK_OBJECT(op);
314		//
315		// NOTE: The object must be unlocked on every exit code path and you should
316		// avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
317		// object is locked.
318		#ifdef Py_GIL_DISABLED
319		# define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
320		# define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
321		#else
322	806M	# define LOCK_OBJECT(op) (1)
323	806M	# define UNLOCK_OBJECT(op) ((void)0)
324		#endif
325
326	1.68G	#define GLOBALS() frame->f_globals
327	592M	#define BUILTINS() frame->f_builtins
328	3.04M	#define LOCALS() frame->f_locals
329		#define CONSTS() _PyFrame_GetCode(frame)->co_consts
330		#define NAMES() _PyFrame_GetCode(frame)->co_names
331
332		#define DTRACE_FUNCTION_ENTRY() \
333		if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
334		dtrace_function_entry(frame); \
335		}
336
337		/* This takes a uint16_t instead of a _Py_BackoffCounter,
338		* because it is used directly on the cache entry in generated code,
339		* which is always an integral type. */
340		// Force re-specialization when tracing a side exit to get good side exits.
341		#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
342	1.92G	backoff_counter_triggers(forge_backoff_counter((COUNTER)))
343
344		#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
345	1.92G	do { \
346	1.92G	(COUNTER) = advance_backoff_counter((COUNTER)); \
347	1.92G	} while (0);
348
349		#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
350	0	do { \
351	0	(COUNTER) = pause_backoff_counter((COUNTER)); \
352	0	} while (0);
353
354		#ifdef ENABLE_SPECIALIZATION
355		/* Multiple threads may execute these concurrently if thread-local bytecode is
356		* disabled and they all execute the main copy of the bytecode. Specialization
357		* is disabled in that case so the value is unused, but the RMW cycle should be
358		* free of data races.
359		*/
360		#define RECORD_BRANCH_TAKEN(bitset, flag) \
361	4.59G	FT_ATOMIC_STORE_UINT16_RELAXED( \
362	4.59G	bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) \| (flag))
363		#else
364		#define RECORD_BRANCH_TAKEN(bitset, flag)
365		#endif
366
367		#define UNBOUNDLOCAL_ERROR_MSG \
368	0	"cannot access local variable '%s' where it is not associated with a value"
369		#define UNBOUNDFREE_ERROR_MSG \
370	0	"cannot access free variable '%s' where it is not associated with a value" \
371	0	" in enclosing scope"
372	39	#define NAME_ERROR_MSG "name '%.200s' is not defined"
373
374		// If a trace function sets a new f_lineno and
375		// then raises, we use the destination when searching
376		// for an exception handler, displaying the traceback, and so on
377	0	#define INSTRUMENTED_JUMP(src, dest, event) \
378	0	do { \
379	0	if (tstate->tracing) {\
380	0	next_instr = dest; \
381	0	} else { \
382	0	_PyFrame_SetStackPointer(frame, stack_pointer); \
383	0	next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
384	0	stack_pointer = _PyFrame_GetStackPointer(frame); \
385	0	if (next_instr == NULL) { \
386	0	next_instr = (dest)+1; \
387	0	JUMP_TO_LABEL(error); \
388	0	} \
389	0	} \
390	0	} while (0);
391
392
393	280M	static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
394	280M	return (tstate->py_recursion_remaining-- <= 0) &&
395	167	_Py_CheckRecursiveCallPy(tstate);
396	280M	}
397
398	1.22G	static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) {
399	1.22G	tstate->py_recursion_remaining++;
400	1.22G	}
401
402		/* Implementation of "macros" that modify the instruction pointer,
403		* stack pointer, or frame pointer.
404		* These need to treated differently by tier 1 and 2.
405		* The Tier 1 version is here; Tier 2 is inlined in ceval.c. */
406
407	2.13G	#define LOAD_IP(OFFSET) do { \
408	2.13G	next_instr = frame->instr_ptr + (OFFSET); \
409	2.13G	} while (0)
410
411		/* There's no STORE_IP(), it's inlined by the code generator. */
412
413	941M	#define LOAD_SP() \
414	941M	stack_pointer = _PyFrame_GetStackPointer(frame)
415
416		#define SAVE_SP() \
417		_PyFrame_SetStackPointer(frame, stack_pointer)
418
419		/* Tier-switching macros. */
420
421		#define TIER1_TO_TIER2(EXECUTOR) \
422		do { \
423		OPT_STAT_INC(traces_executed); \
424		next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
425		frame = tstate->current_frame; \
426		stack_pointer = _PyFrame_GetStackPointer(frame); \
427		int keep_tracing_bit = (uintptr_t)next_instr & 1; \
428		next_instr = (_Py_CODEUNIT *)(((uintptr_t)next_instr) & (~1)); \
429		if (next_instr == NULL) { \
430		/* gh-140104: The exception handler expects frame->instr_ptr
431		to after this_instr, not this_instr! */ \
432		next_instr = frame->instr_ptr + 1; \
433		JUMP_TO_LABEL(error); \
434		} \
435		if (keep_tracing_bit) { \
436		assert(uop_buffer_length(&((_PyThreadStateImpl *)tstate)->jit_tracer_state->code_buffer)); \
437		ENTER_TRACING(); \
438		DISPATCH_NON_TRACING(); \
439		} \
440		DISPATCH(); \
441		} while (0)
442
443		#define TIER2_TO_TIER2(EXECUTOR) \
444		do { \
445		OPT_STAT_INC(traces_executed); \
446		current_executor = (EXECUTOR); \
447		goto tier2_start; \
448		} while (0)
449
450		#define GOTO_TIER_ONE_SETUP \
451		tstate->current_executor = NULL; \
452		OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
453		_PyFrame_SetStackPointer(frame, stack_pointer);
454
455		#define GOTO_TIER_ONE(TARGET) \
456		do \
457		{ \
458		GOTO_TIER_ONE_SETUP \
459		return (_Py_CODEUNIT *)(TARGET); \
460		} while (0)
461
462		#define GOTO_TIER_ONE_CONTINUE_TRACING(TARGET) \
463		do \
464		{ \
465		GOTO_TIER_ONE_SETUP \
466		return (_Py_CODEUNIT *)(((uintptr_t)(TARGET))\| 1); \
467		} while (0)
468
469		#define CURRENT_OPARG() (next_uop[-1].oparg)
470		#define CURRENT_OPERAND0_64() (next_uop[-1].operand0)
471		#define CURRENT_OPERAND1_64() (next_uop[-1].operand1)
472		#define CURRENT_OPERAND0_32() (next_uop[-1].operand0)
473		#define CURRENT_OPERAND1_32() (next_uop[-1].operand1)
474		#define CURRENT_OPERAND0_16() (next_uop[-1].operand0)
475		#define CURRENT_OPERAND1_16() (next_uop[-1].operand1)
476		#define CURRENT_TARGET() (next_uop[-1].target)
477
478		#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
479		#define JUMP_TO_ERROR() goto jump_to_error_target
480
481		/* Stackref macros */
482
483		/* How much scratch space to give stackref to PyObject* conversion. */
484	1.91G	#define MAX_STACKREF_SCRATCH 10
485
486		#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
487		/* +1 because vectorcall might use -1 to write self */ \
488	1.91G	PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
489	1.91G	PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp);
490
491		#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
492		/* +1 because we +1 previously */ \
493	1.91G	_PyObjectArray_Free(NAME - 1, NAME##_temp);
494
495	1.91G	#define CONVERSION_FAILED(NAME) ((NAME) == NULL)
496
497		#if defined(Py_DEBUG) && !defined(_Py_JIT)
498		#define SET_CURRENT_CACHED_VALUES(N) current_cached_values = (N)
499		#define CHECK_CURRENT_CACHED_VALUES(N) assert(current_cached_values == (N))
500		#else
501		#define SET_CURRENT_CACHED_VALUES(N) ((void)0)
502		#define CHECK_CURRENT_CACHED_VALUES(N) ((void)0)
503		#endif
504
505	995M	#define IS_PEP523_HOOKED(tstate) (tstate->interp->eval_frame != NULL)
506
507		static inline int
508	4.18G	check_periodics(PyThreadState *tstate) {
509	4.18G	_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY();
510	4.18G	QSBR_QUIESCENT_STATE(tstate);
511	4.18G	if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
512	88.2k	return _Py_HandlePending(tstate);
513	88.2k	}
514	4.18G	return 0;
515	4.18G	}
516
517		// Mark the generator as executing. Returns true if the state was changed,
518		// false if it was already executing or finished.
519		static inline bool
520		gen_try_set_executing(PyGenObject *gen)
521	54.6M	{
522		#ifdef Py_GIL_DISABLED
523		if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) {
524		int8_t frame_state = _Py_atomic_load_int8_relaxed(&gen->gi_frame_state);
525		while (frame_state < FRAME_SUSPENDED_YIELD_FROM_LOCKED) {
526		if (_Py_atomic_compare_exchange_int8(&gen->gi_frame_state,
527		&frame_state,
528		FRAME_EXECUTING)) {
529		return true;
530		}
531		}
532		// NB: We return false for FRAME_SUSPENDED_YIELD_FROM_LOCKED as well.
533		// That case is rare enough that we can just handle it in the deopt.
534		return false;
535		}
536		#endif
537		// Use faster non-atomic modifications in the GIL-enabled build and when
538		// the object is uniquely referenced in the free-threaded build.
539	54.6M	if (gen->gi_frame_state < FRAME_EXECUTING) {
540	54.6M	assert(gen->gi_frame_state != FRAME_SUSPENDED_YIELD_FROM_LOCKED);
541	54.6M	gen->gi_frame_state = FRAME_EXECUTING;
542	54.6M	return true;
543	54.6M	}
544	0	return false;
545	54.6M	}