/src/cpython/Python/ceval_macros.h

Source (jump to first uncovered line)
// Macros and other things needed by ceval.c, and bytecodes.c

/* Computed GOTOs, or
       the-optimization-commonly-but-improperly-known-as-"threaded code"
   using gcc's labels-as-values extension
   (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).

   The traditional bytecode evaluation loop uses a "switch" statement, which
   decent compilers will optimize as a single indirect branch instruction
   combined with a lookup table of jump addresses. However, since the
   indirect jump instruction is shared by all opcodes, the CPU will have a
   hard time making the right prediction for where to jump next (actually,
   it will be always wrong except in the uncommon case of a sequence of
   several identical opcodes).

   "Threaded code" in contrast, uses an explicit jump table and an explicit
   indirect jump instruction at the end of each opcode. Since the jump
   instruction is at a different address for each opcode, the CPU will make a
   separate prediction for each of these instructions, which is equivalent to
   predicting the second opcode of each opcode pair. These predictions have
   a much better chance to turn out valid, especially in small bytecode loops.

   A mispredicted branch on a modern CPU flushes the whole pipeline and
   can cost several CPU cycles (depending on the pipeline depth),
   and potentially many more instructions (depending on the pipeline width).
   A correctly predicted branch, however, is nearly free.

   At the time of this writing, the "threaded code" version is up to 15-20%
   faster than the normal "switch" version, depending on the compiler and the
   CPU architecture.

   NOTE: care must be taken that the compiler doesn't try to "optimize" the
   indirect jumps by sharing them between all opcodes. Such optimizations
   can be disabled on gcc by using the -fno-gcse flag (or possibly
   -fno-crossjumping).
*/

/* Use macros rather than inline functions, to make it as clear as possible
 * to the C compiler that the tracing check is a simple test then branch.
 * We want to be sure that the compiler knows this before it generates
 * the CFG.
 */

/* OR_DTRACE_LINE is a suffix for an integer expression. When WITH_DTRACE is
 * defined it ORs in 255 if PyDTrace_LINE_ENABLED() is true (0 otherwise);
 * without WITH_DTRACE it expands to nothing. The expansion deliberately starts
 * with the `|` operator, so it only makes sense appended to an operand. */
#ifdef WITH_DTRACE
#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
#else
#define OR_DTRACE_LINE
#endif

/* Settle the value of USE_COMPUTED_GOTOS:
 *  - HAVE_COMPUTED_GOTOS defined: default USE_COMPUTED_GOTOS to 1 unless the
 *    build already set it (an explicit setting is left untouched).
 *  - HAVE_COMPUTED_GOTOS not defined: a non-zero USE_COMPUTED_GOTOS is a
 *    build error; otherwise it is forced to 0.
 * After this block USE_COMPUTED_GOTOS is always defined. */
#ifdef HAVE_COMPUTED_GOTOS
    #ifndef USE_COMPUTED_GOTOS
    #define USE_COMPUTED_GOTOS 1
    #endif
#else
    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
    #error "Computed gotos are not supported on this compiler."
    #endif
    #undef USE_COMPUTED_GOTOS
    #define USE_COMPUTED_GOTOS 0
#endif

/* INSTRUCTION_STATS(op): per-instruction statistics hook.
 *
 * With Py_STATS: calls OPCODE_EXE_INC(op), then (if _Py_stats is non-NULL)
 * increments the pair counter for the (lastopcode, op) pair, and finally
 * records op as the new lastopcode. It relies on a variable named
 * `lastopcode` being in scope at the expansion site, and evaluates `op`
 * more than once.
 *
 * Without Py_STATS: a no-op expression. */
#ifdef Py_STATS
#define INSTRUCTION_STATS(op) \
    do { \
        OPCODE_EXE_INC(op); \
        if (_Py_stats) _Py_stats->opcode_stats[lastopcode].pair_count[op]++; \
        lastopcode = op; \
    } while (0)
#else
#define INSTRUCTION_STATS(op) ((void)0)
#endif

/* Parameter list (TAIL_CALL_PARAMS) and the matching argument list
 * (TAIL_CALL_ARGS) shared by the tail-calling instruction functions defined
 * further down. The two lists must be kept in sync. With Py_STATS an extra
 * trailing `lastopcode` is threaded through as well. */
#ifdef Py_STATS
#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg, int lastopcode
#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg, lastopcode
#else
#   define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg
#   define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg
#endif

/* Dispatch primitives. Exactly one of three variants is selected:
 *
 *   TARGET(op)              introduces the code for instruction `op`
 *   DISPATCH_GOTO()         transfers control to the handler for `opcode`
 *   JUMP_TO_LABEL(name)     transfers control to the shared label `name`
 *   JUMP_TO_PREDICTED(name) transfers control to the predicted instruction
 *   LABEL(name)             introduces the shared label `name`
 */
#if Py_TAIL_CALL_INTERP
    /* Tail-call variant: every instruction and label is its own function
     * named _TAIL_CALL_<name> taking TAIL_CALL_PARAMS, and every transfer of
     * control is a `return` of a call marked with Py_MUSTTAIL. */
    // Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
#   define Py_MUSTTAIL [[clang::musttail]]
#   define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
    Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);

#   define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
    /* Calls through INSTRUCTION_TABLE, indexed by the current `opcode`. */
#   define DISPATCH_GOTO() \
        do { \
            Py_MUSTTAIL return (INSTRUCTION_TABLE[opcode])(TAIL_CALL_ARGS); \
        } while (0)
#   define JUMP_TO_LABEL(name) \
        do { \
            Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
        } while (0)
    /* Same argument list as TAIL_CALL_ARGS, except that `this_instr` is
     * passed in the next_instr position. */
#   ifdef Py_STATS
#       define JUMP_TO_PREDICTED(name) \
            do { \
                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg, lastopcode); \
            } while (0)
#   else
#       define JUMP_TO_PREDICTED(name) \
            do { \
                Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg); \
            } while (0)
#   endif
#    define LABEL(name) TARGET(name)
#elif USE_COMPUTED_GOTOS
    /* Computed-goto variant: TARGET(op) is the label TARGET_<op> and dispatch
     * is an indirect goto through opcode_targets[opcode].
     * NOTE(review): JUMP_TO_LABEL/JUMP_TO_PREDICTED carry their own trailing
     * `;` here, unlike the tail-call variant above. */
#  define TARGET(op) TARGET_##op:
#  define DISPATCH_GOTO() goto *opcode_targets[opcode]
#  define JUMP_TO_LABEL(name) goto name;
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
#  define LABEL(name) name:
#else
    /* Switch variant: TARGET(op) is both a `case op:` and the label
     * TARGET_<op>; dispatch jumps to the label dispatch_opcode. */
#  define TARGET(op) case op: TARGET_##op:
#  define DISPATCH_GOTO() goto dispatch_opcode
#  define JUMP_TO_LABEL(name) goto name;
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
#  define LABEL(name) name:
#endif

/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op.
 *
 * In Py_DEBUG builds it calls lltrace_instruction() for the instruction that
 * is about to be dispatched when frame->lltrace >= 5. It reads `frame`,
 * `stack_pointer`, `next_instr`, `opcode` and `oparg` from the expansion site.
 *
 * Both variants expand to a single statement that requires a trailing `;`.
 * The debug variant is wrapped in do { } while (0) so that code such as
 * `if (c) PRE_DISPATCH_GOTO(); else ...` parses the same way in debug and
 * release builds instead of only compiling in release builds. */
#ifdef Py_DEBUG
#define PRE_DISPATCH_GOTO() \
    do { \
        if (frame->lltrace >= 5) { \
            lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); \
        } \
    } while (0)
#else
#define PRE_DISPATCH_GOTO() ((void)0)
#endif

/* LLTRACE_RESUME_FRAME(): debug-only hook.
 *
 * With Py_DEBUG: saves the stack pointer into the frame, calls
 * maybe_lltrace_resume_frame(frame, GLOBALS()), reloads the stack pointer,
 * and then either jumps to the exit_unwind label (negative result) or stores
 * the result in frame->lltrace.
 *
 * Without Py_DEBUG: a no-op expression. */
#ifdef Py_DEBUG
#define LLTRACE_RESUME_FRAME() \
do { \
    _PyFrame_SetStackPointer(frame, stack_pointer); \
    int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
    stack_pointer = _PyFrame_GetStackPointer(frame); \
    if (lltrace < 0) { \
        JUMP_TO_LABEL(exit_unwind); \
    } \
    frame->lltrace = lltrace; \
} while (0)
#else
#define LLTRACE_RESUME_FRAME() ((void)0)
#endif

/* QSBR_QUIESCENT_STATE(tstate): with Py_GIL_DISABLED, calls
 * _Py_qsbr_quiescent_state() on the `qsbr` member reached by casting tstate
 * to _PyThreadStateImpl *. Otherwise it expands to nothing (not even an
 * expression), so it can only be used as a statement. */
#ifdef Py_GIL_DISABLED
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
#else
#define QSBR_QUIESCENT_STATE(tstate)
#endif


/* Do interpreter dispatch accounting for tracing and instrumentation.
 *
 * Steps: assert that frame->stackpointer is NULL, load `opcode`/`oparg` from
 * next_instr (NEXTOPARG), run the debug trace hook (PRE_DISPATCH_GOTO), then
 * transfer control with DISPATCH_GOTO().
 *
 * NOTE(review): this expands to a bare { } block rather than
 * do { } while (0), so `DISPATCH();` is a block followed by an empty
 * statement; avoid using it as the unbraced body of an if/else. */
#define DISPATCH() \
    { \
        assert(frame->stackpointer == NULL); \
        NEXTOPARG(); \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO(); \
    }

/* Like DISPATCH(), but only `opcode` is reloaded from next_instr; `oparg`
 * keeps its current value. Unlike DISPATCH() there is no stack-pointer
 * assertion and the opcode is read with a plain (non-atomic) load.
 * Expands to a bare { } block, not do { } while (0). */
#define DISPATCH_SAME_OPARG() \
    { \
        opcode = next_instr->op.code; \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO(); \
    }

/* DISPATCH_INLINED(NEW_FRAME): switch execution to NEW_FRAME.
 *
 * Asserts that no custom eval_frame is installed and that NEW_FRAME's
 * `previous` is the current frame, saves the stack pointer into the current
 * frame, makes NEW_FRAME both `frame` and tstate->current_frame, bumps the
 * inlined_py_calls call statistic, and jumps to the start_frame label.
 * NEW_FRAME is evaluated more than once when assertions are enabled. */
#define DISPATCH_INLINED(NEW_FRAME)                     \
    do {                                                \
        assert(tstate->interp->eval_frame == NULL);     \
        _PyFrame_SetStackPointer(frame, stack_pointer); \
        assert((NEW_FRAME)->previous == frame);         \
        frame = tstate->current_frame = (NEW_FRAME);     \
        CALL_STAT_INC(inlined_py_calls);                \
        JUMP_TO_LABEL(start_frame);                      \
    } while (0)

/* Tuple access macros */

/* GETITEM(v, i) yields item `i` of tuple `v` via PyTuple_GET_ITEM.
 * Debug builds route the access through a checked inline function that
 * asserts `v` is a tuple and `i` is a valid index; other builds use the
 * macro directly with no checks. */
#ifdef Py_DEBUG
static inline PyObject *
GETITEM(PyObject *v, Py_ssize_t i)
{
    /* Validate the container first, then both ends of the index range. */
    assert(PyTuple_Check(v));
    assert(i >= 0);
    assert(i < PyTuple_GET_SIZE(v));

    PyObject *item = PyTuple_GET_ITEM(v, i);
    return item;
}
#else
#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
#endif

/* Code access macros */

/* INSTR_OFFSET(): distance, in code units, of next_instr from the start of
 * the frame's bytecode, narrowed to int. */
/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
/* NEXTOPARG(): read the 16-bit code unit at next_instr with a relaxed
 * atomic load and split it into `opcode` and `oparg`. The value is loaded
 * once into a local _Py_CODEUNIT (through its `cache` member) so that both
 * fields come from the same read. next_instr itself is not advanced. */
#define NEXTOPARG()  do { \
        _Py_CODEUNIT word  = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
        opcode = word.op.code; \
        oparg = word.op.arg; \
    } while (0)

/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
 * for advancing to the next instruction, taking into account cache entries
 * and skipped instructions.
 *
 * Both macros have the same expansion: they add `x` code units to
 * next_instr. They differ only in name.
 */
#define JUMPBY(x)       (next_instr += (x))
#define SKIP_OVER(x)    (next_instr += (x))

/* STACK_LEVEL(): number of stack slots between the frame's stack base and
 * stack_pointer, narrowed to int.
 * STACK_SIZE(): the co_stacksize of the frame's code object. */
#define STACK_LEVEL()     ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
#define STACK_SIZE()      (_PyFrame_GetCode(frame)->co_stacksize)

/* True when 0 <= STACK_LEVEL() <= STACK_SIZE(). Frames whose owner is
 * FRAME_OWNED_BY_INTERPRETER are exempt from the check and always pass. */
#define WITHIN_STACK_BOUNDS() \
   (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))

/* Data access macros */
/* co_consts / co_names of the code object of the current frame. */
#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
#define FRAME_CO_NAMES  (_PyFrame_GetCode(frame)->co_names)

/* Local variable macros */

/* LOCALS_ARRAY is the frame's localsplus array; GETLOCAL(i) is element i of
 * that array (an lvalue, with no bounds checking). */
#define LOCALS_ARRAY    (frame->localsplus)
#define GETLOCAL(i)     (frame->localsplus[i])


/* UPDATE_MISS_STATS(INSTNAME): statistics for a specialization miss.
 *
 * With Py_STATS: increments the `miss` stat for both the current `opcode`
 * and INSTNAME, and additionally the `deopt` stat for INSTNAME when the
 * adaptive counter in next_instr->cache triggers.
 *
 * Without Py_STATS: a no-op expression. */
#ifdef Py_STATS
#define UPDATE_MISS_STATS(INSTNAME)                              \
    do {                                                         \
        STAT_INC(opcode, miss);                                  \
        STAT_INC((INSTNAME), miss);                              \
        /* The counter is always the first cache entry: */       \
        if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) {       \
            STAT_INC((INSTNAME), deopt);                         \
        }                                                        \
    } while (0)
#else
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
#endif


// Try to lock an object in the free threading build, if it's not already
// locked. Use with a DEOPT_IF() to deopt if the object is already locked.
// These are no-ops in the default GIL build. The general pattern is:
//
// DEOPT_IF(!LOCK_OBJECT(op));
// if (/* condition fails */) {
//     UNLOCK_OBJECT(op);
//     DEOPT_IF(true);
//  }
//  ...
//  UNLOCK_OBJECT(op);
//
// NOTE: The object must be unlocked on every exit code path and you should
// avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
// object is locked.
//
// With Py_GIL_DISABLED both macros operate on the object's ob_mutex
// (PyMutex_LockFast / PyMutex_Unlock). Otherwise LOCK_OBJECT() is the
// constant 1, so the DEOPT_IF(!LOCK_OBJECT(op)) guard never fires, and
// UNLOCK_OBJECT() is a no-op expression; `op` is not evaluated in that case.
#ifdef Py_GIL_DISABLED
#  define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
#  define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
#else
#  define LOCK_OBJECT(op) (1)
#  define UNLOCK_OBJECT(op) ((void)0)
#endif

/* Shorthand accessors for fields reached through the current `frame`.
 * The first three read members of the frame itself; CONSTS() and NAMES()
 * go through the frame's code object. Each expansion is a parenthesized
 * lvalue expression. */
#define GLOBALS()   (frame->f_globals)
#define BUILTINS()  (frame->f_builtins)
#define LOCALS()    (frame->f_locals)
#define CONSTS()    (_PyFrame_GetCode(frame)->co_consts)
#define NAMES()     (_PyFrame_GetCode(frame)->co_names)

/* DTRACE_FUNCTION_ENTRY(): call dtrace_function_entry(frame) when
 * PyDTrace_FUNCTION_ENTRY_ENABLED() reports that the probe is enabled.
 *
 * NOTE(review): this expands to a bare `if (...) { }` rather than
 * do { } while (0); do not follow it with an `else` or use it as the
 * unbraced body of another if/else. */
#define DTRACE_FUNCTION_ENTRY()  \
    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
        dtrace_function_entry(frame); \
    }

/* This takes a uint16_t instead of a _Py_BackoffCounter,
 * because it is used directly on the cache entry in generated code,
 * which is always an integral type. */
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
    backoff_counter_triggers(forge_backoff_counter((COUNTER)))

/* ADVANCE_ADAPTIVE_COUNTER(COUNTER) / PAUSE_ADAPTIVE_COUNTER(COUNTER):
 * replace the lvalue COUNTER with the result of advance_backoff_counter()
 * or pause_backoff_counter() applied to its current value. COUNTER is
 * evaluated twice, so it must not have side effects.
 *
 * Each macro expands to exactly one statement and needs a trailing `;` at
 * the call site (there is deliberately no `;` after `while (0)`), which
 * keeps them usable as the unbraced body of an if/else. */
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
    do { \
        (COUNTER) = advance_backoff_counter((COUNTER)); \
    } while (0)

#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
    do { \
        (COUNTER) = pause_backoff_counter((COUNTER)); \
    } while (0)

/* RECORD_BRANCH_TAKEN(bitset, flag): with ENABLE_SPECIALIZATION_FT, shift
 * the 16-bit `bitset` left by one and OR `flag` into it, using relaxed
 * atomic load and store. `bitset` is evaluated twice. Without
 * ENABLE_SPECIALIZATION_FT the macro expands to nothing. */
#ifdef ENABLE_SPECIALIZATION_FT
/* Multiple threads may execute these concurrently if thread-local bytecode is
 * disabled and they all execute the main copy of the bytecode. Specialization
 * is disabled in that case so the value is unused, but the RMW cycle should be
 * free of data races.
 */
#define RECORD_BRANCH_TAKEN(bitset, flag) \
    FT_ATOMIC_STORE_UINT16_RELAXED(       \
        bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
#else
#define RECORD_BRANCH_TAKEN(bitset, flag)
#endif

/* printf-style message templates for name-lookup errors. Each takes a
 * single string argument (the variable name); NAME_ERROR_MSG caps the name
 * at 200 characters via its %.200s conversion. */
#define UNBOUNDLOCAL_ERROR_MSG \
    "cannot access local variable '%s' where it is not associated with a value"
#define UNBOUNDFREE_ERROR_MSG \
    "cannot access free variable '%s' where it is not associated with a value" \
    " in enclosing scope"
#define NAME_ERROR_MSG "name '%.200s' is not defined"

// If a trace function sets a new f_lineno and
// *then* raises, we use the destination when searching
// for an exception handler, displaying the traceback, and so on
//
// INSTRUMENTED_JUMP(src, dest, event): set next_instr for a jump from `src`
// to `dest`.
//  - While tstate->tracing is set, no instrumentation is called and
//    next_instr simply becomes `dest`.
//  - Otherwise the stack pointer is saved into the frame,
//    _Py_call_instrumentation_jump() chooses next_instr, and the stack
//    pointer is reloaded. If it returns NULL, next_instr is set to
//    (dest)+1 and control jumps to the `error` label.
//
// `dest` may be evaluated more than once, so it must not have side effects.
// The macro expands to exactly one statement and needs a trailing `;` at the
// call site (there is no `;` after `while (0)`), so it can be used as the
// unbraced body of an if/else.
#define INSTRUMENTED_JUMP(src, dest, event) \
do { \
    if (tstate->tracing) {\
        next_instr = (dest); \
    } else { \
        _PyFrame_SetStackPointer(frame, stack_pointer); \
        next_instr = _Py_call_instrumentation_jump(this_instr, tstate, (event), frame, (src), (dest)); \
        stack_pointer = _PyFrame_GetStackPointer(frame); \
        if (next_instr == NULL) { \
            next_instr = (dest)+1; \
            JUMP_TO_LABEL(error); \
        } \
    } \
} while (0)


/* Account for entering one more level of Python-to-Python recursion.
 *
 * The thread's py_recursion_remaining budget is always decremented. Only if
 * the budget was already exhausted (<= 0) before the decrement is
 * _Py_CheckRecursiveCallPy() consulted.
 *
 * Returns 1 when the budget was exhausted and _Py_CheckRecursiveCallPy()
 * returned non-zero, 0 otherwise. */
static inline int
_Py_EnterRecursivePy(PyThreadState *tstate)
{
    /* Fast path: there was still budget left before this call. */
    if (tstate->py_recursion_remaining-- > 0) {
        return 0;
    }
    /* Slow path: defer to the out-of-line check, normalized to 0/1. */
    return _Py_CheckRecursiveCallPy(tstate) ? 1 : 0;
}

/* Undo the budget decrement made by _Py_EnterRecursivePy(): hand one level
 * of recursion back to the thread's py_recursion_remaining counter. */
static inline void
_Py_LeaveRecursiveCallPy(PyThreadState *tstate)
{
    tstate->py_recursion_remaining += 1;
}

/* Implementation of "macros" that modify the instruction pointer,
 * stack pointer, or frame pointer.
 * These need to be treated differently by tier 1 and 2.
 * The Tier 1 version is here; Tier 2 is inlined in ceval.c. */

/* LOAD_IP(OFFSET): point next_instr OFFSET code units past the frame's
 * saved instruction pointer (frame->instr_ptr). */
#define LOAD_IP(OFFSET) do { \
        next_instr = frame->instr_ptr + (OFFSET); \
    } while (0)

/* There's no STORE_IP(), it's inlined by the code generator. */

/* LOAD_SP(): reload the local stack_pointer from the frame. */
#define LOAD_SP() \
stack_pointer = _PyFrame_GetStackPointer(frame)

/* SAVE_SP(): store the local stack_pointer into the frame. */
#define SAVE_SP() \
_PyFrame_SetStackPointer(frame, stack_pointer)

/* Tier-switching macros. */

/* GOTO_TIER_TWO(EXECUTOR): leave tier 1 and start running EXECUTOR.
 * Both variants bump the traces_executed stat and record the executor in
 * tstate->current_executor.
 *
 * _Py_JIT variant: calls the executor's jit_code, holding an extra
 * reference to the executor for the duration of the call. Afterwards
 * `frame` and `stack_pointer` are reloaded from the thread state. A NULL
 * return is treated as an error: next_instr is reset to frame->instr_ptr
 * and control jumps to the error label. Otherwise dispatch continues at
 * the returned instruction.
 *
 * Non-JIT variant: points next_uop at the executor's trace, asserts that
 * the trace starts with _START_EXECUTOR, and jumps to the enter_tier_two
 * label. */
#ifdef _Py_JIT
#define GOTO_TIER_TWO(EXECUTOR)                        \
do {                                                   \
    OPT_STAT_INC(traces_executed);                     \
    _PyExecutorObject *_executor = (EXECUTOR);         \
    tstate->current_executor = (PyObject *)_executor;  \
    jit_func jitted = _executor->jit_code;             \
    /* Keep the shim frame alive via the executor: */  \
    Py_INCREF(_executor);                              \
    next_instr = jitted(frame, stack_pointer, tstate); \
    Py_DECREF(_executor);                              \
    frame = tstate->current_frame;                     \
    stack_pointer = _PyFrame_GetStackPointer(frame);   \
    if (next_instr == NULL) {                          \
        next_instr = frame->instr_ptr;                 \
        JUMP_TO_LABEL(error);                          \
    }                                                  \
    DISPATCH();                                        \
} while (0)
#else
#define GOTO_TIER_TWO(EXECUTOR) \
do { \
    OPT_STAT_INC(traces_executed); \
    _PyExecutorObject *_executor = (EXECUTOR); \
    tstate->current_executor = (PyObject *)_executor; \
    next_uop = _executor->trace; \
    assert(next_uop->opcode == _START_EXECUTOR); \
    goto enter_tier_two; \
} while (0)
#endif

/* GOTO_TIER_ONE(TARGET): leave the current executor and resume tier 1 at
 * TARGET.
 *
 * Clears tstate->current_executor, sets next_instr to TARGET, records the
 * trace run length histogram, and round-trips the stack pointer through the
 * frame. A NULL TARGET is treated as an error: next_instr is reset to
 * frame->instr_ptr and control goes to the `error` label. Otherwise normal
 * dispatch continues at next_instr.
 *
 * NOTE(review): the assert re-checks current_executor after TARGET has been
 * evaluated; presumably it guards against TARGET's evaluation setting it
 * again. Confirm before removing.
 * NOTE(review): this uses a plain `goto error` rather than
 * JUMP_TO_LABEL(error), so it requires a real `error` label in the
 * enclosing function. */
#define GOTO_TIER_ONE(TARGET)                                         \
    do                                                                \
    {                                                                 \
        tstate->current_executor = NULL;                              \
        next_instr = (TARGET);                                        \
        assert(tstate->current_executor == NULL);                     \
        OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
        _PyFrame_SetStackPointer(frame, stack_pointer);               \
        stack_pointer = _PyFrame_GetStackPointer(frame);              \
        if (next_instr == NULL)                                       \
        {                                                             \
            next_instr = frame->instr_ptr;                            \
            goto error;                                               \
        }                                                             \
        DISPATCH();                                                   \
    } while (0)

/* Accessors for the fields of the micro-op at next_uop[-1], i.e. the entry
 * immediately before the one next_uop points at. */
#define CURRENT_OPARG()    (next_uop[-1].oparg)
#define CURRENT_OPERAND0() (next_uop[-1].operand0)
#define CURRENT_OPERAND1() (next_uop[-1].operand1)
#define CURRENT_TARGET()   (next_uop[-1].target)

/* Jumps to the shared jump_to_jump_target / jump_to_error_target labels,
 * which must exist in the enclosing function. */
#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
#define JUMP_TO_ERROR() goto jump_to_error_target

/* Stackref macros */

/* How much scratch space to give stackref to PyObject* conversion. */
#define MAX_STACKREF_SCRATCH 10

/* STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) declares two locals in the
 * enclosing scope:
 *   NAME_temp  - a scratch array of MAX_STACKREF_SCRATCH+1 PyObject pointers
 *   NAME       - the result of _PyObjectArray_FromStackRefArray(), which is
 *                handed the scratch array starting at element 1
 * Because it expands to declarations (and supplies its own trailing `;`),
 * it cannot be used where a single statement is required. Check the result
 * with CONVERSION_FAILED(NAME) and release it with
 * STACKREFS_TO_PYOBJECTS_CLEANUP(NAME). */
#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
    /* +1 because vectorcall might use -1 to write self */ \
    PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
    PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1);

/* Releases the array obtained by STACKREFS_TO_PYOBJECTS(..., NAME) by
 * passing NAME - 1 and the scratch array to _PyObjectArray_Free(). NAME must
 * be the same identifier that was used for the conversion. */
#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
    /* +1 because we +1 previously */ \
    _PyObjectArray_Free(NAME - 1, NAME##_temp);

/* True when the conversion produced NULL. */
#define CONVERSION_FAILED(NAME) ((NAME) == NULL)

Coverage Report

Created: 2025-07-04 06:49

Line	Count	Source (jump to first uncovered line)
1		// Macros and other things needed by ceval.c, and bytecodes.c
2
3		/* Computed GOTOs, or
4		the-optimization-commonly-but-improperly-known-as-"threaded code"
5		using gcc's labels-as-values extension
6		(http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
7
8		The traditional bytecode evaluation loop uses a "switch" statement, which
9		decent compilers will optimize as a single indirect branch instruction
10		combined with a lookup table of jump addresses. However, since the
11		indirect jump instruction is shared by all opcodes, the CPU will have a
12		hard time making the right prediction for where to jump next (actually,
13		it will be always wrong except in the uncommon case of a sequence of
14		several identical opcodes).
15
16		"Threaded code" in contrast, uses an explicit jump table and an explicit
17		indirect jump instruction at the end of each opcode. Since the jump
18		instruction is at a different address for each opcode, the CPU will make a
19		separate prediction for each of these instructions, which is equivalent to
20		predicting the second opcode of each opcode pair. These predictions have
21		a much better chance to turn out valid, especially in small bytecode loops.
22
23		A mispredicted branch on a modern CPU flushes the whole pipeline and
24		can cost several CPU cycles (depending on the pipeline depth),
25		and potentially many more instructions (depending on the pipeline width).
26		A correctly predicted branch, however, is nearly free.
27
28		At the time of this writing, the "threaded code" version is up to 15-20%
29		faster than the normal "switch" version, depending on the compiler and the
30		CPU architecture.
31
32		NOTE: care must be taken that the compiler doesn't try to "optimize" the
33		indirect jumps by sharing them between all opcodes. Such optimizations
34		can be disabled on gcc by using the -fno-gcse flag (or possibly
35		-fno-crossjumping).
36		*/
37
38		/* Use macros rather than inline functions, to make it as clear as possible
39		* to the C compiler that the tracing check is a simple test then branch.
40		* We want to be sure that the compiler knows this before it generates
41		* the CFG.
42		*/
43
44		#ifdef WITH_DTRACE
45		#define OR_DTRACE_LINE \| (PyDTrace_LINE_ENABLED() ? 255 : 0)
46		#else
47		#define OR_DTRACE_LINE
48		#endif
49
50		#ifdef HAVE_COMPUTED_GOTOS
51		#ifndef USE_COMPUTED_GOTOS
52		#define USE_COMPUTED_GOTOS 1
53		#endif
54		#else
55		#if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
56		#error "Computed gotos are not supported on this compiler."
57		#endif
58		#undef USE_COMPUTED_GOTOS
59		#define USE_COMPUTED_GOTOS 0
60		#endif
61
62		#ifdef Py_STATS
63		#define INSTRUCTION_STATS(op) \
64		do { \
65		OPCODE_EXE_INC(op); \
66		if (_Py_stats) _Py_stats->opcode_stats[lastopcode].pair_count[op]++; \
67		lastopcode = op; \
68		} while (0)
69		#else
70	32.4G	#define INSTRUCTION_STATS(op) ((void)0)
71		#endif
72
73		#ifdef Py_STATS
74		# define TAIL_CALL_PARAMS _PyInterpreterFrame frame, _PyStackRef stack_pointer, PyThreadState tstate, _Py_CODEUNIT next_instr, int oparg, int lastopcode
75		# define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg, lastopcode
76		#else
77		# define TAIL_CALL_PARAMS _PyInterpreterFrame frame, _PyStackRef stack_pointer, PyThreadState tstate, _Py_CODEUNIT next_instr, int oparg
78		# define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg
79		#endif
80
81		#if Py_TAIL_CALL_INTERP
82		// Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
83		# define Py_MUSTTAIL [[clang::musttail]]
84		# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
85		Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
86
87		# define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
88		# define DISPATCH_GOTO() \
89		do { \
90		Py_MUSTTAIL return (INSTRUCTION_TABLE[opcode])(TAIL_CALL_ARGS); \
91		} while (0)
92		# define JUMP_TO_LABEL(name) \
93		do { \
94		Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
95		} while (0)
96		# ifdef Py_STATS
97		# define JUMP_TO_PREDICTED(name) \
98		do { \
99		Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg, lastopcode); \
100		} while (0)
101		# else
102		# define JUMP_TO_PREDICTED(name) \
103		do { \
104		Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, oparg); \
105		} while (0)
106		# endif
107		# define LABEL(name) TARGET(name)
108		#elif USE_COMPUTED_GOTOS
109	32.4G	# define TARGET(op) TARGET_##op:
110	32.8G	# define DISPATCH_GOTO() goto *opcode_targets[opcode]
111	47.5M	# define JUMP_TO_LABEL(name) goto name;
112	195M	# define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
113	303M	# define LABEL(name) name:
114		#else
115		# define TARGET(op) case op: TARGET_##op:
116		# define DISPATCH_GOTO() goto dispatch_opcode
117		# define JUMP_TO_LABEL(name) goto name;
118		# define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
119		# define LABEL(name) name:
120		#endif
121
122		/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
123		#ifdef Py_DEBUG
124		#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
125		lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
126		#else
127	32.8G	#define PRE_DISPATCH_GOTO() ((void)0)
128		#endif
129
130		#ifdef Py_DEBUG
131		#define LLTRACE_RESUME_FRAME() \
132		do { \
133		_PyFrame_SetStackPointer(frame, stack_pointer); \
134		int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
135		stack_pointer = _PyFrame_GetStackPointer(frame); \
136		if (lltrace < 0) { \
137		JUMP_TO_LABEL(exit_unwind); \
138		} \
139		frame->lltrace = lltrace; \
140		} while (0)
141		#else
142	1.31G	#define LLTRACE_RESUME_FRAME() ((void)0)
143		#endif
144
145		#ifdef Py_GIL_DISABLED
146		#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
147		#else
148		#define QSBR_QUIESCENT_STATE(tstate)
149		#endif
150
151
152		/* Do interpreter dispatch accounting for tracing and instrumentation */
153		#define DISPATCH() \
154	32.7G	{ \
155	32.7G	assert(frame->stackpointer == NULL); \
156	32.7G	NEXTOPARG(); \
157	32.7G	PRE_DISPATCH_GOTO(); \
158	32.7G	DISPATCH_GOTO(); \
159	32.7G	}
160
161		#define DISPATCH_SAME_OPARG() \
162	4.08M	{ \
163	4.08M	opcode = next_instr->op.code; \
164	4.08M	PRE_DISPATCH_GOTO(); \
165	4.08M	DISPATCH_GOTO(); \
166	4.08M	}
167
168		#define DISPATCH_INLINED(NEW_FRAME) \
169	809k	do { \
170	809k	assert(tstate->interp->eval_frame == NULL); \
171	809k	_PyFrame_SetStackPointer(frame, stack_pointer); \
172	809k	assert((NEW_FRAME)->previous == frame); \
173	809k	frame = tstate->current_frame = (NEW_FRAME); \
174	809k	CALL_STAT_INC(inlined_py_calls); \
175	809k	JUMP_TO_LABEL(start_frame); \
176	0	} while (0)
177
178		/* Tuple access macros */
179
180		#ifndef Py_DEBUG
181	1.68G	#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
182		#else
183		static inline PyObject *
184		GETITEM(PyObject *v, Py_ssize_t i) {
185		assert(PyTuple_Check(v));
186		assert(i >= 0);
187		assert(i < PyTuple_GET_SIZE(v));
188		return PyTuple_GET_ITEM(v, i);
189		}
190		#endif
191
192		/* Code access macros */
193
194		/* The integer overflow is checked by an assertion below. */
195	33.7M	#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
196	32.7G	#define NEXTOPARG() do { \
197	32.7G	_Py_CODEUNIT word = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED((uint16_t)next_instr)}; \
198	32.7G	opcode = word.op.code; \
199	32.7G	oparg = word.op.arg; \
200	32.7G	} while (0)
201
202		/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
203		* for advancing to the next instruction, taking into account cache entries
204		* and skipped instructions.
205		*/
206	4.26G	#define JUMPBY(x) (next_instr += (x))
207	367M	#define SKIP_OVER(x) (next_instr += (x))
208
209		#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
210		#define STACK_SIZE() (_PyFrame_GetCode(frame)->co_stacksize)
211
212		#define WITHIN_STACK_BOUNDS() \
213		(frame->owner == FRAME_OWNED_BY_INTERPRETER \|\| (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
214
215		/* Data access macros */
216		#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
217		#define FRAME_CO_NAMES (_PyFrame_GetCode(frame)->co_names)
218
219		/* Local variable macros */
220
221	1.28M	#define LOCALS_ARRAY (frame->localsplus)
222	15.7G	#define GETLOCAL(i) (frame->localsplus[i])
223
224
225		#ifdef Py_STATS
226		#define UPDATE_MISS_STATS(INSTNAME) \
227		do { \
228		STAT_INC(opcode, miss); \
229		STAT_INC((INSTNAME), miss); \
230		/* The counter is always the first cache entry: */ \
231		if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) { \
232		STAT_INC((INSTNAME), deopt); \
233		} \
234		} while (0)
235		#else
236	195M	#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
237		#endif
238
239
240		// Try to lock an object in the free threading build, if it's not already
241		// locked. Use with a DEOPT_IF() to deopt if the object is already locked.
242		// These are no-ops in the default GIL build. The general pattern is:
243		//
244		// DEOPT_IF(!LOCK_OBJECT(op));
245		// if (/* condition fails */) {
246		// UNLOCK_OBJECT(op);
247		// DEOPT_IF(true);
248		// }
249		// ...
250		// UNLOCK_OBJECT(op);
251		//
252		// NOTE: The object must be unlocked on every exit code path and you should
253		// avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
254		// object is locked.
255		#ifdef Py_GIL_DISABLED
256		# define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
257		# define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
258		#else
259	511M	# define LOCK_OBJECT(op) (1)
260	511M	# define UNLOCK_OBJECT(op) ((void)0)
261		#endif
262
263	712M	#define GLOBALS() frame->f_globals
264	392M	#define BUILTINS() frame->f_builtins
265	71.6k	#define LOCALS() frame->f_locals
266		#define CONSTS() _PyFrame_GetCode(frame)->co_consts
267		#define NAMES() _PyFrame_GetCode(frame)->co_names
268
269		#define DTRACE_FUNCTION_ENTRY() \
270		if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
271		dtrace_function_entry(frame); \
272		}
273
274		/* This takes a uint16_t instead of a _Py_BackoffCounter,
275		* because it is used directly on the cache entry in generated code,
276		* which is always an integral type. */
277		#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
278	1.15G	backoff_counter_triggers(forge_backoff_counter((COUNTER)))
279
280		#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
281	1.15G	do { \
282	1.15G	(COUNTER) = advance_backoff_counter((COUNTER)); \
283	1.15G	} while (0);
284
285		#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
286	0	do { \
287	0	(COUNTER) = pause_backoff_counter((COUNTER)); \
288	0	} while (0);
289
290		#ifdef ENABLE_SPECIALIZATION_FT
291		/* Multiple threads may execute these concurrently if thread-local bytecode is
292		* disabled and they all execute the main copy of the bytecode. Specialization
293		* is disabled in that case so the value is unused, but the RMW cycle should be
294		* free of data races.
295		*/
296		#define RECORD_BRANCH_TAKEN(bitset, flag) \
297	2.13G	FT_ATOMIC_STORE_UINT16_RELAXED( \
298	2.13G	bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) \| (flag))
299		#else
300		#define RECORD_BRANCH_TAKEN(bitset, flag)
301		#endif
302
303		#define UNBOUNDLOCAL_ERROR_MSG \
304	0	"cannot access local variable '%s' where it is not associated with a value"
305		#define UNBOUNDFREE_ERROR_MSG \
306	0	"cannot access free variable '%s' where it is not associated with a value" \
307	0	" in enclosing scope"
308	1	#define NAME_ERROR_MSG "name '%.200s' is not defined"
309
310		// If a trace function sets a new f_lineno and
311		// then raises, we use the destination when searching
312		// for an exception handler, displaying the traceback, and so on
313	0	#define INSTRUMENTED_JUMP(src, dest, event) \
314	0	do { \
315	0	if (tstate->tracing) {\
316	0	next_instr = dest; \
317	0	} else { \
318	0	_PyFrame_SetStackPointer(frame, stack_pointer); \
319	0	next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
320	0	stack_pointer = _PyFrame_GetStackPointer(frame); \
321	0	if (next_instr == NULL) { \
322	0	next_instr = (dest)+1; \
323	0	JUMP_TO_LABEL(error); \
324	0	} \
325	0	} \
326	0	} while (0);
327
328
329	222M	static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
330	222M	return (tstate->py_recursion_remaining-- <= 0) &&
331	222M	_Py_CheckRecursiveCallPy(tstate);
332	222M	}
333
334	668M	static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) {
335	668M	tstate->py_recursion_remaining++;
336	668M	}
337
338		/* Implementation of "macros" that modify the instruction pointer,
339		* stack pointer, or frame pointer.
340		* These need to treated differently by tier 1 and 2.
341		* The Tier 1 version is here; Tier 2 is inlined in ceval.c. */
342
343	1.10G	#define LOAD_IP(OFFSET) do { \
344	1.10G	next_instr = frame->instr_ptr + (OFFSET); \
345	1.10G	} while (0)
346
347		/* There's no STORE_IP(), it's inlined by the code generator. */
348
349	445M	#define LOAD_SP() \
350	445M	stack_pointer = _PyFrame_GetStackPointer(frame)
351
352		#define SAVE_SP() \
353		_PyFrame_SetStackPointer(frame, stack_pointer)
354
355		/* Tier-switching macros. */
356
357		#ifdef _Py_JIT
358		#define GOTO_TIER_TWO(EXECUTOR) \
359		do { \
360		OPT_STAT_INC(traces_executed); \
361		_PyExecutorObject *_executor = (EXECUTOR); \
362		tstate->current_executor = (PyObject *)_executor; \
363		jit_func jitted = _executor->jit_code; \
364		/* Keep the shim frame alive via the executor: */ \
365		Py_INCREF(_executor); \
366		next_instr = jitted(frame, stack_pointer, tstate); \
367		Py_DECREF(_executor); \
368		frame = tstate->current_frame; \
369		stack_pointer = _PyFrame_GetStackPointer(frame); \
370		if (next_instr == NULL) { \
371		next_instr = frame->instr_ptr; \
372		JUMP_TO_LABEL(error); \
373		} \
374		DISPATCH(); \
375		} while (0)
376		#else
377		#define GOTO_TIER_TWO(EXECUTOR) \
378		do { \
379		OPT_STAT_INC(traces_executed); \
380		_PyExecutorObject *_executor = (EXECUTOR); \
381		tstate->current_executor = (PyObject *)_executor; \
382		next_uop = _executor->trace; \
383		assert(next_uop->opcode == _START_EXECUTOR); \
384		goto enter_tier_two; \
385		} while (0)
386		#endif
387
388		#define GOTO_TIER_ONE(TARGET) \
389		do \
390		{ \
391		tstate->current_executor = NULL; \
392		next_instr = (TARGET); \
393		assert(tstate->current_executor == NULL); \
394		OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
395		_PyFrame_SetStackPointer(frame, stack_pointer); \
396		stack_pointer = _PyFrame_GetStackPointer(frame); \
397		if (next_instr == NULL) \
398		{ \
399		next_instr = frame->instr_ptr; \
400		goto error; \
401		} \
402		DISPATCH(); \
403		} while (0)
404
405		#define CURRENT_OPARG() (next_uop[-1].oparg)
406		#define CURRENT_OPERAND0() (next_uop[-1].operand0)
407		#define CURRENT_OPERAND1() (next_uop[-1].operand1)
408		#define CURRENT_TARGET() (next_uop[-1].target)
409
410		#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
411		#define JUMP_TO_ERROR() goto jump_to_error_target
412
413		/* Stackref macros */
414
415		/* How much scratch space to give stackref to PyObject* conversion. */
416	1.61G	#define MAX_STACKREF_SCRATCH 10
417
418		#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
419		/* +1 because vectorcall might use -1 to write self */ \
420	1.62G	PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
421	1.62G	PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1);
422
423		#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
424		/* +1 because we +1 previously */ \
425	1.62G	_PyObjectArray_Free(NAME - 1, NAME##_temp);
426
427	1.62G	#define CONVERSION_FAILED(NAME) ((NAME) == NULL)