/src/cpython/Python/ceval_macros.h
Line | Count | Source |
1 | | // Macros and other things needed by ceval.c, and bytecodes.c |
2 | | |
3 | | /* Computed GOTOs, or |
4 | | the-optimization-commonly-but-improperly-known-as-"threaded code" |
5 | | using gcc's labels-as-values extension |
6 | | (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html). |
7 | | |
8 | | The traditional bytecode evaluation loop uses a "switch" statement, which |
9 | | decent compilers will optimize as a single indirect branch instruction |
10 | | combined with a lookup table of jump addresses. However, since the |
11 | | indirect jump instruction is shared by all opcodes, the CPU will have a |
12 | | hard time making the right prediction for where to jump next (actually, |
13 | | it will always be wrong except in the uncommon case of a sequence of |
14 | | several identical opcodes). |
15 | | |
16 | | "Threaded code" in contrast, uses an explicit jump table and an explicit |
17 | | indirect jump instruction at the end of each opcode. Since the jump |
18 | | instruction is at a different address for each opcode, the CPU will make a |
19 | | separate prediction for each of these instructions, which is equivalent to |
20 | | predicting the second opcode of each opcode pair. These predictions have |
21 | | a much better chance to turn out valid, especially in small bytecode loops. |
22 | | |
23 | | A mispredicted branch on a modern CPU flushes the whole pipeline and |
24 | | can cost several CPU cycles (depending on the pipeline depth), |
25 | | and potentially many more instructions (depending on the pipeline width). |
26 | | A correctly predicted branch, however, is nearly free. |
27 | | |
28 | | At the time of this writing, the "threaded code" version is up to 15-20% |
29 | | faster than the normal "switch" version, depending on the compiler and the |
30 | | CPU architecture. |
31 | | |
32 | | NOTE: care must be taken that the compiler doesn't try to "optimize" the |
33 | | indirect jumps by sharing them between all opcodes. Such optimizations |
34 | | can be disabled on gcc by using the -fno-gcse flag (or possibly |
35 | | -fno-crossjumping). |
36 | | */ |
37 | | |
38 | | /* Use macros rather than inline functions, to make it as clear as possible |
39 | | * to the C compiler that the tracing check is a simple test then branch. |
40 | | * We want to be sure that the compiler knows this before it generates |
41 | | * the CFG. |
42 | | */ |
43 | | |
/* OR_DTRACE_LINE expands to an extra "| ..." term inside a tracing-check
 * expression, forcing the tracing path whenever the DTrace line probe is
 * enabled.  Without DTrace support it expands to nothing. */
#ifdef WITH_DTRACE
#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
#else
#define OR_DTRACE_LINE
#endif

/* Decide whether computed-goto dispatch is used: default on when the
 * compiler supports it; explicitly requesting it on a compiler that does
 * not support it is a hard error. */
#ifdef HAVE_COMPUTED_GOTOS
    #ifndef USE_COMPUTED_GOTOS
    #define USE_COMPUTED_GOTOS 1
    #endif
#else
    #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
    #error "Computed gotos are not supported on this compiler."
    #endif
    #undef USE_COMPUTED_GOTOS
    #define USE_COMPUTED_GOTOS 0
#endif
61 | | |
#ifdef Py_STATS
/* Per-opcode statistics: bump the execution counter for `op`, record the
 * (previous opcode, op) pair count, and remember `op` as the new "last"
 * opcode.  Relies on a local `lastopcode` being in scope at the use site. */
#define INSTRUCTION_STATS(op) \
    do { \
        PyStats *s = _PyStats_GET(); \
        OPCODE_EXE_INC(op); \
        if (s) s->opcode_stats[lastopcode].pair_count[op]++; \
        lastopcode = op; \
    } while (0)
#else
#define INSTRUCTION_STATS(op) ((void)0)
#endif

/* Parameter/argument lists threaded through every tail-called instruction
 * handler.  The Py_STATS build carries `lastopcode` as one extra argument;
 * the two lists must be kept in sync. */
#ifdef Py_STATS
# define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, const void *instruction_funcptr_table, int oparg, int lastopcode
# define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg, lastopcode
#else
# define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, const void *instruction_funcptr_table, int oparg
# define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, oparg
#endif
81 | | |
#if _Py_TAIL_CALL_INTERP
/* Tail-calling interpreter: every instruction handler is a separate
 * function and dispatch is a compiler-enforced tail call through a table
 * of function pointers, so the C stack does not grow per instruction. */
#  if defined(__clang__) || defined(__GNUC__)
#    if !_Py__has_attribute(preserve_none) || !_Py__has_attribute(musttail)
#      error "This compiler does not have support for efficient tail calling."
#    endif
#  elif defined(_MSC_VER) && (_MSC_VER < 1950)
#    error "You need at least VS 2026 / PlatformToolset v145 for tail calling."
#  endif
#  if defined(_MSC_VER) && !defined(__clang__)
#    define Py_MUSTTAIL [[msvc::musttail]]
#    define Py_PRESERVE_NONE_CC __preserve_none
#  else
#    define Py_MUSTTAIL __attribute__((musttail))
#    define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
#  endif
/* Type of a tail-callable instruction handler. */
typedef PyObject *(Py_PRESERVE_NONE_CC *py_tail_call_funcptr)(TAIL_CALL_PARAMS);

/* DISPATCH_TABLE_VAR is the swappable local holding the active table;
 * DISPATCH_TABLE / TRACING_DISPATCH_TABLE are the normal and JIT-tracing
 * handler tables (see ENTER_TRACING / LEAVE_TRACING below). */
#  define DISPATCH_TABLE_VAR instruction_funcptr_table
#  define DISPATCH_TABLE instruction_funcptr_handler_table
#  define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
#  define TARGET(op) Py_NO_INLINE PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_##op(TAIL_CALL_PARAMS)

/* Dispatch through the currently-active table (honors tracing mode). */
#  define DISPATCH_GOTO() \
    do { \
        Py_MUSTTAIL return (((py_tail_call_funcptr *)instruction_funcptr_table)[opcode])(TAIL_CALL_ARGS); \
    } while (0)
/* Dispatch through the non-tracing table unconditionally. */
#  define DISPATCH_GOTO_NON_TRACING() \
    do { \
        Py_MUSTTAIL return (((py_tail_call_funcptr *)DISPATCH_TABLE)[opcode])(TAIL_CALL_ARGS); \
    } while (0)
#  define JUMP_TO_LABEL(name) \
    do { \
        Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \
    } while (0)
/* Re-dispatch to the generic ("predicted") variant of an instruction.
 * Note it passes this_instr, not next_instr, so the instruction restarts. */
#  ifdef Py_STATS
#    define JUMP_TO_PREDICTED(name) \
    do { \
        Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg, lastopcode); \
    } while (0)
#  else
#    define JUMP_TO_PREDICTED(name) \
    do { \
        Py_MUSTTAIL return (_TAIL_CALL_##name)(frame, stack_pointer, tstate, this_instr, instruction_funcptr_table, oparg); \
    } while (0)
#  endif
#  define LABEL(name) TARGET(name)
#elif USE_COMPUTED_GOTOS
/* Computed-goto interpreter: one label per opcode; dispatch is an indirect
 * goto through a table of label addresses (see the comment at the top of
 * this file for why this predicts better than a switch). */
#  define DISPATCH_TABLE_VAR opcode_targets
#  define DISPATCH_TABLE opcode_targets_table
#  define TRACING_DISPATCH_TABLE opcode_tracing_targets_table
#  define TARGET(op) TARGET_##op:
#  define DISPATCH_GOTO() goto *opcode_targets[opcode]
#  define DISPATCH_GOTO_NON_TRACING() goto *DISPATCH_TABLE[opcode];
#  define JUMP_TO_LABEL(name) goto name;
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
#  define LABEL(name) name:
#else
/* Portable switch-based interpreter: dispatch_code carries the opcode with
 * a tracing value OR-ed in (see the non-table ENTER_TRACING below). */
#  define TARGET(op) case op: TARGET_##op:
#  define DISPATCH_GOTO() dispatch_code = opcode | tracing_mode ; goto dispatch_opcode
#  define DISPATCH_GOTO_NON_TRACING() dispatch_code = opcode; goto dispatch_opcode
#  define JUMP_TO_LABEL(name) goto name;
#  define JUMP_TO_PREDICTED(name) goto PREDICTED_##name;
#  define LABEL(name) name:
#endif
146 | | |
#if (_Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS) && _Py_TIER2
/* With a real dispatch table, "JIT tracing mode" is encoded by swapping
 * the table variable between the normal and tracing handler tables. */
# define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE)
# define ENTER_TRACING() \
    DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE;
# define LEAVE_TRACING() \
    DISPATCH_TABLE_VAR = DISPATCH_TABLE;
#else
/* Switch-based dispatch: tracing is a flag OR-ed into the dispatch code. */
# define IS_JIT_TRACING() (tracing_mode != 0)
# define ENTER_TRACING() tracing_mode = 255
# define LEAVE_TRACING() tracing_mode = 0
#endif

#if _Py_TIER2
/* Leave JIT tracing mode (if active) and tell the JIT to finalize the
 * partially-recorded trace.  Note: expands with a trailing semicolon. */
#define STOP_TRACING() \
    do { \
        if (IS_JIT_TRACING()) { \
            LEAVE_TRACING(); \
            _PyJit_FinalizeTracing(tstate, 0); \
        } \
    } while (0);
#else
#define STOP_TRACING() ((void)(0));
#endif
170 | | |
171 | | |
/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
#ifdef Py_DEBUG
#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
    lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
#else
#define PRE_DISPATCH_GOTO() ((void)0)
#endif

#ifdef Py_DEBUG
/* Recompute the frame's low-level-trace verbosity when (re)entering it.
 * The stack pointer is published to the frame around the call so the
 * helper sees a consistent frame. */
#define LLTRACE_RESUME_FRAME() \
    do { \
        _PyFrame_SetStackPointer(frame, stack_pointer); \
        int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
        stack_pointer = _PyFrame_GetStackPointer(frame); \
        frame->lltrace = lltrace; \
    } while (0)
#else
#define LLTRACE_RESUME_FRAME() ((void)0)
#endif

/* Report a QSBR quiescent state in the free-threaded build; no-op with the GIL. */
#ifdef Py_GIL_DISABLED
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
#else
#define QSBR_QUIESCENT_STATE(tstate)
#endif
197 | | |
198 | | |
/* Do interpreter dispatch accounting for tracing and instrumentation */
#define DISPATCH() \
    { \
        assert(frame->stackpointer == NULL); \
        NEXTOPARG(); \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO(); \
    }

/* Like DISPATCH(), but always goes through the non-tracing handlers. */
#define DISPATCH_NON_TRACING() \
    { \
        assert(frame->stackpointer == NULL); \
        NEXTOPARG(); \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO_NON_TRACING(); \
    }

/* Re-dispatch keeping the current oparg: only the opcode is re-read.
 * Used when falling back from a specialized instruction to its generic
 * form; bypasses the tracing table. */
#define DISPATCH_SAME_OPARG() \
    { \
        opcode = next_instr->op.code; \
        PRE_DISPATCH_GOTO(); \
        DISPATCH_GOTO_NON_TRACING(); \
    }

/* Enter a new Python frame without a C-level call: make NEW_FRAME the
 * current frame and jump to the interpreter's frame-entry label. */
#define DISPATCH_INLINED(NEW_FRAME) \
    do { \
        assert(tstate->interp->eval_frame == NULL); \
        _PyFrame_SetStackPointer(frame, stack_pointer); \
        assert((NEW_FRAME)->previous == frame); \
        frame = tstate->current_frame = (NEW_FRAME); \
        CALL_STAT_INC(inlined_py_calls); \
        JUMP_TO_LABEL(start_frame); \
    } while (0)
232 | | |
/* Tuple access macros */

#ifndef Py_DEBUG
#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
#else
/* Debug build: same access, with type and bounds assertions. */
static inline PyObject *
GETITEM(PyObject *v, Py_ssize_t i) {
    assert(PyTuple_Check(v));
    assert(i >= 0);
    assert(i < PyTuple_GET_SIZE(v));
    return PyTuple_GET_ITEM(v, i);
}
#endif
246 | | |
/* Code access macros */

/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame)))
/* Fetch the next instruction word and split it into opcode/oparg.
 * The relaxed atomic load keeps the 16-bit read race-free in the
 * free-threaded build. */
#define NEXTOPARG() do { \
        _Py_CODEUNIT word = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
        opcode = word.op.code; \
        oparg = word.op.arg; \
    } while (0)

/* JUMPBY makes the generator identify the instruction as a jump. SKIP_OVER is
 * for advancing to the next instruction, taking into account cache entries
 * and skipped instructions.
 */
#define JUMPBY(x) (next_instr += (x))
#define SKIP_OVER(x) (next_instr += (x))

#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
#define STACK_SIZE() (_PyFrame_GetCode(frame)->co_stacksize)

/* Interpreter-owned frames are exempt from the bounds check. */
#define WITHIN_STACK_BOUNDS() \
    (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))

#if defined(Py_DEBUG) && !defined(_Py_JIT)
// This allows temporary stack "overflows", provided it's all in the cache at any point of time.
// NOTE(review): this expansion is currently identical to WITHIN_STACK_BOUNDS()
// and does not consult the cached-values count -- confirm against upstream.
#define WITHIN_STACK_BOUNDS_IGNORING_CACHE() \
    (frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && (STACK_LEVEL()) <= STACK_SIZE()))
#else
#define WITHIN_STACK_BOUNDS_IGNORING_CACHE WITHIN_STACK_BOUNDS
#endif
277 | | |
/* Data access macros */
#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)
#define FRAME_CO_NAMES (_PyFrame_GetCode(frame)->co_names)

/* Local variable macros */

#define LOCALS_ARRAY (frame->localsplus)
#define GETLOCAL(i) (frame->localsplus[i])


#ifdef Py_STATS
/* Record a specialization miss; also count it as a deopt when this miss is
 * about to trigger the adaptive backoff counter. */
#define UPDATE_MISS_STATS(INSTNAME) \
    do { \
        STAT_INC(opcode, miss); \
        STAT_INC((INSTNAME), miss); \
        /* The counter is always the first cache entry: */ \
        if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) { \
            STAT_INC((INSTNAME), deopt); \
        } \
    } while (0)
#else
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
#endif
301 | | |
302 | | |
// Try to lock an object in the free threading build, if it's not already
// locked. Use with a DEOPT_IF() to deopt if the object is already locked.
// These are no-ops in the default GIL build. The general pattern is:
//
// DEOPT_IF(!LOCK_OBJECT(op));
// if (/* condition fails */) {
//     UNLOCK_OBJECT(op);
//     DEOPT_IF(true);
// }
// ...
// UNLOCK_OBJECT(op);
//
// NOTE: The object must be unlocked on every exit code path and you should
// avoid any potentially escaping calls (like PyStackRef_CLOSE) while the
// object is locked.
#ifdef Py_GIL_DISABLED
# define LOCK_OBJECT(op) PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
# define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
#else
// With the GIL, "locking" always succeeds and unlocking is a no-op.
# define LOCK_OBJECT(op) (1)
# define UNLOCK_OBJECT(op) ((void)0)
#endif

/* Shorthands for the current frame's namespace/code objects. */
#define GLOBALS() frame->f_globals
#define BUILTINS() frame->f_builtins
#define LOCALS() frame->f_locals
#define CONSTS() _PyFrame_GetCode(frame)->co_consts
#define NAMES() _PyFrame_GetCode(frame)->co_names
331 | | |
/* Fire the DTrace function-entry probe when it is enabled. */
#define DTRACE_FUNCTION_ENTRY() \
    if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
        dtrace_function_entry(frame); \
    }

/* This takes a uint16_t instead of a _Py_BackoffCounter,
 * because it is used directly on the cache entry in generated code,
 * which is always an integral type. */
// Force re-specialization when tracing a side exit to get good side exits.
#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
    backoff_counter_triggers(forge_backoff_counter((COUNTER)))

/* Count down toward the next (re)specialization attempt.
 * Note: both macros below expand with a trailing semicolon. */
#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \
    do { \
        (COUNTER) = advance_backoff_counter((COUNTER)); \
    } while (0);

#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \
    do { \
        (COUNTER) = pause_backoff_counter((COUNTER)); \
    } while (0);

#ifdef ENABLE_SPECIALIZATION
/* Multiple threads may execute these concurrently if thread-local bytecode is
 * disabled and they all execute the main copy of the bytecode. Specialization
 * is disabled in that case so the value is unused, but the RMW cycle should be
 * free of data races.
 */
/* Shift the branch history left one bit and record `flag` as the new bit. */
#define RECORD_BRANCH_TAKEN(bitset, flag) \
    FT_ATOMIC_STORE_UINT16_RELAXED( \
        bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
#else
#define RECORD_BRANCH_TAKEN(bitset, flag)
#endif

/* Error-message format strings shared by the name-lookup opcodes. */
#define UNBOUNDLOCAL_ERROR_MSG \
    "cannot access local variable '%s' where it is not associated with a value"
#define UNBOUNDFREE_ERROR_MSG \
    "cannot access free variable '%s' where it is not associated with a value" \
    " in enclosing scope"
#define NAME_ERROR_MSG "name '%.200s' is not defined"
373 | | |
// If a trace function sets a new f_lineno and
// *then* raises, we use the destination when searching
// for an exception handler, displaying the traceback, and so on
/* Jump while notifying instrumentation.  If already tracing, jump directly;
 * otherwise let the instrumentation hook choose the destination.  A NULL
 * result means the hook raised: go to the error label with next_instr
 * positioned one past `dest`.  Expands with a trailing semicolon. */
#define INSTRUMENTED_JUMP(src, dest, event) \
    do { \
        if (tstate->tracing) {\
            next_instr = dest; \
        } else { \
            _PyFrame_SetStackPointer(frame, stack_pointer); \
            next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
            stack_pointer = _PyFrame_GetStackPointer(frame); \
            if (next_instr == NULL) { \
                next_instr = (dest)+1; \
                JUMP_TO_LABEL(error); \
            } \
        } \
    } while (0);
391 | | |
392 | | |
393 | 280M | static inline int _Py_EnterRecursivePy(PyThreadState *tstate) { |
394 | 280M | return (tstate->py_recursion_remaining-- <= 0) && |
395 | 167 | _Py_CheckRecursiveCallPy(tstate); |
396 | 280M | } |
397 | | |
398 | 1.22G | static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { |
399 | 1.22G | tstate->py_recursion_remaining++; |
400 | 1.22G | } |
401 | | |
/* Implementation of "macros" that modify the instruction pointer,
 * stack pointer, or frame pointer.
 * These need to treated differently by tier 1 and 2.
 * The Tier 1 version is here; Tier 2 is inlined in ceval.c. */

/* Reload next_instr from the frame, OFFSET code units past instr_ptr. */
#define LOAD_IP(OFFSET) do { \
        next_instr = frame->instr_ptr + (OFFSET); \
    } while (0)

/* There's no STORE_IP(), it's inlined by the code generator. */

/* Reload / publish the evaluation stack pointer from / to the frame. */
#define LOAD_SP() \
    stack_pointer = _PyFrame_GetStackPointer(frame)

#define SAVE_SP() \
    _PyFrame_SetStackPointer(frame, stack_pointer)
418 | | |
/* Tier-switching macros. */

/* Run a Tier-2 executor from Tier 1.  _Py_jit_entry() returns the next
 * Tier-1 instruction with the low bit set when JIT tracing should continue;
 * NULL (after masking) means an exception is already set. */
#define TIER1_TO_TIER2(EXECUTOR) \
    do { \
        OPT_STAT_INC(traces_executed); \
        next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
        frame = tstate->current_frame; \
        stack_pointer = _PyFrame_GetStackPointer(frame); \
        int keep_tracing_bit = (uintptr_t)next_instr & 1; \
        next_instr = (_Py_CODEUNIT *)(((uintptr_t)next_instr) & (~1)); \
        if (next_instr == NULL) { \
            /* gh-140104: The exception handler expects frame->instr_ptr
               to after this_instr, not this_instr! */ \
            next_instr = frame->instr_ptr + 1; \
            JUMP_TO_LABEL(error); \
        } \
        if (keep_tracing_bit) { \
            assert(uop_buffer_length(&((_PyThreadStateImpl *)tstate)->jit_tracer_state->code_buffer)); \
            ENTER_TRACING(); \
            DISPATCH_NON_TRACING(); \
        } \
        DISPATCH(); \
    } while (0)

/* Chain directly into another Tier-2 executor. */
#define TIER2_TO_TIER2(EXECUTOR) \
    do { \
        OPT_STAT_INC(traces_executed); \
        current_executor = (EXECUTOR); \
        goto tier2_start; \
    } while (0)

/* Common bookkeeping before returning from Tier 2 back to Tier 1. */
#define GOTO_TIER_ONE_SETUP \
    tstate->current_executor = NULL; \
    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
    _PyFrame_SetStackPointer(frame, stack_pointer);

#define GOTO_TIER_ONE(TARGET) \
    do \
    { \
        GOTO_TIER_ONE_SETUP \
        return (_Py_CODEUNIT *)(TARGET); \
    } while (0)

/* As GOTO_TIER_ONE(), but tag the low bit so the caller keeps JIT tracing
 * going (decoded by TIER1_TO_TIER2 above). */
#define GOTO_TIER_ONE_CONTINUE_TRACING(TARGET) \
    do \
    { \
        GOTO_TIER_ONE_SETUP \
        return (_Py_CODEUNIT *)(((uintptr_t)(TARGET))| 1); \
    } while (0)
468 | | |
/* Accessors for the current micro-op's fields (Tier 2).  next_uop has
 * already been advanced past the current uop, hence the [-1]. */
#define CURRENT_OPARG() (next_uop[-1].oparg)
#define CURRENT_OPERAND0_64() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_64() (next_uop[-1].operand1)
#define CURRENT_OPERAND0_32() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_32() (next_uop[-1].operand1)
#define CURRENT_OPERAND0_16() (next_uop[-1].operand0)
#define CURRENT_OPERAND1_16() (next_uop[-1].operand1)
#define CURRENT_TARGET() (next_uop[-1].target)

#define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target
#define JUMP_TO_ERROR() goto jump_to_error_target

/* Stackref macros */

/* How much scratch space to give stackref to PyObject* conversion. */
#define MAX_STACKREF_SCRATCH 10

#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \
    /* +1 because vectorcall might use -1 to write self */ \
    PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \
    PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp);

#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \
    /* +1 because we +1 previously */ \
    _PyObjectArray_Free(NAME - 1, NAME##_temp);

/* True when _PyObjectArray_FromStackRefArray() failed (returned NULL). */
#define CONVERSION_FAILED(NAME) ((NAME) == NULL)

/* Debug-only bookkeeping of the interpreter's cached stack values count. */
#if defined(Py_DEBUG) && !defined(_Py_JIT)
#define SET_CURRENT_CACHED_VALUES(N) current_cached_values = (N)
#define CHECK_CURRENT_CACHED_VALUES(N) assert(current_cached_values == (N))
#else
#define SET_CURRENT_CACHED_VALUES(N) ((void)0)
#define CHECK_CURRENT_CACHED_VALUES(N) ((void)0)
#endif

/* True when a PEP 523 frame-evaluation hook is installed. */
#define IS_PEP523_HOOKED(tstate) (tstate->interp->eval_frame != NULL)
506 | | |
507 | | static inline int |
508 | 4.18G | check_periodics(PyThreadState *tstate) { |
509 | 4.18G | _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); |
510 | 4.18G | QSBR_QUIESCENT_STATE(tstate); |
511 | 4.18G | if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { |
512 | 88.2k | return _Py_HandlePending(tstate); |
513 | 88.2k | } |
514 | 4.18G | return 0; |
515 | 4.18G | } |
516 | | |
// Mark the generator as executing. Returns true if the state was changed,
// false if it was already executing or finished.
static inline bool
gen_try_set_executing(PyGenObject *gen)
{
#ifdef Py_GIL_DISABLED
    if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) {
        // Shared generator: CAS the frame state so that only one thread
        // can win the transition to FRAME_EXECUTING.
        int8_t frame_state = _Py_atomic_load_int8_relaxed(&gen->gi_frame_state);
        while (frame_state < FRAME_SUSPENDED_YIELD_FROM_LOCKED) {
            if (_Py_atomic_compare_exchange_int8(&gen->gi_frame_state,
                                                 &frame_state,
                                                 FRAME_EXECUTING)) {
                return true;
            }
        }
        // NB: We return false for FRAME_SUSPENDED_YIELD_FROM_LOCKED as well.
        // That case is rare enough that we can just handle it in the deopt.
        return false;
    }
#endif
    // Use faster non-atomic modifications in the GIL-enabled build and when
    // the object is uniquely referenced in the free-threaded build.
    if (gen->gi_frame_state < FRAME_EXECUTING) {
        assert(gen->gi_frame_state != FRAME_SUSPENDED_YIELD_FROM_LOCKED);
        gen->gi_frame_state = FRAME_EXECUTING;
        return true;
    }
    return false;
}