/src/cpython/Python/ceval_gil.c
Line | Count | Source |
1 | | #include "Python.h" |
2 | | #include "pycore_ceval.h" // _PyEval_SignalReceived() |
3 | | #include "pycore_gc.h" // _Py_RunGC() |
4 | | #include "pycore_initconfig.h" // _PyStatus_OK() |
5 | | #include "pycore_optimizer.h" // _Py_Executors_InvalidateCold() |
6 | | #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() |
7 | | #include "pycore_pylifecycle.h" // _PyErr_Print() |
8 | | #include "pycore_pystats.h" // _Py_PrintSpecializationStats() |
9 | | #include "pycore_runtime.h" // _PyRuntime |
10 | | |
11 | | |
12 | | /* |
13 | | Notes about the implementation: |
14 | | |
15 | | - The GIL is just a boolean variable (locked) whose access is protected |
16 | | by a mutex (gil_mutex), and whose changes are signalled by a condition |
17 | | variable (gil_cond). gil_mutex is taken for short periods of time, |
18 | | and therefore mostly uncontended. |
19 | | |
20 | | - In the GIL-holding thread, the main loop (PyEval_EvalFrameEx) must be |
21 | | able to release the GIL on demand by another thread. A volatile boolean |
22 | | variable (gil_drop_request) is used for that purpose, which is checked |
23 | | at every turn of the eval loop. That variable is set after a wait of |
24 | | `interval` microseconds on `gil_cond` has timed out. |
25 | | |
26 | | [Actually, another volatile boolean variable (eval_breaker) is used |
27 | | which ORs several conditions into one. Volatile booleans are |
28 | | sufficient as inter-thread signalling means since Python is run |
29 | | on cache-coherent architectures only.] |
30 | | |
31 | | - A thread wanting to take the GIL will first let pass a given amount of |
32 | | time (`interval` microseconds) before setting gil_drop_request. This |
33 | | encourages a defined switching period, but doesn't enforce it since |
34 | | opcodes can take an arbitrary time to execute. |
35 | | |
36 | | The `interval` value is available for the user to read and modify |
37 | | using the Python API `sys.{get,set}switchinterval()`. |
38 | | |
39 | | - When a thread releases the GIL and gil_drop_request is set, that thread |
40 | | ensures that another GIL-awaiting thread gets scheduled. |
41 | | It does so by waiting on a condition variable (switch_cond) until |
42 | | the value of last_holder is changed to something other than its |
43 | | own thread state pointer, indicating that another thread was able to |
44 | | take the GIL. |
45 | | |
46 | | This is meant to prohibit the latency-adverse behaviour on multi-core |
47 | | machines where one thread would speculatively release the GIL, but still |
48 | | run and end up being the first to re-acquire it, making the "timeslices" |
49 | | much longer than expected. |
50 | | (Note: this mechanism is enabled with FORCE_SWITCHING above) |
51 | | */ |
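A minimal standalone sketch of the handshake described in the notes above, reduced to plain pthreads (illustrative only, not part of ceval_gil.c; all names are invented): a "holder" runs a fake eval loop and polls a drop-request flag, while a "waiter" sleeps on a condition variable for a fixed interval and raises the request when the wait times out.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int locked = 1;               /* the "GIL": initially held by main() */
static atomic_int drop_request;      /* stands in for the drop-request bit */

static void *
waiter(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&mutex);
    while (locked) {
        struct timespec deadline;
        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_nsec += 5 * 1000 * 1000;     /* 5 ms, like DEFAULT_INTERVAL */
        if (deadline.tv_nsec >= 1000000000L) {
            deadline.tv_sec += 1;
            deadline.tv_nsec -= 1000000000L;
        }
        int rc = pthread_cond_timedwait(&cond, &mutex, &deadline);
        if (rc != 0 && locked) {
            /* Timed out and no switch happened: ask the holder to drop. */
            atomic_store(&drop_request, 1);
        }
    }
    locked = 1;                                  /* we now "hold the GIL" */
    pthread_mutex_unlock(&mutex);
    printf("waiter acquired the lock\n");
    return NULL;
}

int
main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, waiter, NULL);

    /* Fake eval loop: the spin stands for executing bytecodes until a
       drop is requested. */
    while (!atomic_load(&drop_request)) {
    }

    pthread_mutex_lock(&mutex);
    locked = 0;                                  /* release the "GIL" */
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&mutex);

    pthread_join(t, NULL);
    return 0;
}

Build with `cc -pthread`. In CPython the same roles are played by gil->mutex, gil->cond, gil->locked and the _PY_GIL_DROP_REQUEST_BIT in each thread's eval_breaker.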
52 | | |
53 | | // Atomically copy the bits indicated by mask between two values. |
54 | | static inline void |
55 | | copy_eval_breaker_bits(uintptr_t *from, uintptr_t *to, uintptr_t mask) |
56 | 31.1k | { |
57 | 31.1k | uintptr_t from_bits = _Py_atomic_load_uintptr_relaxed(from) & mask; |
58 | 31.1k | uintptr_t old_value = _Py_atomic_load_uintptr_relaxed(to); |
59 | 31.1k | uintptr_t to_bits = old_value & mask; |
60 | 31.1k | if (from_bits == to_bits) { |
61 | 31.1k | return; |
62 | 31.1k | } |
63 | | |
64 | 0 | uintptr_t new_value; |
65 | 0 | do { |
66 | 0 | new_value = (old_value & ~mask) | from_bits; |
67 | 0 | } while (!_Py_atomic_compare_exchange_uintptr(to, &old_value, new_value)); |
68 | 0 | } |
69 | | |
70 | | // When attaching a thread, set the global instrumentation version and |
71 | | // _PY_CALLS_TO_DO_BIT from the current state of the interpreter. |
72 | | static inline void |
73 | | update_eval_breaker_for_thread(PyInterpreterState *interp, PyThreadState *tstate) |
74 | 31.1k | { |
75 | | #ifdef Py_GIL_DISABLED |
76 | | // Free-threaded builds eagerly update the eval_breaker on *all* threads as |
77 | | // needed, so this function doesn't apply. |
78 | | return; |
79 | | #endif |
80 | | |
81 | 31.1k | int32_t npending = _Py_atomic_load_int32_relaxed( |
82 | 31.1k | &interp->ceval.pending.npending); |
83 | 31.1k | if (npending) { |
84 | 0 | _Py_set_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); |
85 | 0 | } |
86 | 31.1k | else if (_Py_IsMainThread()) { |
87 | 31.1k | npending = _Py_atomic_load_int32_relaxed( |
88 | 31.1k | &_PyRuntime.ceval.pending_mainthread.npending); |
89 | 31.1k | if (npending) { |
90 | 0 | _Py_set_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); |
91 | 0 | } |
92 | 31.1k | } |
93 | | |
94 | | // _PY_CALLS_TO_DO_BIT was derived from other state above, so the only bits |
95 | | // we copy from our interpreter's state are the instrumentation version. |
96 | 31.1k | copy_eval_breaker_bits(&interp->ceval.instrumentation_version, |
97 | 31.1k | &tstate->eval_breaker, |
98 | 31.1k | ~_PY_EVAL_EVENTS_MASK); |
99 | 31.1k | } |
100 | | |
101 | | /* |
102 | | * Implementation of the Global Interpreter Lock (GIL). |
103 | | */ |
104 | | |
105 | | #include <stdlib.h> |
106 | | #include <errno.h> |
107 | | |
108 | | #include "condvar.h" |
109 | | |
110 | | #define MUTEX_INIT(mut) \ |
111 | 32 | if (PyMUTEX_INIT(&(mut))) { \ |
112 | 32 | Py_FatalError("PyMUTEX_INIT(" #mut ") failed"); }; |
113 | | #define MUTEX_FINI(mut) \ |
114 | 0 | if (PyMUTEX_FINI(&(mut))) { \ |
115 | 0 | Py_FatalError("PyMUTEX_FINI(" #mut ") failed"); }; |
116 | | #define MUTEX_LOCK(mut) \ |
117 | 93.3k | if (PyMUTEX_LOCK(&(mut))) { \ |
118 | 93.3k | Py_FatalError("PyMUTEX_LOCK(" #mut ") failed"); }; |
119 | | #define MUTEX_UNLOCK(mut) \ |
120 | 93.3k | if (PyMUTEX_UNLOCK(&(mut))) { \ |
121 | 93.3k | Py_FatalError("PyMUTEX_UNLOCK(" #mut ") failed"); }; |
122 | | |
123 | | #define COND_INIT(cond) \ |
124 | 32 | if (PyCOND_INIT(&(cond))) { \ |
125 | 32 | Py_FatalError("PyCOND_INIT(" #cond ") failed"); }; |
126 | | #define COND_FINI(cond) \ |
127 | 0 | if (PyCOND_FINI(&(cond))) { \ |
128 | 0 | Py_FatalError("PyCOND_FINI(" #cond ") failed"); }; |
129 | | #define COND_SIGNAL(cond) \ |
130 | 62.2k | if (PyCOND_SIGNAL(&(cond))) { \ |
131 | 62.2k | Py_FatalError("PyCOND_SIGNAL(" #cond ") failed"); }; |
132 | | #define COND_WAIT(cond, mut) \ |
133 | 0 | if (PyCOND_WAIT(&(cond), &(mut))) { \ |
134 | 0 | Py_FatalError("PyCOND_WAIT(" #cond ") failed"); }; |
135 | | #define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \ |
136 | 0 | { \ |
137 | 0 | int r = PyCOND_TIMEDWAIT(&(cond), &(mut), (microseconds)); \ |
138 | 0 | if (r < 0) \ |
139 | 0 | Py_FatalError("PyCOND_WAIT(" #cond ") failed"); \ |
140 | 0 | if (r) /* 1 == timeout, 2 == impl. can't say, so assume timeout */ \ |
141 | 0 | timeout_result = 1; \ |
142 | 0 | else \ |
143 | 0 | timeout_result = 0; \ |
144 | 0 | } \ |
145 | | |
146 | | |
147 | 16 | #define DEFAULT_INTERVAL 5000 |
148 | | |
149 | | static void _gil_initialize(struct _gil_runtime_state *gil) |
150 | 16 | { |
151 | 16 | gil->locked = -1; |
152 | 16 | gil->interval = DEFAULT_INTERVAL; |
153 | 16 | } |
154 | | |
155 | | static int gil_created(struct _gil_runtime_state *gil) |
156 | 0 | { |
157 | 0 | if (gil == NULL) { |
158 | 0 | return 0; |
159 | 0 | } |
160 | 0 | return (_Py_atomic_load_int_acquire(&gil->locked) >= 0); |
161 | 0 | } |
162 | | |
163 | | static void create_gil(struct _gil_runtime_state *gil) |
164 | 16 | { |
165 | 16 | MUTEX_INIT(gil->mutex); |
166 | 16 | #ifdef FORCE_SWITCHING |
167 | 16 | MUTEX_INIT(gil->switch_mutex); |
168 | 16 | #endif |
169 | 16 | COND_INIT(gil->cond); |
170 | 16 | #ifdef FORCE_SWITCHING |
171 | 16 | COND_INIT(gil->switch_cond); |
172 | 16 | #endif |
173 | 16 | _Py_atomic_store_ptr_relaxed(&gil->last_holder, 0); |
174 | 16 | _Py_ANNOTATE_RWLOCK_CREATE(&gil->locked); |
175 | 16 | _Py_atomic_store_int_release(&gil->locked, 0); |
176 | 16 | } |
177 | | |
178 | | static void destroy_gil(struct _gil_runtime_state *gil) |
179 | 0 | { |
180 | | /* some pthread-like implementations tie the mutex to the cond |
181 | | * and must have the cond destroyed first. |
182 | | */ |
183 | 0 | COND_FINI(gil->cond); |
184 | 0 | MUTEX_FINI(gil->mutex); |
185 | 0 | #ifdef FORCE_SWITCHING |
186 | 0 | COND_FINI(gil->switch_cond); |
187 | 0 | MUTEX_FINI(gil->switch_mutex); |
188 | 0 | #endif |
189 | 0 | _Py_atomic_store_int_release(&gil->locked, -1); |
190 | 0 | _Py_ANNOTATE_RWLOCK_DESTROY(&gil->locked); |
191 | 0 | } |
192 | | |
193 | | #ifdef HAVE_FORK |
194 | | static void recreate_gil(struct _gil_runtime_state *gil) |
195 | 0 | { |
196 | 0 | _Py_ANNOTATE_RWLOCK_DESTROY(&gil->locked); |
197 | | /* XXX should we destroy the old OS resources here? */ |
198 | 0 | create_gil(gil); |
199 | 0 | } |
200 | | #endif |
201 | | |
202 | | static inline void |
203 | | drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil) |
204 | 31.1k | { |
205 | 31.1k | MUTEX_LOCK(gil->mutex); |
206 | 31.1k | _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1); |
207 | 31.1k | _Py_atomic_store_int_relaxed(&gil->locked, 0); |
208 | 31.1k | if (tstate != NULL) { |
209 | 31.1k | tstate->holds_gil = 0; |
210 | 31.1k | } |
211 | 31.1k | COND_SIGNAL(gil->cond); |
212 | 31.1k | MUTEX_UNLOCK(gil->mutex); |
213 | 31.1k | } |
214 | | |
215 | | static void |
216 | | drop_gil(PyInterpreterState *interp, PyThreadState *tstate, int final_release) |
217 | 31.1k | { |
218 | 31.1k | struct _ceval_state *ceval = &interp->ceval; |
219 | | /* If final_release is true, the caller is indicating that we're releasing |
220 | | the GIL for the last time in this thread. This is particularly |
221 | | relevant when the current thread state is finalizing or its |
222 | | interpreter is finalizing (either may be in an inconsistent |
223 | | state). In that case the current thread will definitely |
224 | | never try to acquire the GIL again. */ |
225 | | // XXX It may be more correct to check tstate->_status.finalizing. |
226 | | // XXX assert(final_release || !tstate->_status.cleared); |
227 | | |
228 | 31.1k | assert(final_release || tstate != NULL); |
229 | 31.1k | struct _gil_runtime_state *gil = ceval->gil; |
230 | | #ifdef Py_GIL_DISABLED |
231 | | // Check if we have the GIL before dropping it. tstate will be NULL if |
232 | | // take_gil() detected that this thread has been destroyed, in which case |
233 | | // we know we have the GIL. |
234 | | if (tstate != NULL && !tstate->holds_gil) { |
235 | | return; |
236 | | } |
237 | | #endif |
238 | 31.1k | if (!_Py_atomic_load_int_relaxed(&gil->locked)) { |
239 | 0 | Py_FatalError("drop_gil: GIL is not locked"); |
240 | 0 | } |
241 | | |
242 | 31.1k | if (!final_release) { |
243 | | /* Sub-interpreter support: threads might have been switched |
244 | | under our feet using PyThreadState_Swap(). Fix the GIL last |
245 | | holder variable so that our heuristics work. */ |
246 | 31.1k | _Py_atomic_store_ptr_relaxed(&gil->last_holder, tstate); |
247 | 31.1k | } |
248 | | |
249 | 31.1k | drop_gil_impl(tstate, gil); |
250 | | |
251 | 31.1k | #ifdef FORCE_SWITCHING |
252 | | /* We might be releasing the GIL for the last time in this thread. In that |
253 | | case there's a possible race with tstate->interp getting deleted after |
254 | | gil->mutex is unlocked and before the following code runs, leading to a |
255 | | crash. We can use final_release to indicate the thread is done with the |
256 | | GIL, and that's the only time we might delete the interpreter. See |
257 | | https://github.com/python/cpython/issues/104341. */ |
258 | 31.1k | if (!final_release && |
259 | 31.1k | _Py_eval_breaker_bit_is_set(tstate, _PY_GIL_DROP_REQUEST_BIT)) { |
260 | 0 | MUTEX_LOCK(gil->switch_mutex); |
261 | | /* Not switched yet => wait */ |
262 | 0 | if (((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) == tstate) |
263 | 0 | { |
264 | 0 | assert(_PyThreadState_CheckConsistency(tstate)); |
265 | 0 | _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); |
266 | | /* NOTE: if COND_WAIT does not atomically start waiting when |
267 | | releasing the mutex, another thread can run through, take |
268 | | the GIL and drop it again, and reset the condition |
269 | | before we even had a chance to wait for it. */ |
270 | 0 | COND_WAIT(gil->switch_cond, gil->switch_mutex); |
271 | 0 | } |
272 | 0 | MUTEX_UNLOCK(gil->switch_mutex); |
273 | 0 | } |
274 | 31.1k | #endif |
275 | 31.1k | } |
276 | | |
277 | | |
278 | | /* Take the GIL. |
279 | | |
280 | | The function saves errno at entry and restores its value at exit. |
281 | | It may hang rather than return if the interpreter has been finalized. |
282 | | |
283 | | tstate must be non-NULL. */ |
284 | | static void |
285 | | take_gil(PyThreadState *tstate) |
286 | 31.1k | { |
287 | 31.1k | int err = errno; |
288 | | |
289 | 31.1k | assert(tstate != NULL); |
290 | | /* We shouldn't be using a thread state that isn't viable any more. */ |
291 | | // XXX It may be more correct to check tstate->_status.finalizing. |
292 | | // XXX assert(!tstate->_status.cleared); |
293 | | |
294 | 31.1k | if (_PyThreadState_MustExit(tstate)) { |
295 | | /* bpo-39877: If Py_Finalize() has been called and tstate is not the |
296 | | thread which called Py_Finalize(), this thread cannot continue. |
297 | | |
298 | | This code path can be reached by a daemon thread after Py_Finalize() |
299 | | completes. |
300 | | |
301 | | This used to call a *thread_exit API, but that was not safe as it |
302 | | lacks stack unwinding and local variable destruction important to |
303 | | C++. gh-87135: The best that can be done is to hang the thread as |
304 | | the public APIs calling this have no error reporting mechanism (!). |
305 | | */ |
306 | 0 | _PyThreadState_HangThread(tstate); |
307 | 0 | } |
308 | | |
309 | 31.1k | assert(_PyThreadState_CheckConsistency(tstate)); |
310 | 31.1k | PyInterpreterState *interp = tstate->interp; |
311 | 31.1k | struct _gil_runtime_state *gil = interp->ceval.gil; |
312 | | #ifdef Py_GIL_DISABLED |
313 | | if (!_Py_atomic_load_int_relaxed(&gil->enabled)) { |
314 | | return; |
315 | | } |
316 | | #endif |
317 | | |
318 | | /* Check that _PyEval_InitThreads() was called to create the lock */ |
319 | 31.1k | assert(gil_created(gil)); |
320 | | |
321 | 31.1k | MUTEX_LOCK(gil->mutex); |
322 | | |
323 | 31.1k | int drop_requested = 0; |
324 | 31.1k | while (_Py_atomic_load_int_relaxed(&gil->locked)) { |
325 | 0 | unsigned long saved_switchnum = gil->switch_number; |
326 | |
327 | 0 | unsigned long interval = _Py_atomic_load_ulong_relaxed(&gil->interval); |
328 | 0 | if (interval < 1) { |
329 | 0 | interval = 1; |
330 | 0 | } |
331 | 0 | int timed_out = 0; |
332 | 0 | COND_TIMED_WAIT(gil->cond, gil->mutex, interval, timed_out); |
333 | | |
334 | | /* If we timed out and no switch occurred in the meantime, it is time |
335 | | to ask the GIL-holding thread to drop it. */ |
336 | 0 | if (timed_out && |
337 | 0 | _Py_atomic_load_int_relaxed(&gil->locked) && |
338 | 0 | gil->switch_number == saved_switchnum) |
339 | 0 | { |
340 | 0 | PyThreadState *holder_tstate = |
341 | 0 | (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder); |
342 | 0 | if (_PyThreadState_MustExit(tstate)) { |
343 | 0 | MUTEX_UNLOCK(gil->mutex); |
344 | | // gh-96387: If the loop requested a drop request in a previous |
345 | | // iteration, reset the request. Otherwise, drop_gil() can |
346 | | // block forever waiting for the thread which exited. Drop |
347 | | // requests made by other threads are also reset: these threads |
348 | | // may have to request again a drop request (iterate one more |
349 | | // time). |
350 | 0 | if (drop_requested) { |
351 | 0 | _Py_unset_eval_breaker_bit(holder_tstate, _PY_GIL_DROP_REQUEST_BIT); |
352 | 0 | } |
353 | | // gh-87135: hang the thread as *thread_exit() is not a safe |
354 | | // API. It lacks stack unwind and local variable destruction. |
355 | 0 | _PyThreadState_HangThread(tstate); |
356 | 0 | } |
357 | 0 | assert(_PyThreadState_CheckConsistency(tstate)); |
358 | |
359 | 0 | _Py_set_eval_breaker_bit(holder_tstate, _PY_GIL_DROP_REQUEST_BIT); |
360 | 0 | drop_requested = 1; |
361 | 0 | } |
362 | 0 | } |
363 | | |
364 | | #ifdef Py_GIL_DISABLED |
365 | | if (!_Py_atomic_load_int_relaxed(&gil->enabled)) { |
366 | | // Another thread disabled the GIL between our check above and |
367 | | // now. Don't take the GIL, signal any other waiting threads, and |
368 | | // return. |
369 | | COND_SIGNAL(gil->cond); |
370 | | MUTEX_UNLOCK(gil->mutex); |
371 | | return; |
372 | | } |
373 | | #endif |
374 | | |
375 | 31.1k | #ifdef FORCE_SWITCHING |
376 | | /* This mutex must be taken before modifying gil->last_holder: |
377 | | see drop_gil(). */ |
378 | 62.2k | MUTEX_LOCK(gil->switch_mutex); |
379 | 62.2k | #endif |
380 | | /* We now hold the GIL */ |
381 | 62.2k | _Py_atomic_store_int_relaxed(&gil->locked, 1); |
382 | 62.2k | _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil->locked, /*is_write=*/1); |
383 | | |
384 | 62.2k | if (tstate != (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) { |
385 | 16 | _Py_atomic_store_ptr_relaxed(&gil->last_holder, tstate); |
386 | 16 | ++gil->switch_number; |
387 | 16 | } |
388 | | |
389 | 62.2k | #ifdef FORCE_SWITCHING |
390 | 62.2k | COND_SIGNAL(gil->switch_cond); |
391 | 31.1k | MUTEX_UNLOCK(gil->switch_mutex); |
392 | 31.1k | #endif |
393 | | |
394 | 31.1k | if (_PyThreadState_MustExit(tstate)) { |
395 | | /* bpo-36475: If Py_Finalize() has been called and tstate is not |
396 | | the thread which called Py_Finalize(), gh-87135: hang the |
397 | | thread. |
398 | | |
399 | | This code path can be reached by a daemon thread which was waiting |
400 | | in take_gil() while the main thread called |
401 | | wait_for_thread_shutdown() from Py_Finalize(). */ |
402 | 0 | MUTEX_UNLOCK(gil->mutex); |
403 | | /* tstate could be a dangling pointer, so don't pass it to |
404 | | drop_gil(). */ |
405 | 0 | drop_gil(interp, NULL, 1); |
406 | 0 | _PyThreadState_HangThread(tstate); |
407 | 0 | } |
408 | 31.1k | assert(_PyThreadState_CheckConsistency(tstate)); |
409 | | |
410 | 31.1k | tstate->holds_gil = 1; |
411 | 31.1k | _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); |
412 | 31.1k | update_eval_breaker_for_thread(interp, tstate); |
413 | | |
414 | 31.1k | MUTEX_UNLOCK(gil->mutex); |
415 | | |
416 | 31.1k | errno = err; |
417 | 31.1k | return; |
418 | 31.1k | } |
419 | | |
420 | | void _PyEval_SetSwitchInterval(unsigned long microseconds) |
421 | 0 | { |
422 | 0 | PyInterpreterState *interp = _PyInterpreterState_GET(); |
423 | 0 | struct _gil_runtime_state *gil = interp->ceval.gil; |
424 | 0 | assert(gil != NULL); |
425 | 0 | _Py_atomic_store_ulong_relaxed(&gil->interval, microseconds); |
426 | 0 | } |
427 | | |
428 | | unsigned long _PyEval_GetSwitchInterval(void) |
429 | 0 | { |
430 | 0 | PyInterpreterState *interp = _PyInterpreterState_GET(); |
431 | 0 | struct _gil_runtime_state *gil = interp->ceval.gil; |
432 | 0 | assert(gil != NULL); |
433 | 0 | return _Py_atomic_load_ulong_relaxed(&gil->interval); |
434 | 0 | } |
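A minimal embedding sketch showing the user-facing knob for gil->interval (illustrative only; the workload is invented). sys.setswitchinterval(0.001) from the embedded interpreter reaches _PyEval_SetSwitchInterval() above as 1000 microseconds.

#include <Python.h>

int
main(void)
{
    Py_Initialize();
    /* Ask for a 1 ms switch interval; it is stored in gil->interval. */
    PyRun_SimpleString("import sys; sys.setswitchinterval(0.001)");
    PyRun_SimpleString(
        "import threading\n"
        "def spin(n=5_000_000):\n"
        "    while n: n -= 1\n"
        "threads = [threading.Thread(target=spin) for _ in range(4)]\n"
        "for t in threads: t.start()\n"
        "for t in threads: t.join()\n");
    return Py_FinalizeEx() < 0 ? 1 : 0;
}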
435 | | |
436 | | |
437 | | int |
438 | | _PyEval_ThreadsInitialized(void) |
439 | 0 | { |
440 | | /* XXX This is only needed for an assert in PyGILState_Ensure(), |
441 | | * which currently does not work with subinterpreters. |
442 | | * Thus we only use the main interpreter. */ |
443 | 0 | PyInterpreterState *interp = _PyInterpreterState_Main(); |
444 | 0 | if (interp == NULL) { |
445 | 0 | return 0; |
446 | 0 | } |
447 | 0 | struct _gil_runtime_state *gil = interp->ceval.gil; |
448 | 0 | return gil_created(gil); |
449 | 0 | } |
450 | | |
451 | | // Function removed in the Python 3.13 API but kept in the stable ABI. |
452 | | PyAPI_FUNC(int) |
453 | | PyEval_ThreadsInitialized(void) |
454 | 0 | { |
455 | 0 | return _PyEval_ThreadsInitialized(); |
456 | 0 | } |
457 | | |
458 | | #ifndef NDEBUG |
459 | | static inline int |
460 | | current_thread_holds_gil(struct _gil_runtime_state *gil, PyThreadState *tstate) |
461 | | { |
462 | | int holds_gil = tstate->holds_gil; |
463 | | |
464 | | // holds_gil is the source of truth; check that last_holder and gil->locked |
465 | | // are consistent with it. |
466 | | int locked = _Py_atomic_load_int_relaxed(&gil->locked); |
467 | | int is_last_holder = |
468 | | ((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) == tstate; |
469 | | assert(!holds_gil || locked); |
470 | | assert(!holds_gil || is_last_holder); |
471 | | |
472 | | return holds_gil; |
473 | | } |
474 | | #endif |
475 | | |
476 | | static void |
477 | | init_shared_gil(PyInterpreterState *interp, struct _gil_runtime_state *gil) |
478 | 0 | { |
479 | 0 | assert(gil_created(gil)); |
480 | 0 | interp->ceval.gil = gil; |
481 | 0 | interp->ceval.own_gil = 0; |
482 | 0 | } |
483 | | |
484 | | static void |
485 | | init_own_gil(PyInterpreterState *interp, struct _gil_runtime_state *gil) |
486 | 16 | { |
487 | 16 | assert(!gil_created(gil)); |
488 | | #ifdef Py_GIL_DISABLED |
489 | | const PyConfig *config = _PyInterpreterState_GetConfig(interp); |
490 | | gil->enabled = config->enable_gil == _PyConfig_GIL_ENABLE ? INT_MAX : 0; |
491 | | #endif |
492 | 16 | create_gil(gil); |
493 | 16 | assert(gil_created(gil)); |
494 | 16 | interp->ceval.gil = gil; |
495 | 16 | interp->ceval.own_gil = 1; |
496 | 16 | } |
497 | | |
498 | | void |
499 | | _PyEval_InitGIL(PyThreadState *tstate, int own_gil) |
500 | 16 | { |
501 | 16 | assert(tstate->interp->ceval.gil == NULL); |
502 | 16 | if (!own_gil) { |
503 | | /* The interpreter will share the main interpreter's instead. */ |
504 | 0 | PyInterpreterState *main_interp = _PyInterpreterState_Main(); |
505 | 0 | assert(tstate->interp != main_interp); |
506 | 0 | struct _gil_runtime_state *gil = main_interp->ceval.gil; |
507 | 0 | init_shared_gil(tstate->interp, gil); |
508 | 0 | assert(!current_thread_holds_gil(gil, tstate)); |
509 | 0 | } |
510 | 16 | else { |
511 | 16 | PyThread_init_thread(); |
512 | 16 | init_own_gil(tstate->interp, &tstate->interp->_gil); |
513 | 16 | } |
514 | | |
515 | | // Lock the GIL and mark the current thread as attached. |
516 | 16 | _PyThreadState_Attach(tstate); |
517 | 16 | } |
518 | | |
519 | | void |
520 | | _PyEval_FiniGIL(PyInterpreterState *interp) |
521 | 16 | { |
522 | 16 | struct _gil_runtime_state *gil = interp->ceval.gil; |
523 | 16 | if (gil == NULL) { |
524 | | /* It was already finalized (or hasn't been initialized yet). */ |
525 | 16 | assert(!interp->ceval.own_gil); |
526 | 16 | return; |
527 | 16 | } |
528 | 0 | else if (!interp->ceval.own_gil) { |
529 | | #ifdef Py_DEBUG |
530 | | PyInterpreterState *main_interp = _PyInterpreterState_Main(); |
531 | | assert(main_interp != NULL && interp != main_interp); |
532 | | assert(interp->ceval.gil == main_interp->ceval.gil); |
533 | | #endif |
534 | 0 | interp->ceval.gil = NULL; |
535 | 0 | return; |
536 | 0 | } |
537 | | |
538 | 0 | if (!gil_created(gil)) { |
539 | | /* First Py_InitializeFromConfig() call: the GIL doesn't exist |
540 | | yet: do nothing. */ |
541 | 0 | return; |
542 | 0 | } |
543 | | |
544 | 0 | destroy_gil(gil); |
545 | 0 | assert(!gil_created(gil)); |
546 | 0 | interp->ceval.gil = NULL; |
547 | 0 | } |
548 | | |
549 | | void |
550 | | PyEval_InitThreads(void) |
551 | 0 | { |
552 | | /* Do nothing: kept for backward compatibility */ |
553 | 0 | } |
554 | | |
555 | | void |
556 | | _PyEval_Fini(void) |
557 | 0 | { |
558 | | #ifdef Py_STATS |
559 | | _Py_PrintSpecializationStats(1); |
560 | | #endif |
561 | 0 | } |
562 | | |
563 | | // Function removed in the Python 3.13 API but kept in the stable ABI. |
564 | | PyAPI_FUNC(void) |
565 | | PyEval_AcquireLock(void) |
566 | 0 | { |
567 | 0 | PyThreadState *tstate = _PyThreadState_GET(); |
568 | 0 | _Py_EnsureTstateNotNULL(tstate); |
569 | |
570 | 0 | take_gil(tstate); |
571 | 0 | } |
572 | | |
573 | | // Function removed in the Python 3.13 API but kept in the stable ABI. |
574 | | PyAPI_FUNC(void) |
575 | | PyEval_ReleaseLock(void) |
576 | 0 | { |
577 | 0 | PyThreadState *tstate = _PyThreadState_GET(); |
578 | | /* This function must succeed when the current thread state is NULL. |
579 | | We therefore avoid PyThreadState_Get() which dumps a fatal error |
580 | | in debug mode. */ |
581 | 0 | drop_gil(tstate->interp, tstate, 0); |
582 | 0 | } |
583 | | |
584 | | void |
585 | | _PyEval_AcquireLock(PyThreadState *tstate) |
586 | 31.1k | { |
587 | 31.1k | _Py_EnsureTstateNotNULL(tstate); |
588 | 31.1k | take_gil(tstate); |
589 | 31.1k | } |
590 | | |
591 | | void |
592 | | _PyEval_ReleaseLock(PyInterpreterState *interp, |
593 | | PyThreadState *tstate, |
594 | | int final_release) |
595 | 31.1k | { |
596 | 31.1k | assert(tstate != NULL); |
597 | 31.1k | assert(tstate->interp == interp); |
598 | 31.1k | drop_gil(interp, tstate, final_release); |
599 | 31.1k | } |
600 | | |
601 | | void |
602 | | PyEval_AcquireThread(PyThreadState *tstate) |
603 | 0 | { |
604 | 0 | _Py_EnsureTstateNotNULL(tstate); |
605 | 0 | _PyThreadState_Attach(tstate); |
606 | 0 | } |
607 | | |
608 | | void |
609 | | PyEval_ReleaseThread(PyThreadState *tstate) |
610 | 0 | { |
611 | 0 | assert(_PyThreadState_CheckConsistency(tstate)); |
612 | 0 | _PyThreadState_Detach(tstate); |
613 | 0 | } |
614 | | |
615 | | #ifdef HAVE_FORK |
616 | | /* This function is called from PyOS_AfterFork_Child to re-initialize the |
617 | | GIL and pending calls lock. */ |
618 | | PyStatus |
619 | | _PyEval_ReInitThreads(PyThreadState *tstate) |
620 | 0 | { |
621 | 0 | assert(tstate->interp == _PyInterpreterState_Main()); |
622 | |
623 | 0 | struct _gil_runtime_state *gil = tstate->interp->ceval.gil; |
624 | 0 | if (!gil_created(gil)) { |
625 | 0 | return _PyStatus_OK(); |
626 | 0 | } |
627 | 0 | recreate_gil(gil); |
628 | |
629 | 0 | take_gil(tstate); |
630 | |
631 | 0 | struct _pending_calls *pending = &tstate->interp->ceval.pending; |
632 | 0 | _PyMutex_at_fork_reinit(&pending->mutex); |
633 | |
634 | 0 | return _PyStatus_OK(); |
635 | 0 | } |
636 | | #endif |
637 | | |
638 | | PyThreadState * |
639 | | PyEval_SaveThread(void) |
640 | 31.1k | { |
641 | 31.1k | PyThreadState *tstate = _PyThreadState_GET(); |
642 | 31.1k | _PyThreadState_Detach(tstate); |
643 | 31.1k | return tstate; |
644 | 31.1k | } |
645 | | |
646 | | void |
647 | | PyEval_RestoreThread(PyThreadState *tstate) |
648 | 31.1k | { |
649 | | #ifdef MS_WINDOWS |
650 | | int err = GetLastError(); |
651 | | #endif |
652 | | |
653 | 31.1k | _Py_EnsureTstateNotNULL(tstate); |
654 | 31.1k | _PyThreadState_Attach(tstate); |
655 | | |
656 | | #ifdef MS_WINDOWS |
657 | | SetLastError(err); |
658 | | #endif |
659 | 31.1k | } |
660 | | |
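PyEval_SaveThread() and PyEval_RestoreThread() are what the Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS macros expand to. A sketch of the usual pattern in an extension function (the function and its module are hypothetical; read() is POSIX):

#include <Python.h>
#include <unistd.h>

/* Release the GIL around a blocking read, then re-acquire it before
 * touching any Python objects again. */
static PyObject *
example_read(PyObject *self, PyObject *args)
{
    int fd;
    if (!PyArg_ParseTuple(args, "i", &fd)) {
        return NULL;
    }

    char buf[4096];
    ssize_t n;

    PyThreadState *save = PyEval_SaveThread();   /* drop the GIL */
    n = read(fd, buf, sizeof(buf));              /* may block; GIL released */
    PyEval_RestoreThread(save);                  /* take the GIL back */

    if (n < 0) {
        return PyErr_SetFromErrno(PyExc_OSError);
    }
    return PyBytes_FromStringAndSize(buf, n);
}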
661 | | |
662 | | void |
663 | | _PyEval_SignalReceived(void) |
664 | 0 | { |
665 | 0 | _Py_set_eval_breaker_bit(_PyRuntime.main_tstate, _PY_SIGNALS_PENDING_BIT); |
666 | 0 | } |
667 | | |
668 | | |
669 | | #ifndef Py_GIL_DISABLED |
670 | | static void |
671 | | signal_active_thread(PyInterpreterState *interp, uintptr_t bit) |
672 | 0 | { |
673 | 0 | struct _gil_runtime_state *gil = interp->ceval.gil; |
674 | | |
675 | | // If a thread from the targeted interpreter is holding the GIL, signal |
676 | | // that thread. Otherwise, the next thread to run from the targeted |
677 | | // interpreter will have its bit set as part of taking the GIL. |
678 | 0 | MUTEX_LOCK(gil->mutex); |
679 | 0 | if (_Py_atomic_load_int_relaxed(&gil->locked)) { |
680 | 0 | PyThreadState *holder = (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder); |
681 | 0 | if (holder->interp == interp) { |
682 | 0 | _Py_set_eval_breaker_bit(holder, bit); |
683 | 0 | } |
684 | 0 | } |
685 | 0 | MUTEX_UNLOCK(gil->mutex); |
686 | 0 | } |
687 | | #endif |
688 | | |
689 | | |
690 | | /* Mechanism whereby asynchronously executing callbacks (e.g. UNIX |
691 | | signal handlers or Mac I/O completion routines) can schedule calls |
692 | | to a function to be called synchronously. |
693 | | The synchronous function is called with one void* argument. |
694 | | It should return 0 for success or -1 for failure -- failure should |
695 | | be accompanied by an exception. |
696 | | |
697 | | If registration succeeds, the registering function returns 0; if it fails |
698 | | (e.g. due to too many pending calls) it returns -1 (without setting |
699 | | an exception condition). |
700 | | |
701 | | Note that because registry may occur from within signal handlers, |
702 | | or other asynchronous events, calling malloc() is unsafe! |
703 | | |
704 | | Any thread can schedule pending calls, but only the main thread |
705 | | will execute them. |
706 | | There is no facility to schedule calls to a particular thread, but |
707 | | that should be easy to change, should that ever be required. In |
708 | | that case, the static variables here should go into the python |
709 | | threadstate. |
710 | | */ |
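A usage sketch of this mechanism through the public Py_AddPendingCall() wrapper defined further down (the helper names and the progress payload are invented): a worker thread, with or without the GIL, schedules a callback that the main thread will run from its eval loop.

#include <Python.h>
#include <stdint.h>

/* Runs later in the main thread, inside the eval loop, with the GIL held.
 * Must return 0 on success or -1 with an exception set. */
static int
report_progress(void *arg)
{
    long done = (long)(intptr_t)arg;
    PySys_WriteStdout("progress: %ld items\n", done);
    return 0;
}

/* May be called from any thread; it only enqueues the call and signals
 * the eval breaker (see _PyEval_AddPendingCall below). */
static void
worker_made_progress(long done)
{
    if (Py_AddPendingCall(report_progress, (void *)(intptr_t)done) < 0) {
        /* Queue full: nothing was scheduled; the caller may retry later. */
    }
}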
711 | | |
712 | | /* Push one item onto the queue while holding the lock. */ |
713 | | static int |
714 | | _push_pending_call(struct _pending_calls *pending, |
715 | | _Py_pending_call_func func, void *arg, int flags) |
716 | 0 | { |
717 | 0 | if (pending->npending == pending->max) { |
718 | 0 | return _Py_ADD_PENDING_FULL; |
719 | 0 | } |
720 | 0 | assert(pending->npending < pending->max); |
721 | |
722 | 0 | int i = pending->next; |
723 | 0 | assert(pending->calls[i].func == NULL); |
724 | |
725 | 0 | pending->calls[i].func = func; |
726 | 0 | pending->calls[i].arg = arg; |
727 | 0 | pending->calls[i].flags = flags; |
728 | |
729 | 0 | assert(pending->npending < PENDINGCALLSARRAYSIZE); |
730 | 0 | _Py_atomic_add_int32(&pending->npending, 1); |
731 | |
732 | 0 | pending->next = (i + 1) % PENDINGCALLSARRAYSIZE; |
733 | 0 | assert(pending->next != pending->first |
734 | 0 | || pending->npending == pending->max); |
735 | |
736 | 0 | return _Py_ADD_PENDING_SUCCESS; |
737 | 0 | } |
738 | | |
739 | | static int |
740 | | _next_pending_call(struct _pending_calls *pending, |
741 | | int (**func)(void *), void **arg, int *flags) |
742 | 0 | { |
743 | 0 | int i = pending->first; |
744 | 0 | if (pending->npending == 0) { |
745 | | /* Queue empty */ |
746 | 0 | assert(i == pending->next); |
747 | 0 | assert(pending->calls[i].func == NULL); |
748 | 0 | return -1; |
749 | 0 | } |
750 | 0 | *func = pending->calls[i].func; |
751 | 0 | *arg = pending->calls[i].arg; |
752 | 0 | *flags = pending->calls[i].flags; |
753 | 0 | return i; |
754 | 0 | } |
755 | | |
756 | | /* Pop one item off the queue while holding the lock. */ |
757 | | static void |
758 | | _pop_pending_call(struct _pending_calls *pending, |
759 | | int (**func)(void *), void **arg, int *flags) |
760 | 0 | { |
761 | 0 | int i = _next_pending_call(pending, func, arg, flags); |
762 | 0 | if (i >= 0) { |
763 | 0 | pending->calls[i] = (struct _pending_call){0}; |
764 | 0 | pending->first = (i + 1) % PENDINGCALLSARRAYSIZE; |
765 | 0 | assert(pending->npending > 0); |
766 | 0 | _Py_atomic_add_int32(&pending->npending, -1); |
767 | 0 | } |
768 | 0 | } |
769 | | |
770 | | /* This implementation is thread-safe. It allows |
771 | | scheduling to be made from any thread, and even from an executing |
772 | | callback. |
773 | | */ |
774 | | |
775 | | _Py_add_pending_call_result |
776 | | _PyEval_AddPendingCall(PyInterpreterState *interp, |
777 | | _Py_pending_call_func func, void *arg, int flags) |
778 | 0 | { |
779 | 0 | struct _pending_calls *pending = &interp->ceval.pending; |
780 | 0 | int main_only = (flags & _Py_PENDING_MAINTHREADONLY) != 0; |
781 | 0 | if (main_only) { |
782 | | /* The main thread only exists in the main interpreter. */ |
783 | 0 | assert(_Py_IsMainInterpreter(interp)); |
784 | 0 | pending = &_PyRuntime.ceval.pending_mainthread; |
785 | 0 | } |
786 | |
787 | 0 | PyMutex_Lock(&pending->mutex); |
788 | 0 | _Py_add_pending_call_result result = |
789 | 0 | _push_pending_call(pending, func, arg, flags); |
790 | 0 | PyMutex_Unlock(&pending->mutex); |
791 | |
792 | 0 | if (main_only) { |
793 | 0 | _Py_set_eval_breaker_bit(_PyRuntime.main_tstate, _PY_CALLS_TO_DO_BIT); |
794 | 0 | } |
795 | 0 | else { |
796 | | #ifdef Py_GIL_DISABLED |
797 | | _Py_set_eval_breaker_bit_all(interp, _PY_CALLS_TO_DO_BIT); |
798 | | #else |
799 | 0 | signal_active_thread(interp, _PY_CALLS_TO_DO_BIT); |
800 | 0 | #endif |
801 | 0 | } |
802 | |
803 | 0 | return result; |
804 | 0 | } |
805 | | |
806 | | int |
807 | | Py_AddPendingCall(_Py_pending_call_func func, void *arg) |
808 | 0 | { |
809 | | /* Legacy users of this API will continue to target the main thread |
810 | | (of the main interpreter). */ |
811 | 0 | PyInterpreterState *interp = _PyInterpreterState_Main(); |
812 | 0 | _Py_add_pending_call_result r = |
813 | 0 | _PyEval_AddPendingCall(interp, func, arg, _Py_PENDING_MAINTHREADONLY); |
814 | 0 | if (r == _Py_ADD_PENDING_FULL) { |
815 | 0 | return -1; |
816 | 0 | } |
817 | 0 | else { |
818 | 0 | assert(r == _Py_ADD_PENDING_SUCCESS); |
819 | 0 | return 0; |
820 | 0 | } |
821 | 0 | } |
822 | | |
823 | | static int |
824 | | handle_signals(PyThreadState *tstate) |
825 | 0 | { |
826 | 0 | assert(_PyThreadState_CheckConsistency(tstate)); |
827 | 0 | _Py_unset_eval_breaker_bit(tstate, _PY_SIGNALS_PENDING_BIT); |
828 | 0 | if (!_Py_ThreadCanHandleSignals(tstate->interp)) { |
829 | 0 | return 0; |
830 | 0 | } |
831 | 0 | if (_PyErr_CheckSignalsTstate(tstate) < 0) { |
832 | | /* On failure, re-schedule a call to handle_signals(). */ |
833 | 0 | _Py_set_eval_breaker_bit(tstate, _PY_SIGNALS_PENDING_BIT); |
834 | 0 | return -1; |
835 | 0 | } |
836 | 0 | return 0; |
837 | 0 | } |
838 | | |
839 | | static int |
840 | | _make_pending_calls(struct _pending_calls *pending, int32_t *p_npending) |
841 | 0 | { |
842 | 0 | int res = 0; |
843 | 0 | int32_t npending = -1; |
844 | |
|
845 | 0 | assert(sizeof(pending->max) <= sizeof(size_t) |
846 | 0 | && ((size_t)pending->max) <= Py_ARRAY_LENGTH(pending->calls)); |
847 | 0 | int32_t maxloop = pending->maxloop; |
848 | 0 | if (maxloop == 0) { |
849 | 0 | maxloop = pending->max; |
850 | 0 | } |
851 | 0 | assert(maxloop > 0 && maxloop <= pending->max); |
852 | | |
853 | | /* perform a bounded number of calls, in case of recursion */ |
854 | 0 | for (int i=0; i<maxloop; i++) { |
855 | 0 | _Py_pending_call_func func = NULL; |
856 | 0 | void *arg = NULL; |
857 | 0 | int flags = 0; |
858 | | |
859 | | /* pop one item off the queue while holding the lock */ |
860 | 0 | PyMutex_Lock(&pending->mutex); |
861 | 0 | _pop_pending_call(pending, &func, &arg, &flags); |
862 | 0 | npending = pending->npending; |
863 | 0 | PyMutex_Unlock(&pending->mutex); |
864 | | |
865 | | /* Check if there are any more pending calls. */ |
866 | 0 | if (func == NULL) { |
867 | 0 | assert(npending == 0); |
868 | 0 | break; |
869 | 0 | } |
870 | | |
871 | | /* having released the lock, perform the callback */ |
872 | 0 | res = func(arg); |
873 | 0 | if ((flags & _Py_PENDING_RAWFREE) && arg != NULL) { |
874 | 0 | PyMem_RawFree(arg); |
875 | 0 | } |
876 | 0 | if (res != 0) { |
877 | 0 | res = -1; |
878 | 0 | goto finally; |
879 | 0 | } |
880 | 0 | } |
881 | | |
882 | 0 | finally: |
883 | 0 | *p_npending = npending; |
884 | 0 | return res; |
885 | 0 | } |
886 | | |
887 | | static void |
888 | | signal_pending_calls(PyThreadState *tstate, PyInterpreterState *interp) |
889 | 0 | { |
890 | | #ifdef Py_GIL_DISABLED |
891 | | _Py_set_eval_breaker_bit_all(interp, _PY_CALLS_TO_DO_BIT); |
892 | | #else |
893 | 0 | _Py_set_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); |
894 | 0 | #endif |
895 | 0 | } |
896 | | |
897 | | static void |
898 | | unsignal_pending_calls(PyThreadState *tstate, PyInterpreterState *interp) |
899 | 0 | { |
900 | | #ifdef Py_GIL_DISABLED |
901 | | _Py_unset_eval_breaker_bit_all(interp, _PY_CALLS_TO_DO_BIT); |
902 | | #else |
903 | 0 | _Py_unset_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); |
904 | 0 | #endif |
905 | 0 | } |
906 | | |
907 | | static void |
908 | | clear_pending_handling_thread(struct _pending_calls *pending) |
909 | 0 | { |
910 | | #ifdef Py_GIL_DISABLED |
911 | | PyMutex_Lock(&pending->mutex); |
912 | | pending->handling_thread = NULL; |
913 | | PyMutex_Unlock(&pending->mutex); |
914 | | #else |
915 | 0 | pending->handling_thread = NULL; |
916 | 0 | #endif |
917 | 0 | } |
918 | | |
919 | | static int |
920 | | make_pending_calls(PyThreadState *tstate) |
921 | 0 | { |
922 | 0 | PyInterpreterState *interp = tstate->interp; |
923 | 0 | struct _pending_calls *pending = &interp->ceval.pending; |
924 | 0 | struct _pending_calls *pending_main = &_PyRuntime.ceval.pending_mainthread; |
925 | | |
926 | | /* Only one thread (per interpreter) may run the pending calls |
927 | | at once. In the same way, we don't do recursive pending calls. */ |
928 | 0 | PyMutex_Lock(&pending->mutex); |
929 | 0 | if (pending->handling_thread != NULL) { |
930 | | /* A pending call was added after another thread was already |
931 | | handling the pending calls (and had already "unsignaled"). |
932 | | Once that thread is done, it may have taken care of all the |
933 | | pending calls, or there might be some still waiting. |
934 | | To avoid all threads constantly stopping on the eval breaker, |
935 | | we clear the bit for this thread and make sure it is set |
936 | | for the thread currently handling the pending call. */ |
937 | 0 | _Py_set_eval_breaker_bit(pending->handling_thread, _PY_CALLS_TO_DO_BIT); |
938 | 0 | _Py_unset_eval_breaker_bit(tstate, _PY_CALLS_TO_DO_BIT); |
939 | 0 | PyMutex_Unlock(&pending->mutex); |
940 | 0 | return 0; |
941 | 0 | } |
942 | 0 | pending->handling_thread = tstate; |
943 | 0 | PyMutex_Unlock(&pending->mutex); |
944 | | |
945 | | /* unsignal before starting to call callbacks, so that any callback |
946 | | added in-between re-signals */ |
947 | 0 | unsignal_pending_calls(tstate, interp); |
948 | |
949 | 0 | int32_t npending; |
950 | 0 | if (_make_pending_calls(pending, &npending) != 0) { |
951 | 0 | clear_pending_handling_thread(pending); |
952 | | /* There might not be more calls to make, but we play it safe. */ |
953 | 0 | signal_pending_calls(tstate, interp); |
954 | 0 | return -1; |
955 | 0 | } |
956 | 0 | if (npending > 0) { |
957 | | /* We hit pending->maxloop. */ |
958 | 0 | signal_pending_calls(tstate, interp); |
959 | 0 | } |
960 | |
961 | 0 | if (_Py_IsMainThread() && _Py_IsMainInterpreter(interp)) { |
962 | 0 | if (_make_pending_calls(pending_main, &npending) != 0) { |
963 | 0 | clear_pending_handling_thread(pending); |
964 | | /* There might not be more calls to make, but we play it safe. */ |
965 | 0 | signal_pending_calls(tstate, interp); |
966 | 0 | return -1; |
967 | 0 | } |
968 | 0 | if (npending > 0) { |
969 | | /* We hit pending_main->maxloop. */ |
970 | 0 | signal_pending_calls(tstate, interp); |
971 | 0 | } |
972 | 0 | } |
973 | | |
974 | 0 | clear_pending_handling_thread(pending); |
975 | 0 | return 0; |
976 | 0 | } |
977 | | |
978 | | |
979 | | void |
980 | | _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) |
981 | 0 | { |
982 | 0 | _Py_FOR_EACH_TSTATE_BEGIN(interp, tstate) { |
983 | 0 | _Py_set_eval_breaker_bit(tstate, bit); |
984 | 0 | } |
985 | 0 | _Py_FOR_EACH_TSTATE_END(interp); |
986 | 0 | } |
987 | | |
988 | | void |
989 | | _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) |
990 | 0 | { |
991 | 0 | _Py_FOR_EACH_TSTATE_BEGIN(interp, tstate) { |
992 | 0 | _Py_unset_eval_breaker_bit(tstate, bit); |
993 | 0 | } |
994 | 0 | _Py_FOR_EACH_TSTATE_END(interp); |
995 | 0 | } |
996 | | |
997 | | void |
998 | | _Py_FinishPendingCalls(PyThreadState *tstate) |
999 | 0 | { |
1000 | 0 | _Py_AssertHoldsTstate(); |
1001 | 0 | assert(_PyThreadState_CheckConsistency(tstate)); |
1002 | |
1003 | 0 | struct _pending_calls *pending = &tstate->interp->ceval.pending; |
1004 | 0 | struct _pending_calls *pending_main = |
1005 | 0 | _Py_IsMainThread() && _Py_IsMainInterpreter(tstate->interp) |
1006 | 0 | ? &_PyRuntime.ceval.pending_mainthread |
1007 | 0 | : NULL; |
1008 | | /* make_pending_calls() may return early without making all pending |
1009 | | calls, so we keep trying until we're actually done. */ |
1010 | 0 | int32_t npending; |
1011 | | #ifndef NDEBUG |
1012 | | int32_t npending_prev = INT32_MAX; |
1013 | | #endif |
1014 | 0 | do { |
1015 | 0 | if (make_pending_calls(tstate) < 0) { |
1016 | 0 | PyObject *exc = _PyErr_GetRaisedException(tstate); |
1017 | 0 | PyErr_BadInternalCall(); |
1018 | 0 | _PyErr_ChainExceptions1(exc); |
1019 | 0 | _PyErr_Print(tstate); |
1020 | 0 | } |
1021 | |
1022 | 0 | npending = _Py_atomic_load_int32_relaxed(&pending->npending); |
1023 | 0 | if (pending_main != NULL) { |
1024 | 0 | npending += _Py_atomic_load_int32_relaxed(&pending_main->npending); |
1025 | 0 | } |
1026 | | #ifndef NDEBUG |
1027 | | assert(npending_prev > npending); |
1028 | | npending_prev = npending; |
1029 | | #endif |
1030 | 0 | } while (npending > 0); |
1031 | 0 | } |
1032 | | |
1033 | | int |
1034 | | _PyEval_MakePendingCalls(PyThreadState *tstate) |
1035 | 0 | { |
1036 | 0 | int res; |
1037 | |
1038 | 0 | if (_Py_IsMainThread() && _Py_IsMainInterpreter(tstate->interp)) { |
1039 | | /* Python signal handler doesn't really queue a callback: |
1040 | | it only signals that a signal was received, |
1041 | | see _PyEval_SignalReceived(). */ |
1042 | 0 | res = handle_signals(tstate); |
1043 | 0 | if (res != 0) { |
1044 | 0 | return res; |
1045 | 0 | } |
1046 | 0 | } |
1047 | | |
1048 | 0 | res = make_pending_calls(tstate); |
1049 | 0 | if (res != 0) { |
1050 | 0 | return res; |
1051 | 0 | } |
1052 | | |
1053 | 0 | return 0; |
1054 | 0 | } |
1055 | | |
1056 | | /* Py_MakePendingCalls() is a simple wrapper for the sake |
1057 | | of backward-compatibility. */ |
1058 | | int |
1059 | | Py_MakePendingCalls(void) |
1060 | 0 | { |
1061 | 0 | _Py_AssertHoldsTstate(); |
1062 | |
1063 | 0 | PyThreadState *tstate = _PyThreadState_GET(); |
1064 | 0 | assert(_PyThreadState_CheckConsistency(tstate)); |
1065 | | |
1066 | | /* Only execute pending calls on the main thread. */ |
1067 | 0 | if (!_Py_IsMainThread() || !_Py_IsMainInterpreter(tstate->interp)) { |
1068 | 0 | return 0; |
1069 | 0 | } |
1070 | 0 | return _PyEval_MakePendingCalls(tstate); |
1071 | 0 | } |
1072 | | |
1073 | | void |
1074 | | _PyEval_InitState(PyInterpreterState *interp) |
1075 | 16 | { |
1076 | 16 | _gil_initialize(&interp->_gil); |
1077 | 16 | } |
1078 | | |
1079 | | #ifdef Py_GIL_DISABLED |
1080 | | int |
1081 | | _PyEval_EnableGILTransient(PyThreadState *tstate) |
1082 | | { |
1083 | | const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); |
1084 | | if (config->enable_gil != _PyConfig_GIL_DEFAULT) { |
1085 | | return 0; |
1086 | | } |
1087 | | struct _gil_runtime_state *gil = tstate->interp->ceval.gil; |
1088 | | |
1089 | | int enabled = _Py_atomic_load_int_relaxed(&gil->enabled); |
1090 | | if (enabled == INT_MAX) { |
1091 | | // The GIL is already enabled permanently. |
1092 | | return 0; |
1093 | | } |
1094 | | if (enabled == INT_MAX - 1) { |
1095 | | Py_FatalError("Too many transient requests to enable the GIL"); |
1096 | | } |
1097 | | if (enabled > 0) { |
1098 | | // If enabled is nonzero, we know we hold the GIL. This means that no |
1099 | | // other threads are attached, and nobody else can be concurrently |
1100 | | // mutating it. |
1101 | | _Py_atomic_store_int_relaxed(&gil->enabled, enabled + 1); |
1102 | | return 0; |
1103 | | } |
1104 | | |
1105 | | // Enabling the GIL changes what it means to be an "attached" thread. To |
1106 | | // safely make this transition, we: |
1107 | | // 1. Detach the current thread. |
1108 | | // 2. Stop the world to detach (and suspend) all other threads. |
1109 | | // 3. Enable the GIL, if nobody else did between our check above and when |
1110 | | // our stop-the-world begins. |
1111 | | // 4. Start the world. |
1112 | | // 5. Attach the current thread. Other threads may attach and hold the GIL |
1113 | | // before this thread, which is harmless. |
1114 | | _PyThreadState_Detach(tstate); |
1115 | | |
1116 | | // This could be an interpreter-local stop-the-world in situations where we |
1117 | | // know that this interpreter's GIL is not shared, and that it won't become |
1118 | | // shared before the stop-the-world begins. For now, we always stop all |
1119 | | // interpreters for simplicity. |
1120 | | _PyEval_StopTheWorldAll(&_PyRuntime); |
1121 | | |
1122 | | enabled = _Py_atomic_load_int_relaxed(&gil->enabled); |
1123 | | int this_thread_enabled = enabled == 0; |
1124 | | _Py_atomic_store_int_relaxed(&gil->enabled, enabled + 1); |
1125 | | |
1126 | | _PyEval_StartTheWorldAll(&_PyRuntime); |
1127 | | _PyThreadState_Attach(tstate); |
1128 | | |
1129 | | return this_thread_enabled; |
1130 | | } |
1131 | | |
1132 | | int |
1133 | | _PyEval_EnableGILPermanent(PyThreadState *tstate) |
1134 | | { |
1135 | | const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); |
1136 | | if (config->enable_gil != _PyConfig_GIL_DEFAULT) { |
1137 | | return 0; |
1138 | | } |
1139 | | |
1140 | | struct _gil_runtime_state *gil = tstate->interp->ceval.gil; |
1141 | | assert(current_thread_holds_gil(gil, tstate)); |
1142 | | |
1143 | | int enabled = _Py_atomic_load_int_relaxed(&gil->enabled); |
1144 | | if (enabled == INT_MAX) { |
1145 | | return 0; |
1146 | | } |
1147 | | |
1148 | | _Py_atomic_store_int_relaxed(&gil->enabled, INT_MAX); |
1149 | | return 1; |
1150 | | } |
1151 | | |
1152 | | int |
1153 | | _PyEval_DisableGIL(PyThreadState *tstate) |
1154 | | { |
1155 | | const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); |
1156 | | if (config->enable_gil != _PyConfig_GIL_DEFAULT) { |
1157 | | return 0; |
1158 | | } |
1159 | | |
1160 | | struct _gil_runtime_state *gil = tstate->interp->ceval.gil; |
1161 | | assert(current_thread_holds_gil(gil, tstate)); |
1162 | | |
1163 | | int enabled = _Py_atomic_load_int_relaxed(&gil->enabled); |
1164 | | if (enabled == INT_MAX) { |
1165 | | return 0; |
1166 | | } |
1167 | | |
1168 | | assert(enabled >= 1); |
1169 | | enabled--; |
1170 | | |
1171 | | // Disabling the GIL is much simpler than enabling it, since we know we are |
1172 | | // the only attached thread. Other threads may start free-threading as soon |
1173 | | // as this store is complete, if it sets gil->enabled to 0. |
1174 | | _Py_atomic_store_int_relaxed(&gil->enabled, enabled); |
1175 | | |
1176 | | if (enabled == 0) { |
1177 | | // We're attached, so we know the GIL will remain disabled until at |
1178 | | // least the next time we detach, which must be after this function |
1179 | | // returns. |
1180 | | // |
1181 | | // Drop the GIL, which will wake up any threads waiting in take_gil() |
1182 | | // and let them resume execution without the GIL. |
1183 | | drop_gil_impl(tstate, gil); |
1184 | | |
1185 | | // If another thread asked us to drop the GIL, they should be |
1186 | | // free-threading by now. Remove any such request so we have a clean |
1187 | | // slate if/when the GIL is enabled again. |
1188 | | _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); |
1189 | | return 1; |
1190 | | } |
1191 | | return 0; |
1192 | | } |
1193 | | #endif |
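For context: on free-threaded builds, the transient/permanent enabling above is triggered when an extension module that does not declare free-threaded support is imported. A sketch of a module opting out of that re-enabling via the Py_mod_gil slot from PEP 703 (the module name is invented):

#include <Python.h>

static PyModuleDef_Slot example_slots[] = {
#ifdef Py_GIL_DISABLED
    /* Tell a free-threaded interpreter this module is safe without the
     * GIL, so importing it does not re-enable the GIL. */
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
#endif
    {0, NULL}
};

static struct PyModuleDef example_module = {
    PyModuleDef_HEAD_INIT,
    .m_name = "example",
    .m_size = 0,
    .m_slots = example_slots,
};

PyMODINIT_FUNC
PyInit_example(void)
{
    return PyModuleDef_Init(&example_module);
}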
1194 | | |
1195 | | #if defined(Py_REMOTE_DEBUG) && defined(Py_SUPPORTS_REMOTE_DEBUG) |
1196 | | // Note that this function is inline to avoid creating a PLT entry |
1197 | | // that would be an easy target for a ROP gadget. |
1198 | | static inline int run_remote_debugger_source(PyObject *source) |
1199 | 0 | { |
1200 | 0 | const char *str = PyBytes_AsString(source); |
1201 | 0 | if (!str) { |
1202 | 0 | return -1; |
1203 | 0 | } |
1204 | | |
1205 | 0 | PyObject *ns = PyDict_New(); |
1206 | 0 | if (!ns) { |
1207 | 0 | return -1; |
1208 | 0 | } |
1209 | | |
1210 | 0 | PyObject *res = PyRun_String(str, Py_file_input, ns, ns); |
1211 | 0 | Py_DECREF(ns); |
1212 | 0 | if (!res) { |
1213 | 0 | return -1; |
1214 | 0 | } |
1215 | 0 | Py_DECREF(res); |
1216 | 0 | return 0; |
1217 | 0 | } |
1218 | | |
1219 | | // Note that this function is inline to avoid creating a PLT entry |
1220 | | // that would be an easy target for a ROP gadget. |
1221 | | static inline void run_remote_debugger_script(PyObject *path) |
1222 | 0 | { |
1223 | 0 | if (0 != PySys_Audit("cpython.remote_debugger_script", "O", path)) { |
1224 | 0 | PyErr_FormatUnraisable( |
1225 | 0 | "Audit hook failed for remote debugger script %U", path); |
1226 | 0 | return; |
1227 | 0 | } |
1228 | | |
1229 | | // Open the debugger script with the open code hook, and reopen the |
1230 | | // resulting file object to get a C FILE* object. |
1231 | 0 | PyObject* fileobj = PyFile_OpenCodeObject(path); |
1232 | 0 | if (!fileobj) { |
1233 | 0 | PyErr_FormatUnraisable("Can't open debugger script %U", path); |
1234 | 0 | return; |
1235 | 0 | } |
1236 | | |
1237 | 0 | PyObject* source = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(read)); |
1238 | 0 | if (!source) { |
1239 | 0 | PyErr_FormatUnraisable("Error reading debugger script %U", path); |
1240 | 0 | } |
1241 | |
1242 | 0 | PyObject* res = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(close)); |
1243 | 0 | if (!res) { |
1244 | 0 | PyErr_FormatUnraisable("Error closing debugger script %U", path); |
1245 | 0 | } else { |
1246 | 0 | Py_DECREF(res); |
1247 | 0 | } |
1248 | 0 | Py_DECREF(fileobj); |
1249 | |
1250 | 0 | if (source) { |
1251 | 0 | if (0 != run_remote_debugger_source(source)) { |
1252 | 0 | PyErr_FormatUnraisable("Error executing debugger script %U", path); |
1253 | 0 | } |
1254 | 0 | Py_DECREF(source); |
1255 | 0 | } |
1256 | 0 | } |
1257 | | |
1258 | | int _PyRunRemoteDebugger(PyThreadState *tstate) |
1259 | 114M | { |
1260 | 114M | const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); |
1261 | 114M | if (config->remote_debug == 1 |
1262 | 114M | && tstate->remote_debugger_support.debugger_pending_call == 1) |
1263 | 0 | { |
1264 | 0 | tstate->remote_debugger_support.debugger_pending_call = 0; |
1265 | | |
1266 | | // Immediately make a copy in case of a race with another debugger |
1267 | | // process that's trying to write to the buffer. At least this way |
1268 | | // we'll be internally consistent: what we audit is what we run. |
1269 | 0 | const size_t pathsz |
1270 | 0 | = sizeof(tstate->remote_debugger_support.debugger_script_path); |
1271 | |
1272 | 0 | char *path = PyMem_Malloc(pathsz); |
1273 | 0 | if (path) { |
1274 | | // And don't assume the debugger correctly null terminated it. |
1275 | 0 | memcpy( |
1276 | 0 | path, |
1277 | 0 | tstate->remote_debugger_support.debugger_script_path, |
1278 | 0 | pathsz); |
1279 | 0 | path[pathsz - 1] = '\0'; |
1280 | 0 | if (*path) { |
1281 | 0 | PyObject *path_obj = PyUnicode_DecodeFSDefault(path); |
1282 | 0 | if (path_obj == NULL) { |
1283 | 0 | PyErr_FormatUnraisable("Can't decode debugger script"); |
1284 | 0 | } |
1285 | 0 | else { |
1286 | 0 | run_remote_debugger_script(path_obj); |
1287 | 0 | Py_DECREF(path_obj); |
1288 | 0 | } |
1289 | 0 | } |
1290 | 0 | PyMem_Free(path); |
1291 | 0 | } |
1292 | 0 | } |
1293 | 114M | return 0; |
1294 | 114M | } |
1295 | | |
1296 | | #endif |
1297 | | |
1298 | | /* Do periodic things, like check for signals and async I/O. |
1299 | | * We need to do this reasonably frequently, but not too frequently. |
1300 | | * All loops should include a check of the eval breaker. |
1301 | | * We also check on return from any builtin function. |
1302 | | * |
1303 | | * ### More Details ### |
1304 | | * |
1305 | | * The eval loop (this function) normally executes the instructions |
1306 | | * of a code object sequentially. However, the runtime supports a |
1307 | | * number of out-of-band execution scenarios that may pause that |
1308 | | * sequential execution long enough to do that out-of-band work |
1309 | | * in the current thread using the current PyThreadState. |
1310 | | * |
1311 | | * The scenarios include: |
1312 | | * |
1313 | | * - cyclic garbage collection |
1314 | | * - GIL drop requests |
1315 | | * - "async" exceptions |
1316 | | * - "pending calls" (some only in the main thread) |
1317 | | * - signal handling (only in the main thread) |
1318 | | * |
1319 | | * When the need for one of the above is detected, the eval loop |
1320 | | * pauses long enough to handle the detected case. Then, if doing |
1321 | | * so didn't trigger an exception, the eval loop resumes executing |
1322 | | * the sequential instructions. |
1323 | | * |
1324 | | * To make this work, the eval loop periodically checks if any |
1325 | | * of the above needs to happen. The individual checks can be |
1326 | | * expensive if computed each time, so a while back we switched |
1327 | | * to using pre-computed, per-interpreter variables for the checks, |
1328 | | * and later consolidated that to a single "eval breaker" variable |
1329 | | * (now a PyInterpreterState field). |
1330 | | * |
1331 | | * For the longest time, the eval breaker check would happen |
1332 | | * frequently, every 5 or so times through the loop, regardless |
1333 | | * of what instruction ran last or what would run next. Then, in |
1334 | | * early 2021 (gh-18334, commit 4958f5d), we switched to checking |
1335 | | * the eval breaker less frequently, by hard-coding the check to |
1336 | | * specific places in the eval loop (e.g. certain instructions). |
1337 | | * The intent then was to check after returning from calls |
1338 | | * and on the back edges of loops. |
1339 | | * |
1340 | | * In addition to being more efficient, that approach keeps |
1341 | | * the eval loop from running arbitrary code between instructions |
1342 | | * that don't handle that well. (See gh-74174.) |
1343 | | * |
1344 | | * Currently, the eval breaker check happens on back edges in |
1345 | | * the control flow graph, which pretty much applies to all loops, |
1346 | | * and most calls. |
1347 | | * (See bytecodes.c for exact information.) |
1348 | | * |
1349 | | * One consequence of this approach is that it might not be obvious |
1350 | | * how to force any specific thread to pick up the eval breaker, |
1351 | | * or for any specific thread to not pick it up. Mostly this |
1352 | | * involves judicious uses of locks and careful ordering of code, |
1353 | | * while avoiding code that might trigger the eval breaker |
1354 | | * until so desired. |
1355 | | */ |
1356 | | int |
1357 | | _Py_HandlePending(PyThreadState *tstate) |
1358 | 64.1k | { |
1359 | 64.1k | uintptr_t breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); |
1360 | | |
1361 | | /* Stop-the-world */ |
1362 | 64.1k | if ((breaker & _PY_EVAL_PLEASE_STOP_BIT) != 0) { |
1363 | 0 | _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_PLEASE_STOP_BIT); |
1364 | 0 | _PyThreadState_Suspend(tstate); |
1365 | | |
1366 | | /* The attach blocks until the stop-the-world event is complete. */ |
1367 | 0 | _PyThreadState_Attach(tstate); |
1368 | 0 | } |
1369 | | |
1370 | | /* Pending signals */ |
1371 | 64.1k | if ((breaker & _PY_SIGNALS_PENDING_BIT) != 0) { |
1372 | 0 | if (handle_signals(tstate) != 0) { |
1373 | 0 | return -1; |
1374 | 0 | } |
1375 | 0 | } |
1376 | | |
1377 | | /* Pending calls */ |
1378 | 64.1k | if ((breaker & _PY_CALLS_TO_DO_BIT) != 0) { |
1379 | 0 | if (make_pending_calls(tstate) != 0) { |
1380 | 0 | return -1; |
1381 | 0 | } |
1382 | 0 | } |
1383 | | |
1384 | | #ifdef Py_GIL_DISABLED |
1385 | | /* Objects with refcounts to merge */ |
1386 | | if ((breaker & _PY_EVAL_EXPLICIT_MERGE_BIT) != 0) { |
1387 | | _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT); |
1388 | | _Py_brc_merge_refcounts(tstate); |
1389 | | } |
1390 | | /* Process deferred memory frees held by QSBR */ |
1391 | | if (_Py_qsbr_should_process(((_PyThreadStateImpl *)tstate)->qsbr)) { |
1392 | | _PyMem_ProcessDelayed(tstate); |
1393 | | } |
1394 | | #endif |
1395 | | |
1396 | | /* GC scheduled to run */ |
1397 | 64.1k | if ((breaker & _PY_GC_SCHEDULED_BIT) != 0) { |
1398 | 64.1k | _Py_unset_eval_breaker_bit(tstate, _PY_GC_SCHEDULED_BIT); |
1399 | 64.1k | _Py_RunGC(tstate); |
1400 | 64.1k | } |
1401 | | |
1402 | 64.1k | if ((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) { |
1403 | 0 | _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT); |
1404 | 0 | _Py_Executors_InvalidateCold(tstate->interp); |
1405 | 0 | tstate->interp->trace_run_counter = JIT_CLEANUP_THRESHOLD; |
1406 | 0 | } |
1407 | | |
1408 | | /* GIL drop request */ |
1409 | 64.1k | if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) { |
1410 | | /* Give another thread a chance */ |
1411 | 0 | _PyThreadState_Detach(tstate); |
1412 | | |
1413 | | /* Other threads may run now */ |
1414 | |
1415 | 0 | _PyThreadState_Attach(tstate); |
1416 | 0 | } |
1417 | | |
1418 | | /* Check for asynchronous exception. */ |
1419 | 64.1k | if ((breaker & _PY_ASYNC_EXCEPTION_BIT) != 0) { |
1420 | 0 | _Py_unset_eval_breaker_bit(tstate, _PY_ASYNC_EXCEPTION_BIT); |
1421 | 0 | PyObject *exc = _Py_atomic_exchange_ptr(&tstate->async_exc, NULL); |
1422 | 0 | if (exc != NULL) { |
1423 | 0 | _PyErr_SetNone(tstate, exc); |
1424 | 0 | Py_DECREF(exc); |
1425 | 0 | return -1; |
1426 | 0 | } |
1427 | 0 | } |
1428 | | |
1429 | 64.1k | #if defined(Py_REMOTE_DEBUG) && defined(Py_SUPPORTS_REMOTE_DEBUG) |
1430 | 64.1k | _PyRunRemoteDebugger(tstate); |
1431 | 64.1k | #endif |
1432 | | |
1433 | 64.1k | return 0; |
1434 | 64.1k | } |
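The rule above that "all loops should include a check of the eval breaker" only covers bytecode loops; a long-running loop inside a C extension never reaches a back edge, so by convention it polls for pending work itself while it holds the GIL. A sketch of that pattern (the function and its workload are hypothetical):

#include <Python.h>

static PyObject *
example_crunch(PyObject *self, PyObject *args)
{
    long iterations;
    if (!PyArg_ParseTuple(args, "l", &iterations)) {
        return NULL;
    }

    for (long i = 0; i < iterations; i++) {
        /* ... one unit of pure C work here ... */

        if ((i & 0xFFFF) == 0) {
            /* Lets a pending KeyboardInterrupt propagate. */
            if (PyErr_CheckSignals() < 0) {
                return NULL;
            }
            /* Briefly drop and retake the GIL, which goes through
             * drop_gil()/take_gil() above and lets waiting threads run. */
            Py_BEGIN_ALLOW_THREADS
            Py_END_ALLOW_THREADS
        }
    }
    Py_RETURN_NONE;
}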