/src/cpython/Python/lock.c

Source
// Lock implementation

#include "Python.h"

#include "pycore_lock.h"
#include "pycore_parking_lot.h"
#include "pycore_semaphore.h"
#include "pycore_time.h"          // _PyTime_Add()
#include "pycore_stats.h"         // FT_STAT_MUTEX_SLEEP_INC()

#ifdef MS_WINDOWS
#  ifndef WIN32_LEAN_AND_MEAN
#    define WIN32_LEAN_AND_MEAN
#  endif
#  include <windows.h>            // SwitchToThread()
#elif defined(HAVE_SCHED_H)
#  include <sched.h>              // sched_yield()
#endif

// If a thread waits on a lock for longer than TIME_TO_BE_FAIR_NS (1 ms), then
// the unlocking thread directly hands off ownership of the lock. This avoids
// starvation.
static const PyTime_t TIME_TO_BE_FAIR_NS = 1000*1000;

// Spin for a bit before parking the thread. This is only enabled for
// `--disable-gil` builds because it is unlikely to be helpful if the GIL is
// enabled.
#if Py_GIL_DISABLED
static const int MAX_SPIN_COUNT = 40;
static const int RELOAD_SPIN_MASK = 3;
#else
static const int MAX_SPIN_COUNT = 0;
static const int RELOAD_SPIN_MASK = 1;
#endif

struct mutex_entry {
    // The time after which the unlocking thread should hand off lock ownership
    // directly to the waiting thread. Written by the waiting thread.
    PyTime_t time_to_be_fair;

    // Set to 1 if the lock was handed off. Written by the unlocking thread.
    int handed_off;
};

void
_Py_yield(void)
{
#ifdef MS_WINDOWS
    SwitchToThread();
#elif defined(HAVE_SCHED_H)
    sched_yield();
#endif
}

PyLockStatus
_PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
{
    uint8_t v = _Py_atomic_load_uint8_relaxed(&m->_bits);
    if ((v & _Py_LOCKED) == 0) {
        if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v|_Py_LOCKED)) {
            return PY_LOCK_ACQUIRED;
        }
    }
    if (timeout == 0) {
        return PY_LOCK_FAILURE;
    }

    FT_STAT_MUTEX_SLEEP_INC();

    PyTime_t now;
    // silently ignore error: cannot report error to the caller
    (void)PyTime_MonotonicRaw(&now);
    PyTime_t endtime = 0;
    if (timeout > 0) {
        endtime = _PyTime_Add(now, timeout);
    }

    struct mutex_entry entry = {
        .time_to_be_fair = now + TIME_TO_BE_FAIR_NS,
        .handed_off = 0,
    };

    Py_ssize_t spin_count = 0;
#ifdef Py_GIL_DISABLED
    // Using thread-id as a way of reducing contention further in the reload below.
    // It adds a pseudo-random starting offset to the recurrence, so that threads
    // are less likely to try and run compare-exchange at the same time.
    // The lower bits of platform thread ids are likely to not be random,
    // hence the right shift.
    const Py_ssize_t tid = (Py_ssize_t)(_Py_ThreadId() >> 12);
#else
    const Py_ssize_t tid = 0;
#endif
    for (;;) {
        if ((v & _Py_LOCKED) == 0) {
            // The lock is unlocked. Try to grab it.
            if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v|_Py_LOCKED)) {
                return PY_LOCK_ACQUIRED;
            }
            continue;
        }

        if (!(v & _Py_HAS_PARKED) && spin_count < MAX_SPIN_COUNT) {
            // Spin for a bit.
            _Py_yield();
            spin_count++;
            if (((spin_count + tid) & RELOAD_SPIN_MASK) == 0) {
                v = _Py_atomic_load_uint8_relaxed(&m->_bits);
            }
            continue;
        }

        if (timeout == 0) {
            return PY_LOCK_FAILURE;
        }
        if ((flags & _PY_LOCK_PYTHONLOCK) && Py_IsFinalizing()) {
            // At this phase of runtime shutdown, only the finalization thread
            // can have attached thread state; others hang if they try
            // attaching. And since operations on this lock requires attached
            // thread state (_PY_LOCK_PYTHONLOCK), the finalization thread is
            // running this code, and no other thread can unlock.
            // Raise rather than hang. (_PY_LOCK_PYTHONLOCK allows raising
            // exceptons.)
            PyErr_SetString(PyExc_PythonFinalizationError,
                            "cannot acquire lock at interpreter finalization");
            return PY_LOCK_FAILURE;
        }

        uint8_t newv = v;
        if (!(v & _Py_HAS_PARKED)) {
            // We are the first waiter. Set the _Py_HAS_PARKED flag.
            newv = v | _Py_HAS_PARKED;
            if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &v, newv)) {
                continue;
            }
        }

        int ret = _PyParkingLot_Park(&m->_bits, &newv, sizeof(newv), timeout,
                                     &entry, (flags & _PY_LOCK_DETACH) != 0);
        if (ret == Py_PARK_OK) {
            if (entry.handed_off) {
                // We own the lock now. thread.Lock allows other threads
                // to concurrently release the lock so we cannot assert that
                // it is locked if _PY_LOCK_PYTHONLOCK is set.
                assert(_Py_atomic_load_uint8_relaxed(&m->_bits) & _Py_LOCKED ||
                       (flags & _PY_LOCK_PYTHONLOCK) != 0);
                return PY_LOCK_ACQUIRED;
            }
        }
        else if (ret == Py_PARK_INTR && (flags & _PY_LOCK_HANDLE_SIGNALS)) {
            if (Py_MakePendingCalls() < 0) {
                return PY_LOCK_INTR;
            }
        }
        else if (ret == Py_PARK_INTR && (flags & _PY_FAIL_IF_INTERRUPTED)) {
            return PY_LOCK_INTR;
        }
        else if (ret == Py_PARK_TIMEOUT) {
            assert(timeout >= 0);
            return PY_LOCK_FAILURE;
        }

        if (timeout > 0) {
            timeout = _PyDeadline_Get(endtime);
            if (timeout <= 0) {
                // Avoid negative values because those mean block forever.
                timeout = 0;
            }
        }

        v = _Py_atomic_load_uint8_relaxed(&m->_bits);
    }
}

static void
mutex_unpark(void *arg, void *park_arg, int has_more_waiters)
{
    PyMutex *m = (PyMutex*)arg;
    struct mutex_entry *entry = (struct mutex_entry*)park_arg;
    uint8_t v = 0;
    if (entry) {
        PyTime_t now;
        // silently ignore error: cannot report error to the caller
        (void)PyTime_MonotonicRaw(&now);
        int should_be_fair = now > entry->time_to_be_fair;

        entry->handed_off = should_be_fair;
        if (should_be_fair) {
            v |= _Py_LOCKED;
        }
        if (has_more_waiters) {
            v |= _Py_HAS_PARKED;
        }
    }
    _Py_atomic_store_uint8(&m->_bits, v);
}

int
_PyMutex_TryUnlock(PyMutex *m)
{
    uint8_t v = _Py_atomic_load_uint8(&m->_bits);
    for (;;) {
        if ((v & _Py_LOCKED) == 0) {
            // error: the mutex is not locked
            return -1;
        }
        else if ((v & _Py_HAS_PARKED)) {
            // wake up a single thread
            _PyParkingLot_Unpark(&m->_bits, mutex_unpark, m);
            return 0;
        }
        else if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, _Py_UNLOCKED)) {
            // fast-path: no waiters
            return 0;
        }
    }
}

// _PyRawMutex stores a linked list of `struct raw_mutex_entry`, one for each
// thread waiting on the mutex, directly in the mutex itself.
struct raw_mutex_entry {
    struct raw_mutex_entry *next;
    _PySemaphore sema;
};

void
_PyRawMutex_LockSlow(_PyRawMutex *m)
{
    struct raw_mutex_entry waiter;
    _PySemaphore_Init(&waiter.sema);

    uintptr_t v = _Py_atomic_load_uintptr(&m->v);
    for (;;) {
        if ((v & _Py_LOCKED) == 0) {
            // Unlocked: try to grab it (even if it has a waiter).
            if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, v|_Py_LOCKED)) {
                break;
            }
            continue;
        }

        // Locked: try to add ourselves as a waiter.
        waiter.next = (struct raw_mutex_entry *)(v & ~1);
        uintptr_t desired = ((uintptr_t)&waiter)|_Py_LOCKED;
        if (!_Py_atomic_compare_exchange_uintptr(&m->v, &v, desired)) {
            continue;
        }

        // Wait for us to be woken up. Note that we still have to lock the
        // mutex ourselves: it is NOT handed off to us.
        _PySemaphore_Wait(&waiter.sema, -1);
    }

    _PySemaphore_Destroy(&waiter.sema);
}

void
_PyRawMutex_UnlockSlow(_PyRawMutex *m)
{
    uintptr_t v = _Py_atomic_load_uintptr(&m->v);
    for (;;) {
        if ((v & _Py_LOCKED) == 0) {
            Py_FatalError("unlocking mutex that is not locked");
        }

        struct raw_mutex_entry *waiter = (struct raw_mutex_entry *)(v & ~1);
        if (waiter) {
            uintptr_t next_waiter = (uintptr_t)waiter->next;
            if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, next_waiter)) {
                _PySemaphore_Wakeup(&waiter->sema);
                return;
            }
        }
        else {
            if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, _Py_UNLOCKED)) {
                return;
            }
        }
    }
}

int
_PyEvent_IsSet(PyEvent *evt)
{
    uint8_t v = _Py_atomic_load_uint8(&evt->v);
    return v == _Py_LOCKED;
}

void
_PyEvent_Notify(PyEvent *evt)
{
    uintptr_t v = _Py_atomic_exchange_uint8(&evt->v, _Py_LOCKED);
    if (v == _Py_UNLOCKED) {
        // no waiters
        return;
    }
    else if (v == _Py_LOCKED) {
        // event already set
        return;
    }
    else {
        assert(v == _Py_HAS_PARKED);
        _PyParkingLot_UnparkAll(&evt->v);
    }
}

void
PyEvent_Wait(PyEvent *evt)
{
    while (!PyEvent_WaitTimed(evt, -1, /*detach=*/1))
        ;
}

int
PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach)
{
    for (;;) {
        uint8_t v = _Py_atomic_load_uint8(&evt->v);
        if (v == _Py_LOCKED) {
            // event already set
            return 1;
        }
        if (v == _Py_UNLOCKED) {
            if (!_Py_atomic_compare_exchange_uint8(&evt->v, &v, _Py_HAS_PARKED)) {
                continue;
            }
        }

        uint8_t expected = _Py_HAS_PARKED;
        (void) _PyParkingLot_Park(&evt->v, &expected, sizeof(evt->v),
                                  timeout_ns, NULL, detach);

        return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED;
    }
}

static int
unlock_once(_PyOnceFlag *o, int res)
{
    // On success (res=0), we set the state to _Py_ONCE_INITIALIZED.
    // On failure (res=-1), we reset the state to _Py_UNLOCKED.
    uint8_t new_value;
    switch (res) {
        case -1: new_value = _Py_UNLOCKED; break;
        case  0: new_value = _Py_ONCE_INITIALIZED; break;
        default: {
            Py_FatalError("invalid result from _PyOnceFlag_CallOnce");
            Py_UNREACHABLE();
            break;
        }
    }

    uint8_t old_value = _Py_atomic_exchange_uint8(&o->v, new_value);
    if ((old_value & _Py_HAS_PARKED) != 0) {
        // wake up anyone waiting on the once flag
        _PyParkingLot_UnparkAll(&o->v);
    }
    return res;
}

int
_PyOnceFlag_CallOnceSlow(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg)
{
    uint8_t v = _Py_atomic_load_uint8(&flag->v);
    for (;;) {
        if (v == _Py_UNLOCKED) {
            if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, _Py_LOCKED)) {
                continue;
            }
            int res = fn(arg);
            return unlock_once(flag, res);
        }

        if (v == _Py_ONCE_INITIALIZED) {
            return 0;
        }

        // The once flag is initializing (locked).
        assert((v & _Py_LOCKED));
        if (!(v & _Py_HAS_PARKED)) {
            // We are the first waiter. Set the _Py_HAS_PARKED flag.
            uint8_t new_value = v | _Py_HAS_PARKED;
            if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, new_value)) {
                continue;
            }
            v = new_value;
        }

        // Wait for initialization to finish.
        _PyParkingLot_Park(&flag->v, &v, sizeof(v), -1, NULL, 1);
        v = _Py_atomic_load_uint8(&flag->v);
    }
}

static int
recursive_mutex_is_owned_by(_PyRecursiveMutex *m, PyThread_ident_t tid)
{
    return _Py_atomic_load_ullong_relaxed(&m->thread) == tid;
}

int
_PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m)
{
    return recursive_mutex_is_owned_by(m, PyThread_get_thread_ident_ex());
}

void
_PyRecursiveMutex_Lock(_PyRecursiveMutex *m)
{
    PyThread_ident_t thread = PyThread_get_thread_ident_ex();
    if (recursive_mutex_is_owned_by(m, thread)) {
        m->level++;
        return;
    }
    PyMutex_Lock(&m->mutex);
    _Py_atomic_store_ullong_relaxed(&m->thread, thread);
    assert(m->level == 0);
}

PyLockStatus
_PyRecursiveMutex_LockTimed(_PyRecursiveMutex *m, PyTime_t timeout, _PyLockFlags flags)
{
    PyThread_ident_t thread = PyThread_get_thread_ident_ex();
    if (recursive_mutex_is_owned_by(m, thread)) {
        m->level++;
        return PY_LOCK_ACQUIRED;
    }
    PyLockStatus s = _PyMutex_LockTimed(&m->mutex, timeout, flags);
    if (s == PY_LOCK_ACQUIRED) {
        _Py_atomic_store_ullong_relaxed(&m->thread, thread);
        assert(m->level == 0);
    }
    return s;
}

void
_PyRecursiveMutex_Unlock(_PyRecursiveMutex *m)
{
    if (_PyRecursiveMutex_TryUnlock(m) < 0) {
        Py_FatalError("unlocking a recursive mutex that is not "
                      "owned by the current thread");
    }
}

int
_PyRecursiveMutex_TryUnlock(_PyRecursiveMutex *m)
{
    PyThread_ident_t thread = PyThread_get_thread_ident_ex();
    if (!recursive_mutex_is_owned_by(m, thread)) {
        return -1;
    }
    if (m->level > 0) {
        m->level--;
        return 0;
    }
    assert(m->level == 0);
    _Py_atomic_store_ullong_relaxed(&m->thread, 0);
    PyMutex_Unlock(&m->mutex);
    return 0;
}

#define _Py_WRITE_LOCKED 1
#define _PyRWMutex_READER_SHIFT 2
#define _Py_RWMUTEX_MAX_READERS (UINTPTR_MAX >> _PyRWMutex_READER_SHIFT)

static uintptr_t
rwmutex_set_parked_and_wait(_PyRWMutex *rwmutex, uintptr_t bits)
{
    // Set _Py_HAS_PARKED and wait until we are woken up.
    if ((bits & _Py_HAS_PARKED) == 0) {
        uintptr_t newval = bits | _Py_HAS_PARKED;
        if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
                                                 &bits, newval)) {
            return bits;
        }
        bits = newval;
    }

    _PyParkingLot_Park(&rwmutex->bits, &bits, sizeof(bits), -1, NULL, 1);
    return _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
}

// The number of readers holding the lock
static uintptr_t
rwmutex_reader_count(uintptr_t bits)
{
    return bits >> _PyRWMutex_READER_SHIFT;
}

void
_PyRWMutex_RLock(_PyRWMutex *rwmutex)
{
    uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
    for (;;) {
        if ((bits & _Py_WRITE_LOCKED)) {
            // A writer already holds the lock.
            bits = rwmutex_set_parked_and_wait(rwmutex, bits);
            continue;
        }
        else if ((bits & _Py_HAS_PARKED)) {
            // Reader(s) hold the lock (or just gave up the lock), but there is
            // at least one waiting writer. We can't grab the lock because we
            // don't want to starve the writer. Instead, we park ourselves and
            // wait for the writer to eventually wake us up.
            bits = rwmutex_set_parked_and_wait(rwmutex, bits);
            continue;
        }
        else {
            // The lock is unlocked or read-locked. Try to grab it.
            assert(rwmutex_reader_count(bits) < _Py_RWMUTEX_MAX_READERS);
            uintptr_t newval = bits + (1 << _PyRWMutex_READER_SHIFT);
            if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
                                                     &bits, newval)) {
                continue;
            }
            return;
        }
    }
}

void
_PyRWMutex_RUnlock(_PyRWMutex *rwmutex)
{
    uintptr_t bits = _Py_atomic_add_uintptr(&rwmutex->bits, -(1 << _PyRWMutex_READER_SHIFT));
    assert(rwmutex_reader_count(bits) > 0 && "lock was not read-locked");
    bits -= (1 << _PyRWMutex_READER_SHIFT);

    if (rwmutex_reader_count(bits) == 0 && (bits & _Py_HAS_PARKED)) {
        _PyParkingLot_UnparkAll(&rwmutex->bits);
        return;
    }
}

void
_PyRWMutex_Lock(_PyRWMutex *rwmutex)
{
    uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
    for (;;) {
        // If there are no active readers and it's not already write-locked,
        // then we can grab the lock.
        if ((bits & ~_Py_HAS_PARKED) == 0) {
            if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
                                                     &bits,
                                                     bits | _Py_WRITE_LOCKED)) {
                continue;
            }
            return;
        }

        // Otherwise, we have to wait.
        bits = rwmutex_set_parked_and_wait(rwmutex, bits);
    }
}

void
_PyRWMutex_Unlock(_PyRWMutex *rwmutex)
{
    uintptr_t old_bits = _Py_atomic_exchange_uintptr(&rwmutex->bits, 0);

    assert((old_bits & _Py_WRITE_LOCKED) && "lock was not write-locked");
    assert(rwmutex_reader_count(old_bits) == 0 && "lock was read-locked");

    if ((old_bits & _Py_HAS_PARKED) != 0) {
        _PyParkingLot_UnparkAll(&rwmutex->bits);
    }
}

#define SEQLOCK_IS_UPDATING(sequence) (sequence & 0x01)

void _PySeqLock_LockWrite(_PySeqLock *seqlock)
{
    // lock by moving to an odd sequence number
    uint32_t prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
    while (1) {
        if (SEQLOCK_IS_UPDATING(prev)) {
            // Someone else is currently updating the cache
            _Py_yield();
            prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
        }
        else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
            // We've locked the cache
            _Py_atomic_fence_release();
            break;
        }
        else {
            _Py_yield();
        }
    }
}

void _PySeqLock_AbandonWrite(_PySeqLock *seqlock)
{
    uint32_t new_seq = _Py_atomic_load_uint32_relaxed(&seqlock->sequence) - 1;
    assert(!SEQLOCK_IS_UPDATING(new_seq));
    _Py_atomic_store_uint32(&seqlock->sequence, new_seq);
}

void _PySeqLock_UnlockWrite(_PySeqLock *seqlock)
{
    uint32_t new_seq = _Py_atomic_load_uint32_relaxed(&seqlock->sequence) + 1;
    assert(!SEQLOCK_IS_UPDATING(new_seq));
    _Py_atomic_store_uint32(&seqlock->sequence, new_seq);
}

uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock)
{
    uint32_t sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
    while (SEQLOCK_IS_UPDATING(sequence)) {
        _Py_yield();
        sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
    }

    return sequence;
}

int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
{
    // gh-121368: We need an explicit acquire fence here to ensure that
    // this load of the sequence number is not reordered before any loads
    // within the read lock.
    _Py_atomic_fence_acquire();

    if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) {
        return 1;
    }

    _Py_yield();
    return 0;
}

int _PySeqLock_AfterFork(_PySeqLock *seqlock)
{
    // Synchronize again and validate that the entry hasn't been updated
    // while we were readying the values.
    if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
        seqlock->sequence = 0;
        return 1;
    }

    return 0;
}

#undef PyMutex_Lock
void
PyMutex_Lock(PyMutex *m)
{
    _PyMutex_LockTimed(m, -1, _PY_LOCK_DETACH);
}

#undef PyMutex_Unlock
void
PyMutex_Unlock(PyMutex *m)
{
    if (_PyMutex_TryUnlock(m) < 0) {
        Py_FatalError("unlocking mutex that is not locked");
    }
}


#undef PyMutex_IsLocked
int
PyMutex_IsLocked(PyMutex *m)
{
    return _PyMutex_IsLocked(m);
}

Coverage Report

Created: 2026-04-12 06:54

Line	Count	Source
1		// Lock implementation
2
3		#include "Python.h"
4
5		#include "pycore_lock.h"
6		#include "pycore_parking_lot.h"
7		#include "pycore_semaphore.h"
8		#include "pycore_time.h" // _PyTime_Add()
9		#include "pycore_stats.h" // FT_STAT_MUTEX_SLEEP_INC()
10
11		#ifdef MS_WINDOWS
12		# ifndef WIN32_LEAN_AND_MEAN
13		# define WIN32_LEAN_AND_MEAN
14		# endif
15		# include <windows.h> // SwitchToThread()
16		#elif defined(HAVE_SCHED_H)
17		# include <sched.h> // sched_yield()
18		#endif
19
20		// If a thread waits on a lock for longer than TIME_TO_BE_FAIR_NS (1 ms), then
21		// the unlocking thread directly hands off ownership of the lock. This avoids
22		// starvation.
23		static const PyTime_t TIME_TO_BE_FAIR_NS = 1000*1000;
24
25		// Spin for a bit before parking the thread. This is only enabled for
26		// `--disable-gil` builds because it is unlikely to be helpful if the GIL is
27		// enabled.
28		#if Py_GIL_DISABLED
29		static const int MAX_SPIN_COUNT = 40;
30		static const int RELOAD_SPIN_MASK = 3;
31		#else
32		static const int MAX_SPIN_COUNT = 0;
33		static const int RELOAD_SPIN_MASK = 1;
34		#endif
35
36		struct mutex_entry {
37		// The time after which the unlocking thread should hand off lock ownership
38		// directly to the waiting thread. Written by the waiting thread.
39		PyTime_t time_to_be_fair;
40
41		// Set to 1 if the lock was handed off. Written by the unlocking thread.
42		int handed_off;
43		};
44
45		void
46		_Py_yield(void)
47	0	{
48		#ifdef MS_WINDOWS
49		SwitchToThread();
50		#elif defined(HAVE_SCHED_H)
51		sched_yield();
52	0	#endif
53	0	}
54
55		PyLockStatus
56		_PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
57	932k	{
58	932k	uint8_t v = _Py_atomic_load_uint8_relaxed(&m->_bits);
59	932k	if ((v & _Py_LOCKED) == 0) {
60	932k	if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v\|_Py_LOCKED)) {
61	932k	return PY_LOCK_ACQUIRED;
62	932k	}
63	932k	}
64	8	if (timeout == 0) {
65	8	return PY_LOCK_FAILURE;
66	8	}
67
68	0	FT_STAT_MUTEX_SLEEP_INC();
69
70	0	PyTime_t now;
71		// silently ignore error: cannot report error to the caller
72	0	(void)PyTime_MonotonicRaw(&now);
73	0	PyTime_t endtime = 0;
74	0	if (timeout > 0) {
75	0	endtime = _PyTime_Add(now, timeout);
76	0	}
77
78	0	struct mutex_entry entry = {
79	0	.time_to_be_fair = now + TIME_TO_BE_FAIR_NS,
80	0	.handed_off = 0,
81	0	};
82
83	0	Py_ssize_t spin_count = 0;
84		#ifdef Py_GIL_DISABLED
85		// Using thread-id as a way of reducing contention further in the reload below.
86		// It adds a pseudo-random starting offset to the recurrence, so that threads
87		// are less likely to try and run compare-exchange at the same time.
88		// The lower bits of platform thread ids are likely to not be random,
89		// hence the right shift.
90		const Py_ssize_t tid = (Py_ssize_t)(_Py_ThreadId() >> 12);
91		#else
92	0	const Py_ssize_t tid = 0;
93	0	#endif
94	0	for (;;) {
95	0	if ((v & _Py_LOCKED) == 0) {
96		// The lock is unlocked. Try to grab it.
97	0	if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v\|_Py_LOCKED)) {
98	0	return PY_LOCK_ACQUIRED;
99	0	}
100	0	continue;
101	0	}
102
103	0	if (!(v & _Py_HAS_PARKED) && spin_count < MAX_SPIN_COUNT) {
104		// Spin for a bit.
105	0	_Py_yield();
106	0	spin_count++;
107	0	if (((spin_count + tid) & RELOAD_SPIN_MASK) == 0) {
108	0	v = _Py_atomic_load_uint8_relaxed(&m->_bits);
109	0	}
110	0	continue;
111	0	}
112
113	0	if (timeout == 0) {
114	0	return PY_LOCK_FAILURE;
115	0	}
116	0	if ((flags & _PY_LOCK_PYTHONLOCK) && Py_IsFinalizing()) {
117		// At this phase of runtime shutdown, only the finalization thread
118		// can have attached thread state; others hang if they try
119		// attaching. And since operations on this lock requires attached
120		// thread state (_PY_LOCK_PYTHONLOCK), the finalization thread is
121		// running this code, and no other thread can unlock.
122		// Raise rather than hang. (_PY_LOCK_PYTHONLOCK allows raising
123		// exceptons.)
124	0	PyErr_SetString(PyExc_PythonFinalizationError,
125	0	"cannot acquire lock at interpreter finalization");
126	0	return PY_LOCK_FAILURE;
127	0	}
128
129	0	uint8_t newv = v;
130	0	if (!(v & _Py_HAS_PARKED)) {
131		// We are the first waiter. Set the _Py_HAS_PARKED flag.
132	0	newv = v \| _Py_HAS_PARKED;
133	0	if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &v, newv)) {
134	0	continue;
135	0	}
136	0	}
137
138	0	int ret = _PyParkingLot_Park(&m->_bits, &newv, sizeof(newv), timeout,
139	0	&entry, (flags & _PY_LOCK_DETACH) != 0);
140	0	if (ret == Py_PARK_OK) {
141	0	if (entry.handed_off) {
142		// We own the lock now. thread.Lock allows other threads
143		// to concurrently release the lock so we cannot assert that
144		// it is locked if _PY_LOCK_PYTHONLOCK is set.
145	0	assert(_Py_atomic_load_uint8_relaxed(&m->_bits) & _Py_LOCKED \|\|
146	0	(flags & _PY_LOCK_PYTHONLOCK) != 0);
147	0	return PY_LOCK_ACQUIRED;
148	0	}
149	0	}
150	0	else if (ret == Py_PARK_INTR && (flags & _PY_LOCK_HANDLE_SIGNALS)) {
151	0	if (Py_MakePendingCalls() < 0) {
152	0	return PY_LOCK_INTR;
153	0	}
154	0	}
155	0	else if (ret == Py_PARK_INTR && (flags & _PY_FAIL_IF_INTERRUPTED)) {
156	0	return PY_LOCK_INTR;
157	0	}
158	0	else if (ret == Py_PARK_TIMEOUT) {
159	0	assert(timeout >= 0);
160	0	return PY_LOCK_FAILURE;
161	0	}
162
163	0	if (timeout > 0) {
164	0	timeout = _PyDeadline_Get(endtime);
165	0	if (timeout <= 0) {
166		// Avoid negative values because those mean block forever.
167	0	timeout = 0;
168	0	}
169	0	}
170
171	0	v = _Py_atomic_load_uint8_relaxed(&m->_bits);
172	0	}
173	0	}
174
175		static void
176		mutex_unpark(void arg, void park_arg, int has_more_waiters)
177	0	{
178	0	PyMutex m = (PyMutex)arg;
179	0	struct mutex_entry entry = (struct mutex_entry)park_arg;
180	0	uint8_t v = 0;
181	0	if (entry) {
182	0	PyTime_t now;
183		// silently ignore error: cannot report error to the caller
184	0	(void)PyTime_MonotonicRaw(&now);
185	0	int should_be_fair = now > entry->time_to_be_fair;
186
187	0	entry->handed_off = should_be_fair;
188	0	if (should_be_fair) {
189	0	v \|= _Py_LOCKED;
190	0	}
191	0	if (has_more_waiters) {
192	0	v \|= _Py_HAS_PARKED;
193	0	}
194	0	}
195	0	_Py_atomic_store_uint8(&m->_bits, v);
196	0	}
197
198		int
199		_PyMutex_TryUnlock(PyMutex *m)
200	372	{
201	372	uint8_t v = _Py_atomic_load_uint8(&m->_bits);
202	372	for (;;) {
203	372	if ((v & _Py_LOCKED) == 0) {
204		// error: the mutex is not locked
205	0	return -1;
206	0	}
207	372	else if ((v & _Py_HAS_PARKED)) {
208		// wake up a single thread
209	0	_PyParkingLot_Unpark(&m->_bits, mutex_unpark, m);
210	0	return 0;
211	0	}
212	372	else if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, _Py_UNLOCKED)) {
213		// fast-path: no waiters
214	372	return 0;
215	372	}
216	372	}
217	372	}
218
219		// _PyRawMutex stores a linked list of `struct raw_mutex_entry`, one for each
220		// thread waiting on the mutex, directly in the mutex itself.
221		struct raw_mutex_entry {
222		struct raw_mutex_entry *next;
223		_PySemaphore sema;
224		};
225
226		void
227		_PyRawMutex_LockSlow(_PyRawMutex *m)
228	0	{
229	0	struct raw_mutex_entry waiter;
230	0	_PySemaphore_Init(&waiter.sema);
231
232	0	uintptr_t v = _Py_atomic_load_uintptr(&m->v);
233	0	for (;;) {
234	0	if ((v & _Py_LOCKED) == 0) {
235		// Unlocked: try to grab it (even if it has a waiter).
236	0	if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, v\|_Py_LOCKED)) {
237	0	break;
238	0	}
239	0	continue;
240	0	}
241
242		// Locked: try to add ourselves as a waiter.
243	0	waiter.next = (struct raw_mutex_entry *)(v & ~1);
244	0	uintptr_t desired = ((uintptr_t)&waiter)\|_Py_LOCKED;
245	0	if (!_Py_atomic_compare_exchange_uintptr(&m->v, &v, desired)) {
246	0	continue;
247	0	}
248
249		// Wait for us to be woken up. Note that we still have to lock the
250		// mutex ourselves: it is NOT handed off to us.
251	0	_PySemaphore_Wait(&waiter.sema, -1);
252	0	}
253
254	0	_PySemaphore_Destroy(&waiter.sema);
255	0	}
256
257		void
258		_PyRawMutex_UnlockSlow(_PyRawMutex *m)
259	0	{
260	0	uintptr_t v = _Py_atomic_load_uintptr(&m->v);
261	0	for (;;) {
262	0	if ((v & _Py_LOCKED) == 0) {
263	0	Py_FatalError("unlocking mutex that is not locked");
264	0	}
265
266	0	struct raw_mutex_entry waiter = (struct raw_mutex_entry )(v & ~1);
267	0	if (waiter) {
268	0	uintptr_t next_waiter = (uintptr_t)waiter->next;
269	0	if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, next_waiter)) {
270	0	_PySemaphore_Wakeup(&waiter->sema);
271	0	return;
272	0	}
273	0	}
274	0	else {
275	0	if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, _Py_UNLOCKED)) {
276	0	return;
277	0	}
278	0	}
279	0	}
280	0	}
281
282		int
283		_PyEvent_IsSet(PyEvent *evt)
284	0	{
285	0	uint8_t v = _Py_atomic_load_uint8(&evt->v);
286	0	return v == _Py_LOCKED;
287	0	}
288
289		void
290		_PyEvent_Notify(PyEvent *evt)
291	0	{
292	0	uintptr_t v = _Py_atomic_exchange_uint8(&evt->v, _Py_LOCKED);
293	0	if (v == _Py_UNLOCKED) {
294		// no waiters
295	0	return;
296	0	}
297	0	else if (v == _Py_LOCKED) {
298		// event already set
299	0	return;
300	0	}
301	0	else {
302	0	assert(v == _Py_HAS_PARKED);
303	0	_PyParkingLot_UnparkAll(&evt->v);
304	0	}
305	0	}
306
307		void
308		PyEvent_Wait(PyEvent *evt)
309	0	{
310	0	while (!PyEvent_WaitTimed(evt, -1, /detach=/1))
311	0	;
312	0	}
313
314		int
315		PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach)
316	0	{
317	0	for (;;) {
318	0	uint8_t v = _Py_atomic_load_uint8(&evt->v);
319	0	if (v == _Py_LOCKED) {
320		// event already set
321	0	return 1;
322	0	}
323	0	if (v == _Py_UNLOCKED) {
324	0	if (!_Py_atomic_compare_exchange_uint8(&evt->v, &v, _Py_HAS_PARKED)) {
325	0	continue;
326	0	}
327	0	}
328
329	0	uint8_t expected = _Py_HAS_PARKED;
330	0	(void) _PyParkingLot_Park(&evt->v, &expected, sizeof(evt->v),
331	0	timeout_ns, NULL, detach);
332
333	0	return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED;
334	0	}
335	0	}
336
337		static int
338		unlock_once(_PyOnceFlag *o, int res)
339	261	{
340		// On success (res=0), we set the state to _Py_ONCE_INITIALIZED.
341		// On failure (res=-1), we reset the state to _Py_UNLOCKED.
342	261	uint8_t new_value;
343	261	switch (res) {
344	0	case -1: new_value = _Py_UNLOCKED; break;
345	261	case 0: new_value = _Py_ONCE_INITIALIZED; break;
346	0	default: {
347	0	Py_FatalError("invalid result from _PyOnceFlag_CallOnce");
348	0	Py_UNREACHABLE();
349	0	break;
350	0	}
351	261	}
352
353	261	uint8_t old_value = _Py_atomic_exchange_uint8(&o->v, new_value);
354	261	if ((old_value & _Py_HAS_PARKED) != 0) {
355		// wake up anyone waiting on the once flag
356	0	_PyParkingLot_UnparkAll(&o->v);
357	0	}
358	261	return res;
359	261	}
360
361		int
362		_PyOnceFlag_CallOnceSlow(_PyOnceFlag flag, _Py_once_fn_t fn, void *arg)
363	261	{
364	261	uint8_t v = _Py_atomic_load_uint8(&flag->v);
365	261	for (;;) {
366	261	if (v == _Py_UNLOCKED) {
367	261	if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, _Py_LOCKED)) {
368	0	continue;
369	0	}
370	261	int res = fn(arg);
371	261	return unlock_once(flag, res);
372	261	}
373
374	0	if (v == _Py_ONCE_INITIALIZED) {
375	0	return 0;
376	0	}
377
378		// The once flag is initializing (locked).
379	0	assert((v & _Py_LOCKED));
380	0	if (!(v & _Py_HAS_PARKED)) {
381		// We are the first waiter. Set the _Py_HAS_PARKED flag.
382	0	uint8_t new_value = v \| _Py_HAS_PARKED;
383	0	if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, new_value)) {
384	0	continue;
385	0	}
386	0	v = new_value;
387	0	}
388
389		// Wait for initialization to finish.
390	0	_PyParkingLot_Park(&flag->v, &v, sizeof(v), -1, NULL, 1);
391	0	v = _Py_atomic_load_uint8(&flag->v);
392	0	}
393	261	}
394
395		static int
396		recursive_mutex_is_owned_by(_PyRecursiveMutex *m, PyThread_ident_t tid)
397	2.99M	{
398	2.99M	return _Py_atomic_load_ullong_relaxed(&m->thread) == tid;
399	2.99M	}
400
401		int
402		_PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m)
403	64.3k	{
404	64.3k	return recursive_mutex_is_owned_by(m, PyThread_get_thread_ident_ex());
405	64.3k	}
406
407		void
408		_PyRecursiveMutex_Lock(_PyRecursiveMutex *m)
409	1.20M	{
410	1.20M	PyThread_ident_t thread = PyThread_get_thread_ident_ex();
411	1.20M	if (recursive_mutex_is_owned_by(m, thread)) {
412	82	m->level++;
413	82	return;
414	82	}
415	1.20M	PyMutex_Lock(&m->mutex);
416	1.20M	_Py_atomic_store_ullong_relaxed(&m->thread, thread);
417	1.20M	assert(m->level == 0);
418	1.20M	}
419
420		PyLockStatus
421		_PyRecursiveMutex_LockTimed(_PyRecursiveMutex *m, PyTime_t timeout, _PyLockFlags flags)
422	265k	{
423	265k	PyThread_ident_t thread = PyThread_get_thread_ident_ex();
424	265k	if (recursive_mutex_is_owned_by(m, thread)) {
425	10	m->level++;
426	10	return PY_LOCK_ACQUIRED;
427	10	}
428	265k	PyLockStatus s = _PyMutex_LockTimed(&m->mutex, timeout, flags);
429	265k	if (s == PY_LOCK_ACQUIRED) {
430	265k	_Py_atomic_store_ullong_relaxed(&m->thread, thread);
431	265k	assert(m->level == 0);
432	265k	}
433	265k	return s;
434	265k	}
435
436		void
437		_PyRecursiveMutex_Unlock(_PyRecursiveMutex *m)
438	64.3k	{
439	64.3k	if (_PyRecursiveMutex_TryUnlock(m) < 0) {
440	0	Py_FatalError("unlocking a recursive mutex that is not "
441	0	"owned by the current thread");
442	0	}
443	64.3k	}
444
445		int
446		_PyRecursiveMutex_TryUnlock(_PyRecursiveMutex *m)
447	1.46M	{
448	1.46M	PyThread_ident_t thread = PyThread_get_thread_ident_ex();
449	1.46M	if (!recursive_mutex_is_owned_by(m, thread)) {
450	0	return -1;
451	0	}
452	1.46M	if (m->level > 0) {
453	92	m->level--;
454	92	return 0;
455	92	}
456	1.46M	assert(m->level == 0);
457	1.46M	_Py_atomic_store_ullong_relaxed(&m->thread, 0);
458	1.46M	PyMutex_Unlock(&m->mutex);
459	1.46M	return 0;
460	1.46M	}
461
462	0	#define _Py_WRITE_LOCKED 1
463	0	#define _PyRWMutex_READER_SHIFT 2
464		#define _Py_RWMUTEX_MAX_READERS (UINTPTR_MAX >> _PyRWMutex_READER_SHIFT)
465
466		static uintptr_t
467		rwmutex_set_parked_and_wait(_PyRWMutex *rwmutex, uintptr_t bits)
468	0	{
469		// Set _Py_HAS_PARKED and wait until we are woken up.
470	0	if ((bits & _Py_HAS_PARKED) == 0) {
471	0	uintptr_t newval = bits \| _Py_HAS_PARKED;
472	0	if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
473	0	&bits, newval)) {
474	0	return bits;
475	0	}
476	0	bits = newval;
477	0	}
478
479	0	_PyParkingLot_Park(&rwmutex->bits, &bits, sizeof(bits), -1, NULL, 1);
480	0	return _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
481	0	}
482
483		// The number of readers holding the lock
484		static uintptr_t
485		rwmutex_reader_count(uintptr_t bits)
486	0	{
487	0	return bits >> _PyRWMutex_READER_SHIFT;
488	0	}
489
490		void
491		_PyRWMutex_RLock(_PyRWMutex *rwmutex)
492	0	{
493	0	uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
494	0	for (;;) {
495	0	if ((bits & _Py_WRITE_LOCKED)) {
496		// A writer already holds the lock.
497	0	bits = rwmutex_set_parked_and_wait(rwmutex, bits);
498	0	continue;
499	0	}
500	0	else if ((bits & _Py_HAS_PARKED)) {
501		// Reader(s) hold the lock (or just gave up the lock), but there is
502		// at least one waiting writer. We can't grab the lock because we
503		// don't want to starve the writer. Instead, we park ourselves and
504		// wait for the writer to eventually wake us up.
505	0	bits = rwmutex_set_parked_and_wait(rwmutex, bits);
506	0	continue;
507	0	}
508	0	else {
509		// The lock is unlocked or read-locked. Try to grab it.
510	0	assert(rwmutex_reader_count(bits) < _Py_RWMUTEX_MAX_READERS);
511	0	uintptr_t newval = bits + (1 << _PyRWMutex_READER_SHIFT);
512	0	if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
513	0	&bits, newval)) {
514	0	continue;
515	0	}
516	0	return;
517	0	}
518	0	}
519	0	}
520
521		void
522		_PyRWMutex_RUnlock(_PyRWMutex *rwmutex)
523	0	{
524	0	uintptr_t bits = _Py_atomic_add_uintptr(&rwmutex->bits, -(1 << _PyRWMutex_READER_SHIFT));
525	0	assert(rwmutex_reader_count(bits) > 0 && "lock was not read-locked");
526	0	bits -= (1 << _PyRWMutex_READER_SHIFT);
527
528	0	if (rwmutex_reader_count(bits) == 0 && (bits & _Py_HAS_PARKED)) {
529	0	_PyParkingLot_UnparkAll(&rwmutex->bits);
530	0	return;
531	0	}
532	0	}
533
534		void
535		_PyRWMutex_Lock(_PyRWMutex *rwmutex)
536	0	{
537	0	uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
538	0	for (;;) {
539		// If there are no active readers and it's not already write-locked,
540		// then we can grab the lock.
541	0	if ((bits & ~_Py_HAS_PARKED) == 0) {
542	0	if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
543	0	&bits,
544	0	bits \| _Py_WRITE_LOCKED)) {
545	0	continue;
546	0	}
547	0	return;
548	0	}
549
550		// Otherwise, we have to wait.
551	0	bits = rwmutex_set_parked_and_wait(rwmutex, bits);
552	0	}
553	0	}
554
555		void
556		_PyRWMutex_Unlock(_PyRWMutex *rwmutex)
557	0	{
558	0	uintptr_t old_bits = _Py_atomic_exchange_uintptr(&rwmutex->bits, 0);
559
560	0	assert((old_bits & _Py_WRITE_LOCKED) && "lock was not write-locked");
561	0	assert(rwmutex_reader_count(old_bits) == 0 && "lock was read-locked");
562
563	0	if ((old_bits & _Py_HAS_PARKED) != 0) {
564	0	_PyParkingLot_UnparkAll(&rwmutex->bits);
565	0	}
566	0	}
567
568	0	#define SEQLOCK_IS_UPDATING(sequence) (sequence & 0x01)
569
570		void _PySeqLock_LockWrite(_PySeqLock *seqlock)
571	0	{
572		// lock by moving to an odd sequence number
573	0	uint32_t prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
574	0	while (1) {
575	0	if (SEQLOCK_IS_UPDATING(prev)) {
576		// Someone else is currently updating the cache
577	0	_Py_yield();
578	0	prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
579	0	}
580	0	else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
581		// We've locked the cache
582	0	_Py_atomic_fence_release();
583	0	break;
584	0	}
585	0	else {
586	0	_Py_yield();
587	0	}
588	0	}
589	0	}
590
591		void _PySeqLock_AbandonWrite(_PySeqLock *seqlock)
592	0	{
593	0	uint32_t new_seq = _Py_atomic_load_uint32_relaxed(&seqlock->sequence) - 1;
594	0	assert(!SEQLOCK_IS_UPDATING(new_seq));
595	0	_Py_atomic_store_uint32(&seqlock->sequence, new_seq);
596	0	}
597
598		void _PySeqLock_UnlockWrite(_PySeqLock *seqlock)
599	0	{
600	0	uint32_t new_seq = _Py_atomic_load_uint32_relaxed(&seqlock->sequence) + 1;
601	0	assert(!SEQLOCK_IS_UPDATING(new_seq));
602	0	_Py_atomic_store_uint32(&seqlock->sequence, new_seq);
603	0	}
604
605		uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock)
606	0	{
607	0	uint32_t sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
608	0	while (SEQLOCK_IS_UPDATING(sequence)) {
609	0	_Py_yield();
610	0	sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
611	0	}
612
613	0	return sequence;
614	0	}
615
616		int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
617	0	{
618		// gh-121368: We need an explicit acquire fence here to ensure that
619		// this load of the sequence number is not reordered before any loads
620		// within the read lock.
621	0	_Py_atomic_fence_acquire();
622
623	0	if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) {
624	0	return 1;
625	0	}
626
627	0	_Py_yield();
628	0	return 0;
629	0	}
630
631		int _PySeqLock_AfterFork(_PySeqLock *seqlock)
632	0	{
633		// Synchronize again and validate that the entry hasn't been updated
634		// while we were readying the values.
635	0	if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
636	0	seqlock->sequence = 0;
637	0	return 1;
638	0	}
639
640	0	return 0;
641	0	}
642
643		#undef PyMutex_Lock
644		void
645		PyMutex_Lock(PyMutex *m)
646	0	{
647	0	_PyMutex_LockTimed(m, -1, _PY_LOCK_DETACH);
648	0	}
649
650		#undef PyMutex_Unlock
651		void
652		PyMutex_Unlock(PyMutex *m)
653	0	{
654	0	if (_PyMutex_TryUnlock(m) < 0) {
655	0	Py_FatalError("unlocking mutex that is not locked");
656	0	}
657	0	}
658
659
660		#undef PyMutex_IsLocked
661		int
662		PyMutex_IsLocked(PyMutex *m)
663	0	{
664	0	return _PyMutex_IsLocked(m);
665	0	}