/src/cpython/Python/lock.c

Source
// Lock implementation

#include "Python.h"

#include "pycore_lock.h"
#include "pycore_parking_lot.h"
#include "pycore_semaphore.h"
#include "pycore_time.h"          // _PyTime_Add()
#include "pycore_stats.h"         // FT_STAT_MUTEX_SLEEP_INC()

#ifdef MS_WINDOWS
#  ifndef WIN32_LEAN_AND_MEAN
#    define WIN32_LEAN_AND_MEAN
#  endif
#  include <windows.h>            // SwitchToThread()
#elif defined(HAVE_SCHED_H)
#  include <sched.h>              // sched_yield()
#endif

// If a thread waits on a lock for longer than TIME_TO_BE_FAIR_NS (1 ms), then
// the unlocking thread directly hands off ownership of the lock. This avoids
// starvation.
static const PyTime_t TIME_TO_BE_FAIR_NS = 1000*1000;

// Spin for a bit before parking the thread. This is only enabled for
// `--disable-gil` builds because it is unlikely to be helpful if the GIL is
// enabled.
#if Py_GIL_DISABLED
static const int MAX_SPIN_COUNT = 40;
#else
static const int MAX_SPIN_COUNT = 0;
#endif

struct mutex_entry {
    // The time after which the unlocking thread should hand off lock ownership
    // directly to the waiting thread. Written by the waiting thread.
    PyTime_t time_to_be_fair;

    // Set to 1 if the lock was handed off. Written by the unlocking thread.
    int handed_off;
};

void
_Py_yield(void)
{
#ifdef MS_WINDOWS
    SwitchToThread();
#elif defined(HAVE_SCHED_H)
    sched_yield();
#endif
}

PyLockStatus
_PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
{
    uint8_t v = _Py_atomic_load_uint8_relaxed(&m->_bits);
    if ((v & _Py_LOCKED) == 0) {
        if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v|_Py_LOCKED)) {
            return PY_LOCK_ACQUIRED;
        }
    }
    if (timeout == 0) {
        return PY_LOCK_FAILURE;
    }

    FT_STAT_MUTEX_SLEEP_INC();

    PyTime_t now;
    // silently ignore error: cannot report error to the caller
    (void)PyTime_MonotonicRaw(&now);
    PyTime_t endtime = 0;
    if (timeout > 0) {
        endtime = _PyTime_Add(now, timeout);
    }

    struct mutex_entry entry = {
        .time_to_be_fair = now + TIME_TO_BE_FAIR_NS,
        .handed_off = 0,
    };

    Py_ssize_t spin_count = 0;
    for (;;) {
        if ((v & _Py_LOCKED) == 0) {
            // The lock is unlocked. Try to grab it.
            if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v|_Py_LOCKED)) {
                return PY_LOCK_ACQUIRED;
            }
            continue;
        }

        if (!(v & _Py_HAS_PARKED) && spin_count < MAX_SPIN_COUNT) {
            // Spin for a bit.
            _Py_yield();
            spin_count++;
            continue;
        }

        if (timeout == 0) {
            return PY_LOCK_FAILURE;
        }
        if ((flags & _PY_LOCK_PYTHONLOCK) && Py_IsFinalizing()) {
            // At this phase of runtime shutdown, only the finalization thread
            // can have attached thread state; others hang if they try
            // attaching. And since operations on this lock requires attached
            // thread state (_PY_LOCK_PYTHONLOCK), the finalization thread is
            // running this code, and no other thread can unlock.
            // Raise rather than hang. (_PY_LOCK_PYTHONLOCK allows raising
            // exceptons.)
            PyErr_SetString(PyExc_PythonFinalizationError,
                            "cannot acquire lock at interpreter finalization");
            return PY_LOCK_FAILURE;
        }

        uint8_t newv = v;
        if (!(v & _Py_HAS_PARKED)) {
            // We are the first waiter. Set the _Py_HAS_PARKED flag.
            newv = v | _Py_HAS_PARKED;
            if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &v, newv)) {
                continue;
            }
        }

        int ret = _PyParkingLot_Park(&m->_bits, &newv, sizeof(newv), timeout,
                                     &entry, (flags & _PY_LOCK_DETACH) != 0);
        if (ret == Py_PARK_OK) {
            if (entry.handed_off) {
                // We own the lock now. thread.Lock allows other threads
                // to concurrently release the lock so we cannot assert that
                // it is locked if _PY_LOCK_PYTHONLOCK is set.
                assert(_Py_atomic_load_uint8_relaxed(&m->_bits) & _Py_LOCKED ||
                       (flags & _PY_LOCK_PYTHONLOCK) != 0);
                return PY_LOCK_ACQUIRED;
            }
        }
        else if (ret == Py_PARK_INTR && (flags & _PY_LOCK_HANDLE_SIGNALS)) {
            if (Py_MakePendingCalls() < 0) {
                return PY_LOCK_INTR;
            }
        }
        else if (ret == Py_PARK_INTR && (flags & _PY_FAIL_IF_INTERRUPTED)) {
            return PY_LOCK_INTR;
        }
        else if (ret == Py_PARK_TIMEOUT) {
            assert(timeout >= 0);
            return PY_LOCK_FAILURE;
        }

        if (timeout > 0) {
            timeout = _PyDeadline_Get(endtime);
            if (timeout <= 0) {
                // Avoid negative values because those mean block forever.
                timeout = 0;
            }
        }

        v = _Py_atomic_load_uint8_relaxed(&m->_bits);
    }
}

static void
mutex_unpark(void *arg, void *park_arg, int has_more_waiters)
{
    PyMutex *m = (PyMutex*)arg;
    struct mutex_entry *entry = (struct mutex_entry*)park_arg;
    uint8_t v = 0;
    if (entry) {
        PyTime_t now;
        // silently ignore error: cannot report error to the caller
        (void)PyTime_MonotonicRaw(&now);
        int should_be_fair = now > entry->time_to_be_fair;

        entry->handed_off = should_be_fair;
        if (should_be_fair) {
            v |= _Py_LOCKED;
        }
        if (has_more_waiters) {
            v |= _Py_HAS_PARKED;
        }
    }
    _Py_atomic_store_uint8(&m->_bits, v);
}

int
_PyMutex_TryUnlock(PyMutex *m)
{
    uint8_t v = _Py_atomic_load_uint8(&m->_bits);
    for (;;) {
        if ((v & _Py_LOCKED) == 0) {
            // error: the mutex is not locked
            return -1;
        }
        else if ((v & _Py_HAS_PARKED)) {
            // wake up a single thread
            _PyParkingLot_Unpark(&m->_bits, mutex_unpark, m);
            return 0;
        }
        else if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, _Py_UNLOCKED)) {
            // fast-path: no waiters
            return 0;
        }
    }
}

// _PyRawMutex stores a linked list of `struct raw_mutex_entry`, one for each
// thread waiting on the mutex, directly in the mutex itself.
struct raw_mutex_entry {
    struct raw_mutex_entry *next;
    _PySemaphore sema;
};

void
_PyRawMutex_LockSlow(_PyRawMutex *m)
{
    struct raw_mutex_entry waiter;
    _PySemaphore_Init(&waiter.sema);

    uintptr_t v = _Py_atomic_load_uintptr(&m->v);
    for (;;) {
        if ((v & _Py_LOCKED) == 0) {
            // Unlocked: try to grab it (even if it has a waiter).
            if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, v|_Py_LOCKED)) {
                break;
            }
            continue;
        }

        // Locked: try to add ourselves as a waiter.
        waiter.next = (struct raw_mutex_entry *)(v & ~1);
        uintptr_t desired = ((uintptr_t)&waiter)|_Py_LOCKED;
        if (!_Py_atomic_compare_exchange_uintptr(&m->v, &v, desired)) {
            continue;
        }

        // Wait for us to be woken up. Note that we still have to lock the
        // mutex ourselves: it is NOT handed off to us.
        _PySemaphore_Wait(&waiter.sema, -1);
    }

    _PySemaphore_Destroy(&waiter.sema);
}

void
_PyRawMutex_UnlockSlow(_PyRawMutex *m)
{
    uintptr_t v = _Py_atomic_load_uintptr(&m->v);
    for (;;) {
        if ((v & _Py_LOCKED) == 0) {
            Py_FatalError("unlocking mutex that is not locked");
        }

        struct raw_mutex_entry *waiter = (struct raw_mutex_entry *)(v & ~1);
        if (waiter) {
            uintptr_t next_waiter = (uintptr_t)waiter->next;
            if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, next_waiter)) {
                _PySemaphore_Wakeup(&waiter->sema);
                return;
            }
        }
        else {
            if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, _Py_UNLOCKED)) {
                return;
            }
        }
    }
}

int
_PyEvent_IsSet(PyEvent *evt)
{
    uint8_t v = _Py_atomic_load_uint8(&evt->v);
    return v == _Py_LOCKED;
}

void
_PyEvent_Notify(PyEvent *evt)
{
    uintptr_t v = _Py_atomic_exchange_uint8(&evt->v, _Py_LOCKED);
    if (v == _Py_UNLOCKED) {
        // no waiters
        return;
    }
    else if (v == _Py_LOCKED) {
        // event already set
        return;
    }
    else {
        assert(v == _Py_HAS_PARKED);
        _PyParkingLot_UnparkAll(&evt->v);
    }
}

void
PyEvent_Wait(PyEvent *evt)
{
    while (!PyEvent_WaitTimed(evt, -1, /*detach=*/1))
        ;
}

int
PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach)
{
    for (;;) {
        uint8_t v = _Py_atomic_load_uint8(&evt->v);
        if (v == _Py_LOCKED) {
            // event already set
            return 1;
        }
        if (v == _Py_UNLOCKED) {
            if (!_Py_atomic_compare_exchange_uint8(&evt->v, &v, _Py_HAS_PARKED)) {
                continue;
            }
        }

        uint8_t expected = _Py_HAS_PARKED;
        (void) _PyParkingLot_Park(&evt->v, &expected, sizeof(evt->v),
                                  timeout_ns, NULL, detach);

        return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED;
    }
}

static int
unlock_once(_PyOnceFlag *o, int res)
{
    // On success (res=0), we set the state to _Py_ONCE_INITIALIZED.
    // On failure (res=-1), we reset the state to _Py_UNLOCKED.
    uint8_t new_value;
    switch (res) {
        case -1: new_value = _Py_UNLOCKED; break;
        case  0: new_value = _Py_ONCE_INITIALIZED; break;
        default: {
            Py_FatalError("invalid result from _PyOnceFlag_CallOnce");
            Py_UNREACHABLE();
            break;
        }
    }

    uint8_t old_value = _Py_atomic_exchange_uint8(&o->v, new_value);
    if ((old_value & _Py_HAS_PARKED) != 0) {
        // wake up anyone waiting on the once flag
        _PyParkingLot_UnparkAll(&o->v);
    }
    return res;
}

int
_PyOnceFlag_CallOnceSlow(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg)
{
    uint8_t v = _Py_atomic_load_uint8(&flag->v);
    for (;;) {
        if (v == _Py_UNLOCKED) {
            if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, _Py_LOCKED)) {
                continue;
            }
            int res = fn(arg);
            return unlock_once(flag, res);
        }

        if (v == _Py_ONCE_INITIALIZED) {
            return 0;
        }

        // The once flag is initializing (locked).
        assert((v & _Py_LOCKED));
        if (!(v & _Py_HAS_PARKED)) {
            // We are the first waiter. Set the _Py_HAS_PARKED flag.
            uint8_t new_value = v | _Py_HAS_PARKED;
            if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, new_value)) {
                continue;
            }
            v = new_value;
        }

        // Wait for initialization to finish.
        _PyParkingLot_Park(&flag->v, &v, sizeof(v), -1, NULL, 1);
        v = _Py_atomic_load_uint8(&flag->v);
    }
}

static int
recursive_mutex_is_owned_by(_PyRecursiveMutex *m, PyThread_ident_t tid)
{
    return _Py_atomic_load_ullong_relaxed(&m->thread) == tid;
}

int
_PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m)
{
    return recursive_mutex_is_owned_by(m, PyThread_get_thread_ident_ex());
}

void
_PyRecursiveMutex_Lock(_PyRecursiveMutex *m)
{
    PyThread_ident_t thread = PyThread_get_thread_ident_ex();
    if (recursive_mutex_is_owned_by(m, thread)) {
        m->level++;
        return;
    }
    PyMutex_Lock(&m->mutex);
    _Py_atomic_store_ullong_relaxed(&m->thread, thread);
    assert(m->level == 0);
}

PyLockStatus
_PyRecursiveMutex_LockTimed(_PyRecursiveMutex *m, PyTime_t timeout, _PyLockFlags flags)
{
    PyThread_ident_t thread = PyThread_get_thread_ident_ex();
    if (recursive_mutex_is_owned_by(m, thread)) {
        m->level++;
        return PY_LOCK_ACQUIRED;
    }
    PyLockStatus s = _PyMutex_LockTimed(&m->mutex, timeout, flags);
    if (s == PY_LOCK_ACQUIRED) {
        _Py_atomic_store_ullong_relaxed(&m->thread, thread);
        assert(m->level == 0);
    }
    return s;
}

void
_PyRecursiveMutex_Unlock(_PyRecursiveMutex *m)
{
    if (_PyRecursiveMutex_TryUnlock(m) < 0) {
        Py_FatalError("unlocking a recursive mutex that is not "
                      "owned by the current thread");
    }
}

int
_PyRecursiveMutex_TryUnlock(_PyRecursiveMutex *m)
{
    PyThread_ident_t thread = PyThread_get_thread_ident_ex();
    if (!recursive_mutex_is_owned_by(m, thread)) {
        return -1;
    }
    if (m->level > 0) {
        m->level--;
        return 0;
    }
    assert(m->level == 0);
    _Py_atomic_store_ullong_relaxed(&m->thread, 0);
    PyMutex_Unlock(&m->mutex);
    return 0;
}

#define _Py_WRITE_LOCKED 1
#define _PyRWMutex_READER_SHIFT 2
#define _Py_RWMUTEX_MAX_READERS (UINTPTR_MAX >> _PyRWMutex_READER_SHIFT)

static uintptr_t
rwmutex_set_parked_and_wait(_PyRWMutex *rwmutex, uintptr_t bits)
{
    // Set _Py_HAS_PARKED and wait until we are woken up.
    if ((bits & _Py_HAS_PARKED) == 0) {
        uintptr_t newval = bits | _Py_HAS_PARKED;
        if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
                                                 &bits, newval)) {
            return bits;
        }
        bits = newval;
    }

    _PyParkingLot_Park(&rwmutex->bits, &bits, sizeof(bits), -1, NULL, 1);
    return _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
}

// The number of readers holding the lock
static uintptr_t
rwmutex_reader_count(uintptr_t bits)
{
    return bits >> _PyRWMutex_READER_SHIFT;
}

void
_PyRWMutex_RLock(_PyRWMutex *rwmutex)
{
    uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
    for (;;) {
        if ((bits & _Py_WRITE_LOCKED)) {
            // A writer already holds the lock.
            bits = rwmutex_set_parked_and_wait(rwmutex, bits);
            continue;
        }
        else if ((bits & _Py_HAS_PARKED)) {
            // Reader(s) hold the lock (or just gave up the lock), but there is
            // at least one waiting writer. We can't grab the lock because we
            // don't want to starve the writer. Instead, we park ourselves and
            // wait for the writer to eventually wake us up.
            bits = rwmutex_set_parked_and_wait(rwmutex, bits);
            continue;
        }
        else {
            // The lock is unlocked or read-locked. Try to grab it.
            assert(rwmutex_reader_count(bits) < _Py_RWMUTEX_MAX_READERS);
            uintptr_t newval = bits + (1 << _PyRWMutex_READER_SHIFT);
            if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
                                                     &bits, newval)) {
                continue;
            }
            return;
        }
    }
}

void
_PyRWMutex_RUnlock(_PyRWMutex *rwmutex)
{
    uintptr_t bits = _Py_atomic_add_uintptr(&rwmutex->bits, -(1 << _PyRWMutex_READER_SHIFT));
    assert(rwmutex_reader_count(bits) > 0 && "lock was not read-locked");
    bits -= (1 << _PyRWMutex_READER_SHIFT);

    if (rwmutex_reader_count(bits) == 0 && (bits & _Py_HAS_PARKED)) {
        _PyParkingLot_UnparkAll(&rwmutex->bits);
        return;
    }
}

void
_PyRWMutex_Lock(_PyRWMutex *rwmutex)
{
    uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
    for (;;) {
        // If there are no active readers and it's not already write-locked,
        // then we can grab the lock.
        if ((bits & ~_Py_HAS_PARKED) == 0) {
            if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
                                                     &bits,
                                                     bits | _Py_WRITE_LOCKED)) {
                continue;
            }
            return;
        }

        // Otherwise, we have to wait.
        bits = rwmutex_set_parked_and_wait(rwmutex, bits);
    }
}

void
_PyRWMutex_Unlock(_PyRWMutex *rwmutex)
{
    uintptr_t old_bits = _Py_atomic_exchange_uintptr(&rwmutex->bits, 0);

    assert((old_bits & _Py_WRITE_LOCKED) && "lock was not write-locked");
    assert(rwmutex_reader_count(old_bits) == 0 && "lock was read-locked");

    if ((old_bits & _Py_HAS_PARKED) != 0) {
        _PyParkingLot_UnparkAll(&rwmutex->bits);
    }
}

#define SEQLOCK_IS_UPDATING(sequence) (sequence & 0x01)

void _PySeqLock_LockWrite(_PySeqLock *seqlock)
{
    // lock by moving to an odd sequence number
    uint32_t prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
    while (1) {
        if (SEQLOCK_IS_UPDATING(prev)) {
            // Someone else is currently updating the cache
            _Py_yield();
            prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
        }
        else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
            // We've locked the cache
            _Py_atomic_fence_release();
            break;
        }
        else {
            _Py_yield();
        }
    }
}

void _PySeqLock_AbandonWrite(_PySeqLock *seqlock)
{
    uint32_t new_seq = _Py_atomic_load_uint32_relaxed(&seqlock->sequence) - 1;
    assert(!SEQLOCK_IS_UPDATING(new_seq));
    _Py_atomic_store_uint32(&seqlock->sequence, new_seq);
}

void _PySeqLock_UnlockWrite(_PySeqLock *seqlock)
{
    uint32_t new_seq = _Py_atomic_load_uint32_relaxed(&seqlock->sequence) + 1;
    assert(!SEQLOCK_IS_UPDATING(new_seq));
    _Py_atomic_store_uint32(&seqlock->sequence, new_seq);
}

uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock)
{
    uint32_t sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
    while (SEQLOCK_IS_UPDATING(sequence)) {
        _Py_yield();
        sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
    }

    return sequence;
}

int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
{
    // gh-121368: We need an explicit acquire fence here to ensure that
    // this load of the sequence number is not reordered before any loads
    // within the read lock.
    _Py_atomic_fence_acquire();

    if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) {
        return 1;
    }

    _Py_yield();
    return 0;
}

int _PySeqLock_AfterFork(_PySeqLock *seqlock)
{
    // Synchronize again and validate that the entry hasn't been updated
    // while we were readying the values.
    if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
        seqlock->sequence = 0;
        return 1;
    }

    return 0;
}

#undef PyMutex_Lock
void
PyMutex_Lock(PyMutex *m)
{
    _PyMutex_LockTimed(m, -1, _PY_LOCK_DETACH);
}

#undef PyMutex_Unlock
void
PyMutex_Unlock(PyMutex *m)
{
    if (_PyMutex_TryUnlock(m) < 0) {
        Py_FatalError("unlocking mutex that is not locked");
    }
}


#undef PyMutex_IsLocked
int
PyMutex_IsLocked(PyMutex *m)
{
    return _PyMutex_IsLocked(m);
}

Coverage Report

Created: 2026-02-26 06:53

Line	Count	Source
1		// Lock implementation
2
3		#include "Python.h"
4
5		#include "pycore_lock.h"
6		#include "pycore_parking_lot.h"
7		#include "pycore_semaphore.h"
8		#include "pycore_time.h" // _PyTime_Add()
9		#include "pycore_stats.h" // FT_STAT_MUTEX_SLEEP_INC()
10
11		#ifdef MS_WINDOWS
12		# ifndef WIN32_LEAN_AND_MEAN
13		# define WIN32_LEAN_AND_MEAN
14		# endif
15		# include <windows.h> // SwitchToThread()
16		#elif defined(HAVE_SCHED_H)
17		# include <sched.h> // sched_yield()
18		#endif
19
20		// If a thread waits on a lock for longer than TIME_TO_BE_FAIR_NS (1 ms), then
21		// the unlocking thread directly hands off ownership of the lock. This avoids
22		// starvation.
23		static const PyTime_t TIME_TO_BE_FAIR_NS = 1000*1000;
24
25		// Spin for a bit before parking the thread. This is only enabled for
26		// `--disable-gil` builds because it is unlikely to be helpful if the GIL is
27		// enabled.
28		#if Py_GIL_DISABLED
29		static const int MAX_SPIN_COUNT = 40;
30		#else
31		static const int MAX_SPIN_COUNT = 0;
32		#endif
33
34		struct mutex_entry {
35		// The time after which the unlocking thread should hand off lock ownership
36		// directly to the waiting thread. Written by the waiting thread.
37		PyTime_t time_to_be_fair;
38
39		// Set to 1 if the lock was handed off. Written by the unlocking thread.
40		int handed_off;
41		};
42
43		void
44		_Py_yield(void)
45	0	{
46		#ifdef MS_WINDOWS
47		SwitchToThread();
48		#elif defined(HAVE_SCHED_H)
49		sched_yield();
50	0	#endif
51	0	}
52
53		PyLockStatus
54		_PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
55	900k	{
56	900k	uint8_t v = _Py_atomic_load_uint8_relaxed(&m->_bits);
57	900k	if ((v & _Py_LOCKED) == 0) {
58	900k	if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v\|_Py_LOCKED)) {
59	900k	return PY_LOCK_ACQUIRED;
60	900k	}
61	900k	}
62	4	if (timeout == 0) {
63	4	return PY_LOCK_FAILURE;
64	4	}
65
66	0	FT_STAT_MUTEX_SLEEP_INC();
67
68	0	PyTime_t now;
69		// silently ignore error: cannot report error to the caller
70	0	(void)PyTime_MonotonicRaw(&now);
71	0	PyTime_t endtime = 0;
72	0	if (timeout > 0) {
73	0	endtime = _PyTime_Add(now, timeout);
74	0	}
75
76	0	struct mutex_entry entry = {
77	0	.time_to_be_fair = now + TIME_TO_BE_FAIR_NS,
78	0	.handed_off = 0,
79	0	};
80
81	0	Py_ssize_t spin_count = 0;
82	0	for (;;) {
83	0	if ((v & _Py_LOCKED) == 0) {
84		// The lock is unlocked. Try to grab it.
85	0	if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, v\|_Py_LOCKED)) {
86	0	return PY_LOCK_ACQUIRED;
87	0	}
88	0	continue;
89	0	}
90
91	0	if (!(v & _Py_HAS_PARKED) && spin_count < MAX_SPIN_COUNT) {
92		// Spin for a bit.
93	0	_Py_yield();
94	0	spin_count++;
95	0	continue;
96	0	}
97
98	0	if (timeout == 0) {
99	0	return PY_LOCK_FAILURE;
100	0	}
101	0	if ((flags & _PY_LOCK_PYTHONLOCK) && Py_IsFinalizing()) {
102		// At this phase of runtime shutdown, only the finalization thread
103		// can have attached thread state; others hang if they try
104		// attaching. And since operations on this lock requires attached
105		// thread state (_PY_LOCK_PYTHONLOCK), the finalization thread is
106		// running this code, and no other thread can unlock.
107		// Raise rather than hang. (_PY_LOCK_PYTHONLOCK allows raising
108		// exceptons.)
109	0	PyErr_SetString(PyExc_PythonFinalizationError,
110	0	"cannot acquire lock at interpreter finalization");
111	0	return PY_LOCK_FAILURE;
112	0	}
113
114	0	uint8_t newv = v;
115	0	if (!(v & _Py_HAS_PARKED)) {
116		// We are the first waiter. Set the _Py_HAS_PARKED flag.
117	0	newv = v \| _Py_HAS_PARKED;
118	0	if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &v, newv)) {
119	0	continue;
120	0	}
121	0	}
122
123	0	int ret = _PyParkingLot_Park(&m->_bits, &newv, sizeof(newv), timeout,
124	0	&entry, (flags & _PY_LOCK_DETACH) != 0);
125	0	if (ret == Py_PARK_OK) {
126	0	if (entry.handed_off) {
127		// We own the lock now. thread.Lock allows other threads
128		// to concurrently release the lock so we cannot assert that
129		// it is locked if _PY_LOCK_PYTHONLOCK is set.
130	0	assert(_Py_atomic_load_uint8_relaxed(&m->_bits) & _Py_LOCKED \|\|
131	0	(flags & _PY_LOCK_PYTHONLOCK) != 0);
132	0	return PY_LOCK_ACQUIRED;
133	0	}
134	0	}
135	0	else if (ret == Py_PARK_INTR && (flags & _PY_LOCK_HANDLE_SIGNALS)) {
136	0	if (Py_MakePendingCalls() < 0) {
137	0	return PY_LOCK_INTR;
138	0	}
139	0	}
140	0	else if (ret == Py_PARK_INTR && (flags & _PY_FAIL_IF_INTERRUPTED)) {
141	0	return PY_LOCK_INTR;
142	0	}
143	0	else if (ret == Py_PARK_TIMEOUT) {
144	0	assert(timeout >= 0);
145	0	return PY_LOCK_FAILURE;
146	0	}
147
148	0	if (timeout > 0) {
149	0	timeout = _PyDeadline_Get(endtime);
150	0	if (timeout <= 0) {
151		// Avoid negative values because those mean block forever.
152	0	timeout = 0;
153	0	}
154	0	}
155
156	0	v = _Py_atomic_load_uint8_relaxed(&m->_bits);
157	0	}
158	0	}
159
160		static void
161		mutex_unpark(void arg, void park_arg, int has_more_waiters)
162	0	{
163	0	PyMutex m = (PyMutex)arg;
164	0	struct mutex_entry entry = (struct mutex_entry)park_arg;
165	0	uint8_t v = 0;
166	0	if (entry) {
167	0	PyTime_t now;
168		// silently ignore error: cannot report error to the caller
169	0	(void)PyTime_MonotonicRaw(&now);
170	0	int should_be_fair = now > entry->time_to_be_fair;
171
172	0	entry->handed_off = should_be_fair;
173	0	if (should_be_fair) {
174	0	v \|= _Py_LOCKED;
175	0	}
176	0	if (has_more_waiters) {
177	0	v \|= _Py_HAS_PARKED;
178	0	}
179	0	}
180	0	_Py_atomic_store_uint8(&m->_bits, v);
181	0	}
182
183		int
184		_PyMutex_TryUnlock(PyMutex *m)
185	102	{
186	102	uint8_t v = _Py_atomic_load_uint8(&m->_bits);
187	102	for (;;) {
188	102	if ((v & _Py_LOCKED) == 0) {
189		// error: the mutex is not locked
190	0	return -1;
191	0	}
192	102	else if ((v & _Py_HAS_PARKED)) {
193		// wake up a single thread
194	0	_PyParkingLot_Unpark(&m->_bits, mutex_unpark, m);
195	0	return 0;
196	0	}
197	102	else if (_Py_atomic_compare_exchange_uint8(&m->_bits, &v, _Py_UNLOCKED)) {
198		// fast-path: no waiters
199	102	return 0;
200	102	}
201	102	}
202	102	}
203
204		// _PyRawMutex stores a linked list of `struct raw_mutex_entry`, one for each
205		// thread waiting on the mutex, directly in the mutex itself.
206		struct raw_mutex_entry {
207		struct raw_mutex_entry *next;
208		_PySemaphore sema;
209		};
210
211		void
212		_PyRawMutex_LockSlow(_PyRawMutex *m)
213	0	{
214	0	struct raw_mutex_entry waiter;
215	0	_PySemaphore_Init(&waiter.sema);
216
217	0	uintptr_t v = _Py_atomic_load_uintptr(&m->v);
218	0	for (;;) {
219	0	if ((v & _Py_LOCKED) == 0) {
220		// Unlocked: try to grab it (even if it has a waiter).
221	0	if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, v\|_Py_LOCKED)) {
222	0	break;
223	0	}
224	0	continue;
225	0	}
226
227		// Locked: try to add ourselves as a waiter.
228	0	waiter.next = (struct raw_mutex_entry *)(v & ~1);
229	0	uintptr_t desired = ((uintptr_t)&waiter)\|_Py_LOCKED;
230	0	if (!_Py_atomic_compare_exchange_uintptr(&m->v, &v, desired)) {
231	0	continue;
232	0	}
233
234		// Wait for us to be woken up. Note that we still have to lock the
235		// mutex ourselves: it is NOT handed off to us.
236	0	_PySemaphore_Wait(&waiter.sema, -1);
237	0	}
238
239	0	_PySemaphore_Destroy(&waiter.sema);
240	0	}
241
242		void
243		_PyRawMutex_UnlockSlow(_PyRawMutex *m)
244	0	{
245	0	uintptr_t v = _Py_atomic_load_uintptr(&m->v);
246	0	for (;;) {
247	0	if ((v & _Py_LOCKED) == 0) {
248	0	Py_FatalError("unlocking mutex that is not locked");
249	0	}
250
251	0	struct raw_mutex_entry waiter = (struct raw_mutex_entry )(v & ~1);
252	0	if (waiter) {
253	0	uintptr_t next_waiter = (uintptr_t)waiter->next;
254	0	if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, next_waiter)) {
255	0	_PySemaphore_Wakeup(&waiter->sema);
256	0	return;
257	0	}
258	0	}
259	0	else {
260	0	if (_Py_atomic_compare_exchange_uintptr(&m->v, &v, _Py_UNLOCKED)) {
261	0	return;
262	0	}
263	0	}
264	0	}
265	0	}
266
267		int
268		_PyEvent_IsSet(PyEvent *evt)
269	0	{
270	0	uint8_t v = _Py_atomic_load_uint8(&evt->v);
271	0	return v == _Py_LOCKED;
272	0	}
273
274		void
275		_PyEvent_Notify(PyEvent *evt)
276	0	{
277	0	uintptr_t v = _Py_atomic_exchange_uint8(&evt->v, _Py_LOCKED);
278	0	if (v == _Py_UNLOCKED) {
279		// no waiters
280	0	return;
281	0	}
282	0	else if (v == _Py_LOCKED) {
283		// event already set
284	0	return;
285	0	}
286	0	else {
287	0	assert(v == _Py_HAS_PARKED);
288	0	_PyParkingLot_UnparkAll(&evt->v);
289	0	}
290	0	}
291
292		void
293		PyEvent_Wait(PyEvent *evt)
294	0	{
295	0	while (!PyEvent_WaitTimed(evt, -1, /detach=/1))
296	0	;
297	0	}
298
299		int
300		PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach)
301	0	{
302	0	for (;;) {
303	0	uint8_t v = _Py_atomic_load_uint8(&evt->v);
304	0	if (v == _Py_LOCKED) {
305		// event already set
306	0	return 1;
307	0	}
308	0	if (v == _Py_UNLOCKED) {
309	0	if (!_Py_atomic_compare_exchange_uint8(&evt->v, &v, _Py_HAS_PARKED)) {
310	0	continue;
311	0	}
312	0	}
313
314	0	uint8_t expected = _Py_HAS_PARKED;
315	0	(void) _PyParkingLot_Park(&evt->v, &expected, sizeof(evt->v),
316	0	timeout_ns, NULL, detach);
317
318	0	return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED;
319	0	}
320	0	}
321
322		static int
323		unlock_once(_PyOnceFlag *o, int res)
324	213	{
325		// On success (res=0), we set the state to _Py_ONCE_INITIALIZED.
326		// On failure (res=-1), we reset the state to _Py_UNLOCKED.
327	213	uint8_t new_value;
328	213	switch (res) {
329	0	case -1: new_value = _Py_UNLOCKED; break;
330	213	case 0: new_value = _Py_ONCE_INITIALIZED; break;
331	0	default: {
332	0	Py_FatalError("invalid result from _PyOnceFlag_CallOnce");
333	0	Py_UNREACHABLE();
334	0	break;
335	0	}
336	213	}
337
338	213	uint8_t old_value = _Py_atomic_exchange_uint8(&o->v, new_value);
339	213	if ((old_value & _Py_HAS_PARKED) != 0) {
340		// wake up anyone waiting on the once flag
341	0	_PyParkingLot_UnparkAll(&o->v);
342	0	}
343	213	return res;
344	213	}
345
346		int
347		_PyOnceFlag_CallOnceSlow(_PyOnceFlag flag, _Py_once_fn_t fn, void *arg)
348	213	{
349	213	uint8_t v = _Py_atomic_load_uint8(&flag->v);
350	213	for (;;) {
351	213	if (v == _Py_UNLOCKED) {
352	213	if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, _Py_LOCKED)) {
353	0	continue;
354	0	}
355	213	int res = fn(arg);
356	213	return unlock_once(flag, res);
357	213	}
358
359	0	if (v == _Py_ONCE_INITIALIZED) {
360	0	return 0;
361	0	}
362
363		// The once flag is initializing (locked).
364	0	assert((v & _Py_LOCKED));
365	0	if (!(v & _Py_HAS_PARKED)) {
366		// We are the first waiter. Set the _Py_HAS_PARKED flag.
367	0	uint8_t new_value = v \| _Py_HAS_PARKED;
368	0	if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, new_value)) {
369	0	continue;
370	0	}
371	0	v = new_value;
372	0	}
373
374		// Wait for initialization to finish.
375	0	_PyParkingLot_Park(&flag->v, &v, sizeof(v), -1, NULL, 1);
376	0	v = _Py_atomic_load_uint8(&flag->v);
377	0	}
378	213	}
379
380		static int
381		recursive_mutex_is_owned_by(_PyRecursiveMutex *m, PyThread_ident_t tid)
382	3.40M	{
383	3.40M	return _Py_atomic_load_ullong_relaxed(&m->thread) == tid;
384	3.40M	}
385
386		int
387		_PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m)
388	42.9k	{
389	42.9k	return recursive_mutex_is_owned_by(m, PyThread_get_thread_ident_ex());
390	42.9k	}
391
392		void
393		_PyRecursiveMutex_Lock(_PyRecursiveMutex *m)
394	1.51M	{
395	1.51M	PyThread_ident_t thread = PyThread_get_thread_ident_ex();
396	1.51M	if (recursive_mutex_is_owned_by(m, thread)) {
397	0	m->level++;
398	0	return;
399	0	}
400	1.51M	PyMutex_Lock(&m->mutex);
401	1.51M	_Py_atomic_store_ullong_relaxed(&m->thread, thread);
402	1.51M	assert(m->level == 0);
403	1.51M	}
404
405		PyLockStatus
406		_PyRecursiveMutex_LockTimed(_PyRecursiveMutex *m, PyTime_t timeout, _PyLockFlags flags)
407	168k	{
408	168k	PyThread_ident_t thread = PyThread_get_thread_ident_ex();
409	168k	if (recursive_mutex_is_owned_by(m, thread)) {
410	4	m->level++;
411	4	return PY_LOCK_ACQUIRED;
412	4	}
413	168k	PyLockStatus s = _PyMutex_LockTimed(&m->mutex, timeout, flags);
414	168k	if (s == PY_LOCK_ACQUIRED) {
415	168k	_Py_atomic_store_ullong_relaxed(&m->thread, thread);
416	168k	assert(m->level == 0);
417	168k	}
418	168k	return s;
419	168k	}
420
421		void
422		_PyRecursiveMutex_Unlock(_PyRecursiveMutex *m)
423	42.9k	{
424	42.9k	if (_PyRecursiveMutex_TryUnlock(m) < 0) {
425	0	Py_FatalError("unlocking a recursive mutex that is not "
426	0	"owned by the current thread");
427	0	}
428	42.9k	}
429
430		int
431		_PyRecursiveMutex_TryUnlock(_PyRecursiveMutex *m)
432	1.68M	{
433	1.68M	PyThread_ident_t thread = PyThread_get_thread_ident_ex();
434	1.68M	if (!recursive_mutex_is_owned_by(m, thread)) {
435	0	return -1;
436	0	}
437	1.68M	if (m->level > 0) {
438	4	m->level--;
439	4	return 0;
440	4	}
441	1.68M	assert(m->level == 0);
442	1.68M	_Py_atomic_store_ullong_relaxed(&m->thread, 0);
443	1.68M	PyMutex_Unlock(&m->mutex);
444	1.68M	return 0;
445	1.68M	}
446
447	0	#define _Py_WRITE_LOCKED 1
448	0	#define _PyRWMutex_READER_SHIFT 2
449		#define _Py_RWMUTEX_MAX_READERS (UINTPTR_MAX >> _PyRWMutex_READER_SHIFT)
450
451		static uintptr_t
452		rwmutex_set_parked_and_wait(_PyRWMutex *rwmutex, uintptr_t bits)
453	0	{
454		// Set _Py_HAS_PARKED and wait until we are woken up.
455	0	if ((bits & _Py_HAS_PARKED) == 0) {
456	0	uintptr_t newval = bits \| _Py_HAS_PARKED;
457	0	if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
458	0	&bits, newval)) {
459	0	return bits;
460	0	}
461	0	bits = newval;
462	0	}
463
464	0	_PyParkingLot_Park(&rwmutex->bits, &bits, sizeof(bits), -1, NULL, 1);
465	0	return _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
466	0	}
467
468		// The number of readers holding the lock
469		static uintptr_t
470		rwmutex_reader_count(uintptr_t bits)
471	0	{
472	0	return bits >> _PyRWMutex_READER_SHIFT;
473	0	}
474
475		void
476		_PyRWMutex_RLock(_PyRWMutex *rwmutex)
477	0	{
478	0	uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
479	0	for (;;) {
480	0	if ((bits & _Py_WRITE_LOCKED)) {
481		// A writer already holds the lock.
482	0	bits = rwmutex_set_parked_and_wait(rwmutex, bits);
483	0	continue;
484	0	}
485	0	else if ((bits & _Py_HAS_PARKED)) {
486		// Reader(s) hold the lock (or just gave up the lock), but there is
487		// at least one waiting writer. We can't grab the lock because we
488		// don't want to starve the writer. Instead, we park ourselves and
489		// wait for the writer to eventually wake us up.
490	0	bits = rwmutex_set_parked_and_wait(rwmutex, bits);
491	0	continue;
492	0	}
493	0	else {
494		// The lock is unlocked or read-locked. Try to grab it.
495	0	assert(rwmutex_reader_count(bits) < _Py_RWMUTEX_MAX_READERS);
496	0	uintptr_t newval = bits + (1 << _PyRWMutex_READER_SHIFT);
497	0	if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
498	0	&bits, newval)) {
499	0	continue;
500	0	}
501	0	return;
502	0	}
503	0	}
504	0	}
505
506		void
507		_PyRWMutex_RUnlock(_PyRWMutex *rwmutex)
508	0	{
509	0	uintptr_t bits = _Py_atomic_add_uintptr(&rwmutex->bits, -(1 << _PyRWMutex_READER_SHIFT));
510	0	assert(rwmutex_reader_count(bits) > 0 && "lock was not read-locked");
511	0	bits -= (1 << _PyRWMutex_READER_SHIFT);
512
513	0	if (rwmutex_reader_count(bits) == 0 && (bits & _Py_HAS_PARKED)) {
514	0	_PyParkingLot_UnparkAll(&rwmutex->bits);
515	0	return;
516	0	}
517	0	}
518
519		void
520		_PyRWMutex_Lock(_PyRWMutex *rwmutex)
521	0	{
522	0	uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits);
523	0	for (;;) {
524		// If there are no active readers and it's not already write-locked,
525		// then we can grab the lock.
526	0	if ((bits & ~_Py_HAS_PARKED) == 0) {
527	0	if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits,
528	0	&bits,
529	0	bits \| _Py_WRITE_LOCKED)) {
530	0	continue;
531	0	}
532	0	return;
533	0	}
534
535		// Otherwise, we have to wait.
536	0	bits = rwmutex_set_parked_and_wait(rwmutex, bits);
537	0	}
538	0	}
539
540		void
541		_PyRWMutex_Unlock(_PyRWMutex *rwmutex)
542	0	{
543	0	uintptr_t old_bits = _Py_atomic_exchange_uintptr(&rwmutex->bits, 0);
544
545	0	assert((old_bits & _Py_WRITE_LOCKED) && "lock was not write-locked");
546	0	assert(rwmutex_reader_count(old_bits) == 0 && "lock was read-locked");
547
548	0	if ((old_bits & _Py_HAS_PARKED) != 0) {
549	0	_PyParkingLot_UnparkAll(&rwmutex->bits);
550	0	}
551	0	}
552
553	0	#define SEQLOCK_IS_UPDATING(sequence) (sequence & 0x01)
554
555		void _PySeqLock_LockWrite(_PySeqLock *seqlock)
556	0	{
557		// lock by moving to an odd sequence number
558	0	uint32_t prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
559	0	while (1) {
560	0	if (SEQLOCK_IS_UPDATING(prev)) {
561		// Someone else is currently updating the cache
562	0	_Py_yield();
563	0	prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
564	0	}
565	0	else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
566		// We've locked the cache
567	0	_Py_atomic_fence_release();
568	0	break;
569	0	}
570	0	else {
571	0	_Py_yield();
572	0	}
573	0	}
574	0	}
575
576		void _PySeqLock_AbandonWrite(_PySeqLock *seqlock)
577	0	{
578	0	uint32_t new_seq = _Py_atomic_load_uint32_relaxed(&seqlock->sequence) - 1;
579	0	assert(!SEQLOCK_IS_UPDATING(new_seq));
580	0	_Py_atomic_store_uint32(&seqlock->sequence, new_seq);
581	0	}
582
583		void _PySeqLock_UnlockWrite(_PySeqLock *seqlock)
584	0	{
585	0	uint32_t new_seq = _Py_atomic_load_uint32_relaxed(&seqlock->sequence) + 1;
586	0	assert(!SEQLOCK_IS_UPDATING(new_seq));
587	0	_Py_atomic_store_uint32(&seqlock->sequence, new_seq);
588	0	}
589
590		uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock)
591	0	{
592	0	uint32_t sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
593	0	while (SEQLOCK_IS_UPDATING(sequence)) {
594	0	_Py_yield();
595	0	sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
596	0	}
597
598	0	return sequence;
599	0	}
600
601		int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
602	0	{
603		// gh-121368: We need an explicit acquire fence here to ensure that
604		// this load of the sequence number is not reordered before any loads
605		// within the read lock.
606	0	_Py_atomic_fence_acquire();
607
608	0	if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) {
609	0	return 1;
610	0	}
611
612	0	_Py_yield();
613	0	return 0;
614	0	}
615
616		int _PySeqLock_AfterFork(_PySeqLock *seqlock)
617	0	{
618		// Synchronize again and validate that the entry hasn't been updated
619		// while we were readying the values.
620	0	if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
621	0	seqlock->sequence = 0;
622	0	return 1;
623	0	}
624
625	0	return 0;
626	0	}
627
628		#undef PyMutex_Lock
629		void
630		PyMutex_Lock(PyMutex *m)
631	0	{
632	0	_PyMutex_LockTimed(m, -1, _PY_LOCK_DETACH);
633	0	}
634
635		#undef PyMutex_Unlock
636		void
637		PyMutex_Unlock(PyMutex *m)
638	0	{
639	0	if (_PyMutex_TryUnlock(m) < 0) {
640	0	Py_FatalError("unlocking mutex that is not locked");
641	0	}
642	0	}
643
644
645		#undef PyMutex_IsLocked
646		int
647		PyMutex_IsLocked(PyMutex *m)
648	0	{
649	0	return _PyMutex_IsLocked(m);
650	0	}