/src/cpython/Python/qsbr.c
Line | Count | Source |
1 | | /* |
2 | | * Implementation of safe memory reclamation scheme using |
3 | | * quiescent states. See InternalDocs/qsbr.md. |
4 | | * |
5 | | * This is derived from the "GUS" safe memory reclamation technique |
6 | | * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs |
7 | | * in this code are likely due to the modifications. |
8 | | * |
9 | | * The original copyright is preserved below. |
10 | | * |
11 | | * Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org> |
12 | | * |
13 | | * Redistribution and use in source and binary forms, with or without |
14 | | * modification, are permitted provided that the following conditions |
15 | | * are met: |
16 | | * 1. Redistributions of source code must retain the above copyright |
17 | | * notice unmodified, this list of conditions, and the following |
18 | | * disclaimer. |
19 | | * 2. Redistributions in binary form must reproduce the above copyright |
20 | | * notice, this list of conditions and the following disclaimer in the |
21 | | * documentation and/or other materials provided with the distribution. |
22 | | * |
23 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
24 | | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
25 | | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
26 | | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
27 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
28 | | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
29 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
30 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
31 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
32 | | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | | */ |
34 | | #include "Python.h" |
35 | | #include "pycore_interp.h" // PyInterpreterState |
36 | | #include "pycore_pystate.h" // _PyThreadState_GET() |
37 | | #include "pycore_qsbr.h" |
38 | | #include "pycore_tstate.h" // _PyThreadStateImpl |
39 | | #include "pycore_stats.h" // FT_STAT_QSBR_POLL_INC() |
40 | | |
41 | | |
42 | | // Starting size of the array of qsbr thread states |
43 | 0 | #define MIN_ARRAY_SIZE 8 |
44 | | |
45 | | // Allocate a QSBR thread state from the freelist |
46 | | static struct _qsbr_thread_state * |
47 | | qsbr_allocate(struct _qsbr_shared *shared) |
48 | 0 | { |
49 | 0 | struct _qsbr_thread_state *qsbr = shared->freelist; |
50 | 0 | if (qsbr == NULL) { |
51 | 0 | return NULL; |
52 | 0 | } |
53 | 0 | shared->freelist = qsbr->freelist_next; |
54 | 0 | qsbr->freelist_next = NULL; |
55 | 0 | qsbr->shared = shared; |
56 | 0 | qsbr->allocated = true; |
57 | 0 | return qsbr; |
58 | 0 | } |
59 | | |
60 | | // Initialize (or reinitialize) the freelist of QSBR thread states |
61 | | static void |
62 | | initialize_new_array(struct _qsbr_shared *shared) |
63 | 0 | { |
64 | 0 | for (Py_ssize_t i = 0; i != shared->size; i++) { |
65 | 0 | struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr; |
66 | 0 | if (qsbr->tstate != NULL) { |
67 | | // Update the thread state pointer to its QSBR state |
68 | 0 | _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)qsbr->tstate; |
69 | 0 | tstate->qsbr = qsbr; |
70 | 0 | } |
71 | 0 | if (!qsbr->allocated) { |
72 | | // Push to freelist |
73 | 0 | qsbr->freelist_next = shared->freelist; |
74 | 0 | shared->freelist = qsbr; |
75 | 0 | } |
76 | 0 | } |
77 | 0 | } |
78 | | |
79 | | // Grow the array of QSBR thread states. Returns 0 on success, -1 on failure. |
80 | | static int |
81 | | grow_thread_array(struct _qsbr_shared *shared) |
82 | 0 | { |
83 | 0 | Py_ssize_t new_size = shared->size * 2; |
84 | 0 | if (new_size < MIN_ARRAY_SIZE) { |
85 | 0 | new_size = MIN_ARRAY_SIZE; |
86 | 0 | } |
87 | | |
88 | | // Overallocate by 63 bytes so we can align to a 64-byte boundary. |
89 | | // This avoids potential false sharing between the first entry and other |
90 | | // allocations. |
91 | 0 | size_t alignment = 64; |
92 | 0 | size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + alignment - 1; |
93 | 0 | void *raw = PyMem_RawCalloc(1, alloc_size); |
94 | 0 | if (raw == NULL) { |
95 | 0 | return -1; |
96 | 0 | } |
97 | 0 | struct _qsbr_pad *array = _Py_ALIGN_UP(raw, alignment); |
98 | |
|
99 | 0 | void *old_raw = shared->array_raw; |
100 | 0 | if (shared->array != NULL) { |
101 | 0 | memcpy(array, shared->array, shared->size * sizeof(*array)); |
102 | 0 | } |
103 | |
|
104 | 0 | shared->array = array; |
105 | 0 | shared->array_raw = raw; |
106 | 0 | shared->size = new_size; |
107 | 0 | shared->freelist = NULL; |
108 | 0 | initialize_new_array(shared); |
109 | |
|
110 | 0 | PyMem_RawFree(old_raw); |
111 | 0 | return 0; |
112 | 0 | } |
113 | | |
114 | | uint64_t |
115 | | _Py_qsbr_advance(struct _qsbr_shared *shared) |
116 | 0 | { |
117 | | // NOTE: with 64-bit sequence numbers, we don't have to worry too much |
118 | | // about the wr_seq getting too far ahead of rd_seq, but if we ever use |
119 | | // 32-bit sequence numbers, we'll need to be more careful. |
120 | 0 | return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR) + QSBR_INCR; |
121 | 0 | } |
122 | | |
123 | | uint64_t |
124 | | _Py_qsbr_shared_next(struct _qsbr_shared *shared) |
125 | 0 | { |
126 | 0 | return _Py_qsbr_shared_current(shared) + QSBR_INCR; |
127 | 0 | } |
128 | | |
// Scan every thread's sequence number and advance the shared read sequence
// (rd_seq) to the minimum sequence of all attached threads. Returns the
// resulting read sequence; any goal <= that value is safe to reclaim.
static uint64_t
qsbr_poll_scan(struct _qsbr_shared *shared)
{
    // Synchronize with store in _Py_qsbr_attach(). We need to ensure that
    // the reads from each thread's sequence number are not reordered to see
    // earlier "offline" states.
    _Py_atomic_fence_seq_cst();

    // Compute the minimum sequence number of all attached threads.
    // Start from wr_seq: if every thread is offline, rd_seq can advance all
    // the way up to the current write sequence.
    uint64_t min_seq = _Py_atomic_load_uint64(&shared->wr_seq);
    struct _qsbr_pad *array = shared->array;
    for (Py_ssize_t i = 0, size = shared->size; i != size; i++) {
        struct _qsbr_thread_state *qsbr = &array[i].qsbr;

        uint64_t seq = _Py_atomic_load_uint64(&qsbr->seq);
        // Offline (detached) threads don't constrain the minimum.
        if (seq != QSBR_OFFLINE && QSBR_LT(seq, min_seq)) {
            min_seq = seq;
        }
    }

    // Update the shared read sequence
    uint64_t rd_seq = _Py_atomic_load_uint64(&shared->rd_seq);
    if (QSBR_LT(rd_seq, min_seq)) {
        // It's okay if the compare-exchange failed: another thread updated it
        // (rd_seq only moves forward, so min_seq is still a valid result).
        (void)_Py_atomic_compare_exchange_uint64(&shared->rd_seq, &rd_seq, min_seq);
        rd_seq = min_seq;
    }

    return rd_seq;
}
159 | | |
160 | | bool |
161 | | _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal) |
162 | 0 | { |
163 | 0 | assert(_Py_atomic_load_int_relaxed(&_PyThreadState_GET()->state) == _Py_THREAD_ATTACHED); |
164 | 0 | assert(((_PyThreadStateImpl *)_PyThreadState_GET())->qsbr == qsbr); |
165 | |
|
166 | 0 | if (_Py_qbsr_goal_reached(qsbr, goal)) { |
167 | 0 | return true; |
168 | 0 | } |
169 | 0 | FT_STAT_QSBR_POLL_INC(); |
170 | 0 | uint64_t rd_seq = qsbr_poll_scan(qsbr->shared); |
171 | 0 | return QSBR_LEQ(goal, rd_seq); |
172 | 0 | } |
173 | | |
// Mark the current thread as "online" by publishing the current global
// write sequence as its per-thread sequence number. While online, the
// thread holds up reclamation of anything retired at a later sequence.
void
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr)
{
    assert(qsbr->seq == 0 && "already attached");

    uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
    // The store must be ordered against subsequent reads of shared data;
    // qsbr_poll_scan()'s seq_cst fence pairs with this.
    _Py_atomic_store_uint64(&qsbr->seq, seq);  // needs seq_cst
}
182 | | |
// Mark the current thread as "offline" (a quiescent state). The release
// store ensures that all of the thread's prior accesses to shared data are
// visible before pollers observe it as offline.
void
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr)
{
    assert(qsbr->seq != 0 && "already detached");

    _Py_atomic_store_uint64_release(&qsbr->seq, QSBR_OFFLINE);
}
190 | | |
// Reserve an entry in the interpreter's QSBR thread-state array for a new
// thread. Returns the entry's index (stable across array resizes), or -1
// if the array could not be grown.
Py_ssize_t
_Py_qsbr_reserve(PyInterpreterState *interp)
{
    struct _qsbr_shared *shared = &interp->qsbr;

    PyMutex_Lock(&shared->mutex);
    // Try allocating from our internal freelist
    struct _qsbr_thread_state *qsbr = qsbr_allocate(shared);

    // If there are no free entries, we pause all threads, grow the array,
    // and update the pointers in PyThreadState to entries in the new array.
    if (qsbr == NULL) {
        _PyEval_StopTheWorld(interp);
        if (grow_thread_array(shared) == 0) {
            qsbr = qsbr_allocate(shared);
        }
        _PyEval_StartTheWorld(interp);
    }

    // Return an index rather than the pointer because the array may be
    // resized and the pointer invalidated.
    Py_ssize_t index = -1;
    if (qsbr != NULL) {
        index = (struct _qsbr_pad *)qsbr - shared->array;
    }
    PyMutex_Unlock(&shared->mutex);
    return index;
}
219 | | |
220 | | void |
221 | | _Py_qsbr_register(_PyThreadStateImpl *tstate, PyInterpreterState *interp, |
222 | | Py_ssize_t index) |
223 | 0 | { |
224 | | // Associate the QSBR state with the thread state |
225 | 0 | struct _qsbr_shared *shared = &interp->qsbr; |
226 | |
|
227 | 0 | PyMutex_Lock(&shared->mutex); |
228 | 0 | struct _qsbr_thread_state *qsbr = &interp->qsbr.array[index].qsbr; |
229 | 0 | assert(qsbr->allocated && qsbr->tstate == NULL); |
230 | 0 | qsbr->tstate = (PyThreadState *)tstate; |
231 | 0 | tstate->qsbr = qsbr; |
232 | 0 | PyMutex_Unlock(&shared->mutex); |
233 | 0 | } |
234 | | |
// Disassociate a thread state from its QSBR entry and return the entry to
// the freelist. The thread must already be detached (seq == 0) and must
// not hold the GIL.
void
_Py_qsbr_unregister(PyThreadState *tstate)
{
    struct _qsbr_shared *shared = &tstate->interp->qsbr;
    struct _PyThreadStateImpl *tstate_imp = (_PyThreadStateImpl*) tstate;

    // gh-119369: GIL must be released (if held) to prevent deadlocks, because
    // we might not have an active tstate, which means that blocking on PyMutex
    // locks will not implicitly release the GIL.
    assert(!tstate->holds_gil);

    PyMutex_Lock(&shared->mutex);
    // NOTE: we must load (or reload) the thread state's qbsr inside the mutex
    // because the array may have been resized (changing tstate->qsbr) while
    // we waited to acquire the mutex.
    struct _qsbr_thread_state *qsbr = tstate_imp->qsbr;

    assert(qsbr->seq == 0 && "thread state must be detached");
    assert(qsbr->allocated && qsbr->tstate == tstate);

    // Sever both links and push the entry back onto the freelist.
    tstate_imp->qsbr = NULL;
    qsbr->tstate = NULL;
    qsbr->allocated = false;
    qsbr->freelist_next = shared->freelist;
    shared->freelist = qsbr;
    PyMutex_Unlock(&shared->mutex);
}
262 | | |
263 | | void |
264 | | _Py_qsbr_fini(PyInterpreterState *interp) |
265 | 0 | { |
266 | 0 | struct _qsbr_shared *shared = &interp->qsbr; |
267 | 0 | PyMem_RawFree(shared->array_raw); |
268 | 0 | shared->array = NULL; |
269 | 0 | shared->array_raw = NULL; |
270 | 0 | shared->size = 0; |
271 | 0 | shared->freelist = NULL; |
272 | 0 | } |
273 | | |
274 | | void |
275 | | _Py_qsbr_after_fork(_PyThreadStateImpl *tstate) |
276 | 0 | { |
277 | 0 | struct _qsbr_thread_state *this_qsbr = tstate->qsbr; |
278 | 0 | struct _qsbr_shared *shared = this_qsbr->shared; |
279 | |
|
280 | 0 | _PyMutex_at_fork_reinit(&shared->mutex); |
281 | |
|
282 | 0 | for (Py_ssize_t i = 0; i != shared->size; i++) { |
283 | 0 | struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr; |
284 | 0 | if (qsbr != this_qsbr && qsbr->allocated) { |
285 | 0 | qsbr->tstate = NULL; |
286 | | qsbr->allocated = false; |
287 | 0 | qsbr->freelist_next = shared->freelist; |
288 | 0 | shared->freelist = qsbr; |
289 | 0 | } |
290 | 0 | } |
291 | 0 | } |