/src/cpython/Python/qsbr.c
Line | Count | Source |
1 | | /* |
2 | | * Implementation of safe memory reclamation scheme using |
3 | | * quiescent states. See InternalDocs/qsbr.md. |
4 | | * |
5 | | * This is derived from the "GUS" safe memory reclamation technique |
6 | | * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs |
7 | | * in this code are likely due to the modifications. |
8 | | * |
9 | | * The original copyright is preserved below. |
10 | | * |
11 | | * Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org> |
12 | | * |
13 | | * Redistribution and use in source and binary forms, with or without |
14 | | * modification, are permitted provided that the following conditions |
15 | | * are met: |
16 | | * 1. Redistributions of source code must retain the above copyright |
17 | | * notice unmodified, this list of conditions, and the following |
18 | | * disclaimer. |
19 | | * 2. Redistributions in binary form must reproduce the above copyright |
20 | | * notice, this list of conditions and the following disclaimer in the |
21 | | * documentation and/or other materials provided with the distribution. |
22 | | * |
23 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
24 | | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
25 | | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
26 | | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
27 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
28 | | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
29 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
30 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
31 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
32 | | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | | */ |
34 | | #include "Python.h" |
35 | | #include "pycore_interp.h" // PyInterpreterState |
36 | | #include "pycore_pystate.h" // _PyThreadState_GET() |
37 | | #include "pycore_qsbr.h" |
38 | | #include "pycore_tstate.h" // _PyThreadStateImpl |
39 | | #include "pycore_stats.h" // FT_STAT_QSBR_POLL_INC() |
40 | | |
41 | | |
42 | | // Starting size of the array of qsbr thread states |
43 | 0 | #define MIN_ARRAY_SIZE 8 |
44 | | |
45 | | // Allocate a QSBR thread state from the freelist |
46 | | static struct _qsbr_thread_state * |
47 | | qsbr_allocate(struct _qsbr_shared *shared) |
48 | 0 | { |
49 | 0 | struct _qsbr_thread_state *qsbr = shared->freelist; |
50 | 0 | if (qsbr == NULL) { |
51 | 0 | return NULL; |
52 | 0 | } |
53 | 0 | shared->freelist = qsbr->freelist_next; |
54 | 0 | qsbr->freelist_next = NULL; |
55 | 0 | qsbr->shared = shared; |
56 | 0 | qsbr->allocated = true; |
57 | 0 | return qsbr; |
58 | 0 | } |
59 | | |
60 | | // Initialize (or reinitialize) the freelist of QSBR thread states |
61 | | static void |
62 | | initialize_new_array(struct _qsbr_shared *shared) |
63 | 0 | { |
64 | 0 | for (Py_ssize_t i = 0; i != shared->size; i++) { |
65 | 0 | struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr; |
66 | 0 | if (qsbr->tstate != NULL) { |
67 | | // Update the thread state pointer to its QSBR state |
68 | 0 | _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)qsbr->tstate; |
69 | 0 | tstate->qsbr = qsbr; |
70 | 0 | } |
71 | 0 | if (!qsbr->allocated) { |
72 | | // Push to freelist |
73 | 0 | qsbr->freelist_next = shared->freelist; |
74 | 0 | shared->freelist = qsbr; |
75 | 0 | } |
76 | 0 | } |
77 | 0 | } |
78 | | |
79 | | // Grow the array of QSBR thread states. Returns 0 on success, -1 on failure. |
80 | | static int |
81 | | grow_thread_array(struct _qsbr_shared *shared) |
82 | 0 | { |
83 | 0 | Py_ssize_t new_size = shared->size * 2; |
84 | 0 | if (new_size < MIN_ARRAY_SIZE) { |
85 | 0 | new_size = MIN_ARRAY_SIZE; |
86 | 0 | } |
87 | | |
88 | | // Overallocate by 63 bytes so we can align to a 64-byte boundary. |
89 | | // This avoids potential false sharing between the first entry and other |
90 | | // allocations. |
91 | 0 | size_t alignment = 64; |
92 | 0 | size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + alignment - 1; |
93 | 0 | void *raw = PyMem_RawCalloc(1, alloc_size); |
94 | 0 | if (raw == NULL) { |
95 | 0 | return -1; |
96 | 0 | } |
97 | 0 | struct _qsbr_pad *array = _Py_ALIGN_UP(raw, alignment); |
98 | |
|
99 | 0 | void *old_raw = shared->array_raw; |
100 | 0 | if (shared->array != NULL) { |
101 | 0 | memcpy(array, shared->array, shared->size * sizeof(*array)); |
102 | 0 | } |
103 | |
|
104 | 0 | shared->array = array; |
105 | 0 | shared->array_raw = raw; |
106 | 0 | shared->size = new_size; |
107 | 0 | shared->freelist = NULL; |
108 | 0 | initialize_new_array(shared); |
109 | |
|
110 | 0 | PyMem_RawFree(old_raw); |
111 | 0 | return 0; |
112 | 0 | } |
113 | | |
114 | | uint64_t |
115 | | _Py_qsbr_advance(struct _qsbr_shared *shared) |
116 | 0 | { |
117 | | // NOTE: with 64-bit sequence numbers, we don't have to worry too much |
118 | | // about the wr_seq getting too far ahead of rd_seq, but if we ever use |
119 | | // 32-bit sequence numbers, we'll need to be more careful. |
120 | 0 | return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR) + QSBR_INCR; |
121 | 0 | } |
122 | | |
123 | | uint64_t |
124 | | _Py_qsbr_shared_next(struct _qsbr_shared *shared) |
125 | 0 | { |
126 | 0 | return _Py_qsbr_shared_current(shared) + QSBR_INCR; |
127 | 0 | } |
128 | | |
// Scan every thread's sequence number and advance the shared read sequence
// (rd_seq) to the minimum sequence of all attached threads. Returns the
// resulting read sequence; any goal <= that value is safe to reclaim.
static uint64_t
qsbr_poll_scan(struct _qsbr_shared *shared)
{
    // Synchronize with store in _Py_qsbr_attach(). We need to ensure that
    // the reads from each thread's sequence number are not reordered to see
    // earlier "offline" states.
    _Py_atomic_fence_seq_cst();

    // Compute the minimum sequence number of all attached threads.
    // Start from wr_seq: if every thread is offline, rd_seq can advance all
    // the way up to the current write sequence.
    uint64_t min_seq = _Py_atomic_load_uint64(&shared->wr_seq);
    struct _qsbr_pad *array = shared->array;
    for (Py_ssize_t i = 0, size = shared->size; i != size; i++) {
        struct _qsbr_thread_state *qsbr = &array[i].qsbr;

        uint64_t seq = _Py_atomic_load_uint64(&qsbr->seq);
        // Offline (detached) threads don't constrain the minimum.
        if (seq != QSBR_OFFLINE && QSBR_LT(seq, min_seq)) {
            min_seq = seq;
        }
    }

    // Update the shared read sequence
    uint64_t rd_seq = _Py_atomic_load_uint64(&shared->rd_seq);
    if (QSBR_LT(rd_seq, min_seq)) {
        // It's okay if the compare-exchange failed: another thread updated it
        // (rd_seq only moves forward, so min_seq is still a valid result).
        (void)_Py_atomic_compare_exchange_uint64(&shared->rd_seq, &rd_seq, min_seq);
        rd_seq = min_seq;
    }

    return rd_seq;
}
159 | | |
160 | | bool |
161 | | _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal) |
162 | 0 | { |
163 | 0 | assert(_Py_atomic_load_int_relaxed(&_PyThreadState_GET()->state) == _Py_THREAD_ATTACHED); |
164 | 0 | assert(((_PyThreadStateImpl *)_PyThreadState_GET())->qsbr == qsbr); |
165 | |
|
166 | 0 | if (_Py_qbsr_goal_reached(qsbr, goal)) { |
167 | 0 | return true; |
168 | 0 | } |
169 | 0 | FT_STAT_QSBR_POLL_INC(); |
170 | 0 | uint64_t rd_seq = qsbr_poll_scan(qsbr->shared); |
171 | 0 | return QSBR_LEQ(goal, rd_seq); |
172 | 0 | } |
173 | | |
// Mark the current thread as "online" by publishing the current global
// write sequence as its per-thread sequence number. While online, the
// thread holds up reclamation of anything retired at a later sequence.
void
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr)
{
    assert(qsbr->seq == 0 && "already attached");

    uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
    // The store must be ordered against subsequent reads of shared data;
    // qsbr_poll_scan()'s seq_cst fence pairs with this.
    _Py_atomic_store_uint64(&qsbr->seq, seq);  // needs seq_cst
}
182 | | |
// Mark the current thread as "offline" (a quiescent state). The release
// store ensures that all of the thread's prior accesses to shared data are
// visible before pollers observe it as offline.
void
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr)
{
    assert(qsbr->seq != 0 && "already detached");

    _Py_atomic_store_uint64_release(&qsbr->seq, QSBR_OFFLINE);
}
190 | | |
// Reserve an entry in the interpreter's QSBR thread-state array for a new
// thread. Returns the entry's index (stable across array resizes), or -1
// if the array could not be grown.
Py_ssize_t
_Py_qsbr_reserve(PyInterpreterState *interp)
{
    struct _qsbr_shared *shared = &interp->qsbr;

    PyMutex_Lock(&shared->mutex);
    // Try allocating from our internal freelist
    struct _qsbr_thread_state *qsbr = qsbr_allocate(shared);

    // If there are no free entries, we pause all threads, grow the array,
    // and update the pointers in PyThreadState to entries in the new array.
    if (qsbr == NULL) {
        _PyEval_StopTheWorld(interp);
        if (grow_thread_array(shared) == 0) {
            qsbr = qsbr_allocate(shared);
        }
        _PyEval_StartTheWorld(interp);
    }

    // Return an index rather than the pointer because the array may be
    // resized and the pointer invalidated.
    Py_ssize_t index = -1;
    if (qsbr != NULL) {
        index = (struct _qsbr_pad *)qsbr - shared->array;
    }
    PyMutex_Unlock(&shared->mutex);
    return index;
}
219 | | |
220 | | void |
221 | | _Py_qsbr_register(_PyThreadStateImpl *tstate, PyInterpreterState *interp, |
222 | | Py_ssize_t index) |
223 | 0 | { |
224 | | // Associate the QSBR state with the thread state |
225 | 0 | struct _qsbr_shared *shared = &interp->qsbr; |
226 | |
|
227 | 0 | PyMutex_Lock(&shared->mutex); |
228 | 0 | struct _qsbr_thread_state *qsbr = &interp->qsbr.array[index].qsbr; |
229 | 0 | assert(qsbr->allocated && qsbr->tstate == NULL); |
230 | 0 | qsbr->tstate = (PyThreadState *)tstate; |
231 | 0 | tstate->qsbr = qsbr; |
232 | 0 | PyMutex_Unlock(&shared->mutex); |
233 | 0 | } |
234 | | |
// Disassociate a thread state from its QSBR entry and return the entry to
// the freelist. The thread must already be detached (seq == 0) and must
// not hold the GIL.
void
_Py_qsbr_unregister(PyThreadState *tstate)
{
    struct _qsbr_shared *shared = &tstate->interp->qsbr;
    struct _PyThreadStateImpl *tstate_imp = (_PyThreadStateImpl*) tstate;

    // gh-119369: GIL must be released (if held) to prevent deadlocks, because
    // we might not have an active tstate, which means that blocking on PyMutex
    // locks will not implicitly release the GIL.
    assert(!tstate->holds_gil);

    PyMutex_Lock(&shared->mutex);
    // NOTE: we must load (or reload) the thread state's qbsr inside the mutex
    // because the array may have been resized (changing tstate->qsbr) while
    // we waited to acquire the mutex.
    struct _qsbr_thread_state *qsbr = tstate_imp->qsbr;

    assert(qsbr->seq == 0 && "thread state must be detached");
    assert(qsbr->allocated && qsbr->tstate == tstate);

    // Sever both links and push the entry back onto the freelist.
    tstate_imp->qsbr = NULL;
    qsbr->tstate = NULL;
    qsbr->allocated = false;
    qsbr->freelist_next = shared->freelist;
    shared->freelist = qsbr;
    PyMutex_Unlock(&shared->mutex);
}
262 | | |
263 | | void |
264 | | _Py_qsbr_fini(PyInterpreterState *interp) |
265 | 0 | { |
266 | 0 | struct _qsbr_shared *shared = &interp->qsbr; |
267 | 0 | PyMem_RawFree(shared->array_raw); |
268 | 0 | shared->array = NULL; |
269 | 0 | shared->array_raw = NULL; |
270 | 0 | shared->size = 0; |
271 | 0 | shared->freelist = NULL; |
272 | 0 | } |
273 | | |
274 | | void |
275 | | _Py_qsbr_after_fork(_PyThreadStateImpl *tstate) |
276 | 0 | { |
277 | 0 | struct _qsbr_thread_state *this_qsbr = tstate->qsbr; |
278 | 0 | struct _qsbr_shared *shared = this_qsbr->shared; |
279 | |
|
280 | 0 | _PyMutex_at_fork_reinit(&shared->mutex); |
281 | |
|
282 | 0 | for (Py_ssize_t i = 0; i != shared->size; i++) { |
283 | 0 | struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr; |
284 | 0 | if (qsbr != this_qsbr && qsbr->allocated) { |
285 | 0 | qsbr->tstate = NULL; |
286 | | qsbr->allocated = false; |
287 | 0 | qsbr->freelist_next = shared->freelist; |
288 | 0 | shared->freelist = qsbr; |
289 | 0 | } |
290 | 0 | } |
291 | 0 | } |