/src/openssl/crypto/threads_pthread.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved. |
3 | | * |
4 | | * Licensed under the Apache License 2.0 (the "License"). You may not use |
5 | | * this file except in compliance with the License. You can obtain a copy |
6 | | * in the file LICENSE in the source distribution or at |
7 | | * https://www.openssl.org/source/license.html |
8 | | */ |
9 | | |
10 | | /* We need to use the OPENSSL_fork_*() deprecated APIs */ |
11 | | #define OPENSSL_SUPPRESS_DEPRECATED |
12 | | |
13 | | #include <openssl/crypto.h> |
14 | | #include <crypto/cryptlib.h> |
15 | | #include "internal/cryptlib.h" |
16 | | #include "internal/rcu.h" |
17 | | #include "rcu_internal.h" |
18 | | |
19 | | #if defined(__clang__) && defined(__has_feature) |
20 | | # if __has_feature(thread_sanitizer) |
21 | | # define __SANITIZE_THREAD__ |
22 | | # endif |
23 | | #endif |
24 | | |
25 | | #if defined(__SANITIZE_THREAD__) |
26 | | # include <sanitizer/tsan_interface.h> |
27 | | # define TSAN_FAKE_UNLOCK(x) __tsan_mutex_pre_unlock((x), 0); \ |
28 | | __tsan_mutex_post_unlock((x), 0) |
29 | | |
30 | | # define TSAN_FAKE_LOCK(x) __tsan_mutex_pre_lock((x), 0); \ |
31 | | __tsan_mutex_post_lock((x), 0, 0) |
32 | | #else |
33 | | # define TSAN_FAKE_UNLOCK(x) |
34 | | # define TSAN_FAKE_LOCK(x) |
35 | | #endif |
36 | | |
37 | | #if defined(__sun) |
38 | | # include <atomic.h> |
39 | | #endif |
40 | | |
41 | | #if defined(__apple_build_version__) && __apple_build_version__ < 6000000 |
42 | | /* |
43 | | * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE and |
44 | | * __ATOMIC_ACQ_REL but which expects only one parameter for __atomic_is_lock_free() |
45 | | * rather than two which has signature __atomic_is_lock_free(sizeof(_Atomic(T))). |
46 | | * All of this makes it impossible to use __atomic_is_lock_free here. |
47 | | * |
48 | | * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760 |
49 | | */ |
50 | | # define BROKEN_CLANG_ATOMICS |
51 | | #endif |
52 | | |
53 | | #if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS) |
54 | | |
55 | | # if defined(OPENSSL_SYS_UNIX) |
56 | | # include <sys/types.h> |
57 | | # include <unistd.h> |
58 | | # endif |
59 | | |
60 | | # include <assert.h> |
61 | | |
62 | | # ifdef PTHREAD_RWLOCK_INITIALIZER |
63 | | # define USE_RWLOCK |
64 | | # endif |
65 | | |
66 | | /* |
67 | | * For all GNU/clang atomic builtins, we also need fallbacks, to cover all |
68 | | * other compilers. |
69 | | |
70 | | * Unfortunately, we can't do that with some "generic type", because there's no |
71 | | * guarantee that the chosen generic type is large enough to cover all cases. |
72 | | * Therefore, we implement fallbacks for each applicable type, with composed |
73 | | * names that include the type they handle. |
74 | | * |
75 | | * (an anecdote: we previously tried to use |void *| as the generic type, with |
76 | | * the thought that the pointer itself is the largest type. However, this is |
77 | | * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large) |
78 | | * |
79 | | * All applicable ATOMIC_ macros take the intended type as first parameter, so |
80 | | * they can map to the correct fallback function. In the GNU/clang case, that |
81 | | * parameter is simply ignored. |
82 | | */ |
83 | | |
84 | | /* |
85 | | * Internal types used with the ATOMIC_ macros, to make it possible to compose |
86 | | * fallback function names. |
87 | | */ |
88 | | typedef void *pvoid; |
89 | | typedef struct rcu_cb_item *prcu_cb_item; |
90 | | |
91 | | # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \ |
92 | | && !defined(USE_ATOMIC_FALLBACKS) |
93 | | # if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__) |
94 | | /* |
95 | | * For pointers, Apple M1 virtualized cpu seems to have some problem using the |
96 | | * ldapr instruction (see https://github.com/openssl/openssl/pull/23974) |
97 | | * When using the native apple clang compiler, this instruction is emitted for |
98 | | * atomic loads, which is bad. So, if |
99 | | * 1) We are building on a target that defines __APPLE__ AND |
100 | | * 2) We are building on a target using clang (__clang__) AND |
101 | | * 3) We are building for an M1 processor (__aarch64__) |
102 | | * Then we should not use __atomic_load_n and instead implement our own |
103 | | * function to issue the ldar instruction instead, which produces the proper |
104 | | * sequencing guarantees |
105 | | */ |
106 | | static inline void *apple_atomic_load_n_pvoid(void **p, |
107 | | ossl_unused int memorder) |
108 | | { |
109 | | void *ret; |
110 | | |
111 | | __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):); |
112 | | |
113 | | return ret; |
114 | | } |
115 | | |
116 | | /* For uint64_t, we should be fine, though */ |
117 | | # define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o) |
118 | | |
119 | | # define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o) |
120 | | # else |
121 | 15 | # define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o) |
122 | | # endif |
123 | 9 | # define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o) |
124 | 6 | # define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o) |
125 | 0 | # define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o) |
126 | 0 | # define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o) |
127 | | # define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o) |
128 | 0 | # define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o) |
129 | 9 | # define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o) |
130 | 9 | # define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o) |
131 | | # else |
132 | | static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER; |
133 | | |
134 | | # define IMPL_fallback_atomic_load_n(t) \ |
135 | | static inline t fallback_atomic_load_n_##t(t *p) \ |
136 | | { \ |
137 | | t ret; \ |
138 | | \ |
139 | | pthread_mutex_lock(&atomic_sim_lock); \ |
140 | | ret = *p; \ |
141 | | pthread_mutex_unlock(&atomic_sim_lock); \ |
142 | | return ret; \ |
143 | | } |
144 | | IMPL_fallback_atomic_load_n(uint64_t) |
145 | | IMPL_fallback_atomic_load_n(pvoid) |
146 | | |
147 | | # define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p) |
148 | | |
149 | | # define IMPL_fallback_atomic_store_n(t) \ |
150 | | static inline t fallback_atomic_store_n_##t(t *p, t v) \ |
151 | | { \ |
152 | | t ret; \ |
153 | | \ |
154 | | pthread_mutex_lock(&atomic_sim_lock); \ |
155 | | ret = *p; \ |
156 | | *p = v; \ |
157 | | pthread_mutex_unlock(&atomic_sim_lock); \ |
158 | | return ret; \ |
159 | | } |
160 | | IMPL_fallback_atomic_store_n(uint64_t) |
161 | | |
162 | | # define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v) |
163 | | |
164 | | # define IMPL_fallback_atomic_store(t) \ |
165 | | static inline void fallback_atomic_store_##t(t *p, t *v) \ |
166 | | { \ |
167 | | pthread_mutex_lock(&atomic_sim_lock); \ |
168 | | *p = *v; \ |
169 | | pthread_mutex_unlock(&atomic_sim_lock); \ |
170 | | } |
171 | | IMPL_fallback_atomic_store(uint64_t) |
172 | | IMPL_fallback_atomic_store(pvoid) |
173 | | |
174 | | # define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v) |
175 | | |
176 | | # define IMPL_fallback_atomic_exchange_n(t) \ |
177 | | static inline t fallback_atomic_exchange_n_##t(t *p, t v) \ |
178 | | { \ |
179 | | t ret; \ |
180 | | \ |
181 | | pthread_mutex_lock(&atomic_sim_lock); \ |
182 | | ret = *p; \ |
183 | | *p = v; \ |
184 | | pthread_mutex_unlock(&atomic_sim_lock); \ |
185 | | return ret; \ |
186 | | } |
187 | | IMPL_fallback_atomic_exchange_n(uint64_t) |
188 | | IMPL_fallback_atomic_exchange_n(prcu_cb_item) |
189 | | |
190 | | # define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v) |
191 | | |
192 | | /* |
193 | | * The fallbacks that follow don't need any per type implementation, as |
194 | | * they are designed for uint64_t only. If there comes a time when multiple |
195 | | * types need to be covered, it's relatively easy to refactor them the same |
196 | | * way as the fallbacks above. |
197 | | */ |
198 | | |
199 | | static inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v) |
200 | | { |
201 | | uint64_t ret; |
202 | | |
203 | | pthread_mutex_lock(&atomic_sim_lock); |
204 | | *p += v; |
205 | | ret = *p; |
206 | | pthread_mutex_unlock(&atomic_sim_lock); |
207 | | return ret; |
208 | | } |
209 | | |
210 | | # define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v) |
211 | | |
212 | | static inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v) |
213 | | { |
214 | | uint64_t ret; |
215 | | |
216 | | pthread_mutex_lock(&atomic_sim_lock); |
217 | | ret = *p; |
218 | | *p += v; |
219 | | pthread_mutex_unlock(&atomic_sim_lock); |
220 | | return ret; |
221 | | } |
222 | | |
223 | | # define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v) |
224 | | |
225 | | static inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v) |
226 | | { |
227 | | uint64_t ret; |
228 | | |
229 | | pthread_mutex_lock(&atomic_sim_lock); |
230 | | *p -= v; |
231 | | ret = *p; |
232 | | pthread_mutex_unlock(&atomic_sim_lock); |
233 | | return ret; |
234 | | } |
235 | | |
236 | | # define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v) |
237 | | |
238 | | static inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m) |
239 | | { |
240 | | uint64_t ret; |
241 | | |
242 | | pthread_mutex_lock(&atomic_sim_lock); |
243 | | *p &= m; |
244 | | ret = *p; |
245 | | pthread_mutex_unlock(&atomic_sim_lock); |
246 | | return ret; |
247 | | } |
248 | | |
249 | | # define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v) |
250 | | |
251 | | static inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m) |
252 | | { |
253 | | uint64_t ret; |
254 | | |
255 | | pthread_mutex_lock(&atomic_sim_lock); |
256 | | *p |= m; |
257 | | ret = *p; |
258 | | pthread_mutex_unlock(&atomic_sim_lock); |
259 | | return ret; |
260 | | } |
261 | | |
262 | | # define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v) |
263 | | # endif |
264 | | |
265 | | /* |
266 | | * users is broken up into 2 parts |
267 | | * bits 0-15 current readers |
268 | | * bit 32-63 - ID |
269 | | */ |
/* Bit position and width of the reader count and of the ID within users */
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 16
# define ID_SIZE 32

/* Masks of READER_SIZE / ID_SIZE low-order one bits */
# define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
/* Extract the reader count / the ID from a users value */
# define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x)       (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
/* The increment that represents a single reader */
# define VAL_READER      ((uint64_t)1 << READER_SHIFT)
/*
 * Place an ID into its position within a users value.  The argument is
 * parenthesized so that an expression argument is widened as a whole
 * before it is shifted.
 */
# define VAL_ID(x)       ((uint64_t)(x) << ID_SHIFT)
281 | | |
282 | | /* |
283 | | * This is the core of an rcu lock. It tracks the readers and writers for the |
284 | | * current quiescence point for a given lock. Users is the 64 bit value that |
285 | | * stores the READERS/ID as defined above |
286 | | * |
287 | | */ |
struct rcu_qp {
    /* reader count and ID packed together; see READER_COUNT() and ID_VAL() */
    uint64_t users;
};
291 | | |
292 | | struct thread_qp { |
293 | | struct rcu_qp *qp; |
294 | | unsigned int depth; |
295 | | CRYPTO_RCU_LOCK *lock; |
296 | | }; |
297 | | |
298 | 0 | # define MAX_QPS 10 |
299 | | /* |
300 | | * This is the per thread tracking data |
301 | | * that is assigned to each thread participating |
302 | | * in an rcu qp |
303 | | * |
304 | | * qp points to the qp that it last acquired |
305 | | * |
306 | | */ |
307 | | struct rcu_thr_data { |
308 | | struct thread_qp thread_qps[MAX_QPS]; |
309 | | }; |
310 | | |
311 | | /* |
312 | | * This is the internal version of a CRYPTO_RCU_LOCK |
313 | | * it is cast from CRYPTO_RCU_LOCK |
314 | | */ |
315 | | struct rcu_lock_st { |
316 | | /* Callbacks to call for next ossl_synchronize_rcu */ |
317 | | struct rcu_cb_item *cb_items; |
318 | | |
319 | | /* The context we are being created against */ |
320 | | OSSL_LIB_CTX *ctx; |
321 | | |
322 | | /* rcu generation counter for in-order retirement */ |
323 | | uint32_t id_ctr; |
324 | | |
325 | | /* Array of quiescent points for synchronization */ |
326 | | struct rcu_qp *qp_group; |
327 | | |
328 | | /* Number of elements in qp_group array */ |
329 | | size_t group_count; |
330 | | |
331 | | /* Index of the current qp in the qp_group array */ |
332 | | uint64_t reader_idx; |
333 | | |
334 | | /* value of the next id_ctr value to be retired */ |
335 | | uint32_t next_to_retire; |
336 | | |
337 | | /* index of the next free rcu_qp in the qp_group */ |
338 | | uint64_t current_alloc_idx; |
339 | | |
340 | | /* number of qp's in qp_group array currently being retired */ |
341 | | uint32_t writers_alloced; |
342 | | |
343 | | /* lock protecting write side operations */ |
344 | | pthread_mutex_t write_lock; |
345 | | |
346 | | /* lock protecting updates to writers_alloced/current_alloc_idx */ |
347 | | pthread_mutex_t alloc_lock; |
348 | | |
349 | | /* signal to wake threads waiting on alloc_lock */ |
350 | | pthread_cond_t alloc_signal; |
351 | | |
352 | | /* lock to enforce in-order retirement */ |
353 | | pthread_mutex_t prior_lock; |
354 | | |
355 | | /* signal to wake threads waiting on prior_lock */ |
356 | | pthread_cond_t prior_signal; |
357 | | }; |
358 | | |
359 | | /* Read side acquisition of the current qp */ |
360 | | static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock) |
361 | 0 | { |
362 | 0 | uint64_t qp_idx; |
363 | | |
364 | | /* get the current qp index */ |
365 | 0 | for (;;) { |
366 | | /* |
367 | | * Notes on use of __ATOMIC_ACQUIRE |
368 | | * We need to ensure the following: |
369 | | * 1) That subsequent operations aren't optimized by hoisting them above |
370 | | * this operation. Specifically, we don't want the below re-load of |
371 | | * qp_idx to get optimized away |
372 | | * 2) We want to ensure that any updating of reader_idx on the write side |
373 | | * of the lock is flushed from a local cpu cache so that we see any |
374 | | * updates prior to the load. This is a non-issue on cache coherent |
375 | | * systems like x86, but is relevant on other arches |
376 | | * Note: This applies to the reload below as well |
377 | | */ |
378 | 0 | qp_idx = ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE); |
379 | | |
380 | | /* |
381 | | * Notes of use of __ATOMIC_RELEASE |
382 | | * This counter is only read by the write side of the lock, and so we |
383 | | * specify __ATOMIC_RELEASE here to ensure that the write side of the |
384 | | * lock see this during the spin loop read of users, as it waits for the |
385 | | * reader count to approach zero |
386 | | */ |
387 | 0 | ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER, |
388 | 0 | __ATOMIC_RELEASE); |
389 | | |
390 | | /* if the idx hasn't changed, we're good, else try again */ |
391 | 0 | if (qp_idx == ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE)) |
392 | 0 | break; |
393 | | |
394 | | /* |
395 | | * Notes on use of __ATOMIC_RELEASE |
396 | | * As with the add above, we want to ensure that this decrement is |
397 | | * seen by the write side of the lock as soon as it happens to prevent |
398 | | * undue spinning waiting for write side completion |
399 | | */ |
400 | 0 | ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER, |
401 | 0 | __ATOMIC_RELEASE); |
402 | 0 | } |
403 | |
|
404 | 0 | return &lock->qp_group[qp_idx]; |
405 | 0 | } |
406 | | |
407 | | static void ossl_rcu_free_local_data(void *arg) |
408 | 0 | { |
409 | 0 | OSSL_LIB_CTX *ctx = arg; |
410 | 0 | CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(ctx); |
411 | 0 | struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey); |
412 | 0 | OPENSSL_free(data); |
413 | 0 | } |
414 | | |
415 | | void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock) |
416 | 0 | { |
417 | 0 | struct rcu_thr_data *data; |
418 | 0 | int i, available_qp = -1; |
419 | 0 | CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx); |
420 | | |
421 | | /* |
422 | | * we're going to access current_qp here so ask the |
423 | | * processor to fetch it |
424 | | */ |
425 | 0 | data = CRYPTO_THREAD_get_local(lkey); |
426 | |
|
427 | 0 | if (data == NULL) { |
428 | 0 | data = OPENSSL_zalloc(sizeof(*data)); |
429 | 0 | OPENSSL_assert(data != NULL); |
430 | 0 | CRYPTO_THREAD_set_local(lkey, data); |
431 | 0 | ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data); |
432 | 0 | } |
433 | |
|
434 | 0 | for (i = 0; i < MAX_QPS; i++) { |
435 | 0 | if (data->thread_qps[i].qp == NULL && available_qp == -1) |
436 | 0 | available_qp = i; |
437 | | /* If we have a hold on this lock already, we're good */ |
438 | 0 | if (data->thread_qps[i].lock == lock) { |
439 | 0 | data->thread_qps[i].depth++; |
440 | 0 | return; |
441 | 0 | } |
442 | 0 | } |
443 | | |
444 | | /* |
445 | | * if we get here, then we don't have a hold on this lock yet |
446 | | */ |
447 | 0 | assert(available_qp != -1); |
448 | |
|
449 | 0 | data->thread_qps[available_qp].qp = get_hold_current_qp(lock); |
450 | 0 | data->thread_qps[available_qp].depth = 1; |
451 | 0 | data->thread_qps[available_qp].lock = lock; |
452 | 0 | } |
453 | | |
454 | | void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock) |
455 | 0 | { |
456 | 0 | int i; |
457 | 0 | CRYPTO_THREAD_LOCAL *lkey = ossl_lib_ctx_get_rcukey(lock->ctx); |
458 | 0 | struct rcu_thr_data *data = CRYPTO_THREAD_get_local(lkey); |
459 | 0 | uint64_t ret; |
460 | |
|
461 | 0 | assert(data != NULL); |
462 | |
|
463 | 0 | for (i = 0; i < MAX_QPS; i++) { |
464 | 0 | if (data->thread_qps[i].lock == lock) { |
465 | | /* |
466 | | * As with read side acquisition, we use __ATOMIC_RELEASE here |
467 | | * to ensure that the decrement is published immediately |
468 | | * to any write side waiters |
469 | | */ |
470 | 0 | data->thread_qps[i].depth--; |
471 | 0 | if (data->thread_qps[i].depth == 0) { |
472 | 0 | ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users, VAL_READER, |
473 | 0 | __ATOMIC_RELEASE); |
474 | 0 | OPENSSL_assert(ret != UINT64_MAX); |
475 | 0 | data->thread_qps[i].qp = NULL; |
476 | 0 | data->thread_qps[i].lock = NULL; |
477 | 0 | } |
478 | 0 | return; |
479 | 0 | } |
480 | 0 | } |
481 | | /* |
482 | | * If we get here, we're trying to unlock a lock that we never acquired - |
483 | | * that's fatal. |
484 | | */ |
485 | 0 | assert(0); |
486 | 0 | } |
487 | | |
488 | | /* |
489 | | * Write side allocation routine to get the current qp |
490 | | * and replace it with a new one |
491 | | */ |
492 | | static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock) |
493 | 9 | { |
494 | 9 | uint64_t new_id; |
495 | 9 | uint64_t current_idx; |
496 | | |
497 | 9 | pthread_mutex_lock(&lock->alloc_lock); |
498 | | |
499 | | /* |
500 | | * we need at least one qp to be available with one |
501 | | * left over, so that readers can start working on |
502 | | * one that isn't yet being waited on |
503 | | */ |
504 | 9 | while (lock->group_count - lock->writers_alloced < 2) |
505 | | /* we have to wait for one to be free */ |
506 | 0 | pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock); |
507 | | |
508 | 9 | current_idx = lock->current_alloc_idx; |
509 | | |
510 | | /* Allocate the qp */ |
511 | 9 | lock->writers_alloced++; |
512 | | |
513 | | /* increment the allocation index */ |
514 | 9 | lock->current_alloc_idx = |
515 | 9 | (lock->current_alloc_idx + 1) % lock->group_count; |
516 | | |
517 | | /* get and insert a new id */ |
518 | 9 | new_id = lock->id_ctr; |
519 | 9 | lock->id_ctr++; |
520 | | |
521 | 9 | new_id = VAL_ID(new_id); |
522 | | /* |
523 | | * Even though we are under a write side lock here |
524 | | * We need to use atomic instructions to ensure that the results |
525 | | * of this update are published to the read side prior to updating the |
526 | | * reader idx below |
527 | | */ |
528 | 9 | ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK, |
529 | 9 | __ATOMIC_RELEASE); |
530 | 9 | ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id, |
531 | 9 | __ATOMIC_RELEASE); |
532 | | |
533 | | /* |
534 | | * Update the reader index to be the prior qp. |
535 | | * Note the use of __ATOMIC_RELEASE here is based on the corresponding use |
536 | | * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication |
537 | | * of this value to be seen on the read side immediately after it happens |
538 | | */ |
539 | 9 | ATOMIC_STORE_N(uint64_t, &lock->reader_idx, lock->current_alloc_idx, |
540 | 9 | __ATOMIC_RELEASE); |
541 | | |
542 | | /* wake up any waiters */ |
543 | 9 | pthread_cond_signal(&lock->alloc_signal); |
544 | 9 | pthread_mutex_unlock(&lock->alloc_lock); |
545 | 9 | return &lock->qp_group[current_idx]; |
546 | 9 | } |
547 | | |
548 | | static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp) |
549 | 9 | { |
550 | 9 | pthread_mutex_lock(&lock->alloc_lock); |
551 | 9 | lock->writers_alloced--; |
552 | 9 | pthread_cond_signal(&lock->alloc_signal); |
553 | 9 | pthread_mutex_unlock(&lock->alloc_lock); |
554 | 9 | } |
555 | | |
556 | | static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock, |
557 | | int count) |
558 | 3 | { |
559 | 3 | struct rcu_qp *new = |
560 | 3 | OPENSSL_zalloc(sizeof(*new) * count); |
561 | | |
562 | 3 | lock->group_count = count; |
563 | 3 | return new; |
564 | 3 | } |
565 | | |
566 | | void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock) |
567 | 6 | { |
568 | 6 | pthread_mutex_lock(&lock->write_lock); |
569 | 6 | TSAN_FAKE_UNLOCK(&lock->write_lock); |
570 | 6 | } |
571 | | |
572 | | void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock) |
573 | 6 | { |
574 | 6 | TSAN_FAKE_LOCK(&lock->write_lock); |
575 | 6 | pthread_mutex_unlock(&lock->write_lock); |
576 | 6 | } |
577 | | |
578 | | void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock) |
579 | 9 | { |
580 | 9 | struct rcu_qp *qp; |
581 | 9 | uint64_t count; |
582 | 9 | struct rcu_cb_item *cb_items, *tmpcb; |
583 | | |
584 | 9 | pthread_mutex_lock(&lock->write_lock); |
585 | 9 | cb_items = lock->cb_items; |
586 | 9 | lock->cb_items = NULL; |
587 | 9 | pthread_mutex_unlock(&lock->write_lock); |
588 | | |
589 | 9 | qp = update_qp(lock); |
590 | | |
591 | | /* |
592 | | * wait for the reader count to reach zero |
593 | | * Note the use of __ATOMIC_ACQUIRE here to ensure that any |
594 | | * prior __ATOMIC_RELEASE write operation in get_hold_current_qp |
595 | | * is visible prior to our read |
596 | | */ |
597 | 9 | do { |
598 | 9 | count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE); |
599 | 9 | } while (READER_COUNT(count) != 0); |
600 | | |
601 | | /* retire in order */ |
602 | 9 | pthread_mutex_lock(&lock->prior_lock); |
603 | 9 | while (lock->next_to_retire != ID_VAL(count)) |
604 | 0 | pthread_cond_wait(&lock->prior_signal, &lock->prior_lock); |
605 | 9 | lock->next_to_retire++; |
606 | 9 | pthread_cond_broadcast(&lock->prior_signal); |
607 | 9 | pthread_mutex_unlock(&lock->prior_lock); |
608 | | |
609 | 9 | retire_qp(lock, qp); |
610 | | |
611 | | /* handle any callbacks that we have */ |
612 | 9 | while (cb_items != NULL) { |
613 | 0 | tmpcb = cb_items; |
614 | 0 | cb_items = cb_items->next; |
615 | 0 | tmpcb->fn(tmpcb->data); |
616 | 0 | OPENSSL_free(tmpcb); |
617 | 0 | } |
618 | 9 | } |
619 | | |
620 | | int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data) |
621 | 0 | { |
622 | 0 | struct rcu_cb_item *new = |
623 | 0 | OPENSSL_zalloc(sizeof(*new)); |
624 | |
|
625 | 0 | if (new == NULL) |
626 | 0 | return 0; |
627 | | |
628 | 0 | new->data = data; |
629 | 0 | new->fn = cb; |
630 | | /* |
631 | | * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this |
632 | | * list are visible to us prior to reading, and publish the new value |
633 | | * immediately |
634 | | */ |
635 | 0 | new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new, |
636 | 0 | __ATOMIC_ACQ_REL); |
637 | |
|
638 | 0 | return 1; |
639 | 0 | } |
640 | | |
641 | | void *ossl_rcu_uptr_deref(void **p) |
642 | 6 | { |
643 | 6 | return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE); |
644 | 6 | } |
645 | | |
646 | | void ossl_rcu_assign_uptr(void **p, void **v) |
647 | 6 | { |
648 | 6 | ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE); |
649 | 6 | } |
650 | | |
651 | | CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx) |
652 | 3 | { |
653 | 3 | struct rcu_lock_st *new; |
654 | | |
655 | 3 | if (num_writers < 1) |
656 | 0 | num_writers = 1; |
657 | | |
658 | 3 | ctx = ossl_lib_ctx_get_concrete(ctx); |
659 | 3 | if (ctx == NULL) |
660 | 0 | return 0; |
661 | | |
662 | 3 | new = OPENSSL_zalloc(sizeof(*new)); |
663 | 3 | if (new == NULL) |
664 | 0 | return NULL; |
665 | | |
666 | 3 | new->ctx = ctx; |
667 | 3 | pthread_mutex_init(&new->write_lock, NULL); |
668 | 3 | pthread_mutex_init(&new->prior_lock, NULL); |
669 | 3 | pthread_mutex_init(&new->alloc_lock, NULL); |
670 | 3 | pthread_cond_init(&new->prior_signal, NULL); |
671 | 3 | pthread_cond_init(&new->alloc_signal, NULL); |
672 | 3 | new->qp_group = allocate_new_qp_group(new, num_writers + 1); |
673 | 3 | if (new->qp_group == NULL) { |
674 | 0 | OPENSSL_free(new); |
675 | 0 | new = NULL; |
676 | 0 | } |
677 | 3 | return new; |
678 | 3 | } |
679 | | |
680 | | void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock) |
681 | 3 | { |
682 | 3 | struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock; |
683 | | |
684 | 3 | if (lock == NULL) |
685 | 0 | return; |
686 | | |
687 | | /* make sure we're synchronized */ |
688 | 3 | ossl_synchronize_rcu(rlock); |
689 | | |
690 | 3 | OPENSSL_free(rlock->qp_group); |
691 | | /* There should only be a single qp left now */ |
692 | 3 | OPENSSL_free(rlock); |
693 | 3 | } |
694 | | |
695 | | CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void) |
696 | 228 | { |
697 | 228 | # ifdef USE_RWLOCK |
698 | 228 | CRYPTO_RWLOCK *lock; |
699 | | |
700 | 228 | if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL) |
701 | | /* Don't set error, to avoid recursion blowup. */ |
702 | 0 | return NULL; |
703 | | |
704 | 228 | if (pthread_rwlock_init(lock, NULL) != 0) { |
705 | 0 | OPENSSL_free(lock); |
706 | 0 | return NULL; |
707 | 0 | } |
708 | | # else |
709 | | pthread_mutexattr_t attr; |
710 | | CRYPTO_RWLOCK *lock; |
711 | | |
712 | | if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL) |
713 | | /* Don't set error, to avoid recursion blowup. */ |
714 | | return NULL; |
715 | | |
716 | | /* |
717 | | * We don't use recursive mutexes, but try to catch errors if we do. |
718 | | */ |
719 | | pthread_mutexattr_init(&attr); |
720 | | # if !defined (__TANDEM) && !defined (_SPT_MODEL_) |
721 | | # if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK) |
722 | | pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); |
723 | | # endif |
724 | | # else |
725 | | /* The SPT Thread Library does not define MUTEX attributes. */ |
726 | | # endif |
727 | | |
728 | | if (pthread_mutex_init(lock, &attr) != 0) { |
729 | | pthread_mutexattr_destroy(&attr); |
730 | | OPENSSL_free(lock); |
731 | | return NULL; |
732 | | } |
733 | | |
734 | | pthread_mutexattr_destroy(&attr); |
735 | | # endif |
736 | | |
737 | 228 | return lock; |
738 | 228 | } |
739 | | |
740 | | __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock) |
741 | 2.16M | { |
742 | 2.16M | # ifdef USE_RWLOCK |
743 | 2.16M | if (pthread_rwlock_rdlock(lock) != 0) |
744 | 0 | return 0; |
745 | | # else |
746 | | if (pthread_mutex_lock(lock) != 0) { |
747 | | assert(errno != EDEADLK && errno != EBUSY); |
748 | | return 0; |
749 | | } |
750 | | # endif |
751 | | |
752 | 2.16M | return 1; |
753 | 2.16M | } |
754 | | |
755 | | __owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock) |
756 | 6.94k | { |
757 | 6.94k | # ifdef USE_RWLOCK |
758 | 6.94k | if (pthread_rwlock_wrlock(lock) != 0) |
759 | 0 | return 0; |
760 | | # else |
761 | | if (pthread_mutex_lock(lock) != 0) { |
762 | | assert(errno != EDEADLK && errno != EBUSY); |
763 | | return 0; |
764 | | } |
765 | | # endif |
766 | | |
767 | 6.94k | return 1; |
768 | 6.94k | } |
769 | | |
770 | | int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock) |
771 | 2.17M | { |
772 | 2.17M | # ifdef USE_RWLOCK |
773 | 2.17M | if (pthread_rwlock_unlock(lock) != 0) |
774 | 0 | return 0; |
775 | | # else |
776 | | if (pthread_mutex_unlock(lock) != 0) { |
777 | | assert(errno != EPERM); |
778 | | return 0; |
779 | | } |
780 | | # endif |
781 | | |
782 | 2.17M | return 1; |
783 | 2.17M | } |
784 | | |
785 | | void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock) |
786 | 96 | { |
787 | 96 | if (lock == NULL) |
788 | 21 | return; |
789 | | |
790 | 75 | # ifdef USE_RWLOCK |
791 | 75 | pthread_rwlock_destroy(lock); |
792 | | # else |
793 | | pthread_mutex_destroy(lock); |
794 | | # endif |
795 | 75 | OPENSSL_free(lock); |
796 | | |
797 | 75 | return; |
798 | 96 | } |
799 | | |
800 | | int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void)) |
801 | 10.2k | { |
802 | 10.2k | if (pthread_once(once, init) != 0) |
803 | 0 | return 0; |
804 | | |
805 | 10.2k | return 1; |
806 | 10.2k | } |
807 | | |
808 | | int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *)) |
809 | 39 | { |
810 | 39 | if (pthread_key_create(key, cleanup) != 0) |
811 | 0 | return 0; |
812 | | |
813 | 39 | return 1; |
814 | 39 | } |
815 | | |
816 | | void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key) |
817 | 4.30k | { |
818 | 4.30k | return pthread_getspecific(*key); |
819 | 4.30k | } |
820 | | |
821 | | int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val) |
822 | 23 | { |
823 | 23 | if (pthread_setspecific(*key, val) != 0) |
824 | 0 | return 0; |
825 | | |
826 | 23 | return 1; |
827 | 23 | } |
828 | | |
829 | | int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key) |
830 | 21 | { |
831 | 21 | if (pthread_key_delete(*key) != 0) |
832 | 0 | return 0; |
833 | | |
834 | 21 | return 1; |
835 | 21 | } |
836 | | |
837 | | CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void) |
838 | 0 | { |
839 | 0 | return pthread_self(); |
840 | 0 | } |
841 | | |
842 | | int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b) |
843 | 0 | { |
844 | 0 | return pthread_equal(a, b); |
845 | 0 | } |
846 | | |
847 | | int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock) |
848 | 3.27k | { |
849 | 3.27k | # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS) |
850 | 3.27k | if (__atomic_is_lock_free(sizeof(*val), val)) { |
851 | 3.27k | *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL); |
852 | 3.27k | return 1; |
853 | 3.27k | } |
854 | | # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11)) |
855 | | /* This will work for all future Solaris versions. */ |
856 | | if (ret != NULL) { |
857 | | *ret = atomic_add_int_nv((volatile unsigned int *)val, amount); |
858 | | return 1; |
859 | | } |
860 | | # endif |
861 | 0 | if (lock == NULL || !CRYPTO_THREAD_write_lock(lock)) |
862 | 0 | return 0; |
863 | | |
864 | 0 | *val += amount; |
865 | 0 | *ret = *val; |
866 | |
|
867 | 0 | if (!CRYPTO_THREAD_unlock(lock)) |
868 | 0 | return 0; |
869 | | |
870 | 0 | return 1; |
871 | 0 | } |
872 | | |
873 | | int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret, |
874 | | CRYPTO_RWLOCK *lock) |
875 | 8 | { |
876 | 8 | # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS) |
877 | 8 | if (__atomic_is_lock_free(sizeof(*val), val)) { |
878 | 8 | *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL); |
879 | 8 | return 1; |
880 | 8 | } |
881 | | # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11)) |
882 | | /* This will work for all future Solaris versions. */ |
883 | | if (ret != NULL) { |
884 | | *ret = atomic_or_64_nv(val, op); |
885 | | return 1; |
886 | | } |
887 | | # endif |
888 | 0 | if (lock == NULL || !CRYPTO_THREAD_write_lock(lock)) |
889 | 0 | return 0; |
890 | 0 | *val |= op; |
891 | 0 | *ret = *val; |
892 | |
|
893 | 0 | if (!CRYPTO_THREAD_unlock(lock)) |
894 | 0 | return 0; |
895 | | |
896 | 0 | return 1; |
897 | 0 | } |
898 | | |
899 | | int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock) |
900 | 40.6k | { |
901 | 40.6k | # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) |
902 | 40.6k | if (__atomic_is_lock_free(sizeof(*val), val)) { |
903 | 40.6k | __atomic_load(val, ret, __ATOMIC_ACQUIRE); |
904 | 40.6k | return 1; |
905 | 40.6k | } |
906 | | # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11)) |
907 | | /* This will work for all future Solaris versions. */ |
908 | | if (ret != NULL) { |
909 | | *ret = atomic_or_64_nv(val, 0); |
910 | | return 1; |
911 | | } |
912 | | # endif |
913 | 0 | if (lock == NULL || !CRYPTO_THREAD_read_lock(lock)) |
914 | 0 | return 0; |
915 | 0 | *ret = *val; |
916 | 0 | if (!CRYPTO_THREAD_unlock(lock)) |
917 | 0 | return 0; |
918 | | |
919 | 0 | return 1; |
920 | 0 | } |
921 | | |
922 | | int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock) |
923 | 0 | { |
924 | 0 | # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) |
925 | 0 | if (__atomic_is_lock_free(sizeof(*dst), dst)) { |
926 | 0 | __atomic_store(dst, &val, __ATOMIC_RELEASE); |
927 | 0 | return 1; |
928 | 0 | } |
929 | | # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11)) |
930 | | /* This will work for all future Solaris versions. */ |
931 | | if (ret != NULL) { |
932 | | atomic_swap_64(dst, val); |
933 | | return 1; |
934 | | } |
935 | | # endif |
936 | 0 | if (lock == NULL || !CRYPTO_THREAD_read_lock(lock)) |
937 | 0 | return 0; |
938 | 0 | *dst = val; |
939 | 0 | if (!CRYPTO_THREAD_unlock(lock)) |
940 | 0 | return 0; |
941 | | |
942 | 0 | return 1; |
943 | 0 | } |
944 | | |
945 | | int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock) |
946 | 0 | { |
947 | 0 | # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) |
948 | 0 | if (__atomic_is_lock_free(sizeof(*val), val)) { |
949 | 0 | __atomic_load(val, ret, __ATOMIC_ACQUIRE); |
950 | 0 | return 1; |
951 | 0 | } |
952 | | # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11)) |
953 | | /* This will work for all future Solaris versions. */ |
954 | | if (ret != NULL) { |
955 | | *ret = (int *)atomic_or_uint_nv((unsigned int *)val, 0); |
956 | | return 1; |
957 | | } |
958 | | # endif |
959 | 0 | if (lock == NULL || !CRYPTO_THREAD_read_lock(lock)) |
960 | 0 | return 0; |
961 | 0 | *ret = *val; |
962 | 0 | if (!CRYPTO_THREAD_unlock(lock)) |
963 | 0 | return 0; |
964 | | |
965 | 0 | return 1; |
966 | 0 | } |
967 | | |
# ifndef FIPS_MODULE
/* Performs no work in this implementation and always reports success (1). */
int openssl_init_fork_handlers(void)
{
    const int ok = 1;

    return ok;
}
# endif /* FIPS_MODULE */
974 | | |
/* Return the current process ID, as reported by getpid(). */
int openssl_get_fork_id(void)
{
    int fork_id = getpid();

    return fork_id;
}
979 | | #endif |