/src/haproxy/include/import/plock.h
Line | Count | Source |
1 | | /* plock - progressive locks |
2 | | * |
3 | | * Copyright (C) 2012-2017 Willy Tarreau <w@1wt.eu> |
4 | | * |
5 | | * Permission is hereby granted, free of charge, to any person obtaining |
6 | | * a copy of this software and associated documentation files (the |
7 | | * "Software"), to deal in the Software without restriction, including |
8 | | * without limitation the rights to use, copy, modify, merge, publish, |
9 | | * distribute, sublicense, and/or sell copies of the Software, and to |
10 | | * permit persons to whom the Software is furnished to do so, subject to |
11 | | * the following conditions: |
12 | | * |
13 | | * The above copyright notice and this permission notice shall be |
14 | | * included in all copies or substantial portions of the Software. |
15 | | * |
16 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
17 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
18 | | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
19 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
20 | | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
21 | | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
22 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
23 | | * OTHER DEALINGS IN THE SOFTWARE. |
24 | | */ |
25 | | |
26 | | #ifndef PL_PLOCK_H |
27 | | #define PL_PLOCK_H |
28 | | |
29 | | #include "atomic-ops.h" |
30 | | #ifdef _POSIX_PRIORITY_SCHEDULING |
31 | | #include <sched.h> |
32 | | #endif |
33 | | |
34 | | /* 64 bit */ |
35 | | #define PLOCK64_RL_1 0x0000000000000004ULL |
36 | | #define PLOCK64_RL_2PL 0x00000000FFFFFFF8ULL |
37 | | #define PLOCK64_RL_ANY 0x00000000FFFFFFFCULL |
38 | | #define PLOCK64_SL_1 0x0000000100000000ULL |
39 | | #define PLOCK64_SL_ANY 0x0000000300000000ULL |
40 | | #define PLOCK64_WL_1 0x0000000400000000ULL |
41 | | #define PLOCK64_WL_2PL 0xFFFFFFF800000000ULL |
42 | | #define PLOCK64_WL_ANY 0xFFFFFFFC00000000ULL |
43 | | |
44 | | /* 32 bit */ |
45 | | #define PLOCK32_RL_1 0x00000004 |
46 | | #define PLOCK32_RL_2PL 0x0000FFF8 |
47 | | #define PLOCK32_RL_ANY 0x0000FFFC |
48 | | #define PLOCK32_SL_1 0x00010000 |
49 | | #define PLOCK32_SL_ANY 0x00030000 |
50 | | #define PLOCK32_WL_1 0x00040000 |
51 | | #define PLOCK32_WL_2PL 0xFFF80000 |
52 | | #define PLOCK32_WL_ANY 0xFFFC0000 |
53 | | |
54 | | /* dereferences <*p> as unsigned long without causing aliasing issues */ |
55 | 0 | #define pl_deref_long(p) ({ volatile unsigned long *__pl_l = (unsigned long *)(p); *__pl_l; }) |
56 | | |
57 | | /* dereferences <*p> as unsigned int without causing aliasing issues */ |
58 | | #define pl_deref_int(p) ({ volatile unsigned int *__pl_i = (unsigned int *)(p); *__pl_i; }) |
59 | | |
60 | | /* This function waits for <lock> to release all bits covered by <mask>, and |
61 | | * enforces an exponential backoff using CPU pauses to limit the pollution to |
62 | | * the other threads' caches. The progression follows (1.5^N)-1, limited to |
63 | | * 16384 iterations, which is way sufficient even for very large numbers of |
64 | | * threads. It's possible to disable exponential backoff (EBO) for debugging |
65 | | * purposes by setting PLOCK_DISABLE_EBO, in which case the function will be |
66 | | * replaced with a simpler macro. This may for example be useful to more |
67 | | * easily track callers' CPU usage. The macro was not designed to be used |
68 | | * outside of the functions defined here. |
69 | | */ |
70 | | #if defined(PLOCK_DISABLE_EBO) |
71 | | #define pl_wait_unlock_long(lock, mask) \ |
72 | | ({ \ |
73 | | unsigned long _r; \ |
74 | | do { \ |
75 | | pl_cpu_relax(); \ |
76 | | _r = pl_deref_long(lock); \ |
77 | | } while (_r & mask); \ |
78 | | _r; /* return value */ \ |
79 | | }) |
80 | | #else /* not PLOCK_DISABLE_EBO */ |
81 | | __attribute__((unused,always_inline,no_instrument_function)) inline |
82 | | static unsigned long __pl_wait_unlock_long(const unsigned long *lock, const unsigned long mask) |
83 | 0 | { |
84 | 0 | unsigned long ret; |
85 | 0 | unsigned int m = 0; |
86 | 0 |
87 | 0 | do { |
88 | 0 | unsigned int loops = m; |
89 | 0 |
90 | 0 | #ifdef _POSIX_PRIORITY_SCHEDULING |
91 | 0 | if (loops >= 65536) { |
92 | 0 | sched_yield(); |
93 | 0 | loops -= 32768; |
94 | 0 | } |
95 | 0 | #endif |
96 | 0 | for (; loops >= 90; loops --) |
97 | 0 | pl_cpu_relax(); |
98 | 0 |
99 | 0 | for (; loops >= 1; loops--) |
100 | 0 | pl_barrier(); |
101 | 0 |
102 | 0 | ret = pl_load(lock); |
103 | 0 | if (__builtin_expect(ret & mask, 0) == 0) |
104 | 0 | break; |
105 | 0 |
106 | 0 | /* the below produces an exponential growth with loops to lower |
107 | 0 | * values and still growing. This allows competing threads to |
108 | 0 | * wait different times once the threshold is reached. |
109 | 0 | */ |
110 | 0 | m = ((m + (m >> 2)) + 1) & 0x1ffff; |
111 | 0 | } while (1); |
112 | 0 |
113 | 0 | return ret; |
114 | 0 | } |
115 | | |
116 | | # if defined(PLOCK_INLINE_EBO) |
117 | | __attribute__((unused,always_inline,no_instrument_function)) inline |
118 | | # else |
119 | | __attribute__((unused,noinline,no_instrument_function)) |
120 | | # endif |
121 | | static unsigned long pl_wait_unlock_long(const unsigned long *lock, const unsigned long mask) |
122 | 0 | { |
123 | 0 | return __pl_wait_unlock_long(lock, mask); |
124 | 0 | } |
125 | | #endif /* PLOCK_DISABLE_EBO */ |
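As a side note for readers of the backoff logic above, the following standalone sketch (not part of the header, written purely for illustration) prints the successive wait budgets produced by the recurrence m = ((m + (m >> 2)) + 1) & 0x1ffff used in __pl_wait_unlock_long(), making the roughly geometric growth that spreads competing threads apart easy to see.

    #include <stdio.h>

    int main(void)
    {
        unsigned int m = 0;

        /* same recurrence as in __pl_wait_unlock_long()/__pl_wait_unlock_int() */
        for (int i = 0; i < 24; i++) {
            m = ((m + (m >> 2)) + 1) & 0x1ffff;
            printf("round %2d: loop budget = %u\n", i, m);
        }
        return 0;
    }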
126 | | |
127 | | /* This function waits for <lock> to release all bits covered by <mask>, and |
128 | | * enforces an exponential backoff using CPU pauses to limit the pollution to |
129 | | * the other threads' caches. The progression follows (2^N)-1, limited to 255 |
130 | | * iterations, which is way sufficient even for very large numbers of threads. |
131 | | * The function slightly benefits from size optimization under gcc, but Clang |
132 | | * cannot do it, so it's not done here, as it doesn't make a big difference. |
133 | | * It is possible to disable exponential backoff (EBO) for debugging purposes |
134 | | * by setting PLOCK_DISABLE_EBO, in which case the function will be replaced |
135 | | * with a simpler macro. This may for example be useful to more easily track |
136 | | * callers' CPU usage. The macro was not designed to be used outside of the |
137 | | * functions defined here. |
138 | | */ |
139 | | #if defined(PLOCK_DISABLE_EBO) |
140 | | #define pl_wait_unlock_int(lock, mask) \ |
141 | | ({ \ |
142 | | unsigned int _r; \ |
143 | | do { \ |
144 | | pl_cpu_relax(); \ |
145 | | _r = pl_deref_int(lock); \ |
146 | | } while (_r & mask); \ |
147 | | _r; /* return value */ \ |
148 | | }) |
149 | | #else |
150 | | __attribute__((unused,always_inline,no_instrument_function)) inline |
151 | | static unsigned int __pl_wait_unlock_int(const unsigned int *lock, const unsigned int mask) |
152 | 0 | { |
153 | 0 | unsigned int ret; |
154 | 0 | unsigned int m = 0; |
155 | 0 |
156 | 0 | do { |
157 | 0 | unsigned int loops = m; |
158 | 0 |
159 | 0 | #ifdef _POSIX_PRIORITY_SCHEDULING |
160 | 0 | if (loops >= 65536) { |
161 | 0 | sched_yield(); |
162 | 0 | loops -= 32768; |
163 | 0 | } |
164 | 0 | #endif |
165 | 0 | for (; loops >= 200; loops -= 10) |
166 | 0 | pl_cpu_relax(); |
167 | 0 |
168 | 0 | for (; loops >= 1; loops--) |
169 | 0 | pl_barrier(); |
170 | 0 |
171 | 0 | ret = pl_deref_int(lock); |
172 | 0 | if (__builtin_expect(ret & mask, 0) == 0) |
173 | 0 | break; |
174 | 0 |
175 | 0 | /* the below produces an exponential growth with loops to lower |
176 | 0 | * values and still growing. This allows competing threads to |
177 | 0 | * wait different times once the threshold is reached. |
178 | 0 | */ |
179 | 0 | m = ((m + (m >> 2)) + 1) & 0x1ffff; |
180 | 0 | } while (1); |
181 | 0 |
182 | 0 | return ret; |
183 | 0 | } |
184 | | |
185 | | # if defined(PLOCK_INLINE_EBO) |
186 | | __attribute__((unused,always_inline,no_instrument_function)) inline |
187 | | # else |
188 | | __attribute__((unused,noinline,no_instrument_function)) |
189 | | # endif |
190 | | static unsigned int pl_wait_unlock_int(const unsigned int *lock, const unsigned int mask) |
191 | 0 | { |
192 | 0 | return __pl_wait_unlock_int(lock, mask); |
193 | 0 | } |
194 | | #endif /* PLOCK_DISABLE_EBO */ |
195 | | |
196 | | /* This function waits for <lock> to change from value <prev> and returns the |
197 | | * new value. It enforces an exponential backoff using CPU pauses to limit the |
198 | | * pollution to the other threads' caches. The progression follows (2^N)-1, |
199 | | * limited to 255 iterations, which is way sufficient even for very large |
200 | | * numbers of threads. It is designed to be called after a first test which |
201 | | * retrieves the previous value, so it starts by waiting. The function slightly |
202 | | * benefits from size optimization under gcc, but Clang cannot do it, so it's |
203 | | * not done here, as it doesn't make a big difference. |
204 | | */ |
205 | | __attribute__((unused,noinline,no_instrument_function)) |
206 | | static unsigned long pl_wait_new_long(const unsigned long *lock, const unsigned long prev) |
207 | 0 | { |
208 | 0 | unsigned char m = 0; |
209 | 0 | unsigned long curr; |
210 | |
211 | 0 | do { |
212 | 0 | unsigned char loops = m + 1; |
213 | 0 | m = (m << 1) + 1; |
214 | 0 | do { |
215 | 0 | pl_cpu_relax(); |
216 | 0 | } while (__builtin_expect(--loops, 0)); |
217 | 0 | curr = pl_deref_long(lock); |
218 | 0 | } while (__builtin_expect(curr == prev, 0)); |
219 | 0 | return curr; |
220 | 0 | } |
221 | | |
222 | | /* This function waits for <lock> to change from value <prev> and returns the |
223 | | * new value. It enforces an exponential backoff using CPU pauses to limit the |
224 | | * pollution to the other threads' caches. The progression follows (2^N)-1, |
225 | | * limited to 255 iterations, which is way sufficient even for very large |
226 | | * numbers of threads. It is designed to be called after a first test which |
227 | | * retrieves the previous value, so it starts by waiting. The function slightly |
228 | | * benefits from size optimization under gcc, but Clang cannot do it, so it's |
229 | | * not done here, as it doesn't make a big difference. |
230 | | */ |
231 | | __attribute__((unused,noinline,no_instrument_function)) |
232 | | static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int prev) |
233 | 0 | { |
234 | 0 | unsigned char m = 0; |
235 | 0 | unsigned int curr; |
236 | 0 |
237 | 0 | do { |
238 | 0 | unsigned char loops = m + 1; |
239 | 0 | m = (m << 1) + 1; |
240 | 0 | do { |
241 | 0 | pl_cpu_relax(); |
242 | 0 | } while (__builtin_expect(--loops, 0)); |
243 | 0 | curr = pl_deref_int(lock); |
244 | 0 | } while (__builtin_expect(curr == prev, 0)); |
245 | 0 | return curr; |
246 | 0 | } |
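To make the intended calling pattern of pl_wait_new_*() concrete, here is a small hypothetical sketch (identifiers invented, not part of the header): the caller first performs its own cheap read, and only falls back to pl_wait_new_int() when the value has not changed yet, which is exactly the "called after a first test" usage the comment above describes.

    static unsigned int wait_for_change(const unsigned int *word, unsigned int seen)
    {
        unsigned int now = pl_deref_int(word);      /* first, a plain test */

        if (now == seen)
            now = pl_wait_new_int(word, seen);      /* then backoff-wait for a change */
        return now;
    }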
247 | | |
248 | | /* request shared read access (R), return non-zero on success, otherwise 0 */ |
249 | | #define pl_try_r(lock) ( \ |
250 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
251 | | register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_WL_ANY; \ |
252 | | pl_barrier(); \ |
253 | | if (!__builtin_expect(__pl_r, 0)) { \ |
254 | | __pl_r = pl_ldadd_acq((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY; \ |
255 | | if (__builtin_expect(__pl_r, 0)) \ |
256 | | pl_sub_noret((lock), PLOCK64_RL_1); \ |
257 | | } \ |
258 | | !__pl_r; /* return value */ \ |
259 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
260 | | register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_WL_ANY; \ |
261 | | pl_barrier(); \ |
262 | | if (!__builtin_expect(__pl_r, 0)) { \ |
263 | | __pl_r = pl_ldadd_acq((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY; \ |
264 | | if (__builtin_expect(__pl_r, 0)) \ |
265 | | pl_sub_noret((lock), PLOCK32_RL_1); \ |
266 | | } \ |
267 | | !__pl_r; /* return value */ \ |
268 | | }) : ({ \ |
269 | | void __unsupported_argument_size_for_pl_try_r__(char *,int); \ |
270 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
271 | | __unsupported_argument_size_for_pl_try_r__(__FILE__,__LINE__); \ |
272 | | 0; \ |
273 | | }) \ |
274 | | ) |
275 | | |
276 | | /* request shared read access (R) and wait for it. In order not to disturb a W |
277 | | * lock waiting for all readers to leave, we first check if a W lock is held |
278 | | * before trying to claim the R lock. |
279 | | */ |
280 | | #define pl_take_r(lock) \ |
281 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
282 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
283 | | register unsigned long __set_r = PLOCK64_RL_1; \ |
284 | | register unsigned long __msk_r = PLOCK64_WL_ANY; \ |
285 | | register unsigned long __old_r = pl_cmpxchg(__lk_r, 0, __set_r); \ |
286 | | if (__old_r) { \ |
287 | | while (1) { \ |
288 | | if (__old_r & __msk_r) \ |
289 | | pl_wait_unlock_long(__lk_r, __msk_r); \ |
290 | | if (!(pl_ldadd_acq(__lk_r, __set_r) & __msk_r)) \ |
291 | | break; \ |
292 | | __old_r = pl_sub_lax(__lk_r, __set_r); \ |
293 | | } \ |
294 | | } \ |
295 | | pl_barrier(); \ |
296 | | 0; \ |
297 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
298 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
299 | | register unsigned int __set_r = PLOCK32_RL_1; \ |
300 | | register unsigned int __msk_r = PLOCK32_WL_ANY; \ |
301 | | register unsigned int __old_r = pl_cmpxchg(__lk_r, 0, __set_r); \ |
302 | | if (__old_r) { \ |
303 | | while (1) { \ |
304 | | if (__old_r & __msk_r) \ |
305 | | pl_wait_unlock_int(__lk_r, __msk_r); \ |
306 | | if (!(pl_ldadd_acq(__lk_r, __set_r) & __msk_r)) \ |
307 | | break; \ |
308 | | __old_r = pl_sub_lax(__lk_r, __set_r); \ |
309 | | } \ |
310 | | } \ |
311 | | pl_barrier(); \ |
312 | | 0; \ |
313 | | }) : ({ \ |
314 | | void __unsupported_argument_size_for_pl_take_r__(char *,int); \ |
315 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
316 | | __unsupported_argument_size_for_pl_take_r__(__FILE__,__LINE__); \ |
317 | | 0; \ |
318 | | }) |
319 | | |
320 | | /* release the read access (R) lock */ |
321 | | #define pl_drop_r(lock) ( \ |
322 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
323 | | pl_barrier(); \ |
324 | | pl_sub_noret_rel(lock, PLOCK64_RL_1); \ |
325 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
326 | | pl_barrier(); \ |
327 | | pl_sub_noret_rel(lock, PLOCK32_RL_1); \ |
328 | | }) : ({ \ |
329 | | void __unsupported_argument_size_for_pl_drop_r__(char *,int); \ |
330 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
331 | | __unsupported_argument_size_for_pl_drop_r__(__FILE__,__LINE__); \ |
332 | | }) \ |
333 | | ) |
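A hypothetical usage sketch of the R primitives above (the lock word and function names are invented for illustration and are not part of this header): the lock is a plain integer initialized to zero, and both a blocking and a non-blocking reader are shown.

    static unsigned long map_lock = 0;              /* shared lock word, starts unlocked */

    static void read_blocking(void)
    {
        pl_take_r(&map_lock);                       /* waits while a writer is present */
        /* ... read-only accesses to the shared structure ... */
        pl_drop_r(&map_lock);
    }

    static int read_opportunistic(void)
    {
        if (!pl_try_r(&map_lock))                   /* non-zero only when no writer is there */
            return 0;
        /* ... read-only accesses ... */
        pl_drop_r(&map_lock);
        return 1;
    }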
334 | | |
335 | | /* request a seek access (S), return non-zero on success, otherwise 0 */ |
336 | | #define pl_try_s(lock) ( \ |
337 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
338 | | register unsigned long __pl_r = pl_deref_long(lock); \ |
339 | | pl_barrier(); \ |
340 | | if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ |
341 | | __pl_r = pl_ldadd_acq((lock), PLOCK64_SL_1 | PLOCK64_RL_1) & \ |
342 | | (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \ |
343 | | if (__builtin_expect(__pl_r, 0)) \ |
344 | | pl_sub_noret_lax((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \ |
345 | | } \ |
346 | | !__pl_r; /* return value */ \ |
347 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
348 | | register unsigned int __pl_r = pl_deref_int(lock); \ |
349 | | pl_barrier(); \ |
350 | | if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ |
351 | | __pl_r = pl_ldadd_acq((lock), PLOCK32_SL_1 | PLOCK32_RL_1) & \ |
352 | | (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \ |
353 | | if (__builtin_expect(__pl_r, 0)) \ |
354 | | pl_sub_noret_lax((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \ |
355 | | } \ |
356 | | !__pl_r; /* return value */ \ |
357 | | }) : ({ \ |
358 | | void __unsupported_argument_size_for_pl_try_s__(char *,int); \ |
359 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
360 | | __unsupported_argument_size_for_pl_try_s__(__FILE__,__LINE__); \ |
361 | | 0; \ |
362 | | }) \ |
363 | | ) |
364 | | |
365 | | /* request a seek access (S) and wait for it. The lock is immediately claimed, |
366 | | * and only upon failure an exponential backoff is used. S locks rarely compete |
367 | | * with W locks so S will generally not disturb W. As the S lock may be used as |
368 | | * a spinlock, it's important to grab it as fast as possible. |
369 | | */ |
370 | | #define pl_take_s(lock) \ |
371 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
372 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
373 | | register unsigned long __set_r = PLOCK64_SL_1 | PLOCK64_RL_1; \ |
374 | | register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY; \ |
375 | | while (1) { \ |
376 | | if (!__builtin_expect(pl_ldadd_acq(__lk_r, __set_r) & __msk_r, 0)) \ |
377 | | break; \ |
378 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
379 | | pl_wait_unlock_long(__lk_r, __msk_r); \ |
380 | | } \ |
381 | | pl_barrier(); \ |
382 | | 0; \ |
383 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
384 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
385 | | register unsigned int __set_r = PLOCK32_SL_1 | PLOCK32_RL_1; \ |
386 | | register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY; \ |
387 | | while (1) { \ |
388 | | if (!__builtin_expect(pl_ldadd_acq(__lk_r, __set_r) & __msk_r, 0)) \ |
389 | | break; \ |
390 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
391 | | pl_wait_unlock_int(__lk_r, __msk_r); \ |
392 | | } \ |
393 | | pl_barrier(); \ |
394 | | 0; \ |
395 | | }) : ({ \ |
396 | | void __unsupported_argument_size_for_pl_take_s__(char *,int); \ |
397 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
398 | | __unsupported_argument_size_for_pl_take_s__(__FILE__,__LINE__); \ |
399 | | 0; \ |
400 | | }) |
401 | | |
402 | | /* release the seek access (S) lock */ |
403 | | #define pl_drop_s(lock) ( \ |
404 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
405 | | pl_barrier(); \ |
406 | | pl_sub_noret_rel(lock, PLOCK64_SL_1 + PLOCK64_RL_1); \ |
407 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
408 | | pl_barrier(); \ |
409 | | pl_sub_noret_rel(lock, PLOCK32_SL_1 + PLOCK32_RL_1); \ |
410 | | }) : ({ \ |
411 | | void __unsupported_argument_size_for_pl_drop_s__(char *,int); \ |
412 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
413 | | __unsupported_argument_size_for_pl_drop_s__(__FILE__,__LINE__); \ |
414 | | }) \ |
415 | | ) |
416 | | |
417 | | /* drop the S lock and go back to the R lock */ |
418 | | #define pl_stor(lock) ( \ |
419 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
420 | | pl_barrier(); \ |
421 | | pl_sub_noret(lock, PLOCK64_SL_1); \ |
422 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
423 | | pl_barrier(); \ |
424 | | pl_sub_noret(lock, PLOCK32_SL_1); \ |
425 | | }) : ({ \ |
426 | | void __unsupported_argument_size_for_pl_stor__(char *,int); \ |
427 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
428 | | __unsupported_argument_size_for_pl_stor__(__FILE__,__LINE__); \ |
429 | | }) \ |
430 | | ) |
431 | | |
432 | | /* take the W lock under the S lock */ |
433 | | #define pl_stow(lock) ( \ |
434 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
435 | | register unsigned long __pl_r = pl_ldadd((lock), PLOCK64_WL_1); \ |
436 | | if (__pl_r & (PLOCK64_RL_ANY & ~PLOCK64_RL_1)) \ |
437 | | __pl_r = pl_wait_unlock_long((const unsigned long*)lock, (PLOCK64_RL_ANY & ~PLOCK64_RL_1)); \ |
438 | | pl_barrier(); \ |
439 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
440 | | register unsigned int __pl_r = pl_ldadd((lock), PLOCK32_WL_1); \ |
441 | | if (__pl_r & (PLOCK32_RL_ANY & ~PLOCK32_RL_1)) \ |
442 | | __pl_r = pl_wait_unlock_int((const unsigned int*)lock, (PLOCK32_RL_ANY & ~PLOCK32_RL_1)); \ |
443 | | pl_barrier(); \ |
444 | | }) : ({ \ |
445 | | void __unsupported_argument_size_for_pl_stow__(char *,int); \ |
446 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
447 | | __unsupported_argument_size_for_pl_stow__(__FILE__,__LINE__); \ |
448 | | }) \ |
449 | | ) |
450 | | |
451 | | /* drop the W lock and go back to the S lock */ |
452 | | #define pl_wtos(lock) ( \ |
453 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
454 | | pl_barrier(); \ |
455 | | pl_sub_noret(lock, PLOCK64_WL_1); \ |
456 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
457 | | pl_barrier(); \ |
458 | | pl_sub_noret(lock, PLOCK32_WL_1); \ |
459 | | }) : ({ \ |
460 | | void __unsupported_argument_size_for_pl_wtos__(char *,int); \ |
461 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
462 | | __unsupported_argument_size_for_pl_wtos__(__FILE__,__LINE__); \ |
463 | | }) \ |
464 | | ) |
465 | | |
466 | | /* drop the W lock and go back to the R lock */ |
467 | | #define pl_wtor(lock) ( \ |
468 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
469 | | pl_barrier(); \ |
470 | | pl_sub_noret(lock, PLOCK64_WL_1 | PLOCK64_SL_1); \ |
471 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
472 | | pl_barrier(); \ |
473 | | pl_sub_noret(lock, PLOCK32_WL_1 | PLOCK32_SL_1); \ |
474 | | }) : ({ \ |
475 | | void __unsupported_argument_size_for_pl_wtor__(char *,int); \ |
476 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
477 | | __unsupported_argument_size_for_pl_wtor__(__FILE__,__LINE__); \ |
478 | | }) \ |
479 | | ) |
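The S/W transitions above support a "seek first, write only if needed" pattern. The sketch below is a hypothetical illustration (identifiers invented, not part of the header): the thread inspects the structure under S without blocking readers, and upgrades to W only when it has to modify it.

    static unsigned long tree_lock = 0;             /* shared lock word */

    static void fix_entry_if_needed(int must_modify)
    {
        pl_take_s(&tree_lock);                      /* S: excludes other S/W, readers still run */

        if (!must_modify) {
            pl_drop_s(&tree_lock);                  /* nothing to change */
            return;
        }

        pl_stow(&tree_lock);                        /* upgrade to W, waits for readers to leave */
        /* ... exclusive modification ... */
        pl_drop_w(&tree_lock);                      /* drops W, S and the internal R bit at once */
    }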
480 | | |
481 | | /* request a write access (W), return non-zero on success, otherwise 0. |
482 | | * |
483 | | * Below there is something important: by taking both W and S, we will cause |
484 | | * an overflow of W at 4/5 of the maximum value that can be stored into W due |
485 | | * to the fact that S is 2 bits, so we're effectively adding 5 to the word |
486 | | * composed by W:S. But for all words multiple of 4 bits, the maximum value is |
487 | | * multiple of 15 thus of 5. So the largest value we can store with all bits |
488 | | * set to one will be met by adding 5, and then adding 5 again will place value |
489 | | * 1 in W and value 0 in S, so we never leave W with 0. Also, even upon such an |
490 | | * overflow, there's no risk to confuse it with an atomic lock because R is not |
491 | | * null since it will not have overflown. For 32-bit locks, this situation |
492 | | * happens when exactly 13108 threads try to grab the lock at once, W=1, S=0 |
493 | | * and R=13108. For 64-bit locks, it happens at 858993460 concurrent writers |
494 | | * where W=1, S=0 and R=858993460. |
495 | | */ |
496 | | #define pl_try_w(lock) ( \ |
497 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
498 | | register unsigned long __pl_r = pl_deref_long(lock); \ |
499 | | pl_barrier(); \ |
500 | | if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ |
501 | | __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);\ |
502 | | if (__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ |
503 | | /* a writer, seeker or atomic is present, let's leave */ \ |
504 | | pl_sub_noret_lax((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);\ |
505 | | __pl_r &= (PLOCK64_WL_ANY | PLOCK64_SL_ANY); /* return value */\ |
506 | | } else { \ |
507 | | /* wait for all other readers to leave */ \ |
508 | | while (__pl_r) \ |
509 | | __pl_r = pl_deref_long(lock) - \ |
510 | | (PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ |
511 | | } \ |
512 | | } \ |
513 | | !__pl_r; /* return value */ \ |
514 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
515 | | register unsigned int __pl_r = pl_deref_int(lock); \ |
516 | | pl_barrier(); \ |
517 | | if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ |
518 | | __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);\ |
519 | | if (__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ |
520 | | /* a writer, seeker or atomic is present, let's leave */ \ |
521 | | pl_sub_noret_lax((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);\ |
522 | | __pl_r &= (PLOCK32_WL_ANY | PLOCK32_SL_ANY); /* return value */\ |
523 | | } else { \ |
524 | | /* wait for all other readers to leave */ \ |
525 | | while (__pl_r) \ |
526 | | __pl_r = pl_deref_int(lock) - \ |
527 | | (PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ |
528 | | } \ |
529 | | } \ |
530 | | !__pl_r; /* return value */ \ |
531 | | }) : ({ \ |
532 | | void __unsupported_argument_size_for_pl_try_w__(char *,int); \ |
533 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
534 | | __unsupported_argument_size_for_pl_try_w__(__FILE__,__LINE__); \ |
535 | | 0; \ |
536 | | }) \ |
537 | | ) |
538 | | |
539 | | /* request a write access (W) and wait for it. The lock is immediately claimed, |
540 | | * and only upon failure an exponential backoff is used. |
541 | | */ |
542 | | #define pl_take_w(lock) \ |
543 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
544 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
545 | | register unsigned long __set_r = PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1; \ |
546 | | register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY; \ |
547 | | register unsigned long __pl_r; \ |
548 | | while (1) { \ |
549 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ |
550 | | if (!__builtin_expect(__pl_r & __msk_r, 0)) \ |
551 | | break; \ |
552 | | if (!__builtin_expect(__pl_r & PLOCK64_WL_ANY, 0)) { \ |
553 | | /* S only: let it finish but impose ourselves */ \ |
554 | | pl_sub_noret_lax(__lk_r, PLOCK64_RL_1); \ |
555 | | __pl_r = pl_wait_unlock_long(__lk_r, PLOCK64_RL_ANY); \ |
556 | | __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_RL_1); \ |
557 | | break; \ |
558 | | } \ |
559 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
560 | | __pl_r = pl_wait_unlock_long(__lk_r, __msk_r); \ |
561 | | } \ |
562 | | /* wait for all other readers to leave */ \ |
563 | | if (__builtin_expect(__pl_r & PLOCK64_RL_ANY, 0)) \ |
564 | | __pl_r = pl_wait_unlock_long(__lk_r, (PLOCK64_RL_ANY & ~PLOCK64_RL_1)) - __set_r; \ |
565 | | pl_barrier(); \ |
566 | | 0; \ |
567 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
568 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
569 | | register unsigned int __set_r = PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1; \ |
570 | | register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY; \ |
571 | | register unsigned int __pl_r; \ |
572 | | while (1) { \ |
573 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ |
574 | | if (!__builtin_expect(__pl_r & __msk_r, 0)) \ |
575 | | break; \ |
576 | | if (!__builtin_expect(__pl_r & PLOCK32_WL_ANY, 0)) { \ |
577 | | /* S only: let it finish but impose ourselves */ \ |
578 | | pl_sub_noret_lax(__lk_r, PLOCK32_RL_1); \ |
579 | | __pl_r = pl_wait_unlock_int(__lk_r, PLOCK32_RL_ANY); \ |
580 | | __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_RL_1); \ |
581 | | break; \ |
582 | | } \ |
583 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
584 | | __pl_r = pl_wait_unlock_int(__lk_r, __msk_r); \ |
585 | | } \ |
586 | | /* wait for all other readers to leave */ \ |
587 | | if (__builtin_expect(__pl_r & PLOCK32_RL_ANY, 0)) \ |
588 | | __pl_r = pl_wait_unlock_int(__lk_r, (PLOCK32_RL_ANY & ~PLOCK32_RL_1)) - __set_r; \ |
589 | | pl_barrier(); \ |
590 | | 0; \ |
591 | | }) : ({ \ |
592 | | void __unsupported_argument_size_for_pl_take_w__(char *,int); \ |
593 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
594 | | __unsupported_argument_size_for_pl_take_w__(__FILE__,__LINE__); \ |
595 | | 0; \ |
596 | | }) |
597 | | |
598 | | /* drop the write (W) lock entirely */ |
599 | | #define pl_drop_w(lock) ( \ |
600 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
601 | | pl_barrier(); \ |
602 | | pl_sub_noret_rel(lock, PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ |
603 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
604 | | pl_barrier(); \ |
605 | | pl_sub_noret_rel(lock, PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ |
606 | | }) : ({ \ |
607 | | void __unsupported_argument_size_for_pl_drop_w__(char *,int); \ |
608 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
609 | | __unsupported_argument_size_for_pl_drop_w__(__FILE__,__LINE__); \ |
610 | | }) \ |
611 | | ) |
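For completeness, a direct-write sketch using only the W primitives (again hypothetical, identifiers invented): pl_take_w() blocks until fully exclusive, while pl_try_w() gives up as soon as a seeker, writer or atomic user is present.

    static unsigned long cfg_lock = 0;              /* shared lock word */

    static void update_blocking(void)
    {
        pl_take_w(&cfg_lock);                       /* fully exclusive once it returns */
        /* ... modifications ... */
        pl_drop_w(&cfg_lock);
    }

    static int update_opportunistic(void)
    {
        if (!pl_try_w(&cfg_lock))                   /* non-zero only once fully exclusive */
            return 0;
        /* ... modifications ... */
        pl_drop_w(&cfg_lock);
        return 1;
    }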
612 | | |
613 | | /* Try to upgrade from R to S, return non-zero on success, otherwise 0. |
614 | | * This lock will fail if S or W are already held. In case of failure to grab |
615 | | * the lock, it MUST NOT be retried without first dropping R, or it may never |
616 | | * complete due to S waiting for R to leave before upgrading to W. |
617 | | */ |
618 | | #define pl_try_rtos(lock) ( \ |
619 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
620 | | register unsigned long __pl_r; \ |
621 | | __pl_r = pl_ldadd_acq((lock), PLOCK64_SL_1) & (PLOCK64_WL_ANY | PLOCK64_SL_ANY);\ |
622 | | if (__builtin_expect(__pl_r, 0)) \ |
623 | | pl_sub_noret_lax((lock), PLOCK64_SL_1); \ |
624 | | !__pl_r; /* return value */ \ |
625 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
626 | | register unsigned int __pl_r; \ |
627 | | __pl_r = pl_ldadd_acq((lock), PLOCK32_SL_1) & (PLOCK32_WL_ANY | PLOCK32_SL_ANY);\ |
628 | | if (__builtin_expect(__pl_r, 0)) \ |
629 | | pl_sub_noret_lax((lock), PLOCK32_SL_1); \ |
630 | | !__pl_r; /* return value */ \ |
631 | | }) : ({ \ |
632 | | void __unsupported_argument_size_for_pl_try_rtos__(char *,int); \ |
633 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
634 | | __unsupported_argument_size_for_pl_try_rtos__(__FILE__,__LINE__); \ |
635 | | 0; \ |
636 | | }) \ |
637 | | ) |
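Because a failed upgrade must not be retried while R is still held (see the warning above), a plausible calling pattern looks like the hypothetical sketch below (identifiers invented): on failure the reader drops R and restarts the whole sequence.

    static unsigned long idx_lock = 0;              /* shared lock word */

    static void lookup_then_adjust(void)
    {
        while (1) {
            pl_take_r(&idx_lock);
            /* ... read-side lookup ... */

            if (pl_try_rtos(&idx_lock)) {
                /* same state as after pl_take_s(): S plus the R we already held */
                /* ... seek-level work, possibly pl_stow() to write ... */
                pl_drop_s(&idx_lock);
                return;
            }
            pl_drop_r(&idx_lock);                   /* mandatory before retrying the upgrade */
        }
    }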
638 | | |
639 | | |
640 | | /* Try to upgrade from R to W, return non-zero on success, otherwise 0. |
641 | | * This lock will fail if S or W are already held. In case of failure to grab |
642 | | * the lock, it MUST NOT be retried without first dropping R, or it may never |
643 | | * complete due to S waiting for R to leave before upgrading to W. It waits for |
644 | | * the last readers to leave. |
645 | | */ |
646 | | #define pl_try_rtow(lock) ( \ |
647 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
648 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
649 | | register unsigned long __set_r = PLOCK64_WL_1 | PLOCK64_SL_1; \ |
650 | | register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY; \ |
651 | | register unsigned long __pl_r; \ |
652 | | pl_barrier(); \ |
653 | | while (1) { \ |
654 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ |
655 | | if (__builtin_expect(__pl_r & __msk_r, 0)) { \ |
656 | | if (pl_ldadd_lax(__lk_r, - __set_r)) \ |
657 | | break; /* the caller needs to drop the lock now */ \ |
658 | | continue; /* lock was released, try again */ \ |
659 | | } \ |
660 | | /* ok we're the only writer, wait for readers to leave */ \ |
661 | | while (__builtin_expect(__pl_r, 0)) \ |
662 | | __pl_r = pl_deref_long(__lk_r) - (PLOCK64_WL_1|PLOCK64_SL_1|PLOCK64_RL_1); \ |
663 | | /* now return with __pl_r = 0 */ \ |
664 | | break; \ |
665 | | } \ |
666 | | !__pl_r; /* return value */ \ |
667 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
668 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
669 | | register unsigned int __set_r = PLOCK32_WL_1 | PLOCK32_SL_1; \ |
670 | | register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY; \ |
671 | | register unsigned int __pl_r; \ |
672 | | pl_barrier(); \ |
673 | | while (1) { \ |
674 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ |
675 | | if (__builtin_expect(__pl_r & __msk_r, 0)) { \ |
676 | | if (pl_ldadd_lax(__lk_r, - __set_r)) \ |
677 | | break; /* the caller needs to drop the lock now */ \ |
678 | | continue; /* lock was released, try again */ \ |
679 | | } \ |
680 | | /* ok we're the only writer, wait for readers to leave */ \ |
681 | | while (__builtin_expect(__pl_r, 0)) \ |
682 | | __pl_r = pl_deref_int(__lk_r) - (PLOCK32_WL_1|PLOCK32_SL_1|PLOCK32_RL_1); \ |
683 | | /* now return with __pl_r = 0 */ \ |
684 | | break; \ |
685 | | } \ |
686 | | !__pl_r; /* return value */ \ |
687 | | }) : ({ \ |
688 | | void __unsupported_argument_size_for_pl_try_rtow__(char *,int); \ |
689 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
690 | | __unsupported_argument_size_for_pl_try_rtow__(__FILE__,__LINE__); \ |
691 | | 0; \ |
692 | | }) \ |
693 | | ) |
694 | | |
695 | | |
696 | | /* request atomic write access (A), return non-zero on success, otherwise 0. |
697 | | * It's a bit tricky as we only use the W bits for this and want to distinguish |
698 | | * between other atomic users and regular lock users. We have to give up if an |
699 | | * S lock appears. It's possible that such a lock stays hidden in the W bits |
700 | | * after an overflow, but in this case R is still held, ensuring we stay in the |
701 | | * loop until we discover the conflict. The lock only returns successfully if all |
702 | | * readers are gone (or converted to A). |
703 | | */ |
704 | | #define pl_try_a(lock) ( \ |
705 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
706 | | register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \ |
707 | | pl_barrier(); \ |
708 | | if (!__builtin_expect(__pl_r, 0)) { \ |
709 | | __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1); \ |
710 | | while (1) { \ |
711 | | if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \ |
712 | | pl_sub_noret_lax((lock), PLOCK64_WL_1); \ |
713 | | break; /* return !__pl_r */ \ |
714 | | } \ |
715 | | __pl_r &= PLOCK64_RL_ANY; \ |
716 | | if (!__builtin_expect(__pl_r, 0)) \ |
717 | | break; /* return !__pl_r */ \ |
718 | | __pl_r = pl_deref_long(lock); \ |
719 | | } \ |
720 | | } \ |
721 | | !__pl_r; /* return value */ \ |
722 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
723 | | register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \ |
724 | | pl_barrier(); \ |
725 | | if (!__builtin_expect(__pl_r, 0)) { \ |
726 | | __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1); \ |
727 | | while (1) { \ |
728 | | if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \ |
729 | | pl_sub_noret_lax((lock), PLOCK32_WL_1); \ |
730 | | break; /* return !__pl_r */ \ |
731 | | } \ |
732 | | __pl_r &= PLOCK32_RL_ANY; \ |
733 | | if (!__builtin_expect(__pl_r, 0)) \ |
734 | | break; /* return !__pl_r */ \ |
735 | | __pl_r = pl_deref_int(lock); \ |
736 | | } \ |
737 | | } \ |
738 | | !__pl_r; /* return value */ \ |
739 | | }) : ({ \ |
740 | | void __unsupported_argument_size_for_pl_try_a__(char *,int); \ |
741 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
742 | | __unsupported_argument_size_for_pl_try_a__(__FILE__,__LINE__); \ |
743 | | 0; \ |
744 | | }) \ |
745 | | ) |
746 | | |
747 | | /* request atomic write access (A) and wait for it. See comments in pl_try_a() for |
748 | | * explanations. |
749 | | */ |
750 | | #define pl_take_a(lock) \ |
751 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
752 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
753 | | register unsigned long __set_r = PLOCK64_WL_1; \ |
754 | | register unsigned long __msk_r = PLOCK64_SL_ANY; \ |
755 | | register unsigned long __pl_r; \ |
756 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ |
757 | | while (__builtin_expect(__pl_r & PLOCK64_RL_ANY, 0)) { \ |
758 | | if (__builtin_expect(__pl_r & __msk_r, 0)) { \ |
759 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
760 | | pl_wait_unlock_long(__lk_r, __msk_r); \ |
761 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ |
762 | | continue; \ |
763 | | } \ |
764 | | /* wait for all readers to leave or upgrade */ \ |
765 | | pl_cpu_relax(); pl_cpu_relax(); pl_cpu_relax(); \ |
766 | | __pl_r = pl_deref_long(lock); \ |
767 | | } \ |
768 | | pl_barrier(); \ |
769 | | 0; \ |
770 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
771 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
772 | | register unsigned int __set_r = PLOCK32_WL_1; \ |
773 | | register unsigned int __msk_r = PLOCK32_SL_ANY; \ |
774 | | register unsigned int __pl_r; \ |
775 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ |
776 | | while (__builtin_expect(__pl_r & PLOCK32_RL_ANY, 0)) { \ |
777 | | if (__builtin_expect(__pl_r & __msk_r, 0)) { \ |
778 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
779 | | pl_wait_unlock_int(__lk_r, __msk_r); \ |
780 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ |
781 | | continue; \ |
782 | | } \ |
783 | | /* wait for all readers to leave or upgrade */ \ |
784 | | pl_cpu_relax(); pl_cpu_relax(); pl_cpu_relax(); \ |
785 | | __pl_r = pl_deref_int(lock); \ |
786 | | } \ |
787 | | pl_barrier(); \ |
788 | | 0; \ |
789 | | }) : ({ \ |
790 | | void __unsupported_argument_size_for_pl_take_a__(char *,int); \ |
791 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
792 | | __unsupported_argument_size_for_pl_take_a__(__FILE__,__LINE__); \ |
793 | | 0; \ |
794 | | }) |
795 | | |
796 | | /* release atomic write access (A) lock */ |
797 | | #define pl_drop_a(lock) ( \ |
798 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
799 | | pl_barrier(); \ |
800 | | pl_sub_noret_rel(lock, PLOCK64_WL_1); \ |
801 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
802 | | pl_barrier(); \ |
803 | | pl_sub_noret_rel(lock, PLOCK32_WL_1); \ |
804 | | }) : ({ \ |
805 | | void __unsupported_argument_size_for_pl_drop_a__(char *,int); \ |
806 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
807 | | __unsupported_argument_size_for_pl_drop_a__(__FILE__,__LINE__); \ |
808 | | }) \ |
809 | | ) |
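A hypothetical sketch of the A (atomic write) class (identifiers invented): several A holders may run concurrently, so the protected updates themselves must remain atomic; pl_add_noret() from atomic-ops.h is used here for that purpose.

    static unsigned long stats_lock = 0;            /* shared lock word */
    static unsigned long hits = 0;                  /* shared counter updated atomically */

    static void account_hit(void)
    {
        pl_take_a(&stats_lock);                     /* waits for seekers and plain readers */
        pl_add_noret(&hits, 1);                     /* atomic update, other A holders may run too */
        pl_drop_a(&stats_lock);
    }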
810 | | |
811 | | /* Downgrade A to R. Inc(R), dec(W) then wait for W==0 */ |
812 | | #define pl_ator(lock) ( \ |
813 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
814 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
815 | | register unsigned long __set_r = PLOCK64_RL_1 - PLOCK64_WL_1; \ |
816 | | register unsigned long __msk_r = PLOCK64_WL_ANY; \ |
817 | | register unsigned long __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \ |
818 | | while (__builtin_expect(__pl_r & __msk_r, 0)) { \ |
819 | | __pl_r = pl_wait_unlock_long(__lk_r, __msk_r); \ |
820 | | } \ |
821 | | pl_barrier(); \ |
822 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
823 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
824 | | register unsigned int __set_r = PLOCK32_RL_1 - PLOCK32_WL_1; \ |
825 | | register unsigned int __msk_r = PLOCK32_WL_ANY; \ |
826 | | register unsigned int __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \ |
827 | | while (__builtin_expect(__pl_r & __msk_r, 0)) { \ |
828 | | __pl_r = pl_wait_unlock_int(__lk_r, __msk_r); \ |
829 | | } \ |
830 | | pl_barrier(); \ |
831 | | }) : ({ \ |
832 | | void __unsupported_argument_size_for_pl_ator__(char *,int); \ |
833 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
834 | | __unsupported_argument_size_for_pl_ator__(__FILE__,__LINE__); \ |
835 | | }) \ |
836 | | ) |
837 | | |
838 | | /* Try to upgrade from R to A, return non-zero on success, otherwise 0. |
839 | | * This lock will fail if S is held or appears while waiting (typically due to |
840 | | * a previous grab that was disguised as a W due to an overflow). In case of |
841 | | * failure to grab the lock, it MUST NOT be retried without first dropping R, |
842 | | * or it may never complete due to S waiting for R to leave before upgrading |
843 | | * to W. The lock succeeds once there's no more R (ie all of them have either |
844 | | * completed or were turned to A). |
845 | | */ |
846 | | #define pl_try_rtoa(lock) ( \ |
847 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
848 | | register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \ |
849 | | pl_barrier(); \ |
850 | | if (!__builtin_expect(__pl_r, 0)) { \ |
851 | | __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \ |
852 | | while (1) { \ |
853 | | if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \ |
854 | | pl_sub_noret_lax((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \ |
855 | | break; /* return !__pl_r */ \ |
856 | | } \ |
857 | | __pl_r &= PLOCK64_RL_ANY; \ |
858 | | if (!__builtin_expect(__pl_r, 0)) \ |
859 | | break; /* return !__pl_r */ \ |
860 | | __pl_r = pl_deref_long(lock); \ |
861 | | } \ |
862 | | } \ |
863 | | !__pl_r; /* return value */ \ |
864 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
865 | | register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \ |
866 | | pl_barrier(); \ |
867 | | if (!__builtin_expect(__pl_r, 0)) { \ |
868 | | __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \ |
869 | | while (1) { \ |
870 | | if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \ |
871 | | pl_sub_noret_lax((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \ |
872 | | break; /* return !__pl_r */ \ |
873 | | } \ |
874 | | __pl_r &= PLOCK32_RL_ANY; \ |
875 | | if (!__builtin_expect(__pl_r, 0)) \ |
876 | | break; /* return !__pl_r */ \ |
877 | | __pl_r = pl_deref_int(lock); \ |
878 | | } \ |
879 | | } \ |
880 | | !__pl_r; /* return value */ \ |
881 | | }) : ({ \ |
882 | | void __unsupported_argument_size_for_pl_try_rtoa__(char *,int); \ |
883 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
884 | | __unsupported_argument_size_for_pl_try_rtoa__(__FILE__,__LINE__); \ |
885 | | 0; \ |
886 | | }) \ |
887 | | ) |
888 | | |
889 | | |
890 | | /* |
891 | | * The following operations cover the multiple writers model : U->R->J->C->A |
892 | | */ |
893 | | |
894 | | |
895 | | /* Upgrade R to J. Inc(W) then wait for R==W or S != 0 */ |
896 | | #define pl_rtoj(lock) ( \ |
897 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
898 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
899 | | register unsigned long __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1;\ |
900 | | register unsigned char __m = 0; \ |
901 | | while (!(__pl_r & PLOCK64_SL_ANY) && \ |
902 | | (__pl_r / PLOCK64_WL_1 != (__pl_r & PLOCK64_RL_ANY) / PLOCK64_RL_1)) { \ |
903 | | unsigned char __loops = __m + 1; \ |
904 | | __m = (__m << 1) + 1; \ |
905 | | do { \ |
906 | | pl_cpu_relax(); \ |
907 | | pl_cpu_relax(); \ |
908 | | } while (--__loops); \ |
909 | | __pl_r = pl_deref_long(__lk_r); \ |
910 | | } \ |
911 | | pl_barrier(); \ |
912 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
913 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
914 | | register unsigned int __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1;\ |
915 | | register unsigned char __m = 0; \ |
916 | | while (!(__pl_r & PLOCK32_SL_ANY) && \ |
917 | | (__pl_r / PLOCK32_WL_1 != (__pl_r & PLOCK32_RL_ANY) / PLOCK32_RL_1)) { \ |
918 | | unsigned char __loops = __m + 1; \ |
919 | | __m = (__m << 1) + 1; \ |
920 | | do { \ |
921 | | pl_cpu_relax(); \ |
922 | | pl_cpu_relax(); \ |
923 | | } while (--__loops); \ |
924 | | __pl_r = pl_deref_int(__lk_r); \ |
925 | | } \ |
926 | | pl_barrier(); \ |
927 | | }) : ({ \ |
928 | | void __unsupported_argument_size_for_pl_rtoj__(char *,int); \ |
929 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
930 | | __unsupported_argument_size_for_pl_rtoj__(__FILE__,__LINE__); \ |
931 | | }) \ |
932 | | ) |
933 | | |
934 | | /* Upgrade J to C. Set S. Only one thread needs to do it though it's idempotent */ |
935 | | #define pl_jtoc(lock) ( \ |
936 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
937 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
938 | | register unsigned long __pl_r = pl_deref_long(__lk_r); \ |
939 | | if (!(__pl_r & PLOCK64_SL_ANY)) \ |
940 | | pl_or_noret(__lk_r, PLOCK64_SL_1); \ |
941 | | pl_barrier(); \ |
942 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
943 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
944 | | register unsigned int __pl_r = pl_deref_int(__lk_r); \ |
945 | | if (!(__pl_r & PLOCK32_SL_ANY)) \ |
946 | | pl_or_noret(__lk_r, PLOCK32_SL_1); \ |
947 | | pl_barrier(); \ |
948 | | }) : ({ \ |
949 | | void __unsupported_argument_size_for_pl_jtoc__(char *,int); \ |
950 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
951 | | __unsupported_argument_size_for_pl_jtoc__(__FILE__,__LINE__); \ |
952 | | }) \ |
953 | | ) |
954 | | |
955 | | /* Upgrade R to C. Inc(W) then wait for R==W or S != 0 */ |
956 | | #define pl_rtoc(lock) ( \ |
957 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
958 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
959 | | register unsigned long __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1;\ |
960 | | register unsigned char __m = 0; \ |
961 | | while (__builtin_expect(!(__pl_r & PLOCK64_SL_ANY), 0)) { \ |
962 | | unsigned char __loops; \ |
963 | | if (__pl_r / PLOCK64_WL_1 == (__pl_r & PLOCK64_RL_ANY) / PLOCK64_RL_1) { \ |
964 | | pl_or_noret(__lk_r, PLOCK64_SL_1); \ |
965 | | break; \ |
966 | | } \ |
967 | | __loops = __m + 1; \ |
968 | | __m = (__m << 1) + 1; \ |
969 | | do { \ |
970 | | pl_cpu_relax(); \ |
971 | | pl_cpu_relax(); \ |
972 | | } while (--__loops); \ |
973 | | __pl_r = pl_deref_long(__lk_r); \ |
974 | | } \ |
975 | | pl_barrier(); \ |
976 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
977 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
978 | | register unsigned int __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1;\ |
979 | | register unsigned char __m = 0; \ |
980 | | while (__builtin_expect(!(__pl_r & PLOCK32_SL_ANY), 0)) { \ |
981 | | unsigned char __loops; \ |
982 | | if (__pl_r / PLOCK32_WL_1 == (__pl_r & PLOCK32_RL_ANY) / PLOCK32_RL_1) { \ |
983 | | pl_or_noret(__lk_r, PLOCK32_SL_1); \ |
984 | | break; \ |
985 | | } \ |
986 | | __loops = __m + 1; \ |
987 | | __m = (__m << 1) + 1; \ |
988 | | do { \ |
989 | | pl_cpu_relax(); \ |
990 | | pl_cpu_relax(); \ |
991 | | } while (--__loops); \ |
992 | | __pl_r = pl_deref_int(__lk_r); \ |
993 | | } \ |
994 | | pl_barrier(); \ |
995 | | }) : ({ \ |
996 | | void __unsupported_argument_size_for_pl_rtoc__(char *,int); \ |
997 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
998 | | __unsupported_argument_size_for_pl_rtoc__(__FILE__,__LINE__); \ |
999 | | }) \ |
1000 | | ) |
1001 | | |
1002 | | /* Drop the claim (C) lock : R--,W-- then clear S if !R */ |
1003 | | #define pl_drop_c(lock) ( \ |
1004 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
1005 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
1006 | | register unsigned long __set_r = - PLOCK64_RL_1 - PLOCK64_WL_1; \ |
1007 | | register unsigned long __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \ |
1008 | | if (!(__pl_r & PLOCK64_RL_ANY)) \ |
1009 | | pl_and_noret(__lk_r, ~PLOCK64_SL_1); \ |
1010 | | pl_barrier(); \ |
1011 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
1012 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
1013 | | register unsigned int __set_r = - PLOCK32_RL_1 - PLOCK32_WL_1; \ |
1014 | | register unsigned int __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \ |
1015 | | if (!(__pl_r & PLOCK32_RL_ANY)) \ |
1016 | | pl_and_noret(__lk_r, ~PLOCK32_SL_1); \ |
1017 | | pl_barrier(); \ |
1018 | | }) : ({ \ |
1019 | | void __unsupported_argument_size_for_pl_drop_c__(char *,int); \ |
1020 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
1021 | | __unsupported_argument_size_for_pl_drop_c__(__FILE__,__LINE__); \ |
1022 | | }) \ |
1023 | | ) |
1024 | | |
1025 | | /* Upgrade C to A. R-- then wait for !S or clear S if !R */ |
1026 | | #define pl_ctoa(lock) ( \ |
1027 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
1028 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
1029 | | register unsigned long __pl_r = pl_ldadd(__lk_r, -PLOCK64_RL_1) - PLOCK64_RL_1;\ |
1030 | | while (__pl_r & PLOCK64_SL_ANY) { \ |
1031 | | if (!(__pl_r & PLOCK64_RL_ANY)) { \ |
1032 | | pl_and_noret(__lk_r, ~PLOCK64_SL_1); \ |
1033 | | break; \ |
1034 | | } \ |
1035 | | pl_cpu_relax(); \ |
1036 | | pl_cpu_relax(); \ |
1037 | | __pl_r = pl_deref_long(__lk_r); \ |
1038 | | } \ |
1039 | | pl_barrier(); \ |
1040 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
1041 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
1042 | | register unsigned int __pl_r = pl_ldadd(__lk_r, -PLOCK32_RL_1) - PLOCK32_RL_1; \ |
1043 | | while (__pl_r & PLOCK32_SL_ANY) { \ |
1044 | | if (!(__pl_r & PLOCK32_RL_ANY)) { \ |
1045 | | pl_and_noret(__lk_r, ~PLOCK32_SL_1); \ |
1046 | | break; \ |
1047 | | } \ |
1048 | | pl_cpu_relax(); \ |
1049 | | pl_cpu_relax(); \ |
1050 | | __pl_r = pl_deref_int(__lk_r); \ |
1051 | | } \ |
1052 | | pl_barrier(); \ |
1053 | | }) : ({ \ |
1054 | | void __unsupported_argument_size_for_pl_ctoa__(char *,int); \ |
1055 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
1056 | | __unsupported_argument_size_for_pl_ctoa__(__FILE__,__LINE__); \ |
1057 | | }) \ |
1058 | | ) |
1059 | | |
1060 | | /* downgrade the atomic write access lock (A) to join (J) */ |
1061 | | #define pl_atoj(lock) ( \ |
1062 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
1063 | | pl_barrier(); \ |
1064 | | pl_add_noret(lock, PLOCK64_RL_1); \ |
1065 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
1066 | | pl_barrier(); \ |
1067 | | pl_add_noret(lock, PLOCK32_RL_1); \ |
1068 | | }) : ({ \ |
1069 | | void __unsupported_argument_size_for_pl_atoj__(char *,int); \ |
1070 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
1071 | | __unsupported_argument_size_for_pl_atoj__(__FILE__,__LINE__); \ |
1072 | | }) \ |
1073 | | ) |
1074 | | |
1075 | | /* Returns non-zero if the thread calling it is the last writer, otherwise zero. It is |
1076 | | * designed to be called before pl_drop_j(), pl_drop_c() or pl_drop_a() for operations |
1077 | | * which need to be called only once. |
1078 | | */ |
1079 | | #define pl_last_writer(lock) ( \ |
1080 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
1081 | | !(pl_deref_long(lock) & PLOCK64_WL_2PL); \ |
1082 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
1083 | | !(pl_deref_int(lock) & PLOCK32_WL_2PL); \ |
1084 | | }) : ({ \ |
1085 | | void __unsupported_argument_size_for_pl_last_j__(char *,int); \ |
1086 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
1087 | | __unsupported_argument_size_for_pl_last_j__(__FILE__,__LINE__); \ |
1088 | | 0; \ |
1089 | | }) \ |
1090 | | ) |
1091 | | |
1092 | | /* attempt to get an exclusive write access via the J lock and wait for it. |
1093 | | * Only one thread may succeed in this operation. It will not conflict with |
1094 | | * other users and will first wait for all writers to leave, then for all |
1095 | | * readers to leave before starting. This offers a solution to obtain an |
1096 | | * exclusive access to a shared resource in the R/J/C/A model. A concurrent |
1097 | | * take_a() will wait for this one to finish first. Using a CAS instead of XADD |
1098 | | * should make the operation converge slightly faster. Returns non-zero on |
1099 | | * success, otherwise 0. |
1100 | | */ |
1101 | | #define pl_try_j(lock) ( \ |
1102 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
1103 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
1104 | | register unsigned long __set_r = PLOCK64_WL_1 | PLOCK64_RL_1; \ |
1105 | | register unsigned long __msk_r = PLOCK64_WL_ANY; \ |
1106 | | register unsigned long __pl_r; \ |
1107 | | register unsigned char __m; \ |
1108 | | pl_wait_unlock_long(__lk_r, __msk_r); \ |
1109 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \ |
1110 | | /* wait for all other readers to leave */ \ |
1111 | | __m = 0; \ |
1112 | | while (__builtin_expect(__pl_r & PLOCK64_RL_2PL, 0)) { \ |
1113 | | unsigned char __loops; \ |
1114 | | /* give up on other writers */ \ |
1115 | | if (__builtin_expect(__pl_r & PLOCK64_WL_2PL, 0)) { \ |
1116 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
1117 | | __pl_r = 0; /* failed to get the lock */ \ |
1118 | | break; \ |
1119 | | } \ |
1120 | | __loops = __m + 1; \ |
1121 | | __m = (__m << 1) + 1; \ |
1122 | | do { \ |
1123 | | pl_cpu_relax(); \ |
1124 | | pl_cpu_relax(); \ |
1125 | | } while (--__loops); \ |
1126 | | __pl_r = pl_deref_long(__lk_r); \ |
1127 | | } \ |
1128 | | pl_barrier(); \ |
1129 | | __pl_r; /* return value, cannot be null on success */ \ |
1130 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
1131 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
1132 | | register unsigned int __set_r = PLOCK32_WL_1 | PLOCK32_RL_1; \ |
1133 | | register unsigned int __msk_r = PLOCK32_WL_ANY; \ |
1134 | | register unsigned int __pl_r; \ |
1135 | | register unsigned char __m; \ |
1136 | | pl_wait_unlock_int(__lk_r, __msk_r); \ |
1137 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \ |
1138 | | /* wait for all other readers to leave */ \ |
1139 | | __m = 0; \ |
1140 | | while (__builtin_expect(__pl_r & PLOCK32_RL_2PL, 0)) { \ |
1141 | | unsigned char __loops; \ |
1142 | | /* give up on other writers */ \ |
1143 | | if (__builtin_expect(__pl_r & PLOCK32_WL_2PL, 0)) { \ |
1144 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
1145 | | __pl_r = 0; /* failed to get the lock */ \ |
1146 | | break; \ |
1147 | | } \ |
1148 | | __loops = __m + 1; \ |
1149 | | __m = (__m << 1) + 1; \ |
1150 | | do { \ |
1151 | | pl_cpu_relax(); \ |
1152 | | pl_cpu_relax(); \ |
1153 | | } while (--__loops); \ |
1154 | | __pl_r = pl_deref_int(__lk_r); \ |
1155 | | } \ |
1156 | | pl_barrier(); \ |
1157 | | __pl_r; /* return value, cannot be null on success */ \ |
1158 | | }) : ({ \ |
1159 | | void __unsupported_argument_size_for_pl_try_j__(char *,int); \ |
1160 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
1161 | | __unsupported_argument_size_for_pl_try_j__(__FILE__,__LINE__); \ |
1162 | | 0; \ |
1163 | | }) \ |
1164 | | ) |
1165 | | |
1166 | | /* request an exclusive write access via the J lock and wait for it. Only one |
1167 | | * thread may succeed in this operation. It will not conflict with other users |
1168 | | * and will first wait for all writers to leave, then for all readers to leave |
1169 | | * before starting. This offers a solution to obtain an exclusive access to a |
1170 | | * shared resource in the R/J/C/A model. A concurrent take_a() will wait for |
1171 | | * this one to finish first. Using a CAS instead of XADD should make the |
1172 | | * operation converge slightly faster. |
1173 | | */ |
1174 | | #define pl_take_j(lock) ( \ |
1175 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
1176 | | __label__ __retry; \ |
1177 | | register unsigned long *__lk_r = (unsigned long *)(lock); \ |
1178 | | register unsigned long __set_r = PLOCK64_WL_1 | PLOCK64_RL_1; \ |
1179 | | register unsigned long __msk_r = PLOCK64_WL_ANY; \ |
1180 | | register unsigned long __pl_r; \ |
1181 | | register unsigned char __m; \ |
1182 | | __retry: \ |
1183 | | pl_wait_unlock_long(__lk_r, __msk_r); \ |
1184 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \ |
1185 | | /* wait for all other readers to leave */ \ |
1186 | | __m = 0; \ |
1187 | | while (__builtin_expect(__pl_r & PLOCK64_RL_2PL, 0)) { \ |
1188 | | unsigned char __loops; \ |
1189 | | /* but rollback on other writers */ \ |
1190 | | if (__builtin_expect(__pl_r & PLOCK64_WL_2PL, 0)) { \ |
1191 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
1192 | | goto __retry; \ |
1193 | | } \ |
1194 | | __loops = __m + 1; \ |
1195 | | __m = (__m << 1) + 1; \ |
1196 | | do { \ |
1197 | | pl_cpu_relax(); \ |
1198 | | pl_cpu_relax(); \ |
1199 | | } while (--__loops); \ |
1200 | | __pl_r = pl_deref_long(__lk_r); \ |
1201 | | } \ |
1202 | | pl_barrier(); \ |
1203 | | 0; \ |
1204 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
1205 | | __label__ __retry; \ |
1206 | | register unsigned int *__lk_r = (unsigned int *)(lock); \ |
1207 | | register unsigned int __set_r = PLOCK32_WL_1 | PLOCK32_RL_1; \ |
1208 | | register unsigned int __msk_r = PLOCK32_WL_ANY; \ |
1209 | | register unsigned int __pl_r; \ |
1210 | | register unsigned char __m; \ |
1211 | | __retry: \ |
1212 | | pl_wait_unlock_int(__lk_r, __msk_r); \ |
1213 | | __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \ |
1214 | | /* wait for all other readers to leave */ \ |
1215 | | __m = 0; \ |
1216 | | while (__builtin_expect(__pl_r & PLOCK32_RL_2PL, 0)) { \ |
1217 | | unsigned char __loops; \ |
1218 | | /* but rollback on other writers */ \ |
1219 | | if (__builtin_expect(__pl_r & PLOCK32_WL_2PL, 0)) { \ |
1220 | | pl_sub_noret_lax(__lk_r, __set_r); \ |
1221 | | goto __retry; \ |
1222 | | } \ |
1223 | | __loops = __m + 1; \ |
1224 | | __m = (__m << 1) + 1; \ |
1225 | | do { \ |
1226 | | pl_cpu_relax(); \ |
1227 | | pl_cpu_relax(); \ |
1228 | | } while (--__loops); \ |
1229 | | __pl_r = pl_deref_int(__lk_r); \ |
1230 | | } \ |
1231 | | pl_barrier(); \ |
1232 | | 0; \ |
1233 | | }) : ({ \ |
1234 | | void __unsupported_argument_size_for_pl_take_j__(char *,int); \ |
1235 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
1236 | | __unsupported_argument_size_for_pl_take_j__(__FILE__,__LINE__); \ |
1237 | | 0; \ |
1238 | | }) \ |
1239 | | ) |
1240 | | |
1241 | | /* drop the join (J) lock entirely */ |
1242 | | #define pl_drop_j(lock) ( \ |
1243 | | (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ |
1244 | | pl_barrier(); \ |
1245 | | pl_sub_noret_rel(lock, PLOCK64_WL_1 | PLOCK64_RL_1); \ |
1246 | | }) : (sizeof(*(lock)) == 4) ? ({ \ |
1247 | | pl_barrier(); \ |
1248 | | pl_sub_noret_rel(lock, PLOCK32_WL_1 | PLOCK32_RL_1); \ |
1249 | | }) : ({ \ |
1250 | | void __unsupported_argument_size_for_pl_drop_j__(char *,int); \ |
1251 | | if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ |
1252 | | __unsupported_argument_size_for_pl_drop_j__(__FILE__,__LINE__); \ |
1253 | | }) \ |
1254 | | ) |
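/* A minimal usage sketch of the J lock pair above, assuming a zero-initialized
 * lock word shared between threads; the variable and function names below are
 * purely illustrative:
 *
 *     static unsigned long shared_lock;   // one lock word per shared resource
 *
 *     void update_resource(void)
 *     {
 *         pl_take_j(&shared_lock);   // waits for other writers, then for readers
 *         // ... exclusive access to the protected resource ...
 *         pl_drop_j(&shared_lock);   // drops the WL+RL bits taken above
 *     }
 */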
1255 | | |
1256 | | /* |
1257 | | * The part below is for Low Overhead R/W locks (LORW). These are neither
1258 | | * upgradable nor necessarily fair, but they try to be fast when uncontended
1259 | | * and to limit the cost and perturbation during contention. Writers always |
1260 | | * have precedence over readers to preserve latency as much as possible. |
1261 | | * |
1262 | | * The principle is to offer a fast no-contention path and a limited total |
1263 | | * number of writes for the contended path. Since R/W locks are expected to be |
1264 | | * used in situations where there is a benefit in separating reads from writes, |
1265 | | * it is expected that reads are common (typ >= 50%) and that there is often at |
1266 | | * least one reader (otherwise a spinlock wouldn't be a problem). As such, a |
1267 | | * reader will try to pass instantly, detect contention and immediately retract |
1268 | | * and wait in the queue in case there is contention. A writer will first also |
1269 | | * try to pass instantly, and if it fails due to pending readers, it will mark |
1270 | | * that it's waiting so that readers stop entering. This will leave the writer |
1271 | | * waiting as close as possible to the point of being granted access. New |
1272 | | * writers will also notice this previous contention and will wait outside. |
1273 | | * This means that a successful access for a reader or a writer requires a |
1274 | | * single CAS, and a contended attempt will require one failed CAS and one |
1275 | | * successful XADD for a reader, or an optional OR and a N+1 CAS for the |
1276 | | * writer. |
1277 | | * |
1278 | | * A counter of shared users indicates the number of active readers, while a |
1279 | | * (single-bit) counter of exclusive writers indicates whether the lock is |
1280 | | * currently held for writes. This distinction also makes it possible to use a
1281 | | * single function to release the lock if desired, since the exclusive bit
1282 | | * indicates the state of the caller of unlock(). The WRQ bit is cleared
1283 | | * during the unlock.
1284 | | * |
1285 | | * Layout: (32/64 bit): |
1286 | | * 31 2 1 0 |
1287 | | * +-----------+--------------+-----+-----+ |
1288 | | * | | SHR | WRQ | EXC | |
1289 | | * +-----------+--------------+-----+-----+ |
1290 | | * |
1291 | | * In order to minimize operations, the WRQ bit is held during EXC so that the |
1292 | | * write waiter that had to fight for EXC doesn't have to release WRQ during |
1293 | | * its operations, and will just drop it along with EXC upon unlock. |
1294 | | * |
1295 | | * This means the following costs: |
1296 | | * reader: |
1297 | | * success: 1 CAS |
1298 | | * failure: 1 CAS + 1 XADD |
1299 | | * unlock: 1 SUB |
1300 | | * writer: |
1301 | | * success: 1 RD + 1 CAS |
1302 | | * failure: 1 RD + 1 CAS + 0/1 OR + N CAS |
1303 | | * unlock: 1 AND |
1304 | | */ |
1305 | | |
1306 | | #define PLOCK_LORW_EXC_BIT ((sizeof(long) == 8) ? 0 : 0) |
1307 | | #define PLOCK_LORW_EXC_SIZE ((sizeof(long) == 8) ? 1 : 1) |
1308 | | #define PLOCK_LORW_EXC_BASE (1UL << PLOCK_LORW_EXC_BIT) |
1309 | | #define PLOCK_LORW_EXC_MASK (((1UL << PLOCK_LORW_EXC_SIZE) - 1UL) << PLOCK_LORW_EXC_BIT) |
1310 | | |
1311 | | #define PLOCK_LORW_WRQ_BIT ((sizeof(long) == 8) ? 1 : 1) |
1312 | | #define PLOCK_LORW_WRQ_SIZE ((sizeof(long) == 8) ? 1 : 1) |
1313 | | #define PLOCK_LORW_WRQ_BASE (1UL << PLOCK_LORW_WRQ_BIT) |
1314 | | #define PLOCK_LORW_WRQ_MASK (((1UL << PLOCK_LORW_WRQ_SIZE) - 1UL) << PLOCK_LORW_WRQ_BIT) |
1315 | | |
1316 | | #define PLOCK_LORW_SHR_BIT ((sizeof(long) == 8) ? 2 : 2) |
1317 | | #define PLOCK_LORW_SHR_SIZE ((sizeof(long) == 8) ? 30 : 30) |
1318 | | #define PLOCK_LORW_SHR_BASE (1UL << PLOCK_LORW_SHR_BIT) |
1319 | | #define PLOCK_LORW_SHR_MASK (((1UL << PLOCK_LORW_SHR_SIZE) - 1UL) << PLOCK_LORW_SHR_BIT) |
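/* With the definitions above, and since both branches of each ternary yield
 * the same value, the lock word decodes identically for 32- and 64-bit longs.
 * A compile-time sanity sketch (assuming a C11 compiler), purely illustrative:
 *
 *     _Static_assert(PLOCK_LORW_EXC_MASK == 0x00000001UL, "EXC is bit 0");
 *     _Static_assert(PLOCK_LORW_WRQ_MASK == 0x00000002UL, "WRQ is bit 1");
 *     _Static_assert(PLOCK_LORW_SHR_BASE == 0x00000004UL, "readers are counted from bit 2");
 *     _Static_assert(PLOCK_LORW_SHR_MASK == 0xfffffffcUL, "30-bit reader counter");
 */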
1320 | | |
1321 | | __attribute__((unused,always_inline,no_instrument_function)) |
1322 | | static inline void pl_lorw_rdlock(unsigned long *lock) |
1323 | 0 | { |
1324 | 0 | unsigned long lk = 0; |
1325 | 0 |
1326 | 0 | /* First, assume we're alone and try to get the read lock (fast path). |
1327 | 0 | * It usually works because read locks are often used on low-contention
1328 | 0 | * structs. |
1329 | 0 | */ |
1330 | 0 | lk = pl_cmpxchg(lock, 0, PLOCK_LORW_SHR_BASE); |
1331 | 0 | if (!lk) |
1332 | 0 | return; |
1333 | 0 |
1334 | 0 | /* so we were not alone, make sure there's no writer waiting for the |
1335 | 0 | * lock to be empty of visitors. |
1336 | 0 | */ |
1337 | 0 | if (lk & PLOCK_LORW_WRQ_MASK) |
1338 | 0 | #if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO) |
1339 | 0 | lk = __pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK); |
1340 | 0 | #else |
1341 | 0 | lk = pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK); |
1342 | 0 | #endif |
1343 | 0 |
1344 | 0 | /* count us as visitor among others */ |
1345 | 0 | lk = pl_ldadd_acq(lock, PLOCK_LORW_SHR_BASE); |
1346 | 0 |
1347 | 0 | /* wait for end of exclusive access if any */ |
1348 | 0 | if (lk & PLOCK_LORW_EXC_MASK) |
1349 | 0 | #if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO) |
1350 | 0 | lk = __pl_wait_unlock_long(lock, PLOCK_LORW_EXC_MASK); |
1351 | 0 | #else |
1352 | 0 | lk = pl_wait_unlock_long(lock, PLOCK_LORW_EXC_MASK); |
1353 | 0 | #endif |
1354 | 0 | } |
1355 | | |
1356 | | |
1357 | | __attribute__((unused,always_inline,no_instrument_function)) |
1358 | | static inline void pl_lorw_wrlock(unsigned long *lock) |
1359 | 0 | { |
1360 | 0 | unsigned long lk = 0; |
1361 | 0 | unsigned long old = 0; |
1362 | 0 |
1363 | 0 | /* first, make sure another writer is not already blocked waiting for |
1364 | 0 | * readers to leave. Note that tests have shown that it can be even |
1365 | 0 | * faster to avoid the first check and to unconditionally wait. |
1366 | 0 | */ |
1367 | 0 | lk = pl_deref_long(lock); |
1368 | 0 | if (__builtin_expect(lk & PLOCK_LORW_WRQ_MASK, 1)) |
1369 | 0 | #if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO) |
1370 | 0 | lk = __pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK); |
1371 | 0 | #else |
1372 | 0 | lk = pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK); |
1373 | 0 | #endif |
1374 | 0 |
1375 | 0 | do { |
1376 | 0 | /* let's check for the two sources of contention at once */ |
1377 | 0 |
1378 | 0 | if (__builtin_expect(lk & (PLOCK_LORW_SHR_MASK | PLOCK_LORW_EXC_MASK), 1)) { |
1379 | 0 | /* check if there are still readers coming. If so, close the door and |
1380 | 0 | * wait for them to leave. |
1381 | 0 | */ |
1382 | 0 | if (lk & PLOCK_LORW_SHR_MASK) { |
1383 | 0 | /* note below, an OR is significantly cheaper than BTS or XADD */ |
1384 | 0 | if (!(lk & PLOCK_LORW_WRQ_MASK)) |
1385 | 0 | pl_or_noret(lock, PLOCK_LORW_WRQ_BASE); |
1386 | 0 | #if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO) |
1387 | 0 | lk = __pl_wait_unlock_long(lock, PLOCK_LORW_SHR_MASK); |
1388 | 0 | #else |
1389 | 0 | lk = pl_wait_unlock_long(lock, PLOCK_LORW_SHR_MASK); |
1390 | 0 | #endif |
1391 | 0 | } |
1392 | 0 |
1393 | 0 | /* And also wait for a previous writer to finish. */ |
1394 | 0 | if (lk & PLOCK_LORW_EXC_MASK) |
1395 | 0 | #if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO) |
1396 | 0 | lk = __pl_wait_unlock_long(lock, PLOCK_LORW_EXC_MASK); |
1397 | 0 | #else |
1398 | 0 | lk = pl_wait_unlock_long(lock, PLOCK_LORW_EXC_MASK); |
1399 | 0 | #endif |
1400 | 0 | } |
1401 | 0 |
1402 | 0 | /* A new reader may appear right now if there were none
1403 | 0 | * above and we didn't close the door. |
1404 | 0 | */ |
1405 | 0 | old = lk & ~PLOCK_LORW_SHR_MASK & ~PLOCK_LORW_EXC_MASK; |
1406 | 0 | lk = pl_cmpxchg(lock, old, old | PLOCK_LORW_EXC_BASE); |
1407 | 0 | } while (lk != old); |
1408 | 0 |
1409 | 0 | /* done, not waiting anymore; the WRQ bit, if any, will be dropped by the
1410 | 0 | * unlock |
1411 | 0 | */ |
1412 | 0 | } |
1413 | | |
1414 | | |
1415 | | __attribute__((unused,always_inline,no_instrument_function)) |
1416 | | static inline void pl_lorw_rdunlock(unsigned long *lock) |
1417 | 0 | { |
1418 | 0 | pl_sub_noret_rel(lock, PLOCK_LORW_SHR_BASE); |
1419 | 0 | } |
1420 | | |
1421 | | __attribute__((unused,always_inline,no_instrument_function)) |
1422 | | static inline void pl_lorw_wrunlock(unsigned long *lock) |
1423 | 0 | { |
1424 | 0 | pl_and_noret_rel(lock, ~(PLOCK_LORW_WRQ_MASK | PLOCK_LORW_EXC_MASK)); |
1425 | 0 | } |
1426 | | |
1427 | | __attribute__((unused,always_inline,no_instrument_function)) |
1428 | | static inline void pl_lorw_unlock(unsigned long *lock) |
1429 | 0 | { |
1430 | 0 | if (pl_deref_long(lock) & PLOCK_LORW_EXC_MASK) |
1431 | 0 | pl_lorw_wrunlock(lock); |
1432 | 0 | else |
1433 | 0 | pl_lorw_rdunlock(lock); |
1434 | 0 | } |
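/* A minimal reader/writer usage sketch built on the LORW primitives above,
 * assuming a zero-initialized unsigned long lock word; the variable and
 * function names below are purely illustrative:
 *
 *     static unsigned long cfg_lock;
 *
 *     void read_cfg(void)
 *     {
 *         pl_lorw_rdlock(&cfg_lock);
 *         // ... shared, read-only section ...
 *         pl_lorw_rdunlock(&cfg_lock);   // or pl_lorw_unlock(&cfg_lock)
 *     }
 *
 *     void write_cfg(void)
 *     {
 *         pl_lorw_wrlock(&cfg_lock);
 *         // ... exclusive section; writers have precedence over readers ...
 *         pl_lorw_wrunlock(&cfg_lock);   // or pl_lorw_unlock(&cfg_lock)
 *     }
 */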
1435 | | |
1436 | | #endif /* PL_PLOCK_H */ |