Coverage Report

Created: 2025-10-10 07:08

/src/haproxy/include/import/plock.h
Line  Count  Source
1
/* plock - progressive locks
2
 *
3
 * Copyright (C) 2012-2017 Willy Tarreau <w@1wt.eu>
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining
6
 * a copy of this software and associated documentation files (the
7
 * "Software"), to deal in the Software without restriction, including
8
 * without limitation the rights to use, copy, modify, merge, publish,
9
 * distribute, sublicense, and/or sell copies of the Software, and to
10
 * permit persons to whom the Software is furnished to do so, subject to
11
 * the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be
14
 * included in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23
 * OTHER DEALINGS IN THE SOFTWARE.
24
 */
25
26
#ifndef PL_PLOCK_H
27
#define PL_PLOCK_H
28
29
#include "atomic-ops.h"
30
#ifdef _POSIX_PRIORITY_SCHEDULING
31
#include <sched.h>
32
#endif
33
34
/* 64 bit */
35
#define PLOCK64_RL_1   0x0000000000000004ULL
36
#define PLOCK64_RL_2PL 0x00000000FFFFFFF8ULL
37
#define PLOCK64_RL_ANY 0x00000000FFFFFFFCULL
38
#define PLOCK64_SL_1   0x0000000100000000ULL
39
#define PLOCK64_SL_ANY 0x0000000300000000ULL
40
#define PLOCK64_WL_1   0x0000000400000000ULL
41
#define PLOCK64_WL_2PL 0xFFFFFFF800000000ULL
42
#define PLOCK64_WL_ANY 0xFFFFFFFC00000000ULL
43
44
/* 32 bit */
45
#define PLOCK32_RL_1   0x00000004
46
#define PLOCK32_RL_2PL 0x0000FFF8
47
#define PLOCK32_RL_ANY 0x0000FFFC
48
#define PLOCK32_SL_1   0x00010000
49
#define PLOCK32_SL_ANY 0x00030000
50
#define PLOCK32_WL_1   0x00040000
51
#define PLOCK32_WL_2PL 0xFFF80000
52
#define PLOCK32_WL_ANY 0xFFFC0000
53
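
The three counters share a single word: R starts at bit 2, S takes the next two bits, and W the upper bits; the *_1 constants are the per-field increment and the *_ANY constants the per-field mask. As an illustration (not part of plock.h), a 32-bit lock word can be decoded with the masks above; the function name is hypothetical:

/* Illustrative decode of a 32-bit plock word using the masks defined above. */
static inline void plock32_decode(unsigned int v,
                                  unsigned int *r, unsigned int *s, unsigned int *w)
{
        *r = (v & PLOCK32_RL_ANY) / PLOCK32_RL_1;  /* readers, bits 2..15  */
        *s = (v & PLOCK32_SL_ANY) / PLOCK32_SL_1;  /* seekers, bits 16..17 */
        *w = (v & PLOCK32_WL_ANY) / PLOCK32_WL_1;  /* writers, bits 18..31 */
}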
54
/* dereferences <*p> as unsigned long without causing aliasing issues */
55
0
#define pl_deref_long(p) ({ volatile unsigned long *__pl_l = (unsigned long *)(p); *__pl_l; })
56
57
/* dereferences <*p> as unsigned int without causing aliasing issues */
58
#define pl_deref_int(p) ({ volatile unsigned int *__pl_i = (unsigned int *)(p); *__pl_i; })
59
60
/* This function waits for <lock> to release all bits covered by <mask>, and
61
 * enforces an exponential backoff using CPU pauses to limit the pollution to
62
 * the other threads' caches. The progression follows (1.5^N)-1, limited to
63
 * 16384 iterations, which is more than sufficient even for very large numbers of
64
 * threads. It's possible to disable exponential backoff (EBO) for debugging
65
 * purposes by setting PLOCK_DISABLE_EBO, in which case the function will be
66
 * replaced with a simpler macro. This may for example be useful to more
67
 * easily track callers' CPU usage. The macro was not designed to be used
68
 * outside of the functions defined here.
69
 */
70
#if defined(PLOCK_DISABLE_EBO)
71
#define pl_wait_unlock_long(lock, mask)           \
72
  ({                                        \
73
    unsigned long _r;                 \
74
    do {                              \
75
      pl_cpu_relax();           \
76
      _r = pl_deref_long(lock); \
77
    } while (_r & mask);              \
78
    _r; /* return value */            \
79
  })
80
#else /* not PLOCK_DISABLE_EBO */
81
__attribute__((unused,always_inline,no_instrument_function)) inline
82
static unsigned long __pl_wait_unlock_long(const unsigned long *lock, const unsigned long mask)
83
0
{
84
0
  unsigned long ret;
85
0
  unsigned int m = 0;
86
0
87
0
  do {
88
0
    unsigned int loops = m;
89
0
90
0
#ifdef _POSIX_PRIORITY_SCHEDULING
91
0
    if (loops >= 65536) {
92
0
      sched_yield();
93
0
      loops -= 32768;
94
0
    }
95
0
#endif
96
0
    for (; loops >= 90; loops --)
97
0
      pl_cpu_relax();
98
0
99
0
    for (; loops >= 1; loops--)
100
0
      pl_barrier();
101
0
102
0
    ret = pl_load(lock);
103
0
    if (__builtin_expect(ret & mask, 0) == 0)
104
0
      break;
105
0
106
0
    /* the below produces an exponential growth with loops to lower
107
0
     * values and still growing. This allows competing threads to
108
0
     * wait different times once the threshold is reached.
109
0
     */
110
0
    m = ((m + (m >> 2)) + 1) & 0x1ffff;
111
0
  } while (1);
112
0
113
0
  return ret;
114
0
}
115
116
# if defined(PLOCK_INLINE_EBO)
117
__attribute__((unused,always_inline,no_instrument_function)) inline
118
# else
119
__attribute__((unused,noinline,no_instrument_function))
120
# endif
121
static unsigned long pl_wait_unlock_long(const unsigned long *lock, const unsigned long mask)
122
0
{
123
0
  return __pl_wait_unlock_long(lock, mask);
124
0
}
125
#endif /* PLOCK_DISABLE_EBO */
126
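
To give a feel for how fast the wait grows, here is a small stand-alone sketch (not part of the header) that simply prints the first values produced by the m update used in the loop above:

/* Illustration only: print the backoff sequence generated by
 * m = ((m + (m >> 2)) + 1) & 0x1ffff, as used in __pl_wait_unlock_long(). */
#include <stdio.h>

int main(void)
{
        unsigned int m = 0;

        for (int i = 0; i < 10; i++) {
                printf("attempt %2d: loops = %u\n", i, m);
                m = ((m + (m >> 2)) + 1) & 0x1ffff;
        }
        /* prints 0, 1, 2, 3, 4, 6, 8, 11, 14, 18: roughly x1.25 growth per
         * attempt, masked with 0x1ffff as in the code above */
        return 0;
}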
127
/* This function waits for <lock> to release all bits covered by <mask>, and
128
 * enforces an exponential backoff using CPU pauses to limit the pollution to
129
 * the other threads' caches. The progression follows (2^N)-1, limited to 255
130
 * iterations, which is more than sufficient even for very large numbers of threads.
131
 * The function slightly benefits from size optimization under gcc, but Clang
132
 * cannot do it, so it's not done here, as it doesn't make a big difference.
133
 * It is possible to disable exponential backoff (EBO) for debugging purposes
134
 * by setting PLOCK_DISABLE_EBO, in which case the function will be replaced
135
 * with a simpler macro. This may for example be useful to more easily track
136
 * callers' CPU usage. The macro was not designed to be used outside of the
137
 * functions defined here.
138
 */
139
#if defined(PLOCK_DISABLE_EBO)
140
#define pl_wait_unlock_int(lock, mask)            \
141
  ({                                        \
142
    unsigned int _r;                  \
143
    do {                              \
144
      pl_cpu_relax();           \
145
      _r = pl_deref_int(lock);  \
146
    } while (_r & mask);              \
147
    _r; /* return value */            \
148
  })
149
#else
150
__attribute__((unused,always_inline,no_instrument_function)) inline
151
static unsigned int __pl_wait_unlock_int(const unsigned int *lock, const unsigned int mask)
152
0
{
153
0
  unsigned int ret;
154
0
  unsigned int m = 0;
155
0
156
0
  do {
157
0
    unsigned int loops = m;
158
0
159
0
#ifdef _POSIX_PRIORITY_SCHEDULING
160
0
    if (loops >= 65536) {
161
0
      sched_yield();
162
0
      loops -= 32768;
163
0
    }
164
0
#endif
165
0
    for (; loops >= 200; loops -= 10)
166
0
      pl_cpu_relax();
167
0
168
0
    for (; loops >= 1; loops--)
169
0
      pl_barrier();
170
0
171
0
    ret = pl_deref_int(lock);
172
0
    if (__builtin_expect(ret & mask, 0) == 0)
173
0
      break;
174
0
175
0
    /* the below produces an exponential growth with loops to lower
176
0
     * values and still growing. This allows competing threads to
177
0
     * wait different times once the threshold is reached.
178
0
     */
179
0
    m = ((m + (m >> 2)) + 1) & 0x1ffff;
180
0
  } while (1);
181
0
182
0
  return ret;
183
0
}
184
185
# if defined(PLOCK_INLINE_EBO)
186
__attribute__((unused,always_inline,no_instrument_function)) inline
187
# else
188
__attribute__((unused,noinline,no_instrument_function))
189
# endif
190
static unsigned int pl_wait_unlock_int(const unsigned int *lock, const unsigned int mask)
191
0
{
192
0
  return __pl_wait_unlock_int(lock, mask);
193
0
}
194
#endif /* PLOCK_DISABLE_EBO */
195
196
/* This function waits for <lock> to change from value <prev> and returns the
197
 * new value. It enforces an exponential backoff using CPU pauses to limit the
198
 * pollution to the other threads' caches. The progression follows (2^N)-1,
199
 * limited to 255 iterations, which is more than sufficient even for very large
200
 * numbers of threads. It is designed to be called after a first test which
201
 * retrieves the previous value, so it starts by waiting. The function slightly
202
 * benefits from size optimization under gcc, but Clang cannot do it, so it's
203
 * not done here, as it doesn't make a big difference.
204
 */
205
__attribute__((unused,noinline,no_instrument_function))
206
static unsigned long pl_wait_new_long(const unsigned long *lock, const unsigned long prev)
207
0
{
208
0
  unsigned char m = 0;
209
0
  unsigned long curr;
210
211
0
  do {
212
0
    unsigned char loops = m + 1;
213
0
    m = (m << 1) + 1;
214
0
    do {
215
0
      pl_cpu_relax();
216
0
    } while (__builtin_expect(--loops, 0));
217
0
    curr = pl_deref_long(lock);
218
0
  } while (__builtin_expect(curr == prev, 0));
219
0
  return curr;
220
0
}
221
222
/* This function waits for <lock> to change from value <prev> and returns the
223
 * new value. It enforces an exponential backoff using CPU pauses to limit the
224
 * pollution to the other threads' caches. The progression follows (2^N)-1,
225
 * limited to 255 iterations, which is way sufficient even for very large
226
 * numbers of threads. It is designed to be called after a first test which
227
 * retrieves the previous value, so it starts by waiting. The function slightly
228
 * benefits from size optimization under gcc, but Clang cannot do it, so it's
229
 * not done here, as it doesn't make a big difference.
230
 */
231
__attribute__((unused,noinline,no_instrument_function))
232
static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int prev)
233
0
{
234
0
  unsigned char m = 0;
235
0
  unsigned int curr;
236
0
237
0
  do {
238
0
    unsigned char loops = m + 1;
239
0
    m = (m << 1) + 1;
240
0
    do {
241
0
      pl_cpu_relax();
242
0
    } while (__builtin_expect(--loops, 0));
243
0
    curr = pl_deref_int(lock);
244
0
  } while (__builtin_expect(curr == prev, 0));
245
0
  return curr;
246
0
}
247
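
A minimal usage sketch for these helpers (illustrative only; the caller and lock word are hypothetical): read the current value first, then call pl_wait_new_int() to spin until it changes, as the comment above describes:

/* Hypothetical example: wait for another thread to modify the lock word. */
static unsigned int wait_for_change(const unsigned int *lock_word)
{
        unsigned int prev = pl_deref_int(lock_word); /* the "first test" */
        return pl_wait_new_int(lock_word, prev);     /* returns the new value */
}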
248
/* request shared read access (R), return non-zero on success, otherwise 0 */
249
#define pl_try_r(lock) (                                                                       \
250
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
251
    register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_WL_ANY;          \
252
    pl_barrier();                                                                  \
253
    if (!__builtin_expect(__pl_r, 0)) {                                            \
254
      __pl_r = pl_ldadd_acq((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY;          \
255
      if (__builtin_expect(__pl_r, 0))                                       \
256
        pl_sub_noret((lock), PLOCK64_RL_1);                            \
257
    }                                                                              \
258
    !__pl_r; /* return value */                                                    \
259
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
260
    register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_WL_ANY;            \
261
    pl_barrier();                                                                  \
262
    if (!__builtin_expect(__pl_r, 0)) {                                            \
263
      __pl_r = pl_ldadd_acq((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY;          \
264
      if (__builtin_expect(__pl_r, 0))                                       \
265
        pl_sub_noret((lock), PLOCK32_RL_1);                            \
266
    }                                                                              \
267
    !__pl_r; /* return value */                                                    \
268
  }) : ({                                                                                \
269
    void __unsupported_argument_size_for_pl_try_r__(char *,int);                   \
270
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
271
      __unsupported_argument_size_for_pl_try_r__(__FILE__,__LINE__);         \
272
    0;                                                                             \
273
  })                                                                                     \
274
)
275
276
/* request shared read access (R) and wait for it. In order not to disturb a W
277
 * lock waiting for all readers to leave, we first check if a W lock is held
278
 * before trying to claim the R lock.
279
 */
280
#define pl_take_r(lock)                                                                        \
281
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
282
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
283
    register unsigned long __set_r = PLOCK64_RL_1;                                 \
284
    register unsigned long __msk_r = PLOCK64_WL_ANY;                               \
285
    register unsigned long __old_r = pl_cmpxchg(__lk_r, 0, __set_r);               \
286
    if (__old_r) {                                                                 \
287
      while (1) {                                                            \
288
        if (__old_r & __msk_r)                                         \
289
          pl_wait_unlock_long(__lk_r, __msk_r);                  \
290
        if (!(pl_ldadd_acq(__lk_r, __set_r) & __msk_r))                \
291
          break;                                                 \
292
        __old_r = pl_sub_lax(__lk_r, __set_r);                         \
293
      }                                                                      \
294
    }                                                                              \
295
    pl_barrier();                                                                  \
296
    0;                                                                             \
297
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
298
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
299
    register unsigned int __set_r = PLOCK32_RL_1;                                  \
300
    register unsigned int __msk_r = PLOCK32_WL_ANY;                                \
301
    register unsigned int __old_r = pl_cmpxchg(__lk_r, 0, __set_r);                \
302
    if (__old_r) {                                                                 \
303
      while (1) {                                                            \
304
        if (__old_r & __msk_r)                                         \
305
          pl_wait_unlock_int(__lk_r, __msk_r);                   \
306
        if (!(pl_ldadd_acq(__lk_r, __set_r) & __msk_r))                \
307
          break;                                                 \
308
        __old_r = pl_sub_lax(__lk_r, __set_r);                         \
309
      }                                                                      \
310
    }                                                                              \
311
    pl_barrier();                                                                  \
312
    0;                                                                             \
313
  }) : ({                                                                                \
314
    void __unsupported_argument_size_for_pl_take_r__(char *,int);                  \
315
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
316
      __unsupported_argument_size_for_pl_take_r__(__FILE__,__LINE__);        \
317
    0;                                                                             \
318
  })
319
320
/* release the read access (R) lock */
321
#define pl_drop_r(lock) (                                                                      \
322
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
323
    pl_barrier();                                                                  \
324
    pl_sub_noret_rel(lock, PLOCK64_RL_1);                                          \
325
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
326
    pl_barrier();                                                                  \
327
    pl_sub_noret_rel(lock, PLOCK32_RL_1);                                          \
328
  }) : ({                                                                                \
329
    void __unsupported_argument_size_for_pl_drop_r__(char *,int);                  \
330
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
331
      __unsupported_argument_size_for_pl_drop_r__(__FILE__,__LINE__);        \
332
  })                                                                                     \
333
)
334
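
As a hedged usage sketch for the R primitives above (the lock word and the protected data are hypothetical, not part of plock.h):

static unsigned int rwlock;              /* hypothetical shared lock word, starts at 0 */

static int read_value(const int *protected_value)
{
        int v;

        pl_take_r(&rwlock);              /* waits if a writer holds or claims W */
        v = *protected_value;            /* shared read-side section */
        pl_drop_r(&rwlock);              /* releases our R count */
        return v;
}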
335
/* request a seek access (S), return non-zero on success, otherwise 0 */
336
#define pl_try_s(lock) (                                                                       \
337
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
338
    register unsigned long __pl_r = pl_deref_long(lock);                           \
339
    pl_barrier();                                                                  \
340
    if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) {        \
341
      __pl_r = pl_ldadd_acq((lock), PLOCK64_SL_1 | PLOCK64_RL_1) &           \
342
            (PLOCK64_WL_ANY | PLOCK64_SL_ANY);                               \
343
      if (__builtin_expect(__pl_r, 0))                                       \
344
        pl_sub_noret_lax((lock), PLOCK64_SL_1 | PLOCK64_RL_1);         \
345
    }                                                                              \
346
    !__pl_r; /* return value */                                                    \
347
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
348
    register unsigned int __pl_r = pl_deref_int(lock);                             \
349
    pl_barrier();                                                                  \
350
    if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) {        \
351
      __pl_r = pl_ldadd_acq((lock), PLOCK32_SL_1 | PLOCK32_RL_1) &           \
352
            (PLOCK32_WL_ANY | PLOCK32_SL_ANY);                               \
353
      if (__builtin_expect(__pl_r, 0))                                       \
354
        pl_sub_noret_lax((lock), PLOCK32_SL_1 | PLOCK32_RL_1);         \
355
    }                                                                              \
356
    !__pl_r; /* return value */                                                    \
357
  }) : ({                                                                                \
358
    void __unsupported_argument_size_for_pl_try_s__(char *,int);                   \
359
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
360
      __unsupported_argument_size_for_pl_try_s__(__FILE__,__LINE__);         \
361
    0;                                                                             \
362
  })                                                                                     \
363
)
364
365
/* request a seek access (S) and wait for it. The lock is immediately claimed,
366
 * and only upon failure an exponential backoff is used. S locks rarely compete
367
 * with W locks so S will generally not disturb W. As the S lock may be used as
368
 * a spinlock, it's important to grab it as fast as possible.
369
 */
370
#define pl_take_s(lock)                                                                        \
371
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
372
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
373
    register unsigned long __set_r = PLOCK64_SL_1 | PLOCK64_RL_1;                  \
374
    register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY;              \
375
    while (1) {                                                                    \
376
      if (!__builtin_expect(pl_ldadd_acq(__lk_r, __set_r) & __msk_r, 0))     \
377
        break;                                                         \
378
      pl_sub_noret_lax(__lk_r, __set_r);                                     \
379
      pl_wait_unlock_long(__lk_r, __msk_r);                                  \
380
    }                                                                              \
381
    pl_barrier();                                                                  \
382
    0;                                                                             \
383
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
384
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
385
    register unsigned int __set_r = PLOCK32_SL_1 | PLOCK32_RL_1;                   \
386
    register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY;               \
387
    while (1) {                                                                    \
388
      if (!__builtin_expect(pl_ldadd_acq(__lk_r, __set_r) & __msk_r, 0))     \
389
        break;                                                         \
390
      pl_sub_noret_lax(__lk_r, __set_r);                                     \
391
      pl_wait_unlock_int(__lk_r, __msk_r);                                   \
392
    }                                                                              \
393
    pl_barrier();                                                                  \
394
    0;                                                                             \
395
  }) : ({                                                                                \
396
    void __unsupported_argument_size_for_pl_take_s__(char *,int);                  \
397
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
398
      __unsupported_argument_size_for_pl_take_s__(__FILE__,__LINE__);        \
399
    0;                                                                             \
400
  })
401
402
/* release the seek access (S) lock */
403
#define pl_drop_s(lock) (                                                                      \
404
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
405
    pl_barrier();                                                                  \
406
    pl_sub_noret_rel(lock, PLOCK64_SL_1 + PLOCK64_RL_1);                           \
407
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
408
    pl_barrier();                                                                  \
409
    pl_sub_noret_rel(lock, PLOCK32_SL_1 + PLOCK32_RL_1);                           \
410
  }) : ({                                                                                \
411
    void __unsupported_argument_size_for_pl_drop_s__(char *,int);                  \
412
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
413
      __unsupported_argument_size_for_pl_drop_s__(__FILE__,__LINE__);        \
414
  })                                                                                     \
415
)
416
417
/* drop the S lock and go back to the R lock */
418
#define pl_stor(lock) (                                                                        \
419
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
420
    pl_barrier();                                                                  \
421
    pl_sub_noret(lock, PLOCK64_SL_1);                                              \
422
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
423
    pl_barrier();                                                                  \
424
    pl_sub_noret(lock, PLOCK32_SL_1);                                              \
425
  }) : ({                                                                                \
426
    void __unsupported_argument_size_for_pl_stor__(char *,int);                    \
427
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
428
      __unsupported_argument_size_for_pl_stor__(__FILE__,__LINE__);          \
429
  })                                                                                     \
430
)
431
432
/* take the W lock under the S lock */
433
#define pl_stow(lock) (                                                                        \
434
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
435
    register unsigned long __pl_r = pl_ldadd((lock), PLOCK64_WL_1);                \
436
    if (__pl_r & (PLOCK64_RL_ANY & ~PLOCK64_RL_1))                                 \
437
      __pl_r = pl_wait_unlock_long((const unsigned long*)lock, (PLOCK64_RL_ANY & ~PLOCK64_RL_1));  \
438
    pl_barrier();                                                                  \
439
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
440
    register unsigned int __pl_r = pl_ldadd((lock), PLOCK32_WL_1);                 \
441
    if (__pl_r & (PLOCK32_RL_ANY & ~PLOCK32_RL_1))                                 \
442
      __pl_r = pl_wait_unlock_int((const unsigned int*)lock, (PLOCK32_RL_ANY & ~PLOCK32_RL_1)); \
443
    pl_barrier();                                                                  \
444
  }) : ({                                                                                \
445
    void __unsupported_argument_size_for_pl_stow__(char *,int);                    \
446
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
447
      __unsupported_argument_size_for_pl_stow__(__FILE__,__LINE__);          \
448
  })                                                                                     \
449
)
450
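
The S lock is typically used to inspect a structure while keeping the right to upgrade: take S, decide, then either upgrade to W with pl_stow() or simply release with pl_drop_s() (or downgrade to R with pl_stor()). A hedged sketch with hypothetical names:

static unsigned int tree_lock;            /* hypothetical lock word */

static void bump_if_negative(int *counter)
{
        pl_take_s(&tree_lock);            /* seek access: readers may continue */
        if (*counter < 0) {
                pl_stow(&tree_lock);      /* upgrade S->W, waits for readers to leave */
                *counter = 0;             /* exclusive write-side section */
                pl_drop_w(&tree_lock);    /* releases W, S and R together */
        } else {
                pl_drop_s(&tree_lock);    /* nothing to change, release S */
        }
}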
451
/* drop the W lock and go back to the S lock */
452
#define pl_wtos(lock) (                                                                        \
453
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
454
    pl_barrier();                                                                  \
455
    pl_sub_noret(lock, PLOCK64_WL_1);                                              \
456
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
457
    pl_barrier();                                                                  \
458
    pl_sub_noret(lock, PLOCK32_WL_1);                                              \
459
  }) : ({                                                                                \
460
    void __unsupported_argument_size_for_pl_wtos__(char *,int);                    \
461
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
462
      __unsupported_argument_size_for_pl_wtos__(__FILE__,__LINE__);          \
463
  })                                                                                     \
464
)
465
466
/* drop the W lock and go back to the R lock */
467
#define pl_wtor(lock) (                                                                        \
468
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
469
    pl_barrier();                                                                  \
470
    pl_sub_noret(lock, PLOCK64_WL_1 | PLOCK64_SL_1);                               \
471
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
472
    pl_barrier();                                                                  \
473
    pl_sub_noret(lock, PLOCK32_WL_1 | PLOCK32_SL_1);                               \
474
  }) : ({                                                                                \
475
    void __unsupported_argument_size_for_pl_wtor__(char *,int);                    \
476
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
477
      __unsupported_argument_size_for_pl_wtor__(__FILE__,__LINE__);          \
478
  })                                                                                     \
479
)
480
481
/* request a write access (W), return non-zero on success, otherwise 0.
482
 *
483
 * An important note: by taking both W and S, we will cause
484
 * an overflow of W at 4/5 of the maximum value that can be stored into W due
485
 * to the fact that S is 2 bits, so we're effectively adding 5 to the word
486
 * composed by W:S. But for all words multiple of 4 bits, the maximum value is
487
 * multiple of 15 thus of 5. So the largest value we can store with all bits
488
 * set to one will be met by adding 5, and then adding 5 again will place value
489
 * 1 in W and value 0 in S, so we never leave W with 0. Also, even upon such an
490
 * overflow, there's no risk to confuse it with an atomic lock because R is not
491
 * null since it will not have overflown. For 32-bit locks, this situation
492
 * happens when exactly 13108 threads try to grab the lock at once, W=1, S=0
493
 * and R=13108. For 64-bit locks, it happens at 858993460 concurrent writers
494
 * where W=1, S=0 and R=858993460.
495
 */
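
A quick numeric check of the 32-bit overflow scenario described above (illustrative only, not part of the header); the unsigned multiplication wraps exactly like the repeated ldadd would:

/* Sketch: verify that 13108 simultaneous W attempts leave W=1, S=0, R=13108. */
#include <assert.h>

static void check_w_overflow(void)
{
        unsigned int step = PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1; /* 0x00050004 */
        unsigned int word = step * 13108u;        /* wraps modulo 2^32, like the counter */

        assert((word & PLOCK32_WL_ANY) == PLOCK32_WL_1);           /* W == 1 */
        assert((word & PLOCK32_SL_ANY) == 0);                      /* S == 0 */
        assert((word & PLOCK32_RL_ANY) == 13108u * PLOCK32_RL_1);  /* R == 13108 */
}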
496
#define pl_try_w(lock) (                                                                       \
497
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
498
    register unsigned long __pl_r = pl_deref_long(lock);                           \
499
    pl_barrier();                                                                  \
500
    if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) {        \
501
      __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);\
502
      if (__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
503
        /* a writer, seeker or atomic is present, let's leave */       \
504
        pl_sub_noret_lax((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);\
505
        __pl_r &= (PLOCK64_WL_ANY | PLOCK64_SL_ANY); /* return value */\
506
      } else {                                                               \
507
        /* wait for all other readers to leave */                      \
508
        while (__pl_r)                                                 \
509
          __pl_r = pl_deref_long(lock) -                         \
510
            (PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);  \
511
      }                                                                      \
512
    }                                                                              \
513
    !__pl_r; /* return value */                                                    \
514
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
515
    register unsigned int __pl_r = pl_deref_int(lock);                             \
516
    pl_barrier();                                                                  \
517
    if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) {        \
518
      __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);\
519
      if (__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
520
        /* a writer, seeker or atomic is present, let's leave */       \
521
        pl_sub_noret_lax((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);\
522
        __pl_r &= (PLOCK32_WL_ANY | PLOCK32_SL_ANY); /* return value */\
523
      } else {                                                               \
524
        /* wait for all other readers to leave */                      \
525
        while (__pl_r)                                                 \
526
          __pl_r = pl_deref_int(lock) -                          \
527
            (PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);  \
528
      }                                                                      \
529
    }                                                                              \
530
    !__pl_r; /* return value */                                                    \
531
  }) : ({                                                                                \
532
    void __unsupported_argument_size_for_pl_try_w__(char *,int);                   \
533
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
534
      __unsupported_argument_size_for_pl_try_w__(__FILE__,__LINE__);         \
535
    0;                                                                             \
536
  })                                                                                     \
537
)
538
539
/* request a write access (W) and wait for it. The lock is immediately claimed,
540
 * and only upon failure an exponential backoff is used.
541
 */
542
#define pl_take_w(lock)                                                                        \
543
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
544
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
545
    register unsigned long __set_r = PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1;   \
546
    register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY;              \
547
    register unsigned long __pl_r;                                                 \
548
    while (1) {                                                                    \
549
      __pl_r = pl_ldadd_acq(__lk_r, __set_r);                                \
550
      if (!__builtin_expect(__pl_r & __msk_r, 0))                            \
551
        break;                                                         \
552
      if (!__builtin_expect(__pl_r & PLOCK64_WL_ANY, 0)) {                   \
553
        /* S only: let it finish but impose ourselves */               \
554
        pl_sub_noret_lax(__lk_r, PLOCK64_RL_1);                        \
555
        __pl_r = pl_wait_unlock_long(__lk_r, PLOCK64_RL_ANY);          \
556
        __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_RL_1);                   \
557
        break;                                                         \
558
      }                                                                      \
559
      pl_sub_noret_lax(__lk_r, __set_r);                                     \
560
      __pl_r = pl_wait_unlock_long(__lk_r, __msk_r);                         \
561
    }                                                                              \
562
    /* wait for all other readers to leave */                                      \
563
    if (__builtin_expect(__pl_r & PLOCK64_RL_ANY, 0))            \
564
      __pl_r = pl_wait_unlock_long(__lk_r, (PLOCK64_RL_ANY & ~PLOCK64_RL_1)) - __set_r;  \
565
    pl_barrier();                                                                  \
566
    0;                                                                             \
567
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
568
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
569
    register unsigned int __set_r = PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1;    \
570
    register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY;               \
571
    register unsigned int __pl_r;                                                  \
572
    while (1) {                                                                    \
573
      __pl_r = pl_ldadd_acq(__lk_r, __set_r);                                \
574
      if (!__builtin_expect(__pl_r & __msk_r, 0))                            \
575
        break;                                                         \
576
      if (!__builtin_expect(__pl_r & PLOCK32_WL_ANY, 0)) {                   \
577
        /* S only: let it finish but impose ourselves */               \
578
        pl_sub_noret_lax(__lk_r, PLOCK32_RL_1);                        \
579
        __pl_r = pl_wait_unlock_int(__lk_r, PLOCK32_RL_ANY);          \
580
        __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_RL_1);                   \
581
        break;                                                         \
582
      }                                                                      \
583
      pl_sub_noret_lax(__lk_r, __set_r);                                     \
584
      __pl_r = pl_wait_unlock_int(__lk_r, __msk_r);                          \
585
    }                                                                              \
586
    /* wait for all other readers to leave */                                      \
587
    if (__builtin_expect(__pl_r & PLOCK32_RL_ANY, 0))            \
588
      __pl_r = pl_wait_unlock_int(__lk_r, (PLOCK32_RL_ANY & ~PLOCK32_RL_1)) - __set_r;  \
589
    pl_barrier();                                                                  \
590
    0;                                                                             \
591
  }) : ({                                                                                \
592
    void __unsupported_argument_size_for_pl_take_w__(char *,int);                  \
593
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
594
      __unsupported_argument_size_for_pl_take_w__(__FILE__,__LINE__);        \
595
    0;                                                                             \
596
  })
597
598
/* drop the write (W) lock entirely */
599
#define pl_drop_w(lock) (                                                                      \
600
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
601
    pl_barrier();                                                                  \
602
    pl_sub_noret_rel(lock, PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);            \
603
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
604
    pl_barrier();                                                                  \
605
    pl_sub_noret_rel(lock, PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);            \
606
  }) : ({                                                                                \
607
    void __unsupported_argument_size_for_pl_drop_w__(char *,int);                  \
608
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
609
      __unsupported_argument_size_for_pl_drop_w__(__FILE__,__LINE__);        \
610
  })                                                                                     \
611
)
612
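
For completeness, the plain writer-side pattern built on pl_take_w()/pl_drop_w() above (hedged sketch, hypothetical names):

static unsigned int cfg_lock;             /* hypothetical lock word */

static void set_limit(int *limit, int v)
{
        pl_take_w(&cfg_lock);             /* claims W and waits for R/S holders */
        *limit = v;                       /* exclusive write-side section */
        pl_drop_w(&cfg_lock);             /* releases W, S and R together */
}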
613
/* Try to upgrade from R to S, return non-zero on success, otherwise 0.
614
 * This lock will fail if S or W are already held. In case of failure to grab
615
 * the lock, it MUST NOT be retried without first dropping R, or it may never
616
 * complete due to S waiting for R to leave before upgrading to W.
617
 */
618
#define pl_try_rtos(lock) (                                                                    \
619
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
620
    register unsigned long __pl_r;                                                 \
621
    __pl_r = pl_ldadd_acq((lock), PLOCK64_SL_1) & (PLOCK64_WL_ANY | PLOCK64_SL_ANY);\
622
    if (__builtin_expect(__pl_r, 0))                                               \
623
      pl_sub_noret_lax((lock), PLOCK64_SL_1);                                \
624
    !__pl_r; /* return value */                                                    \
625
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
626
    register unsigned int __pl_r;                                                  \
627
    __pl_r = pl_ldadd_acq((lock), PLOCK32_SL_1) & (PLOCK32_WL_ANY | PLOCK32_SL_ANY);\
628
    if (__builtin_expect(__pl_r, 0))                                               \
629
      pl_sub_noret_lax((lock), PLOCK32_SL_1);                                \
630
    !__pl_r; /* return value */                                                    \
631
  }) : ({                                                                                \
632
    void __unsupported_argument_size_for_pl_try_rtos__(char *,int);                \
633
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
634
      __unsupported_argument_size_for_pl_try_rtos__(__FILE__,__LINE__);      \
635
    0;                                                                             \
636
  })                                                                                     \
637
)
638
639
640
/* Try to upgrade from R to W, return non-zero on success, otherwise 0.
641
 * This lock will fail if S or W are already held. In case of failure to grab
642
 * the lock, it MUST NOT be retried without first dropping R, or it may never
643
 * complete due to S waiting for R to leave before upgrading to W. It waits for
644
 * the last readers to leave.
645
 */
646
#define pl_try_rtow(lock) (                                                                    \
647
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
648
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
649
    register unsigned long __set_r = PLOCK64_WL_1 | PLOCK64_SL_1;                  \
650
    register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY;              \
651
    register unsigned long __pl_r;                                                 \
652
    pl_barrier();                                                                  \
653
    while (1) {                                                                    \
654
      __pl_r = pl_ldadd_acq(__lk_r, __set_r);                                \
655
      if (__builtin_expect(__pl_r & __msk_r, 0)) {                           \
656
        if (pl_ldadd_lax(__lk_r, - __set_r))                           \
657
          break; /* the caller needs to drop the lock now */     \
658
        continue;  /* lock was released, try again */                  \
659
      }                                                                      \
660
      /* ok we're the only writer, wait for readers to leave */              \
661
      while (__builtin_expect(__pl_r, 0))                                    \
662
        __pl_r = pl_deref_long(__lk_r) - (PLOCK64_WL_1|PLOCK64_SL_1|PLOCK64_RL_1); \
663
      /* now return with __pl_r = 0 */                                       \
664
      break;                                                                 \
665
    }                                                                              \
666
    !__pl_r; /* return value */                                                    \
667
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
668
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
669
    register unsigned int __set_r = PLOCK32_WL_1 | PLOCK32_SL_1;                   \
670
    register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY;               \
671
    register unsigned int __pl_r;                                                  \
672
    pl_barrier();                                                                  \
673
    while (1) {                                                                    \
674
      __pl_r = pl_ldadd_acq(__lk_r, __set_r);                                \
675
      if (__builtin_expect(__pl_r & __msk_r, 0)) {                           \
676
        if (pl_ldadd_lax(__lk_r, - __set_r))                           \
677
          break; /* the caller needs to drop the lock now */     \
678
        continue;  /* lock was released, try again */                  \
679
      }                                                                      \
680
      /* ok we're the only writer, wait for readers to leave */              \
681
      while (__builtin_expect(__pl_r, 0))                                    \
682
        __pl_r = pl_deref_int(__lk_r) - (PLOCK32_WL_1|PLOCK32_SL_1|PLOCK32_RL_1); \
683
      /* now return with __pl_r = 0 */                                       \
684
      break;                                                                 \
685
    }                                                                              \
686
    !__pl_r; /* return value */                                                    \
687
  }) : ({                                                                                \
688
    void __unsupported_argument_size_for_pl_try_rtow__(char *,int);                \
689
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
690
      __unsupported_argument_size_for_pl_try_rtow__(__FILE__,__LINE__);      \
691
    0;                                                                             \
692
  })                                                                                     \
693
)
694
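
The warning above matters in practice: on failure the caller still holds R and must drop it before trying again. A hedged sketch of the usual upgrade-or-fall-back pattern (hypothetical names):

static unsigned int obj_lock;                    /* hypothetical lock word */

static void refresh(int *cached, int fresh)
{
        pl_take_r(&obj_lock);
        if (*cached == fresh) {                  /* nothing to do under R */
                pl_drop_r(&obj_lock);
                return;
        }
        if (!pl_try_rtow(&obj_lock)) {           /* upgrade failed: S or W present */
                pl_drop_r(&obj_lock);            /* MUST drop R before retrying */
                pl_take_w(&obj_lock);            /* take W from scratch instead */
        }
        *cached = fresh;                         /* exclusive write-side section */
        pl_drop_w(&obj_lock);
}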
695
696
/* request atomic write access (A), return non-zero on success, otherwise 0.
697
 * It's a bit tricky as we only use the W bits for this and want to distinguish
698
 * between other atomic users and regular lock users. We have to give up if an
699
 * S lock appears. It's possible that such a lock stays hidden in the W bits
700
 * after an overflow, but in this case R is still held, ensuring we stay in the
701
 * loop until we discover the conflict. The lock only returns successfully if all
702
 * readers are gone (or converted to A).
703
 */
704
#define pl_try_a(lock) (                                                                       \
705
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
706
    register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY;          \
707
    pl_barrier();                                                                  \
708
    if (!__builtin_expect(__pl_r, 0)) {                                            \
709
      __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1);                           \
710
      while (1) {                                                            \
711
        if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) {            \
712
          pl_sub_noret_lax((lock), PLOCK64_WL_1);                \
713
          break;  /* return !__pl_r */                           \
714
        }                                                              \
715
        __pl_r &= PLOCK64_RL_ANY;                                      \
716
        if (!__builtin_expect(__pl_r, 0))                              \
717
          break;  /* return !__pl_r */                           \
718
        __pl_r = pl_deref_long(lock);                                  \
719
      }                                                                      \
720
    }                                                                              \
721
    !__pl_r; /* return value */                                                    \
722
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
723
    register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY;            \
724
    pl_barrier();                                                                  \
725
    if (!__builtin_expect(__pl_r, 0)) {                                            \
726
      __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1);                           \
727
      while (1) {                                                            \
728
        if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) {            \
729
          pl_sub_noret_lax((lock), PLOCK32_WL_1);                \
730
          break;  /* return !__pl_r */                           \
731
        }                                                              \
732
        __pl_r &= PLOCK32_RL_ANY;                                      \
733
        if (!__builtin_expect(__pl_r, 0))                              \
734
          break;  /* return !__pl_r */                           \
735
        __pl_r = pl_deref_int(lock);                                   \
736
      }                                                                      \
737
    }                                                                              \
738
    !__pl_r; /* return value */                                                    \
739
  }) : ({                                                                                \
740
    void __unsupported_argument_size_for_pl_try_a__(char *,int);                   \
741
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
742
      __unsupported_argument_size_for_pl_try_a__(__FILE__,__LINE__);         \
743
    0;                                                                             \
744
  })                                                                                     \
745
)
746
747
/* request atomic write access (A) and wait for it. See comments in pl_try_a() for
748
 * explanations.
749
 */
750
#define pl_take_a(lock)                                                                        \
751
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
752
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
753
    register unsigned long __set_r = PLOCK64_WL_1;                                 \
754
    register unsigned long __msk_r = PLOCK64_SL_ANY;                               \
755
    register unsigned long __pl_r;                                                 \
756
    __pl_r = pl_ldadd_acq(__lk_r, __set_r);                                        \
757
    while (__builtin_expect(__pl_r & PLOCK64_RL_ANY, 0)) {                         \
758
      if (__builtin_expect(__pl_r & __msk_r, 0)) {                           \
759
        pl_sub_noret_lax(__lk_r, __set_r);                             \
760
        pl_wait_unlock_long(__lk_r, __msk_r);                          \
761
        __pl_r = pl_ldadd_acq(__lk_r, __set_r);                        \
762
        continue;                                                      \
763
      }                                                                      \
764
      /* wait for all readers to leave or upgrade */                         \
765
      pl_cpu_relax(); pl_cpu_relax(); pl_cpu_relax();                        \
766
      __pl_r = pl_deref_long(lock);                                          \
767
    }                                                                              \
768
    pl_barrier();                                                                  \
769
    0;                                                                             \
770
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
771
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
772
    register unsigned int __set_r = PLOCK32_WL_1;                                  \
773
    register unsigned int __msk_r = PLOCK32_SL_ANY;                                \
774
    register unsigned int __pl_r;                                                  \
775
    __pl_r = pl_ldadd_acq(__lk_r, __set_r);                                        \
776
    while (__builtin_expect(__pl_r & PLOCK32_RL_ANY, 0)) {                         \
777
      if (__builtin_expect(__pl_r & __msk_r, 0)) {                           \
778
        pl_sub_noret_lax(__lk_r, __set_r);                             \
779
        pl_wait_unlock_int(__lk_r, __msk_r);                           \
780
        __pl_r = pl_ldadd_acq(__lk_r, __set_r);                        \
781
        continue;                                                      \
782
      }                                                                      \
783
      /* wait for all readers to leave or upgrade */                         \
784
      pl_cpu_relax(); pl_cpu_relax(); pl_cpu_relax();                        \
785
      __pl_r = pl_deref_int(lock);                                           \
786
    }                                                                              \
787
    pl_barrier();                                                                  \
788
    0;                                                                             \
789
  }) : ({                                                                                \
790
    void __unsupported_argument_size_for_pl_take_a__(char *,int);                  \
791
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
792
      __unsupported_argument_size_for_pl_take_a__(__FILE__,__LINE__);        \
793
    0;                                                                             \
794
  })
795
796
/* release atomic write access (A) lock */
797
#define pl_drop_a(lock) (                                                                      \
798
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
799
    pl_barrier();                                                                  \
800
    pl_sub_noret_rel(lock, PLOCK64_WL_1);                                          \
801
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
802
    pl_barrier();                                                                  \
803
    pl_sub_noret_rel(lock, PLOCK32_WL_1);                                          \
804
  }) : ({                                                                                \
805
    void __unsupported_argument_size_for_pl_drop_a__(char *,int);                  \
806
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
807
      __unsupported_argument_size_for_pl_drop_a__(__FILE__,__LINE__);        \
808
  })                                                                                     \
809
)
810
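
Unlike W, several threads may hold the A lock at once, provided they only perform atomic operations on the protected data; regular R/S/W users are kept out in the meantime. A hedged sketch with hypothetical names, assuming a 64-bit platform:

static unsigned long map_lock;           /* hypothetical 64-bit lock word */

static void add_hit(unsigned long *counter)
{
        pl_take_a(&map_lock);            /* joins other atomic users, excludes R/S/W */
        pl_ldadd(counter, 1);            /* only atomic operations in this section */
        pl_drop_a(&map_lock);
}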
811
/* Downgrade A to R. Inc(R), dec(W) then wait for W==0 */
812
#define pl_ator(lock) (                                                                        \
813
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
814
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
815
    register unsigned long __set_r = PLOCK64_RL_1 - PLOCK64_WL_1;                  \
816
    register unsigned long __msk_r = PLOCK64_WL_ANY;                               \
817
    register unsigned long __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r;           \
818
    while (__builtin_expect(__pl_r & __msk_r, 0)) {                                \
819
      __pl_r = pl_wait_unlock_long(__lk_r, __msk_r);                         \
820
    }                                                                              \
821
    pl_barrier();                                                                  \
822
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
823
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
824
    register unsigned int __set_r = PLOCK32_RL_1 - PLOCK32_WL_1;                   \
825
    register unsigned int __msk_r = PLOCK32_WL_ANY;                                \
826
    register unsigned int __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r;            \
827
    while (__builtin_expect(__pl_r & __msk_r, 0)) {                                \
828
      __pl_r = pl_wait_unlock_int(__lk_r, __msk_r);                          \
829
    }                                                                              \
830
    pl_barrier();                                                                  \
831
  }) : ({                                                                                \
832
    void __unsupported_argument_size_for_pl_ator__(char *,int);                    \
833
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
834
      __unsupported_argument_size_for_pl_ator__(__FILE__,__LINE__);          \
835
  })                                                                                     \
836
)
837
838
/* Try to upgrade from R to A, return non-zero on success, otherwise 0.
839
 * This lock will fail if S is held or appears while waiting (typically due to
840
 * a previous grab that was disguised as a W due to an overflow). In case of
841
 * failure to grab the lock, it MUST NOT be retried without first dropping R,
842
 * or it may never complete due to S waiting for R to leave before upgrading
843
 * to W. The lock succeeds once there's no more R (i.e. all of them have either
844
 * completed or were turned to A).
845
 */
846
#define pl_try_rtoa(lock) (                                                                    \
847
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
848
    register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY;          \
849
    pl_barrier();                                                                  \
850
    if (!__builtin_expect(__pl_r, 0)) {                                            \
851
      __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1 - PLOCK64_RL_1);            \
852
      while (1) {                                                            \
853
        if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) {            \
854
          pl_sub_noret_lax((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
855
          break;  /* return !__pl_r */                           \
856
        }                                                              \
857
        __pl_r &= PLOCK64_RL_ANY;                                      \
858
        if (!__builtin_expect(__pl_r, 0))                              \
859
          break;  /* return !__pl_r */                           \
860
        __pl_r = pl_deref_long(lock);                                  \
861
      }                                                                      \
862
    }                                                                              \
863
    !__pl_r; /* return value */                                                    \
864
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
865
    register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY;            \
866
    pl_barrier();                                                                  \
867
    if (!__builtin_expect(__pl_r, 0)) {                                            \
868
      __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1 - PLOCK32_RL_1);            \
869
      while (1) {                                                            \
870
        if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) {            \
871
          pl_sub_noret_lax((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
872
          break;  /* return !__pl_r */                           \
873
        }                                                              \
874
        __pl_r &= PLOCK32_RL_ANY;                                      \
875
        if (!__builtin_expect(__pl_r, 0))                              \
876
          break;  /* return !__pl_r */                           \
877
        __pl_r = pl_deref_int(lock);                                   \
878
      }                                                                      \
879
    }                                                                              \
880
    !__pl_r; /* return value */                                                    \
881
  }) : ({                                                                                \
882
    void __unsupported_argument_size_for_pl_try_rtoa__(char *,int);                \
883
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
884
      __unsupported_argument_size_for_pl_try_rtoa__(__FILE__,__LINE__);      \
885
    0;                                                                             \
886
  })                                                                                     \
887
)
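/* Illustrative sketch: upgrading a read lock to atomic write access while
 * honouring the constraint above, i.e. R is dropped before any new attempt.
 * pl_take_r() and pl_drop_r() are assumed to be the read-lock helpers from
 * the earlier part of this header.
 */
static inline void pl_example_upgrade_read(unsigned long *lock)
{
        pl_take_r(lock);                  /* start as a plain reader (R)     */
        while (!pl_try_rtoa(lock)) {      /* attempt the R -> A upgrade      */
                pl_drop_r(lock);          /* mandatory before retrying       */
                /* ... optional backoff ... */
                pl_take_r(lock);          /* take R again and retry          */
        }
        /* ... atomic write phase (A) ... */
        pl_drop_a(lock);
}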
888
889
890
/*
891
 * The following operations cover the multiple writers model: U->R->J->C->A
 * (an illustrative usage sketch follows pl_drop_j() below)
892
 */
893
894
895
/* Upgrade R to J. Inc(W) then wait for R==W or S != 0 */
896
#define pl_rtoj(lock) (                                                                        \
897
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
898
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
899
    register unsigned long __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1;\
900
    register unsigned char __m = 0;                                                \
901
    while (!(__pl_r & PLOCK64_SL_ANY) &&                                           \
902
           (__pl_r / PLOCK64_WL_1 != (__pl_r & PLOCK64_RL_ANY) / PLOCK64_RL_1)) {  \
903
      unsigned char __loops = __m + 1;                                       \
904
      __m = (__m << 1) + 1;                                                  \
905
      do {                                                                   \
906
        pl_cpu_relax();                                                \
907
        pl_cpu_relax();                                                \
908
      } while (--__loops);                                                   \
909
      __pl_r = pl_deref_long(__lk_r);                                        \
910
    }                                                                              \
911
    pl_barrier();                                                                  \
912
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
913
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
914
    register unsigned int __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1;\
915
    register unsigned char __m = 0;                                                \
916
    while (!(__pl_r & PLOCK32_SL_ANY) &&                                           \
917
           (__pl_r / PLOCK32_WL_1 != (__pl_r & PLOCK32_RL_ANY) / PLOCK32_RL_1)) {  \
918
      unsigned char __loops = __m + 1;                                       \
919
      __m = (__m << 1) + 1;                                                  \
920
      do {                                                                   \
921
        pl_cpu_relax();                                                \
922
        pl_cpu_relax();                                                \
923
      } while (--__loops);                                                   \
924
      __pl_r = pl_deref_int(__lk_r);                                         \
925
    }                                                                              \
926
    pl_barrier();                                                                  \
927
  }) : ({                                                                                \
928
    void __unsupported_argument_size_for_pl_rtoj__(char *,int);                    \
929
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
930
      __unsupported_argument_size_for_pl_rtoj__(__FILE__,__LINE__);          \
931
  })                                                                                     \
932
)
933
934
/* Upgrade J to C. Set S. Only one thread needs to do it though it's idempotent */
935
#define pl_jtoc(lock) (                                                                        \
936
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
937
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
938
    register unsigned long __pl_r = pl_deref_long(__lk_r);                         \
939
    if (!(__pl_r & PLOCK64_SL_ANY))                                                \
940
      pl_or_noret(__lk_r, PLOCK64_SL_1);                                     \
941
    pl_barrier();                                                                  \
942
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
943
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
944
    register unsigned int __pl_r = pl_deref_int(__lk_r);                           \
945
    if (!(__pl_r & PLOCK32_SL_ANY))                                                \
946
      pl_or_noret(__lk_r, PLOCK32_SL_1);                                     \
947
    pl_barrier();                                                                  \
948
  }) : ({                                                                                \
949
    void __unsupported_argument_size_for_pl_jtoc__(char *,int);                    \
950
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
951
      __unsupported_argument_size_for_pl_jtoc__(__FILE__,__LINE__);          \
952
  })                                                                                     \
953
)
954
955
/* Upgrade R to C. Inc(W) then wait for S != 0, or set S once R==W */
956
#define pl_rtoc(lock) (                                                                        \
957
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
958
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
959
    register unsigned long __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1;\
960
    register unsigned char __m = 0;                                                \
961
    while (__builtin_expect(!(__pl_r & PLOCK64_SL_ANY), 0)) {                      \
962
      unsigned char __loops;                                                 \
963
      if (__pl_r / PLOCK64_WL_1 == (__pl_r & PLOCK64_RL_ANY) / PLOCK64_RL_1) { \
964
        pl_or_noret(__lk_r, PLOCK64_SL_1);                             \
965
        break;                                                         \
966
      }                                                                      \
967
      __loops = __m + 1;                                                     \
968
      __m = (__m << 1) + 1;                                                  \
969
      do {                                                                   \
970
        pl_cpu_relax();                                                \
971
        pl_cpu_relax();                                                \
972
      } while (--__loops);                                                   \
973
      __pl_r = pl_deref_long(__lk_r);                                        \
974
    }                                                                              \
975
    pl_barrier();                                                                  \
976
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
977
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
978
    register unsigned int __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1;\
979
    register unsigned char __m = 0;                                                \
980
    while (__builtin_expect(!(__pl_r & PLOCK32_SL_ANY), 0)) {                      \
981
      unsigned char __loops;                                                 \
982
      if (__pl_r / PLOCK32_WL_1 == (__pl_r & PLOCK32_RL_ANY) / PLOCK32_RL_1) { \
983
        pl_or_noret(__lk_r, PLOCK32_SL_1);                             \
984
        break;                                                         \
985
      }                                                                      \
986
      __loops = __m + 1;                                                     \
987
      __m = (__m << 1) + 1;                                                  \
988
      do {                                                                   \
989
        pl_cpu_relax();                                                \
990
        pl_cpu_relax();                                                \
991
      } while (--__loops);                                                   \
992
      __pl_r = pl_deref_int(__lk_r);                                         \
993
    }                                                                              \
994
    pl_barrier();                                                                  \
995
  }) : ({                                                                                \
996
    void __unsupported_argument_size_for_pl_rtoc__(char *,int);                    \
997
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
998
      __unsupported_argument_size_for_pl_rtoc__(__FILE__,__LINE__);          \
999
  })                                                                                     \
1000
)
1001
1002
/* Drop the claim (C) lock: R--, W-- then clear S if !R */
1003
#define pl_drop_c(lock) (                                                                      \
1004
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
1005
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
1006
    register unsigned long __set_r = - PLOCK64_RL_1 - PLOCK64_WL_1;                \
1007
    register unsigned long __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r;           \
1008
    if (!(__pl_r & PLOCK64_RL_ANY))                                                \
1009
      pl_and_noret(__lk_r, ~PLOCK64_SL_1);                                   \
1010
    pl_barrier();                                                                  \
1011
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
1012
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
1013
    register unsigned int __set_r = - PLOCK32_RL_1 - PLOCK32_WL_1;                 \
1014
    register unsigned int __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r;            \
1015
    if (!(__pl_r & PLOCK32_RL_ANY))                                                \
1016
      pl_and_noret(__lk_r, ~PLOCK32_SL_1);                                   \
1017
    pl_barrier();                                                                  \
1018
  }) : ({                                                                                \
1019
    void __unsupported_argument_size_for_pl_drop_c__(char *,int);                  \
1020
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
1021
      __unsupported_argument_size_for_pl_drop_c__(__FILE__,__LINE__);        \
1022
  })                                                                                     \
1023
)
1024
1025
/* Upgrade C to A. R-- then wait for !S or clear S if !R */
1026
#define pl_ctoa(lock) (                                                                        \
1027
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
1028
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
1029
    register unsigned long __pl_r = pl_ldadd(__lk_r, -PLOCK64_RL_1) - PLOCK64_RL_1;\
1030
    while (__pl_r & PLOCK64_SL_ANY) {                                              \
1031
      if (!(__pl_r & PLOCK64_RL_ANY)) {                                      \
1032
        pl_and_noret(__lk_r, ~PLOCK64_SL_1);                           \
1033
        break;                                                         \
1034
      }                                                                      \
1035
      pl_cpu_relax();                                                        \
1036
      pl_cpu_relax();                                                        \
1037
      __pl_r = pl_deref_long(__lk_r);                                        \
1038
    }                                                                              \
1039
    pl_barrier();                                                                  \
1040
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
1041
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
1042
    register unsigned int __pl_r = pl_ldadd(__lk_r, -PLOCK32_RL_1) - PLOCK32_RL_1; \
1043
    while (__pl_r & PLOCK32_SL_ANY) {                                              \
1044
      if (!(__pl_r & PLOCK32_RL_ANY)) {                                      \
1045
        pl_and_noret(__lk_r, ~PLOCK32_SL_1);                           \
1046
        break;                                                         \
1047
      }                                                                      \
1048
      pl_cpu_relax();                                                        \
1049
      pl_cpu_relax();                                                        \
1050
      __pl_r = pl_deref_int(__lk_r);                                         \
1051
    }                                                                              \
1052
    pl_barrier();                                                                  \
1053
  }) : ({                                                                                \
1054
    void __unsupported_argument_size_for_pl_ctoa__(char *,int);                    \
1055
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
1056
      __unsupported_argument_size_for_pl_ctoa__(__FILE__,__LINE__);          \
1057
  })                                                                                     \
1058
)
1059
1060
/* downgrade the atomic write access lock (A) to join (J) */
1061
#define pl_atoj(lock) (                                                                        \
1062
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
1063
    pl_barrier();                                                                  \
1064
    pl_add_noret(lock, PLOCK64_RL_1);                                              \
1065
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
1066
    pl_barrier();                                                                  \
1067
    pl_add_noret(lock, PLOCK32_RL_1);                                              \
1068
  }) : ({                                                                                \
1069
    void __unsupported_argument_size_for_pl_atoj__(char *,int);                    \
1070
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
1071
      __unsupported_argument_size_for_pl_atoj__(__FILE__,__LINE__);          \
1072
  })                                                                                     \
1073
)
1074
1075
/* Returns non-zero if the thread calling it is the last writer, otherwise zero. It is
1076
 * designed to be called before pl_drop_j(), pl_drop_c() or pl_drop_a() for operations
1077
 * which need to be called only once.
1078
 */
1079
#define pl_last_writer(lock) (                                                                 \
1080
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
1081
    !(pl_deref_long(lock) & PLOCK64_WL_2PL);                                       \
1082
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
1083
    !(pl_deref_int(lock) & PLOCK32_WL_2PL);                                        \
1084
  }) : ({                                                                                \
1085
    void __unsupported_argument_size_for_pl_last_writer__(char *,int);             \
1086
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
1087
      __unsupported_argument_size_for_pl_last_writer__(__FILE__,__LINE__);   \
1088
    0;                                                                             \
1089
  })                                                                                     \
1090
)
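/* Illustrative sketch: as suggested above, pl_last_writer() lets exactly one
 * thread of a group of writers perform a once-only action (here a hypothetical
 * commit counter) just before releasing its lock.
 */
static inline void pl_example_release_last(unsigned long *lock, unsigned *commits)
{
        if (pl_last_writer(lock))
                (*commits)++;      /* executed once per group of writers      */
        pl_drop_a(lock);           /* same pattern before pl_drop_j()/_c()    */
}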
1091
1092
/* attempt to get an exclusive write access via the J lock and wait for it.
1093
 * Only one thread may succeed in this operation. It will not conflict with
1094
 * other users and will first wait for all writers to leave, then for all
1095
 * readers to leave before starting. This offers a solution to obtain an
1096
 * exclusive access to a shared resource in the R/J/C/A model. A concurrent
1097
 * take_a() will wait for this one to finish first. Using a CAS instead of XADD
1098
 * should make the operation converge slightly faster. Returns non-zero on
1099
 * success otherwise 0.
1100
 */
1101
#define pl_try_j(lock) (                                                                       \
1102
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
1103
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
1104
    register unsigned long __set_r = PLOCK64_WL_1 | PLOCK64_RL_1;                  \
1105
    register unsigned long __msk_r = PLOCK64_WL_ANY;                               \
1106
    register unsigned long __pl_r;                                                 \
1107
    register unsigned char __m;                                                    \
1108
    pl_wait_unlock_long(__lk_r, __msk_r);                                          \
1109
    __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r;                              \
1110
    /* wait for all other readers to leave */                                      \
1111
    __m = 0;                                                                       \
1112
    while (__builtin_expect(__pl_r & PLOCK64_RL_2PL, 0)) {                         \
1113
      unsigned char __loops;                                                 \
1114
      /* give up on other writers */                                         \
1115
      if (__builtin_expect(__pl_r & PLOCK64_WL_2PL, 0)) {                    \
1116
        pl_sub_noret_lax(__lk_r, __set_r);                             \
1117
        __pl_r = 0; /* failed to get the lock */                       \
1118
        break;                                                         \
1119
      }                                                                      \
1120
      __loops = __m + 1;                                                     \
1121
      __m = (__m << 1) + 1;                                                  \
1122
      do {                                                                   \
1123
        pl_cpu_relax();                                                \
1124
        pl_cpu_relax();                                                \
1125
      } while (--__loops);                                                   \
1126
      __pl_r = pl_deref_long(__lk_r);                                        \
1127
    }                                                                              \
1128
    pl_barrier();                                                                  \
1129
    __pl_r; /* return value, cannot be null on success */                          \
1130
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
1131
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
1132
    register unsigned int __set_r = PLOCK32_WL_1 | PLOCK32_RL_1;                   \
1133
    register unsigned int __msk_r = PLOCK32_WL_ANY;                                \
1134
    register unsigned int __pl_r;                                                  \
1135
    register unsigned char __m;                                                    \
1136
    pl_wait_unlock_int(__lk_r, __msk_r);                                           \
1137
    __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r;                              \
1138
    /* wait for all other readers to leave */                                      \
1139
    __m = 0;                                                                       \
1140
    while (__builtin_expect(__pl_r & PLOCK32_RL_2PL, 0)) {                         \
1141
      unsigned char __loops;                                                 \
1142
      /* but rollback on other writers */                                    \
1143
      if (__builtin_expect(__pl_r & PLOCK32_WL_2PL, 0)) {                    \
1144
        pl_sub_noret_lax(__lk_r, __set_r);                             \
1145
        __pl_r = 0; /* failed to get the lock */                       \
1146
        break;                                                         \
1147
      }                                                                      \
1148
      __loops = __m + 1;                                                     \
1149
      __m = (__m << 1) + 1;                                                  \
1150
      do {                                                                   \
1151
        pl_cpu_relax();                                                \
1152
        pl_cpu_relax();                                                \
1153
      } while (--__loops);                                                   \
1154
      __pl_r = pl_deref_int(__lk_r);                                         \
1155
    }                                                                              \
1156
    pl_barrier();                                                                  \
1157
    __pl_r; /* return value, cannot be null on success */                          \
1158
  }) : ({                                                                                \
1159
    void __unsupported_argument_size_for_pl_try_j__(char *,int);                   \
1160
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
1161
      __unsupported_argument_size_for_pl_try_j__(__FILE__,__LINE__);         \
1162
    0;                                                                             \
1163
  })                                                                                     \
1164
)
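/* Illustrative sketch: opportunistic exclusive access through the J lock,
 * falling back to the blocking pl_take_j() (defined below) when the attempt
 * gives up because of a concurrent writer.
 */
static inline void pl_example_exclusive(unsigned long *lock)
{
        if (!pl_try_j(lock))       /* failed: another writer showed up        */
                pl_take_j(lock);   /* blocking variant, retries internally    */
        /* ... exclusive section: other writers and readers have left ...     */
        pl_drop_j(lock);
}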
1165
1166
/* request an exclusive write access via the J lock and wait for it. Only one
1167
 * thread may succeed in this operation. It will not conflict with other users
1168
 * and will first wait for all writers to leave, then for all readers to leave
1169
 * before starting. This offers a solution to obtain an exclusive access to a
1170
 * shared resource in the R/J/C/A model. A concurrent take_a() will wait for
1171
 * this one to finish first. Using a CAS instead of XADD should make the
1172
 * operation converge slightly faster.
1173
 */
1174
#define pl_take_j(lock) (                                                                      \
1175
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
1176
    __label__ __retry;                                                             \
1177
    register unsigned long *__lk_r = (unsigned long *)(lock);                      \
1178
    register unsigned long __set_r = PLOCK64_WL_1 | PLOCK64_RL_1;                  \
1179
    register unsigned long __msk_r = PLOCK64_WL_ANY;                               \
1180
    register unsigned long __pl_r;                                                 \
1181
    register unsigned char __m;                                                    \
1182
  __retry:                                                                               \
1183
    pl_wait_unlock_long(__lk_r, __msk_r);                                          \
1184
    __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r;                              \
1185
    /* wait for all other readers to leave */                                      \
1186
    __m = 0;                                                                       \
1187
    while (__builtin_expect(__pl_r & PLOCK64_RL_2PL, 0)) {                         \
1188
      unsigned char __loops;                                                 \
1189
      /* but rollback on other writers */                                    \
1190
      if (__builtin_expect(__pl_r & PLOCK64_WL_2PL, 0)) {                    \
1191
        pl_sub_noret_lax(__lk_r, __set_r);                             \
1192
        goto __retry;                                                  \
1193
      }                                                                      \
1194
      __loops = __m + 1;                                                     \
1195
      __m = (__m << 1) + 1;                                                  \
1196
      do {                                                                   \
1197
        pl_cpu_relax();                                                \
1198
        pl_cpu_relax();                                                \
1199
      } while (--__loops);                                                   \
1200
      __pl_r = pl_deref_long(__lk_r);                                        \
1201
    }                                                                              \
1202
    pl_barrier();                                                                  \
1203
    0;                                                                             \
1204
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
1205
    __label__ __retry;                                                             \
1206
    register unsigned int *__lk_r = (unsigned int *)(lock);                        \
1207
    register unsigned int __set_r = PLOCK32_WL_1 | PLOCK32_RL_1;                   \
1208
    register unsigned int __msk_r = PLOCK32_WL_ANY;                                \
1209
    register unsigned int __pl_r;                                                  \
1210
    register unsigned char __m;                                                    \
1211
  __retry:                                                                               \
1212
    pl_wait_unlock_int(__lk_r, __msk_r);                                           \
1213
    __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r;                              \
1214
    /* wait for all other readers to leave */                                      \
1215
    __m = 0;                                                                       \
1216
    while (__builtin_expect(__pl_r & PLOCK32_RL_2PL, 0)) {                         \
1217
      unsigned char __loops;                                                 \
1218
      /* but rollback on other writers */                                    \
1219
      if (__builtin_expect(__pl_r & PLOCK32_WL_2PL, 0)) {                    \
1220
        pl_sub_noret_lax(__lk_r, __set_r);                             \
1221
        goto __retry;                                                  \
1222
      }                                                                      \
1223
      __loops = __m + 1;                                                     \
1224
      __m = (__m << 1) + 1;                                                  \
1225
      do {                                                                   \
1226
        pl_cpu_relax();                                                \
1227
        pl_cpu_relax();                                                \
1228
      } while (--__loops);                                                   \
1229
      __pl_r = pl_deref_int(__lk_r);                                         \
1230
    }                                                                              \
1231
    pl_barrier();                                                                  \
1232
    0;                                                                             \
1233
  }) : ({                                                                                \
1234
    void __unsupported_argument_size_for_pl_take_j__(char *,int);                  \
1235
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
1236
      __unsupported_argument_size_for_pl_take_j__(__FILE__,__LINE__);        \
1237
    0;                                                                             \
1238
  })                                                                                     \
1239
)
1240
1241
/* drop the join (J) lock entirely */
1242
#define pl_drop_j(lock) (                                                                      \
1243
  (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({                                       \
1244
    pl_barrier();                                                                  \
1245
    pl_sub_noret_rel(lock, PLOCK64_WL_1 | PLOCK64_RL_1);                           \
1246
  }) : (sizeof(*(lock)) == 4) ? ({                                                       \
1247
    pl_barrier();                                                                  \
1248
    pl_sub_noret_rel(lock, PLOCK32_WL_1 | PLOCK32_RL_1);                           \
1249
  }) : ({                                                                                \
1250
    void __unsupported_argument_size_for_pl_drop_j__(char *,int);                  \
1251
    if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8))       \
1252
      __unsupported_argument_size_for_pl_drop_j__(__FILE__,__LINE__);        \
1253
  })                                                                                     \
1254
)
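/* Illustrative sketch of the multiple writers model introduced above
 * (U->R->J->C->A), assembled from the transition comments: each writer first
 * visits as a reader, then joins the group, claims, and finally performs its
 * atomic writes together with the other joined writers. pl_take_r() is
 * assumed to be the read-lock helper from the earlier part of this header.
 */
static inline void pl_example_grouped_write(unsigned long *lock)
{
        pl_take_r(lock);          /* U -> R : visit as a reader              */
        /* ... read-only preparation ... */
        pl_rtoj(lock);            /* R -> J : join the group of writers      */
        pl_jtoc(lock);            /* J -> C : claim; idempotent, sets S      */
        pl_ctoa(lock);            /* C -> A : wait for remaining readers     */
        /* ... atomic writes, shared with the other joined writers ... */
        if (pl_last_writer(lock)) {
                /* ... once-only finalization ... */
        }
        pl_drop_a(lock);          /* A -> U : release                        */
}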
1255
1256
/*
1257
 * The part below is for Low Overhead R/W locks (LORW). These ones are not
1258
 * upgradable and not necessarily fair but they try to be fast when uncontended
1259
 * and to limit the cost and perturbation during contention. Writers always
1260
 * have precedence over readers to preserve latency as much as possible.
1261
 *
1262
 * The principle is to offer a fast no-contention path and a limited total
1263
 * number of writes for the contended path. Since R/W locks are expected to be
1264
 * used in situations where there is a benefit in separating reads from writes,
1265
 * it is expected that reads are common (typ >= 50%) and that there is often at
1266
 * least one reader (otherwise a spinlock wouldn't be a problem). As such, a
1267
 * reader will try to pass instantly and, upon detecting contention, will
1268
 * immediately retract and wait in the queue. A writer will first also
1269
 * try to pass instantly, and if it fails due to pending readers, it will mark
1270
 * that it's waiting so that readers stop entering. This will leave the writer
1271
 * waiting as close as possible to the point of being granted access. New
1272
 * writers will also notice this previous contention and will wait outside.
1273
 * This means that a successful access for a reader or a writer requires a
1274
 * single CAS, and a contended attempt will require one failed CAS and one
1275
 * successful XADD for a reader, or an optional OR and an N+1 CAS for the
1276
 * writer.
1277
 *
1278
 * A counter of shared users indicates the number of active readers, while a
1279
 * (single-bit) counter of exclusive writers indicates whether the lock is
1280
 * currently held for writes. This distinction also permits using a single
1281
 * function to release the lock if desired, since the exclusive bit indicates
1282
 * the state of the caller of unlock(). The WRQ bit is cleared during the
1283
 * unlock.
1284
 *
1285
 * Layout (32/64 bit):
1286
 *                      31           2   1     0
1287
 *         +-----------+--------------+-----+-----+
1288
 *         |           |     SHR      | WRQ | EXC |
1289
 *         +-----------+--------------+-----+-----+
1290
 *
1291
 * In order to minimize operations, the WRQ bit is held during EXC so that the
1292
 * write waiter that had to fight for EXC doesn't have to release WRQ during
1293
 * its operations, and will just drop it along with EXC upon unlock.
1294
 *
1295
 * This means the following costs:
1296
 *   reader:
1297
 *      success: 1 CAS
1298
 *      failure: 1 CAS + 1 XADD
1299
 *      unlock:  1 SUB
1300
 *   writer:
1301
 *      success: 1 RD + 1 CAS
1302
 *      failure: 1 RD + 1 CAS + 0/1 OR + N CAS
1303
 *      unlock:  1 AND
1304
 */
1305
1306
#define PLOCK_LORW_EXC_BIT    ((sizeof(long) == 8) ?  0 :  0)
1307
#define PLOCK_LORW_EXC_SIZE   ((sizeof(long) == 8) ?  1 :  1)
1308
#define PLOCK_LORW_EXC_BASE   (1UL << PLOCK_LORW_EXC_BIT)
1309
#define PLOCK_LORW_EXC_MASK   (((1UL << PLOCK_LORW_EXC_SIZE) - 1UL) << PLOCK_LORW_EXC_BIT)
1310
1311
#define PLOCK_LORW_WRQ_BIT    ((sizeof(long) == 8) ?  1 :  1)
1312
#define PLOCK_LORW_WRQ_SIZE   ((sizeof(long) == 8) ?  1 :  1)
1313
#define PLOCK_LORW_WRQ_BASE   (1UL << PLOCK_LORW_WRQ_BIT)
1314
#define PLOCK_LORW_WRQ_MASK   (((1UL << PLOCK_LORW_WRQ_SIZE) - 1UL) << PLOCK_LORW_WRQ_BIT)
1315
1316
#define PLOCK_LORW_SHR_BIT    ((sizeof(long) == 8) ?  2 :  2)
1317
#define PLOCK_LORW_SHR_SIZE   ((sizeof(long) == 8) ? 30 : 30)
1318
#define PLOCK_LORW_SHR_BASE   (1UL << PLOCK_LORW_SHR_BIT)
1319
#define PLOCK_LORW_SHR_MASK   (((1UL << PLOCK_LORW_SHR_SIZE) - 1UL) << PLOCK_LORW_SHR_BIT)
1320
1321
__attribute__((unused,always_inline,no_instrument_function))
1322
static inline void pl_lorw_rdlock(unsigned long *lock)
1323
0
{
1324
0
  unsigned long lk = 0;
1325
0
1326
0
  /* First, assume we're alone and try to get the read lock (fast path).
1327
0
   * It often works because read locks are often used on low-contention
1328
0
   * structs.
1329
0
   */
1330
0
  lk = pl_cmpxchg(lock, 0, PLOCK_LORW_SHR_BASE);
1331
0
  if (!lk)
1332
0
    return;
1333
0
1334
0
  /* so we were not alone, make sure there's no writer waiting for the
1335
0
   * lock to be empty of visitors.
1336
0
   */
1337
0
  if (lk & PLOCK_LORW_WRQ_MASK)
1338
0
#if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO)
1339
0
    lk = __pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK);
1340
0
#else
1341
0
    lk = pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK);
1342
0
#endif
1343
0
1344
0
  /* count us as visitor among others */
1345
0
  lk = pl_ldadd_acq(lock, PLOCK_LORW_SHR_BASE);
1346
0
1347
0
  /* wait for end of exclusive access if any */
1348
0
  if (lk & PLOCK_LORW_EXC_MASK)
1349
0
#if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO)
1350
0
    lk = __pl_wait_unlock_long(lock, PLOCK_LORW_EXC_MASK);
1351
0
#else
1352
0
    lk = pl_wait_unlock_long(lock, PLOCK_LORW_EXC_MASK);
1353
0
#endif
1354
0
}
1355
1356
1357
__attribute__((unused,always_inline,no_instrument_function))
1358
static inline void pl_lorw_wrlock(unsigned long *lock)
1359
0
{
1360
0
  unsigned long lk = 0;
1361
0
  unsigned long old = 0;
1362
0
1363
0
  /* first, make sure another writer is not already blocked waiting for
1364
0
   * readers to leave. Note that tests have shown that it can be even
1365
0
   * faster to avoid the first check and to unconditionally wait.
1366
0
   */
1367
0
  lk = pl_deref_long(lock);
1368
0
  if (__builtin_expect(lk & PLOCK_LORW_WRQ_MASK, 1))
1369
0
#if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO)
1370
0
    lk = __pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK);
1371
0
#else
1372
0
    lk = pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK);
1373
0
#endif
1374
0
1375
0
  do {
1376
0
    /* let's check for the two sources of contention at once */
1377
0
1378
0
    if (__builtin_expect(lk & (PLOCK_LORW_SHR_MASK | PLOCK_LORW_EXC_MASK), 1)) {
1379
0
      /* check if there are still readers coming. If so, close the door and
1380
0
       * wait for them to leave.
1381
0
       */
1382
0
      if (lk & PLOCK_LORW_SHR_MASK) {
1383
0
        /* note below, an OR is significantly cheaper than BTS or XADD */
1384
0
        if (!(lk & PLOCK_LORW_WRQ_MASK))
1385
0
          pl_or_noret(lock, PLOCK_LORW_WRQ_BASE);
1386
0
#if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO)
1387
0
        lk = __pl_wait_unlock_long(lock, PLOCK_LORW_SHR_MASK);
1388
0
#else
1389
0
        lk = pl_wait_unlock_long(lock, PLOCK_LORW_SHR_MASK);
1390
0
#endif
1391
0
      }
1392
0
1393
0
      /* And also wait for a previous writer to finish. */
1394
0
      if (lk & PLOCK_LORW_EXC_MASK)
1395
0
#if defined(PLOCK_LORW_INLINE_WAIT) && !defined(PLOCK_DISABLE_EBO)
1396
0
        lk = __pl_wait_unlock_long(lock, PLOCK_LORW_EXC_MASK);
1397
0
#else
1398
0
        lk = pl_wait_unlock_long(lock, PLOCK_LORW_EXC_MASK);
1399
0
#endif
1400
0
    }
1401
0
1402
0
    /* A fresh new reader may appear right now if there were none
1403
0
     * above and we didn't close the door.
1404
0
     */
1405
0
    old = lk & ~PLOCK_LORW_SHR_MASK & ~PLOCK_LORW_EXC_MASK;
1406
0
    lk = pl_cmpxchg(lock, old, old | PLOCK_LORW_EXC_BASE);
1407
0
  } while (lk != old);
1408
0
1409
0
  /* done, not waiting anymore, the WRQ bit if any, will be dropped by the
1410
0
   * unlock
1411
0
   */
1412
0
}
1413
1414
1415
__attribute__((unused,always_inline,no_instrument_function))
1416
static inline void pl_lorw_rdunlock(unsigned long *lock)
1417
0
{
1418
0
  pl_sub_noret_rel(lock, PLOCK_LORW_SHR_BASE);
1419
0
}
1420
1421
__attribute__((unused,always_inline,no_instrument_function))
1422
static inline void pl_lorw_wrunlock(unsigned long *lock)
1423
0
{
1424
0
  pl_and_noret_rel(lock, ~(PLOCK_LORW_WRQ_MASK | PLOCK_LORW_EXC_MASK));
1425
0
}
1426
1427
__attribute__((unused,always_inline,no_instrument_function))
1428
static inline void pl_lorw_unlock(unsigned long *lock)
1429
0
{
1430
0
  if (pl_deref_long(lock) & PLOCK_LORW_EXC_MASK)
1431
0
    pl_lorw_wrunlock(lock);
1432
0
  else
1433
0
    pl_lorw_rdunlock(lock);
1434
0
}
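/* Illustrative sketch: basic use of the low overhead R/W lock above. The lock
 * word is a plain unsigned long initialized to zero; pl_lorw_unlock() picks
 * the right release path from the EXC bit.
 */
static inline void pl_example_lorw(unsigned long *lock, int *shared, int do_write)
{
        if (do_write) {
                pl_lorw_wrlock(lock);   /* exclusive access, writers have precedence */
                (*shared)++;
        } else {
                pl_lorw_rdlock(lock);   /* shared access                             */
                (void)*shared;
        }
        pl_lorw_unlock(lock);           /* releases either mode                      */
}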
1435
1436
#endif /* PL_PLOCK_H */