Coverage Report

Created: 2025-07-23 07:04

/src/samba/lib/tdb/common/mutex.c
/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Volker Lendecke 2012,2013
   Copyright (C) Stefan Metzmacher 2013,2014
   Copyright (C) Michael Adam 2014

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"
#include "system/threads.h"

#ifdef USE_TDB_MUTEX_LOCKING

/*
 * If we run with mutexes, we store the "struct tdb_mutexes" at the
 * beginning of the file. We store an additional tdb_header right
 * beyond the mutex area, page aligned. All the offsets within the tdb
 * are relative to the area behind the mutex area. tdb->map_ptr points
 * behind the mmap area as well, so the read and write paths in the
 * mutex case can remain unchanged.
 *
 * Early in the mutex development the mutexes were placed between the hash
 * chain pointers and the real tdb data. This had two drawbacks: First, it
 * made pointer calculations more complex. Second, we had to mmap the mutex
 * area twice. One mapping was the normal map_ptr in the tdb, which
 * frequently changed from within tdb_oob. At least the Linux glibc robust
 * mutex code assumes constant pointers in memory, so a constantly changing
 * mmap area destroys the mutex list. Hence we had to mmap the first bytes
 * of the file with a second mmap call. With that scheme, very weird errors
 * appeared that went away when the mutexes were moved into a separate
 * file: mapping the same memory area twice apparently does not always
 * access the same physical page, and looking at the mutexes in gdb, old
 * data showed up after some re-mappings. To avoid a separate mutex file,
 * the code now puts the real content of the tdb file after the mutex
 * area. This way we do not have overlapping mmap areas: the mutex area is
 * mmapped once and never changes, while the tdb data area's mmap is
 * constantly changed but does not overlap it.
 */
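
/*
 * Resulting file layout, sketched for illustration (this diagram is an
 * editorial addition; exact sizes are platform-dependent):
 *
 *   offset 0:                  struct tdb_mutexes
 *                                hdr (a struct tdb_header)
 *                                allrecord_mutex, allrecord_lock
 *                                hashchains[0]  (freelist mutex)
 *                                hashchains[1..hash_size] (one per chain)
 *   offset tdb_mutex_size(tdb):
 *                              the additional tdb_header mentioned above;
 *                              tdb->map_ptr and all tdb-internal offsets
 *                              are relative to this point
 */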

struct tdb_mutexes {
	struct tdb_header hdr;

	/* protect allrecord_lock */
	pthread_mutex_t allrecord_mutex;

	/*
	 * F_UNLCK: free,
	 * F_RDLCK: shared,
	 * F_WRLCK: exclusive
	 */
	short int allrecord_lock;

	/*
	 * Index 0 is the freelist mutex, followed by
	 * one mutex per hashchain.
	 */
	pthread_mutex_t hashchains[1];
};

bool tdb_have_mutexes(struct tdb_context *tdb)
{
	return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
}

size_t tdb_mutex_size(struct tdb_context *tdb)
{
	size_t mutex_size;

	if (!tdb_have_mutexes(tdb)) {
		return 0;
	}

	mutex_size = sizeof(struct tdb_mutexes);
	mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);

	return TDB_ALIGN(mutex_size, tdb->page_size);
}
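
/*
 * Worked example of the computation above (editorial sketch; struct
 * sizes are platform-dependent): on a typical 64-bit Linux/glibc build
 * sizeof(pthread_mutex_t) is 40, so with hash_size == 10000 the raw
 * size is sizeof(struct tdb_mutexes) + 10000 * 40 bytes, which
 * TDB_ALIGN() then rounds up to the next tdb->page_size boundary.
 */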

/*
 * Get the index for a chain mutex
 */
static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
			    unsigned *idx)
{
	/*
	 * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
	 * the 4 bytes of the freelist start and the hash chain that is about
	 * to be locked. See lock_offset() where the freelist is -1 vs the
	 * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
	 * the tdb file itself as data, we need to adjust the offset here.
	 */
	const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);

	if (!tdb_have_mutexes(tdb)) {
		return false;
	}
	if (len != 1) {
		/* Possibly the allrecord lock */
		return false;
	}
	if (off < freelist_lock_ofs) {
		/* One of the special locks */
		return false;
	}
	if (tdb->hash_size == 0) {
		/* tdb not initialized yet, called from tdb_open_ex() */
		return false;
	}
	if (off >= TDB_DATA_START(tdb->hash_size)) {
		/* Single record lock from traverses */
		return false;
	}

	/*
	 * Now we know it's a freelist or hash chain lock. Those are always 4
	 * byte aligned. Paranoia check.
	 */
	if ((off % sizeof(tdb_off_t)) != 0) {
		abort();
	}

	/*
	 * Re-index the fcntl offset into an offset into the mutex array
	 */
	off -= freelist_lock_ofs; /* rebase to index 0 */
	off /= sizeof(tdb_off_t); /* 0 for freelist, 1-n for hashchains */

	*idx = off;
	return true;
}
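
/*
 * Worked example of the re-indexing above (editorial sketch;
 * sizeof(tdb_off_t) == 4): the freelist fcntl lock at
 * off == FREELIST_TOP - 4 rebases to (off - freelist_lock_ofs) / 4 == 0,
 * i.e. the freelist mutex. The fcntl lock for hash chain h lives
 * 4 * (h + 1) bytes after freelist_lock_ofs and thus maps to
 * idx == h + 1, matching the hashchains[] layout in struct tdb_mutexes.
 */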

static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
{
	int i;

	for (i=0; i < tdb->num_lockrecs; i++) {
		bool ret;
		unsigned idx;

		ret = tdb_mutex_index(tdb,
				      tdb->lockrecs[i].off,
				      tdb->lockrecs[i].count,
				      &idx);
		if (!ret) {
			continue;
		}

		if (idx == 0) {
			/* this is the freelist mutex */
			continue;
		}

		return true;
	}

	return false;
}

static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
{
	int ret;

	if (waitflag) {
		ret = pthread_mutex_lock(m);
	} else {
		ret = pthread_mutex_trylock(m);
	}
	if (ret != EOWNERDEAD) {
		return ret;
	}

	/*
	 * For chainlocks, we don't do any cleanup (yet?)
	 */
	return pthread_mutex_consistent(m);
}

static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
{
	int ret;

	if (waitflag) {
		ret = pthread_mutex_lock(&m->allrecord_mutex);
	} else {
		ret = pthread_mutex_trylock(&m->allrecord_mutex);
	}
	if (ret != EOWNERDEAD) {
		return ret;
	}

	/*
	 * The allrecord lock holder died. We need to reset the allrecord_lock
	 * to F_UNLCK. This should also be the indication for
	 * tdb_needs_recovery.
	 */
	m->allrecord_lock = F_UNLCK;

	return pthread_mutex_consistent(&m->allrecord_mutex);
}

bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		    bool waitflag, int *pret)
{
	struct tdb_mutexes *m = tdb->mutexes;
	pthread_mutex_t *chain;
	int ret;
	unsigned idx;
	bool allrecord_ok;

	if (!tdb_mutex_index(tdb, off, len, &idx)) {
		return false;
	}
	chain = &m->hashchains[idx];

again:
	ret = chain_mutex_lock(chain, waitflag);
	if (ret == EBUSY) {
		ret = EAGAIN;
	}
	if (ret != 0) {
		errno = ret;
		goto fail;
	}

	if (idx == 0) {
		/*
		 * This is a freelist lock, which is independent of
		 * the allrecord lock. So we're done once we got the
		 * freelist mutex.
		 */
		*pret = 0;
		return true;
	}

	if (tdb_have_mutex_chainlocks(tdb)) {
		/*
		 * We can only check the allrecord lock once. If we do it with
		 * one chain mutex locked, we will deadlock with the allrecord
		 * locker process in the following way: We lock the first hash
		 * chain, we check for the allrecord lock. We keep the hash
		 * chain locked. Then the allrecord locker locks the
		 * allrecord_mutex. It walks the list of chain mutexes,
		 * locking them all in sequence. Meanwhile, we have the chain
		 * mutex locked, so the allrecord locker blocks trying to lock
		 * our chain mutex. Then we come in and try to lock the second
		 * chain lock, which in most cases will be the freelist. We
		 * see that the allrecord lock is locked and put ourselves on
		 * the allrecord_mutex. This will never be signalled though
		 * because the allrecord locker waits for us to give up the
		 * chain lock.
		 */

		*pret = 0;
		return true;
	}

	/*
	 * Check if someone has the allrecord lock: queue if so.
	 */

	allrecord_ok = false;

	if (m->allrecord_lock == F_UNLCK) {
		/*
		 * allrecord lock not taken
		 */
		allrecord_ok = true;
	}

	if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
		/*
		 * allrecord shared lock taken, but we only want to read
		 */
		allrecord_ok = true;
	}

	if (allrecord_ok) {
		*pret = 0;
		return true;
	}

	ret = pthread_mutex_unlock(chain);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(chain_mutex) failed: %s\n", strerror(ret)));
		errno = ret;
		goto fail;
	}
	ret = allrecord_mutex_lock(m, waitflag);
	if (ret == EBUSY) {
		ret = EAGAIN;
	}
	if (ret != 0) {
		if (waitflag || (ret != EAGAIN)) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
				 "(allrecord_mutex) failed: %s\n",
				 waitflag ? "" : "try_",  strerror(ret)));
		}
		errno = ret;
		goto fail;
	}
	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
		errno = ret;
		goto fail;
	}
	goto again;

fail:
	*pret = -1;
	return true;
}
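
/*
 * Summary of the locking protocol above (editorial reading aid): take
 * the chain mutex first. For the freelist (idx == 0) that is all. For a
 * hash chain, if we hold no other chain mutex, peek at allrecord_lock;
 * if an incompatible allrecord lock is held, drop the chain mutex, block
 * on allrecord_mutex until its holder releases it, release it again and
 * retry from "again:".
 */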

bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		      int *pret)
{
	struct tdb_mutexes *m = tdb->mutexes;
	pthread_mutex_t *chain;
	int ret;
	unsigned idx;

	if (!tdb_mutex_index(tdb, off, len, &idx)) {
		return false;
	}
	chain = &m->hashchains[idx];

	ret = pthread_mutex_unlock(chain);
	if (ret == 0) {
		*pret = 0;
		return true;
	}
	errno = ret;
	*pret = -1;
	return true;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags)
{
	struct tdb_mutexes *m = tdb->mutexes;
	int ret;
	uint32_t i;
	bool waitflag = (flags & TDB_LOCK_WAIT);
	int saved_errno;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	if (flags & TDB_LOCK_MARK_ONLY) {
		return 0;
	}

	ret = allrecord_mutex_lock(m, waitflag);
	if (!waitflag && (ret == EBUSY)) {
		errno = EAGAIN;
		tdb->ecode = TDB_ERR_LOCK;
		return -1;
	}
	if (ret != 0) {
		if (!(flags & TDB_LOCK_PROBE)) {
			TDB_LOG((tdb, TDB_DEBUG_TRACE,
				 "allrecord_mutex_lock() failed: %s\n",
				 strerror(ret)));
		}
		tdb->ecode = TDB_ERR_LOCK;
		return -1;
	}

	if (m->allrecord_lock != F_UNLCK) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		goto fail_unlock_allrecord_mutex;
	}
	m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;

	for (i=0; i<tdb->hash_size; i++) {

		/* ignore hashchains[0], the freelist */
		pthread_mutex_t *chain = &m->hashchains[i+1];

		ret = chain_mutex_lock(chain, waitflag);
		if (!waitflag && (ret == EBUSY)) {
			errno = EAGAIN;
			goto fail_unroll_allrecord_lock;
		}
		if (ret != 0) {
			if (!(flags & TDB_LOCK_PROBE)) {
				TDB_LOG((tdb, TDB_DEBUG_TRACE,
					 "chain_mutex_lock() failed: %s\n",
					 strerror(ret)));
			}
			errno = ret;
			goto fail_unroll_allrecord_lock;
		}

		ret = pthread_mutex_unlock(chain);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			errno = ret;
			goto fail_unroll_allrecord_lock;
		}
	}
	/*
	 * We leave this routine with m->allrecord_mutex locked
	 */
	return 0;

fail_unroll_allrecord_lock:
	m->allrecord_lock = F_UNLCK;

fail_unlock_allrecord_mutex:
	saved_errno = errno;
	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
	}
	errno = saved_errno;
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;
	int ret;
	uint32_t i;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/*
	 * Our only caller tdb_allrecord_upgrade()
	 * guarantees that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if (m->allrecord_lock != F_RDLCK) {
		tdb->ecode = TDB_ERR_LOCK;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return -1;
	}

	m->allrecord_lock = F_WRLCK;

	for (i=0; i<tdb->hash_size; i++) {

		/* ignore hashchains[0], the freelist */
		pthread_mutex_t *chain = &m->hashchains[i+1];

		ret = chain_mutex_lock(chain, true);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			goto fail_unroll_allrecord_lock;
		}

		ret = pthread_mutex_unlock(chain);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			goto fail_unroll_allrecord_lock;
		}
	}

	return 0;

fail_unroll_allrecord_lock:
	m->allrecord_lock = F_RDLCK;
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;

	/*
	 * Our only caller tdb_allrecord_upgrade() (in the error case)
	 * guarantees that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if (m->allrecord_lock != F_WRLCK) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return;
	}

	m->allrecord_lock = F_RDLCK;
	return;
}


int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;
	short old;
	int ret;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/*
	 * Our only callers tdb_allrecord_unlock() and
	 * tdb_allrecord_lock() (in the error path)
	 * guarantee that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return -1;
	}

	old = m->allrecord_lock;
	m->allrecord_lock = F_UNLCK;

	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		m->allrecord_lock = old;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
		return -1;
	}
	return 0;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
	struct tdb_mutexes *m;
	pthread_mutexattr_t ma;
	uint32_t i;
	int ret;

	ret = tdb_mutex_mmap(tdb);
	if (ret == -1) {
		return -1;
	}
	m = tdb->mutexes;

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		goto fail_munmap;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto fail;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto fail;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto fail;
	}

	for (i=0; i<tdb->hash_size+1; i++) {
		pthread_mutex_t *chain = &m->hashchains[i];

		ret = pthread_mutex_init(chain, &ma);
		if (ret != 0) {
			goto fail;
		}
	}

	m->allrecord_lock = F_UNLCK;

	ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
	if (ret != 0) {
		goto fail;
	}
	ret = 0;
fail:
	pthread_mutexattr_destroy(&ma);
fail_munmap:

	if (ret == 0) {
		return 0;
	}

	tdb_mutex_munmap(tdb);

	errno = ret;
	return -1;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
	size_t len;
	void *ptr;

	len = tdb_mutex_size(tdb);
	if (len == 0) {
		return 0;
	}

	if (tdb->mutexes != NULL) {
		return 0;
	}

	ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
		   tdb->fd, 0);
	if (ptr == MAP_FAILED) {
		return -1;
	}
	tdb->mutexes = (struct tdb_mutexes *)ptr;

	return 0;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
	size_t len;
	int ret;

	len = tdb_mutex_size(tdb);
	if (len == 0) {
		return 0;
	}

	ret = munmap(tdb->mutexes, len);
	if (ret == -1) {
		return -1;
	}
	tdb->mutexes = NULL;

	return 0;
}

static bool tdb_mutex_locking_cached;

static bool tdb_mutex_locking_supported(void)
{
	pthread_mutexattr_t ma;
	pthread_mutex_t m;
	int ret;
	static bool initialized;

	if (initialized) {
		return tdb_mutex_locking_cached;
	}

	initialized = true;

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		return false;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_init(&m, &ma);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_lock(&m);
	if (ret != 0) {
		goto cleanup_m;
	}
	/*
	 * This makes sure we have real mutexes
	 * from a threading library instead of just
	 * stubs from libc.
	 */
	ret = pthread_mutex_lock(&m);
	if (ret != EDEADLK) {
		goto cleanup_lock;
	}
	ret = pthread_mutex_unlock(&m);
	if (ret != 0) {
		goto cleanup_m;
	}

	tdb_mutex_locking_cached = true;
	goto cleanup_m;

cleanup_lock:
	pthread_mutex_unlock(&m);
cleanup_m:
	pthread_mutex_destroy(&m);
cleanup_ma:
	pthread_mutexattr_destroy(&ma);
	return tdb_mutex_locking_cached;
}

static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR;
static pid_t tdb_robust_mutex_pid = -1;

static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
					    void (**p_old_handler)(int))
{
#ifdef HAVE_SIGACTION
	struct sigaction act;
	struct sigaction oldact;

	memset(&act, '\0', sizeof(act));

	act.sa_handler = handler;
#ifdef SA_RESTART
	act.sa_flags = SA_RESTART;
#endif
	sigemptyset(&act.sa_mask);
	sigaddset(&act.sa_mask, SIGCHLD);
	sigaction(SIGCHLD, &act, &oldact);
	if (p_old_handler) {
		*p_old_handler = oldact.sa_handler;
	}
	return true;
#else /* !HAVE_SIGACTION */
	return false;
#endif
}

static void tdb_robust_mutex_handler(int sig)
{
	pid_t child_pid = tdb_robust_mutex_pid;

	if (child_pid != -1) {
		pid_t pid;

		pid = waitpid(child_pid, NULL, WNOHANG);
		if (pid == -1) {
			switch (errno) {
			case ECHILD:
				tdb_robust_mutex_pid = -1;
				return;

			default:
				return;
			}
		}
		if (pid == child_pid) {
			tdb_robust_mutex_pid = -1;
			return;
		}
	}

	if (tdb_robust_mutext_old_handler == SIG_DFL) {
		return;
	}
	if (tdb_robust_mutext_old_handler == SIG_IGN) {
		return;
	}
	if (tdb_robust_mutext_old_handler == SIG_ERR) {
		return;
	}

	tdb_robust_mutext_old_handler(sig);
}

static void tdb_robust_mutex_wait_for_child(pid_t *child_pid)
{
	int options = WNOHANG;

	if (*child_pid == -1) {
		return;
	}

	while (tdb_robust_mutex_pid > 0) {
		pid_t pid;

		/*
		 * First we try with WNOHANG, as the process might not exist
		 * anymore. Once we've sent SIGKILL we block waiting for the
		 * exit.
		 */
		pid = waitpid(*child_pid, NULL, options);
		if (pid == -1) {
			if (errno == EINTR) {
				continue;
			} else if (errno == ECHILD) {
				break;
			} else {
				abort();
			}
		}
		if (pid == *child_pid) {
			break;
		}

		kill(*child_pid, SIGKILL);
		options = 0;
	}

	tdb_robust_mutex_pid = -1;
	*child_pid = -1;
}

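/*
 * Runtime self-test for robust mutexes (editorial summary of the
 * function below): fork a child that locks a shared robust mutex and
 * exits without unlocking it. The parent must see EBUSY while the child
 * holds the lock, EOWNERDEAD once the child has died, and must be able
 * to mark the mutex consistent again. Only if every step behaves as
 * specified is tdb_mutex_locking_cached set to true.
 */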
_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
	void *ptr = NULL;
	pthread_mutex_t *m = NULL;
	pthread_mutexattr_t ma;
	int ret = 1;
	int pipe_down[2] = { -1, -1 };
	int pipe_up[2] = { -1, -1 };
	ssize_t nread;
	char c = 0;
	bool ok;
	static bool initialized;
	pid_t saved_child_pid = -1;
	bool cleanup_ma = false;

	if (initialized) {
		return tdb_mutex_locking_cached;
	}

	initialized = true;

	ok = tdb_mutex_locking_supported();
	if (!ok) {
		return false;
	}

	tdb_mutex_locking_cached = false;

	ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
	if (ptr == MAP_FAILED) {
		return false;
	}

	ret = pipe(pipe_down);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pipe(pipe_up);
	if (ret != 0) {
		goto cleanup;
	}

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		goto cleanup;
	}
	cleanup_ma = true;
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutex_init(ptr, &ma);
	if (ret != 0) {
		goto cleanup;
	}
	m = (pthread_mutex_t *)ptr;

	if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
			&tdb_robust_mutext_old_handler) == false) {
		goto cleanup;
	}

	tdb_robust_mutex_pid = fork();
	saved_child_pid = tdb_robust_mutex_pid;
	if (tdb_robust_mutex_pid == 0) {
		size_t nwritten;
		close(pipe_down[1]);
		close(pipe_up[0]);
		ret = pthread_mutex_lock(m);
		nwritten = write(pipe_up[1], &ret, sizeof(ret));
		if (nwritten != sizeof(ret)) {
			_exit(1);
		}
		if (ret != 0) {
			_exit(1);
		}
		nread = read(pipe_down[0], &c, 1);
		if (nread != 1) {
			_exit(1);
		}
		/* leave locked */
		_exit(0);
	}
	if (tdb_robust_mutex_pid == -1) {
		goto cleanup;
	}
	close(pipe_down[0]);
	pipe_down[0] = -1;
	close(pipe_up[1]);
	pipe_up[1] = -1;

	nread = read(pipe_up[0], &ret, sizeof(ret));
	if (nread != sizeof(ret)) {
		goto cleanup;
	}

	ret = pthread_mutex_trylock(m);
	if (ret != EBUSY) {
		if (ret == 0) {
			pthread_mutex_unlock(m);
		}
		goto cleanup;
	}

	if (write(pipe_down[1], &c, 1) != 1) {
		goto cleanup;
	}

	nread = read(pipe_up[0], &c, 1);
	if (nread != 0) {
		goto cleanup;
	}

	tdb_robust_mutex_wait_for_child(&saved_child_pid);

	ret = pthread_mutex_trylock(m);
	if (ret != EOWNERDEAD) {
		if (ret == 0) {
			pthread_mutex_unlock(m);
		}
		goto cleanup;
	}

	ret = pthread_mutex_consistent(m);
	if (ret != 0) {
		goto cleanup;
	}

	ret = pthread_mutex_trylock(m);
	if (ret != EDEADLK && ret != EBUSY) {
		pthread_mutex_unlock(m);
		goto cleanup;
	}

	ret = pthread_mutex_unlock(m);
	if (ret != 0) {
		goto cleanup;
	}

	tdb_mutex_locking_cached = true;

cleanup:
	/*
	 * Note that we don't reset the signal handler; we just reset
	 * tdb_robust_mutex_pid to -1. This is ok as this code path is only
	 * called once per process.
	 *
	 * Leaving our signal handler avoids races with other threads
	 * potentially setting up their SIGCHLD handlers.
	 *
	 * The worst thing that can happen is that the other, newer signal
	 * handler will get the SIGCHLD signal for our child and/or reap the
	 * child with a wait() function. tdb_robust_mutex_wait_for_child()
	 * handles the case where waitpid returns ECHILD.
	 */
	tdb_robust_mutex_wait_for_child(&saved_child_pid);

	if (m != NULL) {
		pthread_mutex_destroy(m);
	}
	if (cleanup_ma) {
		pthread_mutexattr_destroy(&ma);
	}
	if (pipe_down[0] != -1) {
		close(pipe_down[0]);
	}
	if (pipe_down[1] != -1) {
		close(pipe_down[1]);
	}
	if (pipe_up[0] != -1) {
		close(pipe_up[0]);
	}
	if (pipe_up[1] != -1) {
		close(pipe_up[1]);
	}
	if (ptr != NULL) {
		munmap(ptr, sizeof(pthread_mutex_t));
	}

	return tdb_mutex_locking_cached;
}

#else

size_t tdb_mutex_size(struct tdb_context *tdb)
{
	return 0;
}

bool tdb_have_mutexes(struct tdb_context *tdb)
{
	return false;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags)
{
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
	return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
	return;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
	return false;
}

#endif
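
/*
 * Usage sketch (editorial addition, not part of mutex.c): how an
 * application might opt in to mutex locking through the public tdb API.
 * This assumes the tdb.h flags TDB_CLEAR_IF_FIRST and TDB_MUTEX_LOCKING;
 * check your tdb.h for the exact requirements of TDB_MUTEX_LOCKING.
 */
#include <tdb.h>
#include <fcntl.h>

static struct tdb_context *open_with_mutexes(const char *path)
{
	int tdb_flags = TDB_CLEAR_IF_FIRST;

	/* Only request mutexes when the runtime check passes. */
	if (tdb_runtime_check_for_robust_mutexes()) {
		tdb_flags |= TDB_MUTEX_LOCKING;
	}

	return tdb_open(path, 0 /* default hash size */, tdb_flags,
			O_RDWR | O_CREAT, 0600);
}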