Coverage Report

Created: 2025-12-31 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/samba/lib/tdb/common/lock.c
Line
Count
Source
1
 /*
2
   Unix SMB/CIFS implementation.
3
4
   trivial database library
5
6
   Copyright (C) Andrew Tridgell              1999-2005
7
   Copyright (C) Paul `Rusty' Russell      2000
8
   Copyright (C) Jeremy Allison        2000-2003
9
10
     ** NOTE! The following LGPL license applies to the tdb
11
     ** library. This does NOT imply that all of Samba is released
12
     ** under the LGPL
13
14
   This library is free software; you can redistribute it and/or
15
   modify it under the terms of the GNU Lesser General Public
16
   License as published by the Free Software Foundation; either
17
   version 3 of the License, or (at your option) any later version.
18
19
   This library is distributed in the hope that it will be useful,
20
   but WITHOUT ANY WARRANTY; without even the implied warranty of
21
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22
   Lesser General Public License for more details.
23
24
   You should have received a copy of the GNU Lesser General Public
25
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
26
*/
27
28
#include "tdb_private.h"
29
30
_PUBLIC_ void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *ptr)
31
0
{
32
0
  tdb->interrupt_sig_ptr = ptr;
33
0
}
34
35
static int fcntl_lock(struct tdb_context *tdb,
36
          int rw, off_t off, off_t len, bool waitflag)
37
0
{
38
0
  struct flock fl;
39
0
  int cmd;
40
41
0
#ifdef USE_TDB_MUTEX_LOCKING
42
0
  {
43
0
    int ret;
44
0
    if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) {
45
0
      return ret;
46
0
    }
47
0
  }
48
0
#endif
49
50
0
  fl.l_type = rw;
51
0
  fl.l_whence = SEEK_SET;
52
0
  fl.l_start = off;
53
0
  fl.l_len = len;
54
0
  fl.l_pid = 0;
55
56
0
  cmd = waitflag ? F_SETLKW : F_SETLK;
57
58
0
  return fcntl(tdb->fd, cmd, &fl);
59
0
}
60
61
static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
62
0
{
63
0
  struct flock fl;
64
#if 0 /* Check they matched up locks and unlocks correctly. */
65
  char line[80];
66
  FILE *locks;
67
  bool found = false;
68
69
  locks = fopen("/proc/locks", "r");
70
71
  while (fgets(line, 80, locks)) {
72
    char *p;
73
    int type, start, l;
74
75
    /* eg. 1: FLOCK  ADVISORY  WRITE 2440 08:01:2180826 0 EOF */
76
    p = strchr(line, ':') + 1;
77
    if (strncmp(p, " POSIX  ADVISORY  ", strlen(" POSIX  ADVISORY  ")))
78
      continue;
79
    p += strlen(" FLOCK  ADVISORY  ");
80
    if (strncmp(p, "READ  ", strlen("READ  ")) == 0)
81
      type = F_RDLCK;
82
    else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
83
      type = F_WRLCK;
84
    else
85
      abort();
86
    p += 6;
87
    if (atoi(p) != getpid())
88
      continue;
89
    p = strchr(strchr(p, ' ') + 1, ' ') + 1;
90
    start = atoi(p);
91
    p = strchr(p, ' ') + 1;
92
    if (strncmp(p, "EOF", 3) == 0)
93
      l = 0;
94
    else
95
      l = atoi(p) - start + 1;
96
97
    if (off == start) {
98
      if (len != l) {
99
        fprintf(stderr, "Len %u should be %u: %s",
100
          (int)len, l, line);
101
        abort();
102
      }
103
      if (type != rw) {
104
        fprintf(stderr, "Type %s wrong: %s",
105
          rw == F_RDLCK ? "READ" : "WRITE", line);
106
        abort();
107
      }
108
      found = true;
109
      break;
110
    }
111
  }
112
113
  if (!found) {
114
    fprintf(stderr, "Unlock on %u@%u not found!\n",
115
      (int)off, (int)len);
116
    abort();
117
  }
118
119
  fclose(locks);
120
#endif
121
122
0
#ifdef USE_TDB_MUTEX_LOCKING
123
0
  {
124
0
    int ret;
125
0
    if (tdb_mutex_unlock(tdb, rw, off, len, &ret)) {
126
0
      return ret;
127
0
    }
128
0
  }
129
0
#endif
130
131
0
  fl.l_type = F_UNLCK;
132
0
  fl.l_whence = SEEK_SET;
133
0
  fl.l_start = off;
134
0
  fl.l_len = len;
135
0
  fl.l_pid = 0;
136
137
0
  return fcntl(tdb->fd, F_SETLKW, &fl);
138
0
}
139
140
/*
141
 * Calculate the lock offset for a list
142
 *
143
 * list -1 is the freelist, otherwise a hash chain.
144
 *
145
 * Note that we consistently (but without real reason) lock hash chains at an
146
 * offset that is 4 bytes below the real offset of the corresponding list head
147
 * in the db.
148
 *
149
 * This is the memory layout of the hashchain array:
150
 *
151
 * FREELIST_TOP + 0 = freelist
152
 * FREELIST_TOP + 4 = hashtable list 0
153
 * FREELIST_TOP + 8 = hashtable list 1
154
 * ...
155
 *
156
 * Otoh lock_offset computes:
157
 *
158
 * freelist = FREELIST_TOP - 4
159
 * list 0   = FREELIST_TOP + 0
160
 * list 1   = FREELIST_TOP + 4
161
 * ...
162
 *
163
 * Unfortunately we can't change this calculation in order to align the locking
164
 * offset with the memory layout, as that would make the locking incompatible
165
 * between different tdb versions.
166
 */
167
static tdb_off_t lock_offset(int list)
168
0
{
169
0
  return FREELIST_TOP + 4*list;
170
0
}
171
172
/* a byte range locking function - return 0 on success
173
   this functions locks/unlocks "len" byte at the specified offset.
174
175
   On error, errno is also set so that errors are passed back properly
176
   through tdb_open().
177
178
   note that a len of zero means lock to end of file
179
*/
180
int tdb_brlock(struct tdb_context *tdb,
	       int rw_type, tdb_off_t offset, size_t len,
	       enum tdb_lock_flags flags)
{
	int ret;

	/* No-op databases and mark-only requests take no kernel lock. */
	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}
	if (flags & TDB_LOCK_MARK_ONLY) {
		return 0;
	}

	/* Write locks are refused on read-only handles and read traverses. */
	if ((rw_type == F_WRLCK) && (tdb->read_only || tdb->traverse_read)) {
		tdb->ecode = TDB_ERR_RDONLY;
		return -1;
	}

	/* Retry on EINTR, unless the registered sigalarm flag fired. */
	for (;;) {
		ret = fcntl_lock(tdb, rw_type, offset, len,
				 flags & TDB_LOCK_WAIT);
		if (ret != -1 || errno != EINTR) {
			break;
		}
		/* Check for a sigalarm break. */
		if (tdb->interrupt_sig_ptr && *tdb->interrupt_sig_ptr) {
			break;
		}
	}

	if (ret == -1) {
		tdb->ecode = TDB_ERR_LOCK;
		/* Generic lock error. errno set by fcntl.
		 * EAGAIN is an expected return from non-blocking
		 * locks. */
		if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
			TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %u rw_type=%d flags=%d len=%zu\n",
				 tdb->fd, offset, rw_type, flags, len));
		}
		return -1;
	}
	return 0;
}
223
224
int tdb_brunlock(struct tdb_context *tdb,
		 int rw_type, tdb_off_t offset, size_t len)
{
	int ret;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/* EINTR is the only retryable failure for an unlock. */
	do {
		ret = fcntl_unlock(tdb, rw_type, offset, len);
	} while (ret == -1 && errno == EINTR);

	if (ret == -1) {
		TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %u rw_type=%u len=%zu\n",
			 tdb->fd, offset, rw_type, len));
	}
	return ret;
}
243
244
/*
245
 * Do a tdb_brlock in a loop. Some OSes (such as solaris) have too
246
 * conservative deadlock detection and claim a deadlock when progress can be
247
 * made. For those OSes we may loop for a while.
248
 */
249
250
/* Do a tdb_brlock in a loop.  Some OSes (e.g. Solaris) have overly
 * conservative deadlock detection and report EDEADLK even when progress is
 * possible, so retry up to 1000 times with a tiny sleep in between. */
static int tdb_brlock_retry(struct tdb_context *tdb,
			    int rw_type, tdb_off_t offset, size_t len,
			    enum tdb_lock_flags flags)
{
	int attempt;

	for (attempt = 0; attempt < 1000; attempt++) {
		struct timeval tv;
		int ret;

		ret = tdb_brlock(tdb, rw_type, offset, len, flags);
		if (ret == 0) {
			return 0;
		}
		if (errno != EDEADLK) {
			break;
		}
		/* sleep for as short a time as we can - more portable than usleep() */
		tv.tv_sec = 0;
		tv.tv_usec = 1;
		select(0, NULL, NULL, NULL, &tv);
	}
	return -1;
}
274
275
/*
276
  upgrade a read lock to a write lock.
277
*/
278
int tdb_allrecord_upgrade(struct tdb_context *tdb)
279
0
{
280
0
  int ret;
281
282
0
  if (tdb->allrecord_lock.count != 1) {
283
0
    TDB_LOG((tdb, TDB_DEBUG_ERROR,
284
0
       "tdb_allrecord_upgrade failed: count %u too high\n",
285
0
       tdb->allrecord_lock.count));
286
0
    tdb->ecode = TDB_ERR_LOCK;
287
0
    return -1;
288
0
  }
289
290
0
  if (tdb->allrecord_lock.off != 1) {
291
0
    TDB_LOG((tdb, TDB_DEBUG_ERROR,
292
0
       "tdb_allrecord_upgrade failed: already upgraded?\n"));
293
0
    tdb->ecode = TDB_ERR_LOCK;
294
0
    return -1;
295
0
  }
296
297
0
  if (tdb_have_mutexes(tdb)) {
298
0
    ret = tdb_mutex_allrecord_upgrade(tdb);
299
0
    if (ret == -1) {
300
0
      goto fail;
301
0
    }
302
0
    ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size),
303
0
               0, TDB_LOCK_WAIT|TDB_LOCK_PROBE);
304
0
    if (ret == -1) {
305
0
      tdb_mutex_allrecord_downgrade(tdb);
306
0
    }
307
0
  } else {
308
0
    ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0,
309
0
               TDB_LOCK_WAIT|TDB_LOCK_PROBE);
310
0
  }
311
312
0
  if (ret == 0) {
313
0
    tdb->allrecord_lock.ltype = F_WRLCK;
314
0
    tdb->allrecord_lock.off = 0;
315
0
    return 0;
316
0
  }
317
0
fail:
318
0
  TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n"));
319
0
  return -1;
320
0
}
321
322
static struct tdb_lock_type *find_nestlock(struct tdb_context *tdb,
323
             tdb_off_t offset)
324
0
{
325
0
  int i;
326
327
0
  for (i=0; i<tdb->num_lockrecs; i++) {
328
0
    if (tdb->lockrecs[i].off == offset) {
329
0
      return &tdb->lockrecs[i];
330
0
    }
331
0
  }
332
0
  return NULL;
333
0
}
334
335
/* lock an offset in the database. */
336
int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
337
      enum tdb_lock_flags flags)
338
0
{
339
0
  struct tdb_lock_type *new_lck;
340
341
0
  if (offset >= lock_offset(tdb->hash_size)) {
342
0
    tdb->ecode = TDB_ERR_LOCK;
343
0
    TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid offset %u for ltype=%d\n",
344
0
       offset, ltype));
345
0
    return -1;
346
0
  }
347
0
  if (tdb->flags & TDB_NOLOCK)
348
0
    return 0;
349
350
0
  new_lck = find_nestlock(tdb, offset);
351
0
  if (new_lck) {
352
0
    if ((new_lck->ltype == F_RDLCK) && (ltype == F_WRLCK)) {
353
0
      if (!tdb_have_mutexes(tdb)) {
354
0
        int ret;
355
        /*
356
         * Upgrade the underlying fcntl
357
         * lock. Mutexes don't do readlocks,
358
         * so this only applies to fcntl
359
         * locking.
360
         */
361
0
        ret = tdb_brlock(tdb, ltype, offset, 1, flags);
362
0
        if (ret != 0) {
363
0
          return ret;
364
0
        }
365
0
      }
366
0
      new_lck->ltype = F_WRLCK;
367
0
    }
368
    /*
369
     * Just increment the in-memory struct, posix locks
370
     * don't stack.
371
     */
372
0
    new_lck->count++;
373
0
    return 0;
374
0
  }
375
376
0
  if (tdb->num_lockrecs == tdb->lockrecs_array_length) {
377
0
    new_lck = (struct tdb_lock_type *)realloc(
378
0
      tdb->lockrecs,
379
0
      sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1));
380
0
    if (new_lck == NULL) {
381
0
      errno = ENOMEM;
382
0
      return -1;
383
0
    }
384
0
    tdb->lockrecs_array_length = tdb->num_lockrecs+1;
385
0
    tdb->lockrecs = new_lck;
386
0
  }
387
388
  /* Since fcntl locks don't nest, we do a lock for the first one,
389
     and simply bump the count for future ones */
390
0
  if (tdb_brlock(tdb, ltype, offset, 1, flags)) {
391
0
    return -1;
392
0
  }
393
394
0
  new_lck = &tdb->lockrecs[tdb->num_lockrecs];
395
396
0
  new_lck->off = offset;
397
0
  new_lck->count = 1;
398
0
  new_lck->ltype = ltype;
399
0
  tdb->num_lockrecs++;
400
401
0
  return 0;
402
0
}
403
404
static int tdb_lock_and_recover(struct tdb_context *tdb)
405
0
{
406
0
  int ret;
407
408
  /* We need to match locking order in transaction commit. */
409
0
  if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT)) {
410
0
    return -1;
411
0
  }
412
413
0
  if (tdb_brlock(tdb, F_WRLCK, OPEN_LOCK, 1, TDB_LOCK_WAIT)) {
414
0
    tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
415
0
    return -1;
416
0
  }
417
418
0
  ret = tdb_transaction_recover(tdb);
419
420
0
  tdb_brunlock(tdb, F_WRLCK, OPEN_LOCK, 1);
421
0
  tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
422
423
0
  return ret;
424
0
}
425
426
static bool have_data_locks(const struct tdb_context *tdb)
427
0
{
428
0
  int i;
429
430
0
  for (i = 0; i < tdb->num_lockrecs; i++) {
431
0
    if (tdb->lockrecs[i].off >= lock_offset(-1))
432
0
      return true;
433
0
  }
434
0
  return false;
435
0
}
436
437
/*
438
 * A allrecord lock allows us to avoid per chain locks. Check if the allrecord
439
 * lock is strong enough.
440
 */
441
static int tdb_lock_covered_by_allrecord_lock(struct tdb_context *tdb,
442
                int ltype)
443
0
{
444
0
  if (ltype == F_RDLCK) {
445
    /*
446
     * The allrecord_lock is equal (F_RDLCK) or stronger
447
     * (F_WRLCK). Pass.
448
     */
449
0
    return 0;
450
0
  }
451
452
0
  if (tdb->allrecord_lock.ltype == F_RDLCK) {
453
    /*
454
     * We ask for ltype==F_WRLCK, but the allrecord_lock
455
     * is too weak. We can't upgrade here, so fail.
456
     */
457
0
    tdb->ecode = TDB_ERR_LOCK;
458
0
    return -1;
459
0
  }
460
461
  /*
462
   * Asking for F_WRLCK, allrecord is F_WRLCK as well. Pass.
463
   */
464
0
  return 0;
465
0
}
466
467
static int tdb_lock_list(struct tdb_context *tdb, int list, int ltype,
468
       enum tdb_lock_flags waitflag)
469
0
{
470
0
  int ret;
471
0
  bool check = false;
472
473
0
  if (tdb->allrecord_lock.count) {
474
0
    return tdb_lock_covered_by_allrecord_lock(tdb, ltype);
475
0
  }
476
477
  /*
478
   * Check for recoveries: Someone might have kill -9'ed a process
479
   * during a commit.
480
   */
481
0
  check = !have_data_locks(tdb);
482
0
  ret = tdb_nest_lock(tdb, lock_offset(list), ltype, waitflag);
483
484
0
  if (ret == 0 && check && tdb_needs_recovery(tdb)) {
485
0
    tdb_nest_unlock(tdb, lock_offset(list), ltype, false);
486
487
0
    if (tdb_lock_and_recover(tdb) == -1) {
488
0
      return -1;
489
0
    }
490
0
    return tdb_lock_list(tdb, list, ltype, waitflag);
491
0
  }
492
0
  return ret;
493
0
}
494
495
/* lock a list in the database. list -1 is the alloc list */
496
int tdb_lock(struct tdb_context *tdb, int list, int ltype)
497
0
{
498
0
  int ret;
499
500
0
  ret = tdb_lock_list(tdb, list, ltype, TDB_LOCK_WAIT);
501
0
  if (ret) {
502
0
    TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock failed on list %d "
503
0
       "ltype=%d (%s)\n",  list, ltype, strerror(errno)));
504
0
  }
505
0
  return ret;
506
0
}
507
508
/* lock a list in the database. list -1 is the alloc list. non-blocking lock */
509
_PUBLIC_ int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
510
_PUBLIC_ int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype)
511
0
{
512
0
  return tdb_lock_list(tdb, list, ltype, TDB_LOCK_NOWAIT);
513
0
}
514
515
516
int tdb_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype,
517
        bool mark_lock)
518
0
{
519
0
  int ret = -1;
520
0
  struct tdb_lock_type *lck;
521
522
0
  if (tdb->flags & TDB_NOLOCK)
523
0
    return 0;
524
525
  /* Sanity checks */
526
0
  if (offset >= lock_offset(tdb->hash_size)) {
527
0
    TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: offset %u invalid (%d)\n", offset, tdb->hash_size));
528
0
    return ret;
529
0
  }
530
531
0
  lck = find_nestlock(tdb, offset);
532
0
  if ((lck == NULL) || (lck->count == 0)) {
533
0
    TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: count is 0\n"));
534
0
    return -1;
535
0
  }
536
537
0
  if (lck->count > 1) {
538
0
    lck->count--;
539
0
    return 0;
540
0
  }
541
542
  /*
543
   * This lock has count==1 left, so we need to unlock it in the
544
   * kernel. We don't bother with decrementing the in-memory array
545
   * element, we're about to overwrite it with the last array element
546
   * anyway.
547
   */
548
549
0
  if (mark_lock) {
550
0
    ret = 0;
551
0
  } else {
552
0
    ret = tdb_brunlock(tdb, ltype, offset, 1);
553
0
  }
554
555
  /*
556
   * Shrink the array by overwriting the element just unlocked with the
557
   * last array element.
558
   */
559
0
  *lck = tdb->lockrecs[--tdb->num_lockrecs];
560
561
  /*
562
   * We don't bother with realloc when the array shrinks, but if we have
563
   * a completely idle tdb we should get rid of the locked array.
564
   */
565
566
0
  if (ret)
567
0
    TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: An error occurred unlocking!\n"));
568
0
  return ret;
569
0
}
570
571
_PUBLIC_ int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
572
_PUBLIC_ int tdb_unlock(struct tdb_context *tdb, int list, int ltype)
573
0
{
574
  /* a global lock allows us to avoid per chain locks */
575
0
  if (tdb->allrecord_lock.count) {
576
0
    return tdb_lock_covered_by_allrecord_lock(tdb, ltype);
577
0
  }
578
579
0
  return tdb_nest_unlock(tdb, lock_offset(list), ltype, false);
580
0
}
581
582
/*
583
  get the transaction lock
584
 */
585
int tdb_transaction_lock(struct tdb_context *tdb, int ltype,
586
       enum tdb_lock_flags lockflags)
587
0
{
588
0
  return tdb_nest_lock(tdb, TRANSACTION_LOCK, ltype, lockflags);
589
0
}
590
591
/*
592
  release the transaction lock
593
 */
594
int tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
595
0
{
596
0
  return tdb_nest_unlock(tdb, TRANSACTION_LOCK, ltype, false);
597
0
}
598
599
/* Returns 0 if all done, -1 if error, 1 if ok. */
600
static int tdb_allrecord_check(struct tdb_context *tdb, int ltype,
601
             enum tdb_lock_flags flags, bool upgradable)
602
0
{
603
  /* There are no locks on read-only dbs */
604
0
  if (tdb->read_only || tdb->traverse_read) {
605
0
    tdb->ecode = TDB_ERR_LOCK;
606
0
    return -1;
607
0
  }
608
609
0
  if (tdb->allrecord_lock.count &&
610
0
      tdb->allrecord_lock.ltype == (uint32_t)ltype) {
611
0
    tdb->allrecord_lock.count++;
612
0
    return 0;
613
0
  }
614
615
0
  if (tdb->allrecord_lock.count) {
616
    /* a global lock of a different type exists */
617
0
    tdb->ecode = TDB_ERR_LOCK;
618
0
    return -1;
619
0
  }
620
621
0
  if (tdb_have_extra_locks(tdb)) {
622
    /* can't combine global and chain locks */
623
0
    tdb->ecode = TDB_ERR_LOCK;
624
0
    return -1;
625
0
  }
626
627
0
  if (upgradable && ltype != F_RDLCK) {
628
    /* tdb error: you can't upgrade a write lock! */
629
0
    tdb->ecode = TDB_ERR_LOCK;
630
0
    return -1;
631
0
  }
632
0
  return 1;
633
0
}
634
635
/* We only need to lock individual bytes, but Linux merges consecutive locks
636
 * so we lock in contiguous ranges. */
637
static int tdb_chainlock_gradual(struct tdb_context *tdb,
638
         int ltype, enum tdb_lock_flags flags,
639
         size_t off, size_t len)
640
0
{
641
0
  int ret;
642
0
  enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
643
644
0
  if (len <= 4) {
645
    /* Single record.  Just do blocking lock. */
646
0
    return tdb_brlock(tdb, ltype, off, len, flags);
647
0
  }
648
649
  /* First we try non-blocking. */
650
0
  ret = tdb_brlock(tdb, ltype, off, len, nb_flags);
651
0
  if (ret == 0) {
652
0
    return 0;
653
0
  }
654
655
  /* Try locking first half, then second. */
656
0
  ret = tdb_chainlock_gradual(tdb, ltype, flags, off, len / 2);
657
0
  if (ret == -1)
658
0
    return -1;
659
660
0
  ret = tdb_chainlock_gradual(tdb, ltype, flags,
661
0
            off + len / 2, len - len / 2);
662
0
  if (ret == -1) {
663
0
    tdb_brunlock(tdb, ltype, off, len / 2);
664
0
    return -1;
665
0
  }
666
0
  return 0;
667
0
}
668
669
/* lock/unlock entire database.  It can only be upgradable if you have some
670
 * other way of guaranteeing exclusivity (ie. transaction write lock).
671
 * We do the locking gradually to avoid being starved by smaller locks. */
672
int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
673
           enum tdb_lock_flags flags, bool upgradable)
674
0
{
675
0
  int ret;
676
677
0
  switch (tdb_allrecord_check(tdb, ltype, flags, upgradable)) {
678
0
  case -1:
679
0
    return -1;
680
0
  case 0:
681
0
    return 0;
682
0
  }
683
684
  /* We cover two kinds of locks:
685
   * 1) Normal chain locks.  Taken for almost all operations.
686
   * 2) Individual records locks.  Taken after normal or free
687
   *    chain locks.
688
   *
689
   * It is (1) which cause the starvation problem, so we're only
690
   * gradual for that. */
691
692
0
  if (tdb_have_mutexes(tdb)) {
693
0
    ret = tdb_mutex_allrecord_lock(tdb, ltype, flags);
694
0
  } else {
695
0
    ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP,
696
0
              tdb->hash_size * 4);
697
0
  }
698
699
0
  if (ret == -1) {
700
0
    return -1;
701
0
  }
702
703
  /* Grab individual record locks. */
704
0
  if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0,
705
0
           flags) == -1) {
706
0
    if (tdb_have_mutexes(tdb)) {
707
0
      tdb_mutex_allrecord_unlock(tdb);
708
0
    } else {
709
0
      tdb_brunlock(tdb, ltype, FREELIST_TOP,
710
0
             tdb->hash_size * 4);
711
0
    }
712
0
    return -1;
713
0
  }
714
715
0
  tdb->allrecord_lock.count = 1;
716
  /* If it's upgradable, it's actually exclusive so we can treat
717
   * it as a write lock. */
718
0
  tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
719
0
  tdb->allrecord_lock.off = upgradable;
720
721
0
  if (tdb_needs_recovery(tdb)) {
722
0
    bool mark = flags & TDB_LOCK_MARK_ONLY;
723
0
    tdb_allrecord_unlock(tdb, ltype, mark);
724
0
    if (mark) {
725
0
      tdb->ecode = TDB_ERR_LOCK;
726
0
      TDB_LOG((tdb, TDB_DEBUG_ERROR,
727
0
         "tdb_lockall_mark cannot do recovery\n"));
728
0
      return -1;
729
0
    }
730
0
    if (tdb_lock_and_recover(tdb) == -1) {
731
0
      return -1;
732
0
    }
733
0
    return tdb_allrecord_lock(tdb, ltype, flags, upgradable);
734
0
  }
735
736
0
  return 0;
737
0
}
738
739
740
741
/* unlock entire db */
742
int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock)
743
0
{
744
  /* There are no locks on read-only dbs */
745
0
  if (tdb->read_only || tdb->traverse_read) {
746
0
    tdb->ecode = TDB_ERR_LOCK;
747
0
    return -1;
748
0
  }
749
750
0
  if (tdb->allrecord_lock.count == 0) {
751
0
    tdb->ecode = TDB_ERR_LOCK;
752
0
    return -1;
753
0
  }
754
755
  /* Upgradable locks are marked as write locks. */
756
0
  if (tdb->allrecord_lock.ltype != (uint32_t)ltype
757
0
      && (!tdb->allrecord_lock.off || ltype != F_RDLCK)) {
758
0
    tdb->ecode = TDB_ERR_LOCK;
759
0
    return -1;
760
0
  }
761
762
0
  if (tdb->allrecord_lock.count > 1) {
763
0
    tdb->allrecord_lock.count--;
764
0
    return 0;
765
0
  }
766
767
0
  if (!mark_lock) {
768
0
    int ret;
769
770
0
    if (tdb_have_mutexes(tdb)) {
771
0
      ret = tdb_mutex_allrecord_unlock(tdb);
772
0
      if (ret == 0) {
773
0
        ret = tdb_brunlock(tdb, ltype,
774
0
               lock_offset(tdb->hash_size),
775
0
               0);
776
0
      }
777
0
    } else {
778
0
      ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0);
779
0
    }
780
781
0
    if (ret != 0) {
782
0
      TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed "
783
0
         "(%s)\n", strerror(errno)));
784
0
      return -1;
785
0
    }
786
0
  }
787
788
0
  tdb->allrecord_lock.count = 0;
789
0
  tdb->allrecord_lock.ltype = 0;
790
791
0
  return 0;
792
0
}
793
794
/* lock entire database with write lock */
795
_PUBLIC_ int tdb_lockall(struct tdb_context *tdb)
796
0
{
797
0
  tdb_trace(tdb, "tdb_lockall");
798
0
  return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
799
0
}
800
801
/* lock entire database with write lock - mark only */
802
_PUBLIC_ int tdb_lockall_mark(struct tdb_context *tdb)
803
0
{
804
0
  tdb_trace(tdb, "tdb_lockall_mark");
805
0
  return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_MARK_ONLY, false);
806
0
}
807
808
/* unlock entire database with write lock - unmark only */
809
_PUBLIC_ int tdb_lockall_unmark(struct tdb_context *tdb)
810
0
{
811
0
  tdb_trace(tdb, "tdb_lockall_unmark");
812
0
  return tdb_allrecord_unlock(tdb, F_WRLCK, true);
813
0
}
814
815
/* lock entire database with write lock - nonblocking variant */
816
_PUBLIC_ int tdb_lockall_nonblock(struct tdb_context *tdb)
817
0
{
818
0
  int ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false);
819
0
  tdb_trace_ret(tdb, "tdb_lockall_nonblock", ret);
820
0
  return ret;
821
0
}
822
823
/* unlock entire database with write lock */
824
_PUBLIC_ int tdb_unlockall(struct tdb_context *tdb)
825
0
{
826
0
  tdb_trace(tdb, "tdb_unlockall");
827
0
  return tdb_allrecord_unlock(tdb, F_WRLCK, false);
828
0
}
829
830
/* lock entire database with read lock */
831
_PUBLIC_ int tdb_lockall_read(struct tdb_context *tdb)
832
0
{
833
0
  tdb_trace(tdb, "tdb_lockall_read");
834
0
  return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
835
0
}
836
837
/* lock entire database with read lock - nonblock variant */
838
_PUBLIC_ int tdb_lockall_read_nonblock(struct tdb_context *tdb)
839
0
{
840
0
  int ret = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_NOWAIT, false);
841
0
  tdb_trace_ret(tdb, "tdb_lockall_read_nonblock", ret);
842
0
  return ret;
843
0
}
844
845
/* unlock entire database with read lock */
846
_PUBLIC_ int tdb_unlockall_read(struct tdb_context *tdb)
847
0
{
848
0
  tdb_trace(tdb, "tdb_unlockall_read");
849
0
  return tdb_allrecord_unlock(tdb, F_RDLCK, false);
850
0
}
851
852
/* lock/unlock one hash chain. This is meant to be used to reduce
853
   contention - it cannot guarantee how many records will be locked */
854
_PUBLIC_ int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
{
	/* Write-lock the hash chain that `key` maps to. */
	int ret = tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
	tdb_trace_1rec(tdb, "tdb_chainlock", key);
	return ret;
}
860
861
/* lock/unlock one hash chain, non-blocking. This is meant to be used
862
   to reduce contention - it cannot guarantee how many records will be
863
   locked */
864
_PUBLIC_ int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key)
{
	/* Non-blocking variant of tdb_chainlock(). */
	int ret = tdb_lock_nonblock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
	tdb_trace_1rec_ret(tdb, "tdb_chainlock_nonblock", key, ret);
	return ret;
}
870
871
/* mark a chain as locked without actually locking it. Warning! use with great caution! */
872
_PUBLIC_ int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key)
{
	/* Bookkeeping only: record the chain as write-locked without
	 * taking any kernel lock. */
	int ret = tdb_nest_lock(tdb, lock_offset(BUCKET(tdb->hash_fn(&key))),
				F_WRLCK, TDB_LOCK_MARK_ONLY);
	tdb_trace_1rec(tdb, "tdb_chainlock_mark", key);
	return ret;
}
879
880
/* unmark a chain as locked without actually locking it. Warning! use with great caution! */
881
_PUBLIC_ int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key)
{
	/* Bookkeeping only: forget the marked chain lock, no kernel unlock. */
	tdb_trace_1rec(tdb, "tdb_chainlock_unmark", key);
	return tdb_nest_unlock(tdb, lock_offset(BUCKET(tdb->hash_fn(&key))),
			       F_WRLCK, true);
}
887
888
_PUBLIC_ int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
{
	/* Release the write lock on `key`'s hash chain. */
	tdb_trace_1rec(tdb, "tdb_chainunlock", key);
	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
}
893
894
_PUBLIC_ int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
{
	/* Read-lock the hash chain that `key` maps to. */
	int ret = tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
	tdb_trace_1rec(tdb, "tdb_chainlock_read", key);
	return ret;
}
901
902
_PUBLIC_ int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
{
	/* Release the read lock on `key`'s hash chain. */
	tdb_trace_1rec(tdb, "tdb_chainunlock_read", key);
	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
}
907
908
_PUBLIC_ int tdb_chainlock_read_nonblock(struct tdb_context *tdb, TDB_DATA key)
{
	/* Non-blocking variant of tdb_chainlock_read(). */
	int ret = tdb_lock_nonblock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
	tdb_trace_1rec_ret(tdb, "tdb_chainlock_read_nonblock", key, ret);
	return ret;
}
914
915
/* record lock stops delete underneath */
916
/* record lock stops delete underneath */
int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off)
{
	if (tdb->allrecord_lock.count) {
		/* Already covered by the global lock. */
		return 0;
	}
	if (off == 0) {
		/* Offset 0 is "no record". */
		return 0;
	}
	return tdb_brlock(tdb, F_RDLCK, off, 1, TDB_LOCK_WAIT);
}
923
924
/*
925
  Write locks override our own fcntl readlocks, so check it here.
926
  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
927
  an error to fail to get the lock here.
928
*/
929
/*
  Write locks override our own fcntl readlocks, so check it here.
  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
  an error to fail to get the lock here.
*/
int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off)
{
	struct tdb_traverse_lock *t;

	if (tdb == NULL) {
		return -1;
	}
	/* Refuse if any traverse currently holds this record. */
	for (t = &tdb->travlocks; t != NULL; t = t->next) {
		if (t->off == off) {
			return -1;
		}
	}
	if (tdb->allrecord_lock.count) {
		/* Global lock suffices only if it is a write lock. */
		if (tdb->allrecord_lock.ltype == F_WRLCK) {
			return 0;
		}
		return -1;
	}
	return tdb_brlock(tdb, F_WRLCK, off, 1, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
}
946
947
/* Release a per-record write lock taken by tdb_write_lock_record(). */
int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off)
{
	if (tdb->allrecord_lock.count) {
		/* Covered by the global lock; nothing was taken per record. */
		return 0;
	}
	return tdb_brunlock(tdb, F_WRLCK, off, 1);
}
954
955
/* fcntl locks don't stack: avoid unlocking someone else's */
956
/* fcntl locks don't stack: avoid unlocking someone else's */
int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off)
{
	struct tdb_traverse_lock *t;
	uint32_t holders = 0;

	if (tdb->allrecord_lock.count) {
		return 0;
	}
	if (off == 0) {
		return 0;
	}
	/* Only drop the kernel lock when exactly one traverse references
	 * this offset; otherwise another traverse still needs it. */
	for (t = &tdb->travlocks; t != NULL; t = t->next) {
		if (t->off == off) {
			holders++;
		}
	}
	return (holders == 1) ? tdb_brunlock(tdb, F_RDLCK, off, 1) : 0;
}
972
973
bool tdb_have_extra_locks(struct tdb_context *tdb)
974
0
{
975
0
  unsigned int extra = tdb->num_lockrecs;
976
977
  /* A transaction holds the lock for all records. */
978
0
  if (!tdb->transaction && tdb->allrecord_lock.count) {
979
0
    return true;
980
0
  }
981
982
  /* We always hold the active lock if CLEAR_IF_FIRST. */
983
0
  if (find_nestlock(tdb, ACTIVE_LOCK)) {
984
0
    extra--;
985
0
  }
986
987
  /* In a transaction, we expect to hold the transaction lock */
988
0
  if (tdb->transaction && find_nestlock(tdb, TRANSACTION_LOCK)) {
989
0
    extra--;
990
0
  }
991
992
0
  return extra;
993
0
}
994
995
/* The transaction code uses this to remove all locks. */
996
void tdb_release_transaction_locks(struct tdb_context *tdb)
997
0
{
998
0
  int i;
999
0
  unsigned int active = 0;
1000
1001
0
  if (tdb->allrecord_lock.count != 0) {
1002
0
    tdb_allrecord_unlock(tdb, tdb->allrecord_lock.ltype, false);
1003
0
    tdb->allrecord_lock.count = 0;
1004
0
  }
1005
1006
0
  for (i=0;i<tdb->num_lockrecs;i++) {
1007
0
    struct tdb_lock_type *lck = &tdb->lockrecs[i];
1008
1009
    /* Don't release the active lock!  Copy it to first entry. */
1010
0
    if (lck->off == ACTIVE_LOCK) {
1011
0
      tdb->lockrecs[active++] = *lck;
1012
0
    } else {
1013
0
      tdb_brunlock(tdb, lck->ltype, lck->off, 1);
1014
0
    }
1015
0
  }
1016
0
  tdb->num_lockrecs = active;
1017
0
}
1018
1019
/* Following functions are added specifically to support CTDB. */
1020
1021
/* Don't do actual fcntl locking, just mark tdb locked */
1022
_PUBLIC_ int tdb_transaction_write_lock_mark(struct tdb_context *tdb);
1023
_PUBLIC_ int tdb_transaction_write_lock_mark(struct tdb_context *tdb)
1024
0
{
1025
0
  return tdb_transaction_lock(tdb, F_WRLCK, TDB_LOCK_MARK_ONLY);
1026
0
}
1027
1028
/* Don't do actual fcntl unlocking, just mark tdb unlocked */
1029
_PUBLIC_ int tdb_transaction_write_lock_unmark(struct tdb_context *tdb);
1030
_PUBLIC_ int tdb_transaction_write_lock_unmark(struct tdb_context *tdb)
1031
0
{
1032
0
  return tdb_nest_unlock(tdb, TRANSACTION_LOCK, F_WRLCK, true);
1033
0
}