/src/samba/lib/tdb/common/mutex.c
Line | Count | Source |
1 | | /* |
2 | | Unix SMB/CIFS implementation. |
3 | | |
4 | | trivial database library |
5 | | |
6 | | Copyright (C) Volker Lendecke 2012,2013 |
7 | | Copyright (C) Stefan Metzmacher 2013,2014 |
8 | | Copyright (C) Michael Adam 2014 |
9 | | |
10 | | ** NOTE! The following LGPL license applies to the tdb |
11 | | ** library. This does NOT imply that all of Samba is released |
12 | | ** under the LGPL |
13 | | |
14 | | This library is free software; you can redistribute it and/or |
15 | | modify it under the terms of the GNU Lesser General Public |
16 | | License as published by the Free Software Foundation; either |
17 | | version 3 of the License, or (at your option) any later version. |
18 | | |
19 | | This library is distributed in the hope that it will be useful, |
20 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
22 | | Lesser General Public License for more details. |
23 | | |
24 | | You should have received a copy of the GNU Lesser General Public |
25 | | License along with this library; if not, see <http://www.gnu.org/licenses/>. |
26 | | */ |
27 | | #include "tdb_private.h" |
28 | | #include "system/threads.h" |
29 | | |
30 | | #ifdef USE_TDB_MUTEX_LOCKING |
31 | | |
32 | | /* |
33 | | * If we run with mutexes, we store the "struct tdb_mutexes" at the |
34 | | * beginning of the file. We store an additional tdb_header right |
35 | | * beyond the mutex area, page aligned. All the offsets within the tdb |
36 | | * are relative to the area behind the mutex area. tdb->map_ptr points |
37 | | * behind the mutex area as well, so the read and write path in the |
38 | | * mutex case can remain unchanged. |
39 | | * |
40 | | * Early in the mutex development the mutexes were placed between the hash |
41 | | * chain pointers and the real tdb data. This had two drawbacks: First, it |
42 | | * made pointer calculations more complex. Second, we had to mmap the mutex |
43 | | * area twice. One was the normal map_ptr in the tdb. This frequently changed |
44 | | * from within tdb_oob. At least the Linux glibc robust mutex code assumes |
45 | | * constant pointers in memory, so a constantly changing mmap area destroys |
46 | | * the mutex list. So we had to mmap the first bytes of the file with a second |
47 | | * mmap call. With that scheme, very weird errors happened that could be |
48 | | * easily fixed by doing the mutex mmap in a second file. It seemed that |
49 | | * mapping the same memory area twice does not end up accessing the same |
50 | | * physical page; looking at the mutexes in gdb, it seemed that old data showed |
51 | | * up after some re-mapping. To avoid a separate mutex file, the code now puts |
52 | | * the real content of the tdb file after the mutex area. This way we do not |
53 | | * have overlapping mmap areas: the mutex area is mmapped once and not |
54 | | * changed, and the tdb data area's mmap is constantly changed but does not |
55 | | * overlap. |
56 | | */ |
57 | | |
58 | | struct tdb_mutexes { |
59 | | struct tdb_header hdr; |
60 | | |
61 | | /* protect allrecord_lock */ |
62 | | pthread_mutex_t allrecord_mutex; |
63 | | |
64 | | /* |
65 | | * F_UNLCK: free, |
66 | | * F_RDLCK: shared, |
67 | | * F_WRLCK: exclusive |
68 | | */ |
69 | | short int allrecord_lock; |
70 | | |
71 | | /* |
72 | | * Index 0 is the freelist mutex, followed by |
73 | | * one mutex per hashchain. |
74 | | */ |
75 | | pthread_mutex_t hashchains[1]; |
76 | | }; |
77 | | |
78 | | bool tdb_have_mutexes(struct tdb_context *tdb) |
79 | 0 | { |
80 | 0 | return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0); |
81 | 0 | } |
82 | | |
83 | | size_t tdb_mutex_size(struct tdb_context *tdb) |
84 | 0 | { |
85 | 0 | size_t mutex_size; |
86 | |
87 | 0 | if (!tdb_have_mutexes(tdb)) { |
88 | 0 | return 0; |
89 | 0 | } |
90 | | |
91 | 0 | mutex_size = sizeof(struct tdb_mutexes); |
92 | 0 | mutex_size += tdb->hash_size * sizeof(pthread_mutex_t); |
93 | |
94 | 0 | return TDB_ALIGN(mutex_size, tdb->page_size); |
95 | 0 | } |
96 | | |
97 | | /* |
98 | | * Get the index for a chain mutex |
99 | | */ |
100 | | static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len, |
101 | | unsigned *idx) |
102 | 0 | { |
103 | | /* |
104 | | * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before |
105 | | * the 4 bytes of the freelist start and the hash chain that is about |
106 | | * to be locked. See lock_offset() where the freelist is -1 vs the |
107 | | * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in |
108 | | * the tdb file itself as data, we need to adjust the offset here. |
109 | | */ |
110 | 0 | const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t); |
111 | |
112 | 0 | if (!tdb_have_mutexes(tdb)) { |
113 | 0 | return false; |
114 | 0 | } |
115 | 0 | if (len != 1) { |
116 | | /* Possibly the allrecord lock */ |
117 | 0 | return false; |
118 | 0 | } |
119 | 0 | if (off < freelist_lock_ofs) { |
120 | | /* One of the special locks */ |
121 | 0 | return false; |
122 | 0 | } |
123 | 0 | if (tdb->hash_size == 0) { |
124 | | /* tdb not initialized yet, called from tdb_open_ex() */ |
125 | 0 | return false; |
126 | 0 | } |
127 | 0 | if (off >= TDB_DATA_START(tdb->hash_size)) { |
128 | | /* Single record lock from traverses */ |
129 | 0 | return false; |
130 | 0 | } |
131 | | |
132 | | /* |
133 | | * Now we know it's a freelist or hash chain lock. Those are always 4 |
134 | | * byte aligned. Paranoia check. |
135 | | */ |
136 | 0 | if ((off % sizeof(tdb_off_t)) != 0) { |
137 | 0 | abort(); |
138 | 0 | } |
139 | | |
140 | | /* |
141 | | * Re-index the fcntl offset into an offset into the mutex array |
142 | | */ |
143 | 0 | off -= freelist_lock_ofs; /* rebase to index 0 */ |
144 | 0 | off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */ |
145 | |
146 | 0 | *idx = off; |
147 | 0 | return true; |
148 | 0 | } |
149 | | |
150 | | static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb) |
151 | 0 | { |
152 | 0 | int i; |
153 | |
154 | 0 | for (i=0; i < tdb->num_lockrecs; i++) { |
155 | 0 | bool ret; |
156 | 0 | unsigned idx; |
157 | |
158 | 0 | ret = tdb_mutex_index(tdb, |
159 | 0 | tdb->lockrecs[i].off, |
160 | 0 | tdb->lockrecs[i].count, |
161 | 0 | &idx); |
162 | 0 | if (!ret) { |
163 | 0 | continue; |
164 | 0 | } |
165 | | |
166 | 0 | if (idx == 0) { |
167 | | /* this is the freelist mutex */ |
168 | 0 | continue; |
169 | 0 | } |
170 | | |
171 | 0 | return true; |
172 | 0 | } |
173 | | |
174 | 0 | return false; |
175 | 0 | } |
176 | | |
177 | | static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag) |
178 | 0 | { |
179 | 0 | int ret; |
180 | |
181 | 0 | if (waitflag) { |
182 | 0 | ret = pthread_mutex_lock(m); |
183 | 0 | } else { |
184 | 0 | ret = pthread_mutex_trylock(m); |
185 | 0 | } |
186 | 0 | if (ret != EOWNERDEAD) { |
187 | 0 | return ret; |
188 | 0 | } |
189 | | |
190 | | /* |
191 | | * For chainlocks, we don't do any cleanup (yet?) |
192 | | */ |
193 | 0 | return pthread_mutex_consistent(m); |
194 | 0 | } |
195 | | |
196 | | static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag) |
197 | 0 | { |
198 | 0 | int ret; |
199 | |
200 | 0 | if (waitflag) { |
201 | 0 | ret = pthread_mutex_lock(&m->allrecord_mutex); |
202 | 0 | } else { |
203 | 0 | ret = pthread_mutex_trylock(&m->allrecord_mutex); |
204 | 0 | } |
205 | 0 | if (ret != EOWNERDEAD) { |
206 | 0 | return ret; |
207 | 0 | } |
208 | | |
209 | | /* |
210 | | * The allrecord lock holder died. We need to reset the allrecord_lock |
211 | | * to F_UNLCK. This should also be the indication for |
212 | | * tdb_needs_recovery. |
213 | | */ |
214 | 0 | m->allrecord_lock = F_UNLCK; |
215 | |
216 | 0 | return pthread_mutex_consistent(&m->allrecord_mutex); |
217 | 0 | } |
218 | | |
219 | | bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, |
220 | | bool waitflag, int *pret) |
221 | 0 | { |
222 | 0 | struct tdb_mutexes *m = tdb->mutexes; |
223 | 0 | pthread_mutex_t *chain; |
224 | 0 | int ret; |
225 | 0 | unsigned idx; |
226 | 0 | bool allrecord_ok; |
227 | |
228 | 0 | if (!tdb_mutex_index(tdb, off, len, &idx)) { |
229 | 0 | return false; |
230 | 0 | } |
231 | 0 | chain = &m->hashchains[idx]; |
232 | |
233 | 0 | again: |
234 | 0 | ret = chain_mutex_lock(chain, waitflag); |
235 | 0 | if (ret == EBUSY) { |
236 | 0 | ret = EAGAIN; |
237 | 0 | } |
238 | 0 | if (ret != 0) { |
239 | 0 | errno = ret; |
240 | 0 | goto fail; |
241 | 0 | } |
242 | | |
243 | 0 | if (idx == 0) { |
244 | | /* |
245 | | * This is a freelist lock, which is independent of |
246 | | * the allrecord lock. So we're done once we have the |
247 | | * freelist mutex. |
248 | | */ |
249 | 0 | *pret = 0; |
250 | 0 | return true; |
251 | 0 | } |
252 | | |
253 | 0 | if (tdb_have_mutex_chainlocks(tdb)) { |
254 | | /* |
255 | | * We can only check the allrecord lock once. If we do it with |
256 | | * one chain mutex locked, we will deadlock with the allrecord |
257 | | * locker process in the following way: We lock the first hash |
258 | | * chain, we check for the allrecord lock. We keep the hash |
259 | | * chain locked. Then the allrecord locker locks the |
260 | | * allrecord_mutex. It walks the list of chain mutexes, |
261 | | * locking them all in sequence. Meanwhile, we have the chain |
262 | | * mutex locked, so the allrecord locker blocks trying to lock |
263 | | * our chain mutex. Then we come in and try to lock the second |
264 | | * chain lock, which in most cases will be the freelist. We |
265 | | * see that the allrecord lock is locked and put ourselves on |
266 | | * the allrecord_mutex. This will never be signalled though |
267 | | * because the allrecord locker waits for us to give up the |
268 | | * chain lock. |
269 | | */ |
270 | |
271 | 0 | *pret = 0; |
272 | 0 | return true; |
273 | 0 | } |
274 | | |
275 | | /* |
276 | | * Check if someone has the allrecord lock: queue if so. |
277 | | */ |
278 | | |
279 | 0 | allrecord_ok = false; |
280 | |
281 | 0 | if (m->allrecord_lock == F_UNLCK) { |
282 | | /* |
283 | | * allrecord lock not taken |
284 | | */ |
285 | 0 | allrecord_ok = true; |
286 | 0 | } |
287 | |
288 | 0 | if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) { |
289 | | /* |
290 | | * allrecord shared lock taken, but we only want to read |
291 | | */ |
292 | 0 | allrecord_ok = true; |
293 | 0 | } |
294 | |
295 | 0 | if (allrecord_ok) { |
296 | 0 | *pret = 0; |
297 | 0 | return true; |
298 | 0 | } |
299 | | |
300 | 0 | ret = pthread_mutex_unlock(chain); |
301 | 0 | if (ret != 0) { |
302 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" |
303 | 0 | "(chain_mutex) failed: %s\n", strerror(ret))); |
304 | 0 | errno = ret; |
305 | 0 | goto fail; |
306 | 0 | } |
307 | 0 | ret = allrecord_mutex_lock(m, waitflag); |
308 | 0 | if (ret == EBUSY) { |
309 | 0 | ret = EAGAIN; |
310 | 0 | } |
311 | 0 | if (ret != 0) { |
312 | 0 | if (waitflag || (ret != EAGAIN)) { |
313 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock" |
314 | 0 | "(allrecord_mutex) failed: %s\n", |
315 | 0 | waitflag ? "" : "try_", strerror(ret))); |
316 | 0 | } |
317 | 0 | errno = ret; |
318 | 0 | goto fail; |
319 | 0 | } |
320 | 0 | ret = pthread_mutex_unlock(&m->allrecord_mutex); |
321 | 0 | if (ret != 0) { |
322 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" |
323 | 0 | "(allrecord_mutex) failed: %s\n", strerror(ret))); |
324 | 0 | errno = ret; |
325 | 0 | goto fail; |
326 | 0 | } |
327 | 0 | goto again; |
328 | | |
329 | 0 | fail: |
330 | 0 | *pret = -1; |
331 | 0 | return true; |
332 | 0 | } |
333 | | |
334 | | bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, |
335 | | int *pret) |
336 | 0 | { |
337 | 0 | struct tdb_mutexes *m = tdb->mutexes; |
338 | 0 | pthread_mutex_t *chain; |
339 | 0 | int ret; |
340 | 0 | unsigned idx; |
341 | |
342 | 0 | if (!tdb_mutex_index(tdb, off, len, &idx)) { |
343 | 0 | return false; |
344 | 0 | } |
345 | 0 | chain = &m->hashchains[idx]; |
346 | |
347 | 0 | ret = pthread_mutex_unlock(chain); |
348 | 0 | if (ret == 0) { |
349 | 0 | *pret = 0; |
350 | 0 | return true; |
351 | 0 | } |
352 | 0 | errno = ret; |
353 | 0 | *pret = -1; |
354 | 0 | return true; |
355 | 0 | } |
356 | | |
357 | | int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, |
358 | | enum tdb_lock_flags flags) |
359 | 0 | { |
360 | 0 | struct tdb_mutexes *m = tdb->mutexes; |
361 | 0 | int ret; |
362 | 0 | uint32_t i; |
363 | 0 | bool waitflag = (flags & TDB_LOCK_WAIT); |
364 | 0 | int saved_errno; |
365 | |
366 | 0 | if (tdb->flags & TDB_NOLOCK) { |
367 | 0 | return 0; |
368 | 0 | } |
369 | | |
370 | 0 | if (flags & TDB_LOCK_MARK_ONLY) { |
371 | 0 | return 0; |
372 | 0 | } |
373 | | |
374 | 0 | ret = allrecord_mutex_lock(m, waitflag); |
375 | 0 | if (!waitflag && (ret == EBUSY)) { |
376 | 0 | errno = EAGAIN; |
377 | 0 | tdb->ecode = TDB_ERR_LOCK; |
378 | 0 | return -1; |
379 | 0 | } |
380 | 0 | if (ret != 0) { |
381 | 0 | if (!(flags & TDB_LOCK_PROBE)) { |
382 | 0 | TDB_LOG((tdb, TDB_DEBUG_TRACE, |
383 | 0 | "allrecord_mutex_lock() failed: %s\n", |
384 | 0 | strerror(ret))); |
385 | 0 | } |
386 | 0 | tdb->ecode = TDB_ERR_LOCK; |
387 | 0 | return -1; |
388 | 0 | } |
389 | | |
390 | 0 | if (m->allrecord_lock != F_UNLCK) { |
391 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", |
392 | 0 | (int)m->allrecord_lock)); |
393 | 0 | goto fail_unlock_allrecord_mutex; |
394 | 0 | } |
395 | 0 | m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK; |
396 | |
397 | 0 | for (i=0; i<tdb->hash_size; i++) { |
398 | | |
399 | | /* ignore hashchains[0], the freelist */ |
400 | 0 | pthread_mutex_t *chain = &m->hashchains[i+1]; |
401 | |
402 | 0 | ret = chain_mutex_lock(chain, waitflag); |
403 | 0 | if (!waitflag && (ret == EBUSY)) { |
404 | 0 | errno = EAGAIN; |
405 | 0 | goto fail_unroll_allrecord_lock; |
406 | 0 | } |
407 | 0 | if (ret != 0) { |
408 | 0 | if (!(flags & TDB_LOCK_PROBE)) { |
409 | 0 | TDB_LOG((tdb, TDB_DEBUG_TRACE, |
410 | 0 | "chain_mutex_lock() failed: %s\n", |
411 | 0 | strerror(ret))); |
412 | 0 | } |
413 | 0 | errno = ret; |
414 | 0 | goto fail_unroll_allrecord_lock; |
415 | 0 | } |
416 | | |
417 | 0 | ret = pthread_mutex_unlock(chain); |
418 | 0 | if (ret != 0) { |
419 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" |
420 | 0 | "(chainlock) failed: %s\n", strerror(ret))); |
421 | 0 | errno = ret; |
422 | 0 | goto fail_unroll_allrecord_lock; |
423 | 0 | } |
424 | 0 | } |
425 | | /* |
426 | | * We leave this routine with m->allrecord_mutex locked |
427 | | */ |
428 | 0 | return 0; |
429 | | |
430 | 0 | fail_unroll_allrecord_lock: |
431 | 0 | m->allrecord_lock = F_UNLCK; |
432 | |
433 | 0 | fail_unlock_allrecord_mutex: |
434 | 0 | saved_errno = errno; |
435 | 0 | ret = pthread_mutex_unlock(&m->allrecord_mutex); |
436 | 0 | if (ret != 0) { |
437 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" |
438 | 0 | "(allrecord_mutex) failed: %s\n", strerror(ret))); |
439 | 0 | } |
440 | 0 | errno = saved_errno; |
441 | 0 | tdb->ecode = TDB_ERR_LOCK; |
442 | 0 | return -1; |
443 | 0 | } |
444 | | |
445 | | int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) |
446 | 0 | { |
447 | 0 | struct tdb_mutexes *m = tdb->mutexes; |
448 | 0 | int ret; |
449 | 0 | uint32_t i; |
450 | |
|
451 | 0 | if (tdb->flags & TDB_NOLOCK) { |
452 | 0 | return 0; |
453 | 0 | } |
454 | | |
455 | | /* |
456 | | * Our only caller tdb_allrecord_upgrade() |
457 | | * guarantees that we already own the allrecord lock. |
458 | | * |
459 | | * Which means m->allrecord_mutex is still locked by us. |
460 | | */ |
461 | | |
462 | 0 | if (m->allrecord_lock != F_RDLCK) { |
463 | 0 | tdb->ecode = TDB_ERR_LOCK; |
464 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", |
465 | 0 | (int)m->allrecord_lock)); |
466 | 0 | return -1; |
467 | 0 | } |
468 | | |
469 | 0 | m->allrecord_lock = F_WRLCK; |
470 | |
471 | 0 | for (i=0; i<tdb->hash_size; i++) { |
472 | | |
473 | | /* ignore hashchains[0], the freelist */ |
474 | 0 | pthread_mutex_t *chain = &m->hashchains[i+1]; |
475 | |
476 | 0 | ret = chain_mutex_lock(chain, true); |
477 | 0 | if (ret != 0) { |
478 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock" |
479 | 0 | "(chainlock) failed: %s\n", strerror(ret))); |
480 | 0 | goto fail_unroll_allrecord_lock; |
481 | 0 | } |
482 | | |
483 | 0 | ret = pthread_mutex_unlock(chain); |
484 | 0 | if (ret != 0) { |
485 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" |
486 | 0 | "(chainlock) failed: %s\n", strerror(ret))); |
487 | 0 | goto fail_unroll_allrecord_lock; |
488 | 0 | } |
489 | 0 | } |
490 | | |
491 | 0 | return 0; |
492 | | |
493 | 0 | fail_unroll_allrecord_lock: |
494 | 0 | m->allrecord_lock = F_RDLCK; |
495 | 0 | tdb->ecode = TDB_ERR_LOCK; |
496 | 0 | return -1; |
497 | 0 | } |
498 | | |
499 | | void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) |
500 | 0 | { |
501 | 0 | struct tdb_mutexes *m = tdb->mutexes; |
502 | | |
503 | | /* |
504 | | * Our only caller tdb_allrecord_upgrade() (in the error case) |
505 | | * guarantees that we already own the allrecord lock. |
506 | | * |
507 | | * Which means m->allrecord_mutex is still locked by us. |
508 | | */ |
509 | |
510 | 0 | if (m->allrecord_lock != F_WRLCK) { |
511 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", |
512 | 0 | (int)m->allrecord_lock)); |
513 | 0 | return; |
514 | 0 | } |
515 | | |
516 | 0 | m->allrecord_lock = F_RDLCK; |
517 | 0 | return; |
518 | 0 | } |
519 | | |
520 | | |
521 | | int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) |
522 | 0 | { |
523 | 0 | struct tdb_mutexes *m = tdb->mutexes; |
524 | 0 | short old; |
525 | 0 | int ret; |
526 | |
|
527 | 0 | if (tdb->flags & TDB_NOLOCK) { |
528 | 0 | return 0; |
529 | 0 | } |
530 | | |
531 | | /* |
532 | | * Our only callers tdb_allrecord_unlock() and |
533 | | * tdb_allrecord_lock() (in the error path) |
534 | | * guarantee that we already own the allrecord lock. |
535 | | * |
536 | | * Which means m->allrecord_mutex is still locked by us. |
537 | | */ |
538 | | |
539 | 0 | if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) { |
540 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n", |
541 | 0 | (int)m->allrecord_lock)); |
542 | 0 | return -1; |
543 | 0 | } |
544 | | |
545 | 0 | old = m->allrecord_lock; |
546 | 0 | m->allrecord_lock = F_UNLCK; |
547 | |
|
548 | 0 | ret = pthread_mutex_unlock(&m->allrecord_mutex); |
549 | 0 | if (ret != 0) { |
550 | 0 | m->allrecord_lock = old; |
551 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock" |
552 | 0 | "(allrecord_mutex) failed: %s\n", strerror(ret))); |
553 | 0 | return -1; |
554 | 0 | } |
555 | 0 | return 0; |
556 | 0 | } |
557 | | |
558 | | int tdb_mutex_init(struct tdb_context *tdb) |
559 | 0 | { |
560 | 0 | struct tdb_mutexes *m; |
561 | 0 | pthread_mutexattr_t ma; |
562 | 0 | uint32_t i; |
563 | 0 | int ret; |
564 | |
565 | 0 | ret = tdb_mutex_mmap(tdb); |
566 | 0 | if (ret == -1) { |
567 | 0 | return -1; |
568 | 0 | } |
569 | 0 | m = tdb->mutexes; |
570 | |
571 | 0 | ret = pthread_mutexattr_init(&ma); |
572 | 0 | if (ret != 0) { |
573 | 0 | goto fail_munmap; |
574 | 0 | } |
575 | 0 | ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); |
576 | 0 | if (ret != 0) { |
577 | 0 | goto fail; |
578 | 0 | } |
579 | 0 | ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); |
580 | 0 | if (ret != 0) { |
581 | 0 | goto fail; |
582 | 0 | } |
583 | 0 | ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); |
584 | 0 | if (ret != 0) { |
585 | 0 | goto fail; |
586 | 0 | } |
587 | | |
588 | 0 | for (i=0; i<tdb->hash_size+1; i++) { |
589 | 0 | pthread_mutex_t *chain = &m->hashchains[i]; |
590 | |
591 | 0 | ret = pthread_mutex_init(chain, &ma); |
592 | 0 | if (ret != 0) { |
593 | 0 | goto fail; |
594 | 0 | } |
595 | 0 | } |
596 | | |
597 | 0 | m->allrecord_lock = F_UNLCK; |
598 | |
599 | 0 | ret = pthread_mutex_init(&m->allrecord_mutex, &ma); |
600 | 0 | if (ret != 0) { |
601 | 0 | goto fail; |
602 | 0 | } |
603 | 0 | ret = 0; |
604 | 0 | fail: |
605 | 0 | pthread_mutexattr_destroy(&ma); |
606 | 0 | fail_munmap: |
607 | |
608 | 0 | if (ret == 0) { |
609 | 0 | return 0; |
610 | 0 | } |
611 | | |
612 | 0 | tdb_mutex_munmap(tdb); |
613 | |
614 | 0 | errno = ret; |
615 | 0 | return -1; |
616 | 0 | } |
617 | | |
618 | | int tdb_mutex_mmap(struct tdb_context *tdb) |
619 | 0 | { |
620 | 0 | size_t len; |
621 | 0 | void *ptr; |
622 | |
623 | 0 | len = tdb_mutex_size(tdb); |
624 | 0 | if (len == 0) { |
625 | 0 | return 0; |
626 | 0 | } |
627 | | |
628 | 0 | if (tdb->mutexes != NULL) { |
629 | 0 | return 0; |
630 | 0 | } |
631 | | |
632 | 0 | ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE, |
633 | 0 | tdb->fd, 0); |
634 | 0 | if (ptr == MAP_FAILED) { |
635 | 0 | return -1; |
636 | 0 | } |
637 | 0 | tdb->mutexes = (struct tdb_mutexes *)ptr; |
638 | |
639 | 0 | return 0; |
640 | 0 | } |
641 | | |
642 | | int tdb_mutex_munmap(struct tdb_context *tdb) |
643 | 0 | { |
644 | 0 | size_t len; |
645 | 0 | int ret; |
646 | |
647 | 0 | len = tdb_mutex_size(tdb); |
648 | 0 | if (len == 0) { |
649 | 0 | return 0; |
650 | 0 | } |
651 | | |
652 | 0 | ret = munmap(tdb->mutexes, len); |
653 | 0 | if (ret == -1) { |
654 | 0 | return -1; |
655 | 0 | } |
656 | 0 | tdb->mutexes = NULL; |
657 | |
658 | 0 | return 0; |
659 | 0 | } |
660 | | |
661 | | static bool tdb_mutex_locking_cached; |
662 | | |
663 | | static bool tdb_mutex_locking_supported(void) |
664 | 0 | { |
665 | 0 | pthread_mutexattr_t ma; |
666 | 0 | pthread_mutex_t m; |
667 | 0 | int ret; |
668 | 0 | static bool initialized; |
669 | |
670 | 0 | if (initialized) { |
671 | 0 | return tdb_mutex_locking_cached; |
672 | 0 | } |
673 | | |
674 | 0 | initialized = true; |
675 | |
676 | 0 | ret = pthread_mutexattr_init(&ma); |
677 | 0 | if (ret != 0) { |
678 | 0 | return false; |
679 | 0 | } |
680 | 0 | ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); |
681 | 0 | if (ret != 0) { |
682 | 0 | goto cleanup_ma; |
683 | 0 | } |
684 | 0 | ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); |
685 | 0 | if (ret != 0) { |
686 | 0 | goto cleanup_ma; |
687 | 0 | } |
688 | 0 | ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); |
689 | 0 | if (ret != 0) { |
690 | 0 | goto cleanup_ma; |
691 | 0 | } |
692 | 0 | ret = pthread_mutex_init(&m, &ma); |
693 | 0 | if (ret != 0) { |
694 | 0 | goto cleanup_ma; |
695 | 0 | } |
696 | 0 | ret = pthread_mutex_lock(&m); |
697 | 0 | if (ret != 0) { |
698 | 0 | goto cleanup_m; |
699 | 0 | } |
700 | | /* |
701 | | * This makes sure we have real mutexes |
702 | | * from a threading library instead of just |
703 | | * stubs from libc. |
704 | | */ |
705 | 0 | ret = pthread_mutex_lock(&m); |
706 | 0 | if (ret != EDEADLK) { |
707 | 0 | goto cleanup_lock; |
708 | 0 | } |
709 | 0 | ret = pthread_mutex_unlock(&m); |
710 | 0 | if (ret != 0) { |
711 | 0 | goto cleanup_m; |
712 | 0 | } |
713 | | |
714 | 0 | tdb_mutex_locking_cached = true; |
715 | 0 | goto cleanup_m; |
716 | | |
717 | 0 | cleanup_lock: |
718 | 0 | pthread_mutex_unlock(&m); |
719 | 0 | cleanup_m: |
720 | 0 | pthread_mutex_destroy(&m); |
721 | 0 | cleanup_ma: |
722 | 0 | pthread_mutexattr_destroy(&ma); |
723 | 0 | return tdb_mutex_locking_cached; |
724 | 0 | } |
725 | | |
726 | | static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR; |
727 | | static pid_t tdb_robust_mutex_pid = -1; |
728 | | |
729 | | static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int), |
730 | | void (**p_old_handler)(int)) |
731 | 0 | { |
732 | 0 | #ifdef HAVE_SIGACTION |
733 | 0 | struct sigaction act; |
734 | 0 | struct sigaction oldact; |
735 | |
736 | 0 | memset(&act, '\0', sizeof(act)); |
737 | |
738 | 0 | act.sa_handler = handler; |
739 | 0 | #ifdef SA_RESTART |
740 | 0 | act.sa_flags = SA_RESTART; |
741 | 0 | #endif |
742 | 0 | sigemptyset(&act.sa_mask); |
743 | 0 | sigaddset(&act.sa_mask, SIGCHLD); |
744 | 0 | sigaction(SIGCHLD, &act, &oldact); |
745 | 0 | if (p_old_handler) { |
746 | 0 | *p_old_handler = oldact.sa_handler; |
747 | 0 | } |
748 | 0 | return true; |
749 | | #else /* !HAVE_SIGACTION */ |
750 | | return false; |
751 | | #endif |
752 | 0 | } |
753 | | |
754 | | static void tdb_robust_mutex_handler(int sig) |
755 | 0 | { |
756 | 0 | pid_t child_pid = tdb_robust_mutex_pid; |
757 | |
|
758 | 0 | if (child_pid != -1) { |
759 | 0 | pid_t pid; |
760 | |
761 | 0 | pid = waitpid(child_pid, NULL, WNOHANG); |
762 | 0 | if (pid == -1) { |
763 | 0 | switch (errno) { |
764 | 0 | case ECHILD: |
765 | 0 | tdb_robust_mutex_pid = -1; |
766 | 0 | return; |
767 | | |
768 | 0 | default: |
769 | 0 | return; |
770 | 0 | } |
771 | 0 | } |
772 | 0 | if (pid == child_pid) { |
773 | 0 | tdb_robust_mutex_pid = -1; |
774 | 0 | return; |
775 | 0 | } |
776 | 0 | } |
777 | | |
778 | 0 | if (tdb_robust_mutext_old_handler == SIG_DFL) { |
779 | 0 | return; |
780 | 0 | } |
781 | 0 | if (tdb_robust_mutext_old_handler == SIG_IGN) { |
782 | 0 | return; |
783 | 0 | } |
784 | 0 | if (tdb_robust_mutext_old_handler == SIG_ERR) { |
785 | 0 | return; |
786 | 0 | } |
787 | | |
788 | 0 | tdb_robust_mutext_old_handler(sig); |
789 | 0 | } |
790 | | |
791 | | static void tdb_robust_mutex_wait_for_child(pid_t *child_pid) |
792 | 0 | { |
793 | 0 | int options = WNOHANG; |
794 | |
795 | 0 | if (*child_pid == -1) { |
796 | 0 | return; |
797 | 0 | } |
798 | | |
799 | 0 | while (tdb_robust_mutex_pid > 0) { |
800 | 0 | pid_t pid; |
801 | | |
802 | | /* |
803 | | * First we try with WNOHANG, as the process might not exist |
804 | | * anymore. Once we've sent SIGKILL we block waiting for the |
805 | | * exit. |
806 | | */ |
807 | 0 | pid = waitpid(*child_pid, NULL, options); |
808 | 0 | if (pid == -1) { |
809 | 0 | if (errno == EINTR) { |
810 | 0 | continue; |
811 | 0 | } else if (errno == ECHILD) { |
812 | 0 | break; |
813 | 0 | } else { |
814 | 0 | abort(); |
815 | 0 | } |
816 | 0 | } |
817 | 0 | if (pid == *child_pid) { |
818 | 0 | break; |
819 | 0 | } |
820 | | |
821 | 0 | kill(*child_pid, SIGKILL); |
822 | 0 | options = 0; |
823 | 0 | } |
824 | | |
825 | 0 | tdb_robust_mutex_pid = -1; |
826 | 0 | *child_pid = -1; |
827 | 0 | } |
828 | | |
829 | | _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) |
830 | 0 | { |
831 | 0 | void *ptr = NULL; |
832 | 0 | pthread_mutex_t *m = NULL; |
833 | 0 | pthread_mutexattr_t ma; |
834 | 0 | int ret = 1; |
835 | 0 | int pipe_down[2] = { -1, -1 }; |
836 | 0 | int pipe_up[2] = { -1, -1 }; |
837 | 0 | ssize_t nread; |
838 | 0 | char c = 0; |
839 | 0 | bool ok; |
840 | 0 | static bool initialized; |
841 | 0 | pid_t saved_child_pid = -1; |
842 | 0 | bool cleanup_ma = false; |
843 | |
844 | 0 | if (initialized) { |
845 | 0 | return tdb_mutex_locking_cached; |
846 | 0 | } |
847 | | |
848 | 0 | initialized = true; |
849 | |
850 | 0 | ok = tdb_mutex_locking_supported(); |
851 | 0 | if (!ok) { |
852 | 0 | return false; |
853 | 0 | } |
854 | | |
855 | 0 | tdb_mutex_locking_cached = false; |
856 | |
857 | 0 | ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, |
858 | 0 | MAP_SHARED|MAP_ANON, -1 /* fd */, 0); |
859 | 0 | if (ptr == MAP_FAILED) { |
860 | 0 | return false; |
861 | 0 | } |
862 | | |
863 | 0 | ret = pipe(pipe_down); |
864 | 0 | if (ret != 0) { |
865 | 0 | goto cleanup; |
866 | 0 | } |
867 | 0 | ret = pipe(pipe_up); |
868 | 0 | if (ret != 0) { |
869 | 0 | goto cleanup; |
870 | 0 | } |
871 | | |
872 | 0 | ret = pthread_mutexattr_init(&ma); |
873 | 0 | if (ret != 0) { |
874 | 0 | goto cleanup; |
875 | 0 | } |
876 | 0 | cleanup_ma = true; |
877 | 0 | ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); |
878 | 0 | if (ret != 0) { |
879 | 0 | goto cleanup; |
880 | 0 | } |
881 | 0 | ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); |
882 | 0 | if (ret != 0) { |
883 | 0 | goto cleanup; |
884 | 0 | } |
885 | 0 | ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); |
886 | 0 | if (ret != 0) { |
887 | 0 | goto cleanup; |
888 | 0 | } |
889 | 0 | ret = pthread_mutex_init(ptr, &ma); |
890 | 0 | if (ret != 0) { |
891 | 0 | goto cleanup; |
892 | 0 | } |
893 | 0 | m = (pthread_mutex_t *)ptr; |
894 | |
895 | 0 | if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler, |
896 | 0 | &tdb_robust_mutext_old_handler) == false) { |
897 | 0 | goto cleanup; |
898 | 0 | } |
899 | | |
900 | 0 | tdb_robust_mutex_pid = fork(); |
901 | 0 | saved_child_pid = tdb_robust_mutex_pid; |
902 | 0 | if (tdb_robust_mutex_pid == 0) { |
903 | 0 | size_t nwritten; |
904 | 0 | close(pipe_down[1]); |
905 | 0 | close(pipe_up[0]); |
906 | 0 | ret = pthread_mutex_lock(m); |
907 | 0 | nwritten = write(pipe_up[1], &ret, sizeof(ret)); |
908 | 0 | if (nwritten != sizeof(ret)) { |
909 | 0 | _exit(1); |
910 | 0 | } |
911 | 0 | if (ret != 0) { |
912 | 0 | _exit(1); |
913 | 0 | } |
914 | 0 | nread = read(pipe_down[0], &c, 1); |
915 | 0 | if (nread != 1) { |
916 | 0 | _exit(1); |
917 | 0 | } |
918 | | /* leave locked */ |
919 | 0 | _exit(0); |
920 | 0 | } |
921 | 0 | if (tdb_robust_mutex_pid == -1) { |
922 | 0 | goto cleanup; |
923 | 0 | } |
924 | 0 | close(pipe_down[0]); |
925 | 0 | pipe_down[0] = -1; |
926 | 0 | close(pipe_up[1]); |
927 | 0 | pipe_up[1] = -1; |
928 | |
929 | 0 | nread = read(pipe_up[0], &ret, sizeof(ret)); |
930 | 0 | if (nread != sizeof(ret)) { |
931 | 0 | goto cleanup; |
932 | 0 | } |
933 | | |
934 | 0 | ret = pthread_mutex_trylock(m); |
935 | 0 | if (ret != EBUSY) { |
936 | 0 | if (ret == 0) { |
937 | 0 | pthread_mutex_unlock(m); |
938 | 0 | } |
939 | 0 | goto cleanup; |
940 | 0 | } |
941 | | |
942 | 0 | if (write(pipe_down[1], &c, 1) != 1) { |
943 | 0 | goto cleanup; |
944 | 0 | } |
945 | | |
946 | 0 | nread = read(pipe_up[0], &c, 1); |
947 | 0 | if (nread != 0) { |
948 | 0 | goto cleanup; |
949 | 0 | } |
950 | | |
951 | 0 | tdb_robust_mutex_wait_for_child(&saved_child_pid); |
952 | |
953 | 0 | ret = pthread_mutex_trylock(m); |
954 | 0 | if (ret != EOWNERDEAD) { |
955 | 0 | if (ret == 0) { |
956 | 0 | pthread_mutex_unlock(m); |
957 | 0 | } |
958 | 0 | goto cleanup; |
959 | 0 | } |
960 | | |
961 | 0 | ret = pthread_mutex_consistent(m); |
962 | 0 | if (ret != 0) { |
963 | 0 | goto cleanup; |
964 | 0 | } |
965 | | |
966 | 0 | ret = pthread_mutex_trylock(m); |
967 | 0 | if (ret != EDEADLK && ret != EBUSY) { |
968 | 0 | pthread_mutex_unlock(m); |
969 | 0 | goto cleanup; |
970 | 0 | } |
971 | | |
972 | 0 | ret = pthread_mutex_unlock(m); |
973 | 0 | if (ret != 0) { |
974 | 0 | goto cleanup; |
975 | 0 | } |
976 | | |
977 | 0 | tdb_mutex_locking_cached = true; |
978 | |
979 | 0 | cleanup: |
980 | | /* |
981 | | * Note that we don't reset the signal handler; we just reset |
982 | | * tdb_robust_mutex_pid to -1. This is ok as this code path is only |
983 | | * called once per process. |
984 | | * |
985 | | * Leaving our signal handler avoids races with other threads potentially |
986 | | * setting up their SIGCHLD handlers. |
987 | | * |
988 | | * The worst thing that can happen is that the other newer signal |
989 | | * handler will get the SIGCHLD signal for our child and/or reap the |
990 | | * child with a wait() function. tdb_robust_mutex_wait_for_child() |
991 | | * handles the case where waitpid returns ECHILD. |
992 | | */ |
993 | 0 | tdb_robust_mutex_wait_for_child(&saved_child_pid); |
994 | |
995 | 0 | if (m != NULL) { |
996 | 0 | pthread_mutex_destroy(m); |
997 | 0 | } |
998 | 0 | if (cleanup_ma) { |
999 | 0 | pthread_mutexattr_destroy(&ma); |
1000 | 0 | } |
1001 | 0 | if (pipe_down[0] != -1) { |
1002 | 0 | close(pipe_down[0]); |
1003 | 0 | } |
1004 | 0 | if (pipe_down[1] != -1) { |
1005 | 0 | close(pipe_down[1]); |
1006 | 0 | } |
1007 | 0 | if (pipe_up[0] != -1) { |
1008 | 0 | close(pipe_up[0]); |
1009 | 0 | } |
1010 | 0 | if (pipe_up[1] != -1) { |
1011 | 0 | close(pipe_up[1]); |
1012 | 0 | } |
1013 | 0 | if (ptr != NULL) { |
1014 | 0 | munmap(ptr, sizeof(pthread_mutex_t)); |
1015 | 0 | } |
1016 | |
1017 | 0 | return tdb_mutex_locking_cached; |
1018 | 0 | } |
1019 | | |
1020 | | #else |
1021 | | |
1022 | | size_t tdb_mutex_size(struct tdb_context *tdb) |
1023 | | { |
1024 | | return 0; |
1025 | | } |
1026 | | |
1027 | | bool tdb_have_mutexes(struct tdb_context *tdb) |
1028 | | { |
1029 | | return false; |
1030 | | } |
1031 | | |
1032 | | int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, |
1033 | | enum tdb_lock_flags flags) |
1034 | | { |
1035 | | tdb->ecode = TDB_ERR_LOCK; |
1036 | | return -1; |
1037 | | } |
1038 | | |
1039 | | int tdb_mutex_allrecord_unlock(struct tdb_context *tdb) |
1040 | | { |
1041 | | return -1; |
1042 | | } |
1043 | | |
1044 | | int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb) |
1045 | | { |
1046 | | tdb->ecode = TDB_ERR_LOCK; |
1047 | | return -1; |
1048 | | } |
1049 | | |
1050 | | void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb) |
1051 | | { |
1052 | | return; |
1053 | | } |
1054 | | |
1055 | | int tdb_mutex_mmap(struct tdb_context *tdb) |
1056 | | { |
1057 | | errno = ENOSYS; |
1058 | | return -1; |
1059 | | } |
1060 | | |
1061 | | int tdb_mutex_munmap(struct tdb_context *tdb) |
1062 | | { |
1063 | | errno = ENOSYS; |
1064 | | return -1; |
1065 | | } |
1066 | | |
1067 | | int tdb_mutex_init(struct tdb_context *tdb) |
1068 | | { |
1069 | | errno = ENOSYS; |
1070 | | return -1; |
1071 | | } |
1072 | | |
1073 | | _PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void) |
1074 | | { |
1075 | | return false; |
1076 | | } |
1077 | | |
1078 | | #endif |