// SPDX-License-Identifier: ISC
/*
 * Copyright (c) 2017-19 David Lamparter, for NetDEF, Inc.
 */

/* implementation notes: this is an epoch-based RCU implementation. rcu_seq
 * (global variable) counts the current epoch. Threads hold a specific epoch
 * in rcu_read_lock(). This is the oldest epoch a thread might be accessing
 * data from.
 *
 * The rcu_seq global is only pushed forward on rcu_read_lock() and
 * rcu_read_unlock() calls. This makes things a tad more efficient since
 * those are the only places it matters:
 * - on rcu_read_lock, we don't want to hold an old epoch pointlessly
 * - on rcu_read_unlock, we want to make sure we're not stuck on an old epoch
 *   when heading into a long idle period where no thread holds RCU
 *
 * rcu_thread structures themselves are RCU-free'd.
 *
 * rcu_head structures are the most iffy; normally for an ATOMLIST we would
 * need to use rcu_free or pthread_rwlock to deallocate old items in order to
 * prevent ABA or use-after-free problems. However, our ATOMLIST code
 * guarantees that as long as the list never becomes empty, an add_tail() only
 * needs the "last" pointer, i.e. we can't run into ABA/UAF issues - but we do
 * need to keep at least 1 item on the list at all times.
 *
 * (Search the atomlist code for all uses of "last")
 */
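
/* Illustrative usage sketch (not part of this file): the read-side and
 * deferred-free pattern this implementation supports. "struct item",
 * "MTYPE_ITEM" and use_value() are hypothetical names; rcu_read_lock(),
 * rcu_read_unlock() and the rcu_free() wrapper are assumed as declared in
 * frrcu.h.
 *
 *   struct item {
 *           struct rcu_head rcu_head;   // embedded for deferred free
 *           int value;
 *   };
 *
 *   void reader(struct item *it)
 *   {
 *           rcu_read_lock();            // hold the current epoch
 *           use_value(it->value);       // safe: anything rcu_free'd after we
 *                                       // took the lock stays valid until we
 *                                       // unlock
 *           rcu_read_unlock();          // allow the epoch to move on
 *   }
 *
 *   void drop(struct item *it)
 *   {
 *           rcu_read_lock();            // rcu_* calls need RCU held
 *           rcu_free(MTYPE_ITEM, it, rcu_head);  // freed by the sweeper once
 *                                                // all readers left this epoch
 *           rcu_read_unlock();
 *   }
 */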

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <pthread.h>
#ifdef HAVE_PTHREAD_NP_H
#include <pthread_np.h>
#endif
#include <string.h>
#include <unistd.h>
#include <signal.h>

#include "frrcu.h"
#include "seqlock.h"
#include "atomlist.h"

DEFINE_MTYPE_STATIC(LIB, RCU_THREAD, "RCU thread");
DEFINE_MTYPE_STATIC(LIB, RCU_NEXT, "RCU sequence barrier");

DECLARE_ATOMLIST(rcu_heads, struct rcu_head, head);

PREDECL_ATOMLIST(rcu_threads);
struct rcu_thread {
        struct rcu_threads_item head;

        struct rcu_head rcu_head;

        struct seqlock rcu;

        /* only accessed by thread itself, not atomic */
        unsigned depth;
};
DECLARE_ATOMLIST(rcu_threads, struct rcu_thread, head);

static const struct rcu_action rcua_next = { .type = RCUA_NEXT };
static const struct rcu_action rcua_end = { .type = RCUA_END };
static const struct rcu_action rcua_close = { .type = RCUA_CLOSE };

struct rcu_next {
        struct rcu_head head_free;
        struct rcu_head head_next;
};

#define rcu_free_internal(mtype, ptr, field) \
        do { \
                typeof(ptr) _ptr = (ptr); \
                struct rcu_head *_rcu_head = &_ptr->field; \
                static const struct rcu_action _rcu_action = { \
                        .type = RCUA_FREE, \
                        .u.free = { \
                                .mt = mtype, \
                                .offset = offsetof(typeof(*_ptr), field), \
                        }, \
                }; \
                _rcu_head->action = &_rcu_action; \
                rcu_heads_add_tail(&rcu_heads, _rcu_head); \
        } while (0)

/* primary global RCU position */
static struct seqlock rcu_seq;
/* this is set to rcu_seq whenever something is added on the RCU queue.
 * rcu_read_lock() and rcu_read_unlock() will then bump rcu_seq up one step.
 */
static _Atomic seqlock_val_t rcu_dirty;

static struct rcu_threads_head rcu_threads;
static struct rcu_heads_head rcu_heads;

/* main thread & RCU sweeper have pre-setup rcu_thread structures. The
 * reasons are different:
 *
 * - rcu_thread_main is there because the main thread isn't started like
 *   other threads; it's implicitly created when the program is started, so
 *   rcu_thread_main is matched up with it implicitly.
 *
 * - rcu_thread_rcu isn't actually put on the rcu_threads list (that would
 *   make no sense); it only exists so we can call RCU-using functions from
 *   the RCU thread without special handling in rcu_read_lock/unlock.
 */
static struct rcu_thread rcu_thread_main;
static struct rcu_thread rcu_thread_rcu;

static pthread_t rcu_pthread;
static pthread_key_t rcu_thread_key;
static bool rcu_active;

static void rcu_start(void);
static void rcu_bump(void);

/*
 * preinitialization for main thread
 */
static void rcu_thread_end(void *rcu_thread);

static void rcu_preinit(void) __attribute__((constructor));
static void rcu_preinit(void)
{
        struct rcu_thread *rt;

        rt = &rcu_thread_main;
        rt->depth = 1;
        seqlock_init(&rt->rcu);
        seqlock_acquire_val(&rt->rcu, SEQLOCK_STARTVAL);

        pthread_key_create(&rcu_thread_key, rcu_thread_end);
        pthread_setspecific(rcu_thread_key, rt);

        rcu_threads_add_tail(&rcu_threads, rt);

        /* RCU sweeper's rcu_thread is a dummy, NOT added to rcu_threads */
        rt = &rcu_thread_rcu;
        rt->depth = 1;

        seqlock_init(&rcu_seq);
        seqlock_acquire_val(&rcu_seq, SEQLOCK_STARTVAL);
}

static struct rcu_thread *rcu_self(void)
{
        return (struct rcu_thread *)pthread_getspecific(rcu_thread_key);
}

/*
 * thread management (for the non-main thread)
 */
struct rcu_thread *rcu_thread_prepare(void)
{
        struct rcu_thread *rt, *cur;

        rcu_assert_read_locked();

        if (!rcu_active)
                rcu_start();

        cur = rcu_self();
        assert(cur->depth);

        /* new thread always starts with rcu_read_lock held at depth 1, and
         * holding the same epoch as the parent (this makes it possible to
         * use RCU for things passed into the thread through its arg).
         * See the usage sketch after this function.
         */
        rt = XCALLOC(MTYPE_RCU_THREAD, sizeof(*rt));
        rt->depth = 1;

        seqlock_init(&rt->rcu);
        seqlock_acquire(&rt->rcu, &cur->rcu);

        rcu_threads_add_tail(&rcu_threads, rt);

        return rt;
}
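
/* Illustrative spawn sequence (not part of this file), sketching how
 * rcu_thread_prepare() / rcu_thread_start() are meant to be paired.
 * "struct thread_arg", worker() and spawn() are hypothetical; in a real
 * daemon this wiring typically lives in the pthread wrapper code rather
 * than being open-coded like this.
 *
 *   struct thread_arg {
 *           struct rcu_thread *rt;
 *           void *data;                     // RCU-protected payload
 *   };
 *
 *   static void *worker(void *arg_)
 *   {
 *           struct thread_arg *arg = arg_;
 *
 *           rcu_thread_start(arg->rt);      // bind rcu_thread to this pthread
 *           // started with RCU held at depth 1 and the parent's epoch, so
 *           // RCU'd data reachable through arg->data is still valid here
 *           rcu_read_unlock();              // drop the inherited read lock
 *           return NULL;
 *   }
 *
 *   void spawn(struct thread_arg *arg)
 *   {
 *           pthread_t tid;
 *
 *           rcu_read_lock();                // hold the epoch covering arg->data
 *           arg->rt = rcu_thread_prepare(); // child inherits this epoch
 *           pthread_create(&tid, NULL, worker, arg);
 *           rcu_read_unlock();
 *   }
 */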

void rcu_thread_start(struct rcu_thread *rt)
{
        pthread_setspecific(rcu_thread_key, rt);
}

void rcu_thread_unprepare(struct rcu_thread *rt)
{
        if (rt == &rcu_thread_rcu)
                return;

        rt->depth = 1;
        seqlock_acquire(&rt->rcu, &rcu_seq);

        rcu_bump();
        if (rt != &rcu_thread_main)
                /* this free() happens after seqlock_release() below */
                rcu_free_internal(MTYPE_RCU_THREAD, rt, rcu_head);

        rcu_threads_del(&rcu_threads, rt);
        seqlock_release(&rt->rcu);
}

static void rcu_thread_end(void *rtvoid)
{
        struct rcu_thread *rt = rtvoid;
        rcu_thread_unprepare(rt);
}

/*
 * main RCU control aspects
 */

static void rcu_bump(void)
{
        struct rcu_next *rn;

        rn = XMALLOC(MTYPE_RCU_NEXT, sizeof(*rn));

        /* note: each RCUA_NEXT item corresponds to exactly one seqno bump.
         * This means we don't need to communicate which seqno is which
         * RCUA_NEXT, since we really don't care.
         */

        /*
         * Important race condition: while rcu_heads_add_tail is executing,
         * there is an intermediate point where the rcu_heads "last" pointer
         * already points to rn->head_next, but rn->head_next isn't added to
         * the list yet. That means any other "add_tail" calls append to this
         * item, which isn't fully on the list yet. Freeze this thread at
         * that point and look at another thread doing an rcu_bump. It adds
         * these two items and then does a seqlock_bump. But the rcu_heads
         * list is still "interrupted" and there's no RCUA_NEXT on the list
         * yet (from either the frozen thread or the second thread). So
         * rcu_main() might actually hit the end of the list at the
         * "interrupt".
         *
         * This situation is prevented by requiring that rcu_read_lock is held
         * for any calls to rcu_bump, since if we're holding the current RCU
         * epoch, that means rcu_main can't be chewing on rcu_heads and hit
         * that interruption point. Only once the thread has continued to
         * rcu_read_unlock() - and therefore completed the add_tail - does the
         * RCU sweeper gobble up the epoch, at which point it is sure to find
         * at least the RCUA_NEXT and RCUA_FREE items on rcu_heads.
         */
        rn->head_next.action = &rcua_next;
        rcu_heads_add_tail(&rcu_heads, &rn->head_next);

        /* free rn that we allocated above.
         *
         * This is INTENTIONALLY not built into the RCUA_NEXT action. This
         * ensures that after the action above is popped off the queue, there
         * is still at least 1 item on the RCU queue. This means we never
         * delete the last item, which is extremely important since it keeps
         * the atomlist ->last pointer alive and well.
         *
         * If we were to "run dry" on the RCU queue, add_tail may run into the
         * "last item is being deleted - start over" case, and then we may end
         * up accessing old RCU queue items that are already free'd.
         */
        rcu_free_internal(MTYPE_RCU_NEXT, rn, head_free);

        /* Only allow the RCU sweeper to run after these 2 items are queued.
         *
         * If another thread enqueues some RCU action in the intermediate
         * window here, nothing bad happens - the queued action is associated
         * with a larger seq# than strictly necessary. Thus, it might get
         * executed a bit later, but that's not a problem.
         *
         * If another thread acquires the read lock in this window, it holds
         * the previous epoch, but its RCU queue actions will be in the next
         * epoch. This isn't a problem either, just a tad inefficient.
         */
        seqlock_bump(&rcu_seq);
}

static void rcu_bump_maybe(void)
{
        seqlock_val_t dirty;

        dirty = atomic_load_explicit(&rcu_dirty, memory_order_relaxed);
        /* no problem if we race here and multiple threads bump rcu_seq;
         * bumping too much causes no issues while not bumping enough will
         * result in delayed cleanup
         */
        if (dirty == seqlock_cur(&rcu_seq))
                rcu_bump();
}

void rcu_read_lock(void)
{
        struct rcu_thread *rt = rcu_self();

        assert(rt);
        if (rt->depth++ > 0)
                return;

        seqlock_acquire(&rt->rcu, &rcu_seq);
        /* need to hold RCU for bump ... */
        rcu_bump_maybe();
        /* ... but no point in holding the old epoch if we just bumped */
        seqlock_acquire(&rt->rcu, &rcu_seq);
}

void rcu_read_unlock(void)
{
        struct rcu_thread *rt = rcu_self();

        assert(rt && rt->depth);
        if (--rt->depth > 0)
                return;
        rcu_bump_maybe();
        seqlock_release(&rt->rcu);
}

void rcu_assert_read_locked(void)
{
        struct rcu_thread *rt = rcu_self();
        assert(rt && rt->depth && seqlock_held(&rt->rcu));
}

void rcu_assert_read_unlocked(void)
{
        struct rcu_thread *rt = rcu_self();
        assert(rt && !rt->depth && !seqlock_held(&rt->rcu));
}

/*
 * RCU resource-release thread
 */

static void *rcu_main(void *arg);

static void rcu_start(void)
{
        /* ensure we never handle signals on the RCU thread by blocking
         * everything here (new thread inherits signal mask)
         */
        sigset_t oldsigs, blocksigs;

        sigfillset(&blocksigs);
        pthread_sigmask(SIG_BLOCK, &blocksigs, &oldsigs);

        rcu_active = true;

        assert(!pthread_create(&rcu_pthread, NULL, rcu_main, NULL));

        pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);

#ifdef HAVE_PTHREAD_SETNAME_NP
# ifdef GNU_LINUX
        pthread_setname_np(rcu_pthread, "RCU sweeper");
# elif defined(__NetBSD__)
        pthread_setname_np(rcu_pthread, "RCU sweeper", NULL);
# endif
#elif defined(HAVE_PTHREAD_SET_NAME_NP)
        pthread_set_name_np(rcu_pthread, "RCU sweeper");
#endif
}

static void rcu_do(struct rcu_head *rh)
{
        struct rcu_head_close *rhc;
        void *p;

        switch (rh->action->type) {
        case RCUA_FREE:
                p = (char *)rh - rh->action->u.free.offset;
                if (rh->action->u.free.mt)
                        qfree(rh->action->u.free.mt, p);
                else
                        free(p);
                break;
        case RCUA_CLOSE:
                rhc = container_of(rh, struct rcu_head_close, rcu_head);
                close(rhc->fd);
                break;
        case RCUA_CALL:
                p = (char *)rh - rh->action->u.call.offset;
                rh->action->u.call.fptr(p);
                break;

        case RCUA_INVALID:
        case RCUA_NEXT:
        case RCUA_END:
        default:
                assert(0);
        }
}

static void rcu_watchdog(struct rcu_thread *rt)
{
#if 0
        /* future work: print a backtrace for the thread that's holding up
         * RCU. The only (good) way of doing that is to send a signal to the
         * other thread, save away the backtrace in the signal handler, and
         * block here until the signal is done processing.
         *
         * Just haven't implemented that yet.
         */
        fprintf(stderr, "RCU watchdog %p\n", rt);
#endif
}

static void *rcu_main(void *arg)
{
        struct rcu_thread *rt;
        struct rcu_head *rh = NULL;
        bool end = false;
        struct timespec maxwait;

        seqlock_val_t rcuval = SEQLOCK_STARTVAL;

        pthread_setspecific(rcu_thread_key, &rcu_thread_rcu);

        while (!end) {
                seqlock_wait(&rcu_seq, rcuval);

                /* RCU watchdog timeout, TODO: configurable value */
                clock_gettime(CLOCK_MONOTONIC, &maxwait);
                maxwait.tv_nsec += 100 * 1000 * 1000;
                if (maxwait.tv_nsec >= 1000000000) {
                        maxwait.tv_sec++;
                        maxwait.tv_nsec -= 1000000000;
                }

                frr_each (rcu_threads, &rcu_threads, rt)
                        if (!seqlock_timedwait(&rt->rcu, rcuval, &maxwait)) {
                                rcu_watchdog(rt);
                                seqlock_wait(&rt->rcu, rcuval);
                        }

                while ((rh = rcu_heads_pop(&rcu_heads))) {
                        if (rh->action->type == RCUA_NEXT)
                                break;
                        else if (rh->action->type == RCUA_END)
                                end = true;
                        else
                                rcu_do(rh);
                }

                rcuval += SEQLOCK_INCR;
        }

        /* rcu_shutdown can only be called singlethreaded, and it does a
         * pthread_join, so it should be impossible that anything ended up
         * on the queue after RCUA_END
         */
#if 1
        assert(!rcu_heads_first(&rcu_heads));
#else
        while ((rh = rcu_heads_pop(&rcu_heads)))
                if (rh->action->type >= RCUA_FREE)
                        rcu_do(rh);
#endif
        return NULL;
}

void rcu_shutdown(void)
{
        static struct rcu_head rcu_head_end;
        struct rcu_thread *rt = rcu_self();
        void *retval;

        if (!rcu_active)
                return;

        rcu_assert_read_locked();
        assert(rcu_threads_count(&rcu_threads) == 1);

        rcu_enqueue(&rcu_head_end, &rcua_end);

        rt->depth = 0;
        seqlock_release(&rt->rcu);
        seqlock_release(&rcu_seq);
        rcu_active = false;

        /* rcu_active is cleared before the pthread_join in case we hang in
         * pthread_join & get a SIGTERM or something - in that case, just
         * ignore the maybe-still-running RCU thread
         */
        if (pthread_join(rcu_pthread, &retval) == 0) {
                seqlock_acquire_val(&rcu_seq, SEQLOCK_STARTVAL);
                seqlock_acquire_val(&rt->rcu, SEQLOCK_STARTVAL);
                rt->depth = 1;
        }
}

/*
 * RCU'd free functions
 */

void rcu_enqueue(struct rcu_head *rh, const struct rcu_action *action)
{
        /* refer to rcu_bump() for why we need to hold RCU when adding items
         * to rcu_heads
         */
        rcu_assert_read_locked();

        rh->action = action;

        if (!rcu_active) {
                rcu_do(rh);
                return;
        }
        rcu_heads_add_tail(&rcu_heads, rh);
        atomic_store_explicit(&rcu_dirty, seqlock_cur(&rcu_seq),
                              memory_order_relaxed);
}

void rcu_close(struct rcu_head_close *rhc, int fd)
{
        rhc->fd = fd;
        rcu_enqueue(&rhc->rcu_head, &rcua_close);
}
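
/* Illustrative caller-side sketch (not part of this file) for rcu_close():
 * deferring a close() until all current readers have moved past this epoch.
 * "struct connection" and its fields are hypothetical; rcu_close() and
 * struct rcu_head_close are the API defined above / in frrcu.h.
 *
 *   struct connection {
 *           struct rcu_head_close rcu_close;  // embedded for deferred close()
 *           int fd;
 *   };
 *
 *   void connection_shutdown(struct connection *conn)
 *   {
 *           rcu_read_lock();                  // rcu_close needs RCU held
 *           rcu_close(&conn->rcu_close, conn->fd);  // fd closed by the RCU
 *                                                   // sweeper, not here
 *           rcu_read_unlock();
 *   }
 */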