Coverage Report

Created: 2025-10-08 06:07

/src/frr/lib/event.c
Line | Count | Source
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/* Thread management routine
3
 * Copyright (C) 1998, 2000 Kunihiro Ishiguro <kunihiro@zebra.org>
4
 */
5
6
/* #define DEBUG */
7
8
#include <zebra.h>
9
#include <sys/resource.h>
10
11
#include "frrevent.h"
12
#include "memory.h"
13
#include "frrcu.h"
14
#include "log.h"
15
#include "hash.h"
16
#include "command.h"
17
#include "sigevent.h"
18
#include "network.h"
19
#include "jhash.h"
20
#include "frratomic.h"
21
#include "frr_pthread.h"
22
#include "lib_errors.h"
23
#include "libfrr_trace.h"
24
#include "libfrr.h"
25
26
8
DEFINE_MTYPE_STATIC(LIB, THREAD, "Thread");
27
8
DEFINE_MTYPE_STATIC(LIB, EVENT_MASTER, "Thread master");
28
8
DEFINE_MTYPE_STATIC(LIB, EVENT_POLL, "Thread Poll Info");
29
8
DEFINE_MTYPE_STATIC(LIB, EVENT_STATS, "Thread stats");
30
8
31
8
DECLARE_LIST(event_list, struct event, eventitem);
32
33
struct cancel_req {
34
  int flags;
35
  struct event *thread;
36
  void *eventobj;
37
  struct event **threadref;
38
};
39
40
/* Flags for task cancellation */
41
0
#define EVENT_CANCEL_FLAG_READY 0x01
42
43
static int event_timer_cmp(const struct event *a, const struct event *b)
44
0
{
45
0
  if (a->u.sands.tv_sec < b->u.sands.tv_sec)
46
0
    return -1;
47
0
  if (a->u.sands.tv_sec > b->u.sands.tv_sec)
48
0
    return 1;
49
0
  if (a->u.sands.tv_usec < b->u.sands.tv_usec)
50
0
    return -1;
51
0
  if (a->u.sands.tv_usec > b->u.sands.tv_usec)
52
0
    return 1;
53
0
  return 0;
54
0
}
55
56
0
DECLARE_HEAP(event_timer_list, struct event, timeritem, event_timer_cmp);
Unexecuted instantiation: event.c:event_timer_list_add
Unexecuted instantiation: event.c:event_timer_list_del
57
58
#if defined(__APPLE__)
59
#include <mach/mach.h>
60
#include <mach/mach_time.h>
61
#endif
62
63
#define AWAKEN(m)                                                              \
64
0
  do {                                                                   \
65
0
    const unsigned char wakebyte = 0x01;                           \
66
0
    write(m->io_pipe[1], &wakebyte, 1);                            \
67
0
  } while (0)
68
69
/* control variable for initializer */
70
static pthread_once_t init_once = PTHREAD_ONCE_INIT;
71
pthread_key_t thread_current;
72
73
static pthread_mutex_t masters_mtx = PTHREAD_MUTEX_INITIALIZER;
74
static struct list *masters;
75
76
static void thread_free(struct event_loop *master, struct event *thread);
77
78
#ifndef EXCLUDE_CPU_TIME
79
#define EXCLUDE_CPU_TIME 0
80
#endif
81
#ifndef CONSUMED_TIME_CHECK
82
#define CONSUMED_TIME_CHECK 0
83
#endif
84
85
bool cputime_enabled = !EXCLUDE_CPU_TIME;
86
unsigned long cputime_threshold = CONSUMED_TIME_CHECK;
87
unsigned long walltime_threshold = CONSUMED_TIME_CHECK;
88
89
/* CLI start ---------------------------------------------------------------- */
90
#include "lib/event_clippy.c"
91
92
static unsigned int cpu_record_hash_key(const struct cpu_event_history *a)
93
0
{
94
0
  int size = sizeof(a->func);
95
96
0
  return jhash(&a->func, size, 0);
97
0
}
98
99
static bool cpu_record_hash_cmp(const struct cpu_event_history *a,
100
        const struct cpu_event_history *b)
101
0
{
102
0
  return a->func == b->func;
103
0
}
104
105
static void *cpu_record_hash_alloc(struct cpu_event_history *a)
106
0
{
107
0
  struct cpu_event_history *new;
108
109
0
  new = XCALLOC(MTYPE_EVENT_STATS, sizeof(struct cpu_event_history));
110
0
  new->func = a->func;
111
0
  new->funcname = a->funcname;
112
0
  return new;
113
0
}
114
115
static void cpu_record_hash_free(void *a)
116
0
{
117
0
  struct cpu_event_history *hist = a;
118
119
0
  XFREE(MTYPE_EVENT_STATS, hist);
120
0
}
121
122
static void vty_out_cpu_event_history(struct vty *vty,
123
              struct cpu_event_history *a)
124
0
{
125
0
  vty_out(vty,
126
0
    "%5zu %10zu.%03zu %9zu %8zu %9zu %8zu %9zu %9zu %9zu %10zu",
127
0
    a->total_active, a->cpu.total / 1000, a->cpu.total % 1000,
128
0
    a->total_calls, (a->cpu.total / a->total_calls), a->cpu.max,
129
0
    (a->real.total / a->total_calls), a->real.max,
130
0
    a->total_cpu_warn, a->total_wall_warn, a->total_starv_warn);
131
0
  vty_out(vty, "  %c%c%c%c%c  %s\n",
132
0
    a->types & (1 << EVENT_READ) ? 'R' : ' ',
133
0
    a->types & (1 << EVENT_WRITE) ? 'W' : ' ',
134
0
    a->types & (1 << EVENT_TIMER) ? 'T' : ' ',
135
0
    a->types & (1 << EVENT_EVENT) ? 'E' : ' ',
136
0
    a->types & (1 << EVENT_EXECUTE) ? 'X' : ' ', a->funcname);
137
0
}
138
139
static void cpu_record_hash_print(struct hash_bucket *bucket, void *args[])
140
0
{
141
0
  struct cpu_event_history *totals = args[0];
142
0
  struct cpu_event_history copy;
143
0
  struct vty *vty = args[1];
144
0
  uint8_t *filter = args[2];
145
146
0
  struct cpu_event_history *a = bucket->data;
147
148
0
  copy.total_active =
149
0
    atomic_load_explicit(&a->total_active, memory_order_seq_cst);
150
0
  copy.total_calls =
151
0
    atomic_load_explicit(&a->total_calls, memory_order_seq_cst);
152
0
  copy.total_cpu_warn =
153
0
    atomic_load_explicit(&a->total_cpu_warn, memory_order_seq_cst);
154
0
  copy.total_wall_warn =
155
0
    atomic_load_explicit(&a->total_wall_warn, memory_order_seq_cst);
156
0
  copy.total_starv_warn = atomic_load_explicit(&a->total_starv_warn,
157
0
                 memory_order_seq_cst);
158
0
  copy.cpu.total =
159
0
    atomic_load_explicit(&a->cpu.total, memory_order_seq_cst);
160
0
  copy.cpu.max = atomic_load_explicit(&a->cpu.max, memory_order_seq_cst);
161
0
  copy.real.total =
162
0
    atomic_load_explicit(&a->real.total, memory_order_seq_cst);
163
0
  copy.real.max =
164
0
    atomic_load_explicit(&a->real.max, memory_order_seq_cst);
165
0
  copy.types = atomic_load_explicit(&a->types, memory_order_seq_cst);
166
0
  copy.funcname = a->funcname;
167
168
0
  if (!(copy.types & *filter))
169
0
    return;
170
171
0
  vty_out_cpu_event_history(vty, &copy);
172
0
  totals->total_active += copy.total_active;
173
0
  totals->total_calls += copy.total_calls;
174
0
  totals->total_cpu_warn += copy.total_cpu_warn;
175
0
  totals->total_wall_warn += copy.total_wall_warn;
176
0
  totals->total_starv_warn += copy.total_starv_warn;
177
0
  totals->real.total += copy.real.total;
178
0
  if (totals->real.max < copy.real.max)
179
0
    totals->real.max = copy.real.max;
180
0
  totals->cpu.total += copy.cpu.total;
181
0
  if (totals->cpu.max < copy.cpu.max)
182
0
    totals->cpu.max = copy.cpu.max;
183
0
}
184
185
static void cpu_record_print(struct vty *vty, uint8_t filter)
186
0
{
187
0
  struct cpu_event_history tmp;
188
0
  void *args[3] = {&tmp, vty, &filter};
189
0
  struct event_loop *m;
190
0
  struct listnode *ln;
191
192
0
  if (!cputime_enabled)
193
0
    vty_out(vty,
194
0
      "\n"
195
0
      "Collecting CPU time statistics is currently disabled.  Following statistics\n"
196
0
      "will be zero or may display data from when collection was enabled.  Use the\n"
197
0
      "  \"service cputime-stats\"  command to start collecting data.\n"
198
0
      "\nCounters and wallclock times are always maintained and should be accurate.\n");
199
200
0
  memset(&tmp, 0, sizeof(tmp));
201
0
  tmp.funcname = "TOTAL";
202
0
  tmp.types = filter;
203
204
0
  frr_with_mutex (&masters_mtx) {
205
0
    for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
206
0
      const char *name = m->name ? m->name : "main";
207
0
      char underline[strlen(name) + 1];
208
209
0
      memset(underline, '-', sizeof(underline));
210
0
      underline[sizeof(underline) - 1] = '\0';
211
212
0
      vty_out(vty, "\n");
213
0
      vty_out(vty, "Showing statistics for pthread %s\n",
214
0
        name);
215
0
      vty_out(vty, "-------------------------------%s\n",
216
0
        underline);
217
0
      vty_out(vty, "%30s %18s %18s\n", "",
218
0
        "CPU (user+system):", "Real (wall-clock):");
219
0
      vty_out(vty,
220
0
        "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
221
0
      vty_out(vty, " Avg uSec Max uSecs");
222
0
      vty_out(vty,
223
0
        "  CPU_Warn Wall_Warn Starv_Warn Type   Thread\n");
224
225
0
      if (m->cpu_record->count)
226
0
        hash_iterate(
227
0
          m->cpu_record,
228
0
          (void (*)(struct hash_bucket *,
229
0
              void *))cpu_record_hash_print,
230
0
          args);
231
0
      else
232
0
        vty_out(vty, "No data to display yet.\n");
233
234
0
      vty_out(vty, "\n");
235
0
    }
236
0
  }
237
238
0
  vty_out(vty, "\n");
239
0
  vty_out(vty, "Total thread statistics\n");
240
0
  vty_out(vty, "-------------------------\n");
241
0
  vty_out(vty, "%30s %18s %18s\n", "",
242
0
    "CPU (user+system):", "Real (wall-clock):");
243
0
  vty_out(vty, "Active   Runtime(ms)   Invoked Avg uSec Max uSecs");
244
0
  vty_out(vty, " Avg uSec Max uSecs  CPU_Warn Wall_Warn");
245
0
  vty_out(vty, "  Type  Thread\n");
246
247
0
  if (tmp.total_calls > 0)
248
0
    vty_out_cpu_event_history(vty, &tmp);
249
0
}
250
251
static void cpu_record_hash_clear(struct hash_bucket *bucket, void *args[])
252
0
{
253
0
  uint8_t *filter = args[0];
254
0
  struct hash *cpu_record = args[1];
255
256
0
  struct cpu_event_history *a = bucket->data;
257
258
0
  if (!(a->types & *filter))
259
0
    return;
260
261
0
  hash_release(cpu_record, bucket->data);
262
0
}
263
264
static void cpu_record_clear(uint8_t filter)
265
0
{
266
0
  uint8_t *tmp = &filter;
267
0
  struct event_loop *m;
268
0
  struct listnode *ln;
269
270
0
  frr_with_mutex (&masters_mtx) {
271
0
    for (ALL_LIST_ELEMENTS_RO(masters, ln, m)) {
272
0
      frr_with_mutex (&m->mtx) {
273
0
        void *args[2] = {tmp, m->cpu_record};
274
275
0
        hash_iterate(
276
0
          m->cpu_record,
277
0
          (void (*)(struct hash_bucket *,
278
0
              void *))cpu_record_hash_clear,
279
0
          args);
280
0
      }
281
0
    }
282
0
  }
283
0
}
284
285
static uint8_t parse_filter(const char *filterstr)
286
0
{
287
0
  int i = 0;
288
0
  int filter = 0;
289
290
0
  while (filterstr[i] != '\0') {
291
0
    switch (filterstr[i]) {
292
0
    case 'r':
293
0
    case 'R':
294
0
      filter |= (1 << EVENT_READ);
295
0
      break;
296
0
    case 'w':
297
0
    case 'W':
298
0
      filter |= (1 << EVENT_WRITE);
299
0
      break;
300
0
    case 't':
301
0
    case 'T':
302
0
      filter |= (1 << EVENT_TIMER);
303
0
      break;
304
0
    case 'e':
305
0
    case 'E':
306
0
      filter |= (1 << EVENT_EVENT);
307
0
      break;
308
0
    case 'x':
309
0
    case 'X':
310
0
      filter |= (1 << EVENT_EXECUTE);
311
0
      break;
312
0
    default:
313
0
      break;
314
0
    }
315
0
    ++i;
316
0
  }
317
0
  return filter;
318
0
}
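/*
 * Illustrative sketch, not part of the measured source: parse_filter()
 * builds a bitmask from the per-type characters, so a string such as
 * "rt" selects read and timer history entries, and unknown characters
 * are silently ignored.  Using only symbols defined above:
 */
static void parse_filter_example(void)
{
  uint8_t f = parse_filter("rt");

  assert(f == ((1 << EVENT_READ) | (1 << EVENT_TIMER)));
  assert(parse_filter("zz") == 0);
}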
319
320
DEFUN_NOSH (show_thread_cpu,
321
      show_thread_cpu_cmd,
322
      "show thread cpu [FILTER]",
323
      SHOW_STR
324
      "Thread information\n"
325
      "Thread CPU usage\n"
326
      "Display filter (rwtex)\n")
327
0
{
328
0
  uint8_t filter = (uint8_t)-1U;
329
0
  int idx = 0;
330
331
0
  if (argv_find(argv, argc, "FILTER", &idx)) {
332
0
    filter = parse_filter(argv[idx]->arg);
333
0
    if (!filter) {
334
0
      vty_out(vty,
335
0
        "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
336
0
        argv[idx]->arg);
337
0
      return CMD_WARNING;
338
0
    }
339
0
  }
340
341
0
  cpu_record_print(vty, filter);
342
0
  return CMD_SUCCESS;
343
0
}
344
345
DEFPY (service_cputime_stats,
346
       service_cputime_stats_cmd,
347
       "[no] service cputime-stats",
348
       NO_STR
349
       "Set up miscellaneous service\n"
350
       "Collect CPU usage statistics\n")
351
0
{
352
0
  cputime_enabled = !no;
353
0
  return CMD_SUCCESS;
354
0
}
355
356
DEFPY (service_cputime_warning,
357
       service_cputime_warning_cmd,
358
       "[no] service cputime-warning ![(1-4294967295)]",
359
       NO_STR
360
       "Set up miscellaneous service\n"
361
       "Warn for tasks exceeding CPU usage threshold\n"
362
       "Warning threshold in milliseconds\n")
363
0
{
364
0
  if (no)
365
0
    cputime_threshold = 0;
366
0
  else
367
0
    cputime_threshold = cputime_warning * 1000;
368
0
  return CMD_SUCCESS;
369
0
}
370
371
DEFPY (service_walltime_warning,
372
       service_walltime_warning_cmd,
373
       "[no] service walltime-warning ![(1-4294967295)]",
374
       NO_STR
375
       "Set up miscellaneous service\n"
376
       "Warn for tasks exceeding total wallclock threshold\n"
377
       "Warning threshold in milliseconds\n")
378
0
{
379
0
  if (no)
380
0
    walltime_threshold = 0;
381
0
  else
382
0
    walltime_threshold = walltime_warning * 1000;
383
0
  return CMD_SUCCESS;
384
0
}
385
386
static void show_thread_poll_helper(struct vty *vty, struct event_loop *m)
387
0
{
388
0
  const char *name = m->name ? m->name : "main";
389
0
  char underline[strlen(name) + 1];
390
0
  struct event *thread;
391
0
  uint32_t i;
392
393
0
  memset(underline, '-', sizeof(underline));
394
0
  underline[sizeof(underline) - 1] = '\0';
395
396
0
  vty_out(vty, "\nShowing poll FD's for %s\n", name);
397
0
  vty_out(vty, "----------------------%s\n", underline);
398
0
  vty_out(vty, "Count: %u/%d\n", (uint32_t)m->handler.pfdcount,
399
0
    m->fd_limit);
400
0
  for (i = 0; i < m->handler.pfdcount; i++) {
401
0
    vty_out(vty, "\t%6d fd:%6d events:%2d revents:%2d\t\t", i,
402
0
      m->handler.pfds[i].fd, m->handler.pfds[i].events,
403
0
      m->handler.pfds[i].revents);
404
405
0
    if (m->handler.pfds[i].events & POLLIN) {
406
0
      thread = m->read[m->handler.pfds[i].fd];
407
408
0
      if (!thread)
409
0
        vty_out(vty, "ERROR ");
410
0
      else
411
0
        vty_out(vty, "%s ", thread->xref->funcname);
412
0
    } else
413
0
      vty_out(vty, " ");
414
415
0
    if (m->handler.pfds[i].events & POLLOUT) {
416
0
      thread = m->write[m->handler.pfds[i].fd];
417
418
0
      if (!thread)
419
0
        vty_out(vty, "ERROR\n");
420
0
      else
421
0
        vty_out(vty, "%s\n", thread->xref->funcname);
422
0
    } else
423
0
      vty_out(vty, "\n");
424
0
  }
425
0
}
426
427
DEFUN_NOSH (show_thread_poll,
428
      show_thread_poll_cmd,
429
      "show thread poll",
430
      SHOW_STR
431
      "Thread information\n"
432
      "Show poll FD's and information\n")
433
0
{
434
0
  struct listnode *node;
435
0
  struct event_loop *m;
436
437
0
  frr_with_mutex (&masters_mtx) {
438
0
    for (ALL_LIST_ELEMENTS_RO(masters, node, m))
439
0
      show_thread_poll_helper(vty, m);
440
0
  }
441
442
0
  return CMD_SUCCESS;
443
0
}
444
445
446
DEFUN (clear_thread_cpu,
447
       clear_thread_cpu_cmd,
448
       "clear thread cpu [FILTER]",
449
       "Clear stored data in all pthreads\n"
450
       "Thread information\n"
451
       "Thread CPU usage\n"
452
       "Display filter (rwtexb)\n")
453
0
{
454
0
  uint8_t filter = (uint8_t)-1U;
455
0
  int idx = 0;
456
457
0
  if (argv_find(argv, argc, "FILTER", &idx)) {
458
0
    filter = parse_filter(argv[idx]->arg);
459
0
    if (!filter) {
460
0
      vty_out(vty,
461
0
        "Invalid filter \"%s\" specified; must contain at leastone of 'RWTEXB'\n",
462
0
        argv[idx]->arg);
463
0
      return CMD_WARNING;
464
0
    }
465
0
  }
466
467
0
  cpu_record_clear(filter);
468
0
  return CMD_SUCCESS;
469
0
}
470
471
static void show_thread_timers_helper(struct vty *vty, struct event_loop *m)
472
0
{
473
0
  const char *name = m->name ? m->name : "main";
474
0
  char underline[strlen(name) + 1];
475
0
  struct event *thread;
476
477
0
  memset(underline, '-', sizeof(underline));
478
0
  underline[sizeof(underline) - 1] = '\0';
479
480
0
  vty_out(vty, "\nShowing timers for %s\n", name);
481
0
  vty_out(vty, "-------------------%s\n", underline);
482
483
0
  frr_each (event_timer_list, &m->timer, thread) {
484
0
    vty_out(vty, "  %-50s%pTH\n", thread->hist->funcname, thread);
485
0
  }
486
0
}
487
488
DEFPY_NOSH (show_thread_timers,
489
      show_thread_timers_cmd,
490
      "show thread timers",
491
      SHOW_STR
492
      "Thread information\n"
493
      "Show all timers and how long they have in the system\n")
494
0
{
495
0
  struct listnode *node;
496
0
  struct event_loop *m;
497
498
0
  frr_with_mutex (&masters_mtx) {
499
0
    for (ALL_LIST_ELEMENTS_RO(masters, node, m))
500
0
      show_thread_timers_helper(vty, m);
501
0
  }
502
503
0
  return CMD_SUCCESS;
504
0
}
505
506
void event_cmd_init(void)
507
4
{
508
4
  install_element(VIEW_NODE, &show_thread_cpu_cmd);
509
4
  install_element(VIEW_NODE, &show_thread_poll_cmd);
510
4
  install_element(ENABLE_NODE, &clear_thread_cpu_cmd);
511
512
4
  install_element(CONFIG_NODE, &service_cputime_stats_cmd);
513
4
  install_element(CONFIG_NODE, &service_cputime_warning_cmd);
514
4
  install_element(CONFIG_NODE, &service_walltime_warning_cmd);
515
516
4
  install_element(VIEW_NODE, &show_thread_timers_cmd);
517
4
}
518
/* CLI end ------------------------------------------------------------------ */
519
520
521
static void cancelreq_del(void *cr)
522
0
{
523
0
  XFREE(MTYPE_TMP, cr);
524
0
}
525
526
/* initializer, only ever called once */
527
static void initializer(void)
528
4
{
529
4
  pthread_key_create(&thread_current, NULL);
530
4
}
531
532
struct event_loop *event_master_create(const char *name)
533
4
{
534
4
  struct event_loop *rv;
535
4
  struct rlimit limit;
536
537
4
  pthread_once(&init_once, &initializer);
538
539
4
  rv = XCALLOC(MTYPE_EVENT_MASTER, sizeof(struct event_loop));
540
541
  /* Initialize master mutex */
542
4
  pthread_mutex_init(&rv->mtx, NULL);
543
4
  pthread_cond_init(&rv->cancel_cond, NULL);
544
545
  /* Set name */
546
4
  name = name ? name : "default";
547
4
  rv->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
548
549
  /* Initialize I/O task data structures */
550
551
  /* Use configured limit if present, ulimit otherwise. */
552
4
  rv->fd_limit = frr_get_fd_limit();
553
4
  if (rv->fd_limit == 0) {
554
4
    getrlimit(RLIMIT_NOFILE, &limit);
555
4
    rv->fd_limit = (int)limit.rlim_cur;
556
4
  }
557
558
4
  rv->read = XCALLOC(MTYPE_EVENT_POLL,
559
4
         sizeof(struct event *) * rv->fd_limit);
560
561
4
  rv->write = XCALLOC(MTYPE_EVENT_POLL,
562
4
          sizeof(struct event *) * rv->fd_limit);
563
564
4
  char tmhashname[strlen(name) + 32];
565
566
4
  snprintf(tmhashname, sizeof(tmhashname), "%s - threadmaster event hash",
567
4
     name);
568
4
  rv->cpu_record = hash_create_size(
569
4
    8, (unsigned int (*)(const void *))cpu_record_hash_key,
570
4
    (bool (*)(const void *, const void *))cpu_record_hash_cmp,
571
4
    tmhashname);
572
573
4
  event_list_init(&rv->event);
574
4
  event_list_init(&rv->ready);
575
4
  event_list_init(&rv->unuse);
576
4
  event_timer_list_init(&rv->timer);
577
578
  /* Initialize event_fetch() settings */
579
4
  rv->spin = true;
580
4
  rv->handle_signals = true;
581
582
  /* Set pthread owner, should be updated by actual owner */
583
4
  rv->owner = pthread_self();
584
4
  rv->cancel_req = list_new();
585
4
  rv->cancel_req->del = cancelreq_del;
586
4
  rv->canceled = true;
587
588
  /* Initialize pipe poker */
589
4
  pipe(rv->io_pipe);
590
4
  set_nonblocking(rv->io_pipe[0]);
591
4
  set_nonblocking(rv->io_pipe[1]);
592
593
  /* Initialize data structures for poll() */
594
4
  rv->handler.pfdsize = rv->fd_limit;
595
4
  rv->handler.pfdcount = 0;
596
4
  rv->handler.pfds = XCALLOC(MTYPE_EVENT_MASTER,
597
4
           sizeof(struct pollfd) * rv->handler.pfdsize);
598
4
  rv->handler.copy = XCALLOC(MTYPE_EVENT_MASTER,
599
4
           sizeof(struct pollfd) * rv->handler.pfdsize);
600
601
  /* add to list of threadmasters */
602
4
  frr_with_mutex (&masters_mtx) {
603
4
    if (!masters)
604
4
      masters = list_new();
605
606
4
    listnode_add(masters, rv);
607
4
  }
608
609
4
  return rv;
610
4
}
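/*
 * Illustrative sketch, not part of the measured source: a daemon or
 * worker pthread typically creates one event_loop with
 * event_master_create(), optionally renames it once the owning pthread
 * is known, and tears it down with event_master_free() at shutdown.
 */
static struct event_loop *example_loop_setup(void)
{
  struct event_loop *loop = event_master_create("example");

  event_master_set_name(loop, "example-worker");
  return loop;
}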
611
612
void event_master_set_name(struct event_loop *master, const char *name)
613
0
{
614
0
  frr_with_mutex (&master->mtx) {
615
0
    XFREE(MTYPE_EVENT_MASTER, master->name);
616
0
    master->name = XSTRDUP(MTYPE_EVENT_MASTER, name);
617
0
  }
618
0
}
619
620
0
#define EVENT_UNUSED_DEPTH 10
621
622
/* Move thread to unuse list. */
623
static void thread_add_unuse(struct event_loop *m, struct event *thread)
624
0
{
625
0
  pthread_mutex_t mtxc = thread->mtx;
626
627
0
  assert(m != NULL && thread != NULL);
628
629
0
  thread->hist->total_active--;
630
0
  memset(thread, 0, sizeof(struct event));
631
0
  thread->type = EVENT_UNUSED;
632
633
  /* Restore the thread mutex context. */
634
0
  thread->mtx = mtxc;
635
636
0
  if (event_list_count(&m->unuse) < EVENT_UNUSED_DEPTH) {
637
0
    event_list_add_tail(&m->unuse, thread);
638
0
    return;
639
0
  }
640
641
0
  thread_free(m, thread);
642
0
}
643
644
/* Free all unused threads. */
645
static void thread_list_free(struct event_loop *m, struct event_list_head *list)
646
0
{
647
0
  struct event *t;
648
649
0
  while ((t = event_list_pop(list)))
650
0
    thread_free(m, t);
651
0
}
652
653
static void thread_array_free(struct event_loop *m, struct event **thread_array)
654
0
{
655
0
  struct event *t;
656
0
  int index;
657
658
0
  for (index = 0; index < m->fd_limit; ++index) {
659
0
    t = thread_array[index];
660
0
    if (t) {
661
0
      thread_array[index] = NULL;
662
0
      thread_free(m, t);
663
0
    }
664
0
  }
665
0
  XFREE(MTYPE_EVENT_POLL, thread_array);
666
0
}
667
668
/*
669
 * event_master_free_unused
670
 *
671
 * As threads are finished with, they are put on the
672
 * unuse list for later reuse.
673
 * If we are shutting down, free up the unused threads
674
 * so we can see if we forgot to shut anything off.
675
 */
676
void event_master_free_unused(struct event_loop *m)
677
0
{
678
0
  frr_with_mutex (&m->mtx) {
679
0
    struct event *t;
680
681
0
    while ((t = event_list_pop(&m->unuse)))
682
0
      thread_free(m, t);
683
0
  }
684
0
}
685
686
/* Stop thread scheduler. */
687
void event_master_free(struct event_loop *m)
688
0
{
689
0
  struct event *t;
690
691
0
  frr_with_mutex (&masters_mtx) {
692
0
    listnode_delete(masters, m);
693
0
    if (masters->count == 0)
694
0
      list_delete(&masters);
695
0
  }
696
697
0
  thread_array_free(m, m->read);
698
0
  thread_array_free(m, m->write);
699
0
  while ((t = event_timer_list_pop(&m->timer)))
700
0
    thread_free(m, t);
701
0
  thread_list_free(m, &m->event);
702
0
  thread_list_free(m, &m->ready);
703
0
  thread_list_free(m, &m->unuse);
704
0
  pthread_mutex_destroy(&m->mtx);
705
0
  pthread_cond_destroy(&m->cancel_cond);
706
0
  close(m->io_pipe[0]);
707
0
  close(m->io_pipe[1]);
708
0
  list_delete(&m->cancel_req);
709
0
  m->cancel_req = NULL;
710
711
0
  hash_clean_and_free(&m->cpu_record, cpu_record_hash_free);
712
713
0
  XFREE(MTYPE_EVENT_MASTER, m->name);
714
0
  XFREE(MTYPE_EVENT_MASTER, m->handler.pfds);
715
0
  XFREE(MTYPE_EVENT_MASTER, m->handler.copy);
716
0
  XFREE(MTYPE_EVENT_MASTER, m);
717
0
}
718
719
/* Return remaining time in milliseconds. */
720
unsigned long event_timer_remain_msec(struct event *thread)
721
0
{
722
0
  int64_t remain;
723
724
0
  if (!event_is_scheduled(thread))
725
0
    return 0;
726
727
0
  frr_with_mutex (&thread->mtx) {
728
0
    remain = monotime_until(&thread->u.sands, NULL) / 1000LL;
729
0
  }
730
731
0
  return remain < 0 ? 0 : remain;
732
0
}
733
734
/* Return remaining time in seconds. */
735
unsigned long event_timer_remain_second(struct event *thread)
736
0
{
737
0
  return event_timer_remain_msec(thread) / 1000LL;
738
0
}
739
740
struct timeval event_timer_remain(struct event *thread)
741
0
{
742
0
  struct timeval remain;
743
744
0
  frr_with_mutex (&thread->mtx) {
745
0
    monotime_until(&thread->u.sands, &remain);
746
0
  }
747
0
  return remain;
748
0
}
749
750
static int time_hhmmss(char *buf, int buf_size, long sec)
751
0
{
752
0
  long hh;
753
0
  long mm;
754
0
  int wr;
755
756
0
  assert(buf_size >= 8);
757
758
0
  hh = sec / 3600;
759
0
  sec %= 3600;
760
0
  mm = sec / 60;
761
0
  sec %= 60;
762
763
0
  wr = snprintf(buf, buf_size, "%02ld:%02ld:%02ld", hh, mm, sec);
764
765
0
  return wr != 8;
766
0
}
767
768
char *event_timer_to_hhmmss(char *buf, int buf_size, struct event *t_timer)
769
0
{
770
0
  if (t_timer)
771
0
    time_hhmmss(buf, buf_size, event_timer_remain_second(t_timer));
772
0
  else
773
0
    snprintf(buf, buf_size, "--:--:--");
774
775
0
  return buf;
776
0
}
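/*
 * Illustrative sketch, not part of the measured source: the remaining
 * time of a scheduled timer can be read either numerically with
 * event_timer_remain_msec() or as a formatted hh:mm:ss string with
 * event_timer_to_hhmmss().
 */
static void example_report_timer(struct vty *vty, struct event *t_timer)
{
  char buf[32];

  vty_out(vty, "timer fires in %lu msec (%s)\n",
    event_timer_remain_msec(t_timer),
    event_timer_to_hhmmss(buf, sizeof(buf), t_timer));
}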
777
778
/* Get new thread.  */
779
static struct event *thread_get(struct event_loop *m, uint8_t type,
780
        void (*func)(struct event *), void *arg,
781
        const struct xref_eventsched *xref)
782
0
{
783
0
  struct event *thread = event_list_pop(&m->unuse);
784
0
  struct cpu_event_history tmp;
785
786
0
  if (!thread) {
787
0
    thread = XCALLOC(MTYPE_THREAD, sizeof(struct event));
788
    /* mutex only needs to be initialized at struct creation. */
789
0
    pthread_mutex_init(&thread->mtx, NULL);
790
0
    m->alloc++;
791
0
  }
792
793
0
  thread->type = type;
794
0
  thread->add_type = type;
795
0
  thread->master = m;
796
0
  thread->arg = arg;
797
0
  thread->yield = EVENT_YIELD_TIME_SLOT; /* default */
798
0
  thread->ref = NULL;
799
0
  thread->ignore_timer_late = false;
800
801
  /*
802
   * If the passed-in funcname is not what we have
803
   * stored, then thread->hist needs to be
804
   * updated.  We keep the last one around in unused
805
   * under the assumption that we are probably
806
   * going to immediately allocate the same
807
   * type of thread.
808
   * This hopefully saves us some serious
809
   * hash_get lookups.
810
   */
811
0
  if ((thread->xref && thread->xref->funcname != xref->funcname)
812
0
      || thread->func != func) {
813
0
    tmp.func = func;
814
0
    tmp.funcname = xref->funcname;
815
0
    thread->hist =
816
0
      hash_get(m->cpu_record, &tmp,
817
0
         (void *(*)(void *))cpu_record_hash_alloc);
818
0
  }
819
0
  thread->hist->total_active++;
820
0
  thread->func = func;
821
0
  thread->xref = xref;
822
823
0
  return thread;
824
0
}
825
826
static void thread_free(struct event_loop *master, struct event *thread)
827
0
{
828
  /* Update statistics. */
829
0
  assert(master->alloc > 0);
830
0
  master->alloc--;
831
832
  /* Free allocated resources. */
833
0
  pthread_mutex_destroy(&thread->mtx);
834
0
  XFREE(MTYPE_THREAD, thread);
835
0
}
836
837
static int fd_poll(struct event_loop *m, const struct timeval *timer_wait,
838
       bool *eintr_p)
839
0
{
840
0
  sigset_t origsigs;
841
0
  unsigned char trash[64];
842
0
  nfds_t count = m->handler.copycount;
843
844
  /*
845
   * If timer_wait is null here, that means poll() should block
846
   * indefinitely, unless the event_master has overridden it by setting
847
   * ->selectpoll_timeout.
848
   *
849
   * If ->selectpoll_timeout is positive, it is the maximum number of
850
   * milliseconds to wait. If it is negative, we never wait and always
851
   * return immediately even if no event is detected. If it is zero,
852
   * the default behavior applies and the timeout comes from timer_wait.
853
   */
854
0
  int timeout = -1;
855
856
  /* number of file descriptors with events */
857
0
  int num;
858
859
0
  if (timer_wait != NULL && m->selectpoll_timeout == 0) {
860
    /* use the default value */
861
0
    timeout = (timer_wait->tv_sec * 1000)
862
0
        + (timer_wait->tv_usec / 1000);
863
0
  } else if (m->selectpoll_timeout > 0) {
864
    /* use the user's timeout */
865
0
    timeout = m->selectpoll_timeout;
866
0
  } else if (m->selectpoll_timeout < 0) {
867
    /* effect a poll (return immediately) */
868
0
    timeout = 0;
869
0
  }
870
871
0
  zlog_tls_buffer_flush();
872
0
  rcu_read_unlock();
873
0
  rcu_assert_read_unlocked();
874
875
  /* add poll pipe poker */
876
0
  assert(count + 1 < m->handler.pfdsize);
877
0
  m->handler.copy[count].fd = m->io_pipe[0];
878
0
  m->handler.copy[count].events = POLLIN;
879
0
  m->handler.copy[count].revents = 0x00;
880
881
  /* We need to deal with a signal-handling race here: we
882
   * don't want to miss a crucial signal, such as SIGTERM or SIGINT,
883
   * that may arrive just before we enter poll(). We will block the
884
   * key signals, then check whether any have arrived - if so, we return
885
   * before calling poll(). If not, we'll re-enable the signals
886
   * in the ppoll() call.
887
   */
888
889
0
  sigemptyset(&origsigs);
890
0
  if (m->handle_signals) {
891
    /* Main pthread that handles the app signals */
892
0
    if (frr_sigevent_check(&origsigs)) {
893
      /* Signal to process - restore signal mask and return */
894
0
      pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
895
0
      num = -1;
896
0
      *eintr_p = true;
897
0
      goto done;
898
0
    }
899
0
  } else {
900
    /* Don't make any changes for the non-main pthreads */
901
0
    pthread_sigmask(SIG_SETMASK, NULL, &origsigs);
902
0
  }
903
904
0
#if defined(HAVE_PPOLL)
905
0
  struct timespec ts, *tsp;
906
907
0
  if (timeout >= 0) {
908
0
    ts.tv_sec = timeout / 1000;
909
0
    ts.tv_nsec = (timeout % 1000) * 1000000;
910
0
    tsp = &ts;
911
0
  } else
912
0
    tsp = NULL;
913
914
0
  num = ppoll(m->handler.copy, count + 1, tsp, &origsigs);
915
0
  pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
916
#else
917
  /* Not ideal - there is a race after we restore the signal mask */
918
  pthread_sigmask(SIG_SETMASK, &origsigs, NULL);
919
  num = poll(m->handler.copy, count + 1, timeout);
920
#endif
921
922
0
done:
923
924
0
  if (num < 0 && errno == EINTR)
925
0
    *eintr_p = true;
926
927
0
  if (num > 0 && m->handler.copy[count].revents != 0 && num--)
928
0
    while (read(m->io_pipe[0], &trash, sizeof(trash)) > 0)
929
0
      ;
930
931
0
  rcu_read_lock();
932
933
0
  return num;
934
0
}
935
936
/* Add new read or write event, depending on the xref's event type. */
937
void _event_add_read_write(const struct xref_eventsched *xref,
938
         struct event_loop *m, void (*func)(struct event *),
939
         void *arg, int fd, struct event **t_ptr)
940
0
{
941
0
  int dir = xref->event_type;
942
0
  struct event *thread = NULL;
943
0
  struct event **thread_array;
944
945
0
  if (dir == EVENT_READ)
946
0
    frrtrace(9, frr_libfrr, schedule_read, m,
947
0
       xref->funcname, xref->xref.file, xref->xref.line,
948
0
       t_ptr, fd, 0, arg, 0);
949
0
  else
950
0
    frrtrace(9, frr_libfrr, schedule_write, m,
951
0
       xref->funcname, xref->xref.file, xref->xref.line,
952
0
       t_ptr, fd, 0, arg, 0);
953
954
0
  assert(fd >= 0);
955
0
  if (fd >= m->fd_limit)
956
0
    assert(!"Number of FD's open is greater than FRR currently configured to handle, aborting");
957
958
0
  frr_with_mutex (&m->mtx) {
959
    /* Thread is already scheduled; don't reschedule */
960
0
    if (t_ptr && *t_ptr)
961
0
      break;
962
963
    /* default to a new pollfd */
964
0
    nfds_t queuepos = m->handler.pfdcount;
965
966
0
    if (dir == EVENT_READ)
967
0
      thread_array = m->read;
968
0
    else
969
0
      thread_array = m->write;
970
971
    /*
972
     * if we already have a pollfd for our file descriptor, find and
973
     * use it
974
     */
975
0
    for (nfds_t i = 0; i < m->handler.pfdcount; i++)
976
0
      if (m->handler.pfds[i].fd == fd) {
977
0
        queuepos = i;
978
979
#ifdef DEV_BUILD
980
        /*
981
         * What happens if we have a thread already
982
         * created for this event?
983
         */
984
        if (thread_array[fd])
985
          assert(!"Thread already scheduled for file descriptor");
986
#endif
987
0
        break;
988
0
      }
989
990
    /* make sure we have room for this fd + pipe poker fd */
991
0
    assert(queuepos + 1 < m->handler.pfdsize);
992
993
0
    thread = thread_get(m, dir, func, arg, xref);
994
995
0
    m->handler.pfds[queuepos].fd = fd;
996
0
    m->handler.pfds[queuepos].events |=
997
0
      (dir == EVENT_READ ? POLLIN : POLLOUT);
998
999
0
    if (queuepos == m->handler.pfdcount)
1000
0
      m->handler.pfdcount++;
1001
1002
0
    if (thread) {
1003
0
      frr_with_mutex (&thread->mtx) {
1004
0
        thread->u.fd = fd;
1005
0
        thread_array[thread->u.fd] = thread;
1006
0
      }
1007
1008
0
      if (t_ptr) {
1009
0
        *t_ptr = thread;
1010
0
        thread->ref = t_ptr;
1011
0
      }
1012
0
    }
1013
1014
0
    AWAKEN(m);
1015
0
  }
1016
0
}
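/*
 * Illustrative sketch, not part of the measured source.  Callers do not
 * invoke _event_add_read_write() directly; they normally go through the
 * event_add_read()/event_add_write() wrapper macros (assumed here to be
 * provided by frrevent.h), which fill in the xref for them.
 */
static struct event *t_sock_read;

static void example_sock_read(struct event *t)
{
  int fd = t->u.fd;    /* the fd passed at schedule time */
  void *ctx = t->arg;  /* the arg passed at schedule time */

  (void)fd;
  (void)ctx;
  /* ... read from fd, then re-arm if more data is expected ... */
}

static void example_schedule_read(struct event_loop *loop, int fd, void *ctx)
{
  event_add_read(loop, example_sock_read, ctx, fd, &t_sock_read);
}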
1017
1018
static void _event_add_timer_timeval(const struct xref_eventsched *xref,
1019
             struct event_loop *m,
1020
             void (*func)(struct event *), void *arg,
1021
             struct timeval *time_relative,
1022
             struct event **t_ptr)
1023
0
{
1024
0
  struct event *thread;
1025
0
  struct timeval t;
1026
1027
0
  assert(m != NULL);
1028
1029
0
  assert(time_relative);
1030
1031
0
  frrtrace(9, frr_libfrr, schedule_timer, m,
1032
0
     xref->funcname, xref->xref.file, xref->xref.line,
1033
0
     t_ptr, 0, 0, arg, (long)time_relative->tv_sec);
1034
1035
  /* Compute expiration/deadline time. */
1036
0
  monotime(&t);
1037
0
  timeradd(&t, time_relative, &t);
1038
1039
0
  frr_with_mutex (&m->mtx) {
1040
0
    if (t_ptr && *t_ptr)
1041
      /* thread is already scheduled; don't reschedule */
1042
0
      return;
1043
1044
0
    thread = thread_get(m, EVENT_TIMER, func, arg, xref);
1045
1046
0
    frr_with_mutex (&thread->mtx) {
1047
0
      thread->u.sands = t;
1048
0
      event_timer_list_add(&m->timer, thread);
1049
0
      if (t_ptr) {
1050
0
        *t_ptr = thread;
1051
0
        thread->ref = t_ptr;
1052
0
      }
1053
0
    }
1054
1055
    /* The timer list is sorted - if this new timer
1056
     * might change the time we'll wait for, give the pthread
1057
     * a chance to re-compute.
1058
     */
1059
0
    if (event_timer_list_first(&m->timer) == thread)
1060
0
      AWAKEN(m);
1061
0
  }
1062
0
#define ONEYEAR2SEC (60 * 60 * 24 * 365)
1063
0
  if (time_relative->tv_sec > ONEYEAR2SEC)
1064
0
    flog_err(
1065
0
      EC_LIB_TIMER_TOO_LONG,
1066
0
      "Timer: %pTHD is created with an expiration that is greater than 1 year",
1067
0
      thread);
1068
0
}
1069
1070
1071
/* Add timer event thread. */
1072
void _event_add_timer(const struct xref_eventsched *xref, struct event_loop *m,
1073
          void (*func)(struct event *), void *arg, long timer,
1074
          struct event **t_ptr)
1075
0
{
1076
0
  struct timeval trel;
1077
1078
0
  assert(m != NULL);
1079
1080
0
  trel.tv_sec = timer;
1081
0
  trel.tv_usec = 0;
1082
1083
0
  _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1084
0
}
1085
1086
/* Add timer event thread with "millisecond" resolution */
1087
void _event_add_timer_msec(const struct xref_eventsched *xref,
1088
         struct event_loop *m, void (*func)(struct event *),
1089
         void *arg, long timer, struct event **t_ptr)
1090
0
{
1091
0
  struct timeval trel;
1092
1093
0
  assert(m != NULL);
1094
1095
0
  trel.tv_sec = timer / 1000;
1096
0
  trel.tv_usec = 1000 * (timer % 1000);
1097
1098
0
  _event_add_timer_timeval(xref, m, func, arg, &trel, t_ptr);
1099
0
}
1100
1101
/* Add timer event thread with "timeval" resolution */
1102
void _event_add_timer_tv(const struct xref_eventsched *xref,
1103
       struct event_loop *m, void (*func)(struct event *),
1104
       void *arg, struct timeval *tv, struct event **t_ptr)
1105
0
{
1106
0
  _event_add_timer_timeval(xref, m, func, arg, tv, t_ptr);
1107
0
}
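/*
 * Illustrative sketch, not part of the measured source.  Timers are
 * normally scheduled through the event_add_timer()/event_add_timer_msec()
 * wrapper macros (assumed to be the frrevent.h front ends for the
 * _event_add_timer* functions above).  Timers are one-shot, so periodic
 * behavior is obtained by re-adding the timer from its own handler.
 */
static struct event *t_hello;

static void example_hello_expire(struct event *t)
{
  /* re-arm for the next interval */
  event_add_timer(t->master, example_hello_expire, t->arg, 5, &t_hello);
}

static void example_start_hello(struct event_loop *loop, void *ctx)
{
  /* first expiry after 5 seconds; &t_hello is cleared when the timer runs */
  event_add_timer(loop, example_hello_expire, ctx, 5, &t_hello);
}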
1108
1109
/* Add simple event thread. */
1110
void _event_add_event(const struct xref_eventsched *xref, struct event_loop *m,
1111
          void (*func)(struct event *), void *arg, int val,
1112
          struct event **t_ptr)
1113
0
{
1114
0
  struct event *thread = NULL;
1115
1116
0
  frrtrace(9, frr_libfrr, schedule_event, m,
1117
0
     xref->funcname, xref->xref.file, xref->xref.line,
1118
0
     t_ptr, 0, val, arg, 0);
1119
1120
0
  assert(m != NULL);
1121
1122
0
  frr_with_mutex (&m->mtx) {
1123
0
    if (t_ptr && *t_ptr)
1124
      /* thread is already scheduled; don't reschedule */
1125
0
      break;
1126
1127
0
    thread = thread_get(m, EVENT_EVENT, func, arg, xref);
1128
0
    frr_with_mutex (&thread->mtx) {
1129
0
      thread->u.val = val;
1130
0
      event_list_add_tail(&m->event, thread);
1131
0
    }
1132
1133
0
    if (t_ptr) {
1134
0
      *t_ptr = thread;
1135
0
      thread->ref = t_ptr;
1136
0
    }
1137
1138
0
    AWAKEN(m);
1139
0
  }
1140
0
}
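/*
 * Illustrative sketch, not part of the measured source: "event" tasks
 * run on the next pass of the loop.  The event_add_event() wrapper macro
 * (assumed from frrevent.h) is the usual way to schedule one; because the
 * function above takes the loop's mutex and pokes the io_pipe, it can
 * also be used to hand work to another pthread's loop.
 */
static struct event *t_resync;

static void example_resync(struct event *t)
{
  int val = t->u.val;  /* the integer passed at schedule time */

  (void)val;
  /* ... do the deferred work ... */
}

static void example_request_resync(struct event_loop *loop)
{
  event_add_event(loop, example_resync, NULL, 0, &t_resync);
}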
1141
1142
/* Thread cancellation ------------------------------------------------------ */
1143
1144
/**
1145
 * NOT's out the .events field of pollfd corresponding to the given file
1146
 * descriptor. The event to be NOT'd is passed in the 'state' parameter.
1147
 *
1148
 * This needs to happen for both copies of pollfd's. See 'event_fetch'
1149
 * implementation for details.
1150
 *
1151
 * @param master
1152
 * @param fd
1153
 * @param state the event to cancel. One or more (OR'd together) of the
1154
 * following:
1155
 *   - POLLIN
1156
 *   - POLLOUT
1157
 */
1158
static void event_cancel_rw(struct event_loop *master, int fd, short state,
1159
          int idx_hint)
1160
0
{
1161
0
  bool found = false;
1162
1163
  /* find the index of corresponding pollfd */
1164
0
  nfds_t i;
1165
1166
  /* Cancel POLLHUP too just in case some bozo set it */
1167
0
  state |= POLLHUP;
1168
1169
  /* Some callers know the index of the pfd already */
1170
0
  if (idx_hint >= 0) {
1171
0
    i = idx_hint;
1172
0
    found = true;
1173
0
  } else {
1174
    /* Have to look for the fd in the pfd array */
1175
0
    for (i = 0; i < master->handler.pfdcount; i++)
1176
0
      if (master->handler.pfds[i].fd == fd) {
1177
0
        found = true;
1178
0
        break;
1179
0
      }
1180
0
  }
1181
1182
0
  if (!found) {
1183
0
    zlog_debug(
1184
0
      "[!] Received cancellation request for nonexistent rw job");
1185
0
    zlog_debug("[!] threadmaster: %s | fd: %d",
1186
0
         master->name ? master->name : "", fd);
1187
0
    return;
1188
0
  }
1189
1190
  /* NOT out event. */
1191
0
  master->handler.pfds[i].events &= ~(state);
1192
1193
  /* If all events are canceled, delete / resize the pollfd array. */
1194
0
  if (master->handler.pfds[i].events == 0) {
1195
0
    memmove(master->handler.pfds + i, master->handler.pfds + i + 1,
1196
0
      (master->handler.pfdcount - i - 1)
1197
0
        * sizeof(struct pollfd));
1198
0
    master->handler.pfdcount--;
1199
0
    master->handler.pfds[master->handler.pfdcount].fd = 0;
1200
0
    master->handler.pfds[master->handler.pfdcount].events = 0;
1201
0
  }
1202
1203
  /*
1204
   * If we have the same pollfd in the copy, perform the same operations,
1205
   * otherwise return.
1206
   */
1207
0
  if (i >= master->handler.copycount)
1208
0
    return;
1209
1210
0
  master->handler.copy[i].events &= ~(state);
1211
1212
0
  if (master->handler.copy[i].events == 0) {
1213
0
    memmove(master->handler.copy + i, master->handler.copy + i + 1,
1214
0
      (master->handler.copycount - i - 1)
1215
0
        * sizeof(struct pollfd));
1216
0
    master->handler.copycount--;
1217
0
    master->handler.copy[master->handler.copycount].fd = 0;
1218
0
    master->handler.copy[master->handler.copycount].events = 0;
1219
0
  }
1220
0
}
1221
1222
/*
1223
 * Process task cancellation given a task argument: iterate through the
1224
 * various lists of tasks, looking for any that match the argument.
1225
 */
1226
static void cancel_arg_helper(struct event_loop *master,
1227
            const struct cancel_req *cr)
1228
0
{
1229
0
  struct event *t;
1230
0
  nfds_t i;
1231
0
  int fd;
1232
0
  struct pollfd *pfd;
1233
1234
  /* We're only processing arg-based cancellations here. */
1235
0
  if (cr->eventobj == NULL)
1236
0
    return;
1237
1238
  /* First process the ready lists. */
1239
0
  frr_each_safe (event_list, &master->event, t) {
1240
0
    if (t->arg != cr->eventobj)
1241
0
      continue;
1242
0
    event_list_del(&master->event, t);
1243
0
    if (t->ref)
1244
0
      *t->ref = NULL;
1245
0
    thread_add_unuse(master, t);
1246
0
  }
1247
1248
0
  frr_each_safe (event_list, &master->ready, t) {
1249
0
    if (t->arg != cr->eventobj)
1250
0
      continue;
1251
0
    event_list_del(&master->ready, t);
1252
0
    if (t->ref)
1253
0
      *t->ref = NULL;
1254
0
    thread_add_unuse(master, t);
1255
0
  }
1256
1257
  /* If requested, stop here and ignore io and timers */
1258
0
  if (CHECK_FLAG(cr->flags, EVENT_CANCEL_FLAG_READY))
1259
0
    return;
1260
1261
  /* Check the io tasks */
1262
0
  for (i = 0; i < master->handler.pfdcount;) {
1263
0
    pfd = master->handler.pfds + i;
1264
1265
0
    if (pfd->events & POLLIN)
1266
0
      t = master->read[pfd->fd];
1267
0
    else
1268
0
      t = master->write[pfd->fd];
1269
1270
0
    if (t && t->arg == cr->eventobj) {
1271
0
      fd = pfd->fd;
1272
1273
      /* Found a match to cancel: clean up fd arrays */
1274
0
      event_cancel_rw(master, pfd->fd, pfd->events, i);
1275
1276
      /* Clean up thread arrays */
1277
0
      master->read[fd] = NULL;
1278
0
      master->write[fd] = NULL;
1279
1280
      /* Clear caller's ref */
1281
0
      if (t->ref)
1282
0
        *t->ref = NULL;
1283
1284
0
      thread_add_unuse(master, t);
1285
1286
      /* Don't increment 'i' since the cancellation will have
1287
       * removed the entry from the pfd array
1288
       */
1289
0
    } else
1290
0
      i++;
1291
0
  }
1292
1293
  /* Check the timer tasks */
1294
0
  t = event_timer_list_first(&master->timer);
1295
0
  while (t) {
1296
0
    struct event *t_next;
1297
1298
0
    t_next = event_timer_list_next(&master->timer, t);
1299
1300
0
    if (t->arg == cr->eventobj) {
1301
0
      event_timer_list_del(&master->timer, t);
1302
0
      if (t->ref)
1303
0
        *t->ref = NULL;
1304
0
      thread_add_unuse(master, t);
1305
0
    }
1306
1307
0
    t = t_next;
1308
0
  }
1309
0
}
1310
1311
/**
1312
 * Process cancellation requests.
1313
 *
1314
 * This may only be run from the pthread which owns the event_master.
1315
 *
1316
 * @param master the thread master to process
1317
 * @REQUIRE master->mtx
1318
 */
1319
static void do_event_cancel(struct event_loop *master)
1320
0
{
1321
0
  struct event_list_head *list = NULL;
1322
0
  struct event **thread_array = NULL;
1323
0
  struct event *thread;
1324
0
  struct cancel_req *cr;
1325
0
  struct listnode *ln;
1326
1327
0
  for (ALL_LIST_ELEMENTS_RO(master->cancel_req, ln, cr)) {
1328
    /*
1329
     * If this is an event object cancellation, search
1330
     * through task lists deleting any tasks which have the
1331
     * specified argument - use this handy helper function.
1332
     */
1333
0
    if (cr->eventobj) {
1334
0
      cancel_arg_helper(master, cr);
1335
0
      continue;
1336
0
    }
1337
1338
    /*
1339
     * The pointer varies depending on whether the cancellation
1340
     * request was made asynchronously or not. If it was, we
1341
     * need to check whether the thread even exists anymore
1342
     * before cancelling it.
1343
     */
1344
0
    thread = (cr->thread) ? cr->thread : *cr->threadref;
1345
1346
0
    if (!thread)
1347
0
      continue;
1348
1349
0
    list = NULL;
1350
0
    thread_array = NULL;
1351
1352
    /* Determine the appropriate queue to cancel the thread from */
1353
0
    switch (thread->type) {
1354
0
    case EVENT_READ:
1355
0
      event_cancel_rw(master, thread->u.fd, POLLIN, -1);
1356
0
      thread_array = master->read;
1357
0
      break;
1358
0
    case EVENT_WRITE:
1359
0
      event_cancel_rw(master, thread->u.fd, POLLOUT, -1);
1360
0
      thread_array = master->write;
1361
0
      break;
1362
0
    case EVENT_TIMER:
1363
0
      event_timer_list_del(&master->timer, thread);
1364
0
      break;
1365
0
    case EVENT_EVENT:
1366
0
      list = &master->event;
1367
0
      break;
1368
0
    case EVENT_READY:
1369
0
      list = &master->ready;
1370
0
      break;
1371
0
    case EVENT_UNUSED:
1372
0
    case EVENT_EXECUTE:
1373
0
      continue;
1374
0
      break;
1375
0
    }
1376
1377
0
    if (list)
1378
0
      event_list_del(list, thread);
1379
0
    else if (thread_array)
1380
0
      thread_array[thread->u.fd] = NULL;
1381
1382
0
    if (thread->ref)
1383
0
      *thread->ref = NULL;
1384
1385
0
    thread_add_unuse(thread->master, thread);
1386
0
  }
1387
1388
  /* Delete and free all cancellation requests */
1389
0
  if (master->cancel_req)
1390
0
    list_delete_all_node(master->cancel_req);
1391
1392
  /* Wake up any threads which may be blocked in event_cancel_async() */
1393
0
  master->canceled = true;
1394
0
  pthread_cond_broadcast(&master->cancel_cond);
1395
0
}
1396
1397
/*
1398
 * Helper function used for multiple flavors of arg-based cancellation.
1399
 */
1400
static void cancel_event_helper(struct event_loop *m, void *arg, int flags)
1401
0
{
1402
0
  struct cancel_req *cr;
1403
1404
0
  assert(m->owner == pthread_self());
1405
1406
  /* Only worth anything if caller supplies an arg. */
1407
0
  if (arg == NULL)
1408
0
    return;
1409
1410
0
  cr = XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1411
1412
0
  cr->flags = flags;
1413
1414
0
  frr_with_mutex (&m->mtx) {
1415
0
    cr->eventobj = arg;
1416
0
    listnode_add(m->cancel_req, cr);
1417
0
    do_event_cancel(m);
1418
0
  }
1419
0
}
1420
1421
/**
1422
 * Cancel any events which have the specified argument.
1423
 *
1424
 * MT-Unsafe
1425
 *
1426
 * @param master the event_master to cancel from
1427
 * @param arg the argument passed when creating the event
1428
 */
1429
void event_cancel_event(struct event_loop *master, void *arg)
1430
0
{
1431
0
  cancel_event_helper(master, arg, 0);
1432
0
}
1433
1434
/*
1435
 * Cancel ready tasks with an arg matching 'arg'
1436
 *
1437
 * MT-Unsafe
1438
 *
1439
 * @param m the event_master to cancel from
1440
 * @param arg the argument passed when creating the event
1441
 */
1442
void event_cancel_event_ready(struct event_loop *m, void *arg)
1443
0
{
1444
1445
  /* Only cancel ready/event tasks */
1446
0
  cancel_event_helper(m, arg, EVENT_CANCEL_FLAG_READY);
1447
0
}
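/*
 * Illustrative sketch, not part of the measured source: arg-based
 * cancellation drops every task whose arg matches, which is handy when
 * an object (here a hypothetical "struct peer") is being destroyed.
 */
struct peer;

static void example_peer_delete(struct event_loop *loop, struct peer *peer)
{
  /* must run on the pthread that owns 'loop' (MT-Unsafe, per the comments above) */
  event_cancel_event(loop, peer);
  /* ... free the peer afterwards ... */
}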
1448
1449
/**
1450
 * Cancel a specific task.
1451
 *
1452
 * MT-Unsafe
1453
 *
1454
 * @param thread task to cancel
1455
 */
1456
void event_cancel(struct event **thread)
1457
0
{
1458
0
  struct event_loop *master;
1459
1460
0
  if (thread == NULL || *thread == NULL)
1461
0
    return;
1462
1463
0
  master = (*thread)->master;
1464
1465
0
  frrtrace(9, frr_libfrr, event_cancel, master, (*thread)->xref->funcname,
1466
0
     (*thread)->xref->xref.file, (*thread)->xref->xref.line, NULL,
1467
0
     (*thread)->u.fd, (*thread)->u.val, (*thread)->arg,
1468
0
     (*thread)->u.sands.tv_sec);
1469
1470
0
  assert(master->owner == pthread_self());
1471
1472
0
  frr_with_mutex (&master->mtx) {
1473
0
    struct cancel_req *cr =
1474
0
      XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1475
0
    cr->thread = *thread;
1476
0
    listnode_add(master->cancel_req, cr);
1477
0
    do_event_cancel(master);
1478
0
  }
1479
1480
0
  *thread = NULL;
1481
0
}
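/*
 * Illustrative sketch, not part of the measured source: because
 * event_cancel() takes a struct event ** and NULLs it out, callers keep
 * the back-reference that was filled in at schedule time and cancel
 * through it; a NULL reference is a harmless no-op.
 */
static struct event *t_hold_timer;

static void example_stop_hold_timer(void)
{
  event_cancel(&t_hold_timer);  /* t_hold_timer is NULL afterwards */
}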
1482
1483
/**
1484
 * Asynchronous cancellation.
1485
 *
1486
 * Called with either a struct event ** or void * to an event argument,
1487
 * this function posts the correct cancellation request and blocks until it is
1488
 * serviced.
1489
 *
1490
 * If the thread is currently running, execution blocks until it completes.
1491
 *
1492
 * The last two parameters are mutually exclusive, i.e. if you pass one the
1493
 * other must be NULL.
1494
 *
1495
 * When the cancellation procedure executes on the target event_master, the
1496
 * thread * provided is checked for nullity. If it is null, the thread is
1497
 * assumed to no longer exist and the cancellation request is a no-op. Thus
1498
 * users of this API must pass a back-reference when scheduling the original
1499
 * task.
1500
 *
1501
 * MT-Safe
1502
 *
1503
 * @param master the thread master with the relevant event / task
1504
 * @param thread pointer to thread to cancel
1505
 * @param eventobj the event
1506
 */
1507
void event_cancel_async(struct event_loop *master, struct event **thread,
1508
      void *eventobj)
1509
0
{
1510
0
  assert(!(thread && eventobj) && (thread || eventobj));
1511
1512
0
  if (thread && *thread)
1513
0
    frrtrace(9, frr_libfrr, event_cancel_async, master,
1514
0
       (*thread)->xref->funcname, (*thread)->xref->xref.file,
1515
0
       (*thread)->xref->xref.line, NULL, (*thread)->u.fd,
1516
0
       (*thread)->u.val, (*thread)->arg,
1517
0
       (*thread)->u.sands.tv_sec);
1518
0
  else
1519
0
    frrtrace(9, frr_libfrr, event_cancel_async, master, NULL, NULL,
1520
0
       0, NULL, 0, 0, eventobj, 0);
1521
1522
0
  assert(master->owner != pthread_self());
1523
1524
0
  frr_with_mutex (&master->mtx) {
1525
0
    master->canceled = false;
1526
1527
0
    if (thread) {
1528
0
      struct cancel_req *cr =
1529
0
        XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1530
0
      cr->threadref = thread;
1531
0
      listnode_add(master->cancel_req, cr);
1532
0
    } else if (eventobj) {
1533
0
      struct cancel_req *cr =
1534
0
        XCALLOC(MTYPE_TMP, sizeof(struct cancel_req));
1535
0
      cr->eventobj = eventobj;
1536
0
      listnode_add(master->cancel_req, cr);
1537
0
    }
1538
0
    AWAKEN(master);
1539
1540
0
    while (!master->canceled)
1541
0
      pthread_cond_wait(&master->cancel_cond, &master->mtx);
1542
0
  }
1543
1544
0
  if (thread)
1545
0
    *thread = NULL;
1546
0
}
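/*
 * Illustrative sketch, not part of the measured source: a different
 * pthread cancels work on 'loop' with the asynchronous variant, which
 * blocks until the owning pthread has serviced the request.  Exactly one
 * of the last two arguments may be non-NULL.
 */
static void example_cancel_from_other_pthread(struct event_loop *loop,
                struct event **t_ref)
{
  /* must NOT be called from the pthread that owns 'loop' */
  event_cancel_async(loop, t_ref, NULL);
  /* *t_ref is NULL on return */
}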
1547
/* ------------------------------------------------------------------------- */
1548
1549
static struct timeval *thread_timer_wait(struct event_timer_list_head *timers,
1550
           struct timeval *timer_val)
1551
0
{
1552
0
  if (!event_timer_list_count(timers))
1553
0
    return NULL;
1554
1555
0
  struct event *next_timer = event_timer_list_first(timers);
1556
1557
0
  monotime_until(&next_timer->u.sands, timer_val);
1558
0
  return timer_val;
1559
0
}
1560
1561
static struct event *thread_run(struct event_loop *m, struct event *thread,
1562
        struct event *fetch)
1563
0
{
1564
0
  *fetch = *thread;
1565
0
  thread_add_unuse(m, thread);
1566
0
  return fetch;
1567
0
}
1568
1569
static int thread_process_io_helper(struct event_loop *m, struct event *thread,
1570
            short state, short actual_state, int pos)
1571
0
{
1572
0
  struct event **thread_array;
1573
1574
  /*
1575
   * poll() clears the .events field, but the pollfd array we
1576
   * pass to poll() is a copy of the one used to schedule threads.
1577
   * We need to synchronize state between the two here by applying
1578
   * the same changes poll() made on the copy of the "real" pollfd
1579
   * array.
1580
   *
1581
   * This cleans up a possible infinite loop where we refuse
1582
   * to respond to a poll event but poll is insistent that
1583
   * we should.
1584
   */
1585
0
  m->handler.pfds[pos].events &= ~(state);
1586
1587
0
  if (!thread) {
1588
0
    if ((actual_state & (POLLHUP|POLLIN)) != POLLHUP)
1589
0
      flog_err(EC_LIB_NO_THREAD,
1590
0
         "Attempting to process an I/O event but for fd: %d(%d) no thread to handle this!",
1591
0
         m->handler.pfds[pos].fd, actual_state);
1592
0
    return 0;
1593
0
  }
1594
1595
0
  if (thread->type == EVENT_READ)
1596
0
    thread_array = m->read;
1597
0
  else
1598
0
    thread_array = m->write;
1599
1600
0
  thread_array[thread->u.fd] = NULL;
1601
0
  event_list_add_tail(&m->ready, thread);
1602
0
  thread->type = EVENT_READY;
1603
1604
0
  return 1;
1605
0
}
1606
1607
/**
1608
 * Process I/O events.
1609
 *
1610
 * Walks through file descriptor array looking for those pollfds whose .revents
1611
 * field has something interesting. Deletes any invalid file descriptors.
1612
 *
1613
 * @param m the thread master
1614
 * @param num the number of active file descriptors (return value of poll())
1615
 */
1616
static void thread_process_io(struct event_loop *m, unsigned int num)
1617
0
{
1618
0
  unsigned int ready = 0;
1619
0
  struct pollfd *pfds = m->handler.copy;
1620
1621
0
  for (nfds_t i = 0; i < m->handler.copycount && ready < num; ++i) {
1622
    /* no event for current fd? immediately continue */
1623
0
    if (pfds[i].revents == 0)
1624
0
      continue;
1625
1626
0
    ready++;
1627
1628
    /*
1629
     * Unless someone has called event_cancel from another
1630
     * pthread, the only thing that could have changed in
1631
     * m->handler.pfds while we were asleep is the .events
1632
     * field in a given pollfd. Barring event_cancel() that
1633
     * value should be a superset of the values we have in our
1634
     * copy, so there's no need to update it. Similarily,
1635
     * barring deletion, the fd should still be a valid index
1636
     * into the master's pfds.
1637
     *
1638
     * We are including POLLERR here to do a READ event
1639
     * this is because the read should fail and the
1640
     * read function should handle it appropriately
1641
     */
1642
0
    if (pfds[i].revents & (POLLIN | POLLHUP | POLLERR)) {
1643
0
      thread_process_io_helper(m, m->read[pfds[i].fd], POLLIN,
1644
0
             pfds[i].revents, i);
1645
0
    }
1646
0
    if (pfds[i].revents & POLLOUT)
1647
0
      thread_process_io_helper(m, m->write[pfds[i].fd],
1648
0
             POLLOUT, pfds[i].revents, i);
1649
1650
    /*
1651
     * if one of our file descriptors is garbage, remove the same
1652
     * from both pfds + update sizes and index
1653
     */
1654
0
    if (pfds[i].revents & POLLNVAL) {
1655
0
      memmove(m->handler.pfds + i, m->handler.pfds + i + 1,
1656
0
        (m->handler.pfdcount - i - 1)
1657
0
          * sizeof(struct pollfd));
1658
0
      m->handler.pfdcount--;
1659
0
      m->handler.pfds[m->handler.pfdcount].fd = 0;
1660
0
      m->handler.pfds[m->handler.pfdcount].events = 0;
1661
1662
0
      memmove(pfds + i, pfds + i + 1,
1663
0
        (m->handler.copycount - i - 1)
1664
0
          * sizeof(struct pollfd));
1665
0
      m->handler.copycount--;
1666
0
      m->handler.copy[m->handler.copycount].fd = 0;
1667
0
      m->handler.copy[m->handler.copycount].events = 0;
1668
1669
0
      i--;
1670
0
    }
1671
0
  }
1672
0
}
1673
1674
/* Add all timers that have popped to the ready list. */
1675
static unsigned int thread_process_timers(struct event_loop *m,
1676
            struct timeval *timenow)
1677
0
{
1678
0
  struct timeval prev = *timenow;
1679
0
  bool displayed = false;
1680
0
  struct event *thread;
1681
0
  unsigned int ready = 0;
1682
1683
0
  while ((thread = event_timer_list_first(&m->timer))) {
1684
0
    if (timercmp(timenow, &thread->u.sands, <))
1685
0
      break;
1686
0
    prev = thread->u.sands;
1687
0
    prev.tv_sec += 4;
1688
    /*
1689
     * If the timer would have popped 4 seconds in the
1690
     * past then we are in a situation where we are
1691
     * really getting behind on handling of events.
1692
     * Let's log it and do the right thing with it.
1693
     */
1694
0
    if (timercmp(timenow, &prev, >)) {
1695
0
      atomic_fetch_add_explicit(
1696
0
        &thread->hist->total_starv_warn, 1,
1697
0
        memory_order_seq_cst);
1698
0
      if (!displayed && !thread->ignore_timer_late) {
1699
0
        flog_warn(
1700
0
          EC_LIB_STARVE_THREAD,
1701
0
          "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago",
1702
0
          thread);
1703
0
        displayed = true;
1704
0
      }
1705
0
    }
1706
1707
0
    event_timer_list_pop(&m->timer);
1708
0
    thread->type = EVENT_READY;
1709
0
    event_list_add_tail(&m->ready, thread);
1710
0
    ready++;
1711
0
  }
1712
1713
0
  return ready;
1714
0
}
1715
1716
/* process a list en masse, e.g. for event thread lists */
1717
static unsigned int thread_process(struct event_list_head *list)
1718
0
{
1719
0
  struct event *thread;
1720
0
  unsigned int ready = 0;
1721
1722
0
  while ((thread = event_list_pop(list))) {
1723
0
    thread->type = EVENT_READY;
1724
0
    event_list_add_tail(&thread->master->ready, thread);
1725
0
    ready++;
1726
0
  }
1727
0
  return ready;
1728
0
}
1729
1730
1731
/* Fetch next ready thread. */
1732
struct event *event_fetch(struct event_loop *m, struct event *fetch)
1733
0
{
1734
0
  struct event *thread = NULL;
1735
0
  struct timeval now;
1736
0
  struct timeval zerotime = {0, 0};
1737
0
  struct timeval tv;
1738
0
  struct timeval *tw = NULL;
1739
0
  bool eintr_p = false;
1740
0
  int num = 0;
1741
1742
0
  do {
1743
    /* Handle signals if any */
1744
0
    if (m->handle_signals)
1745
0
      frr_sigevent_process();
1746
1747
0
    pthread_mutex_lock(&m->mtx);
1748
1749
    /* Process any pending cancellation requests */
1750
0
    do_event_cancel(m);
1751
1752
    /*
1753
     * Attempt to flush ready queue before going into poll().
1754
     * This is performance-critical. Think twice before modifying.
1755
     */
1756
0
    if ((thread = event_list_pop(&m->ready))) {
1757
0
      fetch = thread_run(m, thread, fetch);
1758
0
      if (fetch->ref)
1759
0
        *fetch->ref = NULL;
1760
0
      pthread_mutex_unlock(&m->mtx);
1761
0
      if (!m->ready_run_loop)
1762
0
        GETRUSAGE(&m->last_getrusage);
1763
0
      m->ready_run_loop = true;
1764
0
      break;
1765
0
    }
1766
1767
0
    m->ready_run_loop = false;
1768
    /* otherwise, tick through scheduling sequence */
1769
1770
    /*
1771
     * Post events to ready queue. This must come before the
1772
     * following block since events should occur immediately
1773
     */
1774
0
    thread_process(&m->event);
1775
1776
    /*
1777
     * If there are no tasks on the ready queue, we will poll()
1778
     * until a timer expires or we receive I/O, whichever comes
1779
     * first. The strategy for doing this is:
1780
     *
1781
     * - If there are events pending, set the poll() timeout to zero
1782
     * - If there are no events pending, but there are timers
1783
     * pending, set the timeout to the smallest remaining time on
1784
     * any timer.
1785
     * - If there are neither timers nor events pending, but there
1786
     * are file descriptors pending, block indefinitely in poll()
1787
     * - If nothing is pending, it's time for the application to die
1788
     *
1789
     * In every case except the last, we need to hit poll() at least
1790
     * once per loop to avoid starvation by events
1791
     */
1792
0
    if (!event_list_count(&m->ready))
1793
0
      tw = thread_timer_wait(&m->timer, &tv);
1794
1795
0
    if (event_list_count(&m->ready) ||
1796
0
        (tw && !timercmp(tw, &zerotime, >)))
1797
0
      tw = &zerotime;
1798
1799
0
    if (!tw && m->handler.pfdcount == 0) { /* die */
1800
0
      pthread_mutex_unlock(&m->mtx);
1801
0
      fetch = NULL;
1802
0
      break;
1803
0
    }
1804
1805
    /*
1806
     * Copy pollfd array + # active pollfds in it. Not necessary to
1807
     * copy the array size as this is fixed.
1808
     */
1809
0
    m->handler.copycount = m->handler.pfdcount;
1810
0
    memcpy(m->handler.copy, m->handler.pfds,
1811
0
           m->handler.copycount * sizeof(struct pollfd));
1812
1813
0
    pthread_mutex_unlock(&m->mtx);
1814
0
    {
1815
0
      eintr_p = false;
1816
0
      num = fd_poll(m, tw, &eintr_p);
1817
0
    }
1818
0
    pthread_mutex_lock(&m->mtx);
1819
1820
    /* Handle any errors received in poll() */
1821
0
    if (num < 0) {
1822
0
      if (eintr_p) {
1823
0
        pthread_mutex_unlock(&m->mtx);
1824
        /* loop around to signal handler */
1825
0
        continue;
1826
0
      }
1827
1828
      /* else die */
1829
0
      flog_err(EC_LIB_SYSTEM_CALL, "poll() error: %s",
1830
0
         safe_strerror(errno));
1831
0
      pthread_mutex_unlock(&m->mtx);
1832
0
      fetch = NULL;
1833
0
      break;
1834
0
    }
1835
1836
    /* Post timers to ready queue. */
1837
0
    monotime(&now);
1838
0
    thread_process_timers(m, &now);
1839
1840
    /* Post I/O to ready queue. */
1841
0
    if (num > 0)
1842
0
      thread_process_io(m, num);
1843
1844
0
    pthread_mutex_unlock(&m->mtx);
1845
1846
0
  } while (!thread && m->spin);
1847
1848
0
  return fetch;
1849
0
}
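event_fetch() is meant to be driven from a daemon's main loop: fetch the next ready task into a caller-provided struct event, run it with event_call(), and repeat until the scheduler has nothing left (no events, timers, or file descriptors), at which point it returns NULL. A minimal driver sketch, assuming an event_loop created elsewhere; this mirrors what frr_run() does, and the wrapper name here is illustrative:

#include "frrevent.h"

/* Run tasks until event_fetch() returns NULL, i.e. the loop has
 * nothing left to schedule and the daemon should exit. */
static void run_event_loop(struct event_loop *master)
{
        struct event task;

        while (event_fetch(master, &task))
                event_call(&task);
}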
1850
1851
static unsigned long timeval_elapsed(struct timeval a, struct timeval b)
1852
0
{
1853
0
  return (((a.tv_sec - b.tv_sec) * TIMER_SECOND_MICRO)
1854
0
    + (a.tv_usec - b.tv_usec));
1855
0
}
1856
1857
unsigned long event_consumed_time(RUSAGE_T *now, RUSAGE_T *start,
1858
          unsigned long *cputime)
1859
0
{
1860
0
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1861
1862
#ifdef __FreeBSD__
1863
  /*
1864
   * FreeBSD appears to have an issue where two clock_gettime
1865
   * calls with CLOCK_THREAD_CPUTIME_ID made very close together
1866
   * occasionally return a 'now' time that is earlier than the
1867
   * 'start' time.  The subtraction then wraps to a very large
1868
   * value and FRR reports spurious CPU HOGs.
1869
   *
1870
   * Cheat a little here: when the inversion is detected, clamp
1871
   * 'now' to just past 'start' so the wraparound cannot
1872
   * happen.
1873
   */
1874
  if (start->cpu.tv_sec == now->cpu.tv_sec &&
1875
      start->cpu.tv_nsec > now->cpu.tv_nsec)
1876
    now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1877
  else if (start->cpu.tv_sec > now->cpu.tv_sec) {
1878
    now->cpu.tv_sec = start->cpu.tv_sec;
1879
    now->cpu.tv_nsec = start->cpu.tv_nsec + 1;
1880
  }
1881
#endif
1882
0
  *cputime = (now->cpu.tv_sec - start->cpu.tv_sec) * TIMER_SECOND_MICRO
1883
0
       + (now->cpu.tv_nsec - start->cpu.tv_nsec) / 1000;
1884
#else
1885
  /* This is 'user + sys' time.  */
1886
  *cputime = timeval_elapsed(now->cpu.ru_utime, start->cpu.ru_utime)
1887
       + timeval_elapsed(now->cpu.ru_stime, start->cpu.ru_stime);
1888
#endif
1889
0
  return timeval_elapsed(now->real, start->real);
1890
0
}
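event_consumed_time() returns wall-clock microseconds and writes CPU microseconds through *cputime; event_call() compares both against their respective thresholds. A hedged sketch of the same measurement pattern around an arbitrary callback (RUSAGE_T and GETRUSAGE are the same names this file uses from frrevent.h; the helper itself is illustrative):

#include "frrevent.h"
#include "log.h"

/* Measure a callback the same way event_call() measures a task body. */
static void measure_work(void (*work)(void))
{
        RUSAGE_T before, after;
        unsigned long walltime, cputime;

        GETRUSAGE(&before);
        work();
        GETRUSAGE(&after);

        walltime = event_consumed_time(&after, &before, &cputime);
        zlog_debug("work took %lu us wall clock, %lu us cpu", walltime,
                   cputime);
}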
1891
1892
/*
1893
 * We should aim to yield after 'yield' microseconds, which defaults
1894
 * to EVENT_YIELD_TIME_SLOT.
1895
 * Note: we are using real (wall clock) time for this calculation.
1896
 * It could be argued that CPU time may make more sense in certain
1897
 * contexts.  The things to consider are whether the thread may have
1898
 * blocked (in which case wall time increases, but CPU time does not),
1899
 * or whether the system is heavily loaded with other processes competing
1900
 * for CPU time.  On balance, wall clock time seems to make sense.
1901
 * Plus it has the added benefit that gettimeofday should be faster
1902
 * than calling getrusage.
1903
 */
1904
int event_should_yield(struct event *thread)
1905
0
{
1906
0
  int result;
1907
1908
0
  frr_with_mutex (&thread->mtx) {
1909
0
    result = monotime_since(&thread->real, NULL)
1910
0
       > (int64_t)thread->yield;
1911
0
  }
1912
0
  return result;
1913
0
}
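event_should_yield() is the hook for long-running work to cooperate with the scheduler: process a bounded chunk, and once the wall-clock budget (thread->yield, EVENT_YIELD_TIME_SLOT by default) is exceeded, reschedule the remainder instead of monopolizing the loop. A sketch of that pattern; struct batch_ctx and its helpers are hypothetical stand-ins for the caller's own work queue:

#include <stdbool.h>

#include "frrevent.h"

struct batch_ctx;                              /* hypothetical work queue */
bool batch_has_items(struct batch_ctx *ctx);   /* hypothetical helper */
void batch_do_one(struct batch_ctx *ctx);      /* hypothetical helper */

static void batch_worker(struct event *thread)
{
        struct batch_ctx *ctx = EVENT_ARG(thread);

        while (batch_has_items(ctx)) {
                batch_do_one(ctx);
                if (event_should_yield(thread)) {
                        /* Budget exhausted: requeue ourselves and let
                         * other tasks run. */
                        event_add_event(thread->master, batch_worker, ctx, 0,
                                        NULL);
                        return;
                }
        }
}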
1914
1915
void event_set_yield_time(struct event *thread, unsigned long yield_time)
1916
0
{
1917
0
  frr_with_mutex (&thread->mtx) {
1918
0
    thread->yield = yield_time;
1919
0
  }
1920
0
}
1921
1922
void event_getrusage(RUSAGE_T *r)
1923
0
{
1924
0
  monotime(&r->real);
1925
0
  if (!cputime_enabled) {
1926
0
    memset(&r->cpu, 0, sizeof(r->cpu));
1927
0
    return;
1928
0
  }
1929
1930
0
#ifdef HAVE_CLOCK_THREAD_CPUTIME_ID
1931
  /* not currently implemented in Linux's vDSO, but maybe at some point
1932
   * in the future?
1933
   */
1934
0
  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &r->cpu);
1935
#else /* !HAVE_CLOCK_THREAD_CPUTIME_ID */
1936
#if defined RUSAGE_THREAD
1937
#define FRR_RUSAGE RUSAGE_THREAD
1938
#else
1939
#define FRR_RUSAGE RUSAGE_SELF
1940
#endif
1941
  getrusage(FRR_RUSAGE, &(r->cpu));
1942
#endif
1943
0
}
1944
1945
/*
1946
 * Call a thread.
1947
 *
1948
 * This function will atomically update the thread's usage history. At present
1949
 * this is the only spot where usage history is written. Nevertheless the code
1950
 * has been written such that the introduction of writers in the future should
1951
 * not need to update it provided the writers atomically perform only the
1952
 * operations done here, i.e. updating the total and maximum times. In
1953
 * particular, the maximum real and cpu times must be monotonically increasing
1954
 * or this code is not correct.
1955
 */
1956
void event_call(struct event *thread)
1957
0
{
1958
0
  RUSAGE_T before, after;
1959
1960
  /* if the thread being called is the CLI, it may change cputime_enabled
1961
   * ("service cputime-stats" command), which can result in nonsensical
1962
   * and very confusing warnings
1963
   */
1964
0
  bool cputime_enabled_here = cputime_enabled;
1965
1966
0
  if (thread->master->ready_run_loop)
1967
0
    before = thread->master->last_getrusage;
1968
0
  else
1969
0
    GETRUSAGE(&before);
1970
1971
0
  thread->real = before.real;
1972
1973
0
  frrtrace(9, frr_libfrr, event_call, thread->master,
1974
0
     thread->xref->funcname, thread->xref->xref.file,
1975
0
     thread->xref->xref.line, NULL, thread->u.fd, thread->u.val,
1976
0
     thread->arg, thread->u.sands.tv_sec);
1977
1978
0
  pthread_setspecific(thread_current, thread);
1979
0
  (*thread->func)(thread);
1980
0
  pthread_setspecific(thread_current, NULL);
1981
1982
0
  GETRUSAGE(&after);
1983
0
  thread->master->last_getrusage = after;
1984
1985
0
  unsigned long walltime, cputime;
1986
0
  unsigned long exp;
1987
1988
0
  walltime = event_consumed_time(&after, &before, &cputime);
1989
1990
  /* update walltime */
1991
0
  atomic_fetch_add_explicit(&thread->hist->real.total, walltime,
1992
0
          memory_order_seq_cst);
1993
0
  exp = atomic_load_explicit(&thread->hist->real.max,
1994
0
           memory_order_seq_cst);
1995
0
  while (exp < walltime
1996
0
         && !atomic_compare_exchange_weak_explicit(
1997
0
           &thread->hist->real.max, &exp, walltime,
1998
0
           memory_order_seq_cst, memory_order_seq_cst))
1999
0
    ;
2000
2001
0
  if (cputime_enabled_here && cputime_enabled) {
2002
    /* update cputime */
2003
0
    atomic_fetch_add_explicit(&thread->hist->cpu.total, cputime,
2004
0
            memory_order_seq_cst);
2005
0
    exp = atomic_load_explicit(&thread->hist->cpu.max,
2006
0
             memory_order_seq_cst);
2007
0
    while (exp < cputime
2008
0
           && !atomic_compare_exchange_weak_explicit(
2009
0
             &thread->hist->cpu.max, &exp, cputime,
2010
0
             memory_order_seq_cst, memory_order_seq_cst))
2011
0
      ;
2012
0
  }
2013
2014
0
  atomic_fetch_add_explicit(&thread->hist->total_calls, 1,
2015
0
          memory_order_seq_cst);
2016
0
  atomic_fetch_or_explicit(&thread->hist->types, 1 << thread->add_type,
2017
0
         memory_order_seq_cst);
2018
2019
0
  if (cputime_enabled_here && cputime_enabled && cputime_threshold
2020
0
      && cputime > cputime_threshold) {
2021
    /*
2022
     * We have a CPU hog on our hands: the CPU time this task
2023
     * consumed (not time spent sleeping) exceeds the configured
2024
     * threshold (5 seconds by default).  Whinge about it now, so
2025
     * we're aware this is yet another task to fix.
2026
     */
2027
0
    atomic_fetch_add_explicit(&thread->hist->total_cpu_warn,
2028
0
            1, memory_order_seq_cst);
2029
0
    flog_warn(
2030
0
      EC_LIB_SLOW_THREAD_CPU,
2031
0
      "CPU HOG: task %s (%lx) ran for %lums (cpu time %lums)",
2032
0
      thread->xref->funcname, (unsigned long)thread->func,
2033
0
      walltime / 1000, cputime / 1000);
2034
2035
0
  } else if (walltime_threshold && walltime > walltime_threshold) {
2036
    /*
2037
     * The runtime for a task is greater than 5 seconds, but the
2038
     * cpu time is under 5 seconds.  Let's whine about this because
2039
     * this could imply some sort of scheduling issue.
2040
     */
2041
0
    atomic_fetch_add_explicit(&thread->hist->total_wall_warn,
2042
0
            1, memory_order_seq_cst);
2043
0
    flog_warn(
2044
0
      EC_LIB_SLOW_THREAD_WALL,
2045
0
      "STARVATION: task %s (%lx) ran for %lums (cpu time %lums)",
2046
0
      thread->xref->funcname, (unsigned long)thread->func,
2047
0
      walltime / 1000, cputime / 1000);
2048
0
  }
2049
0
}
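The history update above uses a standard lock-free pattern: atomically add each sample to a running total, then raise the recorded maximum with a compare-exchange loop so concurrent writers can only move it upward, as the function comment requires. A self-contained C11 illustration of the same idiom (not FRR code):

#include <stdatomic.h>

struct runtime_stats {
        atomic_ulong total;     /* accumulated microseconds */
        atomic_ulong max;       /* monotonically increasing maximum */
};

/* Record one sample; safe to call concurrently from multiple threads. */
static void stats_record(struct runtime_stats *s, unsigned long sample)
{
        unsigned long exp;

        atomic_fetch_add_explicit(&s->total, sample, memory_order_seq_cst);

        exp = atomic_load_explicit(&s->max, memory_order_seq_cst);
        while (exp < sample &&
               !atomic_compare_exchange_weak_explicit(&s->max, &exp, sample,
                                                      memory_order_seq_cst,
                                                      memory_order_seq_cst))
                ; /* 'exp' now holds the current max; retry while smaller */
}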
2050
2051
/* Execute thread */
2052
void _event_execute(const struct xref_eventsched *xref, struct event_loop *m,
2053
        void (*func)(struct event *), void *arg, int val)
2054
0
{
2055
0
  struct event *thread;
2056
2057
  /* Get or allocate new thread to execute. */
2058
0
  frr_with_mutex (&m->mtx) {
2059
0
    thread = thread_get(m, EVENT_EVENT, func, arg, xref);
2060
2061
    /* Set its event value. */
2062
0
    frr_with_mutex (&thread->mtx) {
2063
0
      thread->add_type = EVENT_EXECUTE;
2064
0
      thread->u.val = val;
2065
0
      thread->ref = &thread;
2066
0
    }
2067
0
  }
2068
2069
  /* Execute thread doing all accounting. */
2070
0
  event_call(thread);
2071
2072
  /* Give back or free thread. */
2073
0
  thread_add_unuse(m, thread);
2074
0
}
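Callers normally reach _event_execute() through the event_execute() convenience macro, which supplies the xref automatically and runs the callback synchronously with full CPU accounting. A hedged usage sketch, assuming the macro arity that pairs with this version of _event_execute() (loop, callback, arg, val):

#include "frrevent.h"

static void flush_now(struct event *thread)
{
        int reason = EVENT_VAL(thread);

        /* ... do the synchronous work here ... */
        (void)reason;
}

/* Run flush_now() immediately on 'master' instead of scheduling it;
 * assumes the four-argument event_execute() macro wrapping the
 * function above. */
static void trigger_flush(struct event_loop *master)
{
        event_execute(master, flush_now, NULL, 1);
}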
2075
2076
/* Debug signal mask - if 'sigs' is NULL, use current effective mask. */
2077
void debug_signals(const sigset_t *sigs)
2078
0
{
2079
0
  int i, found;
2080
0
  sigset_t tmpsigs;
2081
0
  char buf[300];
2082
2083
  /*
2084
   * We're only looking at the non-realtime signals here, so we need
2085
   * some limit value. Platform differences mean at some point we just
2086
   * need to pick a reasonable value.
2087
   */
2088
0
#if defined SIGRTMIN
2089
0
#  define LAST_SIGNAL SIGRTMIN
2090
#else
2091
#  define LAST_SIGNAL 32
2092
#endif
2093
2094
2095
0
  if (sigs == NULL) {
2096
0
    sigemptyset(&tmpsigs);
2097
0
    pthread_sigmask(SIG_BLOCK, NULL, &tmpsigs);
2098
0
    sigs = &tmpsigs;
2099
0
  }
2100
2101
0
  found = 0;
2102
0
  buf[0] = '\0';
2103
2104
0
  for (i = 0; i < LAST_SIGNAL; i++) {
2105
0
    char tmp[20];
2106
2107
0
    if (sigismember(sigs, i) > 0) {
2108
0
      if (found > 0)
2109
0
        strlcat(buf, ",", sizeof(buf));
2110
0
      snprintf(tmp, sizeof(tmp), "%d", i);
2111
0
      strlcat(buf, tmp, sizeof(buf));
2112
0
      found++;
2113
0
    }
2114
0
  }
2115
2116
0
  if (found == 0)
2117
0
    snprintf(buf, sizeof(buf), "<none>");
2118
2119
0
  zlog_debug("%s: %s", __func__, buf);
2120
0
}
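Passing NULL asks debug_signals() to report the calling pthread's currently blocked mask, which is the usual way to check what a helper thread actually inherited. A small illustrative sketch (assuming the declaration is visible via FRR's event header):

#include <signal.h>

#include "frrevent.h"

/* Log a proposed mask, then the mask currently in effect for this
 * pthread. */
static void show_signal_masks(void)
{
        sigset_t newmask;

        sigemptyset(&newmask);
        sigaddset(&newmask, SIGTERM);
        sigaddset(&newmask, SIGINT);

        debug_signals(&newmask);        /* the mask we intend to block */
        debug_signals(NULL);            /* what this thread currently blocks */
}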
2121
2122
static ssize_t printfrr_thread_dbg(struct fbuf *buf, struct printfrr_eargs *ea,
2123
           const struct event *thread)
2124
0
{
2125
0
  static const char *const types[] = {
2126
0
    [EVENT_READ] = "read",    [EVENT_WRITE] = "write",
2127
0
    [EVENT_TIMER] = "timer",  [EVENT_EVENT] = "event",
2128
0
    [EVENT_READY] = "ready",  [EVENT_UNUSED] = "unused",
2129
0
    [EVENT_EXECUTE] = "exec",
2130
0
  };
2131
0
  ssize_t rv = 0;
2132
0
  char info[16] = "";
2133
2134
0
  if (!thread)
2135
0
    return bputs(buf, "{(thread *)NULL}");
2136
2137
0
  rv += bprintfrr(buf, "{(thread *)%p arg=%p", thread, thread->arg);
2138
2139
0
  if (thread->type < array_size(types) && types[thread->type])
2140
0
    rv += bprintfrr(buf, " %-6s", types[thread->type]);
2141
0
  else
2142
0
    rv += bprintfrr(buf, " INVALID(%u)", thread->type);
2143
2144
0
  switch (thread->type) {
2145
0
  case EVENT_READ:
2146
0
  case EVENT_WRITE:
2147
0
    snprintfrr(info, sizeof(info), "fd=%d", thread->u.fd);
2148
0
    break;
2149
2150
0
  case EVENT_TIMER:
2151
0
    snprintfrr(info, sizeof(info), "r=%pTVMud", &thread->u.sands);
2152
0
    break;
2153
0
  case EVENT_READY:
2154
0
  case EVENT_EVENT:
2155
0
  case EVENT_UNUSED:
2156
0
  case EVENT_EXECUTE:
2157
0
    break;
2158
0
  }
2159
2160
0
  rv += bprintfrr(buf, " %-12s %s() %s from %s:%d}", info,
2161
0
      thread->xref->funcname, thread->xref->dest,
2162
0
      thread->xref->xref.file, thread->xref->xref.line);
2163
0
  return rv;
2164
0
}
2165
2166
printfrr_ext_autoreg_p("TH", printfrr_thread);
2167
static ssize_t printfrr_thread(struct fbuf *buf, struct printfrr_eargs *ea,
2168
             const void *ptr)
2169
0
{
2170
0
  const struct event *thread = ptr;
2171
0
  struct timespec remain = {};
2172
2173
0
  if (ea->fmt[0] == 'D') {
2174
0
    ea->fmt++;
2175
0
    return printfrr_thread_dbg(buf, ea, thread);
2176
0
  }
2177
2178
0
  if (!thread) {
2179
    /* need to jump over time formatting flag characters in the
2180
     * input format string, i.e. adjust ea->fmt!
2181
     */
2182
0
    printfrr_time(buf, ea, &remain,
2183
0
            TIMEFMT_TIMER_DEADLINE | TIMEFMT_SKIP);
2184
0
    return bputch(buf, '-');
2185
0
  }
2186
2187
0
  TIMEVAL_TO_TIMESPEC(&thread->u.sands, &remain);
2188
0
  return printfrr_time(buf, ea, &remain, TIMEFMT_TIMER_DEADLINE);
2189
0
}
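The "TH" extension registered here backs the %pTH printfrr specifier: plain %pTH prints the remaining time of a timer task (honouring the usual time-format flags), while %pTHD switches to the verbose debug form produced by printfrr_thread_dbg(), as used by the starvation warning earlier in this file. A hedged usage sketch:

#include "frrevent.h"
#include "log.h"

/* Illustrative only: log a scheduled task both ways. */
static void log_task(const struct event *task)
{
        zlog_debug("timer fires in %pTH", task);   /* remaining time */
        zlog_debug("task details: %pTHD", task);   /* full debug dump */
}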